summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet/ip/tnet.c
diff options
context:
space:
mode:
authorErik Nordmark <Erik.Nordmark@Sun.COM>2009-11-11 11:49:49 -0800
committerErik Nordmark <Erik.Nordmark@Sun.COM>2009-11-11 11:49:49 -0800
commitbd670b35a010421b6e1a5536c34453a827007c81 (patch)
tree97c2057b6771dd40411a12eb89d2db2e2b2cce31 /usr/src/uts/common/inet/ip/tnet.c
parentb3388e4fc5f5c24c8a39fbe132a00b02dae5b717 (diff)
downloadillumos-joyent-bd670b35a010421b6e1a5536c34453a827007c81.tar.gz
PSARC/2009/331 IP Datapath Refactoring
PSARC/2008/522 EOF of 2001/070 IPsec HW Acceleration support PSARC/2009/495 netstat -r flags for blackhole and reject routes PSARC 2009/496 EOF of XRESOLV PSARC/2009/494 IP_DONTFRAG socket option PSARC/2009/515 fragmentation controls for ping and traceroute 6798716 ip_newroute delenda est 6798739 ARP and IP are too separate 6807265 IPv4 ip2mac() support 6756382 Please remove Venus IPsec HWACCEL code 6880632 sendto/sendmsg never returns EHOSTUNREACH in Solaris 6748582 sendmsg() return OK, but doesn't send message using IPv4-mapped x IPv6 addr 1119790 TCP and path mtu discovery 4637227 should support equal-cost multi-path (ECMP) 5078568 getsockopt() for IPV6_PATHMTU on a non-connected socket should not succeed 6419648 "AR* contract private note" should be removed as part of ATM SW EOL 6274715 Arp could keep the old entry in the cache while it waits for an arp response 6605615 Remove duplicated TCP/IP opt_set/opt_get code; use conn_t 6874677 IP_TTL can be used to send with ttl zero 4034090 arp should not let you delete your own entry 6882140 Implement IP_DONTFRAG socket option 6883858 Implement ping -D option; traceroute -F should work for IPv6 and shared-IP zones 1119792 TCP/IP black hole detection is broken on receiver 4078796 Directed broadcast forwarding code has problems 4104337 restrict the IPPROTO_IP and IPPROTO_IPV6 options based on the socket family 4203747 Source address selection for source routed packets 4230259 pmtu is increased every ip_ire_pathmtu_interval timer value. 
4300533 When sticky option ipv6_pktinfo set to bogus address subsequent connect time out 4471035 ire_delete_cache_gw is called through ire_walk unnecessarily 4514572 SO_DONTROUTE socket option doesn't work with IPv6 4524980 tcp_lookup_ipv4() should compare the ifindex against tcpb->tcpb_bound_if 4532714 machine fails to switch quickly among failed default routes 4634219 IPv6 path mtu discovery is broken when using routing header 4691581 udp broadcast handling causes too many replicas 4708405 mcast is broken on machines when all interfaces are IFF_POINTOPOINT 4770457 netstat/route: source address of interface routes pretends to be gateway address 4786974 use routing table to determine routes/interface for multicast 4792619 An ip_fanout_udp_ipc_v6() routine might lead to some simpler code 4816115 Nuke ipsec_out_use_global_policy 4862844 ipsec offload corner case 4867533 tcp_rq and tcp_wq are redundant 4868589 NCEs should be shared across an IPMP group 4872093 unplumbing an improper virtual interface panics in ip_newroute_get_dst_ill() 4901671 FireEngine needs some cleanup 4907617 IPsec identity latching should be done before sending SYN-ACK 4941461 scopeid and IPV6_PKTINFO with UDP/ICMP connect() does not work properly 4944981 ip does nothing with IP6I_NEXTHOP 4963353 IPv4 and IPv6 proto fanout codes could be brought closer 4963360 consider passing zoneid using ip6i_t instead of ipsec_out_t in NDP 4963734 new ip6_asp locking is used incorrectly in ip_newroute_v6() 5008315 IPv6 code passes ip6i_t to IPsec code instead of ip6_t 5009636 memory leak in ip_fanout_proto_v6() 5092337 tcp/udp option handling can use some cleanup 5035841 Solaris can fail to create a valid broadcast ire 5043747 ar_query_xmit: Could not find the ace 5051574 tcp_check_policy is missing some checks 6305037 full hardware checksum is discarded when there're more than 2 mblks in the chain 6311149 ip.c needs to be put through a woodchipper 4708860 Unable to reassemble CGTP fragmented multicast 
packets 6224628 Large IPv6 packets with IPsec protection sometimes have length mismatch. 6213243 Solaris does not currently support Dead Gateway Detection 5029091 duplicate code in IP's input path for TCP/UDP/SCTP 4674643 through IPv6 CGTP routes, the very first packet is sent only after a while 6207318 Multiple default routes do not round robin connections to routers. 4823410 IP has an inconsistent view of link mtu 5105520 adding interface route to down interface causes ifconfig hang 5105707 advanced sockets API introduced some dead code 6318399 IP option handling for icmp and udp is too complicated 6321434 Every dropped packet in IP should use ip_drop_packet() 6341693 ifconfig mtu should operate on the physical interface, not individual ipif's 6352430 The credentials attached to an mblk are not particularly useful 6357894 uninitialised ipp_hoplimit needs to be cleaned up. 6363568 ip_xmit_v6() may be missing IRE releases in error cases 6364828 ip_rput_forward needs a makeover 6384416 System panics when running as multicast forwarder using multicast tunnels 6402382 TX: UDP v6 slowpath is not modified to handle mac_exempt conns 6418413 assertion failed ipha->ipha_ident == 0||ipha->ipha_ident == 0xFFFF 6420916 assertion failures in ipv6 wput path 6430851 use of b_prev to store ifindex is not 100% safe 6446106 IPv6 packets stored in nce->nce_qd_mp will be sent with incorrect tcp/udp checksums 6453711 SCTP OOTB sent as if genetated by global zone 6465212 ARP/IP merge should remove ire_freemblk.esballoc 6490163 ip_input() could misbehave if the first mblk's size is not big enough 6496664 missing ipif_refrele leads to reference leak and deferred crash in ip_wput_ipsec_out_v6 6504856 memory leak in ip_fanout_proto_v6() when using link local outer tunnel addresses 6507765 IRE cache hash function performs badly 6510186 IP_FORWARD_PROG bit is easily overlooked 6514727 cgtp ipv6 failure on snv54 6528286 MULTIRT (CGTP) should offload checksum to hardware 6533904 SCTP: doesn't 
support traffic class for IPv6 6539415 TX: ipif source selection is flawed for unlabeled gateways 6539851 plumbed unworking nic blocks sending broadcast packets 6564468 non-solaris SCTP stack over rawip socket: netstat command counts rawipInData not rawipOutDatagrams 6568511 ipIfStatsOutDiscards not bumped when discarding an ipsec packet on the wrong NIC 6584162 tcp_g_q_inactive() makes incorrect use of taskq_dispatch() 6603974 round-robin default with many interfaces causes infinite temporary IRE thrashing 6611750 ilm_lookup_ill_index_v4 was born an orphan 6618423 ip_wput_frag_mdt sends out packets that void pfhooks 6620964 IRE max bucket count calculations performed in ip_ire_init() are flawed 6626266 various _broadcasts seem redundant 6638182 IP_PKTINFO + SO_DONTROUTE + CIPSO IP option == panic 6647710 IPv6 possible DoS vulnerability 6657357 nce should be kmem_cache alloc'ed from an nce_cache. 6685131 ilg_add -> conn_ilg_alloc interacting with conn_ilg[] walkers can cause panic. 6730298 adding 0.0.0.0 key with mask != 0 causes 'route delete default' to fail 6730976 vni and ipv6 doesn't quite work. 6740956 assertion failed: mp->b_next == 0L && mp->b_prev == 0L in nce_queue_mp_common() 6748515 BUMP_MIB() is occasionally done on the wrong ill 6753250 ip_output_v6() `notv6' error path has an errant ill_refrele() 6756411 NULL-pointer dereference in ip_wput_local() 6769582 IP must forward packet returned from FW-HOOK 6781525 bogus usesrc usage leads directly to panic 6422839 System paniced in ip_multicast_loopback due to NULL pointer dereference 6785521 initial IPv6 DAD solicitation is dropped in ip_newroute_ipif_v6() 6787370 ipnet devices not seeing forwarded IP packets on outgoing interface 6791187 ip*dbg() calls in ip_output_options() claim to originate from ip_wput() 6794047 nce_fp_mp prevents sharing of NCEs across an IPMP group 6797926 many unnecessary ip0dbg() in ip_rput_data_v6 6846919 Packet queued for ND gets sent in the clear. 
6856591 ping doesn't send packets with DF set 6861113 arp module has incorrect dependency path for hook module 6865664 IPV6_NEXTHOP does not work with TCP socket 6874681 No ICMP time exceeded when a router receives packet with ttl = 0 6880977 ip_wput_ire() uses over 1k of stack 6595433 IPsec performance could be significantly better when calling hw crypto provider synchronously 6848397 ifconfig down of an interface can hang. 6849602 IPV6_PATHMTU size issue for UDP 6885359 Add compile-time option for testing pure IPsec overhead 6889268 Odd loopback source address selection with IPMP 6895420 assertion failed: connp->conn_helper_info == NULL 6851189 Routing-related panic occurred during reboot on T2000 system running snv_117 6896174 Post-async-encryption, AH+ESP packets may have misinitialized ipha/ip6 6896687 iptun presents IPv6 with an MTU < 1280 6897006 assertion failed: ipif->ipif_id != 0 in ip_sioctl_slifzone_restart
Diffstat (limited to 'usr/src/uts/common/inet/ip/tnet.c')
-rw-r--r--usr/src/uts/common/inet/ip/tnet.c311
1 files changed, 139 insertions, 172 deletions
diff --git a/usr/src/uts/common/inet/ip/tnet.c b/usr/src/uts/common/inet/ip/tnet.c
index 1e5c0eb170..262d5bc339 100644
--- a/usr/src/uts/common/inet/ip/tnet.c
+++ b/usr/src/uts/common/inet/ip/tnet.c
@@ -133,16 +133,7 @@ int tsol_strict_error;
* - A set of route-related attributes that only get set for prefix
* IREs. If this is non-NULL, the prefix IRE has been associated
* with a set of gateway security attributes by way of route add/
- * change functionality. This field stays NULL for IRE_CACHEs.
- *
- * igsa_gcgrp
- *
- * - Group of gc's which only gets set for IRE_CACHEs. Each of the gc
- * points to a gcdb record that contains the security attributes
- * used to perform the credential checks of the packet which uses
- * the IRE. If the group is not empty, the list of gc's can be
- * traversed starting at gcgrp_head. This field stays NULL for
- * prefix IREs.
+ * change functionality.
*/
static kmem_cache_t *ire_gw_secattr_cache;
@@ -223,7 +214,6 @@ ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
attrp->igsa_rhc = NULL;
attrp->igsa_gc = NULL;
- attrp->igsa_gcgrp = NULL;
return (0);
}
@@ -257,14 +247,9 @@ ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
GC_REFRELE(attrp->igsa_gc);
attrp->igsa_gc = NULL;
}
- if (attrp->igsa_gcgrp != NULL) {
- GCGRP_REFRELE(attrp->igsa_gcgrp);
- attrp->igsa_gcgrp = NULL;
- }
ASSERT(attrp->igsa_rhc == NULL);
ASSERT(attrp->igsa_gc == NULL);
- ASSERT(attrp->igsa_gcgrp == NULL);
kmem_cache_free(ire_gw_secattr_cache, attrp);
}
@@ -387,9 +372,6 @@ rtsa_validate(const struct rtsa_s *rp)
/*
* A brief explanation of the reference counting scheme:
*
- * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp;
- * IRE_CACHEs have it vice-versa.
- *
* Apart from dynamic references due to reference holds done
* actively by threads, we have the following references:
*
@@ -402,8 +384,6 @@ rtsa_validate(const struct rtsa_s *rp)
* to the gc_refcnt.
*
* gcgrp_refcnt:
- * - An IRE_CACHE that points to an igsa_gcgrp contributes a reference
- * to the gcgrp_refcnt of the associated tsol_gcgrp_t.
* - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
* a reference to the gcgrp_refcnt.
*/
@@ -613,7 +593,6 @@ gcgrp_inactive(tsol_gcgrp_t *gcgrp)
mod_hash_t *hashp;
ASSERT(MUTEX_HELD(&gcgrp_lock));
- ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock));
ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
@@ -686,21 +665,21 @@ cipso_to_sl(const uchar_t *option, bslabel_t *sl)
}
/*
- * If present, parse a CIPSO label in the incoming packet and
- * construct a ts_label_t that reflects the CIPSO label and attach it
- * to the dblk cred. Later as the mblk flows up through the stack any
+ * If present, parse the CIPSO label in the incoming packet and
+ * construct a ts_label_t that reflects the CIPSO label and put it in
+ * the ip_recv_attr_t. Later as the packet flows up through the stack any
* code that needs to examine the packet label can inspect the label
- * from the dblk cred. This function is called right in ip_rput for
- * all packets, i.e. locally destined and to be forwarded packets. The
- * forwarding path needs to examine the label to determine how to
- * forward the packet.
+ * from the ira_tsl. This function is
+ * called right in ip_input for all packets, i.e. locally destined and
+ * to be forwarded packets. The forwarding path needs to examine the label
+ * to determine how to forward the packet.
*
* This routine pulls all message text up into the first mblk.
* For IPv4, only the first 20 bytes of the IP header are guaranteed
* to exist. For IPv6, only the IPv6 header is guaranteed to exist.
*/
boolean_t
-tsol_get_pkt_label(mblk_t *mp, int version)
+tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira)
{
tsol_tpc_t *src_rhtp = NULL;
uchar_t *opt_ptr = NULL;
@@ -713,7 +692,6 @@ tsol_get_pkt_label(mblk_t *mp, int version)
const void *src;
const ip6_t *ip6h;
cred_t *credp;
- pid_t cpid;
int proto;
ASSERT(DB_TYPE(mp) == M_DATA);
@@ -846,28 +824,37 @@ tsol_get_pkt_label(mblk_t *mp, int version)
return (B_FALSE);
}
- /* Make sure no other thread is messing with this mblk */
- ASSERT(DB_REF(mp) == 1);
- /* Preserve db_cpid */
- credp = msg_extractcred(mp, &cpid);
- if (credp == NULL) {
+ if (ira->ira_cred == NULL) {
credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
+ if (credp == NULL)
+ return (B_FALSE);
} else {
cred_t *newcr;
- newcr = copycred_from_bslabel(credp, &sl, doi,
+ newcr = copycred_from_bslabel(ira->ira_cred, &sl, doi,
KM_NOSLEEP);
- crfree(credp);
+ if (newcr == NULL)
+ return (B_FALSE);
+ if (ira->ira_free_flags & IRA_FREE_CRED) {
+ crfree(ira->ira_cred);
+ ira->ira_free_flags &= ~IRA_FREE_CRED;
+ ira->ira_cred = NULL;
+ }
credp = newcr;
}
- if (credp == NULL)
- return (B_FALSE);
- crgetlabel(credp)->tsl_flags |= label_flags;
-
- mblk_setcred(mp, credp, cpid);
- crfree(credp); /* mblk has ref on cred */
+ /*
+ * Put the label in ira_tsl for convenience, while keeping
+ * the cred in ira_cred for getpeerucred which is used to get
+ * labels with TX.
+ * Note: no explicit refcnt/free_flag for ira_tsl. The free_flag
+ * for IRA_FREE_CRED is sufficient for both.
+ */
+ ira->ira_tsl = crgetlabel(credp);
+ ira->ira_cred = credp;
+ ira->ira_free_flags |= IRA_FREE_CRED;
+ ira->ira_tsl->tsl_flags |= label_flags;
return (B_TRUE);
}
@@ -878,25 +865,25 @@ tsol_get_pkt_label(mblk_t *mp, int version)
*/
boolean_t
tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
- boolean_t shared_addr, const conn_t *connp)
+ ip_recv_attr_t *ira, const conn_t *connp)
{
const cred_t *credp;
ts_label_t *plabel, *conn_plabel;
tsol_tpc_t *tp;
boolean_t retv;
const bslabel_t *label, *conn_label;
+ boolean_t shared_addr = (ira->ira_flags & IRAF_TX_SHARED_ADDR);
/*
- * The cases in which this can happen are:
- * - IPv6 Router Alert, where ip_rput_data_v6 deliberately skips
- * over the label attachment process.
- * - MLD output looped-back to ourselves.
- * - IPv4 Router Discovery, where tsol_get_pkt_label intentionally
- * avoids the labeling process.
- * We trust that all valid paths in the code set the cred pointer when
- * needed.
+ * tsol_get_pkt_label intentionally avoids the labeling process for:
+ * - IPv6 router and neighbor discovery as well as redirects.
+ * - MLD packets. (Anything between ICMPv6 code 130 and 138.)
+ * - IGMP packets.
+ * - IPv4 router discovery.
+ * In those cases ira_cred is NULL.
*/
- if ((credp = msg_getcred(mp, NULL)) == NULL)
+ credp = ira->ira_cred;
+ if (credp == NULL)
return (B_TRUE);
/*
@@ -904,17 +891,18 @@ tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
* same zoneid as the selected destination, then no checks are
* necessary. Membership in the zone is enough proof. This is
* intended to be a hot path through this function.
+ * Note: Using crgetzone here is ok since the peer is local.
*/
if (!crisremote(credp) &&
crgetzone(credp) == crgetzone(connp->conn_cred))
return (B_TRUE);
- plabel = crgetlabel(credp);
+ plabel = ira->ira_tsl;
conn_plabel = crgetlabel(connp->conn_cred);
ASSERT(plabel != NULL && conn_plabel != NULL);
label = label2bslabel(plabel);
- conn_label = label2bslabel(crgetlabel(connp->conn_cred));
+ conn_label = label2bslabel(conn_plabel);
/*
@@ -954,12 +942,8 @@ tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
blequal(label, conn_label))
return (B_TRUE);
- /*
- * conn_zoneid is global for an exclusive stack, thus we use
- * conn_cred to get the zoneid
- */
if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
- (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID &&
+ (!connp->conn_zone_is_global &&
(plabel->tsl_doi != conn_plabel->tsl_doi ||
!bldominates(conn_label, label)))) {
DTRACE_PROBE3(
@@ -1046,16 +1030,13 @@ tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
}
boolean_t
-tsol_can_accept_raw(mblk_t *mp, boolean_t check_host)
+tsol_can_accept_raw(mblk_t *mp, ip_recv_attr_t *ira, boolean_t check_host)
{
ts_label_t *plabel = NULL;
tsol_tpc_t *src_rhtp, *dst_rhtp;
boolean_t retv;
- cred_t *credp;
- credp = msg_getcred(mp, NULL);
- if (credp != NULL)
- plabel = crgetlabel(credp);
+ plabel = ira->ira_tsl;
/* We are bootstrapping or the internal template was never deleted */
if (plabel == NULL)
@@ -1144,7 +1125,7 @@ tsol_can_accept_raw(mblk_t *mp, boolean_t check_host)
* TSLF_UNLABELED flag is sufficient.
*/
boolean_t
-tsol_can_reply_error(const mblk_t *mp)
+tsol_can_reply_error(const mblk_t *mp, ip_recv_attr_t *ira)
{
ts_label_t *plabel = NULL;
tsol_tpc_t *rhtp;
@@ -1152,7 +1133,6 @@ tsol_can_reply_error(const mblk_t *mp)
const ip6_t *ip6h;
boolean_t retv;
bslabel_t *pktbs;
- cred_t *credp;
/* Caller must pull up at least the IP header */
ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
@@ -1161,9 +1141,7 @@ tsol_can_reply_error(const mblk_t *mp)
if (!tsol_strict_error)
return (B_TRUE);
- credp = msg_getcred(mp, NULL);
- if (credp != NULL)
- plabel = crgetlabel(credp);
+ plabel = ira->ira_tsl;
/* We are bootstrapping or the internal template was never deleted */
if (plabel == NULL)
@@ -1227,33 +1205,30 @@ tsol_can_reply_error(const mblk_t *mp)
}
/*
- * Finds the zone associated with the given packet. Returns GLOBAL_ZONEID if
- * the zone cannot be located.
+ * Finds the zone associated with the receive attributes. Returns GLOBAL_ZONEID
+ * if the zone cannot be located.
*
* This is used by the classifier when the packet matches an ALL_ZONES IRE, and
* there's no MLP defined.
*
* Note that we assume that this is only invoked in the ALL_ZONES case.
- * Handling other cases would require handle exclusive stack zones where either
+ * Handling other cases would require handling exclusive IP zones where either
* this routine or the callers would have to map from
* the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc.
*/
zoneid_t
-tsol_packet_to_zoneid(const mblk_t *mp)
+tsol_attr_to_zoneid(const ip_recv_attr_t *ira)
{
- cred_t *cr = msg_getcred(mp, NULL);
zone_t *zone;
ts_label_t *label;
- if (cr != NULL) {
- if ((label = crgetlabel(cr)) != NULL) {
- zone = zone_find_by_label(label);
- if (zone != NULL) {
- zoneid_t zoneid = zone->zone_id;
+ if ((label = ira->ira_tsl) != NULL) {
+ zone = zone_find_by_label(label);
+ if (zone != NULL) {
+ zoneid_t zoneid = zone->zone_id;
- zone_rele(zone);
- return (zoneid);
- }
+ zone_rele(zone);
+ return (zoneid);
}
}
return (GLOBAL_ZONEID);
@@ -1273,7 +1248,7 @@ tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
/* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
if (!is_system_labeled() ||
(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
- IRE_INTERFACE)))
+ IRE_IF_ALL | IRE_MULTICAST | IRE_NOROUTE)))
goto done;
/*
@@ -1304,29 +1279,16 @@ tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
mutex_enter(&attrp->igsa_lock);
/*
- * Depending on the IRE type (prefix vs. cache), we seek the group
+ * We seek the group
* structure which contains all security credentials of the gateway.
- * A prefix IRE is associated with at most one gateway credential,
- * while a cache IRE is associated with every credentials that the
- * gateway has.
+ * An offlink IRE is associated with at most one gateway credential.
*/
- if ((gc = attrp->igsa_gc) != NULL) { /* prefix */
+ if ((gc = attrp->igsa_gc) != NULL) {
gcgrp = gc->gc_grp;
ASSERT(gcgrp != NULL);
rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
- } else if ((gcgrp = attrp->igsa_gcgrp) != NULL) { /* cache */
- rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
- gc = gcgrp->gcgrp_head;
- if (gc == NULL) {
- /* gc group is empty, so the drop lock now */
- ASSERT(gcgrp->gcgrp_count == 0);
- rw_exit(&gcgrp->gcgrp_rwlock);
- gcgrp = NULL;
- }
- }
-
- if (gcgrp != NULL)
GCGRP_REFHOLD(gcgrp);
+ }
if ((gw_rhc = attrp->igsa_rhc) != NULL) {
/*
@@ -1354,12 +1316,11 @@ tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
ASSERT(ga->ga_af == AF_INET6);
paddr = &ga->ga_addr;
}
- } else if (ire->ire_ipversion == IPV6_VERSION &&
- !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
- paddr = &ire->ire_gateway_addr_v6;
- } else if (ire->ire_ipversion == IPV4_VERSION &&
- ire->ire_gateway_addr != INADDR_ANY) {
- paddr = &ire->ire_gateway_addr;
+ } else if (ire->ire_type & IRE_OFFLINK) {
+ if (ire->ire_ipversion == IPV6_VERSION)
+ paddr = &ire->ire_gateway_addr_v6;
+ else if (ire->ire_ipversion == IPV4_VERSION)
+ paddr = &ire->ire_gateway_addr;
}
/* We've found a gateway address to do the template lookup */
@@ -1408,6 +1369,7 @@ tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
}
if (gc != NULL) {
+
tsol_gcdb_t *gcdb;
/*
* In the case of IRE_CACHE we've got one or more gateway
@@ -1418,18 +1380,9 @@ tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
* just the route itself, so the loop is executed only once.
*/
ASSERT(gcgrp != NULL);
- do {
- gcdb = gc->gc_db;
- if (tsl->tsl_doi == gcdb->gcdb_doi &&
- _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange))
- break;
- if (ire->ire_type == IRE_CACHE)
- gc = gc->gc_next;
- else
- gc = NULL;
- } while (gc != NULL);
-
- if (gc == NULL) {
+ gcdb = gc->gc_db;
+ if (tsl->tsl_doi != gcdb->gcdb_doi ||
+ !_blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) {
DTRACE_PROBE3(
tx__ip__log__drop__irematch__nogcmatched,
char *, "ire(1), tsl(2): all gc failed match",
@@ -1493,12 +1446,13 @@ done:
/*
* Performs label accreditation checks for packet forwarding.
+ * Add or remove a CIPSO option as needed.
*
* Returns a pointer to the modified mblk if allowed for forwarding,
* or NULL if the packet must be dropped.
*/
mblk_t *
-tsol_ip_forward(ire_t *ire, mblk_t *mp)
+tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira)
{
tsol_ire_gw_secattr_t *attrp = NULL;
ipha_t *ipha;
@@ -1516,11 +1470,14 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp)
boolean_t need_tpc_rele = B_FALSE;
ipaddr_t *gw;
ip_stack_t *ipst = ire->ire_ipst;
- cred_t *credp;
- pid_t pid;
+ int err;
+ ts_label_t *effective_tsl = NULL;
ASSERT(ire != NULL && mp != NULL);
- ASSERT(ire->ire_stq != NULL);
+ /*
+ * Note that the ire is the first one found, i.e., an IRE_OFFLINK if
+ * the destination is offlink.
+ */
af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
@@ -1530,16 +1487,6 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp)
psrc = &ipha->ipha_src;
pdst = &ipha->ipha_dst;
proto = ipha->ipha_protocol;
-
- /*
- * off_link is TRUE if destination not directly reachable.
- * Surya note: we avoid creation of per-dst IRE_CACHE entries
- * for forwarded packets, so we set off_link to be TRUE
- * if the packet dst is different from the ire_addr of
- * the ire for the nexthop.
- */
- off_link = ((ipha->ipha_dst != ire->ire_addr) ||
- (ire->ire_gateway_addr != INADDR_ANY));
if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
return (NULL);
} else {
@@ -1561,14 +1508,15 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp)
}
proto = *nexthdrp;
}
-
- /* destination not directly reachable? */
- off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6);
if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
return (NULL);
}
+ /*
+ * off_link is TRUE if destination not directly reachable.
+ */
+ off_link = (ire->ire_type & IRE_OFFLINK);
- if ((tsl = msg_getlabel(mp)) == NULL)
+ if ((tsl = ira->ira_tsl) == NULL)
return (mp);
if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
@@ -1611,11 +1559,7 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp)
attrp = ire->ire_gw_secattr;
gw_rhtp = attrp->igsa_rhc->rhc_tpc;
} else {
- /*
- * use the ire_addr if this is the IRE_CACHE of nexthop
- */
- gw = (ire->ire_gateway_addr == NULL? &ire->ire_addr :
- &ire->ire_gateway_addr);
+ gw = &ire->ire_gateway_addr;
gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE);
need_tpc_rele = B_TRUE;
}
@@ -1702,7 +1646,13 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp)
/* adjust is negative */
ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
mp->b_wptr += adjust;
-
+ /*
+ * Note that caller adjusts ira_pktlen and
+ * ira_ip_hdr_length
+ *
+ * For AF_INET6 note that tsol_remove_secopt_v6
+ * adjusted ip6_plen.
+ */
if (af == AF_INET) {
ipha = (ipha_t *)mp->b_rptr;
iplen = ntohs(ipha->ipha_length) + adjust;
@@ -1729,17 +1679,34 @@ tsol_ip_forward(ire_t *ire, mblk_t *mp)
(!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
goto keep_label;
-
- credp = msg_getcred(mp, &pid);
- if ((af == AF_INET &&
- tsol_check_label(credp, &mp, CONN_MAC_DEFAULT, ipst, pid) != 0) ||
- (af == AF_INET6 &&
- tsol_check_label_v6(credp, &mp, CONN_MAC_DEFAULT, ipst,
- pid) != 0)) {
+ /*
+ * Since we are forwarding packets we use GLOBAL_ZONEID for
+ * the IRE lookup in tsol_check_label.
+ * Since mac_exempt is false the zoneid isn't used for anything
+ * but the IRE lookup, hence we set zone_is_global to false.
+ */
+ if (af == AF_INET) {
+ err = tsol_check_label_v4(tsl, GLOBAL_ZONEID, &mp,
+ CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
+ } else {
+ err = tsol_check_label_v6(tsl, GLOBAL_ZONEID, &mp,
+ CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
+ }
+ if (err != 0) {
+ BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
+ ip_drop_output("tsol_check_label", mp, NULL);
+ freemsg(mp);
mp = NULL;
goto keep_label;
}
+ /*
+ * The effective_tsl must never affect the routing decision, hence
+ * we ignore it here.
+ */
+ if (effective_tsl != NULL)
+ label_rele(effective_tsl);
+
if (af == AF_INET) {
ipha = (ipha_t *)mp->b_rptr;
ipha->ipha_hdr_checksum = 0;
@@ -1885,13 +1852,13 @@ tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
}
int
-tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
- tsol_gcgrp_t *gcgrp)
+tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc)
{
tsol_ire_gw_secattr_t *attrp;
boolean_t exists = B_FALSE;
in_addr_t ga_addr4;
void *paddr = NULL;
+ tsol_gcgrp_t *gcgrp = NULL;
ASSERT(ire != NULL);
@@ -1917,20 +1884,16 @@ tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
if (attrp->igsa_gc != NULL)
GC_REFRELE(attrp->igsa_gc);
- if (attrp->igsa_gcgrp != NULL)
- GCGRP_REFRELE(attrp->igsa_gcgrp);
}
ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
/*
* References already held by caller and we keep them;
- * note that both gc and gcgrp may be set to NULL to
- * clear out igsa_gc and igsa_gcgrp, respectively.
+ * note that gc may be set to NULL to clear out igsa_gc.
*/
attrp->igsa_gc = gc;
- attrp->igsa_gcgrp = gcgrp;
- if (gcgrp == NULL && gc != NULL) {
+ if (gc != NULL) {
gcgrp = gc->gc_grp;
ASSERT(gcgrp != NULL);
}
@@ -1955,12 +1918,11 @@ tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
ASSERT(ga->ga_af == AF_INET6);
paddr = &ga->ga_addr;
}
- } else if (ipversion == IPV6_VERSION &&
- !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
- paddr = &ire->ire_gateway_addr_v6;
- } else if (ipversion == IPV4_VERSION &&
- ire->ire_gateway_addr != INADDR_ANY) {
- paddr = &ire->ire_gateway_addr;
+ } else if (ire->ire_type & IRE_OFFLINK) {
+ if (ipversion == IPV6_VERSION)
+ paddr = &ire->ire_gateway_addr_v6;
+ else if (ipversion == IPV4_VERSION)
+ paddr = &ire->ire_gateway_addr;
}
/*
@@ -1990,7 +1952,7 @@ tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
* If we can't figure out what it is, then return mlptSingle. That's actually
* an error case.
*
- * The callers are assume to pass in zone->zone_id and not the zoneid that
+ * The callers are assumed to pass in zone->zone_id and not the zoneid that
* is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an
* exclusive stack zone).
*/
@@ -2022,23 +1984,28 @@ tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
version = IPV4_VERSION;
}
+ /* Check whether the IRE_LOCAL (or ipif) is ALL_ZONES */
if (version == IPV4_VERSION) {
in4 = *(const in_addr_t *)addr;
if ((in4 == INADDR_ANY) || CLASSD(in4)) {
return (mlptBoth);
}
- ire = ire_cache_lookup(in4, ip_zoneid, NULL, ipst);
+ ire = ire_ftable_lookup_v4(in4, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
+ NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
+ 0, ipst, NULL);
} else {
if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) ||
IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) {
return (mlptBoth);
}
- ire = ire_cache_lookup_v6(addr, ip_zoneid, NULL, ipst);
+ ire = ire_ftable_lookup_v6(addr, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
+ NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
+ 0, ipst, NULL);
}
/*
* If we can't find the IRE, then we have to behave exactly like
- * ip_bind_laddr{,_v6}. That means looking up the IPIF so that users
- * can bind to addresses on "down" interfaces.
+ * ip_laddr_verify_{v4,v6}. That means looking up the IPIF so that
+ * users can bind to addresses on "down" interfaces.
*
* If we can't find that either, then the bind is going to fail, so
* just give up. Note that there's a miniscule chance that the address
@@ -2047,10 +2014,10 @@ tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
if (ire == NULL) {
if (version == IPV4_VERSION)
ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
- ip_zoneid, NULL, NULL, NULL, NULL, ipst);
+ ip_zoneid, ipst);
else
ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
- NULL, ip_zoneid, NULL, NULL, NULL, NULL, ipst);
+ NULL, ip_zoneid, ipst);
if (ipif == NULL) {
return (mlptSingle);
}