diff options
author | Erik Nordmark <Erik.Nordmark@Sun.COM> | 2009-11-11 11:49:49 -0800 |
---|---|---|
committer | Erik Nordmark <Erik.Nordmark@Sun.COM> | 2009-11-11 11:49:49 -0800 |
commit | bd670b35a010421b6e1a5536c34453a827007c81 (patch) | |
tree | 97c2057b6771dd40411a12eb89d2db2e2b2cce31 /usr/src/uts/common/inet/optcom.c | |
parent | b3388e4fc5f5c24c8a39fbe132a00b02dae5b717 (diff) | |
download | illumos-joyent-bd670b35a010421b6e1a5536c34453a827007c81.tar.gz |
PSARC/2009/331 IP Datapath Refactoring
PSARC/2008/522 EOF of 2001/070 IPsec HW Acceleration support
PSARC/2009/495 netstat -r flags for blackhole and reject routes
PSARC/2009/496 EOF of XRESOLV
PSARC/2009/494 IP_DONTFRAG socket option
PSARC/2009/515 fragmentation controls for ping and traceroute
6798716 ip_newroute delenda est
6798739 ARP and IP are too separate
6807265 IPv4 ip2mac() support
6756382 Please remove Venus IPsec HWACCEL code
6880632 sendto/sendmsg never returns EHOSTUNREACH in Solaris
6748582 sendmsg() returns OK, but doesn't send message using IPv4-mapped x IPv6 addr
1119790 TCP and path mtu discovery
4637227 should support equal-cost multi-path (ECMP)
5078568 getsockopt() for IPV6_PATHMTU on a non-connected socket should not succeed
6419648 "AR* contract private note" should be removed as part of ATM SW EOL
6274715 Arp could keep the old entry in the cache while it waits for an arp response
6605615 Remove duplicated TCP/IP opt_set/opt_get code; use conn_t
6874677 IP_TTL can be used to send with ttl zero
4034090 arp should not let you delete your own entry
6882140 Implement IP_DONTFRAG socket option
6883858 Implement ping -D option; traceroute -F should work for IPv6 and shared-IP zones
1119792 TCP/IP black hole detection is broken on receiver
4078796 Directed broadcast forwarding code has problems
4104337 restrict the IPPROTO_IP and IPPROTO_IPV6 options based on the socket family
4203747 Source address selection for source routed packets
4230259 pmtu is increased every ip_ire_pathmtu_interval timer value.
4300533 When sticky option ipv6_pktinfo set to bogus address subsequent connect time out
4471035 ire_delete_cache_gw is called through ire_walk unnecessarily
4514572 SO_DONTROUTE socket option doesn't work with IPv6
4524980 tcp_lookup_ipv4() should compare the ifindex against tcpb->tcpb_bound_if
4532714 machine fails to switch quickly among failed default routes
4634219 IPv6 path mtu discovery is broken when using routing header
4691581 udp broadcast handling causes too many replicas
4708405 mcast is broken on machines when all interfaces are IFF_POINTOPOINT
4770457 netstat/route: source address of interface routes pretends to be gateway address
4786974 use routing table to determine routes/interface for multicast
4792619 An ip_fanout_udp_ipc_v6() routine might lead to some simpler code
4816115 Nuke ipsec_out_use_global_policy
4862844 ipsec offload corner case
4867533 tcp_rq and tcp_wq are redundant
4868589 NCEs should be shared across an IPMP group
4872093 unplumbing an improper virtual interface panics in ip_newroute_get_dst_ill()
4901671 FireEngine needs some cleanup
4907617 IPsec identity latching should be done before sending SYN-ACK
4941461 scopeid and IPV6_PKTINFO with UDP/ICMP connect() does not work properly
4944981 ip does nothing with IP6I_NEXTHOP
4963353 IPv4 and IPv6 proto fanout codes could be brought closer
4963360 consider passing zoneid using ip6i_t instead of ipsec_out_t in NDP
4963734 new ip6_asp locking is used incorrectly in ip_newroute_v6()
5008315 IPv6 code passes ip6i_t to IPsec code instead of ip6_t
5009636 memory leak in ip_fanout_proto_v6()
5092337 tcp/udp option handling can use some cleanup
5035841 Solaris can fail to create a valid broadcast ire
5043747 ar_query_xmit: Could not find the ace
5051574 tcp_check_policy is missing some checks
6305037 full hardware checksum is discarded when there're more than 2 mblks in the chain
6311149 ip.c needs to be put through a woodchipper
4708860 Unable to reassemble CGTP fragmented multicast packets
6224628 Large IPv6 packets with IPsec protection sometimes have length mismatch.
6213243 Solaris does not currently support Dead Gateway Detection
5029091 duplicate code in IP's input path for TCP/UDP/SCTP
4674643 through IPv6 CGTP routes, the very first packet is sent only after a while
6207318 Multiple default routes do not round robin connections to routers.
4823410 IP has an inconsistent view of link mtu
5105520 adding interface route to down interface causes ifconfig hang
5105707 advanced sockets API introduced some dead code
6318399 IP option handling for icmp and udp is too complicated
6321434 Every dropped packet in IP should use ip_drop_packet()
6341693 ifconfig mtu should operate on the physical interface, not individual ipif's
6352430 The credentials attached to an mblk are not particularly useful
6357894 uninitialised ipp_hoplimit needs to be cleaned up.
6363568 ip_xmit_v6() may be missing IRE releases in error cases
6364828 ip_rput_forward needs a makeover
6384416 System panics when running as multicast forwarder using multicast tunnels
6402382 TX: UDP v6 slowpath is not modified to handle mac_exempt conns
6418413 assertion failed ipha->ipha_ident == 0||ipha->ipha_ident == 0xFFFF
6420916 assertion failures in ipv6 wput path
6430851 use of b_prev to store ifindex is not 100% safe
6446106 IPv6 packets stored in nce->nce_qd_mp will be sent with incorrect tcp/udp checksums
6453711 SCTP OOTB sent as if generated by global zone
6465212 ARP/IP merge should remove ire_freemblk.esballoc
6490163 ip_input() could misbehave if the first mblk's size is not big enough
6496664 missing ipif_refrele leads to reference leak and deferred crash in ip_wput_ipsec_out_v6
6504856 memory leak in ip_fanout_proto_v6() when using link local outer tunnel addresses
6507765 IRE cache hash function performs badly
6510186 IP_FORWARD_PROG bit is easily overlooked
6514727 cgtp ipv6 failure on snv54
6528286 MULTIRT (CGTP) should offload checksum to hardware
6533904 SCTP: doesn't support traffic class for IPv6
6539415 TX: ipif source selection is flawed for unlabeled gateways
6539851 plumbed unworking nic blocks sending broadcast packets
6564468 non-solaris SCTP stack over rawip socket: netstat command counts rawipInData not rawipOutDatagrams
6568511 ipIfStatsOutDiscards not bumped when discarding an ipsec packet on the wrong NIC
6584162 tcp_g_q_inactive() makes incorrect use of taskq_dispatch()
6603974 round-robin default with many interfaces causes infinite temporary IRE thrashing
6611750 ilm_lookup_ill_index_v4 was born an orphan
6618423 ip_wput_frag_mdt sends out packets that void pfhooks
6620964 IRE max bucket count calculations performed in ip_ire_init() are flawed
6626266 various _broadcasts seem redundant
6638182 IP_PKTINFO + SO_DONTROUTE + CIPSO IP option == panic
6647710 IPv6 possible DoS vulnerability
6657357 nce should be kmem_cache alloc'ed from an nce_cache.
6685131 ilg_add -> conn_ilg_alloc interacting with conn_ilg[] walkers can cause panic.
6730298 adding 0.0.0.0 key with mask != 0 causes 'route delete default' to fail
6730976 vni and ipv6 doesn't quite work.
6740956 assertion failed: mp->b_next == 0L && mp->b_prev == 0L in nce_queue_mp_common()
6748515 BUMP_MIB() is occasionally done on the wrong ill
6753250 ip_output_v6() `notv6' error path has an errant ill_refrele()
6756411 NULL-pointer dereference in ip_wput_local()
6769582 IP must forward packet returned from FW-HOOK
6781525 bogus usesrc usage leads directly to panic
6422839 System panicked in ip_multicast_loopback due to NULL pointer dereference
6785521 initial IPv6 DAD solicitation is dropped in ip_newroute_ipif_v6()
6787370 ipnet devices not seeing forwarded IP packets on outgoing interface
6791187 ip*dbg() calls in ip_output_options() claim to originate from ip_wput()
6794047 nce_fp_mp prevents sharing of NCEs across an IPMP group
6797926 many unnecessary ip0dbg() in ip_rput_data_v6
6846919 Packet queued for ND gets sent in the clear.
6856591 ping doesn't send packets with DF set
6861113 arp module has incorrect dependency path for hook module
6865664 IPV6_NEXTHOP does not work with TCP socket
6874681 No ICMP time exceeded when a router receives packet with ttl = 0
6880977 ip_wput_ire() uses over 1k of stack
6595433 IPsec performance could be significantly better when calling hw crypto provider synchronously
6848397 ifconfig down of an interface can hang.
6849602 IPV6_PATHMTU size issue for UDP
6885359 Add compile-time option for testing pure IPsec overhead
6889268 Odd loopback source address selection with IPMP
6895420 assertion failed: connp->conn_helper_info == NULL
6851189 Routing-related panic occurred during reboot on T2000 system running snv_117
6896174 Post-async-encryption, AH+ESP packets may have misinitialized ipha/ip6
6896687 iptun presents IPv6 with an MTU < 1280
6897006 assertion failed: ipif->ipif_id != 0 in ip_sioctl_slifzone_restart
Diffstat (limited to 'usr/src/uts/common/inet/optcom.c')
-rw-r--r-- | usr/src/uts/common/inet/optcom.c | 463 |
1 files changed, 96 insertions, 367 deletions
diff --git a/usr/src/uts/common/inet/optcom.c b/usr/src/uts/common/inet/optcom.c index e35b7f6af5..e4d1abff4c 100644 --- a/usr/src/uts/common/inet/optcom.c +++ b/usr/src/uts/common/inet/optcom.c @@ -58,21 +58,21 @@ * Function prototypes */ static t_scalar_t process_topthdrs_first_pass(mblk_t *, cred_t *, optdb_obj_t *, - boolean_t *, size_t *); + size_t *); static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *, optdb_obj_t *dbobjp, - mblk_t *first_mp, boolean_t is_restart, boolean_t *queued_statusp); + t_uscalar_t *worst_statusp); static t_uscalar_t get_worst_status(t_uscalar_t, t_uscalar_t); static int do_opt_default(queue_t *, struct T_opthdr *, uchar_t **, t_uscalar_t *, cred_t *, optdb_obj_t *); static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **, t_uscalar_t *, cred_t *cr, optdb_obj_t *); -static int do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, +static void do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp, - cred_t *, optdb_obj_t *dbobjp, mblk_t *first_mp); + cred_t *, optdb_obj_t *dbobjp); static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t); static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t); -static boolean_t opt_length_ok(opdes_t *, struct T_opthdr *); +static boolean_t opt_length_ok(opdes_t *, t_uscalar_t optlen); static t_uscalar_t optcom_max_optbuf_len(opdes_t *, uint_t); static boolean_t opt_bloated_maxsize(opdes_t *); @@ -176,35 +176,15 @@ optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) * job requested. * XXX Code below needs some restructuring after we have some more * macros to support 'struct opthdr' in the headers. 
- * - * IP-MT notes: The option management framework functions svr4_optcom_req() and - * tpi_optcom_req() allocate and prepend an M_CTL mblk to the actual - * T_optmgmt_req mblk and pass the chain as an additional parameter to the - * protocol set functions. If a protocol set function (such as ip_opt_set) - * cannot process the option immediately it can return EINPROGRESS. ip_opt_set - * enqueues the message in the appropriate sq and returns EINPROGRESS. Later - * the sq framework arranges to restart this operation and passes control to - * the restart function ip_restart_optmgmt() which in turn calls - * svr4_optcom_req() or tpi_optcom_req() to restart the option processing. - * - * XXX Remove the asynchronous behavior of svr_optcom_req() and - * tpi_optcom_req(). */ -int -svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, - boolean_t pass_to_ip) +void +svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp) { pfi_t deffn = dbobjp->odb_deffn; pfi_t getfn = dbobjp->odb_getfn; opt_set_fn setfn = dbobjp->odb_setfn; opdes_t *opt_arr = dbobjp->odb_opt_des_arr; uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; - boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; - opt_restart_t *or; - struct opthdr *restart_opt; - boolean_t is_restart = B_FALSE; - mblk_t *first_mp; - t_uscalar_t max_optbuf_len; int len; mblk_t *mp1 = NULL; @@ -214,33 +194,10 @@ svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, struct opthdr *opt_end; struct opthdr *opt_start; opdes_t *optd; - boolean_t pass_to_next = B_FALSE; struct T_optmgmt_ack *toa; struct T_optmgmt_req *tor; int error; - /* - * Allocate M_CTL and prepend to the packet for restarting this - * option if needed. IP may need to queue and restart the option - * if it cannot obtain exclusive conditions immediately. 
Please see - * IP-MT notes before the start of svr4_optcom_req - */ - if (mp->b_datap->db_type == M_CTL) { - is_restart = B_TRUE; - first_mp = mp; - mp = mp->b_cont; - ASSERT(mp->b_wptr - mp->b_rptr >= - sizeof (struct T_optmgmt_req)); - tor = (struct T_optmgmt_req *)mp->b_rptr; - ASSERT(tor->MGMT_flags == T_NEGOTIATE); - - or = (opt_restart_t *)first_mp->b_rptr; - opt_start = or->or_start; - opt_end = or->or_end; - restart_opt = or->or_ropt; - goto restart; - } - tor = (struct T_optmgmt_req *)mp->b_rptr; /* Verify message integrity. */ if (mp->b_wptr - mp->b_rptr < sizeof (struct T_optmgmt_req)) @@ -255,7 +212,7 @@ svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, break; default: optcom_err_ack(q, mp, TBADFLAG, 0); - return (0); + return; } if (tor->MGMT_flags == T_DEFAULT) { /* Is it a request for default option settings? */ @@ -278,7 +235,6 @@ svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, * ----historical comment end ------- */ /* T_DEFAULT not passed down */ - ASSERT(topmost_tpiprovider == B_TRUE); freemsg(mp); max_optbuf_len = optcom_max_optbuf_len(opt_arr, opt_arr_cnt); @@ -286,7 +242,7 @@ svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, if (!mp) { no_mem:; optcom_err_ack(q, mp, TSYSERR, ENOMEM); - return (0); + return; } /* Initialize the T_optmgmt_ack header. */ @@ -362,7 +318,7 @@ no_mem:; mp->b_datap->db_type = M_PCPROTO; /* Ship it back. 
*/ qreply(q, mp); - return (0); + return; } /* T_DEFAULT processing complete - no more T_DEFAULT */ @@ -414,15 +370,15 @@ no_mem:; goto bad_opt; error = proto_opt_check(opt->level, opt->name, opt->len, NULL, - opt_arr, opt_arr_cnt, topmost_tpiprovider, + opt_arr, opt_arr_cnt, tor->MGMT_flags == T_NEGOTIATE, tor->MGMT_flags == T_CHECK, cr); if (error < 0) { optcom_err_ack(q, mp, -error, 0); - return (0); + return; } else if (error > 0) { optcom_err_ack(q, mp, TSYSERR, error); - return (0); + return; } } /* end for loop scanning option buffer */ @@ -491,24 +447,9 @@ no_mem:; /* Ditch the input buffer. */ freemsg(mp); mp = mp1; - /* Always let the next module look at the option. */ - pass_to_next = B_TRUE; break; case T_NEGOTIATE: - first_mp = allocb(sizeof (opt_restart_t), BPRI_LO); - if (first_mp == NULL) { - optcom_err_ack(q, mp, TSYSERR, ENOMEM); - return (0); - } - first_mp->b_datap->db_type = M_CTL; - or = (opt_restart_t *)first_mp->b_rptr; - or->or_start = opt_start; - or->or_end = opt_end; - or->or_type = T_SVR4_OPTMGMT_REQ; - or->or_private = 0; - first_mp->b_cont = mp; -restart: /* * Here we are expecting that the response buffer is exactly * the same size as the input buffer. We pass each opthdr @@ -523,22 +464,16 @@ restart: */ toa = (struct T_optmgmt_ack *)tor; - for (opt = is_restart ? restart_opt: opt_start; opt < opt_end; - opt = next_opt) { + for (opt = opt_start; opt < opt_end; opt = next_opt) { int error; - /* - * Point to the current option in or, in case this - * option has to be restarted later on - */ - or->or_ropt = opt; next_opt = (struct opthdr *)((uchar_t *)&opt[1] + _TPI_ALIGN_OPT(opt->len)); error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE, opt->level, opt->name, opt->len, (uchar_t *)&opt[1], - &opt->len, (uchar_t *)&opt[1], NULL, cr, first_mp); + &opt->len, (uchar_t *)&opt[1], NULL, cr); /* * Treat positive "errors" as real. 
* Note: negative errors are to be treated as @@ -549,99 +484,48 @@ restart: * it is valid but was either handled upstream * or will be handled downstream. */ - if (error == EINPROGRESS) { - /* - * The message is queued and will be - * reprocessed later. Typically ip queued - * the message to get some exclusive conditions - * and later on calls this func again. - */ - return (EINPROGRESS); - } else if (error > 0) { + if (error > 0) { optcom_err_ack(q, mp, TSYSERR, error); - freeb(first_mp); - return (0); + return; } /* * error < 0 means option is not recognized. - * But with OP_PASSNEXT the next module - * might recognize it. */ } - /* Done with the restart control mp. */ - freeb(first_mp); - pass_to_next = B_TRUE; break; default: optcom_err_ack(q, mp, TBADFLAG, 0); - return (0); + return; } - if (pass_to_next && (q->q_next != NULL || pass_to_ip)) { - /* Send it down to the next module and let it reply */ - toa->PRIM_type = T_SVR4_OPTMGMT_REQ; /* Changed by IP to ACK */ - if (q->q_next != NULL) - putnext(q, mp); - else - ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); - } else { - /* Set common fields in the header. */ - toa->MGMT_flags = T_SUCCESS; - mp->b_datap->db_type = M_PCPROTO; - toa->PRIM_type = T_OPTMGMT_ACK; - qreply(q, mp); - } - return (0); + /* Set common fields in the header. 
*/ + toa->MGMT_flags = T_SUCCESS; + mp->b_datap->db_type = M_PCPROTO; + toa->PRIM_type = T_OPTMGMT_ACK; + qreply(q, mp); + return; bad_opt:; optcom_err_ack(q, mp, TBADOPT, 0); - return (0); } /* * New optcom_req inspired by TPI/XTI semantics */ -int -tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, - boolean_t pass_to_ip) +void +tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp) { t_scalar_t t_error; mblk_t *toa_mp; - boolean_t pass_to_next; size_t toa_len; struct T_optmgmt_ack *toa; struct T_optmgmt_req *tor = (struct T_optmgmt_req *)mp->b_rptr; - - opt_restart_t *or; - boolean_t is_restart = B_FALSE; - mblk_t *first_mp = NULL; t_uscalar_t worst_status; - boolean_t queued_status; - - /* - * Allocate M_CTL and prepend to the packet for restarting this - * option if needed. IP may need to queue and restart the option - * if it cannot obtain exclusive conditions immediately. Please see - * IP-MT notes before the start of svr4_optcom_req - */ - if (mp->b_datap->db_type == M_CTL) { - is_restart = B_TRUE; - first_mp = mp; - toa_mp = mp->b_cont; - mp = toa_mp->b_cont; - ASSERT(mp->b_wptr - mp->b_rptr >= - sizeof (struct T_optmgmt_req)); - tor = (struct T_optmgmt_req *)mp->b_rptr; - ASSERT(tor->MGMT_flags == T_NEGOTIATE); - - or = (opt_restart_t *)first_mp->b_rptr; - goto restart; - } /* Verify message integrity. */ if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req)) { optcom_err_ack(q, mp, TBADOPT, 0); - return (0); + return; } /* Verify MGMT_flags legal */ @@ -654,7 +538,7 @@ tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, break; default: optcom_err_ack(q, mp, TBADFLAG, 0); - return (0); + return; } /* @@ -669,7 +553,6 @@ tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, * T_ALLOPT mean that length can be different for output buffer). 
*/ - pass_to_next = B_FALSE; /* initial value */ toa_len = 0; /* initial value */ /* @@ -677,13 +560,11 @@ tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, * - estimate cumulative length needed for results * - set "status" field based on permissions, option header check * etc. - * - determine "pass_to_next" whether we need to send request to - * downstream module/driver. */ if ((t_error = process_topthdrs_first_pass(mp, cr, dbobjp, - &pass_to_next, &toa_len)) != 0) { + &toa_len)) != 0) { optcom_err_ack(q, mp, t_error, 0); - return (0); + return; } /* @@ -697,26 +578,14 @@ tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, toa_mp = allocb_tmpl(toa_len, mp); if (!toa_mp) { optcom_err_ack(q, mp, TSYSERR, ENOMEM); - return (0); + return; } - first_mp = allocb(sizeof (opt_restart_t), BPRI_LO); - if (first_mp == NULL) { - freeb(toa_mp); - optcom_err_ack(q, mp, TSYSERR, ENOMEM); - return (0); - } - first_mp->b_datap->db_type = M_CTL; - or = (opt_restart_t *)first_mp->b_rptr; /* * Set initial values for generating output. */ - or->or_worst_status = T_SUCCESS; - or->or_type = T_OPTMGMT_REQ; - or->or_private = 0; - /* remaining fields fileed in do_options_second_pass */ + worst_status = T_SUCCESS; /* initial value */ -restart: /* * This routine makes another pass through the option buffer this * time acting on the request based on "status" result in the @@ -724,19 +593,11 @@ restart: * all options of a certain level and acts on each for this request. 
*/ if ((t_error = do_options_second_pass(q, mp, toa_mp, cr, dbobjp, - first_mp, is_restart, &queued_status)) != 0) { + &worst_status)) != 0) { freemsg(toa_mp); optcom_err_ack(q, mp, t_error, 0); - return (0); - } - if (queued_status) { - /* Option will be restarted */ - return (EINPROGRESS); + return; } - worst_status = or->or_worst_status; - /* Done with the first mp */ - freeb(first_mp); - toa_mp->b_cont = NULL; /* * Following code relies on the coincidence that T_optmgmt_req @@ -749,34 +610,12 @@ restart: toa->MGMT_flags = tor->MGMT_flags; - freemsg(mp); /* free input mblk */ - /* - * If there is atleast one option that requires a downstream - * forwarding and if it is possible, we forward the message - * downstream. Else we ack it. - */ - if (pass_to_next && (q->q_next != NULL || pass_to_ip)) { - /* - * We pass it down as T_OPTMGMT_REQ. This code relies - * on the happy coincidence that T_optmgmt_req and - * T_optmgmt_ack are identical data structures - * at the binary representation level. - */ - toa_mp->b_datap->db_type = M_PROTO; - toa->PRIM_type = T_OPTMGMT_REQ; - if (q->q_next != NULL) - putnext(q, toa_mp); - else - ip_output(Q_TO_CONN(q), toa_mp, q, IP_WPUT); - } else { - toa->PRIM_type = T_OPTMGMT_ACK; - toa_mp->b_datap->db_type = M_PCPROTO; - toa->MGMT_flags |= worst_status; /* XXX "worst" or "OR" TPI ? */ - qreply(q, toa_mp); - } - return (0); + toa->PRIM_type = T_OPTMGMT_ACK; + toa_mp->b_datap->db_type = M_PCPROTO; + toa->MGMT_flags |= worst_status; /* XXX "worst" or "OR" TPI ? */ + qreply(q, toa_mp); } @@ -786,17 +625,14 @@ restart: * - estimate cumulative length needed for results * - set "status" field based on permissions, option header check * etc. - * - determine "pass_to_next" whether we need to send request to - * downstream module/driver. 
*/ static t_scalar_t process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, - boolean_t *pass_to_nextp, size_t *toa_lenp) + size_t *toa_lenp) { opdes_t *opt_arr = dbobjp->odb_opt_des_arr; uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; - boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; optlevel_t *valid_level_arr = dbobjp->odb_valid_levels_arr; uint_t valid_level_arr_cnt = dbobjp->odb_valid_levels_arr_cnt; struct T_opthdr *opt; @@ -843,18 +679,14 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, * unchanged if they do not understand an * option. */ - if (topmost_tpiprovider) { - if (!opt_level_valid(opt->level, - valid_level_arr, - valid_level_arr_cnt)) - return (TBADOPT); - /* - * level is valid - initialize - * option as not supported - */ - opt->status = T_NOTSUPPORT; - } - + if (!opt_level_valid(opt->level, + valid_level_arr, valid_level_arr_cnt)) + return (TBADOPT); + /* + * level is valid - initialize + * option as not supported + */ + opt->status = T_NOTSUPPORT; *toa_lenp += _TPI_ALIGN_TOPT(opt->len); continue; } @@ -866,7 +698,6 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, */ allopt_len = 0; if (tor->MGMT_flags == T_CHECK || - !topmost_tpiprovider || ((allopt_len = opt_level_allopts_lengths(opt->level, opt_arr, opt_arr_cnt)) == 0)) { /* @@ -874,11 +705,6 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, * It is not valid to to use T_ALLOPT with * T_CHECK flag. * - * T_ALLOPT is assumed "expanded" at the - * topmost_tpiprovider level so it should not - * be there as an "option name" if this is not - * a topmost_tpiprovider call and we fail it. - * * opt_level_allopts_lengths() is used to verify * that "level" associated with the T_ALLOPT is * supported. 
@@ -892,15 +718,8 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, *toa_lenp += allopt_len; opt->status = T_SUCCESS; - /* XXX - always set T_ALLOPT 'pass_to_next' for now */ - *pass_to_nextp = B_TRUE; continue; } - /* - * Check if option wants to flow downstream - */ - if (optd->opdes_props & OP_PASSNEXT) - *pass_to_nextp = B_TRUE; /* Additional checks dependent on operation. */ switch (tor->MGMT_flags) { @@ -972,7 +791,9 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, * Note: This can override anything about this * option request done at a higher level. */ - if (!opt_length_ok(optd, opt)) { + if (opt->len < sizeof (struct T_opthdr) || + !opt_length_ok(optd, + opt->len - sizeof (struct T_opthdr))) { /* bad size */ *toa_lenp += _TPI_ALIGN_TOPT(opt->len); opt->status = T_FAILURE; @@ -1034,23 +855,14 @@ process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp, */ static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr, - optdb_obj_t *dbobjp, mblk_t *first_mp, boolean_t is_restart, - boolean_t *queued_statusp) + optdb_obj_t *dbobjp, t_uscalar_t *worst_statusp) { - boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; int failed_option; struct T_opthdr *opt; - struct T_opthdr *opt_start, *opt_end, *restart_opt; + struct T_opthdr *opt_start, *opt_end; uchar_t *optr; uint_t optset_context; struct T_optmgmt_req *tor = (struct T_optmgmt_req *)reqmp->b_rptr; - opt_restart_t *or; - t_uscalar_t *worst_statusp; - int err; - - *queued_statusp = B_FALSE; - or = (opt_restart_t *)first_mp->b_rptr; - worst_statusp = &or->or_worst_status; optr = (uchar_t *)ack_mp->b_rptr + sizeof (struct T_optmgmt_ack); /* assumed int32_t aligned */ @@ -1058,32 +870,16 @@ do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr, /* * Set initial values for scanning input */ - if (is_restart) { - opt_start = (struct T_opthdr *)or->or_start; - opt_end = (struct 
T_opthdr *)or->or_end; - restart_opt = (struct T_opthdr *)or->or_ropt; - } else { - opt_start = (struct T_opthdr *)mi_offset_param(reqmp, - tor->OPT_offset, tor->OPT_length); - if (opt_start == NULL) - return (TBADOPT); - opt_end = (struct T_opthdr *)((uchar_t *)opt_start + - tor->OPT_length); - or->or_start = (struct opthdr *)opt_start; - or->or_end = (struct opthdr *)opt_end; - /* - * construct the mp chain, in case the setfn needs to - * queue this and restart option processing later on. - */ - first_mp->b_cont = ack_mp; - ack_mp->b_cont = reqmp; - } + opt_start = (struct T_opthdr *)mi_offset_param(reqmp, + tor->OPT_offset, tor->OPT_length); + if (opt_start == NULL) + return (TBADOPT); + opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length); ASSERT(__TPI_TOPT_ISALIGNED(opt_start)); /* verified in first pass */ - for (opt = is_restart ? restart_opt : opt_start; - opt && (opt < opt_end); + for (opt = opt_start; opt && (opt < opt_end); opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) { - or->or_ropt = (struct opthdr *)opt; + /* verified in first pass */ ASSERT(_TPI_TOPT_VALID(opt, opt_start, opt_end)); @@ -1144,9 +940,7 @@ do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr, */ if (do_opt_default(q, opt, &optr, worst_statusp, cr, dbobjp) < 0) { - /* fail or pass transparently */ - if (topmost_tpiprovider) - opt->status = T_FAILURE; + opt->status = T_FAILURE; bcopy(opt, optr, opt->len); optr += _TPI_ALIGN_TOPT(opt->len); *worst_statusp = get_worst_status(opt->status, @@ -1166,12 +960,8 @@ do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr, optset_context = SETFN_OPTCOM_CHECKONLY; else /* T_NEGOTIATE */ optset_context = SETFN_OPTCOM_NEGOTIATE; - err = do_opt_check_or_negotiate(q, opt, optset_context, - &optr, worst_statusp, cr, dbobjp, first_mp); - if (err == EINPROGRESS) { - *queued_statusp = B_TRUE; - return (0); - } + do_opt_check_or_negotiate(q, opt, optset_context, + &optr, 
worst_statusp, cr, dbobjp); break; default: return (TBADFLAG); @@ -1236,7 +1026,6 @@ do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, pfi_t deffn = dbobjp->odb_deffn; opdes_t *opt_arr = dbobjp->odb_opt_des_arr; uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; - boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; struct T_opthdr *topth; opdes_t *optd; @@ -1248,15 +1037,8 @@ do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, optd = proto_opt_lookup(reqopt->level, reqopt->name, opt_arr, opt_arr_cnt); - if (optd == NULL) { - /* - * not found - fail this one. Should not happen - * for topmost_tpiprovider as calling routine - * should have verified it. - */ - ASSERT(!topmost_tpiprovider); - return (-1); - } + /* Calling routine should have verified that it exists */ + ASSERT(optd != NULL); topth = (struct T_opthdr *)(*resptrp); topth->level = reqopt->level; @@ -1333,10 +1115,7 @@ do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, * * lookup and stuff default values of all the options of the * level specified - * Note: This expansion of T_ALLOPT should happen in - * a topmost_tpiprovider. 
*/ - ASSERT(topmost_tpiprovider); for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) { if (reqopt->level != optd->opdes_level) continue; @@ -1453,8 +1232,6 @@ do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, pfi_t getfn = dbobjp->odb_getfn; opdes_t *opt_arr = dbobjp->odb_opt_des_arr; uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; - boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; - struct T_opthdr *topth; opdes_t *optd; int optlen; @@ -1484,7 +1261,6 @@ do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, *resptrp -= sizeof (struct T_opthdr); } } else { /* T_ALLOPT processing */ - ASSERT(topmost_tpiprovider == B_TRUE); /* scan and get all options */ for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) { /* skip other levels */ @@ -1530,14 +1306,9 @@ do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, } if (*resptrp == initptr) { /* - * getfn failed and does not want to handle this option. Maybe - * something downstream will or something upstream did. (If - * topmost_tpiprovider, initialize "status" to failure which - * can possibly change downstream). Copy the input "as is" from - * input option buffer if any to maintain transparency. + * getfn failed and does not want to handle this option. 
*/ - if (topmost_tpiprovider) - reqopt->status = T_FAILURE; + reqopt->status = T_FAILURE; bcopy(reqopt, *resptrp, reqopt->len); *resptrp += _TPI_ALIGN_TOPT(reqopt->len); *worst_statusp = get_worst_status(reqopt->status, @@ -1545,18 +1316,15 @@ do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp, } } -/* ARGSUSED */ -static int +static void do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp, - cred_t *cr, optdb_obj_t *dbobjp, mblk_t *first_mp) + cred_t *cr, optdb_obj_t *dbobjp) { pfi_t deffn = dbobjp->odb_deffn; opt_set_fn setfn = dbobjp->odb_setfn; opdes_t *opt_arr = dbobjp->odb_opt_des_arr; uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt; - boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider; - struct T_opthdr *topth; opdes_t *optd; int error; @@ -1572,12 +1340,10 @@ do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, error = (*setfn)(q, optset_context, reqopt->level, reqopt->name, reqopt->len - sizeof (struct T_opthdr), _TPI_TOPT_DATA(reqopt), &optlen, _TPI_TOPT_DATA(topth), - NULL, cr, first_mp); + NULL, cr); if (error) { /* failed - reset "*resptrp" */ *resptrp -= sizeof (struct T_opthdr); - if (error == EINPROGRESS) - return (error); } else { /* * success - "value" already filled in setfn() @@ -1594,7 +1360,6 @@ do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, } else { /* T_ALLOPT processing */ /* only for T_NEGOTIATE case */ ASSERT(optset_context == SETFN_OPTCOM_NEGOTIATE); - ASSERT(topmost_tpiprovider == B_TRUE); /* scan and set all options to default value */ for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) { @@ -1670,7 +1435,7 @@ do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE, reqopt->level, optd->opdes_name, optsize, (uchar_t *)optd->opdes_defbuf, &optlen, - _TPI_TOPT_DATA(topth), NULL, cr, NULL); + _TPI_TOPT_DATA(topth), NULL, cr); if (error) { /* * 
failed, return as T_FAILURE and null value @@ -1693,20 +1458,14 @@ do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt, if (*resptrp == initptr) { /* - * setfn failed and does not want to handle this option. Maybe - * something downstream will or something upstream - * did. Copy the input as is from input option buffer if any to - * maintain transparency (maybe something at a level above - * did something. + * setfn failed and does not want to handle this option. */ - if (topmost_tpiprovider) - reqopt->status = T_FAILURE; + reqopt->status = T_FAILURE; bcopy(reqopt, *resptrp, reqopt->len); *resptrp += _TPI_ALIGN_TOPT(reqopt->len); *worst_statusp = get_worst_status(reqopt->status, *worst_statusp); } - return (0); } /* @@ -1886,7 +1645,8 @@ tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp, */ /* verify length */ - if (!opt_length_ok(optd, opt)) { + if (opt->len < (t_uscalar_t)sizeof (struct T_opthdr) || + !opt_length_ok(optd, opt->len - sizeof (struct T_opthdr))) { /* bad size */ if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) { /* option is absolute requirement */ @@ -1914,7 +1674,7 @@ tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp, error = (*setfn)(q, optset_context, opt->level, opt->name, opt->len - (t_uscalar_t)sizeof (struct T_opthdr), _TPI_TOPT_DATA(opt), &olen, _TPI_TOPT_DATA(opt), - thisdg_attrs, cr, NULL); + thisdg_attrs, cr); if (olen > (int)(opt->len - sizeof (struct T_opthdr))) { /* @@ -2113,8 +1873,12 @@ opt_bloated_maxsize(opdes_t *optd) return (B_FALSE); } +/* + * optlen is the length of the option content + * Caller should check that opt->len is at least sizeof (struct T_opthdr) + */ static boolean_t -opt_length_ok(opdes_t *optd, struct T_opthdr *opt) +opt_length_ok(opdes_t *optd, t_uscalar_t optlen) { /* * Verify length. @@ -2122,95 +1886,60 @@ opt_length_ok(opdes_t *optd, struct T_opthdr *opt) * less than maxlen of variable length option. 
*/ if (optd->opdes_props & OP_VARLEN) { - if (opt->len <= optd->opdes_size + - (t_uscalar_t)sizeof (struct T_opthdr)) + if (optlen <= optd->opdes_size) return (B_TRUE); } else { /* fixed length option */ - if (opt->len == optd->opdes_size + - (t_uscalar_t)sizeof (struct T_opthdr)) + if (optlen == optd->opdes_size) return (B_TRUE); } return (B_FALSE); } /* - * This routine appends a passed in hop-by-hop option to the existing - * option (in this case a cipso label encoded in HOPOPT option). The - * passed in option is always padded. The 'reservelen' is the - * length of reserved data (label). New memory will be allocated if - * the current buffer is not large enough. Return failure if memory + * This routine manages the allocation and freeing of the space for + * an extension header or option. Returns failure if memory * can not be allocated. */ int -optcom_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, - uchar_t **optbufp, uint_t *optlenp, uint_t reservelen) +optcom_pkt_set(uchar_t *invalp, uint_t inlen, + uchar_t **optbufp, uint_t *optlenp) { uchar_t *optbuf; uchar_t *optp; - if (!sticky) { - *optbufp = invalp; - *optlenp = inlen; - return (0); - } - - if (inlen == *optlenp - reservelen) { + if (inlen == *optlenp) { /* Unchanged length - no need to reallocate */ - optp = *optbufp + reservelen; + optp = *optbufp; bcopy(invalp, optp, inlen); - if (reservelen != 0) { - /* - * Convert the NextHeader and Length of the - * passed in hop-by-hop header to pads - */ - optp[0] = IP6OPT_PADN; - optp[1] = 0; - } return (0); } - if (inlen + reservelen > 0) { + if (inlen > 0) { /* Allocate new buffer before free */ - optbuf = kmem_alloc(inlen + reservelen, KM_NOSLEEP); + optbuf = kmem_alloc(inlen, KM_NOSLEEP); if (optbuf == NULL) return (ENOMEM); } else { optbuf = NULL; } - /* Copy out old reserved data (label) */ - if (reservelen > 0) - bcopy(*optbufp, optbuf, reservelen); - /* Free old buffer */ if (*optlenp != 0) kmem_free(*optbufp, *optlenp); if (inlen > 0) - 
bcopy(invalp, optbuf + reservelen, inlen); + bcopy(invalp, optbuf, inlen); - if (reservelen != 0) { - /* - * Convert the NextHeader and Length of the - * passed in hop-by-hop header to pads - */ - optbuf[reservelen] = IP6OPT_PADN; - optbuf[reservelen + 1] = 0; - /* - * Set the Length of the hop-by-hop header, number of 8 - * byte-words following the 1st 8 bytes - */ - optbuf[1] = (reservelen + inlen - 1) >> 3; - } *optbufp = optbuf; - *optlenp = inlen + reservelen; + *optlenp = inlen; return (0); } int process_auxiliary_options(conn_t *connp, void *control, t_uscalar_t controllen, - void *optbuf, optdb_obj_t *dbobjp, int (*opt_set_fn)(conn_t *, uint_t, int, - int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *), cred_t *cr) + void *optbuf, optdb_obj_t *dbobjp, int (*opt_set_fn)(conn_t *, + uint_t, int, int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *), + cred_t *cr) { struct cmsghdr *cmsg; opdes_t *optd; @@ -2254,7 +1983,7 @@ process_auxiliary_options(conn_t *connp, void *control, t_uscalar_t controllen, } error = opt_set_fn(connp, SETFN_UD_NEGOTIATE, optd->opdes_level, optd->opdes_name, len, (uchar_t *)CMSG_CONTENT(cmsg), - &outlen, (uchar_t *)CMSG_CONTENT(cmsg), (void *)optbuf, cr); + &outlen, (uchar_t *)CMSG_CONTENT(cmsg), optbuf, cr); if (error > 0) { return (error); } else if (outlen > len) { |