/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * DLPI driver for RSM over Wildcat */ #ifndef _SYS_WRSMD_H_ #define _SYS_WRSMD_H_ #pragma ident "%Z%%M% %I% %E% SMI" #ifdef __cplusplus extern "C" { #endif #include #include /* * This driver is only supported on sparc systems, so there is no * need to worry about byte ordering issues. * However, need to update this version number whenever there is a * change in the layout of wrsmd remote shared memory that could * lead to incompatibilities between systems. */ #define WRSMD_VERSION (2) /* * static limits */ #define WRSMD_DUMP_IOCTL (5618) #define WRSMD_DUMP_DEST (5619) #define RSM_MAX_DESTADDR 256 /* Wildcat maximum -- (must be power of 2) */ #define RSM_DLPI_QPRI 8 /* XXX what is the priority range?? 
*/ #define RSM_DLPI_QDEPTH 100 #define RSM_DLPI_QFLAGS (RSM_INTR_SEND_Q_NO_FENCE) #ifdef _KERNEL #define WRSMD_CACHELINE_SIZE (0x40) #define WRSMD_CACHELINE_SHIFT 6 #define WRSMD_CACHELINE_OFFSET (WRSMD_CACHELINE_SIZE - 1) #define WRSMD_CACHELINE_MASK (~WRSMD_CACHELINE_OFFSET) #define WRSMD_CACHELINE_ROUNDUP(b) \ (((uint64_t)(b) + WRSMD_CACHELINE_OFFSET) & WRSMD_CACHELINE_MASK) /* * Use the same format address as ethernet when interacting with higher * level modules. */ typedef struct dl_rsm_addr { union { rsm_addr_t rsm; /* real RSM HW address */ struct { unsigned char zeroes[7]; unsigned char addr; } wrsm; struct { /* address in ethernet format */ ushort_t zero; struct ether_addr addr; } ether; } m; } dl_rsm_addr_t; /* * Declarations specific to the medium */ #define MEDIUM_MTU (64*1024-1) /* max frame w/o header */ #define MEDIUM_MIN (1) /* min frame w/header w/o fcs */ #define MEDIUMSAP_MAX (0xffff) /* max valid medium sap */ /* * Definitions for module_info. */ #define WRSMDIDNUM (726) /* module ID number */ #define WRSMDNAME "wrsmd" /* module name */ #define WRSMDMINPSZ (0) /* min packet size */ #define WRSMDMAXPSZ (65536) /* max packet size */ #define WRSMDHIWAT (65536) /* hi-water mark */ #define WRSMDLOWAT (1) /* lo-water mark */ /* * Driver parameters, from .conf file */ struct wrsmd_param { /* Size of packet buffers (must be multiple of 64 bytes) */ uint_t wrsmd_buffer_size; /* Mask of base ID bits in IP address */ uint_t wrsmd_netmask; /* Number of packet buffers exported to each communicating peer */ ushort_t wrsmd_buffers; /* Size of communications queues (must be at least wrsmd_buffers) */ ushort_t wrsmd_queue_size; /* Number of buffers which won't be loaned upstream */ ushort_t wrsmd_buffers_retained; /* Time to reclaim idle connection after (in seconds) UNIMPLEMENTED */ uint_t wrsmd_idle_reclaim_time; /* Number of retries after a read or write error */ ushort_t wrsmd_err_retries; /* Maximum # of queue packets per destination */ ushort_t 
wrsmd_max_queued_pkts; /* Initial FQE timeout interval */ int wrsmd_nobuf_init_tmo; /* Maximum FQE timeout interval */ int wrsmd_nobuf_max_tmo; /* Time after which we drop packets instead of doing FQE timeout */ uint_t wrsmd_nobuf_drop_tmo; /* Initial message timeout interval */ int wrsmd_msg_init_tmo; /* Maximum message timeout interval */ int wrsmd_msg_max_tmo; /* Time after which we drop connection instead of doing msg timeout */ uint_t wrsmd_msg_drop_tmo; /* Acknowledgment timeout interval */ int wrsmd_ack_tmo; /* Queue element sync timeout interval */ uint_t wrsmd_sync_tmo; /* * timeout interval to wait before tearing down connection * after last attach to device is removed. */ uint_t wrsmd_teardown_tmo; /* Number of packets to try and batch up in one transmission. */ ushort_t wrsmd_train_size; /* Number of free buffers to try and batch up in one transmission. */ ushort_t wrsmd_fqe_sync_size; }; /* * Defaults and limits for parameters * Timeout parameter values now given in milliseconds, * rather than ticks. Any values modified in wrsmd.conf * must now be in milliseconds. Values get rounded up to * the next tick value, with granularity 10 ms for the default * 100 hz. */ #define WRSMD_BUFFERS_DFLT 32 #define WRSMD_BUFFER_SIZE_DFLT 16384 #define WRSMD_QUEUE_SIZE_DFLT 64 #define WRSMD_BUFFERS_RETAINED_DFLT 32 #define WRSMD_IDLE_RECLAIM_TIME_DFLT 36000 #define WRSMD_ERR_RETRIES_DFLT 1 #define WRSMD_MAX_QUEUED_PKTS_DFLT 100 #define WRSMD_NOBUF_INIT_TMO_DFLT 10 #define WRSMD_NOBUF_MAX_TMO_DFLT 2560 #define WRSMD_NOBUF_DROP_TMO_DFLT 5000 #define WRSMD_MSG_INIT_TMO_DFLT 10 #define WRSMD_MSG_MAX_TMO_DFLT 1280 #define WRSMD_MSG_DROP_TMO_DFLT 30000 #define WRSMD_ACK_TMO_DFLT 1000 #define WRSMD_SYNC_TMO_DFLT 10 /* * We set this to two clock ticks to allow free destination timeouts * (<= 1 tick) to complete first, before next teardown timeout, * allowing fewer iterations of the latter. 
*/ #define WRSMD_TEARDOWN_TMO_DFLT 20 #define WRSMD_TRAIN_SIZE_DFLT 8 #define WRSMD_FQE_SYNC_SIZE_DFLT 16 /* * Macro to convert millisecond timeout parameters to clock ticks. */ #define WRSMD_TICKS(x) (drv_usectohz(1000 * (x))) /* Definition of each possible event type */ #define WRSMD_EVT_FREEDEST 0 #define WRSMD_EVT_SYNC 1 #define WRSMD_EVT_SYNC_DQE 2 /* * Per-Stream instance state information. * * Each instance is dynamically allocated at open() and freed at * close(). Each per-stream instance points to at most one per-device * structure using the ss_wrsmdp field. All instances are threaded * together into one list of active instances ordered on minor device * number. */ typedef struct wrsmdstr { struct wrsmdstr *ss_nextp; /* next in list */ queue_t *ss_rq; /* ptr to our read queue */ struct wrsmd *ss_wrsmdp; /* attached device, if any */ ushort_t ss_state; /* current state */ ushort_t ss_flags; /* misc flags */ t_uscalar_t ss_sap; /* bound sap (from dl_bind_req_t) */ minor_t ss_minor; /* minor device number */ kmutex_t ss_lock; /* protect this struct */ } wrsmdstr_t; _NOTE(READ_ONLY_DATA(wrsmdstr::ss_rq)) _NOTE(MUTEX_PROTECTS_DATA(wrsmdstr::ss_lock, wrsmdstr::ss_wrsmdp)) _NOTE(MUTEX_PROTECTS_DATA(wrsmdstr::ss_lock, wrsmdstr::ss_state)) _NOTE(MUTEX_PROTECTS_DATA(wrsmdstr::ss_lock, wrsmdstr::ss_sap)) _NOTE(MUTEX_PROTECTS_DATA(wrsmdstr::ss_lock, wrsmdstr::ss_flags)) _NOTE(READ_ONLY_DATA(wrsmdstr::ss_minor)) /* * For performance reasons, we read the following things in wrsmdsendup() * without getting ss_lock. As long as accesses to these variables are atomic, * we believe nothing bad will happen. 
*/ _NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmdstr::ss_wrsmdp)) _NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmdstr::ss_sap)) _NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmdstr::ss_flags)) /* Per-stream flags */ #define WRSMD_SLRAW 0x02 /* M_DATA plain raw mode */ #define WRSMD_SLALLPHYS 0x04 /* "promiscuous mode" */ #define WRSMD_SLALLSAP 0x08 /* enable all ether type values */ #define WRSMD_SLFAST 0x10 /* "fast mode" */ typedef struct wrsmd_event { struct wrsmd_event *next; int type; void *arg; } wrsmd_event_t; /* * Per-Device instance state information. * * Each instance is dynamically allocated on first attach. */ typedef struct wrsmd { struct wrsmd *wrsmd_nextp; /* next in linked list */ queue_t *wrsmd_wq; /* ptr to one of our wq's, doesn't */ /* matter which one, to run wsrv */ queue_t *wrsmd_ipq; /* IP queue, iff there's only one */ krwlock_t wrsmd_ipq_rwlock; /* protects wrsmd_ipq */ dev_info_t *wrsmd_dip; /* dev info */ ushort_t wrsmd_flags; /* misc. flags */ ushort_t wrsmd_promisc; /* # of WRSMD_SLALLPHYS streams */ ushort_t wrsmd_attached_streams; /* streams attached to device */ kmutex_t wrsmd_lock; /* protect this struct */ kmutex_t wrsmd_dest_lock; /* protect dest table (below) */ /* Counters to keep stats for netstat support */ uint64_t wrsmd_ipackets; /* # packets received */ uint32_t wrsmd_ierrors; /* # total input errors */ uint64_t wrsmd_opackets; /* # packets sent */ uint32_t wrsmd_oerrors; /* # total output errors */ uint32_t wrsmd_collisions; /* # collisions (FQE waits) */ uint32_t wrsmd_in_bytes; /* # bytes input (32 bit) */ uint32_t wrsmd_in_bytes64; /* # bytes input (64 bit) */ uint64_t wrsmd_out_bytes; /* # bytes output (32 bit) */ uint64_t wrsmd_out_bytes64; /* # bytes output (64 bit) */ /* Other counters, for internal use */ uint32_t wrsmd_xfers; /* # calls to wrsmd_xmit */ uint32_t wrsmd_xfer_pkts; /* # pkts sent out by xmit */ uint32_t wrsmd_syncdqes; /* # syncdqe-ints sent out by xmit */ uint32_t wrsmd_lbufs; /* # times we loaned bufs */ uint32_t 
wrsmd_nlbufs; /* # times we had to alloc buf */ uint32_t wrsmd_pullup; /* # times we had to coalesce pkts */ uint32_t wrsmd_pullup_fail; /* # times we couldn't pullup */ uint32_t wrsmd_starts; /* # calls to wrsmdstart */ uint32_t wrsmd_start_xfers; /* # calls to wrsmdxfer from start */ uint32_t wrsmd_fqetmo_hint; /* # times fqe tmo ended by hint */ uint32_t wrsmd_fqetmo_drops; /* # pkts dropped by fqetmo */ uint32_t wrsmd_maxq_drops; /* # pkts dropped 'cause q too long */ uint32_t wrsmd_errs; /* # errors on transfers */ struct wrsmd_param wrsmd_param; /* parameters */ struct kstat *wrsmd_ksp; /* our kstats */ dl_rsm_addr_t wrsmd_rsm_addr; /* our RSM hardware address */ uint_t wrsmd_ctlr_id; /* our RSM controller id */ rsm_controller_object_t wrsmd_ctlr; rsm_controller_attr_t *wrsmd_ctlr_attr; int wrsmd_numdest; /* Number of valid entries in desttbl */ struct wrsmd_dest /* table for destination structures */ *wrsmd_desttbl[RSM_MAX_DESTADDR]; struct wrsmd_dest *wrsmd_runq; /* service routine run queue */ kmutex_t wrsmd_runq_lock; /* protects wrsmd_runq, among others */ timeout_id_t wrsmd_teardown_tmo_id; /* teardown device */ /* Event thread for making RSM calles from non callback context */ kmutex_t event_lock; kthread_t *event_thread; kcondvar_t event_cv; boolean_t stop_events; kcondvar_t event_thread_exit_cv; wrsmd_event_t *events; } wrsmd_t; _NOTE(READ_ONLY_DATA(wrsmd::wrsmd_dip)) _NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_lock, wrsmd::wrsmd_flags)) _NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_lock, wrsmd::wrsmd_promisc)) _NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_promisc)) _NOTE(READ_ONLY_DATA(wrsmd::wrsmd_param)) _NOTE(READ_ONLY_DATA(wrsmd_param)) _NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_lock, wrsmd::wrsmd_ksp)) _NOTE(READ_ONLY_DATA(wrsmd::wrsmd_ctlr_id)) _NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_runq_lock, wrsmd::wrsmd_runq)) _NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_runq_lock, wrsmd::wrsmd_wq)) _NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_dip)) 
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_ipackets))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_ierrors))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_opackets))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_oerrors))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd::wrsmd_collisions))

/* RSMPI progress flags */
#define	WRSMDREGHANDLER	0x01
#define	WRSMDGOTCTLR	0x02

/* Attach progress bitmask */
#define	WRSMD_ATT_MUTEX		0x01
#define	WRSMD_ATT_LINKED	0x02
#define	WRSMD_ATT_MINOR		0x04
#define	WRSMD_ATT_KSTAT		0x08
#define	WRSMD_ATT_EVT_THREAD	0x10
#define	WRSMD_ATT_ALL \
	(WRSMD_ATT_MUTEX | WRSMD_ATT_LINKED | WRSMD_ATT_MINOR \
	| WRSMD_ATT_KSTAT | WRSMD_ATT_EVT_THREAD)

/*
 * Number of bytes to add to buffer size to leave room for
 * headers from other streams modules:
 *
 *	TCP Header is 14 bytes
 *	IP Header is 20 bytes
 *
 * NOTE(review): 14 bytes is the Ethernet (not TCP) header size; the
 * 34-byte total matches Ethernet + IP headers.  The original comment
 * may simply be mislabeled -- confirm before relying on it.
 */
#define	WRSMDHEADROOM 34

/*
 * Full dlsap address format
 */
typedef struct wrsmddladdr {
	struct ether_addr dl_addr;	/* RSM hardware addr */
	ushort_t dl_sap;		/* SAP */
} wrsmddladdr_t;

/*
 * Full DLSAP address length
 */
#define	WRSMD_DEVICE_ADDRL (sizeof (ushort_t) + sizeof (struct ether_addr))

#define	WRSMD_IP_SAP 0x800	/* IP's sap */

#define	WRSMD_BCAST_ADDRL (sizeof (struct ether_addr))

#define	DLADDRL (80)

/*
 * Export some of the error counters via the kstats mechanism.
 */
typedef struct wrsmd_stat {
	struct kstat_named rsm_ipackets;
	struct kstat_named rsm_ipackets64;
	struct kstat_named rsm_ierrors;
	struct kstat_named rsm_opackets;
	struct kstat_named rsm_opackets64;
	struct kstat_named rsm_oerrors;
	struct kstat_named rsm_collisions;
	struct kstat_named rsm_xfers;
	struct kstat_named rsm_xfer_pkts;
	struct kstat_named rsm_syncdqes;
	struct kstat_named rsm_lbufs;
	struct kstat_named rsm_nlbufs;
	struct kstat_named rsm_pullup;
	struct kstat_named rsm_pullup_fail;
	struct kstat_named rsm_starts;
	struct kstat_named rsm_start_xfers;
	struct kstat_named rsm_fqetmo_hint;
	struct kstat_named rsm_fqetmo_drops;
	struct kstat_named rsm_maxq_drops;
	struct kstat_named rsm_errs;
	struct kstat_named rsm_in_bytes;
	struct kstat_named rsm_in_bytes64;
	struct kstat_named rsm_out_bytes;
	struct kstat_named rsm_out_bytes64;
} wrsmd_stat_t;

_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_ipackets))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_ierrors))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_opackets))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_oerrors))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_collisions))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_xfers))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_xfer_pkts))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_lbufs))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_nlbufs))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_pullup))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_pullup_fail))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_starts))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_start_xfers))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_fqetmo_hint))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_fqetmo_drops))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_maxq_drops))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_errs))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_in_bytes))
_NOTE(SCHEME_PROTECTS_DATA("inconsistency OK", wrsmd_stat::rsm_out_bytes))

/* Some streams defines */
#define	DB_BASE(mp)	((mp)->b_datap->db_base)
#define	DB_LIM(mp)	((mp)->b_datap->db_lim)
#define	DB_REF(mp)	((mp)->b_datap->db_ref)
#define	DB_TYPE(mp)	((mp)->b_datap->db_type)
#define	MBLKL(mp)	((mp)->b_wptr - (mp)->b_rptr)
#define	MBLKSIZE(mp)	((mp)->b_datap->db_lim - (mp)->b_datap->db_base)
#define	MBLKHEAD(mp)	((mp)->b_rptr - (mp)->b_datap->db_base)
#define	MBLKTAIL(mp)	((mp)->b_datap->db_lim - (mp)->b_wptr)

/*
 * On Wildcat, if there is a data delivery problem with one of the 32 byte
 * halves of a 64 byte write to the remote node, the remote side writes all
 * 0's to that 32 byte region of memory.  We guarantee that the 4 byte fqe
 * entries and 8 byte dqe entries (described below) are aligned in a
 * way that guarantees that each fits within a single 32 byte region, so
 * checking for any non-zero value within the entry is sufficient to
 * guarantee that the write was successful.
 *
 * We use the seqnum as the write validity check, which means it must never
 * be 0.  A non-0 value ensures that the remote write was successful.
 *
 * Each fqe and dqe is 64 bytes in size.  This guarantees that
 * we can write one entry at a time atomically, without disturbing any other
 * entries.  This also guarantees alignment to wildcat hardware.  It does,
 * however, waste some space.
 */
struct align_64byte {	/* Align to 64 bytes */
	uint64_t pad[8];
};

/*
 * Delivery Queue Entry, used to denote buffers containing new packets.
 */
#define	WRSMD_DQE_SEQ_MASK 0xFF	/* All 1's sequence */

typedef union wrsmd_dqe {
	struct align_64byte align;	/* Align to 64 bytes */
	struct wrsmd_dqe_s {		/* actual structure */
		ushort_t dq_length;	/* True length of packet */
		ushort_t dq_sap;	/* Packet's SAP */
		uchar_t dq_seqnum;	/* Sequence number - validity check */
		uchar_t dq_offset;	/* Packet offset within buffer */
		ushort_t dq_bufnum;	/* Buffer number */
	} s;
} wrsmd_dqe_t;

/*
 * Free Queue Entry, used to denote buffers which are available to be filled.
 */
#define	WRSMD_FQE_SEQ_MASK 0xFF	/* All 1's sequence */

typedef union wrsmd_fqe {
	struct align_64byte align;	/* Align to 64 bytes */
	struct wrsmd_fqe_s {
		uchar_t fq_seqnum;	/* Sequence number - validity check */
		uchar_t fq_filler;	/* Unused */
		ushort_t fq_bufnum;	/* Buffer number */
	} s;
} wrsmd_fqe_t;

/*
 * Segment data formats
 */

/*
 * The major version should be bumped whenever the contents of the
 * xfer segments are changed in a non-upward-compatible way, to prevent
 * confused attempts at communication with machines running older protocol
 * versions.
 */
#define	WRSMD_VERS_MAJOR	1
#define	WRSMD_VERS_MINOR	0

/*
 * Header for the data transfer segment.
 */
typedef struct wrsmd_xfer_hdr {
	size_t rx_segsize;	/* size of segment */
	uint32_t rx_cookie;	/* magic cookie */
	uint32_t rx_bufsize;	/* size of buffers */
	ushort_t rx_numbufs;	/* number of buffers */
	ushort_t rx_numfqes;	/* number of elements in free queue */
	ushort_t rx_numdqes;	/* num of elements in delivery queue */
	uint32_t rx_buf_offset;	/* offset to start of buffers */
	uint32_t rx_fq_offset;	/* offset to start of free queue */
	uint32_t rx_dq_offset;	/* offset to start of delivery queue */
} wrsmd_xfer_hdr_t;

#define	WRSMD_XFER_COOKIE	0x58664572	/* 'XfEr' */

/*
 * Structure describing a loaned-up buffer
 */
typedef struct wrsmdbuf {
	frtn_t rb_frtn;			/* Pointer to our free routine */
	int rb_bufnum;			/* Number of loaned buffer */
	struct wrsmd_dest *rb_rd;	/* Destination buffer belongs to */
} wrsmdbuf_t;

_NOTE(READ_ONLY_DATA(wrsmdbuf::rb_frtn))
_NOTE(READ_ONLY_DATA(wrsmdbuf::rb_bufnum))
_NOTE(READ_ONLY_DATA(wrsmdbuf::rb_rd))

/*
 * Structure describing a packet which is currently being sent
 */
typedef struct wrsmd_pkt {
	mblk_t *rd_pkt_ptr;		/* packet pointer */
	ushort_t rd_pkt_offset;		/* packet offset within buffer */
	uint_t rd_pkt_len;		/* real length of packet */
	ushort_t rd_pkt_sap;		/* packet SAP */
} wrsmd_pkt_t;

/*
 * WRSMD message types
 */
#define	WRSMD_MSG_REQ_CONNECT	1
#define	WRSMD_MSG_CON_ACCEPT	2
#define	WRSMD_MSG_CON_ACK	3
#define	WRSMD_MSG_SYNC_DQE	4
#define	WRSMD_REXMIT		127

/*
 *
 *	R S M D   C O N N E C T I O N   P R O T O C O L
 *
 *
 * The connection protocol for the RSM DLPI driver is as follows:
 *
 *	INITIATOR			RESPONDER
 *
 * 1	Send WRSMD_MSG_REQ_CONNECT
 *	Includes xfer segment ID
 *
 * 2					Send WRSMD_CON_ACCEPT
 *					Includes xfer segment ID
 *
 * 3	Send WRSMD_CON_ACK
 *
 * If a WRSMD_REQ_CONNECT message is received while a
 * WRSMD_REQ_CONNECT is outstanding to the same node ID: if the
 * node receiving the duplicate WRSMD_REQ_CONNECT has a higher
 * numbered ID, it will accept the connection.  The lower numbered
 * node will reject the duplicate.
 *
 * The special message type WRSMD_REXMIT causes us to retransmit the
 * last message we sent (unsuccessfully or successfully), without
 * incrementing the sequence number on the message.  This is used when
 * we get a timeout waiting for a response to a WRSMD_REQ_CONNECT
 * request and want to resend it.
 */
typedef struct wrsmd_msg_header {
	uint8_t wrsmd_version;	/* Increment when incompatible change made */
	uint8_t reqtype;	/* One of the above */
	uint16_t seqno;		/* Sequence number */
} wrsmd_msg_header_t;

typedef struct wrsmd_con_request {
	rsm_memseg_id_t send_segid; /* Segment you should use to talk to me */
} wrsmd_con_request_t;

typedef struct wrsmd_con_accept {
	rsm_memseg_id_t send_segid; /* Segment you should use to talk to me */
	rsm_memseg_id_t rcv_segid;  /* Segment I use to talk to you */
} wrsmd_con_accept_t;

typedef struct wrsmd_con_ack {
	rsm_memseg_id_t send_segid; /* Segment you should use to talk to me */
	rsm_memseg_id_t rcv_segid;  /* Segment I use to talk to you */
} wrsmd_con_ack_t;

typedef struct wrsmd_syncdqe {
	rsm_memseg_id_t rcv_segid;  /* Segment I use to talk to you */
} wrsmd_syncdqe_t;

/* On-the-wire message: header plus a per-type payload, 8-byte aligned. */
typedef union wrsmd_msg {
	uint64_t align;
	struct {
		wrsmd_msg_header_t hdr;
		union {
			wrsmd_con_request_t con_request;
			wrsmd_con_accept_t con_accept;
			wrsmd_con_ack_t con_ack;
			wrsmd_syncdqe_t syncdqe;
		} m;
	} p;
} wrsmd_msg_t;

/*
 * Structure describing someone else communicating with us (a destination)
 */
typedef struct wrsmd_dest {
	/* Basics */
	wrsmd_t *rd_wrsmdp;	/* Pointer to our device structure */
	rsm_addr_t rd_rsm_addr;	/* Address of destination */

	/* Interrupt queue */
	rsm_send_q_handle_t rsm_sendq;
	wrsmd_msg_t rsm_previous_msg;
	int rsm_previous_msg_valid;

	/* Packet queue */
	mblk_t *rd_queue_h,	/* queue of packets waiting to go out */
	    *rd_queue_t;
	ushort_t rd_queue_len;	/* number of packets on above queue */

	/* Local transfer segment */
	caddr_t rd_rawmem_base_addr;
	size_t rd_rawmem_base_size;
	rsm_memory_local_t rd_memory;
	rsm_memseg_id_t rd_lxfersegid;
	rsm_memseg_export_handle_t rd_lxferhand;

	/* Remote transfer segment */
	wrsmd_xfer_hdr_t rd_rxferhdr;
	int rd_rxferhdr_valid;
	off_t rd_rbufoff;
	boolean_t rd_segid_valid;
	rsm_memseg_id_t rd_rxfersegid;
	rsm_memseg_import_handle_t rd_rxferhand;
	uint16_t rd_lastconnmsg_seq;

	/*
	 * Free queue we're writing to (describing buffers on our node
	 * available to partner; lives on partner)
	 */
	off_t rd_fqw_f_off;	/* First usable element in queue */
	ushort_t rd_fqw_seq;	/* Sequence number we will write next */
	ushort_t rd_num_fqws;	/* Number of usable elements in queue */

	/*
	 * Delivery queue that we're writing to (describing buffers on
	 * partner that we've filled with data; lives on partner)
	 */
	off_t rd_dqw_f_off;	/* First usable element in queue */
	ushort_t rd_dqw_seq;	/* Sequence number we will write next */
	ushort_t rd_num_dqws;	/* Number of usable elements in queue */

	/* Buffers (on partner) that we're writing to */
	uint_t rd_rbuflen;	/* Length of remote buffers */
	ushort_t rd_numrbuf;	/* Number of remote buffers */

	/*
	 * Free queue we're reading from (describing buffers on partner
	 * available to us; lives on our node)
	 */
	volatile wrsmd_fqe_t	/* Pointers to ... */
	    *rd_fqr_f,		/* First usable element in queue */
	    *rd_fqr_l,		/* Last usable element in queue */
	    *rd_fqr_n;		/* Element we'll read next */
	ushort_t rd_fqr_seq;	/* Sequence number we expect to read next */
	ushort_t rd_num_fqrs;	/* Number of usable elements in queue */

	/*
	 * Delivery queue we're reading from (describing buffers on our
	 * node that the partner has filled with data; lives on our node)
	 */
	volatile wrsmd_dqe_t	/* Pointers to ... */
	    *rd_dqr_f,		/* First usable element in queue */
	    *rd_dqr_l,		/* Last usable element in queue */
	    *rd_dqr_n;		/* Element we'll read next */
	ushort_t rd_dqr_seq;	/* Sequence number we expect to read next */
	ushort_t rd_num_dqrs;	/* Number of usable elements in queue */

	/* (Local) buffers we're reading from */
	volatile void *rd_lbuf;	/* Start of first local buffer */
	uint_t rd_lbuflen;	/* Length of each local buffer */
	ushort_t rd_numlbufs;	/* Number of local buffers */
	wrsmdbuf_t *rd_bufbase;	/* Local buffer description structures, */
				/* for use in loaning buffers upward */

	/* Information on cached FQE's */
	ushort_t rd_cached_fqr_cnt;	/* number of cached fqe's */
	ushort_t *rd_cached_fqr; /* buffer numbers from cached fqe's */

	/*
	 * Shadow free queue - local copy of free queue that lives on
	 * partner
	 */
	wrsmd_fqe_t		/* Pointers to ... */
	    *rd_shdwfqw_f_addr,	/* Start of alloc'd memory for queue */
	    *rd_shdwfqw_f,	/* First usable element */
	    *rd_shdwfqw_l,	/* Last usable element */
	    *rd_shdwfqw_i,	/* Next element added to queue goes here */
	    *rd_shdwfqw_o,	/* Next element transmitted comes from here */

	/*
	 * Shadow delivery queue - local copy of delivery queue that lives
	 * on partner
	 */
	    ;
	wrsmd_dqe_t		/* Pointers to ... */
	    *rd_shdwdqw_f_addr,	/* Start of alloc'd memory for queue */
	    *rd_shdwdqw_f,	/* First usable element */
	    *rd_shdwdqw_l,	/* Last usable element */
	    *rd_shdwdqw_i,	/* Next element added to queue goes here */
	    *rd_shdwdqw_o;	/* Next element transmitted comes from here */

	ushort_t rd_shdwfqw_errflag;	/* If nonzero, we had an error last */
					/* time we tried to write an FQE */
	ushort_t rd_shdwdqw_errflag;	/* If nonzero, we had an error last */
					/* time we tried to write a DQE */
	ushort_t rd_stopq;		/* If nonzero, we shouldn't try to */
					/* sync queues, the network is down */

	/* State information */
	ushort_t rd_state;	/* State (WRSMD_STATE_xxx, see below) */
	ushort_t rd_estate;	/* Event State */
	ushort_t rd_sstate;	/* Segment state (bitmask of WRSMD_RSMS_xxx) */
	ushort_t rd_dstate;	/* Delete state (0-2), != 0 means deleting */
	short rd_refcnt;	/* Destination reference count */

	/* Command/sequence information */
	ushort_t rd_nseq;	/* Seq # we'll put on next message */
	uchar_t rd_recvdack;	/* Nonzero if we've gotten a valid ACK */
	uchar_t rd_sentconn;	/* Nonzero if we've sent a CONN */

	/* Last message transmitted, for rexmits if needed */
	wrsmd_msg_t rd_lastobmsg;	/* Last outbound message */

	/* Timeout information */
	timeout_id_t rd_fqe_tmo_id;	/* timeout ID for free queue retry. */
	timeout_id_t rd_tmo_id;	/* timeout ID for empty queue retry, etc. */
	int rd_tmo_int;		/* backoff interval for timeout */
	int rd_tmo_tot;		/* ticks we've waited so far this timeout */

	ushort_t rd_nlb;	/* number of outstanding loaned buffers */
	ushort_t rd_nlb_del;	/* if nonzero, we're being deleted, and are */
				/* waiting for rd_nlb to go to 0 */
	kmutex_t rd_nlb_lock;	/* mutex to protect rd_nlb/rd_nlb_del */

	kmutex_t rd_lock;	/* mutex to protect this data structure */
	kmutex_t rd_net_lock;	/* mutex to protect segment data/pointers */
	kmutex_t rd_xmit_lock;	/* mutex to protect xmit stuff */

	struct wrsmd_dest *rd_next;	/* ptrs for svc routine run queue */
} wrsmd_dest_t;

_NOTE(READ_ONLY_DATA(wrsmd_dest::rd_wrsmdp))
_NOTE(READ_ONLY_DATA(wrsmd_dest::rd_rsm_addr))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_xmit_lock, wrsmd_dest::rd_queue_h))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_xmit_lock, wrsmd_dest::rd_queue_t))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_xmit_lock, wrsmd_dest::rd_queue_len))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_lxferhand))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_rbuflen))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_rbuflen))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_numrbuf))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_numrbuf))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_lbuf))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_lbuf))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_lbuflen))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_lbuflen))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_numlbufs))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_numlbufs))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_lock, wrsmd_dest::rd_bufbase))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_bufbase))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwfqw_f))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwfqw_l))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwfqw_i))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwfqw_o))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_fqw_seq))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwdqw_f))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwdqw_l))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwdqw_i))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_shdwdqw_o))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_dqw_seq))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_fqr_f))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_fqr_l))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_fqr_n))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_fqr_seq))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_dqr_f))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_dqr_l))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_dqr_n))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_dqr_seq))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock,
    wrsmd_dest::rd_cached_fqr_cnt))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_cached_fqr))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock,
    wrsmd_dest::rd_shdwfqw_errflag))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock,
    wrsmd_dest::rd_shdwdqw_errflag))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_net_lock, wrsmd_dest::rd_stopq))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_xmit_lock, wrsmd_dest::rd_tmo_id))
_NOTE(DATA_READABLE_WITHOUT_LOCK(wrsmd_dest::rd_tmo_id))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_xmit_lock, wrsmd_dest::rd_tmo_int))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_xmit_lock, wrsmd_dest::rd_tmo_tot))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_nlb_lock, wrsmd_dest::rd_nlb))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd_dest::rd_nlb_lock, wrsmd_dest::rd_nlb_del))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_runq_lock, wrsmd_dest::rd_state))
_NOTE(SCHEME_PROTECTS_DATA("see comment below", wrsmd_dest::rd_next))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_dest_lock, wrsmd_dest::rd_dstate))
_NOTE(MUTEX_PROTECTS_DATA(wrsmd::wrsmd_dest_lock, wrsmd_dest::rd_refcnt))

/*
 * Run queue:
 *
 * Certain operations on destinations are performed by the driver's write
 * service routine (wrsmd_wsrv).  In order to arrange for this, there is a
 * queue of destinations waiting to be processed by the service routine.
 * Each device's wrsmd_runq points to the head of this queue of destinations,
 * which are linked together via rd_next.  Whenever the service routine
 * runs, after it has served its usual purpose of processing messages from
 * the stream's service queue, it traverses its list of destinations and
 * performs appropriate operations on them, depending on their state.
 *
 * The rd_next pointer is protected by the runq_lock everywhere but in the
 * middle of the service routine.  Essentially, the service routine takes a
 * whole chain of destination entries off of the run queue at once (inside
 * the runq_lock), and then traverses the list (outside the runq_lock).
 * Since a scheduled destination should never be given a new state except by
 * the service routine, there should be no conflicting updates to rd_next.
 *
 * Destination states:
 *
 * A scheduled state means the destination is on the run queue; an
 * unscheduled state means the destination is not.  State transitions are
 * always from scheduled to unscheduled or vice versa.
 *
 * A state with a name of the form WRSMD_STATE_S_xxx is a scheduled state
 * where the service routine is going to do xxx next.  These states have odd
 * numbers.
 *
 * A state with a name of the form WRSMD_STATE_W_xxx is an unscheduled state
 * where we are waiting for xxx to happen.  These states have even numbers.
 */
#define	WRSMD_STATE_NEW			0	/* Newly created */
#define	WRSMD_STATE_INPROGRESS		1000	/* Being processed */
#define	WRSMD_STATE_DELETING		2000	/* Being deleted */
#define	WRSMD_STATE_W_SCONNTMO		2	/* Waiting for conn rxmit tmo */
#define	WRSMD_STATE_W_ACCEPT		4	/* Waiting for accept msg */
#define	WRSMD_STATE_W_ACK		6	/* Waiting for ack msg */
#define	WRSMD_STATE_W_READY		8	/* Connected, wait for pkt */
#define	WRSMD_STATE_W_FQE		10	/* Waiting for fqe to xmit */
#define	WRSMD_STATE_S_REQ_CONNECT	1	/* Srv: send conn request */
#define	WRSMD_STATE_S_NEWCONN		3	/* Srv: setup/accept new conn */
#define	WRSMD_STATE_S_CONNXFER_ACCEPT	5	/* Srv: connxfer, then accept */
#define	WRSMD_STATE_S_CONNXFER_ACK	7	/* Srv: connxfer, then ack */
#define	WRSMD_STATE_S_XFER		9	/* Srv: xfer data */
#define	WRSMD_STATE_S_DELETE		11	/* Srv: delete this dest */
#define	WRSMD_STATE_S_SCONN		13	/* Srv: resend last conn */

/* Odd-numbered states are scheduled (on the run queue). */
#define	WRSMD_SCHED_STATE(s)	((s) & 1)

#define	WRSMD_STATE_STR(x) ( \
	(x == WRSMD_STATE_NEW) ? "WRSMD_STATE_NEW" : \
	(x == WRSMD_STATE_INPROGRESS) ? "WRSMD_STATE_INPROGRESS" : \
	(x == WRSMD_STATE_DELETING) ? "WRSMD_STATE_DELETING" : \
	(x == WRSMD_STATE_W_SCONNTMO) ? "WRSMD_STATE_W_SCONNTMO" : \
	(x == WRSMD_STATE_W_ACCEPT) ? "WRSMD_STATE_W_ACCEPT" : \
	(x == WRSMD_STATE_W_ACK) ? "WRSMD_STATE_W_ACK" : \
	(x == WRSMD_STATE_W_READY) ? "WRSMD_STATE_W_READY" : \
	(x == WRSMD_STATE_W_FQE) ? "WRSMD_STATE_W_FQE" : \
	(x == WRSMD_STATE_S_REQ_CONNECT) ? "WRSMD_STATE_S_REQ_CONNECT" :\
	(x == WRSMD_STATE_S_NEWCONN) ? "WRSMD_STATE_S_NEWCONN" : \
	(x == WRSMD_STATE_S_CONNXFER_ACCEPT) ? \
	    "WRSMD_STATE_S_CONNXFER_ACCEPT" : \
	(x == WRSMD_STATE_S_CONNXFER_ACK) ? "WRSMD_STATE_S_CONNXFER_ACK" : \
	(x == WRSMD_STATE_S_XFER) ? "WRSMD_STATE_S_XFER" : \
	(x == WRSMD_STATE_S_DELETE) ? "WRSMD_STATE_S_DELETE" : \
	(x == WRSMD_STATE_S_SCONN) ? "WRSMD_STATE_S_SCONN" : \
	"unknown")

/*
 * RSM driver state - basically, what segments we've created/connected.  We
 * keep a bitmask of the ones we've done, so that when we delete a
 * destination we don't try and undo something we never did.  Also, we
 * sometimes check to make sure rd_sstate is WRSMD_RSMS_ALL before trying to
 * perform an operation on a destination, to ensure we don't get ahead of
 * our initialization.
 *
 * NOTE(review): bit 0x08 is skipped in this series (0x01, 0x02, 0x04,
 * 0x10) -- possibly a removed flag; confirm before reusing 0x08.
 */
#define	WRSMD_RSMS_LXFER_C	0x01	/* Create local xfer segment */
#define	WRSMD_RSMS_LXFER_P	0x02	/* Publish local xfer segment */
#define	WRSMD_RSMS_RXFER_S	0x04	/* Create send queue to remote node */
#define	WRSMD_RSMS_RXFER_C	0x10	/* Connect to remote xfer */
#define	WRSMD_RSMS_ALL \
	(WRSMD_RSMS_LXFER_C | WRSMD_RSMS_LXFER_P | WRSMD_RSMS_RXFER_S | \
	WRSMD_RSMS_RXFER_C)

#endif /* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif /* _SYS_WRSMD_H_ */