summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/io/ena/ena.h
blob: 467da40f4b75e575a749a940ad9971d0b6097014 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2021 Oxide Computer Company
 */

#ifndef	_ENA_H
#define	_ENA_H

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/pci.h>
#include <sys/sysmacros.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/mac_provider.h>
#include <sys/pattr.h>
#include <sys/strsun.h>
#include <sys/ethernet.h>
#include <sys/vlan.h>
#include <sys/utsname.h>
#include "ena_hw.h"

/*
 * AWS ENA Ethernet Driver
 */

#ifdef __cplusplus
extern "C" {
#endif

/* The name of this driver module. */
#define	ENA_MODULE_NAME	"ena"

/*
 * The minimum supported ENA device controller version
 * (major.minor.subminor).
 */
#define	ENA_CTRL_MAJOR_VSN_MIN		0
#define	ENA_CTRL_MINOR_VSN_MIN		0
#define	ENA_CTRL_SUBMINOR_VSN_MIN	1

/* The version of this driver module (major.minor.subminor). */
#define	ENA_MODULE_VER_MAJOR	1
#define	ENA_MODULE_VER_MINOR	0
#define	ENA_MODULE_VER_SUBMINOR	0

/*
 * The Linux driver doesn't document what the specification version
 * number controls or the contract around version changes. The best we
 * can do is use the same version that they use and port version
 * changes as they come (the last one was in 2018).
 *
 * common: ENA_COMMON_SPEC_VERSION_{MAJOR,MINOR}
 */
#define	ENA_SPEC_VERSION_MAJOR	2
#define	ENA_SPEC_VERSION_MINOR	0


/*
 * The register set number of the device registers. This represents
 * BAR 0.
 */
#define	ENA_REG_NUMBER	1

/*
 * A sentinel value passed as argument to ena_ring_rx() to indicate
 * the Rx ring is being read in interrupt mode, not polling mode.
 *
 * The expansion is parenthesized so that the negative constant always
 * behaves as a single operand, no matter which expression the macro
 * is used in.
 */
#define	ENA_INTERRUPT_MODE	(-1)

/*
 * NOTE(review): presumably the number of bytes by which Rx data is
 * offset so that the IP header following the Ethernet header is
 * aligned -- confirm against the Rx path.
 */
#define	ENA_RX_BUF_IPHDR_ALIGNMENT	2
/* NOTE(review): presumably the number of admin queue entries -- confirm. */
#define	ENA_ADMINQ_DEPTH		32
/* NOTE(review): presumably the number of AENQ descriptors -- confirm. */
#define	ENA_AENQ_NUM_DESCS		32

/* Scale a millisecond count up to nanoseconds (1 ms == 1,000,000 ns). */
#define	ENA_MS_TO_NS(ms)	((ms) * (1000UL * 1000UL))

/*
 * The default amount of time we will wait for an admin command to
 * complete, specified in nanoseconds. In this case, 500 milliseconds.
 */
#define	ENA_ADMIN_CMD_DEF_TIMEOUT	MSEC2NSEC(500)

/*
 * Property macros.
 *
 * Each property has a name string. The _MIN, _MAX, and _DEF macros
 * give the property's lower bound, upper bound, and default value,
 * where such a value is defined.
 *
 * NOTE(review): presumably these properties are read from the driver
 * conf file during the ENA_ATTACH_READ_CONF step -- confirm.
 */
#define	ENA_PROP_RXQ_NUM_DESCS	"rx_queue_num_descs"
#define	ENA_PROP_RXQ_NUM_DESCS_MIN	64

#define	ENA_PROP_TXQ_NUM_DESCS	"tx_queue_num_descs"
#define	ENA_PROP_TXQ_NUM_DESCS_MIN	64

#define	ENA_PROP_RXQ_INTR_LIMIT	"rx_queue_intr_limit"
#define	ENA_PROP_RXQ_INTR_LIMIT_MIN	16
#define	ENA_PROP_RXQ_INTR_LIMIT_MAX	4096
#define	ENA_PROP_RXQ_INTR_LIMIT_DEF	256

/*
 * Build a 64-bit mask with the low 'x' bits set; e.g.,
 * ENA_DMA_BIT_MASK(48) is 0x0000ffffffffffff.
 *
 * A width of 64 or more yields all ones. Shifting a 64-bit value by
 * its full width is undefined behavior in C, so that case must not
 * reach the shift. Note that 'x' is evaluated twice and therefore
 * must not have side effects.
 */
#define	ENA_DMA_BIT_MASK(x)	\
	(((x) >= 64) ? ~0ULL : ((1ULL << (x)) - 1ULL))
/*
 * Verify that phys_addr has no bits set above the DMA address width
 * recorded in ena->ena_dma_width; that is, masking the address to
 * that width must leave it unchanged.
 */
#define	ENA_DMA_VERIFY_ADDR(ena, phys_addr)				\
	VERIFY3U(ENA_DMA_BIT_MASK((ena)->ena_dma_width) & (phys_addr), \
	    ==, (phys_addr))

/*
 * The parameters of a DMA allocation; a pointer to one of these is
 * passed to ena_dma_alloc().
 *
 * NOTE(review): judging by the field names only, these are the size,
 * alignment, scatter-gather list length, endianness, and
 * streaming-vs-consistent choice for the allocation -- confirm
 * against the implementation of ena_dma_alloc().
 */
typedef struct ena_dma_conf {
	size_t		edc_size;
	uint64_t	edc_align;
	int		edc_sgl;
	uchar_t		edc_endian;
	boolean_t	edc_stream;
} ena_dma_conf_t;

/*
 * A DMA buffer together with its DDI access and DMA handles. Both
 * ena_dma_alloc() and ena_dma_free() take a pointer to one of these.
 *
 * NOTE(review): presumably edb_va is the kernel virtual address of
 * the buffer, edb_len the requested length, and edb_used_len the
 * number of bytes currently in use -- confirm against the DMA code.
 */
typedef struct ena_dma_buf {
	caddr_t			edb_va;
	size_t			edb_len;
	/*
	 * The length given by DMA engine, kept around for debugging
	 * purposes.
	 */
	size_t			edb_real_len;
	size_t			edb_used_len;
	ddi_acc_handle_t	edb_acc_hdl;
	/* The handle used by ENA_DMA_SYNC() to sync this buffer. */
	ddi_dma_handle_t	edb_dma_hdl;
	const ddi_dma_cookie_t	*edb_cookie;
} ena_dma_buf_t;

/*
 * Sync a DMA buffer. The 'buf' argument is an ena_dma_buf_t itself
 * (not a pointer to one) and 'flag' is passed through to
 * ddi_dma_sync().
 *
 * We always sync the entire range, and therefore expect success. On
 * DEBUG bits the return value is asserted to be zero; otherwise it
 * is explicitly discarded.
 */
#ifdef DEBUG
#define	ENA_DMA_SYNC(buf, flag)					\
	ASSERT0(ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#else  /* DEBUG */
#define	ENA_DMA_SYNC(buf, flag)					\
	((void)ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#endif

/*
 * Pairs an AENQ group type with a string. The ena_groups_str[] array
 * declared at the end of this file is made up of these entries.
 */
typedef struct ena_aenq_grpstr {
	enahw_aenq_groups_t	eag_type;
	const char		*eag_str;
} ena_aenq_grpstr_t;

/* Pairs an AENQ syndrome type with a string. */
typedef struct ena_aenq_synstr {
	enahw_aenq_syndrome_t	eas_type;
	const char		*eas_str;
} ena_aenq_synstr_t;

/*
 * The signature of an AENQ handler: it is given an opaque data
 * pointer and the AENQ descriptor. Handlers are stored in the
 * eaenq_hdlrs array of ena_aenq_t.
 */
typedef void (*ena_aenq_hdlr_t)(void *data, enahw_aenq_desc_t *desc);

/*
 * The state of the AENQ: its descriptors, the DMA buffer holding
 * them, and an array of handlers sized by ENAHW_AENQ_GROUPS_ARR_NUM.
 *
 * NOTE(review): presumably eaenq_head is the index of the next
 * descriptor to read and eaenq_phase the expected phase of a valid
 * descriptor -- confirm against ena_aenq_work().
 */
typedef struct ena_aenq {
	enahw_aenq_desc_t	*eaenq_descs;
	ena_dma_buf_t		eaenq_dma;
	ena_aenq_hdlr_t		eaenq_hdlrs[ENAHW_AENQ_GROUPS_ARR_NUM];
	uint16_t		eaenq_num_descs;
	uint16_t		eaenq_head;
	uint8_t			eaenq_phase;
} ena_aenq_t;

/*
 * The admin Submission Queue: an array of command descriptors, the
 * DMA buffer backing it, a doorbell address, and the tail index and
 * phase. This is embedded in ena_adminq_t as ea_sq.
 */
typedef struct ena_admin_sq {
	enahw_cmd_desc_t	*eas_entries;
	ena_dma_buf_t		eas_dma;
	uint32_t		*eas_dbaddr;
	uint16_t		eas_tail;
	uint8_t			eas_phase;
} ena_admin_sq_t;

/*
 * The admin Completion Queue: an array of response descriptors, the
 * DMA buffer backing it, and the head index and phase. This is
 * embedded in ena_adminq_t as ea_cq.
 */
typedef struct ena_admin_cq {
	enahw_resp_desc_t	*eac_entries;
	ena_dma_buf_t		eac_dma;
	uint16_t		eac_head;
	uint8_t			eac_phase;
} ena_admin_cq_t;

/*
 * The command context is used to track outstanding requests and match
 * them to device responses.
 */
typedef struct ena_cmd_ctx {
	/*
	 * List linkage. NOTE(review): presumably for the
	 * ea_cmd_ctxs_free list of the admin queue -- confirm.
	 */
	list_node_t		ectx_node;

	/*
	 * The index into ea_cmd_ctxs where this ctx lives. Used as
	 * the command ID value in the command descriptor. This allows
	 * us to match a response to its associated context.
	 */
	uint16_t		ectx_id;

	/* Is the command pending? */
	boolean_t		ectx_pending;

	/* The type of command associated with this context. */
	enahw_cmd_opcode_t	ectx_cmd_opcode;

	/*
	 * The location to copy the full response to. This is
	 * specified by the caller of the command during
	 * submission.
	 */
	enahw_resp_desc_t	*ectx_resp;
} ena_cmd_ctx_t;

/*
 * The admin queue, the queue through which commands are sent to the
 * device.
 *
 * WO: Write Once (at initialization)
 *
 * In general, only a single lock needs to be held in order to access
 * the different parts of the admin queue:
 *
 *  sq_lock: Any data dealing with submitting admin commands, which
 *  includes acquiring a command context.
 *
 *  cq_lock: Any data dealing with reading command responses.
 *
 *  stat_lock: For accessing statistics.
 *
 * In some cases, the ectx_lock/stat_lock may be held in tandem with
 * either the SQ or CQ lock. In that case, the SQ/CQ lock is always
 * entered first.
 *
 * NOTE(review): there is no ectx_lock member in this structure; the
 * mention above may be stale -- confirm.
 */
typedef struct ena_adminq {
	kmutex_t		ea_sq_lock;	/* WO */
	kmutex_t		ea_cq_lock;	/* WO */
	kmutex_t		ea_stat_lock;	/* WO */

	hrtime_t		ea_cmd_timeout_ns; /* WO */

	uint16_t		ea_qlen;	/* WO */
	boolean_t		ea_poll_mode;	/* WO */

	ena_cmd_ctx_t		*ea_cmd_ctxs;	  /* WO */
	list_t			ea_cmd_ctxs_free; /* ea_sq_lock */
	uint16_t		ea_pending_cmds; /* ea_sq_lock */
	ena_admin_sq_t		ea_sq; /* ea_sq_lock */
	ena_admin_cq_t		ea_cq; /* ea_cq_lock */

	/* ea_stat_lock */
	struct ena_adminq_stats {
		uint64_t cmds_fail;
		uint64_t cmds_submitted;
		uint64_t cmds_success;
		uint64_t queue_full;
	} ea_stats;
} ena_adminq_t;

/*
 * The steps of the attach sequence, in order. The ena_attach_seq
 * member of ena_t holds one of these values. ENA_ATTACH_END is not a
 * step; it marks the end of the sequence.
 */
typedef enum ena_attach_seq {
	ENA_ATTACH_PCI = 1,	 /* PCI config space */
	ENA_ATTACH_REGS,	 /* BAR mapping */
	ENA_ATTACH_DEV_INIT,	 /* ENA device initialization */
	ENA_ATTACH_READ_CONF,	 /* Read driver conf file */
	ENA_ATTACH_DEV_CFG,	 /* Set any needed device config */
	ENA_ATTACH_INTR_ALLOC,	 /* interrupt handles allocated */
	ENA_ATTACH_INTR_HDLRS,	 /* intr handlers set */
	ENA_ATTACH_TXQS_ALLOC,	 /* Tx Queues allocated */
	ENA_ATTACH_RXQS_ALLOC,	 /* Rx Queues allocated */
	ENA_ATTACH_MAC_REGISTER, /* registered with mac */
	ENA_ATTACH_INTRS_ENABLE, /* interrupts are enabled */
	ENA_ATTACH_END
} ena_attach_seq_t;

/*
 * The first step of the sequence, and the total number of steps. The
 * sequence numbering starts at 1, so the number of steps is one less
 * than ENA_ATTACH_END.
 */
#define	ENA_ATTACH_SEQ_FIRST	(ENA_ATTACH_PCI)
#define	ENA_ATTACH_NUM_ENTRIES	(ENA_ATTACH_END - 1)

/*
 * Forward declaration so that the attach and cleanup function types
 * can refer to the ena structure before it is defined.
 */
struct ena;
typedef boolean_t (*ena_attach_fn_t)(struct ena *);
typedef void (*ena_cleanup_fn_t)(struct ena *);

/*
 * Describes one step of the attach sequence: its sequence value, its
 * name, the function that performs it, and the function that undoes
 * it.
 *
 * NOTE(review): presumably ead_attach_hard_fail indicates whether a
 * failure of ead_attach_fn fails the attach as a whole -- confirm
 * against the attach code.
 */
typedef struct ena_attach_desc {
	ena_attach_seq_t ead_seq;
	const char *ead_name;
	ena_attach_fn_t ead_attach_fn;
	boolean_t ead_attach_hard_fail;
	ena_cleanup_fn_t ead_cleanup_fn;
} ena_attach_desc_t;

/* The type of a Tx control block; recorded in etcb_type. */
typedef enum {
	ENA_TCB_NONE,
	ENA_TCB_COPY
} ena_tcb_type_t;

/*
 * The TCB is used to track information relating to the Tx of a
 * packet. At the moment we support copy only.
 *
 * Each TCB holds a pointer to an mblk, its type, and a DMA buffer.
 */
typedef struct ena_tx_control_block {
	mblk_t		*etcb_mp;
	ena_tcb_type_t	etcb_type;
	ena_dma_buf_t	etcb_dma;
} ena_tx_control_block_t;

/*
 * Tx queue state flags. Each state is a distinct bit, so several may
 * be set in et_state at the same time.
 */
typedef enum ena_txq_state {
	ENA_TXQ_STATE_NONE		= 0,
	ENA_TXQ_STATE_HOST_ALLOC	= 1 << 0,
	ENA_TXQ_STATE_CQ_CREATED	= 1 << 1,
	ENA_TXQ_STATE_SQ_CREATED	= 1 << 2,
	ENA_TXQ_STATE_READY		= 1 << 3, /* TxQ ready and waiting */
	ENA_TXQ_STATE_RUNNING		= 1 << 4, /* intrs enabled */
} ena_txq_state_t;

/*
 * The named kstats kept for each Tx queue; embedded in ena_txq_t as
 * et_stat.
 */
typedef struct ena_txq_stat {
	/* Number of times mac_ether_offload_info() has failed. */
	kstat_named_t	ets_hck_meoifail;

	/*
	 * Total number of times the ring was blocked due to
	 * insufficient descriptors, or unblocked due to recycling
	 * descriptors.
	 */
	kstat_named_t	ets_blocked;
	kstat_named_t	ets_unblocked;

	/* The total number of descriptors that have been recycled. */
	kstat_named_t	ets_recycled;

	/*
	 * Number of bytes and packets that have been _submitted_ to
	 * the device.
	 */
	kstat_named_t	ets_bytes;
	kstat_named_t	ets_packets;
} ena_txq_stat_t;

/*
 * A transmit queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for sending packets.
 *
 * Each member is annotated with one of the following access rules.
 *
 * Write Once (WO)
 *
 *   This value is written once, before the datapath is activated, in
 *   a function which is controlled by mac(9E). Some values may be
 *   written earlier, during ena attach, like et_ena and
 *   et_sq_num_descs.
 *
 * Tx Mutex (TM) -- et_lock
 *
 *   This value is protected by the Tx queue's mutex. Some values may
 *   be initialized in a WO path, but also continually updated as part
 *   of normal datapath operation, such as et_sq_avail_descs. These
 *   values need mutex protection.
 */
typedef struct ena_txq {
	kmutex_t		et_lock; /* WO */

	struct ena		*et_ena; /* WO */
	uint_t			et_txqs_idx; /* WO */
	mac_ring_handle_t	et_mrh;	 /* WO */
	uint64_t		et_m_gen_num; /* TM */
	ena_txq_state_t		et_state; /* WO */
	uint16_t		et_intr_vector; /* WO */

	enahw_tx_desc_t		*et_sq_descs; /* TM */
	ena_dma_buf_t		et_sq_dma;    /* WO */

	/* Is the Tx queue currently in a blocked state? */
	boolean_t		et_blocked; /* TM */

	/*
	 * The number of descriptors owned by this ring. This value
	 * never changes after initialization.
	 */
	uint16_t		et_sq_num_descs;   /* WO */

	/*
	 * The number of descriptors currently available for Tx
	 * submission. When this value reaches zero the ring must
	 * block until device notifies us of freed descriptors.
	 */
	uint16_t		et_sq_avail_descs; /* TM */

	/*
	 * The current tail index of the queue (the first free
	 * descriptor for host Tx submission). After initialization,
	 * this value only increments, relying on unsigned wrap
	 * around. The ENA device seems to expect this behavior,
	 * performing its own modulo on the value for the purposes of
	 * indexing, much like the driver code needs to do in order to
	 * access the proper TCB entry.
	 */
	uint16_t		et_sq_tail_idx;  /* TM */

	/*
	 * The phase is used to know which CQ descriptors may be
	 * reclaimed. This is explained further in ena.c.
	 */
	uint16_t		et_sq_phase; /* TM */

	/*
	 * NOTE(review): presumably the index the device assigned to
	 * this SQ when it was created -- confirm against the callers
	 * of ena_create_sq().
	 */
	uint16_t		et_sq_hw_idx; /* WO */

	/*
	 * The "doorbell" address is how the host indicates to the
	 * device which descriptors are ready for Tx processing.
	 */
	uint32_t		*et_sq_db_addr; /* WO */

	/*
	 * The TCBs track host Tx information, like a pointer to the
	 * mblk being submitted. Currently we maintain a 1:1 mapping
	 * of SQ descriptors to TCBs as Tx is copy only.
	 */
	ena_tx_control_block_t	*et_tcbs;    /* TM */

	enahw_tx_cdesc_t	*et_cq_descs; /* TM */
	ena_dma_buf_t		et_cq_dma;    /* WO */
	uint16_t		et_cq_num_descs; /* WO */
	uint16_t		et_cq_head_idx; /* TM */
	uint16_t		et_cq_phase;	/* TM */
	uint16_t		et_cq_hw_idx;	/* WO */

	/*
	 * This address is used to control the CQ interrupts.
	 */
	uint32_t		*et_cq_unmask_addr; /* WO */
	uint32_t		*et_cq_head_db_addr; /* WO (currently unused) */
	uint32_t		*et_cq_numa_addr;    /* WO (currently unused) */

	/*
	 * This mutex protects the Tx queue stats. This mutex may be
	 * entered while et_lock is held, but et_lock is not required
	 * to access/modify the stats. However, if both locks are
	 * held, then et_lock must be entered first.
	 */
	kmutex_t		et_stat_lock;
	ena_txq_stat_t		et_stat;
	kstat_t			*et_kstat;
} ena_txq_t;

/*
 * Rx queue state flags. Each state is a distinct bit, so several may
 * be set in er_state at the same time.
 */
typedef enum ena_rxq_state {
	ENA_RXQ_STATE_NONE		= 0,
	ENA_RXQ_STATE_HOST_ALLOC	= 1 << 0,
	ENA_RXQ_STATE_CQ_CREATED	= 1 << 1,
	ENA_RXQ_STATE_SQ_CREATED	= 1 << 2,
	ENA_RXQ_STATE_READY		= 1 << 3, /* RxQ ready and waiting */
	ENA_RXQ_STATE_RUNNING		= 1 << 4, /* intrs enabled */
} ena_rxq_state_t;

/*
 * The Rx control block: a DMA buffer along with an offset and a
 * length.
 *
 * NOTE(review): presumably ercb_offset is where the frame data begins
 * within the buffer and ercb_length is the length of the received
 * data -- confirm against the Rx path.
 */
typedef struct ena_rx_ctrl_block {
	ena_dma_buf_t	ercb_dma;
	uint8_t		ercb_offset;
	uint16_t	ercb_length;
} ena_rx_ctrl_block_t;

/* Whether an Rx queue is being read by polling or by interrupt. */
typedef enum {
	ENA_RXQ_MODE_POLLING	= 1,
	ENA_RXQ_MODE_INTR	= 2,
} ena_rxq_mode_t;

/*
 * The named kstats kept for each Rx queue; embedded in ena_rxq_t as
 * er_stat.
 *
 * NOTE(review): unlike the other structures in this file, the struct
 * tag here carries the "_t" suffix (compare ena_txq_stat). Changing
 * the tag could affect any code that names it directly, so it is left
 * as is.
 */
typedef struct ena_rxq_stat_t {
	/* The total number of packets/bytes received on this queue. */
	kstat_named_t	ers_packets;
	kstat_named_t	ers_bytes;

	/*
	 * At this time we expect all incoming frames to fit in a
	 * single buffer/descriptor. In some rare event that the
	 * device doesn't cooperate this stat is incremented.
	 */
	kstat_named_t	ers_multi_desc;

	/*
	 * The total number of times we failed to allocate a new mblk
	 * for an incoming frame.
	 */
	kstat_named_t	ers_allocb_fail;

	/*
	 * The total number of times the Rx interrupt handler reached
	 * its maximum limit for number of packets to process in a
	 * single interrupt. If you see this number increase
	 * continuously at a steady rate, then it may be an indication
	 * the driver is not entering polling mode.
	 */
	kstat_named_t	ers_intr_limit;

	/*
	 * The total number of times the device detected an incorrect
	 * IPv4 header checksum.
	 */
	kstat_named_t	ers_hck_ipv4_err;

	/*
	 * The total number of times the device detected an incorrect
	 * L4/ULP checksum.
	 */
	kstat_named_t	ers_hck_l4_err;
} ena_rxq_stat_t;

/*
 * A receive queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for receiving packets.
 *
 * The SQ and CQ members parallel those of ena_txq_t, which describes
 * them in more detail. Each member is annotated with one of the
 * following access rules.
 *
 * Write Once (WO)
 *
 *   This value is written once, before the datapath is activated, in
 *   a function which is controlled by mac(9E).
 *
 * Rx Mutex (RM) -- er_lock
 *
 *   This value is protected by the Rx queue's mutex. Some values may
 *   be initialized in a WO path, but also continually updated as part
 *   of normal datapath operation, such as er_sq_avail_descs. These
 *   values need mutex protection.
 */
typedef struct ena_rxq {
	kmutex_t		er_lock;

	struct ena		*er_ena; /* WO */
	uint_t			er_rxqs_idx; /* WO */
	mac_ring_handle_t	er_mrh;	 /* WO */
	uint64_t		er_m_gen_num; /* WO */
	ena_rxq_state_t		er_state; /* WO */
	uint16_t		er_intr_vector; /* WO */
	ena_rxq_mode_t		er_mode;	/* RM */
	uint16_t		er_intr_limit;	/* RM */

	enahw_rx_desc_t		*er_sq_descs; /* RM */
	ena_dma_buf_t		er_sq_dma;    /* WO */
	uint16_t		er_sq_num_descs;   /* WO */
	uint16_t		er_sq_avail_descs; /* RM */
	uint16_t		er_sq_tail_idx;  /* RM */
	uint16_t		er_sq_phase; /* RM */
	uint16_t		er_sq_hw_idx;	/* WO */
	uint32_t		*er_sq_db_addr; /* WO */

	enahw_rx_cdesc_t	*er_cq_descs; /* RM */
	ena_dma_buf_t		er_cq_dma;    /* WO */
	uint16_t		er_cq_num_descs; /* WO */
	uint16_t		er_cq_head_idx;	 /* RM */
	uint16_t		er_cq_phase;	 /* RM */
	uint16_t		er_cq_hw_idx;	 /* WO */
	uint32_t		*er_cq_unmask_addr; /* WO */
	uint32_t		*er_cq_head_db_addr; /* WO (currently unused) */
	uint32_t		*er_cq_numa_addr;    /* WO (currently unused) */

	/* The Rx control blocks of this queue. */
	ena_rx_ctrl_block_t	*er_rcbs; /* RM */

	/*
	 * The Rx queue stats and the mutex named for them.
	 * NOTE(review): presumably the locking rules match those
	 * documented for et_stat_lock in ena_txq_t -- confirm.
	 */
	kmutex_t		er_stat_lock;
	ena_rxq_stat_t		er_stat;
	kstat_t			*er_kstat;
} ena_rxq_t;

/*
 * These are stats based off of enahw_resp_basic_stats_t: bytes,
 * packets, and drops for each of Tx and Rx.
 */
typedef struct ena_basic_stat {
	kstat_named_t	ebs_tx_bytes;
	kstat_named_t	ebs_tx_pkts;
	kstat_named_t	ebs_tx_drops;

	kstat_named_t	ebs_rx_bytes;
	kstat_named_t	ebs_rx_pkts;
	kstat_named_t	ebs_rx_drops;
} ena_basic_stat_t;

/*
 * These are stats based off of enahw_resp_eni_stats_t.
 *
 * NOTE(review): judging by the names, each counts the number of times
 * an allowance (inbound/outbound bandwidth, packets per second,
 * connections, link-local traffic) was exceeded -- confirm against
 * the definition of enahw_resp_eni_stats_t.
 */
typedef struct ena_extended_stat {
	kstat_named_t	ees_bw_in_exceeded;
	kstat_named_t	ees_bw_out_exceeded;
	kstat_named_t	ees_pps_exceeded;
	kstat_named_t	ees_conns_exceeded;
	kstat_named_t	ees_linklocal_exceeded;
} ena_extended_stat_t;

/*
 * These stats monitor which AENQ handlers have been called; embedded
 * in ena_t as ena_aenq_stat.
 */
typedef struct ena_aenq_stat {
	kstat_named_t	eaes_default;
	kstat_named_t	eaes_link_change;
} ena_aenq_stat_t;

/*
 * Bit flags for the ena_state member of ena_t.
 */
#define	ENA_STATE_PRIMORDIAL	0x1u
#define	ENA_STATE_RUNNING	0x2u

/*
 * This structure contains the per-instance (PF or VF) state of the
 * device.
 */
typedef struct ena {
	dev_info_t		*ena_dip;
	int			ena_instance;

	/*
	 * Global lock, used to synchronize administration changes to
	 * the ena_t. This lock should not be held in the datapath.
	 */
	kmutex_t		ena_lock;
	ena_attach_seq_t	ena_attach_seq;

	/*
	 * We use atomic ops for ena_state so that datapath consumers
	 * do not need to enter ena_lock.
	 */
	uint32_t		ena_state;

	/*
	 * PCI config space and BAR handle.
	 */
	ddi_acc_handle_t	ena_pci_hdl;
	off_t			ena_reg_size;
	caddr_t			ena_reg_base;
	ddi_device_acc_attr_t	ena_reg_attr;
	ddi_acc_handle_t	ena_reg_hdl;

	/*
	 * Vendor information.
	 */
	uint16_t		ena_pci_vid;
	uint16_t		ena_pci_did;
	uint8_t			ena_pci_rev;
	uint16_t		ena_pci_svid;
	uint16_t		ena_pci_sdid;

	/*
	 * Device and controller versions.
	 */
	uint32_t		ena_dev_major_vsn;
	uint32_t		ena_dev_minor_vsn;
	uint32_t		ena_ctrl_major_vsn;
	uint32_t		ena_ctrl_minor_vsn;
	uint32_t		ena_ctrl_subminor_vsn;
	uint32_t		ena_ctrl_impl_id;

	/*
	 * Interrupts
	 */
	int			ena_num_intrs;
	ddi_intr_handle_t	*ena_intr_handles;
	size_t			ena_intr_handles_sz;
	int			ena_intr_caps;
	uint_t			ena_intr_pri;

	mac_handle_t		ena_mh;

	size_t			ena_page_sz;

	/*
	 * The MTU and data layer frame sizes.
	 */
	uint32_t		ena_mtu;
	uint32_t		ena_max_frame_hdr;
	uint32_t		ena_max_frame_total;

	/* The size (in bytes) of the Rx/Tx data buffers. */
	uint32_t		ena_tx_buf_sz;
	uint32_t		ena_rx_buf_sz;

	/*
	 * The maximum number of Scatter Gather List segments the
	 * device can address.
	 */
	uint8_t			ena_tx_sgl_max_sz;
	uint8_t			ena_rx_sgl_max_sz;

	/* The number of descriptors per Rx/Tx queue. */
	uint16_t		ena_rxq_num_descs;
	uint16_t		ena_txq_num_descs;

	/*
	 * The maximum number of frames which may be read per Rx
	 * interrupt.
	 */
	uint16_t		ena_rxq_intr_limit;

	/* The Rx/Tx data queues (rings). */
	ena_rxq_t		*ena_rxqs;
	uint16_t		ena_num_rxqs;
	ena_txq_t		*ena_txqs;
	uint16_t		ena_num_txqs;

	/* These statistics are device-wide. */
	kstat_t			*ena_device_basic_kstat;
	kstat_t			*ena_device_extended_kstat;

	/*
	 * This tracks AENQ-related stats, it is implicitly
	 * device-wide.
	 */
	ena_aenq_stat_t		ena_aenq_stat;
	kstat_t			*ena_aenq_kstat;

	/*
	 * The Admin Queue, through which all device commands are
	 * sent.
	 */
	ena_adminq_t		ena_aq;

	ena_aenq_t		ena_aenq;
	ena_dma_buf_t		ena_host_info;

	/*
	 * Hardware info
	 */
	uint32_t		ena_supported_features;
	/* The DMA address width, as used by ENA_DMA_VERIFY_ADDR(). */
	uint8_t			ena_dma_width;
	boolean_t		ena_link_up;
	boolean_t		ena_link_autoneg;
	boolean_t		ena_link_full_duplex;
	link_duplex_t		ena_link_duplex;
	uint64_t		ena_link_speed_mbits;
	enahw_link_speeds_t	ena_link_speeds;
	link_state_t		ena_link_state;
	uint32_t		ena_aenq_supported_groups;
	uint32_t		ena_aenq_enabled_groups;

	uint32_t		ena_tx_max_sq_num;
	uint32_t		ena_tx_max_sq_num_descs;
	uint32_t		ena_tx_max_cq_num;
	uint32_t		ena_tx_max_cq_num_descs;
	uint16_t		ena_tx_max_desc_per_pkt;
	uint32_t		ena_tx_max_hdr_len;

	uint32_t		ena_rx_max_sq_num;
	uint32_t		ena_rx_max_sq_num_descs;
	uint32_t		ena_rx_max_cq_num;
	uint32_t		ena_rx_max_cq_num_descs;
	uint16_t		ena_rx_max_desc_per_pkt;

	/* This is calculated from the Rx/Tx queue nums. */
	uint16_t		ena_max_io_queues;

	/* Hardware Offloads */
	boolean_t		ena_tx_l3_ipv4_csum;

	boolean_t		ena_tx_l4_ipv4_part_csum;
	boolean_t		ena_tx_l4_ipv4_full_csum;
	boolean_t		ena_tx_l4_ipv4_lso;

	boolean_t		ena_tx_l4_ipv6_part_csum;
	boolean_t		ena_tx_l4_ipv6_full_csum;
	boolean_t		ena_tx_l4_ipv6_lso;

	boolean_t		ena_rx_l3_ipv4_csum;
	boolean_t		ena_rx_l4_ipv4_csum;
	boolean_t		ena_rx_l4_ipv6_csum;
	boolean_t		ena_rx_hash;

	uint32_t		ena_max_mtu;
	uint8_t			ena_mac_addr[ETHERADDRL];
} ena_t;

/*
 * Logging functions. Both take the ena_t followed by a printf-style
 * format string and its arguments.
 */
/*PRINTFLIKE2*/
extern void ena_err(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
/*PRINTFLIKE2*/
extern void ena_dbg(const ena_t *, const char *, ...) __KPRINTFLIKE(2);

/*
 * 32-bit device register reads and writes.
 *
 * NOTE(review): judging by the names and argument types, the "bar"
 * variants take a 16-bit offset into the register BAR while the "abs"
 * variants take an address -- confirm against the definitions.
 */
extern uint32_t ena_hw_bar_read32(const ena_t *, const uint16_t);
extern uint32_t ena_hw_abs_read32(const ena_t *, uint32_t *);
extern void ena_hw_bar_write32(const ena_t *, const uint16_t, const uint32_t);
extern void ena_hw_abs_write32(const ena_t *, uint32_t *, const uint32_t);

/*
 * Stats: an init/cleanup pair for each of the device-wide, AENQ, and
 * per-queue statistics. The init functions return a boolean_t.
 */
extern void ena_stat_device_basic_cleanup(ena_t *);
extern boolean_t ena_stat_device_basic_init(ena_t *);

extern void ena_stat_device_extended_cleanup(ena_t *);
extern boolean_t ena_stat_device_extended_init(ena_t *);

extern void ena_stat_aenq_cleanup(ena_t *);
extern boolean_t ena_stat_aenq_init(ena_t *);

extern void ena_stat_rxq_cleanup(ena_rxq_t *);
extern boolean_t ena_stat_rxq_init(ena_rxq_t *);
extern void ena_stat_txq_cleanup(ena_txq_t *);
extern boolean_t ena_stat_txq_init(ena_txq_t *);

/*
 * DMA
 */
extern boolean_t ena_dma_alloc(ena_t *, ena_dma_buf_t *, ena_dma_conf_t *,
    size_t);
extern void ena_dma_free(ena_dma_buf_t *);
extern void ena_set_dma_addr(const ena_t *, const uint64_t, enahw_addr_t *);
extern void ena_set_dma_addr_values(const ena_t *, const uint64_t, uint32_t *,
    uint16_t *);

/*
 * Interrupts
 */
extern boolean_t ena_intr_add_handlers(ena_t *);
extern void ena_intr_remove_handlers(ena_t *);
extern void ena_tx_intr_work(ena_txq_t *);
extern void ena_rx_intr_work(ena_rxq_t *);
extern void ena_aenq_work(ena_t *);
extern boolean_t ena_intrs_disable(ena_t *);
extern boolean_t ena_intrs_enable(ena_t *);

/*
 * MAC
 */
extern boolean_t ena_mac_register(ena_t *);
extern int ena_mac_unregister(ena_t *);
extern void ena_ring_tx_stop(mac_ring_driver_t);
extern int ena_ring_tx_start(mac_ring_driver_t, uint64_t);
extern mblk_t *ena_ring_tx(void *, mblk_t *);
extern void ena_ring_rx_stop(mac_ring_driver_t);
extern int ena_ring_rx_start(mac_ring_driver_t rh, uint64_t gen_num);
extern int ena_m_stat(void *, uint_t, uint64_t *);
extern mblk_t *ena_ring_rx_poll(void *, int);
extern int ena_ring_rx_stat(mac_ring_driver_t, uint_t, uint64_t *);
extern int ena_ring_tx_stat(mac_ring_driver_t, uint_t, uint64_t *);

/*
 * Admin API
 *
 * NOTE(review): presumably the functions returning int return zero on
 * success and an errno value on failure (see
 * enahw_resp_status_to_errno()) -- confirm against the definitions.
 */
extern int ena_admin_submit_cmd(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, ena_cmd_ctx_t **);
extern int ena_admin_poll_for_resp(ena_t *, ena_cmd_ctx_t *);
extern void ena_free_host_info(ena_t *);
extern boolean_t ena_init_host_info(ena_t *);
extern int ena_create_cq(ena_t *, uint16_t, uint64_t, boolean_t, uint32_t,
    uint16_t *, uint32_t **, uint32_t **, uint32_t **);
extern int ena_destroy_cq(ena_t *, uint16_t);
extern int ena_create_sq(ena_t *, uint16_t, uint64_t, boolean_t, uint16_t,
    uint16_t *, uint32_t **);
extern int ena_destroy_sq(ena_t *, uint16_t, boolean_t);
extern int ena_set_feature(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, const enahw_feature_id_t, const uint8_t);
extern int ena_get_feature(ena_t *, enahw_resp_desc_t *,
    const enahw_feature_id_t, const uint8_t);
extern int ena_admin_get_basic_stats(ena_t *, enahw_resp_desc_t *);
extern int ena_admin_get_eni_stats(ena_t *, enahw_resp_desc_t *);
extern int enahw_resp_status_to_errno(ena_t *, enahw_resp_status_t);

/*
 * Rx/Tx allocations
 */
extern boolean_t ena_alloc_rxq(ena_rxq_t *);
extern void ena_cleanup_rxq(ena_rxq_t *);
extern boolean_t ena_alloc_txq(ena_txq_t *);
extern void ena_cleanup_txq(ena_txq_t *);

/* The table pairing AENQ group types with strings; defined elsewhere. */
extern ena_aenq_grpstr_t ena_groups_str[];

#ifdef __cplusplus
}
#endif

#endif	/* _ENA_H */