1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
|
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright 2019 Joyent, Inc.
* Copyright 2017 Tegile Systems, Inc. All rights reserved.
*/
/*
* -------------------------
* Interrupt Handling Theory
* -------------------------
*
* There are a couple different sets of interrupts that we need to worry about:
*
* - Interrupts from receive queues
* - Interrupts from transmit queues
* - 'Other Interrupts', such as the administrative queue
*
* 'Other Interrupts' are asynchronous events such as a link status change event
* being posted to the administrative queue, unrecoverable ECC errors, and more.
* If we have something being posted to the administrative queue, then we go
* through and process it, because it's generally enabled as a separate logical
* interrupt. Note, we may need to do more here eventually. To re-enable the
* interrupts from the 'Other Interrupts' section, we need to clear the PBA and
* write ENA to PFINT_ICR0.
*
* Interrupts from the transmit and receive queues indicate that our requests
* have been processed. In the rx case, it means that we have data that we
* should take a look at and send up the stack. In the tx case, it means that
* data which we got from MAC has now been sent out on the wire and we can free
* the associated data. Most of the logic for acting upon the presence of this
* data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
* tx operations. This file is dedicated to handling and dealing with interrupt
* processing.
*
* All devices supported by this driver support three kinds of interrupts:
*
* o Extended Message Signaled Interrupts (MSI-X)
* o Message Signaled Interrupts (MSI)
* o Legacy PCI interrupts (INTx)
*
* Generally speaking the hardware logically handles MSI and INTx the same and
* restricts us to only using a single interrupt, which isn't the interesting
* case. With MSI-X available, each physical function of the device provides the
* opportunity for multiple interrupts which is what we'll focus on.
*
* --------------------
* Interrupt Management
* --------------------
*
* By default, the admin queue, which consists of the asynchronous other
* interrupts is always bound to MSI-X vector zero. Next, we spread out all of
* the other interrupts that we have available to us over the remaining
* interrupt vectors.
*
* This means that there may be multiple queues, both tx and rx, which are
* mapped to the same interrupt. When the interrupt fires, we'll have to check
* all of them for servicing, before we go through and indicate that the
* interrupt is claimed.
*
* The hardware provides the means of mapping various queues to MSI-X interrupts
* by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
* registers can also be used to enable and disable whether or not the queue is
* a source of interrupts. As part of this, the hardware requires that we
* maintain a linked list of queues for each interrupt vector. While it may seem
* like this is only there for the purposes of ITRs, that's not the case. The
* first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each
* queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
* register.
*
* Finally, the individual interrupt vector itself has the ability to be enabled
* and disabled. The overall interrupt is controlled through the
* I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
* as a whole.
*
* Note that this means that both the individual queue and the interrupt as a
* whole can be toggled and re-enabled.
*
* -------------------
* Non-MSIX Management
* -------------------
*
* We may have a case where the Operating System is unable to actually allocate
* any MSI-X to the system. In such a world, there is only one transmit/receive
* queue pair and it is bound to the same interrupt with index zero. The
* hardware doesn't allow us access to additional interrupt vectors in these
* modes. Note that technically we could support more transmit/receive queues if
* we wanted.
*
* In this world, because the interrupts for the admin queue and traffic are
* mixed together, we have to consult ICR0 to determine what has occurred. The
* QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
* allows us to set a specific bit in ICR0. There are up to seven such bits;
* however, we only use the bit 0 and 1 for the rx and tx queue respectively.
* These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and
* I40E_INTR_NOTX_{R|T}X_MASK macros respectively.
*
* Unfortunately, these corresponding queue bits have no corresponding entry in
* the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
* end up enabling it on the queue registers rather than on the MSI-X registers.
* In the MSI-X world, because they can be enabled and disabled, this is
* different and the queues can always be enabled and disabled, but the
* interrupts themselves are toggled (ignoring the question of interrupt
* blanking for polling on rings).
*
* Finally, we still have to set up the interrupt linked list, but the list is
* instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
* one of the other MSI-X registers.
*
* --------------------
* Interrupt Moderation
* --------------------
*
* The XL710 hardware has three different interrupt moderation registers per
* interrupt. Unsurprisingly, we use these for:
*
* o RX interrupts
* o TX interrupts
* o 'Other interrupts' (link status change, admin queue, etc.)
*
* By default, we throttle 'other interrupts' the most, then TX interrupts, and
* then RX interrupts. The default values for these were based on trying to
* reason about both the importance and frequency of events. Generally speaking
* 'other interrupts' are not very frequent and they're not important for the
* I/O data path in and of itself (though they may indicate issues with the I/O
* data path).
*
* On the flip side, when we're not polling, RX interrupts are very important.
* The longer we wait for them, the more latency that we inject into the system.
* However, if we allow interrupts to occur too frequently, we risk a few
* problems:
*
* 1) Abusing system resources. Without proper interrupt blanking and polling,
* we can see upwards of 200k-300k interrupts per second on the system.
*
* 2) Not enough data coalescing to enable polling. In other words, the more
* data that we allow to build up, the more likely we'll be able to enable
* polling mode and allowing us to better handle bulk data.
*
* In-between the 'other interrupts' and the TX interrupts we have the
* reclamation of TX buffers. This operation is not quite as important as we
* generally size the ring large enough that we should be able to reclaim a
* substantial amount of the descriptors that we have used per interrupt. So
* while it's important that this interrupt occur, we don't necessarily need it
* firing as frequently as RX; it doesn't, on its own, induce additional latency
* into the system.
*
* Based on all this we currently assign static ITR values for the system. While
* we could move to a dynamic system (the hardware supports that), we'd want to
* make sure that we're seeing problems from this that we believe would be
* generally helped by the added complexity.
*
* Based on this, the default values that we have allow for the following
* interrupt thresholds:
*
* o 20k interrupts/s for RX
* o 5k interrupts/s for TX
* o 2k interrupts/s for 'Other Interrupts'
*/
#include "i40e_sw.h"
/*
* Constants for the non-MSI-X case, where a single transmit/receive queue pair
* shares interrupt zero with the admin queue:
*
* - I40E_INTR_NOTX_QUEUE is the queue index whose QINT_RQCTL/QINT_TQCTL
*   registers are programmed, and I40E_INTR_NOTX_INTR is the value placed in
*   their MSIX_INDX fields.
* - I40E_INTR_NOTX_{R|T}X_QUEUE are the values placed in the MSIX0_INDX field
*   of the rx and tx queue control registers respectively.
* - I40E_INTR_NOTX_{R|T}X_MASK are the bits tested in the value read from
*   PFINT_ICR0 to decide whether rx or tx work is pending.
*/
#define I40E_INTR_NOTX_QUEUE 0
#define I40E_INTR_NOTX_INTR 0
#define I40E_INTR_NOTX_RX_QUEUE 0
#define I40E_INTR_NOTX_RX_MASK (1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
#define I40E_INTR_NOTX_TX_QUEUE 1
#define I40E_INTR_NOTX_TX_MASK (1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
/*
 * Program the interrupt throttling value 'val' for the ITR index 'itr'.
 *
 * 'val' must not exceed I40E_MAX_ITR and 'itr' must be below
 * I40E_ITR_INDEX_NONE; both are enforced with VERIFY.
 *
 * With MSI-X, the rx and tx ITRs are written to PFINT_ITRN(itr, n) for every
 * n in [0, i40e_num_trqpairs). The ITR for other interrupts always lives on
 * interrupt zero, as does every ITR when MSI-X is not in use, so in those
 * cases only PFINT_ITR0(itr) is written.
 */
void
i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;

	VERIFY3U(val, <=, I40E_MAX_ITR);
	VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);

	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX &&
	    itr != I40E_ITR_INDEX_OTHER) {
		int n;

		for (n = 0; n < i40e->i40e_num_trqpairs; n++) {
			I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, n), val);
		}
		return;
	}

	I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
}
/*
 * Re-arm interrupt zero by writing PFINT_DYN_CTL0 with INTENA and CLEARPBA
 * set and the ITR index set to I40E_ITR_INDEX_NONE, then call i40e_flush().
 *
 * The adminq doesn't have a traditional queue associated with it from an
 * interrupt perspective and just lives on ICR0. When MSI-X interrupts are not
 * being used, this register also gates the queue interrupts, since they share
 * interrupt zero.
 */
static void
i40e_intr_adminq_enable(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t ctl;

	ctl = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
	ctl |= I40E_PFINT_DYN_CTL0_CLEARPBA_MASK;
	ctl |= I40E_PFINT_DYN_CTL0_INTENA_MASK;

	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, ctl);
	i40e_flush(hw);
}
/*
 * Disable interrupt zero: PFINT_DYN_CTL0 is written with only the ITR index
 * field populated (I40E_ITR_INDEX_NONE), leaving INTENA clear.
 */
static void
i40e_intr_adminq_disable(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	const uint32_t ctl =
	    I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;

	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, ctl);
}
/*
 * Enable the reception of interrupts on the given I/O vector by setting INTENA
 * and CLEARPBA in its PFINT_DYN_CTLN register.
 *
 * Only vectors 1..N may be passed here; vector 0 is special and is handled
 * through a different register. We must subtract one from the vector because
 * i40e implicitly adds one to the register index. See section 10.2.2.10.13
 * for more details.
 */
static void
i40e_intr_io_enable(i40e_t *i40e, int vector)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t ctl;

	ASSERT3S(vector, >, 0);

	ctl = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
	ctl |= I40E_PFINT_DYN_CTLN_CLEARPBA_MASK;
	ctl |= I40E_PFINT_DYN_CTLN_INTENA_MASK;

	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), ctl);
}
/*
 * Disable the reception of interrupts on the given I/O vector. The vector's
 * PFINT_DYN_CTLN register is written with only the ITR index field populated
 * (I40E_ITR_INDEX_NONE), leaving INTENA clear.
 *
 * 'vector' must be greater than zero; the register index is 'vector - 1'.
 */
static void
i40e_intr_io_disable(i40e_t *i40e, int vector)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	const uint32_t ctl =
	    I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;

	ASSERT3S(vector, >, 0);

	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), ctl);
}
/*
 * Enable all I/O interrupts.
 *
 * With MSI-X, every vector from 1 up to (but not including) i40e_intr_count
 * is enabled through i40e_intr_io_enable(). Without MSI-X there is no
 * per-vector control, so we instead set the CAUSE_ENA bit in the rx and then
 * the tx queue control register of queue I40E_INTR_NOTX_QUEUE.
 */
void
i40e_intr_io_enable_all(i40e_t *i40e)
{
	i40e_hw_t *hw;
	uint32_t rqctl, tqctl;

	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		int vec;

		for (vec = 1; vec < i40e->i40e_intr_count; vec++) {
			i40e_intr_io_enable(i40e, vec);
		}
		return;
	}

	hw = &i40e->i40e_hw_space;

	rqctl = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
	rqctl |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), rqctl);

	tqctl = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
	tqctl |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), tqctl);
}
/*
 * Disable all I/O interrupts.
 *
 * With MSI-X, every vector from 1 up to (but not including) i40e_intr_count
 * is disabled through i40e_intr_io_disable(). Without MSI-X there is no
 * per-vector control, so we instead clear the CAUSE_ENA bit in the rx and
 * then the tx queue control register of queue I40E_INTR_NOTX_QUEUE.
 */
void
i40e_intr_io_disable_all(i40e_t *i40e)
{
	i40e_hw_t *hw;
	uint32_t rqctl, tqctl;

	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		int vec;

		for (vec = 1; vec < i40e->i40e_intr_count; vec++) {
			i40e_intr_io_disable(i40e, vec);
		}
		return;
	}

	hw = &i40e->i40e_hw_space;

	rqctl = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
	rqctl &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), rqctl);

	tqctl = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
	tqctl &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), tqctl);
}
/*
 * As part of disabling the tx and rx queues we're technically supposed to
 * remove the interrupt linked list entries. The simplest way to do that is to
 * reset the list heads to I40E_QUEUE_TYPE_EOL (0x7FF).
 *
 * With MSI-X, PFINT_LNKLSTN(n) is reset for every n in
 * [0, i40e_intr_count - 1) and then i40e_flush() is called. Without MSI-X
 * only PFINT_LNKLST0 is reset, and no flush is issued on that path.
 *
 * Note all of the FM register access checks are performed by the caller.
 */
void
i40e_intr_io_clear_cause(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t n;

	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		for (n = 0; n < i40e->i40e_intr_count - 1; n++) {
			const uint32_t eol = I40E_QUEUE_TYPE_EOL;

			I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(n), eol);
		}
		i40e_flush(hw);
	} else {
		const uint32_t eol = I40E_QUEUE_TYPE_EOL;

		I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, eol);
	}
}
/*
 * Finalize interrupt handling. Mostly this disables the admin queue
 * interrupt.
 *
 * On DEBUG builds with MSI-X, we first verify that every I/O vector has
 * already had its interrupt disabled (INTENA clear in PFINT_DYN_CTLN) and its
 * interrupt linked list reset to I40E_QUEUE_TYPE_EOL.
 */
void
i40e_intr_chip_fini(i40e_t *i40e)
{
#ifdef DEBUG
	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		i40e_hw_t *hw = &i40e->i40e_hw_space;
		int n;

		for (n = 0; n < i40e->i40e_intr_count - 1; n++) {
			uint32_t val;

			val = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(n));
			VERIFY0(val & I40E_PFINT_DYN_CTLN_INTENA_MASK);

			val = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(n));
			VERIFY3U(val, ==, I40E_QUEUE_TYPE_EOL);
		}
	}
#endif

	i40e_intr_adminq_disable(i40e);
}
/*
 * Set the head of an interrupt linked list to the Rx queue 'queue'.
 *
 * 'vector' is used directly as the PFINT_LNKLSTN register index. The
 * PFINT_LNKLSTN[N] register actually refers to the 'N + 1' interrupt vector,
 * e.g. PFINT_LNKLSTN[0] refers to interrupt vector 1, so callers pass the
 * interrupt vector minus one.
 */
static void
i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t head;

	head = I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT;
	head |= queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;

	I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), head);
	DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, head);
}
/*
 * Program QINT_RQCTL[queue]: bind the Rx queue to MSI-X vector 'vector', use
 * the Rx ITR, enable the queue as an interrupt cause, and link it to the next
 * entry in the interrupt linked list. The next entry is always the Tx queue
 * with the same index as this Rx queue.
 *
 * Unlike PFINT_LNKLSTN, 'vector' is the actual vector this queue is on, i.e.
 * it must equal the queue pair's itrq_rx_intrvec.
 */
static void
i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t ctl;

	ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec);

	ctl = I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	ctl |= vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT;
	ctl |= I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT;
	ctl |= queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT;
	ctl |= I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT;

	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), ctl);
	DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, ctl);
}
/*
 * Program QINT_TQCTL[queue]: bind the Tx queue to MSI-X vector 'vector', use
 * the Tx ITR, enable the queue as an interrupt cause, and link it to
 * 'next_queue'. The next entry is typed as an Rx queue; 'next_queue' is
 * either the Rx queue of another queue pair or I40E_QUEUE_TYPE_EOL to
 * terminate the list.
 *
 * 'vector' is the actual vector this queue is on, i.e. it must equal the
 * queue pair's itrq_tx_intrvec.
 */
static void
i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t ctl;

	ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec);

	ctl = I40E_QINT_TQCTL_CAUSE_ENA_MASK;
	ctl |= vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT;
	ctl |= I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT;
	ctl |= next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT;
	ctl |= I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT;

	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), ctl);
	DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, ctl);
}
/*
 * Program the interrupt linked list. Each vector has a linked list of
 * queues which act as event sources for that vector. When one of
 * those sources has an event the associated interrupt vector is
 * fired. This mapping must match the mapping found in
 * i40e_map_intrs_to_vectors().
 *
 * Queue pairs are distributed round-robin over the I/O vectors: I/O vector
 * index 'vec' (interrupt vector 'vec + 1') services queue pairs vec,
 * vec + intr_count, vec + 2 * intr_count, and so on, where intr_count is the
 * number of I/O vectors. For each queue pair on a vector the list is chained
 * Rx queue -> Tx queue -> next pair's Rx queue, and the last Tx queue is
 * terminated with I40E_QUEUE_TYPE_EOL.
 *
 * See section 7.5.3 for more information about the configuration of
 * the interrupt linked list.
 */
static void
i40e_intr_init_queue_msix(i40e_t *i40e)
{
	uint_t intr_count;

	/*
	 * The 0th vector is for 'Other Interrupts' only (subject to
	 * change in the future).
	 */
	intr_count = i40e->i40e_intr_count - 1;

	for (uint_t vec = 0; vec < intr_count; vec++) {
		boolean_t head = B_TRUE;

		for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs;
		    qidx += intr_count) {
			uint_t next_qidx = qidx + intr_count;

			/*
			 * Valid queue indexes are 0 through
			 * i40e_num_trqpairs - 1, so the list must end as soon
			 * as the next index reaches i40e_num_trqpairs. This
			 * must agree with the loop bound above; otherwise the
			 * last Tx queue would be linked to a queue that this
			 * loop never programs.
			 */
			if (next_qidx >= i40e->i40e_num_trqpairs)
				next_qidx = I40E_QUEUE_TYPE_EOL;

			/* The first queue pair seen on a vector roots its list. */
			if (head) {
				i40e_set_lnklstn(i40e, vec, qidx);
				head = B_FALSE;
			}

			/*
			 * The queue control registers take the real vector
			 * number, which is one more than the LNKLSTN index.
			 */
			i40e_set_rqctl(i40e, vec + 1, qidx);
			i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx);
		}
	}
}
/*
 * Set up a single queue pair to share the admin queue interrupt in the
 * non-MSI-X world (fixed or MSI interrupts only; this is enforced with
 * VERIFY). The list rooted at PFINT_LNKLST0 is: Rx queue
 * I40E_INTR_NOTX_QUEUE -> Tx queue I40E_INTR_NOTX_QUEUE -> EOL.
 *
 * Note we do not enable the queue as an interrupt cause at this time
 * (CAUSE_ENA is left clear in both queue control registers). We don't have
 * any other vector of control here, unlike with the MSI-X interrupt case.
 */
static void
i40e_intr_init_queue_shared(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t lnklst, rqctl, tqctl;

	VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
	    i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);

	/*
	 * NOTE(review): the queue type below is shifted with the LNKLSTN
	 * constant although the register is LNKLST0 -- presumably the two
	 * shifts are identical; confirm against the register definitions.
	 */
	lnklst =
	    (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);

	rqctl = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
	    (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
	    (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);

	tqctl = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
	    (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
	    (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
	    (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);

	/* Write the list head first, then the Rx entry, then the Tx entry. */
	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, lnklst);
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), rqctl);
	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), tqctl);
}
/*
 * Enable the given queue pair's Rx queue as a valid source of interrupts by
 * setting CAUSE_ENA in its QINT_RQCTL register. The caller must hold
 * itrq_rx_lock.
 *
 * Note, this should only be used as part of the GLDv3's interrupt blanking
 * routines. The debug build assertions are specific to that: in particular
 * the queue is expected to currently be disabled as a cause.
 */
void
i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq)
{
	i40e_t *i40e = itrq->itrq_i40e;
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint_t qidx = itrq->itrq_index;
	uint32_t rqctl;

	ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
	ASSERT(qidx < i40e->i40e_num_trqpairs);

	rqctl = I40E_READ_REG(hw, I40E_QINT_RQCTL(qidx));
	ASSERT0(rqctl & I40E_QINT_RQCTL_CAUSE_ENA_MASK);

	rqctl |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(qidx), rqctl);
}
/*
 * Disable the given queue pair's Rx queue as a valid source of interrupts by
 * clearing CAUSE_ENA in its QINT_RQCTL register. The caller must hold
 * itrq_rx_lock.
 *
 * Note, this should only be used as part of the GLDv3's interrupt blanking
 * routines. The debug build assertions are specific to that: in particular
 * the queue is expected to currently be enabled as a cause.
 */
void
i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq)
{
	i40e_t *i40e = itrq->itrq_i40e;
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint_t qidx = itrq->itrq_index;
	uint32_t rqctl;

	ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
	ASSERT(qidx < i40e->i40e_num_trqpairs);

	rqctl = I40E_READ_REG(hw, I40E_QINT_RQCTL(qidx));
	ASSERT3U(rqctl & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
	    I40E_QINT_RQCTL_CAUSE_ENA_MASK);

	rqctl &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(qidx), rqctl);
}
/*
 * Start up the chip's interrupt handling. We not only configure the adminq
 * here, but we also go through and configure all of the actual queues, the
 * interrupt linked lists, and the default ITR values. The register writes
 * below are issued in a deliberate order; keep it when modifying this.
 */
void
i40e_intr_chip_init(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t stat_ctl, icr0_ena;

	/*
	 * Ensure that all non adminq interrupts are disabled at the chip
	 * level, mask every ICR0 cause, and read ICR0, discarding the value.
	 */
	i40e_intr_io_disable_all(i40e);
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
	(void) I40E_READ_REG(hw, I40E_PFINT_ICR0);

	/*
	 * Always enable all of the other-class interrupts to be on their own
	 * ITR. This only needs to be set on interrupt zero, which has its own
	 * special setting.
	 */
	stat_ctl = I40E_ITR_INDEX_OTHER <<
	    I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
	I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, stat_ctl);

	/*
	 * Enable interrupt types we expect to receive. At the moment, this
	 * is limited to the adminq; however, we'll want to review 11.2.2.9.22
	 * for more types here as we add support for detecting them, handling
	 * them, and resetting the device as appropriate.
	 */
	icr0_ena = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, icr0_ena);

	/*
	 * Always start with interrupt zero's linked list empty, then enable
	 * interrupt zero. The queue setup below installs the real lists.
	 */
	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);
	i40e_intr_adminq_enable(i40e);

	/*
	 * Set up all of the queues and map them to interrupts based on the
	 * interrupt type in use.
	 */
	if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
		i40e_intr_init_queue_shared(i40e);
	} else {
		i40e_intr_init_queue_msix(i40e);
	}

	/*
	 * Finally set all of the default ITRs for the interrupts. Note that
	 * the queues will have been set up above.
	 */
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
}
/*
 * Drain the admin receive queue. Events are pulled one at a time with
 * i40e_clean_arq_element() into the device's i40e_aqbuf until it either
 * fails or reports that no events remain.
 *
 * The only event acted upon is a link status notification, which triggers
 * i40e_link_check() under i40e_general_lock. Every other opcode is currently
 * consumed and ignored; longer term we'll want to handle other causes here.
 */
static void
i40e_intr_adminq_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	struct i40e_arq_event_info evt;
	uint16_t pending = 1;

	bzero(&evt, sizeof (evt));
	evt.buf_len = I40E_ADMINQ_BUFSZ;
	evt.msg_buf = i40e->i40e_aqbuf;

	do {
		/*
		 * At the moment, the only error code that seems to be returned
		 * is one saying that there's no work. In such a case we simply
		 * stop.
		 */
		if (i40e_clean_arq_element(hw, &evt, &pending) !=
		    I40E_SUCCESS) {
			break;
		}

		if (LE_16(evt.desc.opcode) == i40e_aqc_opc_get_link_status) {
			mutex_enter(&i40e->i40e_general_lock);
			i40e_link_check(i40e);
			mutex_exit(&i40e->i40e_general_lock);
		}
	} while (pending != 0);
}
/*
 * Interrupt-time receive work for one queue pair. Under itrq_rx_lock, and
 * only when the ring is not flagged via itrq_intr_poll, collect received
 * packets with i40e_ring_rx(). Any resulting chain is handed to
 * mac_rx_ring() after the lock has been dropped.
 */
static void
i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
{
	mblk_t *chain;

	mutex_enter(&itrq->itrq_rx_lock);
	chain = itrq->itrq_intr_poll ? NULL :
	    i40e_ring_rx(itrq, I40E_POLL_NULL);
	mutex_exit(&itrq->itrq_rx_lock);

	if (chain != NULL) {
		mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, chain,
		    itrq->itrq_rxgen);
	}
}
/*
* Interrupt-time transmit work for one queue pair: pass the pair to
* i40e_tx_recycle_ring(). The i40e argument is not used.
*/
/* ARGSUSED */
static void
i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
{
i40e_tx_recycle_ring(itrq);
}
/*
 * Service the 'other' causes reported in ICR0. At the moment, the only one
 * that we handle is the adminq. We should go through and support the other
 * notifications at some point.
 *
 * If the register access handle check fails after reading ICR0, the service
 * impact is reported as degraded, I40E_ERROR is set in the device state, and
 * we return without re-enabling the interrupt.
 */
static void
i40e_intr_other_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	uint32_t icr0, ena;

	icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
		return;
	}

	if ((icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) != 0) {
		i40e_intr_adminq_work(i40e);
	}

	/*
	 * Make sure that the adminq interrupt is not masked and then
	 * explicitly enable the adminq and thus the other interrupt.
	 */
	ena = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
	ena |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, ena);
	i40e_intr_adminq_enable(i40e);
}
/*
* Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
* the MSI-X interrupt sequence.
*/
uint_t
i40e_intr_msix(void *arg1, void *arg2)
{
i40e_t *i40e = (i40e_t *)arg1;
uint_t vector_idx = (uint_t)(uintptr_t)arg2;
ASSERT3U(vector_idx, <, i40e->i40e_intr_count);
/*
* When using MSI-X interrupts, vector 0 is always reserved for the
* adminq at this time. Though longer term, we'll want to also bridge
* some I/O to them.
*/
if (vector_idx == 0) {
i40e_intr_other_work(i40e);
return (DDI_INTR_CLAIMED);
}
ASSERT3U(vector_idx, >, 0);
/*
* We determine the queue indexes via simple arithmetic (as
* opposed to keeping explicit state like a bitmap). While
* conveinent, it does mean that i40e_map_intrs_to_vectors(),
* i40e_intr_init_queue_msix(), and this function must be
* modified as a unit.
*
* We subtract 1 from the vector to offset the addition we
* performed during i40e_map_intrs_to_vectors().
*/
for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs;
i += (i40e->i40e_intr_count - 1)) {
i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
ASSERT3U(i, <, i40e->i40e_num_trqpairs);
ASSERT3P(itrq, !=, NULL);
i40e_intr_rx_work(i40e, itrq);
i40e_intr_tx_work(i40e, itrq);
}
i40e_intr_io_enable(i40e, vector_idx);
return (DDI_INTR_CLAIMED);
}
/*
 * Common interrupt handler for the non-MSI-X case, where the adminq and the
 * single queue pair (index 0) all share interrupt zero. ICR0 is consulted to
 * work out which of adminq, rx, and tx work needs to be done.
 *
 * 'shared' indicates that the interrupt may not be ours. When it is B_TRUE
 * we return DDI_INTR_UNCLAIMED if the device is suspended or if ICR0 reads
 * as zero; in every other case DDI_INTR_CLAIMED is returned.
 *
 * If the register access handle check fails after reading ICR0, the service
 * impact is reported as degraded, I40E_ERROR is set in the device state, and
 * we return claimed without re-enabling the interrupt. Otherwise interrupt
 * zero is always re-enabled before returning, even when ICR0 was zero.
 */
static uint_t
i40e_intr_notx(i40e_t *i40e, boolean_t shared)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0];
	uint32_t icr0;
	int ret = DDI_INTR_CLAIMED;

	if (shared == B_TRUE) {
		boolean_t suspended;

		mutex_enter(&i40e->i40e_general_lock);
		suspended = (i40e->i40e_state & I40E_SUSPENDED) ?
		    B_TRUE : B_FALSE;
		mutex_exit(&i40e->i40e_general_lock);

		if (suspended == B_TRUE) {
			return (DDI_INTR_UNCLAIMED);
		}
	}

	icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
		return (DDI_INTR_CLAIMED);
	}

	if (icr0 != 0) {
		if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
			i40e_intr_adminq_work(i40e);
		}

		if (icr0 & I40E_INTR_NOTX_RX_MASK) {
			i40e_intr_rx_work(i40e, itrq);
		}

		if (icr0 & I40E_INTR_NOTX_TX_MASK) {
			i40e_intr_tx_work(i40e, itrq);
		}
	} else if (shared == B_TRUE) {
		/* No cause was pending, so this was not our interrupt. */
		ret = DDI_INTR_UNCLAIMED;
	}

	i40e_intr_adminq_enable(i40e);
	return (ret);
}
/*
* Interrupt entry point for the MSI case (handler registration is not shown
* in this file). arg1 is the i40e_t for the device; arg2 is unused. All work
* is delegated to i40e_intr_notx() with 'shared' set to B_FALSE.
*/
/* ARGSUSED */
uint_t
i40e_intr_msi(void *arg1, void *arg2)
{
i40e_t *i40e = (i40e_t *)arg1;
return (i40e_intr_notx(i40e, B_FALSE));
}
/*
* Interrupt entry point for the legacy (INTx) case (handler registration is
* not shown in this file). arg1 is the i40e_t for the device; arg2 is unused.
* All work is delegated to i40e_intr_notx() with 'shared' set to B_TRUE, which
* allows the handler to return DDI_INTR_UNCLAIMED.
*/
/* ARGSUSED */
uint_t
i40e_intr_legacy(void *arg1, void *arg2)
{
i40e_t *i40e = (i40e_t *)arg1;
return (i40e_intr_notx(i40e, B_TRUE));
}
|