diff options
author | Evan Yan <Evan.Yan@Sun.COM> | 2009-02-28 00:10:54 +0800 |
---|---|---|
committer | Evan Yan <Evan.Yan@Sun.COM> | 2009-02-28 00:10:54 +0800 |
commit | 63ea9ad24896f2939472f8f96f568086d190eb33 (patch) | |
tree | 4a6f7e7732b4f65f37e0571ba013b932ac659c1e /usr/src | |
parent | ccba08015a17f88186762dd6d9f91195adb387e8 (diff) | |
download | illumos-gate-63ea9ad24896f2939472f8f96f568086d190eb33.tar.gz |
6794643 Interrupt to cpu binding should default to round robin for x64 OOBP
6795799 apic_msix_max should default to 8 for 10GbE out of box performance
6796665 dynamically adjust MSIX maximum allocation limit to 8 and higher on sparc
6763643 apic_delspl_common disables MSI incorrectly
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/os/ddi_intr.c | 8 |
-rw-r--r-- | usr/src/uts/common/os/ddi_intr_impl.c | 37 |
-rw-r--r-- | usr/src/uts/common/os/ddi_intr_irm.c | 144 |
-rw-r--r-- | usr/src/uts/common/sys/ddi_intr_impl.h | 12 |
-rw-r--r-- | usr/src/uts/i86pc/io/mp_platform_common.c | 20 |
-rw-r--r-- | usr/src/uts/i86pc/io/pcplusmp/apic.c | 13 |
-rw-r--r-- | usr/src/uts/i86pc/io/pcplusmp/apic_introp.c | 4 |
-rw-r--r-- | usr/src/uts/i86pc/sys/apic.h | 2 |
-rw-r--r-- | usr/src/uts/i86xpv/io/psm/xpv_psm.c | 7 |
-rw-r--r-- | usr/src/uts/sun4/io/px/px_msi.c | 1 |
10 files changed, 102 insertions, 146 deletions
diff --git a/usr/src/uts/common/os/ddi_intr.c b/usr/src/uts/common/os/ddi_intr.c index a506f7e85c..4dbd86418a 100644 --- a/usr/src/uts/common/os/ddi_intr.c +++ b/usr/src/uts/common/os/ddi_intr.c @@ -42,14 +42,6 @@ */ /* - * MSI-X allocation limit. - * - * This MSI-X limit or tunable may be obsolete or change with Interrupt - * Resource Management (IRM) support. - */ -uint_t ddi_msix_alloc_limit = DDI_DEFAULT_MSIX_ALLOC; - -/* * ddi_intr_get_supported_types: * Return, as a bit mask, the hardware interrupt types supported by * both the device and by the host in the integer pointed diff --git a/usr/src/uts/common/os/ddi_intr_impl.c b/usr/src/uts/common/os/ddi_intr_impl.c index 7ff3038e35..8d1759644f 100644 --- a/usr/src/uts/common/os/ddi_intr_impl.c +++ b/usr/src/uts/common/os/ddi_intr_impl.c @@ -36,7 +36,12 @@ #include <sys/sunndi.h> #include <sys/ndi_impldefs.h> /* include prototypes */ -extern uint_t ddi_msix_alloc_limit; +#if defined(__i386) || defined(__amd64) +/* + * MSI-X allocation limit. 
+ */ +uint_t ddi_msix_alloc_limit = DDI_DEFAULT_MSIX_ALLOC; +#endif /* * New DDI interrupt framework @@ -226,7 +231,7 @@ i_ddi_intr_get_current_navail(dev_info_t *dip, int type) ddi_cb_t *cb_p; ddi_irm_pool_t *pool_p; ddi_irm_req_t *req_p; - uint_t navail = 0, nintrs, nreq; + uint_t navail = 0, nintrs; /* Get maximum number of supported interrupts */ nintrs = i_ddi_intr_get_supported_nintrs(dip, type); @@ -263,15 +268,12 @@ i_ddi_intr_get_current_navail(dev_info_t *dip, int type) } } - /* Apply MSI-X workarounds */ +#if defined(__i386) || defined(__amd64) + /* Global tunable workaround */ if (type == DDI_INTR_TYPE_MSIX) { - /* Global tunable workaround */ - if (navail < nintrs) - navail = MIN(nintrs, ddi_msix_alloc_limit); - /* Device property workaround */ - if ((nreq = i_ddi_get_msix_alloc_limit(dip)) > 0) - navail = MAX(navail, nreq); + navail = MIN(nintrs, ddi_msix_alloc_limit); } +#endif /* Always restrict MSI to a precise limit */ if (type == DDI_INTR_TYPE_MSI) @@ -489,20 +491,3 @@ i_ddi_set_msi_msix_cap_ptr(dev_info_t *dip, int cap_ptr) intr_p->devi_cap_ptr = cap_ptr; } #endif - -/* ARGSUSED */ -uint_t -i_ddi_get_msix_alloc_limit(dev_info_t *dip) -{ - uint_t msix_alloc_limit = ddi_msix_alloc_limit; - -#if defined(__sparc) - if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM | - DDI_PROP_DONTPASS, "#msix-request")) { - msix_alloc_limit = MAX(DDI_MAX_MSIX_ALLOC, - ddi_msix_alloc_limit); - } -#endif - - return (msix_alloc_limit); -} diff --git a/usr/src/uts/common/os/ddi_intr_irm.c b/usr/src/uts/common/os/ddi_intr_irm.c index bf0c95c24a..b96c05f5af 100644 --- a/usr/src/uts/common/os/ddi_intr_irm.c +++ b/usr/src/uts/common/os/ddi_intr_irm.c @@ -157,7 +157,6 @@ ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp, ASSERT(pool_retp != NULL); ASSERT(paramsp->iparams_total >= 1); ASSERT(paramsp->iparams_types != 0); - ASSERT(paramsp->iparams_default >= 1); DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip)); @@ -167,8 +166,7 @@ 
ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp, /* Validate parameters */ if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) || - (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0) || - (paramsp->iparams_default < 1)) + (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0)) return (NDI_FAILURE); /* Allocate and initialize the pool */ @@ -177,7 +175,8 @@ ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp, pool_p->ipool_policy = irm_default_policy; pool_p->ipool_types = paramsp->iparams_types; pool_p->ipool_totsz = paramsp->iparams_total; - pool_p->ipool_defsz = paramsp->iparams_default; + pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC, + paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER)); list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t), offsetof(ddi_irm_req_t, ireq_link)); list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t), @@ -893,8 +892,8 @@ i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag) static int i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance) { - ddi_irm_req_t *req_p, *next_p; - int nreqs, reduction; + ddi_irm_req_t *head_p, *next_p; + int next_navail, nreqs, reduction; ASSERT(pool_p != NULL); ASSERT(imbalance > 0); @@ -906,62 +905,46 @@ i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance) while (imbalance > 0) { - req_p = list_head(&pool_p->ipool_scratch_list); - next_p = list_next(&pool_p->ipool_scratch_list, req_p); + head_p = list_head(&pool_p->ipool_scratch_list); /* Fail if nothing is reducible */ - if (req_p->ireq_navail == 1) { + if (head_p->ireq_navail <= pool_p->ipool_defsz) { DDI_INTR_IRMDBG((CE_CONT, - "i_ddi_irm_reduce_large: failure.\n")); + "i_ddi_irm_reduce_large: Failure. 
" + "All requests have downsized to low limit.\n")); return (DDI_FAILURE); } /* Count the number of equally sized requests */ - nreqs = 1; - while (next_p && (req_p->ireq_navail == next_p->ireq_navail)) { - next_p = list_next(&pool_p->ipool_scratch_list, next_p); - nreqs++; - } - - /* Try to reduce multiple requests together */ - if (nreqs > 1) { - - if (next_p) { - reduction = req_p->ireq_navail - - (next_p->ireq_navail + 1); - } else { - reduction = req_p->ireq_navail - 1; - } - - if ((reduction * nreqs) > imbalance) - reduction = imbalance / nreqs; - - if (reduction > 0) { - while (req_p && (req_p != next_p)) { - imbalance -= reduction; - req_p->ireq_navail -= reduction; - pool_p->ipool_resno -= reduction; - req_p = list_next( - &pool_p->ipool_scratch_list, req_p); - } - continue; + for (nreqs = 1, next_p = head_p; + (next_p = list_next(&pool_p->ipool_scratch_list, next_p)) != + NULL && (head_p->ireq_navail == next_p->ireq_navail); + nreqs++) + ; + + next_navail = next_p ? next_p->ireq_navail : 0; + reduction = head_p->ireq_navail - + MAX(next_navail, pool_p->ipool_defsz); + + if ((reduction * nreqs) > imbalance) { + reduction = imbalance / nreqs; + + if (reduction == 0) { + reduction = 1; + nreqs = imbalance; } } - /* Or just reduce the current request */ - next_p = list_next(&pool_p->ipool_scratch_list, req_p); - if (next_p && (req_p->ireq_navail > next_p->ireq_navail)) { - reduction = req_p->ireq_navail - next_p->ireq_navail; - reduction = MIN(reduction, imbalance); - } else { - reduction = 1; + next_p = head_p; + while (nreqs--) { + imbalance -= reduction; + next_p->ireq_navail -= reduction; + pool_p->ipool_resno -= reduction; + next_p = list_next(&pool_p->ipool_scratch_list, next_p); } - imbalance -= reduction; - req_p->ireq_navail -= reduction; - pool_p->ipool_resno -= reduction; - /* Re-sort the scratch list if not yet finished */ - if (imbalance > 0) { + if (next_p && next_p->ireq_navail > head_p->ireq_navail) { + ASSERT(imbalance == 0); 
i_ddi_irm_reduce_large_resort(pool_p); } } @@ -978,21 +961,26 @@ i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance) static void i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *pool_p) { - ddi_irm_req_t *req_p, *next_p; + ddi_irm_req_t *start_p, *end_p, *next_p; ASSERT(pool_p != NULL); ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); - req_p = list_remove_head(&pool_p->ipool_scratch_list); - next_p = list_head(&pool_p->ipool_scratch_list); + start_p = list_head(&pool_p->ipool_scratch_list); + end_p = list_next(&pool_p->ipool_scratch_list, start_p); + while (end_p && start_p->ireq_navail == end_p->ireq_navail) + end_p = list_next(&pool_p->ipool_scratch_list, end_p); - while (next_p && - ((next_p->ireq_navail > req_p->ireq_navail) || - ((next_p->ireq_navail == req_p->ireq_navail) && - (next_p->ireq_nreq < req_p->ireq_nreq)))) + next_p = end_p; + while (next_p && (next_p->ireq_navail > start_p->ireq_navail)) next_p = list_next(&pool_p->ipool_scratch_list, next_p); - list_insert_before(&pool_p->ipool_scratch_list, next_p, req_p); + while (start_p != end_p) { + list_remove(&pool_p->ipool_scratch_list, start_p); + list_insert_before(&pool_p->ipool_scratch_list, next_p, + start_p); + start_p = list_head(&pool_p->ipool_scratch_list); + } } /* @@ -1026,7 +1014,7 @@ i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance) "i_ddi_irm_reduce_even: pool_p %p imbalance %d\n", (void *)pool_p, imbalance)); - while ((nmin > 0) && (imbalance > 0)) { + while (imbalance > 0) { /* Count reducible requests */ nreduce = 0; @@ -1038,10 +1026,12 @@ i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance) nreduce++; } - /* If none are reducible, try a lower minimum */ + /* Fail if none are reducible */ if (nreduce == 0) { - nmin--; - continue; + DDI_INTR_IRMDBG((CE_CONT, + "i_ddi_irm_reduce_even: Failure. 
" + "All requests have downsized to low limit.\n")); + return (DDI_FAILURE); } /* Compute reduction */ @@ -1071,34 +1061,48 @@ i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance) } } - if (nmin == 0) { - DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce_even: failure.\n")); - return (DDI_FAILURE); - } - return (DDI_SUCCESS); } /* * i_ddi_irm_reduce_new() * - * Reduces new requests to zero. This is only used as a - * last resort after another reduction algorithm failed. + * Reduces new requests. This is only used as a last resort + * after another reduction algorithm failed. */ static void i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance) { ddi_irm_req_t *req_p; + uint_t nreduce; ASSERT(pool_p != NULL); ASSERT(imbalance > 0); ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); + while (imbalance > 0) { + nreduce = 0; + for (req_p = list_head(&pool_p->ipool_scratch_list); + req_p && (imbalance > 0); + req_p = list_next(&pool_p->ipool_scratch_list, req_p)) { + if (req_p->ireq_flags & DDI_IRM_FLAG_NEW && + req_p->ireq_navail > 1) { + req_p->ireq_navail--; + pool_p->ipool_resno--; + imbalance--; + nreduce++; + } + } + + if (nreduce == 0) + break; + } + for (req_p = list_head(&pool_p->ipool_scratch_list); req_p && (imbalance > 0); req_p = list_next(&pool_p->ipool_scratch_list, req_p)) { - ASSERT(req_p->ireq_navail == 1); if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) { + ASSERT(req_p->ireq_navail == 1); req_p->ireq_navail--; pool_p->ipool_resno--; imbalance--; diff --git a/usr/src/uts/common/sys/ddi_intr_impl.h b/usr/src/uts/common/sys/ddi_intr_impl.h index 4430de7578..71c1d1849d 100644 --- a/usr/src/uts/common/sys/ddi_intr_impl.h +++ b/usr/src/uts/common/sys/ddi_intr_impl.h @@ -137,15 +137,12 @@ typedef struct ddi_intr_handle_impl { /* Maximum number of MSI resources to allocate */ #define DDI_MAX_MSI_ALLOC 2 -/* - * The following MSI-X limits will change with Interrupt Resource Management - * (IRM) support. 
- */ /* Default number of MSI-X resources to allocate */ #define DDI_DEFAULT_MSIX_ALLOC 2 -/* Maximum number of MSI-X resources to allocate */ -#define DDI_MAX_MSIX_ALLOC 8 +#define DDI_MSIX_ALLOC_DIVIDER 32 +#define DDI_MIN_MSIX_ALLOC 8 +#define DDI_MAX_MSIX_ALLOC 2048 struct av_softinfo; @@ -261,7 +258,6 @@ typedef struct ddi_irm_req { typedef struct ddi_irm_params { int iparams_types; /* Types of interrupts in pool */ uint_t iparams_total; /* Total size of the pool */ - uint_t iparams_default; /* Default allocation size */ } ddi_irm_params_t; /* @@ -345,8 +341,6 @@ int i_ddi_get_msi_msix_cap_ptr(dev_info_t *dip); void i_ddi_set_msi_msix_cap_ptr(dev_info_t *dip, int cap_ptr); #endif -uint_t i_ddi_get_msix_alloc_limit(dev_info_t *dip); - int32_t i_ddi_get_intr_weight(dev_info_t *); int32_t i_ddi_set_intr_weight(dev_info_t *, int32_t); diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c index 77314f3697..60b3e6133e 100644 --- a/usr/src/uts/i86pc/io/mp_platform_common.c +++ b/usr/src/uts/i86pc/io/mp_platform_common.c @@ -142,7 +142,7 @@ struct ioapic_reprogram_data apic_reprogram_info[APIC_MAX_VECTOR+1]; * is indexed by IRQ number, NOT by vector number. 
*/ -int apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; +int apic_intr_policy = INTR_ROUND_ROBIN; int apic_next_bind_cpu = 1; /* For round robin assignment */ /* start with cpu 1 */ @@ -1705,9 +1705,6 @@ apic_delspl_common(int irqno, int ipl, int min_ipl, int max_ipl) * of the multi-MSI support */ if (i_ddi_intr_get_current_nenables(irqptr->airq_dip) == 1) { - apic_pci_msi_unconfigure(irqptr->airq_dip, - DDI_INTR_TYPE_MSI, irqptr->airq_ioapicindex); - apic_pci_msi_disable_mode(irqptr->airq_dip, DDI_INTR_TYPE_MSI); } @@ -2410,10 +2407,11 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) cmn_err(CE_CONT, "!%s: %s (%s) instance #%d " - "vector 0x%x ioapic 0x%x " + "irq 0x%x vector 0x%x ioapic 0x%x " "intin 0x%x is bound to cpu %d\n", psm_name, name, drv_name, instance, irq, + apic_irq_table[irq]->airq_vector, ioapicid, intin, cpu); return (cpu); } @@ -2480,14 +2478,16 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) } } if (drv_name != NULL) - cmn_err(CE_CONT, "!%s: %s (%s) instance %d " + cmn_err(CE_CONT, "!%s: %s (%s) instance %d irq 0x%x " "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", - psm_name, name, drv_name, instance, - irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); + psm_name, name, drv_name, instance, irq, + apic_irq_table[irq]->airq_vector, ioapicid, intin, + bind_cpu & ~IRQ_USER_BOUND); else - cmn_err(CE_CONT, "!%s: " + cmn_err(CE_CONT, "!%s: irq 0x%x " "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", - psm_name, irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); + psm_name, irq, apic_irq_table[irq]->airq_vector, ioapicid, + intin, bind_cpu & ~IRQ_USER_BOUND); return ((uint32_t)bind_cpu); } diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c index 65aaa5d152..ec38e5fca6 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c @@ -2242,13 +2242,10 @@ apic_alloc_msi_vectors(dev_info_t *dip, int inum, int 
count, int pri, if (count > 1) { if (behavior == DDI_INTR_ALLOC_STRICT && - (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) + apic_multi_msi_enable == 0) return (0); - if (apic_multi_msi_enable == 0) count = 1; - else if (count > apic_multi_msi_max) - count = apic_multi_msi_max; } if ((rcount = apic_navail_vector(dip, pri)) > count) @@ -2338,14 +2335,6 @@ apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, int rcount, i; major_t major; - if (count > 1) { - if (behavior == DDI_INTR_ALLOC_STRICT) { - if (count > apic_msix_max) - return (0); - } else if (count > apic_msix_max) - count = apic_msix_max; - } - mutex_enter(&airq_mutex); if ((rcount = apic_navail_vector(dip, pri)) > count) diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c index 5b9dfcb354..e5f48b0ca0 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c @@ -61,11 +61,9 @@ int apic_support_msi = 0; /* Multiple vector support for MSI */ int apic_multi_msi_enable = 1; -int apic_multi_msi_max = 2; -/* Maximum no. 
of MSI-X vectors supported */ +/* Multiple vector support for MSI-X */ int apic_msix_enable = 1; -int apic_msix_max = 2; /* * apic_pci_msi_enable_vector: diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h index 67390d59db..b44447e6aa 100644 --- a/usr/src/uts/i86pc/sys/apic.h +++ b/usr/src/uts/i86pc/sys/apic.h @@ -840,8 +840,6 @@ extern int apic_nproc; extern int apic_next_bind_cpu; extern int apic_redistribute_sample_interval; extern int apic_multi_msi_enable; -extern int apic_multi_msi_max; -extern int apic_msix_max; extern int apic_sci_vect; extern uchar_t apic_ipls[]; extern apic_reg_ops_t *apic_reg_ops; diff --git a/usr/src/uts/i86xpv/io/psm/xpv_psm.c b/usr/src/uts/i86xpv/io/psm/xpv_psm.c index ea483955f4..9bbff3cfef 100644 --- a/usr/src/uts/i86xpv/io/psm/xpv_psm.c +++ b/usr/src/uts/i86xpv/io/psm/xpv_psm.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -974,13 +974,10 @@ apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, if (count > 1) { if (behavior == DDI_INTR_ALLOC_STRICT && - (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) + apic_multi_msi_enable == 0) return (0); - if (apic_multi_msi_enable == 0) count = 1; - else if (count > apic_multi_msi_max) - count = apic_multi_msi_max; } /* diff --git a/usr/src/uts/sun4/io/px/px_msi.c b/usr/src/uts/sun4/io/px/px_msi.c index 20178957c7..2f84fdd986 100644 --- a/usr/src/uts/sun4/io/px/px_msi.c +++ b/usr/src/uts/sun4/io/px/px_msi.c @@ -83,7 +83,6 @@ px_msi_attach(px_t *px_p) bzero(&irm_params, sizeof (ddi_irm_params_t)); irm_params.iparams_types = msi_state_p->msi_type; irm_params.iparams_total = msi_state_p->msi_cnt; - irm_params.iparams_default = DDI_DEFAULT_MSIX_ALLOC; if (ndi_irm_create(dip, &irm_params, &irm_pool_p) == DDI_SUCCESS) { msi_state_p->msi_pool_p = irm_pool_p; } else { |