author    Vikram Hegde <Vikram.Hegde@Sun.COM>  2010-02-15 16:17:46 -0800
committer Vikram Hegde <Vikram.Hegde@Sun.COM>  2010-02-15 16:17:46 -0800
commit    e03dceed3deb85ad561202c77277e701f763fa13 (patch)
tree      f22a115998063a185cd7c7154ea246a0481ca6bf
parent    c65c9cdc54c4ba51aeb34ab36b73286653013c8a (diff)
download  illumos-gate-e03dceed3deb85ad561202c77277e701f763fa13.tar.gz
6923494 IOMMU on G5 can make the USB devices unusable
6889221 Intel IOMMU must support force physical functionality
6926010 map_bios_rsvd_mem(): Variable "mrng" tracked as NULL was dereferenced.
6925263 Fiber channel initiator IO very slow running with IOMMU enabled
6922954 potential null dereference in get_gfx_devinfo()
6926327 disable immu by default
-rw-r--r--  usr/src/uts/i86pc/io/immu.c       |  113
-rw-r--r--  usr/src/uts/i86pc/io/immu_dmar.c  |    2
-rw-r--r--  usr/src/uts/i86pc/io/immu_dvma.c  | 1000
-rw-r--r--  usr/src/uts/i86pc/io/immu_regs.c  |  130
-rw-r--r--  usr/src/uts/i86pc/sys/immu.h      |   21
-rw-r--r--  usr/src/uts/i86pc/sys/rootnex.h   |    2
6 files changed, 767 insertions, 501 deletions
diff --git a/usr/src/uts/i86pc/io/immu.c b/usr/src/uts/i86pc/io/immu.c
index d9b664a820..02480cfd8b 100644
--- a/usr/src/uts/i86pc/io/immu.c
+++ b/usr/src/uts/i86pc/io/immu.c
@@ -59,7 +59,6 @@
#include <sys/bootinfo.h>
#include <sys/atomic.h>
#include <sys/immu.h>
-
/* ########################### Globals and tunables ######################## */
/*
* Global switches (boolean) that can be toggled either via boot options
@@ -67,7 +66,7 @@
*/
/* Various features */
-boolean_t immu_enable = B_TRUE;
+boolean_t immu_enable = B_FALSE;
boolean_t immu_dvma_enable = B_TRUE;
/* accessed in other files so not static */
@@ -83,11 +82,12 @@ boolean_t immu_quirk_usbrmrr = B_TRUE;
boolean_t immu_quirk_usbfullpa;
boolean_t immu_quirk_mobile4;
-boolean_t immu_mmio_safe = B_TRUE;
-
/* debug messages */
boolean_t immu_dmar_print;
+/* Tunables */
+int64_t immu_flush_gran = 5;
+
/* ############ END OPTIONS section ################ */
/*
@@ -96,6 +96,7 @@ boolean_t immu_dmar_print;
dev_info_t *root_devinfo;
kmutex_t immu_lock;
list_t immu_list;
+void *immu_pgtable_cache;
boolean_t immu_setup;
boolean_t immu_running;
boolean_t immu_quiesced;
@@ -143,16 +144,16 @@ map_bios_rsvd_mem(dev_info_t *dip)
mp = bios_rsvd;
while (mp != NULL) {
- memrng_t *mrng = {0};
+ memrng_t mrng = {0};
ddi_err(DER_LOG, dip, "IMMU: Mapping BIOS rsvd range "
"[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
mp->ml_address + mp->ml_size);
- mrng->mrng_start = IMMU_ROUNDOWN(mp->ml_address);
- mrng->mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
+ mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
+ mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
- e = immu_dvma_map(NULL, NULL, mrng, 0, dip, IMMU_FLAGS_MEMRNG);
+ e = immu_dvma_map(NULL, NULL, &mrng, 0, dip, IMMU_FLAGS_MEMRNG);
ASSERT(e == DDI_DMA_MAPPED || e == DDI_DMA_USE_PHYSICAL);
mp = mp->ml_next;
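
This hunk is the fix for 6926010: "memrng_t *mrng = {0}" declared a NULL
pointer (the {0} initializes the pointer itself, not a struct), and the
subsequent stores through mrng dereferenced NULL. A minimal sketch of the
bug and the fix, using the same names as the hunk:

    /* Before (6926010): mrng is a NULL pointer; the store dereferences it. */
    memrng_t *mrng = {0};
    mrng->mrng_start = IMMU_ROUNDOWN(mp->ml_address);

    /* After: mrng is a stack object; pass its address down. */
    memrng_t mrng = {0};
    mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
    e = immu_dvma_map(NULL, NULL, &mrng, 0, dip, IMMU_FLAGS_MEMRNG);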
@@ -161,6 +162,40 @@ map_bios_rsvd_mem(dev_info_t *dip)
memlist_read_unlock();
}
+
+/*
+ * Check if the driver requests physical mapping
+ */
+/*ARGSUSED*/
+static void
+check_physical(dev_info_t *dip, void *arg)
+{
+ char *val;
+
+ /*
+ * Check for the DVMA unity mapping property on the device
+ */
+ val = NULL;
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, DDI_DVMA_MAPTYPE_PROP, &val) == DDI_SUCCESS) {
+ ASSERT(val);
+ if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) != 0) {
+ ddi_err(DER_WARN, dip, "%s value \"%s\" is not valid",
+ DDI_DVMA_MAPTYPE_PROP, val);
+ } else {
+ int e;
+
+ ddi_err(DER_NOTE, dip,
+ "Using unity DVMA mapping for device");
+ e = immu_dvma_map(NULL, NULL, NULL, 0, dip,
+ IMMU_FLAGS_UNITY);
+ /* for unity mode, map will return USE_PHYSICAL */
+ ASSERT(e == DDI_DMA_USE_PHYSICAL);
+ }
+ ddi_prop_free(val);
+ }
+}
+
/*
* Check if the device is USB controller
*/
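
The new check_physical() above looks for the DVMA map-type property on each
device and, when it is set to unity, pre-establishes a 1:1 physical mapping
for it. A driver opts in by setting that property on its own dip; a sketch of
what that could look like in a driver's attach(9E), reusing the same
DDI_DVMA_MAPTYPE_* macros the hunk uses:

    /* Sketch: request unity (1:1 physical) DVMA mapping for this device. */
    if (ddi_prop_update_string(DDI_DEV_T_NONE, dip,
        DDI_DVMA_MAPTYPE_PROP, DDI_DVMA_MAPTYPE_UNITY) != DDI_PROP_SUCCESS) {
            cmn_err(CE_WARN, "could not request unity DVMA mapping");
    }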
@@ -261,6 +296,8 @@ check_pre_startup_quirks(dev_info_t *dip, void *arg)
check_usb(dip, arg);
+ check_physical(dip, arg);
+
return (DDI_WALK_CONTINUE);
}
@@ -311,6 +348,45 @@ get_bootopt(char *bopt, boolean_t *kvar)
}
static void
+get_tunables(char *bopt, int64_t *ivar)
+{
+ int64_t *iarray;
+ uint_t n;
+
+ /*
+ * Check the rootnex.conf property
+ * Fake up a dev_t since searching the global
+ * property list needs it
+ */
+ if (ddi_prop_lookup_int64_array(
+ makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
+ DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
+ &iarray, &n) != DDI_PROP_SUCCESS) {
+ return;
+ }
+
+ if (n != 1) {
+ ddi_err(DER_WARN, NULL, "More than one value specified for "
+ "%s property. Ignoring and using default",
+ "immu-flush-gran");
+ ddi_prop_free(iarray);
+ return;
+ }
+
+ if (iarray[0] < 0) {
+ ddi_err(DER_WARN, NULL, "Negative value specified for "
+ "%s property. Inoring and Using default value",
+ "immu-flush-gran");
+ ddi_prop_free(iarray);
+ return;
+ }
+
+ *ivar = iarray[0];
+
+ ddi_prop_free(iarray);
+}
+
+static void
read_boot_options(void)
{
/* enable/disable options */
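
The new get_tunables() reads the value as an int64 array from the rootnex
global property list, so the knob lives in rootnex.conf rather than on a
device node. A sketch of the conf entry (the file path and the default of 5
are assumptions based on the code above):

    # /kernel/drv/rootnex.conf
    immu-flush-gran=5;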
@@ -319,7 +395,6 @@ read_boot_options(void)
get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
get_bootopt("immu-qinv-enable", &immu_qinv_enable);
- get_bootopt("immu-mmio-safe", &immu_mmio_safe);
/* workaround switches */
get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
@@ -328,6 +403,9 @@ read_boot_options(void)
/* debug printing */
get_bootopt("immu-dmar-print", &immu_dmar_print);
+
+ /* get tunables */
+ get_tunables("immu-flush-gran", &immu_flush_gran);
}
/*
@@ -348,8 +426,8 @@ blacklisted_driver(void)
return (B_FALSE);
}
- strptr = black_array;
for (i = 0; nblacks - i > 1; i++) {
+ strptr = &black_array[i];
if (strcmp(*strptr++, "DRIVER") == 0) {
if ((maj = ddi_name_to_major(*strptr++))
!= DDI_MAJOR_T_NONE) {
@@ -399,8 +477,8 @@ blacklisted_smbios(void)
ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);
- strptr = black_array;
for (i = 0; nblacks - i > 3; i++) {
+ strptr = &black_array[i];
if (strcmp(*strptr++, "SMBIOS") == 0) {
if (strcmp(*strptr++, mfg) == 0 &&
((char *)strptr == '\0' ||
@@ -528,6 +606,8 @@ immu_state_alloc(int seg, void *dmar_unit)
/* IOMMU regs related */
mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
+ immu->immu_regs_busy = B_FALSE;
/* DVMA related */
immu->immu_dvma_coherent = B_FALSE;
@@ -583,6 +663,12 @@ immu_subsystems_setup(void)
mutex_enter(&immu_lock);
+ ASSERT(immu_pgtable_cache == NULL);
+
+ immu_pgtable_cache = kmem_cache_create("immu_pgtable_cache",
+ sizeof (pgtable_t), 0,
+ pgtable_ctor, pgtable_dtor, NULL, NULL, NULL, 0);
+
unit_hdl = NULL;
for (seg = 0; seg < IMMU_MAXSEG; seg++) {
while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
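
The setup code now creates a kmem object cache for page tables. With an
object cache, the constructor/destructor pair (pgtable_ctor()/pgtable_dtor(),
added in the immu_dvma.c hunks below) pays the expensive DMA handle and page
allocation only when the cache grows or shrinks; an alloc/free usually just
recycles an already-constructed object. The general pattern, with
illustrative names:

    /* Construction cost is paid once per object, not once per alloc. */
    cache = kmem_cache_create("obj_cache", sizeof (obj_t), 0,
        obj_ctor, obj_dtor, NULL, NULL, NULL, 0);

    obj = kmem_cache_alloc(cache, KM_SLEEP);    /* cheap: likely recycled */
    /* ... caller re-zeroes only per-use state (cf. pgtable_zero()) ... */
    kmem_cache_free(cache, obj);                /* returns to the cache */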
@@ -991,11 +1077,14 @@ immu_unquiesce(void)
mutex_enter(&(immu->immu_lock));
/* if immu was not quiesced, i.e was not running before */
- if (immu->immu_regs_quiesced == B_FALSE)
+ if (immu->immu_regs_quiesced == B_FALSE) {
+ mutex_exit(&(immu->immu_lock));
continue;
+ }
if (immu_regs_resume(immu) != DDI_SUCCESS) {
ret = DDI_FAILURE;
+ mutex_exit(&(immu->immu_lock));
continue;
}
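
The immu_unquiesce() hunk plugs a lock leak: each loop iteration takes the
unit's immu_lock, and both continue paths skipped the mutex_exit() that the
fall-through path performs, so the mutex stayed held into the next iteration.
The shape of the fix in isolation:

    mutex_enter(&(immu->immu_lock));
    if (immu->immu_regs_quiesced == B_FALSE) {
            mutex_exit(&(immu->immu_lock));     /* the fix */
            continue;                           /* used to leak the lock */
    }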
diff --git a/usr/src/uts/i86pc/io/immu_dmar.c b/usr/src/uts/i86pc/io/immu_dmar.c
index b0be317cfe..3e771c73d1 100644
--- a/usr/src/uts/i86pc/io/immu_dmar.c
+++ b/usr/src/uts/i86pc/io/immu_dmar.c
@@ -1005,7 +1005,7 @@ immu_dmar_rmrr_map(void)
rmrr->rm_base + 1)) {
ddi_err(DER_WARN, rdip, "RMRR range "
" [0x%" PRIx64 " - 0x%" PRIx64 "]"
- " is not in BIOS reserved map",
+ " not in BIOS reserved map",
rmrr->rm_base, rmrr->rm_limit);
}
diff --git a/usr/src/uts/i86pc/io/immu_dvma.c b/usr/src/uts/i86pc/io/immu_dvma.c
index cfc1786671..f960e8bcc7 100644
--- a/usr/src/uts/i86pc/io/immu_dvma.c
+++ b/usr/src/uts/i86pc/io/immu_dvma.c
@@ -71,11 +71,12 @@ static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
int dev, int func, immu_flags_t immu_flags);
static void destroy_immu_devi(immu_devi_t *immu_devi);
-static void dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma,
- uint64_t spaddr, uint64_t npages, dev_info_t *rdip,
+static boolean_t dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma,
+ uint64_t nvpages, dcookie_t *dcookies, int dcount, dev_info_t *rdip,
immu_flags_t immu_flags);
-extern struct memlist *phys_install;
+/* Extern globals */
+extern struct memlist *phys_install;
/* static Globals */
@@ -319,8 +320,7 @@ get_gfx_devinfo(dev_info_t *rdip)
if (immu_devi == NULL) {
ddi_err(DER_WARN, rdip, "IMMU: No GFX device. "
- "Cannot redirect agpgart",
- ddi_node_name(immu_devi->imd_dip));
+ "Cannot redirect agpgart");
return (NULL);
}
@@ -345,6 +345,11 @@ dma_to_immu_flags(struct ddi_dma_req *dmareq)
flags |= IMMU_FLAGS_NOSLEEP;
}
+#ifdef BUGGY_DRIVERS
+
+ flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
+
+#else
/*
* Read and write flags need to be reversed.
* DMA_READ means read from device and write
@@ -356,7 +361,6 @@ dma_to_immu_flags(struct ddi_dma_req *dmareq)
if (dmareq->dmar_flags & DDI_DMA_WRITE)
flags |= IMMU_FLAGS_READ;
-#ifdef BUGGY_DRIVERS
/*
* Some buggy drivers specify neither READ or WRITE
* For such drivers set both read and write permissions
@@ -369,53 +373,32 @@ dma_to_immu_flags(struct ddi_dma_req *dmareq)
return (flags);
}
-/*
- * pgtable_alloc()
- * alloc a IOMMU pgtable structure.
- * This same struct is used for root and context tables as well.
- * This routine allocs the f/ollowing:
- * - a pgtable_t struct
- * - a HW page which holds PTEs/entries which is accesssed by HW
- * so we set up DMA for this page
- * - a SW page which is only for our bookeeping
- * (for example to hold pointers to the next level pgtable).
- * So a simple kmem_alloc suffices
- */
-static pgtable_t *
-pgtable_alloc(immu_t *immu, domain_t *domain, immu_flags_t immu_flags)
+int
+pgtable_ctor(void *buf, void *arg, int kmflag)
{
size_t actual_size = 0;
pgtable_t *pgtable;
int (*dmafp)(caddr_t);
caddr_t vaddr;
- int kmflags;
+ void *next;
- /* TO DO cache freed pgtables as it is expensive to create em */
- ASSERT(immu);
+ ASSERT(buf);
+ ASSERT(arg == NULL);
- kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ?
- KM_NOSLEEP : KM_SLEEP;
+ pgtable = (pgtable_t *)buf;
- dmafp = (immu_flags & IMMU_FLAGS_NOSLEEP) ?
- DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
+ dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
- pgtable = kmem_zalloc(sizeof (pgtable_t), kmflags);
- if (pgtable == NULL) {
- return (NULL);
- }
-
- pgtable->swpg_next_array = kmem_zalloc(IMMU_PAGESIZE, kmflags);
- if (pgtable->swpg_next_array == NULL) {
- kmem_free(pgtable, sizeof (pgtable_t));
- return (NULL);
+ next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
+ if (next == NULL) {
+ return (-1);
}
ASSERT(root_devinfo);
if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
- kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
- kmem_free(pgtable, sizeof (pgtable_t));
- return (NULL);
+ kmem_free(next, IMMU_PAGESIZE);
+ return (-1);
}
if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
@@ -423,10 +406,8 @@ pgtable_alloc(immu_t *immu, domain_t *domain, immu_flags_t immu_flags)
dmafp, NULL, &vaddr, &actual_size,
&pgtable->hwpg_memhdl) != DDI_SUCCESS) {
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
- kmem_free((void *)(pgtable->swpg_next_array),
- IMMU_PAGESIZE);
- kmem_free(pgtable, sizeof (pgtable_t));
- return (NULL);
+ kmem_free(next, IMMU_PAGESIZE);
+ return (-1);
}
/*
@@ -436,55 +417,86 @@ pgtable_alloc(immu_t *immu, domain_t *domain, immu_flags_t immu_flags)
if (actual_size < IMMU_PAGESIZE) {
ddi_dma_mem_free(&pgtable->hwpg_memhdl);
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
- kmem_free((void *)(pgtable->swpg_next_array),
- IMMU_PAGESIZE);
- kmem_free(pgtable, sizeof (pgtable_t));
- return (NULL);
+ kmem_free(next, IMMU_PAGESIZE);
+ return (-1);
}
pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
pgtable->hwpg_vaddr = vaddr;
+ pgtable->swpg_next_array = next;
- bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
+ rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
- /* Use immu directly as domain may be NULL, cant use dom_immu field */
- immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE);
+ return (0);
+}
- rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
+void
+pgtable_dtor(void *buf, void *arg)
+{
+ pgtable_t *pgtable;
- if (domain) {
- rw_enter(&(domain->dom_pgtable_rwlock), RW_WRITER);
- list_insert_head(&(domain->dom_pglist), pgtable);
- rw_exit(&(domain->dom_pgtable_rwlock));
- }
+ ASSERT(buf);
+ ASSERT(arg == NULL);
- return (pgtable);
+ pgtable = (pgtable_t *)buf;
+ ASSERT(pgtable->swpg_next_array);
+
+ /* destroy will panic if lock is held. */
+ rw_destroy(&(pgtable->swpg_rwlock));
+
+ ddi_dma_mem_free(&pgtable->hwpg_memhdl);
+ ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
+ kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
+
+ /* don't zero out hwpg_vaddr and swpg_next_array for debugging */
}
-static void
-pgtable_free(immu_t *immu, pgtable_t *pgtable, domain_t *domain)
+/*
+ * pgtable_alloc()
+ * alloc an IOMMU pgtable structure.
+ * This same struct is used for root and context tables as well.
+ * This routine allocs the following:
+ * - a pgtable_t struct
+ * - a HW page which holds PTEs/entries which is accessed by HW
+ * so we set up DMA for this page
+ * - a SW page which is only for our bookkeeping
+ * (for example to hold pointers to the next level pgtable).
+ * So a simple kmem_alloc suffices
+ */
+static pgtable_t *
+pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
{
+ pgtable_t *pgtable;
+ int kmflags;
+
ASSERT(immu);
- ASSERT(pgtable);
- if (domain) {
- rw_enter(&(domain->dom_pgtable_rwlock), RW_WRITER);
- list_remove(&(domain->dom_pglist), pgtable);
- rw_exit(&(domain->dom_pgtable_rwlock));
- }
+ kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
- /* destroy will panic if lock is held. */
- rw_destroy(&(pgtable->swpg_rwlock));
+ pgtable = kmem_cache_alloc(immu_pgtable_cache, kmflags);
+ if (pgtable == NULL) {
+ return (NULL);
+ }
+ return (pgtable);
+}
- /* Zero out the HW page being freed to catch errors */
+static void
+pgtable_zero(immu_t *immu, pgtable_t *pgtable)
+{
bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
+ bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
+
+ /* Don't need to flush the write; we will flush when we use the entry */
immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE);
- ddi_dma_mem_free(&pgtable->hwpg_memhdl);
- ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
- /* don't zero out the soft pages for debugging */
- if (pgtable->swpg_next_array)
- kmem_free((void *)(pgtable->swpg_next_array), IMMU_PAGESIZE);
- kmem_free(pgtable, sizeof (pgtable_t));
+}
+
+static void
+pgtable_free(immu_t *immu, pgtable_t *pgtable)
+{
+ ASSERT(immu);
+ ASSERT(pgtable);
+
+ kmem_cache_free(immu_pgtable_cache, pgtable);
}
/*
@@ -896,6 +908,7 @@ get_branch_domain(dev_info_t *pdip, void *arg)
* walk upwards until the topmost PCI bridge is found
*/
return (DDI_WALK_CONTINUE);
+
}
static void
@@ -904,6 +917,8 @@ map_unity_domain(domain_t *domain)
struct memlist *mp;
uint64_t start;
uint64_t npages;
+ dcookie_t dcookies[1] = {0};
+ int dcount = 0;
ASSERT(domain);
ASSERT(domain->dom_did == IMMU_UNITY_DID);
@@ -924,7 +939,10 @@ map_unity_domain(domain_t *domain)
/*
* Dont skip page0. Some broken HW/FW access it.
*/
- dvma_map(domain->dom_immu, domain, 0, 0, 1, NULL,
+ dcookies[0].dck_paddr = 0;
+ dcookies[0].dck_npages = 1;
+ dcount = 1;
+ (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL,
IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
@@ -940,8 +958,11 @@ map_unity_domain(domain_t *domain)
}
npages = mp->ml_size/IMMU_PAGESIZE + 1;
- dvma_map(domain->dom_immu, domain, start, start, npages, NULL,
- IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
+ dcookies[0].dck_paddr = start;
+ dcookies[0].dck_npages = npages;
+ dcount = 1;
+ (void) dvma_map(domain->dom_immu, domain, start, npages, dcookies,
+ dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
" - 0x%" PRIx64 "]", start, start + mp->ml_size);
@@ -955,9 +976,11 @@ map_unity_domain(domain_t *domain)
start = mp->ml_address;
npages = mp->ml_size/IMMU_PAGESIZE + 1;
- dvma_map(domain->dom_immu, domain, start, start,
- npages, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
-
+ dcookies[0].dck_paddr = start;
+ dcookies[0].dck_npages = npages;
+ dcount = 1;
+ (void) dvma_map(domain->dom_immu, domain, start, npages,
+ dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
mp = mp->ml_next;
}
@@ -970,8 +993,11 @@ map_unity_domain(domain_t *domain)
start = mp->ml_address;
npages = mp->ml_size/IMMU_PAGESIZE + 1;
- dvma_map(domain->dom_immu, domain, start, start,
- npages, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
+ dcookies[0].dck_paddr = start;
+ dcookies[0].dck_npages = npages;
+ dcount = 1;
+ (void) dvma_map(domain->dom_immu, domain, start, npages,
+ dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
mp = mp->ml_next;
}
@@ -1020,47 +1046,51 @@ create_xlate_arena(immu_t *immu, domain_t *domain,
* To ensure we avoid ioapic and PCI MMIO ranges we just
* use the physical memory address range of the system as the
* range
- * Implementing above causes graphics device to barf on
- * Lenovo X301 hence the toggle switch immu_mmio_safe.
*/
maxaddr = ((uint64_t)1 << mgaw);
- if (immu_mmio_safe == B_FALSE) {
+ memlist_read_lock();
+
+ mp = phys_install;
+ if (mp->ml_address == 0)
start = MMU_PAGESIZE;
+ else
+ start = mp->ml_address;
+
+ if (start + mp->ml_size > maxaddr)
size = maxaddr - start;
+ else
+ size = mp->ml_size;
- ddi_err(DER_VERB, rdip,
- "%s: Creating dvma vmem arena [0x%" PRIx64
- " - 0x%" PRIx64 "]", arena_name, start, start + size);
+ ddi_err(DER_VERB, rdip,
+ "%s: Creating dvma vmem arena [0x%" PRIx64
+ " - 0x%" PRIx64 "]", arena_name, start, start + size);
- ASSERT(domain->dom_dvma_arena == NULL);
+ ASSERT(domain->dom_dvma_arena == NULL);
- /*
- * We always allocate in quanta of IMMU_PAGESIZE
- */
- domain->dom_dvma_arena = vmem_create(arena_name,
- (void *)(uintptr_t)start, /* start addr */
- size, /* size */
- IMMU_PAGESIZE, /* quantum */
- NULL, /* afunc */
- NULL, /* ffunc */
- NULL, /* source */
- 0, /* qcache_max */
- vmem_flags);
-
- if (domain->dom_dvma_arena == NULL) {
- ddi_err(DER_PANIC, rdip,
- "Failed to allocate DVMA arena(%s) "
- "for domain ID (%d)", arena_name, domain->dom_did);
- /*NOTREACHED*/
- }
-
- } else {
+ /*
+ * We always allocate in quanta of IMMU_PAGESIZE
+ */
+ domain->dom_dvma_arena = vmem_create(arena_name,
+ (void *)(uintptr_t)start, /* start addr */
+ size, /* size */
+ IMMU_PAGESIZE, /* quantum */
+ NULL, /* afunc */
+ NULL, /* ffunc */
+ NULL, /* source */
+ 0, /* qcache_max */
+ vmem_flags);
- memlist_read_lock();
+ if (domain->dom_dvma_arena == NULL) {
+ ddi_err(DER_PANIC, rdip,
+ "Failed to allocate DVMA arena(%s) "
+ "for domain ID (%d)", arena_name, domain->dom_did);
+ /*NOTREACHED*/
+ }
- mp = phys_install;
+ mp = mp->ml_next;
+ while (mp) {
if (mp->ml_address == 0)
start = MMU_PAGESIZE;
@@ -1073,64 +1103,23 @@ create_xlate_arena(immu_t *immu, domain_t *domain,
size = mp->ml_size;
ddi_err(DER_VERB, rdip,
- "%s: Creating dvma vmem arena [0x%" PRIx64
- " - 0x%" PRIx64 "]", arena_name, start, start + size);
+ "%s: Adding dvma vmem span [0x%" PRIx64
+ " - 0x%" PRIx64 "]", arena_name, start,
+ start + size);
- ASSERT(domain->dom_dvma_arena == NULL);
+ vmem_ret = vmem_add(domain->dom_dvma_arena,
+ (void *)(uintptr_t)start, size, vmem_flags);
- /*
- * We always allocate in quanta of IMMU_PAGESIZE
- */
- domain->dom_dvma_arena = vmem_create(arena_name,
- (void *)(uintptr_t)start, /* start addr */
- size, /* size */
- IMMU_PAGESIZE, /* quantum */
- NULL, /* afunc */
- NULL, /* ffunc */
- NULL, /* source */
- 0, /* qcache_max */
- vmem_flags);
-
- if (domain->dom_dvma_arena == NULL) {
+ if (vmem_ret == NULL) {
ddi_err(DER_PANIC, rdip,
"Failed to allocate DVMA arena(%s) "
- "for domain ID (%d)", arena_name, domain->dom_did);
+ "for domain ID (%d)",
+ arena_name, domain->dom_did);
/*NOTREACHED*/
}
-
mp = mp->ml_next;
- while (mp) {
-
- if (mp->ml_address == 0)
- start = MMU_PAGESIZE;
- else
- start = mp->ml_address;
-
- if (start + mp->ml_size > maxaddr)
- size = maxaddr - start;
- else
- size = mp->ml_size;
-
- ddi_err(DER_VERB, rdip,
- "%s: Adding dvma vmem span [0x%" PRIx64
- " - 0x%" PRIx64 "]", arena_name, start,
- start + size);
-
- vmem_ret = vmem_add(domain->dom_dvma_arena,
- (void *)(uintptr_t)start, size, vmem_flags);
-
- if (vmem_ret == NULL) {
- ddi_err(DER_PANIC, rdip,
- "Failed to allocate DVMA arena(%s) "
- "for domain ID (%d)",
- arena_name, domain->dom_did);
- /*NOTREACHED*/
- }
-
- mp = mp->ml_next;
- }
- memlist_read_unlock();
}
+ memlist_read_unlock();
}
/* ################################### DOMAIN CODE ######################### */
@@ -1191,7 +1180,6 @@ static domain_t *
device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
{
dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
- dev_info_t *edip; /* effective dip used for finding domain */
immu_t *immu;
domain_t *domain;
dvma_arg_t dvarg = {0};
@@ -1220,39 +1208,16 @@ device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
* possible that there is no IOMMU unit for this device
* - BIOS bugs are one example.
*/
+ ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
return (NULL);
}
- /*
- * Some devices need to be redirected
- */
- edip = rdip;
-
- /*
- * for isa devices attached under lpc
- */
- if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
- edip = get_lpc_devinfo(immu, rdip, immu_flags);
- }
-
- /*
- * for gart, use the real graphic devinfo
- */
- if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
- edip = get_gfx_devinfo(rdip);
- }
-
- if (edip == NULL) {
- ddi_err(DER_MODE, rdip, "IMMU redirect failed");
- return (NULL);
- }
-
- dvarg.dva_rdip = edip;
+ dvarg.dva_rdip = rdip;
dvarg.dva_ddip = NULL;
dvarg.dva_domain = NULL;
dvarg.dva_flags = immu_flags;
level = 0;
- if (immu_walk_ancestor(edip, NULL, get_branch_domain,
+ if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
&dvarg, &level, immu_flags) != DDI_SUCCESS) {
/*
* maybe low memory. return error,
@@ -1277,9 +1242,7 @@ device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
* be found.
*/
if (ddip == NULL) {
- ddi_err(DER_MODE, rdip, "Cannot find domain dip for device. "
- "Effective dip (%s%d)", ddi_driver_name(edip),
- ddi_get_instance(edip));
+ ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
return (NULL);
}
@@ -1305,7 +1268,6 @@ found:
* effective dip.
*/
set_domain(ddip, ddip, domain);
- set_domain(edip, ddip, domain);
set_domain(rdip, ddip, domain);
*ddipp = ddip;
@@ -1325,8 +1287,6 @@ create_unity_domain(immu_t *immu)
domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
- list_create(&(domain->dom_pglist), sizeof (pgtable_t),
- offsetof(pgtable_t, swpg_domain_node));
domain->dom_did = IMMU_UNITY_DID;
domain->dom_maptype = IMMU_MAPTYPE_UNITY;
@@ -1338,10 +1298,9 @@ create_unity_domain(immu_t *immu)
* Setup the domain's initial page table
* should never fail.
*/
- domain->dom_pgtable_root = pgtable_alloc(immu, domain,
- IMMU_FLAGS_SLEEP);
-
+ domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
ASSERT(domain->dom_pgtable_root);
+ pgtable_zero(immu, domain->dom_pgtable_root);
map_unity_domain(domain);
@@ -1368,6 +1327,8 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
char mod_hash_name[128];
immu_devi_t *immu_devi;
int did;
+ dcookie_t dcookies[1] = {0};
+ int dcount = 0;
ASSERT(immu);
ASSERT(ddip);
@@ -1398,8 +1359,6 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
}
rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
- list_create(&(domain->dom_pglist), sizeof (pgtable_t),
- offsetof(pgtable_t, swpg_domain_node));
(void) snprintf(mod_hash_name, sizeof (mod_hash_name),
"immu%s-domain%d-pava-hash", immu->immu_name, did);
@@ -1416,13 +1375,14 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
/*
* Setup the domain's initial page table
*/
- domain->dom_pgtable_root = pgtable_alloc(immu, domain, immu_flags);
+ domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
if (domain->dom_pgtable_root == NULL) {
ddi_err(DER_PANIC, rdip, "Failed to alloc root "
"pgtable for domain (%d). IOMMU unit: %s",
domain->dom_did, immu->immu_name);
/*NOTREACHED*/
}
+ pgtable_zero(immu, domain->dom_pgtable_root);
/*
* Since this is a immu unit-specific domain, put it on
@@ -1445,10 +1405,12 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
/*
* Map page0. Some broken HW/FW access it.
*/
- dvma_map(domain->dom_immu, domain, 0, 0, 1, NULL,
+ dcookies[0].dck_paddr = 0;
+ dcookies[0].dck_npages = 1;
+ dcount = 1;
+ (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL,
IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
-
return (domain);
}
@@ -1508,8 +1470,10 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
hw_rce_t *hw_rent;
hw_rce_t *hw_cent;
hw_rce_t *ctxp;
-
- ASSERT(rw_write_held(&(immu->immu_ctx_rwlock)));
+ int sid;
+ krw_t rwtype;
+ boolean_t fill_root;
+ boolean_t fill_ctx;
ASSERT(immu);
ASSERT(domain);
@@ -1518,21 +1482,58 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
ASSERT(devfunc >= 0);
ASSERT(domain->dom_pgtable_root);
+ pgtable_root = domain->dom_pgtable_root;
+
ctxp = (hw_rce_t *)(root_table->swpg_next_array);
context = *(pgtable_t **)(ctxp + bus);
hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;
+
+ fill_root = B_FALSE;
+ fill_ctx = B_FALSE;
+
+ /* Check the most common case first with reader lock */
+ rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
+ rwtype = RW_READER;
+again:
if (ROOT_GET_P(hw_rent)) {
ASSERT(ROOT_GET_CONT(hw_rent) == context->hwpg_paddr);
+ hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
+ if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
+ ASSERT(CONT_GET_P(hw_cent));
+ ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did);
+ ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw);
+ ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY);
+ ASSERT(CONT_GET_ASR(hw_cent) ==
+ pgtable_root->hwpg_paddr);
+ rw_exit(&(immu->immu_ctx_rwlock));
+ return;
+ } else {
+ fill_ctx = B_TRUE;
+ }
} else {
+ fill_root = B_TRUE;
+ fill_ctx = B_TRUE;
+ }
+
+ if (rwtype == RW_READER &&
+ rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
+ rw_exit(&(immu->immu_ctx_rwlock));
+ rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
+ rwtype = RW_WRITER;
+ goto again;
+ }
+ rwtype = RW_WRITER;
+
+ if (fill_root == B_TRUE) {
ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
ROOT_SET_P(hw_rent);
immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
}
- hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
- pgtable_root = domain->dom_pgtable_root;
- unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root;
- if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED) {
+ if (fill_ctx == B_TRUE) {
+ hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
+ unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root;
+ ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED);
ASSERT(CONT_GET_P(hw_cent));
ASSERT(CONT_GET_DID(hw_cent) ==
immu->immu_unity_domain->dom_did);
@@ -1547,14 +1548,11 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
/* flush caches */
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
ASSERT(rw_write_held(&(immu->immu_ctx_rwlock)));
- immu_regs_context_flush(immu, 0, 0,
- immu->immu_unity_domain->dom_did, CONTEXT_DSI);
- immu_regs_context_flush(immu, 0, 0, domain->dom_did,
- CONTEXT_DSI);
- immu_regs_iotlb_flush(immu, immu->immu_unity_domain->dom_did,
- 0, 0, TLB_IVA_WHOLE, IOTLB_DSI);
- immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0,
- TLB_IVA_WHOLE, IOTLB_DSI);
+
+ sid = ((bus << 8) | devfunc);
+ immu_regs_context_flush(immu, 0, sid, domain->dom_did,
+ CONTEXT_FSI);
+
immu_regs_wbf_flush(immu);
CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
@@ -1565,14 +1563,8 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
CONT_SET_P(hw_cent);
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
- } else {
- ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED);
- ASSERT(CONT_GET_P(hw_cent));
- ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did);
- ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw);
- ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY);
- ASSERT(CONT_GET_ASR(hw_cent) == pgtable_root->hwpg_paddr);
}
+ rw_exit(&(immu->immu_ctx_rwlock));
}
static pgtable_t *
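
context_set() above now takes immu_ctx_rwlock as reader for the common
already-initialized case and upgrades to writer only when the root or context
entry must be filled in. rw_tryupgrade(9F) fails (returning 0) without
dropping the reader lock when it cannot upgrade in place, so on failure the
code releases the lock, reacquires it as writer, and jumps back to again: to
re-validate state that may have changed while the lock was dropped. The
pattern in isolation (a sketch):

    rw_enter(&lock, RW_READER);
    rwtype = RW_READER;
again:
    if (already_initialized()) {        /* common case: read-only check */
            rw_exit(&lock);
            return;
    }
    if (rwtype == RW_READER && rw_tryupgrade(&lock) == 0) {
            rw_exit(&lock);             /* could not upgrade in place */
            rw_enter(&lock, RW_WRITER);
            rwtype = RW_WRITER;
            goto again;                 /* re-check under the writer lock */
    }
    /* ... fill in the entry as writer ... */
    rw_exit(&lock);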
@@ -1588,7 +1580,8 @@ context_create(immu_t *immu)
hw_rce_t *hw_cent;
/* Allocate a zeroed root table (4K 256b entries) */
- root_table = pgtable_alloc(immu, NULL, IMMU_FLAGS_SLEEP);
+ root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
+ pgtable_zero(immu, root_table);
/*
* Setup context tables for all possible root table entries.
@@ -1597,7 +1590,8 @@ context_create(immu_t *immu)
ctxp = (hw_rce_t *)(root_table->swpg_next_array);
hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
- context = pgtable_alloc(immu, NULL, IMMU_FLAGS_SLEEP);
+ context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
+ pgtable_zero(immu, context);
ASSERT(ROOT_GET_P(hw_rent) == 0);
ROOT_SET_P(hw_rent);
ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
@@ -1723,7 +1717,6 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
ASSERT(d_bus >= 0);
- rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
if (rdip == ddip) {
ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT ||
d_pcib_type == IMMU_PCIB_PCIE_PCIE);
@@ -1793,7 +1786,6 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
"set IMMU context.");
/*NOTREACHED*/
}
- rw_exit(&(immu->immu_ctx_rwlock));
/* XXX do we need a membar_producer() here */
return (DDI_SUCCESS);
@@ -1830,7 +1822,7 @@ PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
* TM field should be clear if not reserved.
* non-leaf is always reserved
*/
- if (next == NULL && immu_regs_is_TM_reserved(immu) == B_FALSE) {
+ if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
if (PDTE_TM(pdte)) {
ddi_err(DER_MODE, rdip, "TM flag set");
return (B_FALSE);
@@ -1869,7 +1861,7 @@ PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
* SNP field should be clear if not reserved.
* non-leaf is always reserved
*/
- if (next == NULL && immu_regs_is_SNP_reserved(immu) == B_FALSE) {
+ if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
if (PDTE_SNP(pdte)) {
ddi_err(DER_MODE, rdip, "SNP set");
return (B_FALSE);
@@ -1911,46 +1903,64 @@ PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
}
/*ARGSUSED*/
static void
-PTE_clear_one(immu_t *immu, domain_t *domain, xlate_t *xlate, uint64_t dvma,
- dev_info_t *rdip)
+PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
+ uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
{
- hw_pdte_t *hwp;
+ uint64_t npages;
+ uint64_t dvma;
pgtable_t *pgtable;
+ hw_pdte_t *hwp;
+ hw_pdte_t *shwp;
int idx;
hw_pdte_t pte;
ASSERT(xlate->xlt_level == 1);
- idx = xlate->xlt_idx;
pgtable = xlate->xlt_pgtable;
+ idx = xlate->xlt_idx;
- ASSERT(dvma % IMMU_PAGESIZE == 0);
ASSERT(pgtable);
ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
- /*
- * since we are clearing PTEs, lock the
- * page table write mode
- */
- rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
+ dvma = *dvma_ptr;
+ npages = *npages_ptr;
+
+ ASSERT(dvma);
+ ASSERT(dvma % IMMU_PAGESIZE == 0);
+ ASSERT(npages);
/*
- * We are at the leaf - next level array must be NULL
+ * since a caller gets a unique dvma for a physical address,
+ * no other concurrent thread will be writing to the same
+ * PTE even if it has the same paddr. So no locks needed.
*/
- ASSERT(pgtable->swpg_next_array == NULL);
+ shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
+
+ hwp = shwp;
+ for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
- hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
+ pte = *hwp;
- pte = *hwp;
- /* Cannot clear a HW PTE that is aleady clear */
- ASSERT(PDTE_P(pte));
- PDTE_CLEAR_P(pte);
- *hwp = pte;
+ /* Cannot clear a HW PTE that is already clear */
+ ASSERT(PDTE_P(pte));
+ PDTE_CLEAR_P(pte);
+ *hwp = pte;
- /* flush writes to HW PTE table */
- immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t));
+ dvma += IMMU_PAGESIZE;
+ npages--;
+ }
+
+
+#ifdef TEST
+ /* don't need to flush writes during unmap */
+ immu_regs_cpu_flush(immu, (caddr_t)shwp,
+ (hwp - shwp) * sizeof (hw_pdte_t));
+#endif
- rw_exit(&(xlate->xlt_pgtable->swpg_rwlock));
+ *dvma_ptr = dvma;
+ *npages_ptr = npages;
+
+ xlate->xlt_idx = idx;
}
/*ARGSUSED*/
@@ -2041,6 +2051,7 @@ PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
}
}
+/*ARGSUSED*/
static void
PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
dev_info_t *rdip, immu_flags_t immu_flags)
@@ -2049,24 +2060,39 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
pte = *hwp;
+#ifndef DEBUG
+ /* Set paddr */
+ ASSERT(paddr % IMMU_PAGESIZE == 0);
+ pte = 0;
+ PDTE_SET_PADDR(pte, paddr);
+ PDTE_SET_READ(pte);
+ PDTE_SET_WRITE(pte);
+ *hwp = pte;
+#else
+
if (PDTE_P(pte)) {
if (PDTE_PADDR(pte) != paddr) {
ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
PDTE_PADDR(pte), paddr);
}
+#ifdef BUGGY_DRIVERS
+ return;
+#else
goto out;
+#endif
}
-
/* Don't touch SW4. It is the present field */
/* clear TM field if not reserved */
- if (immu_regs_is_TM_reserved(immu) == B_FALSE) {
+ if (immu->immu_TM_reserved == B_FALSE) {
PDTE_CLEAR_TM(pte);
}
+#ifdef DEBUG
/* Clear 3rd field for system software - not used */
PDTE_CLEAR_SW3(pte);
+#endif
/* Set paddr */
ASSERT(paddr % IMMU_PAGESIZE == 0);
@@ -2074,18 +2100,25 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
PDTE_SET_PADDR(pte, paddr);
/* clear SNP field if not reserved. */
- if (immu_regs_is_SNP_reserved(immu) == B_FALSE) {
+ if (immu->immu_SNP_reserved == B_FALSE) {
PDTE_CLEAR_SNP(pte);
}
+#ifdef DEBUG
/* Clear SW2 field available for software */
PDTE_CLEAR_SW2(pte);
+#endif
+
+#ifdef DEBUG
/* SP is don't care for PTEs. Clear it for cleanliness */
PDTE_CLEAR_SP(pte);
+#endif
+#ifdef DEBUG
/* Clear SW1 field available for software */
PDTE_CLEAR_SW1(pte);
+#endif
/*
* Now that we are done writing the PTE
@@ -2101,32 +2134,35 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
PDTE_SET_P(pte);
out:
+#ifdef BUGGY_DRIVERS
+ PDTE_SET_READ(pte);
+ PDTE_SET_WRITE(pte);
+#else
if (immu_flags & IMMU_FLAGS_READ)
PDTE_SET_READ(pte);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pte);
-
-#ifdef BUGGY_DRIVERS
- PDTE_SET_READ(pte);
- PDTE_SET_WRITE(pte);
#endif
*hwp = pte;
+#endif
}
/*ARGSUSED*/
static void
PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
- uint64_t *dvma_ptr, paddr_t *paddr_ptr, uint64_t *npages_ptr,
- dev_info_t *rdip, immu_flags_t immu_flags)
+ uint64_t *dvma_ptr, uint64_t *nvpages_ptr, dcookie_t *dcookies,
+ int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
paddr_t paddr;
- uint64_t npages;
+ uint64_t nvpages;
+ uint64_t nppages;
uint64_t dvma;
pgtable_t *pgtable;
hw_pdte_t *hwp;
hw_pdte_t *shwp;
int idx;
+ int j;
ASSERT(xlate->xlt_level == 1);
@@ -2137,50 +2173,75 @@ PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
ASSERT(pgtable);
dvma = *dvma_ptr;
- paddr = *paddr_ptr;
- npages = *npages_ptr;
+ nvpages = *nvpages_ptr;
- ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1));
ASSERT(dvma || (immu_flags & IMMU_FLAGS_PAGE1));
- ASSERT(npages);
-
- /*
- * since we are setting PTEs, lock the page table in
- * write mode
- */
- rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
+ ASSERT(nvpages);
/*
- * we are at the leaf pgtable - no further levels.
- * The next_array field should be NULL.
+ * since a caller gets a unique dvma for a physical address,
+ * no other concurrent thread will be writing to the same
+ * PTE even if it has the same paddr. So no locks needed.
*/
- ASSERT(pgtable->swpg_next_array == NULL);
-
shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
hwp = shwp;
- for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
+ for (j = dcount - 1; j >= 0; j--) {
+ if (nvpages <= dcookies[j].dck_npages)
+ break;
+ nvpages -= dcookies[j].dck_npages;
+ }
+
+ ASSERT(j >= 0);
+ ASSERT(nvpages);
+ ASSERT(nvpages <= dcookies[j].dck_npages);
+ nppages = nvpages;
+ paddr = dcookies[j].dck_paddr +
+ (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
+
+ nvpages = *nvpages_ptr;
+ for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
+
+ ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1));
PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
== B_TRUE);
-
+ nppages--;
+ nvpages--;
paddr += IMMU_PAGESIZE;
dvma += IMMU_PAGESIZE;
- npages--;
+
+ if (nppages == 0) {
+ j++;
+ }
+
+ if (j == dcount) {
+ ASSERT(nvpages == 0);
+ break;
+ }
+
+ ASSERT(nvpages);
+ if (nppages == 0) {
+ nppages = dcookies[j].dck_npages;
+ paddr = dcookies[j].dck_paddr;
+ }
}
/* flush writes to HW PTE table */
immu_regs_cpu_flush(immu, (caddr_t)shwp, (hwp - shwp) *
sizeof (hw_pdte_t));
- *dvma_ptr = dvma;
- *paddr_ptr = paddr;
- *npages_ptr = npages;
- xlate->xlt_idx = idx;
+ if (nvpages) {
+ *dvma_ptr = dvma;
+ *nvpages_ptr = nvpages;
+ } else {
+ *dvma_ptr = 0;
+ *nvpages_ptr = 0;
+ }
- rw_exit(&(pgtable->swpg_rwlock));
+ xlate->xlt_idx = idx;
}
/*ARGSUSED*/
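
PTE_set_all() above now consumes an array of physical-run cookies (dcookie_t)
rather than a single contiguous paddr: the backward scan locates the cookie
in which the remaining nvpages begin, and the forward loop emits one PTE per
page, hopping to the next cookie's dck_paddr whenever a run is exhausted.
A worked example with assumed values:

    /* Two physical runs, mapped as one 3-page DVMA chunk at 0x40000. */
    dcookies[0].dck_paddr = 0x1000; dcookies[0].dck_npages = 2;
    dcookies[1].dck_paddr = 0x8000; dcookies[1].dck_npages = 1;
    /* dvma_map(immu, domain, 0x40000, 3, dcookies, 2, ...) produces: */
    /*   dvma 0x40000 -> paddr 0x1000                                 */
    /*   dvma 0x41000 -> paddr 0x2000   (second page of run 0)        */
    /*   dvma 0x42000 -> paddr 0x8000   (run 1)                       */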
@@ -2195,7 +2256,11 @@ PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
/* if PDE is already set, make sure it is correct */
if (PDTE_P(pde)) {
ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
+#ifdef BUGGY_DRIVERS
+ return;
+#else
goto out;
+#endif
}
/* Dont touch SW4, it is the present bit */
@@ -2231,16 +2296,16 @@ PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
* The present field in a PDE/PTE is not defined
* by the Vt-d spec
*/
-out:
+out:
+#ifdef BUGGY_DRIVERS
+ PDTE_SET_READ(pde);
+ PDTE_SET_WRITE(pde);
+#else
if (immu_flags & IMMU_FLAGS_READ)
PDTE_SET_READ(pde);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pde);
-
-#ifdef BUGGY_DRIVERS
- PDTE_SET_READ(pde);
- PDTE_SET_WRITE(pde);
#endif
PDTE_SET_P(pde);
@@ -2253,7 +2318,7 @@ out:
/*
* Used to set PDEs
*/
-static void
+static boolean_t
PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
dev_info_t *rdip, immu_flags_t immu_flags)
{
@@ -2263,6 +2328,8 @@ PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
hw_pdte_t *hwp;
int level;
uint_t idx;
+ krw_t rwtype;
+ boolean_t set = B_FALSE;
/* xlate should be at level 0 */
ASSERT(xlate->xlt_level == 0);
@@ -2286,16 +2353,16 @@ PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
/* speculative alloc */
if (new == NULL) {
- new = pgtable_alloc(immu, domain, immu_flags);
+ new = pgtable_alloc(immu, immu_flags);
if (new == NULL) {
ddi_err(DER_PANIC, rdip, "pgtable alloc err");
}
-
}
- /* Alway lock the pgtable in write mode */
- rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
-
+ /* Lock the pgtable in READ mode first */
+ rw_enter(&(pgtable->swpg_rwlock), RW_READER);
+ rwtype = RW_READER;
+again:
hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
ASSERT(pgtable->swpg_next_array);
@@ -2307,26 +2374,38 @@ PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
* if yes, verify
*/
if (next == NULL) {
+ /* Change to a write lock */
+ if (rwtype == RW_READER &&
+ rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
+ rw_exit(&(pgtable->swpg_rwlock));
+ rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
+ rwtype = RW_WRITER;
+ goto again;
+ }
+ rwtype = RW_WRITER;
+ pgtable_zero(immu, new);
next = new;
new = NULL;
- if (level == 2) {
- /* leaf cannot have next_array */
- kmem_free(next->swpg_next_array,
- IMMU_PAGESIZE);
- next->swpg_next_array = NULL;
- }
(pgtable->swpg_next_array)[idx] = next;
PDE_set_one(immu, hwp, next, rdip, immu_flags);
+ set = B_TRUE;
+ rw_downgrade(&(pgtable->swpg_rwlock));
+ rwtype = RW_READER;
} else {
hw_pdte_t pde = *hwp;
+#ifndef BUGGY_DRIVERS
+ /*
+ * If buggy driver we already set permission
+ * READ+WRITE so nothing to do for that case
+ * XXX Check that read writer perms change before
+ * actually setting perms. Also need to hold lock
+ */
if (immu_flags & IMMU_FLAGS_READ)
PDTE_SET_READ(pde);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pde);
-#ifdef BUGGY_DRIVERS
-/* If buggy driver we already set permission READ+WRITE so nothing to do */
#endif
*hwp = pde;
@@ -2336,13 +2415,15 @@ PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
== B_TRUE);
(xlate - 1)->xlt_pgtable = next;
-
+ ASSERT(rwtype == RW_READER);
rw_exit(&(pgtable->swpg_rwlock));
}
if (new) {
- pgtable_free(immu, new, domain);
+ pgtable_free(immu, new);
}
+
+ return (set);
}
/*
@@ -2357,35 +2438,38 @@ PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
* rdip: requesting device
* immu_flags: flags
*/
-static void
-dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t spaddr,
- uint64_t npages, dev_info_t *rdip, immu_flags_t immu_flags)
+static boolean_t
+dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snvpages,
+ dcookie_t *dcookies, int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
uint64_t dvma;
- paddr_t paddr;
uint64_t n;
int nlevels = immu->immu_dvma_nlevels;
xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
+ boolean_t pde_set = B_FALSE;
ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS);
- ASSERT(spaddr % IMMU_PAGESIZE == 0);
ASSERT(sdvma % IMMU_PAGESIZE == 0);
- ASSERT(npages);
+ ASSERT(snvpages);
- n = npages;
+ n = snvpages;
dvma = sdvma;
- paddr = spaddr;
while (n > 0) {
xlate_setup(immu, dvma, xlate, nlevels, rdip);
/* Lookup or allocate PGDIRs and PGTABLEs if necessary */
- PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags);
+ if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
+ == B_TRUE) {
+ pde_set = B_TRUE;
+ }
/* set all matching ptes that fit into this leaf pgtable */
- PTE_set_all(immu, domain, &xlate[1], &dvma, &paddr, &n, rdip,
- immu_flags);
+ PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
+ dcount, rdip, immu_flags);
}
+
+ return (pde_set);
}
/*
@@ -2400,30 +2484,34 @@ dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t spaddr,
* rdip: requesting device
*/
static void
-dvma_unmap(immu_t *immu, domain_t *domain, uint64_t dvma, uint64_t snpages,
+dvma_unmap(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snpages,
dev_info_t *rdip)
{
int nlevels = immu->immu_dvma_nlevels;
xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
- uint64_t npages;
+ uint64_t n;
+ uint64_t dvma;
ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS);
- ASSERT(dvma != 0);
- ASSERT(dvma % IMMU_PAGESIZE == 0);
+ ASSERT(sdvma != 0);
+ ASSERT(sdvma % IMMU_PAGESIZE == 0);
ASSERT(snpages);
- for (npages = snpages; npages > 0; npages--) {
+ dvma = sdvma;
+ n = snpages;
+
+ while (n > 0) {
/* setup the xlate array */
xlate_setup(immu, dvma, xlate, nlevels, rdip);
/* just lookup existing pgtables. Should never fail */
PDE_lookup(immu, domain, xlate, nlevels, rdip);
- /* XXX should be more efficient - batch clear */
- PTE_clear_one(immu, domain, &xlate[1], dvma, rdip);
-
- dvma += IMMU_PAGESIZE;
+ /* clear all matching ptes that fit into this leaf pgtable */
+ PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
}
+
+ /* No need to flush IOTLB after unmap */
}
static uint64_t
@@ -2431,7 +2519,7 @@ dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
{
ddi_dma_attr_t *dma_attr;
uint64_t dvma;
- size_t xsize, align, nocross;
+ size_t xsize, align;
uint64_t minaddr, maxaddr;
ASSERT(domain->dom_maptype != IMMU_MAPTYPE_UNITY);
@@ -2442,9 +2530,9 @@ dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
/* parameters */
xsize = npages * IMMU_PAGESIZE;
align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
- nocross = (size_t)(dma_attr->dma_attr_seg + 1);
minaddr = dma_attr->dma_attr_addr_lo;
maxaddr = dma_attr->dma_attr_addr_hi + 1;
+ /* nocross is checked in cookie_update() */
/* handle the rollover cases */
if (maxaddr < dma_attr->dma_attr_addr_hi) {
@@ -2455,7 +2543,7 @@ dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
* allocate from vmem arena.
*/
dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
- xsize, align, 0, nocross, (void *)(uintptr_t)minaddr,
+ xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
(void *)(uintptr_t)maxaddr, VM_NOSLEEP);
ASSERT(dvma);
@@ -2485,25 +2573,31 @@ dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
/*ARGSUSED*/
static void
cookie_free(rootnex_dma_t *dma, immu_t *immu, domain_t *domain,
- dev_info_t *ddip, dev_info_t *rdip)
+ dev_info_t *rdip)
{
int i;
uint64_t dvma;
uint64_t npages;
dvcookie_t *dvcookies = dma->dp_dvcookies;
- uint64_t dvmax = dma->dp_dvmax;
ASSERT(dma->dp_max_cookies);
ASSERT(dma->dp_max_dcookies);
ASSERT(dma->dp_dvmax < dma->dp_max_cookies);
ASSERT(dma->dp_dmax < dma->dp_max_dcookies);
- for (i = 0; i <= dvmax; i++) {
- dvma = dvcookies[i].dvck_dvma;
- npages = dvcookies[i].dvck_npages;
- dvma_unmap(immu, domain, dvma, npages, rdip);
- dvma_free(domain, dvma, npages);
+ /*
+ * we allocated DVMA in a single chunk. Calculate total number
+ * of pages
+ */
+ for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) {
+ npages += dvcookies[i].dvck_npages;
}
+ dvma = dvcookies[0].dvck_dvma;
+#ifdef DEBUG
+ /* Unmap only in DEBUG mode */
+ dvma_unmap(immu, domain, dvma, npages, rdip);
+#endif
+ dvma_free(domain, dvma, npages);
kmem_free(dma->dp_dvcookies, sizeof (dvcookie_t) * dma->dp_max_cookies);
dma->dp_dvcookies = NULL;
@@ -2579,17 +2673,15 @@ cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq,
if (max_cookies > prealloc) {
cookies = kmem_zalloc(cookie_size, kmflag);
if (cookies == NULL) {
- kmem_free(dvcookies, sizeof (dvcookie_t) *
- max_cookies);
- kmem_free(dcookies, sizeof (dcookie_t) *
- max_dcookies);
+ kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies);
+ kmem_free(dcookies, sizeof (dcookie_t) * max_dcookies);
goto fail;
}
dma->dp_need_to_free_cookie = B_TRUE;
} else {
/* the preallocated buffer fits this size */
cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
- bzero(cookies, sizeof (ddi_dma_cookie_t) * max_cookies);
+ bzero(cookies, sizeof (ddi_dma_cookie_t)* max_cookies);
dma->dp_need_to_free_cookie = B_FALSE;
}
@@ -2601,7 +2693,6 @@ cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq,
dma->dp_max_dcookies = max_dcookies;
dma->dp_dvmax = 0;
dma->dp_dmax = 0;
-
sinfo->si_max_pages = dma->dp_max_cookies;
return (DDI_SUCCESS);
@@ -2617,13 +2708,14 @@ fail:
dma->dp_dmax = 0;
dma->dp_need_to_free_cookie = B_FALSE;
sinfo->si_max_pages = 0;
+
return (DDI_FAILURE);
}
/*ARGSUSED*/
static void
cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr,
- int64_t psize, uint64_t maxseg)
+ int64_t psize, uint64_t maxseg, size_t nocross)
{
dvcookie_t *dvcookies = dma->dp_dvcookies;
dcookie_t *dcookies = dma->dp_dcookies;
@@ -2642,34 +2734,40 @@ cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr,
/*
* check to see if this page would put us
- * over the max cookie size
+ * over the max cookie size.
*/
if (cookies[dvmax].dmac_size + psize > maxseg) {
- dvcookies[dvmax].dvck_eidx = dmax;
dvmax++; /* use the next dvcookie */
- dmax++; /* also mean we use the next dcookie */
- dvcookies[dvmax].dvck_sidx = dmax;
-
+ dmax++; /* also means we use the next dcookie */
ASSERT(dvmax < dma->dp_max_cookies);
ASSERT(dmax < dma->dp_max_dcookies);
}
/*
- * If the cookie is mapped or empty
+ * check to see if this page would make us larger than
+ * the nocross boundary. If yes, create a new cookie;
+ * otherwise we will fail later in vmem_xalloc() due
+ * to an overconstrained alloc request.
+ * nocross == 0 implies no nocross constraint.
*/
- if (dvcookies[dvmax].dvck_dvma != 0 ||
- dvcookies[dvmax].dvck_npages == 0) {
- /* if mapped, we need a new empty one */
- if (dvcookies[dvmax].dvck_dvma != 0) {
- dvcookies[dvmax].dvck_eidx = dmax;
- dvmax++;
- dmax++;
- dvcookies[dvmax].dvck_sidx = dma->dp_dmax;
+ if (nocross > 0) {
+ ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE
+ <= nocross);
+ if ((dvcookies[dvmax].dvck_npages + 1) * IMMU_PAGESIZE
+ > nocross) {
+ dvmax++; /* use the next dvcookie */
+ dmax++; /* also means we use the next dcookie */
ASSERT(dvmax < dma->dp_max_cookies);
ASSERT(dmax < dma->dp_max_dcookies);
}
+ ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE
+ <= nocross);
+ }
- /* ok, we have an empty cookie */
+ /*
+ * If the cookie is empty
+ */
+ if (dvcookies[dvmax].dvck_npages == 0) {
ASSERT(cookies[dvmax].dmac_size == 0);
ASSERT(dvcookies[dvmax].dvck_dvma == 0);
ASSERT(dvcookies[dvmax].dvck_npages
@@ -2683,7 +2781,7 @@ cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr,
dcookies[dmax].dck_npages = 1;
cookies[dvmax].dmac_size = psize;
} else {
- /* Unmapped cookie but not empty. Add to it */
+ /* Cookie not empty. Add to it */
cookies[dma->dp_dvmax].dmac_size += psize;
ASSERT(dvcookies[dma->dp_dvmax].dvck_dvma == 0);
dvcookies[dma->dp_dvmax].dvck_npages++;
@@ -2712,55 +2810,42 @@ cookie_finalize(ddi_dma_impl_t *hp, immu_t *immu, domain_t *domain,
dev_info_t *rdip, immu_flags_t immu_flags)
{
int i;
- int j;
rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
dvcookie_t *dvcookies = dma->dp_dvcookies;
dcookie_t *dcookies = dma->dp_dcookies;
ddi_dma_cookie_t *cookies = dma->dp_cookies;
- paddr_t paddr;
uint64_t npages;
uint64_t dvma;
+ boolean_t pde_set;
- for (i = 0; i <= dma->dp_dvmax; i++) {
- /* Finish up the last cookie */
- if (i == dma->dp_dvmax) {
- dvcookies[i].dvck_eidx = dma->dp_dmax;
- }
- if ((dvma = dvcookies[i].dvck_dvma) != 0) {
- cookies[i].dmac_laddress = dvma;
- ASSERT(cookies[i].dmac_size != 0);
- cookies[i].dmac_type = 0;
- for (j = dvcookies[i].dvck_sidx;
- j <= dvcookies[i].dvck_eidx; j++) {
- ASSERT(dcookies[j].dck_paddr != 0);
- ASSERT(dcookies[j].dck_npages != 0);
- }
- continue;
- }
+ /* First calculate the total number of pages required */
+ for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) {
+ npages += dvcookies[i].dvck_npages;
+ }
- dvma = dvma_alloc(hp, domain, dvcookies[i].dvck_npages);
+ /* Now allocate dvma */
+ dvma = dvma_alloc(hp, domain, npages);
- dvcookies[i].dvck_dvma = dvma;
+ /* Now map the dvma */
+ pde_set = dvma_map(immu, domain, dvma, npages, dcookies,
+ dma->dp_dmax + 1, rdip, immu_flags);
+
+ /* Invalidate the IOTLB */
+ immu_regs_iotlb_flush(immu, domain->dom_did, dvma, npages,
+ pde_set == B_TRUE ? TLB_IVA_WHOLE : TLB_IVA_LEAF, IOTLB_PSI);
- /* Set "real" cookies addr, cookie size already set */
+ /* Now setup dvcookies and real cookie addresses */
+ for (i = 0; i <= dma->dp_dvmax; i++) {
+ dvcookies[i].dvck_dvma = dvma;
cookies[i].dmac_laddress = dvma;
ASSERT(cookies[i].dmac_size != 0);
cookies[i].dmac_type = 0;
-
- for (j = dvcookies[i].dvck_sidx;
- j <= dvcookies[i].dvck_eidx; j++) {
-
- paddr = dcookies[j].dck_paddr;
- npages = dcookies[j].dck_npages;
-
- ASSERT(paddr);
- ASSERT(npages);
-
- dvma_map(immu, domain, dvma, paddr, npages,
- rdip, immu_flags);
- dvma += npages * IMMU_PAGESIZE;
- }
+ dvma += (dvcookies[i].dvck_npages * IMMU_PAGESIZE);
}
+
+#ifdef TEST
+ immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, 0, IOTLB_DSI);
+#endif
}
/*
@@ -2771,7 +2856,6 @@ cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
ddi_dma_attr_t *a, immu_t *immu, domain_t *domain, dev_info_t *rdip,
uint_t prealloc_count, immu_flags_t immu_flags)
{
-
ddi_dma_atyp_t buftype;
uint64_t offset;
page_t **pparray;
@@ -2785,6 +2869,7 @@ cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
rootnex_sglinfo_t *sglinfo;
ddi_dma_obj_t *dmar_object;
rootnex_dma_t *dma;
+ size_t nocross;
dma = (rootnex_dma_t *)hp->dmai_private;
sglinfo = &(dma->dp_sglinfo);
@@ -2794,6 +2879,7 @@ cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
buftype = dmar_object->dmao_type;
size = dmar_object->dmao_size;
+ nocross = (size_t)(a->dma_attr_seg + 1);
/*
* Allocate cookie, dvcookie and dcookie
@@ -2842,7 +2928,7 @@ cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
/*
* setup dvcookie and dcookie for [paddr, paddr+psize)
*/
- cookie_update(domain, dma, paddr, psize, maxseg);
+ cookie_update(domain, dma, paddr, psize, maxseg, nocross);
size -= psize;
while (size > 0) {
@@ -2867,7 +2953,7 @@ cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
/*
* set dvcookie and dcookie for [paddr, paddr+psize)
*/
- cookie_update(domain, dma, paddr, psize, maxseg);
+ cookie_update(domain, dma, paddr, psize, maxseg, nocross);
size -= psize;
}
@@ -2955,6 +3041,8 @@ immu_dvma_physmem_update(uint64_t addr, uint64_t size)
{
uint64_t start;
uint64_t npages;
+ int dcount;
+ dcookie_t dcookies[1] = {0};
domain_t *domain;
/*
@@ -2974,13 +3062,17 @@ immu_dvma_physmem_update(uint64_t addr, uint64_t size)
start = IMMU_ROUNDOWN(addr);
npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
- dvma_map(domain->dom_immu, domain, start, start,
- npages, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
+ dcookies[0].dck_paddr = start;
+ dcookies[0].dck_npages = npages;
+ dcount = 1;
+ (void) dvma_map(domain->dom_immu, domain, start, npages,
+ dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
}
mutex_exit(&immu_domain_lock);
}
+
int
immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags)
@@ -2989,6 +3081,9 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
dev_info_t *ddip;
domain_t *domain;
immu_t *immu;
+ dcookie_t dcookies[1] = {0};
+ int dcount = 0;
+ boolean_t pde_set = B_TRUE;
int r = DDI_FAILURE;
ASSERT(immu_enable == B_TRUE);
@@ -3012,6 +3107,42 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
immu_flags |= dma_to_immu_flags(dmareq);
+ immu = immu_dvma_get_immu(rdip, immu_flags);
+ if (immu == NULL) {
+ /*
+ * possible that there is no IOMMU unit for this device
+ * - BIOS bugs are one example.
+ */
+ ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
+ return (DDI_DMA_NORESOURCES);
+ }
+
+
+ /*
+ * redirect isa devices attached under lpc to lpc dip
+ */
+ if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
+ rdip = get_lpc_devinfo(immu, rdip, immu_flags);
+ if (rdip == NULL) {
+ ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
+ /*NOTREACHED*/
+ }
+ }
+
+ /* Reset immu, as redirection can change IMMU */
+ immu = NULL;
+
+ /*
+ * for gart, redirect to the real graphic devinfo
+ */
+ if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
+ rdip = get_gfx_devinfo(rdip);
+ if (rdip == NULL) {
+ ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
+ /*NOTREACHED*/
+ }
+ }
+
/*
* Setup DVMA domain for the device. This does
* work only the first time we do DVMA for a
@@ -3040,7 +3171,6 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
ASSERT(immu);
if (domain->dom_did == IMMU_UNITY_DID) {
ASSERT(domain == immu->immu_unity_domain);
-
/* mapping already done. Let rootnex create cookies */
r = DDI_DMA_USE_PHYSICAL;
} else if (immu_flags & IMMU_FLAGS_DMAHDL) {
@@ -3055,18 +3185,22 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
"DMA handle (%p): NULL attr", hp);
/*NOTREACHED*/
}
+
if (cookie_create(hp, dmareq, attr, immu, domain, rdip,
prealloc_count, immu_flags) != DDI_SUCCESS) {
ddi_err(DER_MODE, rdip, "dvcookie_alloc: failed");
return (DDI_DMA_NORESOURCES);
}
-
- /* flush write buffer */
- immu_regs_wbf_flush(immu);
r = DDI_DMA_MAPPED;
} else if (immu_flags & IMMU_FLAGS_MEMRNG) {
- dvma_map(immu, domain, mrng->mrng_start, mrng->mrng_start,
- mrng->mrng_npages, rdip, immu_flags);
+ dcookies[0].dck_paddr = mrng->mrng_start;
+ dcookies[0].dck_npages = mrng->mrng_npages;
+ dcount = 1;
+ pde_set = dvma_map(immu, domain, mrng->mrng_start,
+ mrng->mrng_npages, dcookies, dcount, rdip, immu_flags);
+ immu_regs_iotlb_flush(immu, domain->dom_did, mrng->mrng_start,
+ mrng->mrng_npages, pde_set == B_TRUE ?
+ TLB_IVA_WHOLE : TLB_IVA_LEAF, IOTLB_PSI);
r = DDI_DMA_MAPPED;
} else {
ddi_err(DER_PANIC, rdip, "invalid flags for immu_dvma_map()");
@@ -3082,12 +3216,6 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
return (DDI_DMA_NORESOURCES);
}
- /* flush caches */
- rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
- immu_regs_context_flush(immu, 0, 0, domain->dom_did, CONTEXT_DSI);
- rw_exit(&(immu->immu_ctx_rwlock));
- immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, TLB_IVA_WHOLE,
- IOTLB_DSI);
immu_regs_wbf_flush(immu);
return (r);
@@ -3133,6 +3261,42 @@ immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip)
}
immu_flags = dma->dp_sleep_flags;
+ immu = immu_dvma_get_immu(rdip, immu_flags);
+ if (immu == NULL) {
+ /*
+ * It is possible that there is no IOMMU unit for this device;
+ * BIOS bugs are one example.
+ */
+ ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
+ return (DDI_DMA_NORESOURCES);
+ }
+
+
+ /*
+ * Redirect ISA devices attached under an LPC bridge to the LPC dip.
+ */
+ if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
+ rdip = get_lpc_devinfo(immu, rdip, immu_flags);
+ if (rdip == NULL) {
+ ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
+ /*NOTREACHED*/
+ }
+ }
+
+ /* Reset immu, as redirection can change IMMU */
+ immu = NULL;
+
+ /*
+ * For agpgart, redirect to the real graphics device's devinfo.
+ */
+ if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
+ rdip = get_gfx_devinfo(rdip);
+ if (rdip == NULL) {
+ ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
+ /*NOTREACHED*/
+ }
+ }
+
ddip = NULL;
domain = device_domain(rdip, &ddip, immu_flags);
if (domain == NULL || domain->dom_did == 0 || ddip == NULL) {
@@ -3163,15 +3327,9 @@ immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip)
/*NOTREACHED*/
}
- /* free all cookies */
- cookie_free(dma, immu, domain, ddip, rdip);
+ cookie_free(dma, immu, domain, rdip);
- /* flush caches */
- rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
- immu_regs_context_flush(immu, 0, 0, domain->dom_did, CONTEXT_DSI);
- rw_exit(&(immu->immu_ctx_rwlock));
- immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, TLB_IVA_WHOLE,
- IOTLB_DSI);
+ /* No invalidation needed for unmap */
immu_regs_wbf_flush(immu);
return (DDI_SUCCESS);
@@ -3181,10 +3339,10 @@ immu_devi_t *
immu_devi_get(dev_info_t *rdip)
{
immu_devi_t *immu_devi;
+ volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
- mutex_enter(&DEVI(rdip)->devi_lock);
- immu_devi = DEVI(rdip)->devi_iommu;
- mutex_exit(&DEVI(rdip)->devi_lock);
-
+ /* We only need an atomic read here, so no lock is required. */
+ immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
+ 0);
return (immu_devi);
}
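immu_devi_get() drops devi_lock in favor of a lock-free read: atomic_or_64_nv() with an operand of 0 returns the current value of the 64-bit field without changing it, which is exactly an atomic load. A portable user-level sketch of the same idiom using C11 atomics (atomic_or_64_nv() itself is the illumos atomic_ops interface):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uintptr_t devi_iommu;	/* stands in for DEVI(rdip)->devi_iommu */

/* lock-free read of a pointer-sized field, as immu_devi_get() now does */
static void *
devi_get(void)
{
	return ((void *)atomic_load_explicit(&devi_iommu,
	    memory_order_relaxed));
}

int
main(void)
{
	atomic_store(&devi_iommu, (uintptr_t)0xdeadbeef);
	printf("%p\n", devi_get());
	return (0);
}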
diff --git a/usr/src/uts/i86pc/io/immu_regs.c b/usr/src/uts/i86pc/io/immu_regs.c
index 1c7fe2c65e..9f2b69bd13 100644
--- a/usr/src/uts/i86pc/io/immu_regs.c
+++ b/usr/src/uts/i86pc/io/immu_regs.c
@@ -31,6 +31,7 @@
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
#include <sys/spl.h>
+#include <sys/sysmacros.h>
#include <sys/immu.h>
#define get_reg32(immu, offset) ddi_get32((immu)->immu_regs_handle, \
@@ -87,8 +88,6 @@ iotlb_flush(immu_t *immu, uint_t domain_id,
uint_t iva_offset, iotlb_offset;
uint64_t status = 0;
- ASSERT(MUTEX_HELD(&(immu->immu_regs_lock)));
-
/* no lock needed since cap and excap fields are RDONLY */
iva_offset = IMMU_ECAP_GET_IRO(immu->immu_regs_excap);
iotlb_offset = iva_offset + 8;
@@ -110,16 +109,13 @@ iotlb_flush(immu_t *immu, uint_t domain_id,
*/
switch (type) {
case IOTLB_PSI:
- if (!IMMU_CAP_GET_PSI(immu->immu_regs_cap) ||
- (am > IMMU_CAP_GET_MAMV(immu->immu_regs_cap)) ||
- (addr & IMMU_PAGEOFFSET)) {
- goto ignore_psi;
- }
+ ASSERT(IMMU_CAP_GET_PSI(immu->immu_regs_cap));
+ ASSERT(am <= IMMU_CAP_GET_MAMV(immu->immu_regs_cap));
+ ASSERT(!(addr & IMMU_PAGEOFFSET));
command |= TLB_INV_PAGE | TLB_INV_IVT |
TLB_INV_DID(domain_id);
iva = addr | am | TLB_IVA_HINT(hint);
break;
-ignore_psi:
case IOTLB_DSI:
command |= TLB_INV_DOMAIN | TLB_INV_IVT |
TLB_INV_DID(domain_id);
@@ -133,9 +129,7 @@ ignore_psi:
return;
}
- /* verify there is no pending command */
- wait_completion(immu, iotlb_offset, get_reg64,
- (!(status & TLB_INV_IVT)), status);
+ ASSERT(!(status & TLB_INV_IVT));
if (iva)
put_reg64(immu, iva_offset, iva);
put_reg64(immu, iotlb_offset, command);
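The removed wait_completion() polled the IOTLB command register until any in-flight invalidation drained; the new ASSERT instead documents the invariant that no command can still be pending here. wait_completion() is a macro local to immu_regs.c, so the sketch below is an assumed expansion of its polling shape, simulated with a fake register so that it runs standalone (IVT as bit 63 follows the VT-d IOTLB register layout):

#include <stdint.h>
#include <stdio.h>

#define	TLB_INV_IVT	(1ULL << 63)	/* assumed bit position */

static uint64_t fake_iotlb_reg = TLB_INV_IVT; /* pretend a flush is in flight */

/* stand-in for get_reg64(immu, iotlb_offset) */
static uint64_t
get_reg64(void)
{
	uint64_t v = fake_iotlb_reg;

	fake_iotlb_reg = 0;	/* "hardware" completes after one poll */
	return (v);
}

int
main(void)
{
	uint64_t status;

	/* the shape wait_completion() presumably expands to */
	do {
		status = get_reg64();
	} while (status & TLB_INV_IVT);	/* IVT set: invalidation in flight */
	printf("invalidation drained (status=0x%llx)\n",
	    (unsigned long long)status);
	return (0);
}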
@@ -148,55 +142,55 @@ ignore_psi:
* iotlb page specific invalidation
*/
static void
-iotlb_psi(immu_t *immu, uint_t domain_id,
- uint64_t dvma, uint_t count, uint_t hint)
+iotlb_psi(immu_t *immu, uint_t did, uint64_t dvma, uint_t snpages,
+ uint_t hint)
{
- uint_t am = 0;
- uint_t max_am = 0;
- uint64_t align = 0;
- uint64_t dvma_pg = 0;
- uint_t used_count = 0;
+ int dvma_am;
+ int npg_am;
+ int max_am;
+ int am;
+ uint64_t align;
+ int npages_left;
+ int npages;
+ int i;
+
+ ASSERT(IMMU_CAP_GET_PSI(immu->immu_regs_cap));
+ ASSERT(dvma % IMMU_PAGESIZE == 0);
+
+ max_am = IMMU_CAP_GET_MAMV(immu->immu_regs_cap);
mutex_enter(&(immu->immu_regs_lock));
- /* choose page specified invalidation */
- if (IMMU_CAP_GET_PSI(immu->immu_regs_cap)) {
- /* MAMV is valid only if PSI is set */
- max_am = IMMU_CAP_GET_MAMV(immu->immu_regs_cap);
- while (count != 0) {
- /* First calculate alignment of DVMA */
- dvma_pg = IMMU_BTOP(dvma);
- ASSERT(dvma_pg != NULL);
- ASSERT(count >= 1);
- for (align = 1; (dvma_pg & align) == 0; align <<= 1)
- ;
- /* truncate count to the nearest power of 2 */
- for (used_count = 1, am = 0; count >> used_count != 0;
- used_count <<= 1, am++)
+ npages_left = snpages;
+ for (i = 0; i < immu_flush_gran && npages_left > 0; i++) {
+ /* First calculate alignment of DVMA */
+
+ if (dvma == 0) {
+ dvma_am = max_am;
+ } else {
+ for (align = (1 << 12), dvma_am = 1;
+ (dvma & align) == 0; align <<= 1, dvma_am++)
;
- if (am > max_am) {
- am = max_am;
- used_count = 1 << am;
- }
- if (align >= used_count) {
- iotlb_flush(immu, domain_id,
- dvma, am, hint, IOTLB_PSI);
- } else {
- /* align < used_count */
- used_count = align;
- for (am = 0; (1 << am) != used_count; am++)
- ;
- iotlb_flush(immu, domain_id,
- dvma, am, hint, IOTLB_PSI);
- }
- count -= used_count;
- dvma = (dvma_pg + used_count) << IMMU_PAGESHIFT;
+ dvma_am--;
}
- } else {
- /* choose domain invalidation */
- iotlb_flush(immu, domain_id, dvma, 0, 0, IOTLB_DSI);
+
+ /* Calculate npg_am: floor(log2(pages remaining)) */
+ npages = npages_left;
+ for (npg_am = 0, npages >>= 1; npages; npages >>= 1, npg_am++)
+ ;
+
+ am = MIN(max_am, MIN(dvma_am, npg_am));
+
+ iotlb_flush(immu, did, dvma, am, hint, IOTLB_PSI);
+
+ npages = (1 << am);
+ npages_left -= npages;
+ dvma += (npages * IMMU_PAGESIZE);
}
+ if (npages_left) {
+ iotlb_flush(immu, did, 0, 0, 0, IOTLB_DSI);
+ }
mutex_exit(&(immu->immu_regs_lock));
}
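The rewritten iotlb_psi() flushes a run of pages in power-of-two chunks: each chunk's address mask (am) is the largest 2^am pages that is both naturally aligned at the current dvma and no larger than what remains, capped by the hardware's MAMV. A standalone sketch of that arithmetic, where compute_am() mirrors the loop bodies above and the values in main() are illustrative:

#include <stdio.h>
#include <stdint.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

/* largest am with 2^am <= npages and dvma aligned on 2^am pages */
static int
compute_am(uint64_t dvma, int npages, int max_am)
{
	uint64_t align;
	int dvma_am, npg_am;

	if (dvma == 0) {
		dvma_am = max_am;	/* address 0 is aligned to anything */
	} else {
		for (align = (1 << 12), dvma_am = 1;
		    (dvma & align) == 0; align <<= 1, dvma_am++)
			;
		dvma_am--;
	}

	for (npg_am = 0, npages >>= 1; npages; npages >>= 1, npg_am++)
		;

	return (MIN(max_am, MIN(dvma_am, npg_am)));
}

int
main(void)
{
	/* page 0x12 is 2-page aligned, 8 pages to go, MAMV of 6 */
	int am = compute_am(0x12000, 8, 6);

	printf("am=%d -> flush %d pages\n", am, 1 << am);	/* am=1 */
	return (0);
}

After immu_flush_gran such chunks (five by default, per the new tunable), any remainder is flushed with a single domain-selective invalidation instead, which bounds the number of register writes per request.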
@@ -386,6 +380,10 @@ setup_regs(immu_t *immu)
}
}
+ /* Record whether the SNP and TM pagetable fields are reserved */
+ immu->immu_SNP_reserved = immu_regs_is_SNP_reserved(immu);
+ immu->immu_TM_reserved = immu_regs_is_TM_reserved(immu);
+
/*
* Check for Mobile 4 series chipset
*/
@@ -637,15 +635,15 @@ immu_regs_wbf_flush(immu_t *immu)
void
immu_regs_cpu_flush(immu_t *immu, caddr_t addr, uint_t size)
{
- uint_t i;
+ uint64_t i;
ASSERT(immu);
if (immu->immu_dvma_coherent == B_TRUE)
return;
- for (i = 0; i < size; i += x86_clflush_size) {
- clflush_insn(addr+i);
+ for (i = 0; i < size; i += x86_clflush_size, addr += x86_clflush_size) {
+ clflush_insn(addr);
}
mfence_insn();
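immu_regs_cpu_flush() walks the buffer one cache line at a time; the change widens the loop counter to 64 bits and advances the pointer instead of re-adding the offset each iteration. The same pattern with the SSE2 intrinsics available in userland; clflush_insn()/mfence_insn() are the kernel's wrappers, and 64 is an assumed line size where the kernel reads x86_clflush_size from CPUID:

#include <emmintrin.h>	/* _mm_clflush, _mm_mfence */
#include <stdint.h>

#define	CLFLUSH_SIZE	64	/* assumed; the kernel uses x86_clflush_size */

static void
cpu_flush(char *addr, uint64_t size)
{
	uint64_t i;

	/* flush every cache line backing [addr, addr + size) */
	for (i = 0; i < size; i += CLFLUSH_SIZE, addr += CLFLUSH_SIZE)
		_mm_clflush(addr);
	_mm_mfence();	/* order the flushes before subsequent stores */
}

int
main(void)
{
	static char buf[256];

	cpu_flush(buf, sizeof (buf));
	return (0);
}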
@@ -657,10 +655,26 @@ immu_regs_iotlb_flush(immu_t *immu, uint_t domainid, uint64_t dvma,
{
ASSERT(immu);
+#ifndef TEST
+ if (type == IOTLB_PSI && !IMMU_CAP_GET_PSI(immu->immu_regs_cap)) {
+ dvma = 0;
+ count = 0;
+ hint = 0;
+ type = IOTLB_DSI;
+ }
+#else
+ if (type == IOTLB_PSI) {
+ dvma = 0;
+ count = 0;
+ hint = 0;
+ type = IOTLB_DSI;
+ }
+#endif
+
+
switch (type) {
case IOTLB_PSI:
ASSERT(domainid > 0);
- ASSERT(dvma > 0);
ASSERT(count > 0);
iotlb_psi(immu, domainid, dvma, count, hint);
break;
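immu_regs_iotlb_flush() now centralizes the fallback: callers may always request a page-selective invalidation, and if the unit lacks PSI capability the request is widened to a domain-selective one (the #else branch forces DSI whenever the file is built with -DTEST, so the wider path gets exercised). A minimal sketch of that capability gate, with a hypothetical psi_capable flag standing in for IMMU_CAP_GET_PSI():

#include <stdio.h>

typedef enum { IOTLB_PSI, IOTLB_DSI, IOTLB_GLOBAL } immu_iotlb_inv_t;

/* widen a PSI request to DSI when the hardware cannot do PSI */
static immu_iotlb_inv_t
iotlb_inv_type(immu_iotlb_inv_t type, int psi_capable)
{
	if (type == IOTLB_PSI && !psi_capable)
		return (IOTLB_DSI);
	return (type);
}

int
main(void)
{
	printf("%d\n", iotlb_inv_type(IOTLB_PSI, 0));	/* 1 == IOTLB_DSI */
	return (0);
}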
@@ -728,9 +742,7 @@ immu_regs_context_flush(immu_t *immu, uint8_t function_mask,
}
mutex_enter(&(immu->immu_regs_lock));
- /* verify there is no pending command */
- wait_completion(immu, IMMU_REG_CONTEXT_CMD, get_reg64,
- (!(status & CCMD_INV_ICC)), status);
+ ASSERT(!(get_reg64(immu, IMMU_REG_CONTEXT_CMD) & CCMD_INV_ICC));
put_reg64(immu, IMMU_REG_CONTEXT_CMD, command);
wait_completion(immu, IMMU_REG_CONTEXT_CMD, get_reg64,
(!(status & CCMD_INV_ICC)), status);
diff --git a/usr/src/uts/i86pc/sys/immu.h b/usr/src/uts/i86pc/sys/immu.h
index f807ad8c42..ea4da80ff3 100644
--- a/usr/src/uts/i86pc/sys/immu.h
+++ b/usr/src/uts/i86pc/sys/immu.h
@@ -342,7 +342,7 @@ typedef enum context_inv {
#define ADDR_AM_OFFSET(n, m) ((n) & (ADDR_AM_MAX(m) - 1))
/* dmar fault event */
-#define IMMU_INTR_IPL (8)
+#define IMMU_INTR_IPL (4)
#define IMMU_REG_FEVNT_CON_IM_SHIFT (31)
#define IMMU_ALLOC_RESOURCE_DELAY (drv_usectohz(5000))
@@ -521,6 +521,8 @@ typedef struct immu {
/* IOMMU register related */
kmutex_t immu_regs_lock;
+ kcondvar_t immu_regs_cv;
+ boolean_t immu_regs_busy;
boolean_t immu_regs_setup;
boolean_t immu_regs_running;
boolean_t immu_regs_quiesced;
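The new immu_regs_cv/immu_regs_busy pair suggests the usual kernel convention for serializing register access without holding a mutex across the whole hardware operation: take the lock, wait until not busy, mark busy, drop the lock, program the hardware, then re-take the lock and wake waiters. This hunk only adds the fields, so the fragment below is a hedged sketch of that convention, not the patch's actual call sites:

/* enter: serialize against other register users */
mutex_enter(&immu->immu_regs_lock);
while (immu->immu_regs_busy == B_TRUE)
	cv_wait(&immu->immu_regs_cv, &immu->immu_regs_lock);
immu->immu_regs_busy = B_TRUE;
mutex_exit(&immu->immu_regs_lock);

/* ... program the hardware without holding the mutex ... */

/* exit: wake the next waiter */
mutex_enter(&immu->immu_regs_lock);
immu->immu_regs_busy = B_FALSE;
cv_broadcast(&immu->immu_regs_cv);
mutex_exit(&immu->immu_regs_lock);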
@@ -541,6 +543,8 @@ typedef struct immu {
int immu_dvma_agaw;
int immu_dvma_nlevels;
boolean_t immu_dvma_coherent;
+ boolean_t immu_TM_reserved;
+ boolean_t immu_SNP_reserved;
/* DVMA context related */
krwlock_t immu_ctx_rwlock;
@@ -588,6 +592,8 @@ typedef enum immu_maptype {
IMMU_MAPTYPE_XLATE
} immu_maptype_t;
+#define IMMU_COOKIE_HASHSZ (512)
+
/*
* domain_t
*
@@ -606,13 +612,12 @@ typedef struct domain {
pgtable_t *dom_pgtable_root;
krwlock_t dom_pgtable_rwlock;
- /* list of pgtables for this domain */
- list_t dom_pglist;
-
/* list node for list of domains (unity or xlate) */
list_node_t dom_maptype_node;
/* list node for list of domains off immu */
list_node_t dom_immu_node;
+
+ mod_hash_t *dom_cookie_hash;
} domain_t;
typedef enum immu_pcib {
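domain_t trades the flat per-domain page-table list for a cookie hash sized by the new IMMU_COOKIE_HASHSZ. A sketch of how such a hash is conventionally created and probed with the illumos mod_hash facility; the hash name, the choice of the DVMA as key, and the null value destructor are assumptions, since the call sites are not part of this hunk:

/* hypothetical creation, e.g. at domain setup time */
domain->dom_cookie_hash = mod_hash_create_ptrhash("immu dvcookie hash",
    IMMU_COOKIE_HASHSZ, mod_hash_null_valdtor, sizeof (void *));

/* hypothetical lookup of a cookie by DVMA */
if (mod_hash_find(domain->dom_cookie_hash,
    (mod_hash_key_t)(uintptr_t)dvma, (mod_hash_val_t *)&cookie) == 0) {
	/* cookie found */
}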
@@ -682,6 +687,7 @@ typedef struct immu_arg {
extern dev_info_t *root_devinfo;
extern kmutex_t immu_lock;
extern list_t immu_list;
+extern void *immu_pgtable_cache;
extern boolean_t immu_setup;
extern boolean_t immu_running;
extern kmutex_t ioapic_drhd_lock;
@@ -695,7 +701,6 @@ extern boolean_t immu_dvma_enable;
extern boolean_t immu_gfxdvma_enable;
extern boolean_t immu_intrmap_enable;
extern boolean_t immu_qinv_enable;
-extern boolean_t immu_mmio_safe;
/* various quirks that need working around */
extern boolean_t immu_quirk_usbpage0;
@@ -706,6 +711,9 @@ extern boolean_t immu_quirk_mobile4;
/* debug messages */
extern boolean_t immu_dmar_print;
+/* tunables */
+extern int64_t immu_flush_gran;
+
/* ################### Interfaces exported outside IOMMU code ############## */
void immu_init(void);
void immu_startup(void);
@@ -795,7 +803,8 @@ void immu_dvma_free(dvcookie_t *first_dvcookie, void *arg);
int immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags);
immu_devi_t *immu_devi_get(dev_info_t *dip);
immu_t *immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags);
-
+int pgtable_ctor(void *buf, void *arg, int kmflag);
+void pgtable_dtor(void *buf, void *arg);
/* immu_intrmap.c interfaces */
void immu_intrmap_setup(list_t *immu_list);
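pgtable_ctor()/pgtable_dtor() being exported alongside the new global immu_pgtable_cache points at a kmem object cache for page tables. The canonical creation call with kmem_cache_create(9F) would look like the fragment below; the cache name, alignment, and flags are illustrative, not taken from the patch:

/* illustrative: likely done once during DVMA setup */
immu_pgtable_cache = kmem_cache_create("immu_pgtable_cache",
    sizeof (pgtable_t), 0,		/* default alignment */
    pgtable_ctor, pgtable_dtor,		/* constructor/destructor */
    NULL,				/* no reclaim callback */
    NULL, NULL, 0);			/* private, vmem arena, cflags */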
diff --git a/usr/src/uts/i86pc/sys/rootnex.h b/usr/src/uts/i86pc/sys/rootnex.h
index d9a003a298..df12346dfb 100644
--- a/usr/src/uts/i86pc/sys/rootnex.h
+++ b/usr/src/uts/i86pc/sys/rootnex.h
@@ -208,8 +208,6 @@ typedef struct rootnex_window_s {
typedef struct dvcookie {
uint64_t dvck_dvma;
uint64_t dvck_npages;
- uint64_t dvck_sidx;
- uint64_t dvck_eidx;
} dvcookie_t;
typedef struct dcookie {