Diffstat (limited to 'usr/src/uts/i86pc')
-rw-r--r--  usr/src/uts/i86pc/Makefile.files | 9
-rw-r--r--  usr/src/uts/i86pc/Makefile.i86pc.shared | 2
-rw-r--r--  usr/src/uts/i86pc/Makefile.rules | 2
-rw-r--r--  usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c | 2
-rw-r--r--  usr/src/uts/i86pc/cpu/authenticamd/authamd_main.c | 2
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c | 12
-rw-r--r--  usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c | 2
-rw-r--r--  usr/src/uts/i86pc/io/amd_iommu/amd_iommu_impl.c | 24
-rw-r--r--  usr/src/uts/i86pc/io/apix/apix.c | 11
-rw-r--r--  usr/src/uts/i86pc/io/apix/apix_utils.c | 3
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_quiesce.c | 15
-rw-r--r--  usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE | 24
-rw-r--r--  usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE.descrip | 1
-rw-r--r--  usr/src/uts/i86pc/io/hpet_acpi.c | 2
-rw-r--r--  usr/src/uts/i86pc/io/immu.c | 231
-rw-r--r--  usr/src/uts/i86pc/io/immu_dmar.c | 39
-rw-r--r--  usr/src/uts/i86pc/io/immu_dvma.c | 1587
-rw-r--r--  usr/src/uts/i86pc/io/immu_intrmap.c | 21
-rw-r--r--  usr/src/uts/i86pc/io/immu_qinv.c | 356
-rw-r--r--  usr/src/uts/i86pc/io/immu_regs.c | 87
-rw-r--r--  usr/src/uts/i86pc/io/isa.c | 2
-rw-r--r--  usr/src/uts/i86pc/io/mp_platform_common.c | 1
-rw-r--r--  usr/src/uts/i86pc/io/pci/pci_common.c | 2
-rw-r--r--  usr/src/uts/i86pc/io/pci/pci_kstats.c | 2
-rw-r--r--  usr/src/uts/i86pc/io/pcplusmp/apic.c | 5
-rw-r--r--  usr/src/uts/i86pc/io/pcplusmp/apic_common.c | 205
-rw-r--r--  usr/src/uts/i86pc/io/pcplusmp/apic_introp.c | 5
-rw-r--r--  usr/src/uts/i86pc/io/pcplusmp/apic_timer.c | 399
-rw-r--r--  usr/src/uts/i86pc/io/rootnex.c | 582
-rw-r--r--  usr/src/uts/i86pc/ml/cpr_wakecode.s | 13
-rw-r--r--  usr/src/uts/i86pc/ml/genassym.c | 7
-rw-r--r--  usr/src/uts/i86pc/ml/interrupt.s | 2
-rw-r--r--  usr/src/uts/i86pc/ml/locore.s | 9
-rw-r--r--  usr/src/uts/i86pc/ml/mpcore.s | 23
-rw-r--r--  usr/src/uts/i86pc/ml/offsets.in | 2
-rw-r--r--  usr/src/uts/i86pc/os/cpr_impl.c | 17
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c | 558
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/pwrnow.c | 7
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/speedstep.c | 11
-rw-r--r--  usr/src/uts/i86pc/os/ddi_impl.c | 6
-rw-r--r--  usr/src/uts/i86pc/os/fastboot.c | 13
-rw-r--r--  usr/src/uts/i86pc/os/fpu_subr.c | 69
-rw-r--r--  usr/src/uts/i86pc/os/intr.c | 2
-rw-r--r--  usr/src/uts/i86pc/os/lgrpplat.c | 5
-rw-r--r--  usr/src/uts/i86pc/os/machdep.c | 10
-rw-r--r--  usr/src/uts/i86pc/os/mlsetup.c | 24
-rw-r--r--  usr/src/uts/i86pc/os/mp_machdep.c | 17
-rw-r--r--  usr/src/uts/i86pc/os/mp_pc.c | 6
-rw-r--r--  usr/src/uts/i86pc/os/mp_startup.c | 74
-rw-r--r--  usr/src/uts/i86pc/os/pci_mech1_amd.c | 2
-rw-r--r--  usr/src/uts/i86pc/os/startup.c | 26
-rw-r--r--  usr/src/uts/i86pc/os/trap.c | 19
-rw-r--r--  usr/src/uts/i86pc/sys/apic.h | 25
-rw-r--r--  usr/src/uts/i86pc/sys/apic_common.h | 7
-rw-r--r--  usr/src/uts/i86pc/sys/apic_timer.h | 78
-rw-r--r--  usr/src/uts/i86pc/sys/apix.h | 1
-rw-r--r--  usr/src/uts/i86pc/sys/hpet_acpi.h | 2
-rw-r--r--  usr/src/uts/i86pc/sys/immu.h | 203
-rw-r--r--  usr/src/uts/i86pc/sys/machsystm.h | 2
-rw-r--r--  usr/src/uts/i86pc/sys/rm_platter.h | 5
-rw-r--r--  usr/src/uts/i86pc/sys/rootnex.h | 46
-rw-r--r--  usr/src/uts/i86pc/sys/smp_impldefs.h | 2
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c | 11
-rw-r--r--  usr/src/uts/i86pc/vm/htable.c | 2
-rw-r--r--  usr/src/uts/i86pc/vm/vm_dep.h | 5
65 files changed, 2833 insertions(+), 2113 deletions(-)
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 2758749056..a287bfd209 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -22,6 +22,8 @@
#
# Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
#
+# Copyright (c) 2010, Intel Corporation.
+#
# This Makefile defines file modules in the directory uts/i86pc
# and its children. These are the source files which are i86pc
# "implementation architecture" dependent.
@@ -187,10 +189,11 @@ PCI_E_NEXUS_OBJS += npe.o npe_misc.o
PCI_E_NEXUS_OBJS += pci_common.o pci_kstats.o pci_tools.o
PCINEXUS_OBJS += pci.o pci_common.o pci_kstats.o pci_tools.o
PCPLUSMP_OBJS += apic.o apic_regops.o psm_common.o apic_introp.o \
- mp_platform_common.o mp_platform_misc.o \
- hpet_acpi.o apic_common.o
+ mp_platform_common.o mp_platform_misc.o \
+ hpet_acpi.o apic_common.o apic_timer.o
APIX_OBJS += apix.o apic_regops.o psm_common.o apix_intr.o apix_utils.o \
- apix_irm.o mp_platform_common.o hpet_acpi.o apic_common.o
+ apix_irm.o mp_platform_common.o hpet_acpi.o apic_common.o \
+ apic_timer.o
ACPI_DRV_OBJS += acpi_drv.o acpi_video.o
diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared
index f41e91a4fc..9b910c35ef 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared
+++ b/usr/src/uts/i86pc/Makefile.i86pc.shared
@@ -22,7 +22,7 @@
#
# uts/i86pc/Makefile.i86pc
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
#
#
# This makefile contains the common definitions for the i86pc unix
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index dfff27de9f..604a2fb2c5 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -20,7 +20,7 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
#
# This Makefile defines the build rules for the directory uts/i86pc
diff --git a/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c b/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c
index dd6c2dd616..36ea92669d 100644
--- a/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c
+++ b/usr/src/uts/i86pc/cpu/amd_opteron/ao_main.c
@@ -64,7 +64,7 @@ ao_ms_init(cmi_hdl_t hdl, void **datap)
if (ao_ms_support_disable || cmi_hdl_model(hdl) >= ao_model_limit)
return (ENOTSUP);
- if (!(x86_feature & X86_MCA))
+ if (!is_x86_feature(x86_featureset, X86FSET_MCA))
return (ENOTSUP);
if (cmi_hdl_rdmsr(hdl, IA32_MSR_MCG_CAP, &cap) != CMI_SUCCESS)
diff --git a/usr/src/uts/i86pc/cpu/authenticamd/authamd_main.c b/usr/src/uts/i86pc/cpu/authenticamd/authamd_main.c
index 46723b1437..311eb6d12f 100644
--- a/usr/src/uts/i86pc/cpu/authenticamd/authamd_main.c
+++ b/usr/src/uts/i86pc/cpu/authenticamd/authamd_main.c
@@ -492,7 +492,7 @@ authamd_init(cmi_hdl_t hdl, void **datap)
!authamd_supported(hdl))
return (ENOTSUP);
- if (!(x86_feature & X86_MCA))
+ if (!is_x86_feature(x86_featureset, X86FSET_MCA))
return (ENOTSUP);
if (cmi_hdl_rdmsr(hdl, IA32_MSR_MCG_CAP, &cap) != CMI_SUCCESS)
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
index 50ef45bec9..1b9e259bd8 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
@@ -1067,13 +1067,13 @@ gcpu_mca_init(cmi_hdl_t hdl)
return;
/*
- * CPU startup code only calls cmi_mca_init if x86_feature indicates
- * both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier
+ * CPU startup code only calls cmi_mca_init if x86_featureset indicates
+ * both MCA and MCE support (i.e., X86FSET_MCA). P5, K6, and earlier
* processors, which have their own more primitive way of doing
* machine checks, will not have cmi_mca_init called since their
* CPUID information will not indicate both MCA and MCE features.
*/
- ASSERT(x86_feature & X86_MCA);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_MCA));
/*
* Determine whether the IA32_MCG_CTL register is present. If it
@@ -2018,13 +2018,13 @@ gcpu_mca_fini(cmi_hdl_t hdl)
int i;
/*
- * CPU startup code only calls cmi_mca_init if x86_feature indicates
- * both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier
+ * CPU startup code only calls cmi_mca_init if x86_featureset indicates
+ * both MCA and MCE support (i.e., X86FSET_MCA). P5, K6, and earlier
* processors, which have their own more primitive way of doing
* machine checks, will not have cmi_mca_init called since their
* CPUID information will not indicate both MCA and MCE features.
*/
- if ((x86_feature & X86_MCA) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_MCA))
return;
#ifndef __xpv
/*
diff --git a/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c b/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c
index e696725f6a..2eb7faea63 100644
--- a/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c
+++ b/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c
@@ -112,7 +112,7 @@ gintel_init(cmi_hdl_t hdl, void **datap)
if (gintel_ms_support_disable)
return (ENOTSUP);
- if (!(x86_feature & X86_MCA))
+ if (!is_x86_feature(x86_featureset, X86FSET_MCA))
return (ENOTSUP);
nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0);
diff --git a/usr/src/uts/i86pc/io/amd_iommu/amd_iommu_impl.c b/usr/src/uts/i86pc/io/amd_iommu/amd_iommu_impl.c
index 5b466d75a3..59c004458b 100644
--- a/usr/src/uts/i86pc/io/amd_iommu/amd_iommu_impl.c
+++ b/usr/src/uts/i86pc/io/amd_iommu/amd_iommu_impl.c
@@ -58,6 +58,11 @@ static int amd_iommu_win(iommulib_handle_t handle, dev_info_t *dip,
dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
uint_t *ccountp);
+static int amd_iommu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
+static int amd_iommu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
static int amd_iommu_map(iommulib_handle_t handle, dev_info_t *dip,
dev_info_t *rdip, struct ddi_dma_req *dmareq,
ddi_dma_handle_t *dma_handle);
@@ -105,6 +110,8 @@ struct iommulib_ops amd_iommulib_ops = {
amd_iommu_unbindhdl,
amd_iommu_sync,
amd_iommu_win,
+ amd_iommu_mapobject,
+ amd_iommu_unmapobject,
amd_iommu_map,
amd_iommu_mctl
};
@@ -1913,6 +1920,23 @@ amd_iommu_mctl(iommulib_handle_t handle, dev_info_t *dip,
request, offp, lenp, objpp, cache_flags));
}
+/*ARGSUSED*/
+static int
+amd_iommu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
+{
+ return (DDI_ENOTSUP);
+}
+
+/*ARGSUSED*/
+static int
+amd_iommu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
+{
+ return (DDI_ENOTSUP);
+}
+
uint64_t
amd_iommu_reg_get64_workaround(uint64_t *regp, uint32_t bits)
{
diff --git a/usr/src/uts/i86pc/io/apix/apix.c b/usr/src/uts/i86pc/io/apix/apix.c
index f2fdc19282..8c4ccb6a0a 100644
--- a/usr/src/uts/i86pc/io/apix/apix.c
+++ b/usr/src/uts/i86pc/io/apix/apix.c
@@ -475,7 +475,7 @@ apix_init_intr()
if (nlvt >= 5) {
/* Enable performance counter overflow interrupt */
- if ((x86_feature & X86_MSR) != X86_MSR)
+ if (!is_x86_feature(x86_featureset, X86FSET_MSR))
apic_enable_cpcovf_intr = 0;
if (apic_enable_cpcovf_intr) {
if (apic_cpcovf_vect == 0) {
@@ -1609,7 +1609,7 @@ apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
dev_info_t *dip;
int inum, cap_ptr;
ddi_acc_handle_t handle;
- ddi_intr_msix_t *msix_p;
+ ddi_intr_msix_t *msix_p = NULL;
ushort_t msix_ctrl;
uintptr_t off;
uint32_t mask;
@@ -1628,7 +1628,7 @@ apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
/*
* Mask MSI-X. It's unmasked when MSI-X gets enabled.
*/
- if (vecp->v_type == APIX_TYPE_MSIX) {
+ if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
if ((dip = APIX_GET_DIP(vecp)) == NULL)
return (NULL);
inum = vecp->v_devp->dv_inum;
@@ -1651,10 +1651,13 @@ apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
}
*result = 0;
-
if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
*result = EIO;
+ /* Restore mask bit */
+ if (msix_p != NULL)
+ ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
+
return (newp);
}
diff --git a/usr/src/uts/i86pc/io/apix/apix_utils.c b/usr/src/uts/i86pc/io/apix/apix_utils.c
index 07fc3535b9..3342306db6 100644
--- a/usr/src/uts/i86pc/io/apix/apix_utils.c
+++ b/usr/src/uts/i86pc/io/apix/apix_utils.c
@@ -312,7 +312,8 @@ apix_pci_msi_enable_vector(apix_vector_t *vecp, dev_info_t *dip, int type,
msi_regs.mr_data = vector;
msi_regs.mr_addr = target_apic_id;
- intrmap_tbl[0] = vecp->v_intrmap_private;
+ for (i = 0; i < count; i++)
+ intrmap_tbl[i] = xv_intrmap_private(vecp->v_cpuid, vector + i);
apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
count, 0xff);
for (i = 0; i < count; i++)
diff --git a/usr/src/uts/i86pc/io/dr/dr_quiesce.c b/usr/src/uts/i86pc/io/dr/dr_quiesce.c
index f3467f6eb1..663977da25 100644
--- a/usr/src/uts/i86pc/io/dr/dr_quiesce.c
+++ b/usr/src/uts/i86pc/io/dr/dr_quiesce.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -222,6 +221,10 @@ dr_bypass_device(char *dname)
{
int i;
char **lname;
+
+ if (dname == NULL)
+ return (0);
+
/* check the bypass list */
for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
if (strcmp(dname, dr_bypass_list[i++]) == 0)
@@ -707,10 +710,6 @@ dr_signal_user(int sig)
void
dr_resume(dr_sr_handle_t *srh)
{
- dr_handle_t *handle;
-
- handle = srh->sr_dr_handlep;
-
switch (srh->sr_suspend_state) {
case DR_SRSTATE_FULL:
@@ -780,8 +779,6 @@ dr_resume(dr_sr_handle_t *srh)
break;
}
- i_ndi_allow_device_tree_changes(handle->h_ndi);
-
prom_printf("DR: resume COMPLETED\n");
}
@@ -798,8 +795,6 @@ dr_suspend(dr_sr_handle_t *srh)
force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
- i_ndi_block_device_tree_changes(&handle->h_ndi);
-
prom_printf("\nDR: suspending user threads...\n");
srh->sr_suspend_state = DR_SRSTATE_USER;
if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
diff --git a/usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE b/usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..ddb59bc2cf
--- /dev/null
+++ b/usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE
@@ -0,0 +1,24 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
diff --git a/usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE.descrip b/usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..8270f005f1
--- /dev/null
+++ b/usr/src/uts/i86pc/io/fipe/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+INTEL FIPE DRIVER
diff --git a/usr/src/uts/i86pc/io/hpet_acpi.c b/usr/src/uts/i86pc/io/hpet_acpi.c
index 8b33cafc8a..b618e491e7 100644
--- a/usr/src/uts/i86pc/io/hpet_acpi.c
+++ b/usr/src/uts/i86pc/io/hpet_acpi.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/hpet_acpi.h>
diff --git a/usr/src/uts/i86pc/io/immu.c b/usr/src/uts/i86pc/io/immu.c
index da2fdad9d4..ed21bf8655 100644
--- a/usr/src/uts/i86pc/io/immu.c
+++ b/usr/src/uts/i86pc/io/immu.c
@@ -53,7 +53,6 @@
#include <sys/spl.h>
#include <sys/archsystm.h>
#include <sys/x86_archext.h>
-#include <sys/rootnex.h>
#include <sys/avl.h>
#include <sys/bootconf.h>
#include <sys/bootinfo.h>
@@ -72,7 +71,7 @@ boolean_t immu_dvma_enable = B_TRUE;
/* accessed in other files so not static */
boolean_t immu_gfxdvma_enable = B_TRUE;
boolean_t immu_intrmap_enable = B_FALSE;
-boolean_t immu_qinv_enable = B_FALSE;
+boolean_t immu_qinv_enable = B_TRUE;
/* various quirks that need working around */
@@ -98,7 +97,6 @@ immu_flags_t immu_global_dvma_flags;
dev_info_t *root_devinfo;
kmutex_t immu_lock;
list_t immu_list;
-void *immu_pgtable_cache;
boolean_t immu_setup;
boolean_t immu_running;
boolean_t immu_quiesced;
@@ -112,6 +110,11 @@ static char **unity_driver_array;
static uint_t nunity;
static char **xlate_driver_array;
static uint_t nxlate;
+
+static char **premap_driver_array;
+static uint_t npremap;
+static char **nopremap_driver_array;
+static uint_t nnopremap;
/* ###################### Utility routines ############################# */
/*
@@ -124,8 +127,6 @@ check_mobile4(dev_info_t *dip, void *arg)
int vendor, device;
int *ip = (int *)arg;
- ASSERT(arg);
-
vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
"vendor-id", -1);
device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
@@ -133,7 +134,7 @@ check_mobile4(dev_info_t *dip, void *arg)
if (vendor == 0x8086 && device == 0x2a40) {
*ip = B_TRUE;
- ddi_err(DER_NOTE, dip, "IMMU: Mobile 4 chipset detected. "
+ ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. "
"Force setting IOMMU write buffer");
return (DDI_WALK_TERMINATE);
} else {
@@ -145,7 +146,12 @@ static void
map_bios_rsvd_mem(dev_info_t *dip)
{
struct memlist *mp;
- int e;
+
+ /*
+ * Make sure the domain for the device is set up before
+ * mapping anything.
+ */
+ (void) immu_dvma_device_setup(dip, 0);
memlist_read_lock();
@@ -153,15 +159,14 @@ map_bios_rsvd_mem(dev_info_t *dip)
while (mp != NULL) {
memrng_t mrng = {0};
- ddi_err(DER_LOG, dip, "IMMU: Mapping BIOS rsvd range "
+ ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range "
"[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
mp->ml_address + mp->ml_size);
mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
- e = immu_dvma_map(NULL, NULL, &mrng, 0, dip, IMMU_FLAGS_MEMRNG);
- ASSERT(e == DDI_DMA_MAPPED || e == DDI_DMA_USE_PHYSICAL);
+ (void) immu_map_memrange(dip, &mrng);
mp = mp->ml_next;
}
@@ -180,7 +185,8 @@ check_conf(dev_info_t *dip, void *arg)
immu_devi_t *immu_devi;
const char *dname;
uint_t i;
- int hasprop = 0;
+ int hasmapprop = 0, haspreprop = 0;
+ boolean_t old_premap;
/*
* Only PCI devices can use an IOMMU. Legacy ISA devices
@@ -196,25 +202,45 @@ check_conf(dev_info_t *dip, void *arg)
for (i = 0; i < nunity; i++) {
if (strcmp(unity_driver_array[i], dname) == 0) {
- hasprop = 1;
+ hasmapprop = 1;
immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
}
}
for (i = 0; i < nxlate; i++) {
if (strcmp(xlate_driver_array[i], dname) == 0) {
- hasprop = 1;
+ hasmapprop = 1;
immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
}
}
+ old_premap = immu_devi->imd_use_premap;
+
+ for (i = 0; i < nnopremap; i++) {
+ if (strcmp(nopremap_driver_array[i], dname) == 0) {
+ haspreprop = 1;
+ immu_devi->imd_use_premap = B_FALSE;
+ }
+ }
+
+ for (i = 0; i < npremap; i++) {
+ if (strcmp(premap_driver_array[i], dname) == 0) {
+ haspreprop = 1;
+ immu_devi->imd_use_premap = B_TRUE;
+ }
+ }
+
/*
* Report if we changed the value from the default.
*/
- if (hasprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
+ if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
ddi_err(DER_LOG, dip, "using %s DVMA mapping",
immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
+
+ if (haspreprop && (immu_devi->imd_use_premap != old_premap))
+ ddi_err(DER_LOG, dip, "%susing premapped DVMA space",
+ immu_devi->imd_use_premap ? "" : "not ");
}
/*
@@ -263,11 +289,10 @@ check_lpc(dev_info_t *dip, void *arg)
immu_devi_t *immu_devi;
immu_devi = immu_devi_get(dip);
- ASSERT(immu_devi);
if (immu_devi->imd_lpc == B_TRUE) {
- ddi_err(DER_LOG, dip, "IMMU: Found LPC device");
+ ddi_err(DER_LOG, dip, "iommu: Found LPC device");
/* This will put the immu_devi on the LPC "specials" list */
- (void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
+ (void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP);
}
}
@@ -281,10 +306,9 @@ check_gfx(dev_info_t *dip, void *arg)
immu_devi_t *immu_devi;
immu_devi = immu_devi_get(dip);
- ASSERT(immu_devi);
if (immu_devi->imd_display == B_TRUE) {
immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
- ddi_err(DER_LOG, dip, "IMMU: Found GFX device");
+ ddi_err(DER_LOG, dip, "iommu: Found GFX device");
/* This will put the immu_devi on the GFX "specials" list */
(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
}
@@ -366,9 +390,6 @@ get_conf_opt(char *bopt, boolean_t *kvar)
{
char *val = NULL;
- ASSERT(bopt);
- ASSERT(kvar);
-
/*
* Check the rootnex.conf property
* Fake up a dev_t since searching the global
@@ -577,6 +598,24 @@ mapping_list_setup(void)
xlate_driver_array = string_array;
nxlate = nstrings;
}
+
+ if (ddi_prop_lookup_string_array(
+ makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
+ DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
+ "immu-dvma-premap-drivers",
+ &string_array, &nstrings) == DDI_PROP_SUCCESS) {
+ premap_driver_array = string_array;
+ npremap = nstrings;
+ }
+
+ if (ddi_prop_lookup_string_array(
+ makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
+ DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
+ "immu-dvma-nopremap-drivers",
+ &string_array, &nstrings) == DDI_PROP_SUCCESS) {
+ nopremap_driver_array = string_array;
+ nnopremap = nstrings;
+ }
}
/*
@@ -590,8 +629,6 @@ blacklisted_driver(void)
int i;
major_t maj;
- ASSERT((black_array == NULL) ^ (nblacks != 0));
-
/* need at least 2 strings */
if (nblacks < 2) {
return (B_FALSE);
@@ -625,8 +662,6 @@ blacklisted_smbios(void)
char **strptr;
int i;
- ASSERT((black_array == NULL) ^ (nblacks != 0));
-
/* need at least 4 strings for this setting */
if (nblacks < 4) {
return (B_FALSE);
@@ -668,7 +703,6 @@ blacklisted_smbios(void)
static boolean_t
blacklisted_acpi(void)
{
- ASSERT((black_array == NULL) ^ (nblacks != 0));
if (nblacks == 0) {
return (B_FALSE);
}
@@ -734,9 +768,20 @@ blacklist_destroy(void)
black_array = NULL;
nblacks = 0;
}
+}
- ASSERT(black_array == NULL);
- ASSERT(nblacks == 0);
+static char *
+immu_alloc_name(const char *str, int instance)
+{
+ size_t slen;
+ char *s;
+
+ slen = strlen(str) + IMMU_ISTRLEN + 1;
+ s = kmem_zalloc(slen, VM_SLEEP);
+ if (s != NULL)
+ (void) snprintf(s, slen, "%s%d", str, instance);
+
+ return (s);
}
@@ -749,6 +794,8 @@ static void *
immu_state_alloc(int seg, void *dmar_unit)
{
immu_t *immu;
+ char *nodename, *hcachename, *pcachename;
+ int instance;
dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
if (dmar_unit == NULL) {
@@ -763,10 +810,15 @@ immu_state_alloc(int seg, void *dmar_unit)
mutex_enter(&(immu->immu_lock));
immu->immu_dmar_unit = dmar_unit;
- immu->immu_name = ddi_strdup(immu_dmar_unit_name(dmar_unit),
- KM_SLEEP);
immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
+ nodename = ddi_node_name(immu->immu_dip);
+ instance = ddi_get_instance(immu->immu_dip);
+
+ immu->immu_name = immu_alloc_name(nodename, instance);
+ if (immu->immu_name == NULL)
+ return (NULL);
+
/*
* the immu_intr_lock mutex is grabbed by the IOMMU
* unit's interrupt handler so we need to use an
@@ -808,9 +860,24 @@ immu_state_alloc(int seg, void *dmar_unit)
*/
list_insert_tail(&immu_list, immu);
+ pcachename = immu_alloc_name("immu_pgtable_cache", instance);
+ if (pcachename == NULL)
+ return (NULL);
+
+ hcachename = immu_alloc_name("immu_hdl_cache", instance);
+ if (hcachename == NULL)
+ return (NULL);
+
+ immu->immu_pgtable_cache = kmem_cache_create(pcachename,
+ sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu,
+ NULL, 0);
+ immu->immu_hdl_cache = kmem_cache_create(hcachename,
+ sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor,
+ NULL, NULL, immu, NULL, 0);
+
mutex_exit(&(immu->immu_lock));
- ddi_err(DER_LOG, immu->immu_dip, "IMMU: unit setup");
+ ddi_err(DER_LOG, immu->immu_dip, "unit setup");
immu_dmar_set_immu(dmar_unit, immu);
@@ -824,22 +891,13 @@ immu_subsystems_setup(void)
void *unit_hdl;
ddi_err(DER_VERB, NULL,
- "Creating state structures for Intel IOMMU units\n");
-
- ASSERT(immu_setup == B_FALSE);
- ASSERT(immu_running == B_FALSE);
+ "Creating state structures for Intel IOMMU units");
mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
mutex_enter(&immu_lock);
- ASSERT(immu_pgtable_cache == NULL);
-
- immu_pgtable_cache = kmem_cache_create("immu_pgtable_cache",
- sizeof (pgtable_t), 0,
- pgtable_ctor, pgtable_dtor, NULL, NULL, NULL, 0);
-
unit_hdl = NULL;
for (seg = 0; seg < IMMU_MAXSEG; seg++) {
while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
@@ -865,12 +923,10 @@ static void
immu_subsystems_startup(void)
{
immu_t *immu;
+ iommulib_ops_t *iommulib_ops;
mutex_enter(&immu_lock);
- ASSERT(immu_setup == B_TRUE);
- ASSERT(immu_running == B_FALSE);
-
immu_dmar_startup();
immu = list_head(&immu_list);
@@ -893,6 +949,12 @@ immu_subsystems_startup(void)
immu_regs_startup(immu);
mutex_exit(&(immu->immu_lock));
+
+ iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP);
+ *iommulib_ops = immulib_ops;
+ iommulib_ops->ilops_data = (void *)immu;
+ (void) iommulib_iommu_register(immu->immu_dip, iommulib_ops,
+ &immu->immu_iommulib_handle);
}
mutex_exit(&immu_lock);
@@ -922,11 +984,6 @@ immu_walk_ancestor(
int level;
int error = DDI_SUCCESS;
- ASSERT(root_devinfo);
- ASSERT(rdip);
- ASSERT(rdip != root_devinfo);
- ASSERT(func);
-
/* ddip and immu can be NULL */
/* Hold rdip so that branch is not detached */
@@ -969,7 +1026,6 @@ immu_init(void)
char *phony_reg = "A thing of beauty is a joy forever";
/* Set some global shorthands that are needed by all of IOMMU code */
- ASSERT(root_devinfo == NULL);
root_devinfo = ddi_root_node();
/*
@@ -1107,7 +1163,7 @@ immu_startup(void)
if (immu_setup == B_FALSE) {
ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
- "skipping IOMU startup");
+ "skipping IOMMU startup");
return;
}
@@ -1122,38 +1178,6 @@ immu_startup(void)
}
/*
- * immu_map_sgl()
- * called from rootnex_coredma_bindhdl() when Intel
- * IOMMU is enabled to build DVMA cookies and map them.
- */
-int
-immu_map_sgl(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
- int prealloc_count, dev_info_t *rdip)
-{
- if (immu_running == B_FALSE) {
- return (DDI_DMA_USE_PHYSICAL);
- }
-
- return (immu_dvma_map(hp, dmareq, NULL, prealloc_count, rdip,
- IMMU_FLAGS_DMAHDL));
-}
-
-/*
- * immu_unmap_sgl()
- * called from rootnex_coredma_unbindhdl(), to unmap DVMA
- * cookies and free them
- */
-int
-immu_unmap_sgl(ddi_dma_impl_t *hp, dev_info_t *rdip)
-{
- if (immu_running == B_FALSE) {
- return (DDI_DMA_USE_PHYSICAL);
- }
-
- return (immu_dvma_unmap(hp, rdip));
-}
-
-/*
* Hook to notify IOMMU code of device tree changes
*/
void
@@ -1191,10 +1215,10 @@ immu_quiesce(void)
mutex_enter(&immu_lock);
- if (immu_running == B_FALSE)
+ if (immu_running == B_FALSE) {
+ mutex_exit(&immu_lock);
return (DDI_SUCCESS);
-
- ASSERT(immu_setup == B_TRUE);
+ }
immu = list_head(&immu_list);
for (; immu; immu = list_next(&immu_list, immu)) {
@@ -1205,9 +1229,9 @@ immu_quiesce(void)
/* flush caches */
rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
- immu_flush_context_gbl(immu);
+ immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
+ immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
rw_exit(&(immu->immu_ctx_rwlock));
- immu_flush_iotlb_gbl(immu);
immu_regs_wbf_flush(immu);
mutex_enter(&(immu->immu_lock));
@@ -1227,12 +1251,12 @@ immu_quiesce(void)
mutex_exit(&(immu->immu_lock));
}
- mutex_exit(&immu_lock);
if (ret == DDI_SUCCESS) {
immu_running = B_FALSE;
immu_quiesced = B_TRUE;
}
+ mutex_exit(&immu_lock);
return (ret);
}
@@ -1249,11 +1273,10 @@ immu_unquiesce(void)
mutex_enter(&immu_lock);
- if (immu_quiesced == B_FALSE)
+ if (immu_quiesced == B_FALSE) {
+ mutex_exit(&immu_lock);
return (DDI_SUCCESS);
-
- ASSERT(immu_setup == B_TRUE);
- ASSERT(immu_running == B_FALSE);
+ }
immu = list_head(&immu_list);
for (; immu; immu = list_next(&immu_list, immu)) {
@@ -1274,9 +1297,9 @@ immu_unquiesce(void)
/* flush caches before unquiesce */
rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
- immu_flush_context_gbl(immu);
+ immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
+ immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
rw_exit(&(immu->immu_ctx_rwlock));
- immu_flush_iotlb_gbl(immu);
/*
* Set IOMMU unit's regs to do
@@ -1303,4 +1326,20 @@ immu_unquiesce(void)
return (ret);
}
+void
+immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync)
+{
+ caddr_t vaddr;
+ uint64_t paddr;
+
+ iwp->iwp_sync = sync;
+
+ vaddr = (caddr_t)&iwp->iwp_vstatus;
+ paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
+ paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
+
+ iwp->iwp_pstatus = paddr;
+ iwp->iwp_name = name;
+}
+
/* ############## END Intel IOMMU entry points ################## */
diff --git a/usr/src/uts/i86pc/io/immu_dmar.c b/usr/src/uts/i86pc/io/immu_dmar.c
index 734363beef..7ebcfb0ba6 100644
--- a/usr/src/uts/i86pc/io/immu_dmar.c
+++ b/usr/src/uts/i86pc/io/immu_dmar.c
@@ -46,7 +46,6 @@
#include <sys/apic.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
-#include <sys/iommulib.h>
#include <sys/immu.h>
#include <sys/smp_impldefs.h>
@@ -639,14 +638,14 @@ dmar_table_print(dmar_table_t *tbl)
}
static void
-drhd_devi_create(drhd_t *drhd, char *name)
+drhd_devi_create(drhd_t *drhd, int unit)
{
struct ddi_parent_private_data *pdptr;
struct regspec reg;
dev_info_t *dip;
- ndi_devi_alloc_sleep(root_devinfo, name,
- DEVI_SID_NODEID, &dip);
+ dip = ddi_add_child(root_devinfo, IMMU_UNIT_NAME,
+ DEVI_SID_NODEID, unit);
drhd->dr_dip = dip;
@@ -702,7 +701,6 @@ dmar_devinfos_create(dmar_table_t *tbl)
{
list_t *drhd_list;
drhd_t *drhd;
- char name[IMMU_MAXNAMELEN];
int i, unit;
for (i = 0; i < IMMU_MAXSEG; i++) {
@@ -715,9 +713,7 @@ dmar_devinfos_create(dmar_table_t *tbl)
drhd = list_head(drhd_list);
for (unit = 0; drhd;
drhd = list_next(drhd_list, drhd), unit++) {
- (void) snprintf(name, sizeof (name),
- "drhd%d,%d", i, unit);
- drhd_devi_create(drhd, name);
+ drhd_devi_create(drhd, unit);
}
}
}
@@ -807,8 +803,8 @@ dmar_table_destroy(dmar_table_t *tbl)
}
/* free strings */
- kmem_free(tbl->tbl_oem_tblid, TBL_OEM_ID_SZ + 1);
- kmem_free(tbl->tbl_oem_id, TBL_OEM_TBLID_SZ + 1);
+ kmem_free(tbl->tbl_oem_tblid, TBL_OEM_TBLID_SZ + 1);
+ kmem_free(tbl->tbl_oem_id, TBL_OEM_ID_SZ + 1);
tbl->tbl_raw = NULL; /* raw ACPI table doesn't have to be freed */
mutex_destroy(&(tbl->tbl_lock));
kmem_free(tbl, sizeof (dmar_table_t));
@@ -946,7 +942,6 @@ void
immu_dmar_rmrr_map(void)
{
int seg;
- int e;
int count;
dev_info_t *rdip;
scope_t *scope;
@@ -1030,6 +1025,7 @@ immu_dmar_rmrr_map(void)
}
memlist_read_unlock();
+ (void) immu_dvma_device_setup(rdip, 0);
ddi_err(DER_LOG, rdip,
"IMMU: Mapping RMRR range "
@@ -1042,16 +1038,8 @@ immu_dmar_rmrr_map(void)
IMMU_ROUNDUP((uintptr_t)rmrr->rm_limit -
(uintptr_t)rmrr->rm_base + 1) /
IMMU_PAGESIZE;
- e = immu_dvma_map(NULL, NULL, &mrng, 0, rdip,
- IMMU_FLAGS_READ | IMMU_FLAGS_WRITE |
- IMMU_FLAGS_MEMRNG);
- /*
- * dip may have unity domain or xlate domain
- * If the former, PHYSICAL is returned else
- * MAPPED is returned.
- */
- ASSERT(e == DDI_DMA_MAPPED ||
- e == DDI_DMA_USE_PHYSICAL);
+
+ (void) immu_map_memrange(rdip, &mrng);
}
}
}
@@ -1219,15 +1207,6 @@ found:
return (drhd->dr_immu);
}
-char *
-immu_dmar_unit_name(void *dmar_unit)
-{
- drhd_t *drhd = (drhd_t *)dmar_unit;
-
- ASSERT(drhd->dr_dip);
- return (ddi_node_name(drhd->dr_dip));
-}
-
dev_info_t *
immu_dmar_unit_dip(void *dmar_unit)
{
diff --git a/usr/src/uts/i86pc/io/immu_dvma.c b/usr/src/uts/i86pc/io/immu_dvma.c
index 59ce95439a..4dfa9c05b4 100644
--- a/usr/src/uts/i86pc/io/immu_dvma.c
+++ b/usr/src/uts/i86pc/io/immu_dvma.c
@@ -42,6 +42,8 @@
#include <sys/acpica.h>
#include <sys/modhash.h>
#include <sys/immu.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
#undef TEST
@@ -71,13 +73,45 @@ static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
int dev, int func, immu_flags_t immu_flags);
static void destroy_immu_devi(immu_devi_t *immu_devi);
-static boolean_t dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma,
- uint64_t nvpages, dcookie_t *dcookies, int dcount, dev_info_t *rdip,
+static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
+ uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
immu_flags_t immu_flags);
/* Extern globals */
extern struct memlist *phys_install;
+/*
+ * iommulib interface functions.
+ */
+static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
+static int immu_allochdl(iommulib_handle_t handle,
+ dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
+ int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
+static int immu_freehdl(iommulib_handle_t handle,
+ dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
+static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
+ ddi_dma_cookie_t *cookiep, uint_t *ccountp);
+static int immu_unbindhdl(iommulib_handle_t handle,
+ dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
+static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
+ uint_t cachefl);
+static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
+ off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
+static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
+static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
+static int immu_map(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, struct ddi_dma_req *dmareq,
+ ddi_dma_handle_t *dma_handle);
+static int immu_mctl(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ enum ddi_dma_ctlops request, off_t *offp, size_t *lenp,
+ caddr_t *objpp, uint_t cachefl);
/* static Globals */
@@ -106,6 +140,33 @@ static ddi_device_acc_attr_t immu_acc_attr = {
DDI_STRICTORDER_ACC
};
+struct iommulib_ops immulib_ops = {
+ IOMMU_OPS_VERSION,
+ INTEL_IOMMU,
+ "Intel IOMMU",
+ NULL,
+ immu_probe,
+ immu_allochdl,
+ immu_freehdl,
+ immu_bindhdl,
+ immu_unbindhdl,
+ immu_sync,
+ immu_win,
+ immu_mapobject,
+ immu_unmapobject,
+ immu_map,
+ immu_mctl
+};
+
+/*
+ * Fake physical address range used to set up initial prealloc mappings.
+ * This memory is never actually accessed. It is mapped read-only,
+ * and is overwritten as soon as the first DMA bind operation is
+ * performed. Since 0 is a special case, just start at the 2nd
+ * physical page.
+ */
+
+static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };
/* globals private to this file */
static kmutex_t immu_domain_lock;
@@ -124,6 +185,9 @@ typedef struct xlate {
static mod_hash_t *bdf_domain_hash;
+int immu_use_alh;
+int immu_use_tm;
+
static domain_t *
bdf_domain_lookup(immu_devi_t *immu_devi)
{
@@ -155,15 +219,12 @@ bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
int16_t bus = immu_devi->imd_bus;
int16_t devfunc = immu_devi->imd_devfunc;
uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
- int r;
if (seg < 0 || bus < 0 || devfunc < 0) {
return;
}
- r = mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
- ASSERT(r != MH_ERR_DUPLICATE);
- ASSERT(r == 0);
+ (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
}
static int
@@ -172,10 +233,6 @@ match_lpc(dev_info_t *pdip, void *arg)
immu_devi_t *immu_devi;
dvma_arg_t *dvap = (dvma_arg_t *)arg;
- ASSERT(dvap->dva_error == DDI_FAILURE);
- ASSERT(dvap->dva_ddip == NULL);
- ASSERT(dvap->dva_list);
-
if (list_is_empty(dvap->dva_list)) {
return (DDI_WALK_TERMINATE);
}
@@ -183,7 +240,6 @@ match_lpc(dev_info_t *pdip, void *arg)
immu_devi = list_head(dvap->dva_list);
for (; immu_devi; immu_devi = list_next(dvap->dva_list,
immu_devi)) {
- ASSERT(immu_devi->imd_dip);
if (immu_devi->imd_dip == pdip) {
dvap->dva_ddip = pdip;
dvap->dva_error = DDI_SUCCESS;
@@ -200,8 +256,6 @@ immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
list_t *spclist = NULL;
immu_devi_t *immu_devi;
- ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_lock)));
-
immu_devi = IMMU_DEVI(dip);
if (immu_devi->imd_display == B_TRUE) {
spclist = &(immu->immu_dvma_gfx_list);
@@ -226,10 +280,6 @@ immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
immu_devi_t *new_imd;
immu_devi_t *immu_devi;
- ASSERT(root_devinfo);
- ASSERT(dip);
- ASSERT(dip != root_devinfo);
-
immu_devi = immu_devi_get(dip);
if (immu_devi != NULL) {
return (DDI_SUCCESS);
@@ -305,7 +355,7 @@ get_gfx_devinfo(dev_info_t *rdip)
list_t *list_gfx;
/*
- * The GFX device may not be on the same IMMU unit as "agpgart"
+ * The GFX device may not be on the same iommu unit as "agpgart"
* so search globally
*/
immu_devi = NULL;
@@ -319,16 +369,12 @@ get_gfx_devinfo(dev_info_t *rdip)
}
if (immu_devi == NULL) {
- ddi_err(DER_WARN, rdip, "IMMU: No GFX device. "
+ ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
"Cannot redirect agpgart");
return (NULL);
}
- /* list is not empty we checked above */
- ASSERT(immu_devi);
- ASSERT(immu_devi->imd_dip);
-
- ddi_err(DER_LOG, rdip, "IMMU: GFX redirect to %s",
+ ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
ddi_node_name(immu_devi->imd_dip));
return (immu_devi->imd_dip);
@@ -373,6 +419,7 @@ dma_to_immu_flags(struct ddi_dma_req *dmareq)
return (flags);
}
+/*ARGSUSED*/
int
pgtable_ctor(void *buf, void *arg, int kmflag)
{
@@ -381,9 +428,8 @@ pgtable_ctor(void *buf, void *arg, int kmflag)
int (*dmafp)(caddr_t);
caddr_t vaddr;
void *next;
-
- ASSERT(buf);
- ASSERT(arg == NULL);
+ uint_t flags;
+ immu_t *immu = arg;
pgtable = (pgtable_t *)buf;
@@ -394,15 +440,18 @@ pgtable_ctor(void *buf, void *arg, int kmflag)
return (-1);
}
- ASSERT(root_devinfo);
if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
kmem_free(next, IMMU_PAGESIZE);
return (-1);
}
+ flags = DDI_DMA_CONSISTENT;
+ if (!immu->immu_dvma_coherent)
+ flags |= IOMEM_DATA_UC_WR_COMBINE;
+
if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
- &immu_acc_attr, DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED,
+ &immu_acc_attr, flags,
dmafp, NULL, &vaddr, &actual_size,
&pgtable->hwpg_memhdl) != DDI_SUCCESS) {
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
@@ -430,16 +479,13 @@ pgtable_ctor(void *buf, void *arg, int kmflag)
return (0);
}
+/*ARGSUSED*/
void
pgtable_dtor(void *buf, void *arg)
{
pgtable_t *pgtable;
- ASSERT(buf);
- ASSERT(arg == NULL);
-
pgtable = (pgtable_t *)buf;
- ASSERT(pgtable->swpg_next_array);
/* destroy will panic if lock is held. */
rw_destroy(&(pgtable->swpg_rwlock));
@@ -447,8 +493,6 @@ pgtable_dtor(void *buf, void *arg)
ddi_dma_mem_free(&pgtable->hwpg_memhdl);
ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
-
- /* don't zero out hwpg_vaddr and swpg_next_array for debugging */
}
/*
@@ -469,11 +513,9 @@ pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
pgtable_t *pgtable;
int kmflags;
- ASSERT(immu);
-
kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
- pgtable = kmem_cache_alloc(immu_pgtable_cache, kmflags);
+ pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
if (pgtable == NULL) {
return (NULL);
}
@@ -481,22 +523,16 @@ pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
}
static void
-pgtable_zero(immu_t *immu, pgtable_t *pgtable)
+pgtable_zero(pgtable_t *pgtable)
{
bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
-
- /* Dont need to flush the write we will flush when we use the entry */
- immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE);
}
static void
pgtable_free(immu_t *immu, pgtable_t *pgtable)
{
- ASSERT(immu);
- ASSERT(pgtable);
-
- kmem_cache_free(immu_pgtable_cache, pgtable);
+ kmem_cache_free(immu->immu_pgtable_cache, pgtable);
}
/*
@@ -564,6 +600,14 @@ device_is_pciex(
return (is_pciex);
}
+static boolean_t
+device_use_premap(uint_t classcode)
+{
+ if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
/*
* immu_dvma_get_immu()
@@ -591,7 +635,6 @@ immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
/*NOTREACHED*/
}
immu_devi = immu_devi_get(dip);
- ASSERT(immu_devi);
}
mutex_enter(&(DEVI(dip)->devi_lock));
@@ -715,9 +758,9 @@ create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
/* check for certain special devices */
immu_devi->imd_display = device_is_display(classcode);
-
immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
(subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
+ immu_devi->imd_use_premap = device_use_premap(classcode);
immu_devi->imd_domain = NULL;
@@ -739,9 +782,6 @@ immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
domain_t *domain;
dev_info_t *ddip;
- ASSERT(rdip);
- ASSERT(ddipp);
-
*ddipp = NULL;
immu_devi = immu_devi_get(rdip);
@@ -754,11 +794,8 @@ immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
ddip = immu_devi->imd_ddip;
mutex_exit(&(DEVI(rdip)->devi_lock));
- if (domain) {
- ASSERT(domain->dom_did > 0);
- ASSERT(ddip);
+ if (domain)
*ddipp = ddip;
- }
return (domain);
@@ -776,16 +813,10 @@ did_alloc(immu_t *immu, dev_info_t *rdip,
{
int did;
- ASSERT(immu);
- ASSERT(rdip);
- ASSERT(rdip != root_devinfo);
-
did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
(immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
if (did == 0) {
- ASSERT(immu->immu_unity_domain);
- ASSERT(immu->immu_unity_domain->dom_did > 0);
ddi_err(DER_WARN, rdip, "device domain-id alloc error"
" domain-device: %s%d. immu unit is %s. Using "
"unity domain with domain-id (%d)",
@@ -806,10 +837,6 @@ get_branch_domain(dev_info_t *pdip, void *arg)
immu_t *immu;
dvma_arg_t *dvp = (dvma_arg_t *)arg;
- ASSERT(pdip);
- ASSERT(dvp);
- ASSERT(dvp->dva_rdip);
-
/*
* The field dvp->dva_rdip is a work-in-progress
* and gets updated as we walk up the ancestor
@@ -828,12 +855,9 @@ get_branch_domain(dev_info_t *pdip, void *arg)
}
immu_devi = immu_devi_get(pdip);
- ASSERT(immu_devi);
immu = immu_devi->imd_immu;
- if (immu == NULL) {
+ if (immu == NULL)
immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
- ASSERT(immu);
- }
/*
* If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
@@ -879,10 +903,6 @@ get_branch_domain(dev_info_t *pdip, void *arg)
return (DDI_WALK_TERMINATE);
}
- /* immu_devi either has both set or both clear */
- ASSERT(domain == NULL);
- ASSERT(ddip == NULL);
-
/* Domain may already be set, continue walking so that ddip gets set */
if (dvp->dva_domain) {
return (DDI_WALK_CONTINUE);
@@ -899,7 +919,6 @@ get_branch_domain(dev_info_t *pdip, void *arg)
/* Grab lock again to check if something else set immu_devi fields */
mutex_enter(&(DEVI(pdip)->devi_lock));
if (immu_devi->imd_domain != NULL) {
- ASSERT(immu_devi->imd_domain == domain);
dvp->dva_domain = domain;
} else {
dvp->dva_domain = domain;
@@ -919,19 +938,9 @@ map_unity_domain(domain_t *domain)
struct memlist *mp;
uint64_t start;
uint64_t npages;
- dcookie_t dcookies[1] = {0};
+ immu_dcookie_t dcookies[1] = {0};
int dcount = 0;
- ASSERT(domain);
- ASSERT(domain->dom_did == IMMU_UNITY_DID);
-
- /*
- * We call into routines that grab the lock so we should
- * not be called with the lock held. This does not matter
- * much since, no else has a reference to this domain
- */
- ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock)));
-
/*
* UNITY arenas are a mirror of the physical memory
* installed on the system.
@@ -944,7 +953,7 @@ map_unity_domain(domain_t *domain)
dcookies[0].dck_paddr = 0;
dcookies[0].dck_npages = 1;
dcount = 1;
- (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL,
+ (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
@@ -963,17 +972,17 @@ map_unity_domain(domain_t *domain)
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
- (void) dvma_map(domain->dom_immu, domain, start, npages, dcookies,
+ (void) dvma_map(domain, start, npages, dcookies,
dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
- ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
+ ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
" - 0x%" PRIx64 "]", start, start + mp->ml_size);
mp = mp->ml_next;
while (mp) {
- ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
- " - 0x%" PRIx64 "]", mp->ml_address,
- mp->ml_address + mp->ml_size);
+ ddi_err(DER_LOG, domain->dom_dip,
+ "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
+ mp->ml_address, mp->ml_address + mp->ml_size);
start = mp->ml_address;
npages = mp->ml_size/IMMU_PAGESIZE + 1;
@@ -981,16 +990,16 @@ map_unity_domain(domain_t *domain)
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
- (void) dvma_map(domain->dom_immu, domain, start, npages,
+ (void) dvma_map(domain, start, npages,
dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
mp = mp->ml_next;
}
mp = bios_rsvd;
while (mp) {
- ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64
- " - 0x%" PRIx64 "]", mp->ml_address,
- mp->ml_address + mp->ml_size);
+ ddi_err(DER_LOG, domain->dom_dip,
+ "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
+ mp->ml_address, mp->ml_address + mp->ml_size);
start = mp->ml_address;
npages = mp->ml_size/IMMU_PAGESIZE + 1;
@@ -998,7 +1007,7 @@ map_unity_domain(domain_t *domain)
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
- (void) dvma_map(domain->dom_immu, domain, start, npages,
+ (void) dvma_map(domain, start, npages,
dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
mp = mp->ml_next;
@@ -1035,12 +1044,6 @@ create_xlate_arena(immu_t *immu, domain_t *domain,
vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;
- /*
- * No one else has access to this domain.
- * So no domain locks needed
- */
- ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock)));
-
/* Restrict mgaddr (max guest addr) to MGAW */
mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);
@@ -1066,11 +1069,9 @@ create_xlate_arena(immu_t *immu, domain_t *domain,
size = mp->ml_size;
ddi_err(DER_VERB, rdip,
- "%s: Creating dvma vmem arena [0x%" PRIx64
+ "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
" - 0x%" PRIx64 "]", arena_name, start, start + size);
- ASSERT(domain->dom_dvma_arena == NULL);
-
/*
* We always allocate in quanta of IMMU_PAGESIZE
*/
@@ -1105,7 +1106,7 @@ create_xlate_arena(immu_t *immu, domain_t *domain,
size = mp->ml_size;
ddi_err(DER_VERB, rdip,
- "%s: Adding dvma vmem span [0x%" PRIx64
+ "iommu: %s: Adding dvma vmem span [0x%" PRIx64
" - 0x%" PRIx64 "]", arena_name, start,
start + size);
@@ -1139,13 +1140,7 @@ set_domain(
domain_t *fdomain;
dev_info_t *fddip;
- ASSERT(dip);
- ASSERT(ddip);
- ASSERT(domain);
- ASSERT(domain->dom_did > 0); /* must be an initialized domain */
-
immu_devi = immu_devi_get(dip);
- ASSERT(immu_devi);
mutex_enter(&(DEVI(dip)->devi_lock));
fddip = immu_devi->imd_ddip;
@@ -1187,8 +1182,6 @@ device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
dvma_arg_t dvarg = {0};
int level;
- ASSERT(rdip);
-
*ddipp = NULL;
/*
@@ -1198,8 +1191,6 @@ device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
ddip = NULL;
domain = immu_devi_domain(rdip, &ddip);
if (domain) {
- ASSERT(domain->dom_did > 0);
- ASSERT(ddip);
*ddipp = ddip;
return (domain);
}
@@ -1210,7 +1201,7 @@ device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
* possible that there is no IOMMU unit for this device
* - BIOS bugs are one example.
*/
- ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
+ ddi_err(DER_WARN, rdip, "No iommu unit found for device");
return (NULL);
}
@@ -1262,7 +1253,6 @@ device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
if (domain == NULL) {
return (NULL);
}
- ASSERT(domain->dom_did > 0);
/*FALLTHROUGH*/
found:
@@ -1283,10 +1273,6 @@ create_unity_domain(immu_t *immu)
{
domain_t *domain;
- /* 0 is reserved by Vt-d */
- /*LINTED*/
- ASSERT(IMMU_UNITY_DID > 0);
-
/* domain created during boot and always use sleep flag */
domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
@@ -1303,10 +1289,16 @@ create_unity_domain(immu_t *immu)
* should never fail.
*/
domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
- ASSERT(domain->dom_pgtable_root);
- pgtable_zero(immu, domain->dom_pgtable_root);
+ pgtable_zero(domain->dom_pgtable_root);
+
+ /*
+ * Only map all physical memory in to the unity domain
+ * if passthrough is not supported. If it is supported,
+ * passthrough is set in the context entry instead.
+ */
+ if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
+ map_unity_domain(domain);
- map_unity_domain(domain);
/*
* put it on the system-wide UNITY domain list
@@ -1331,23 +1323,17 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
char mod_hash_name[128];
immu_devi_t *immu_devi;
int did;
- dcookie_t dcookies[1] = {0};
+ immu_dcookie_t dcookies[1] = {0};
int dcount = 0;
- ASSERT(immu);
- ASSERT(ddip);
-
immu_devi = immu_devi_get(rdip);
- ASSERT(immu_devi);
-
/*
* First allocate a domainid.
* This routine will never fail, since if we run out
* of domains the unity domain will be allocated.
*/
did = did_alloc(immu, rdip, ddip, immu_flags);
- ASSERT(did > 0);
if (did == IMMU_UNITY_DID) {
/* domain overflow */
ASSERT(immu->immu_unity_domain);
@@ -1370,6 +1356,7 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
domain->dom_did = did;
domain->dom_immu = immu;
domain->dom_maptype = IMMU_MAPTYPE_XLATE;
+ domain->dom_dip = ddip;
/*
* Create xlate DVMA arena for this domain.
@@ -1386,7 +1373,7 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
domain->dom_did, immu->immu_name);
/*NOTREACHED*/
}
- pgtable_zero(immu, domain->dom_pgtable_root);
+ pgtable_zero(domain->dom_pgtable_root);
/*
* Since this is a immu unit-specific domain, put it on
@@ -1412,7 +1399,7 @@ domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
dcookies[0].dck_paddr = 0;
dcookies[0].dck_npages = 1;
dcount = 1;
- (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL,
+ (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
return (domain);
@@ -1440,8 +1427,8 @@ did_init(immu_t *immu)
sizeof (immu->immu_did_arena_name),
"%s_domainid_arena", immu->immu_name);
- ddi_err(DER_VERB, NULL, "%s: Creating domainid arena %s",
- immu->immu_name, immu->immu_did_arena_name);
+ ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
+ immu->immu_did_arena_name);
immu->immu_did_arena = vmem_create(
immu->immu_did_arena_name,
@@ -1470,7 +1457,6 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
{
pgtable_t *context;
pgtable_t *pgtable_root;
- pgtable_t *unity_pgtable_root;
hw_rce_t *hw_rent;
hw_rce_t *hw_cent;
hw_rce_t *ctxp;
@@ -1479,13 +1465,6 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
boolean_t fill_root;
boolean_t fill_ctx;
- ASSERT(immu);
- ASSERT(domain);
- ASSERT(root_table);
- ASSERT(bus >= 0);
- ASSERT(devfunc >= 0);
- ASSERT(domain->dom_pgtable_root);
-
pgtable_root = domain->dom_pgtable_root;
ctxp = (hw_rce_t *)(root_table->swpg_next_array);
@@ -1500,15 +1479,8 @@ context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
rwtype = RW_READER;
again:
if (ROOT_GET_P(hw_rent)) {
- ASSERT(ROOT_GET_CONT(hw_rent) == context->hwpg_paddr);
hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
- ASSERT(CONT_GET_P(hw_cent));
- ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did);
- ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw);
- ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY);
- ASSERT(CONT_GET_ASR(hw_cent) ==
- pgtable_root->hwpg_paddr);
rw_exit(&(immu->immu_ctx_rwlock));
return;
} else {
@@ -1536,35 +1508,32 @@ again:
if (fill_ctx == B_TRUE) {
hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
- unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root;
- ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED);
- ASSERT(CONT_GET_P(hw_cent));
- ASSERT(CONT_GET_DID(hw_cent) ==
- immu->immu_unity_domain->dom_did);
- ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw);
- ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY);
- ASSERT(CONT_GET_ASR(hw_cent) ==
- unity_pgtable_root->hwpg_paddr);
-
/* need to disable context entry before reprogramming it */
bzero(hw_cent, sizeof (hw_rce_t));
/* flush caches */
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
- ASSERT(rw_write_held(&(immu->immu_ctx_rwlock)));
sid = ((bus << 8) | devfunc);
- immu_flush_context_fsi(immu, 0, sid, domain->dom_did);
-
- immu_regs_wbf_flush(immu);
+ immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
+ &immu->immu_ctx_inv_wait);
CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
CONT_SET_DID(hw_cent, domain->dom_did);
CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
- /*LINTED*/
- CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
+ if (domain->dom_did == IMMU_UNITY_DID &&
+ IMMU_ECAP_GET_PT(immu->immu_regs_excap))
+ CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
+ else
+ /*LINTED*/
+ CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
CONT_SET_P(hw_cent);
+ if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
+ CONT_SET_EH(hw_cent);
+ if (immu_use_alh)
+ CONT_SET_ALH(hw_cent);
+ }
immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
}
rw_exit(&(immu->immu_ctx_rwlock));
@@ -1584,7 +1553,7 @@ context_create(immu_t *immu)
/* Allocate a zeroed root table (4K 256b entries) */
root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
- pgtable_zero(immu, root_table);
+ pgtable_zero(root_table);
/*
* Setup context tables for all possible root table entries.
@@ -1594,29 +1563,29 @@ context_create(immu_t *immu)
hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
- pgtable_zero(immu, context);
- ASSERT(ROOT_GET_P(hw_rent) == 0);
+ pgtable_zero(context);
ROOT_SET_P(hw_rent);
ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
for (devfunc = 0; devfunc < IMMU_CONT_NUM;
devfunc++, hw_cent++) {
- ASSERT(CONT_GET_P(hw_cent) == 0);
pgtable_root =
immu->immu_unity_domain->dom_pgtable_root;
CONT_SET_DID(hw_cent,
immu->immu_unity_domain->dom_did);
CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
- /*LINTED*/
- CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
+ if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
+ CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
+ else
+ /*LINTED*/
+ CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
CONT_SET_P(hw_cent);
}
immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
*((pgtable_t **)ctxp) = context;
}
- immu_regs_cpu_flush(immu, root_table->hwpg_vaddr, IMMU_PAGESIZE);
return (root_table);
}
@@ -1627,11 +1596,10 @@ context_create(immu_t *immu)
static void
context_init(immu_t *immu)
{
- ASSERT(immu);
- ASSERT(immu->immu_ctx_root == NULL);
-
rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
+ immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);
+
immu_regs_wbf_flush(immu);
immu->immu_ctx_root = context_create(immu);
@@ -1639,10 +1607,9 @@ context_init(immu_t *immu)
immu_regs_set_root_table(immu);
rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
- immu_flush_context_gbl(immu);
+ immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
+ immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
rw_exit(&(immu->immu_ctx_rwlock));
- immu_flush_iotlb_gbl(immu);
- immu_regs_wbf_flush(immu);
}
@@ -1655,10 +1622,7 @@ find_top_pcib(dev_info_t *dip, void *arg)
immu_devi_t *immu_devi;
dev_info_t **pcibdipp = (dev_info_t **)arg;
- ASSERT(dip);
-
immu_devi = immu_devi_get(dip);
- ASSERT(immu_devi);
if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
*pcibdipp = dip;
@@ -1678,7 +1642,6 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
int r_devfunc;
int d_devfunc;
immu_pcib_t d_pcib_type;
- immu_pcib_t r_pcib_type;
dev_info_t *pcibdip;
if (ddip == NULL || rdip == NULL ||
@@ -1708,23 +1671,14 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
d_immu_devi = immu_devi_get(ddip);
r_immu_devi = immu_devi_get(rdip);
- ASSERT(r_immu_devi);
- ASSERT(d_immu_devi);
d_bus = d_immu_devi->imd_bus;
d_devfunc = d_immu_devi->imd_devfunc;
d_pcib_type = d_immu_devi->imd_pcib_type;
r_bus = r_immu_devi->imd_bus;
r_devfunc = r_immu_devi->imd_devfunc;
- r_pcib_type = r_immu_devi->imd_pcib_type;
-
- ASSERT(d_bus >= 0);
if (rdip == ddip) {
- ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT ||
- d_pcib_type == IMMU_PCIB_PCIE_PCIE);
- ASSERT(r_bus >= 0);
- ASSERT(r_devfunc >= 0);
/* rdip is a PCIE device. set context for it only */
context_set(immu, domain, immu->immu_ctx_root, r_bus,
r_devfunc);
@@ -1734,9 +1688,6 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
"0x%lx are identical", rdip, ddip);
#endif
- ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT);
- ASSERT(r_bus >= 0);
- ASSERT(r_devfunc >= 0);
/* rdip is a PCIE device. set context for it only */
context_set(immu, domain, immu->immu_ctx_root, r_bus,
r_devfunc);
@@ -1758,11 +1709,7 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
&pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
pcibdip != NULL) {
- ASSERT(pcibdip);
r_immu_devi = immu_devi_get(pcibdip);
- ASSERT(d_immu_devi);
- ASSERT(d_immu_devi->imd_pcib_type ==
- IMMU_PCIB_PCI_PCI);
r_bus = r_immu_devi->imd_bus;
r_devfunc = r_immu_devi->imd_devfunc;
context_set(immu, domain, immu->immu_ctx_root,
@@ -1777,7 +1724,6 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
context_set(immu, domain, immu->immu_ctx_root, d_bus,
d_devfunc);
} else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
- ASSERT(r_pcib_type == IMMU_PCIB_NOBDF);
/*
* ddip is a PCIE device which has a non-PCI device under it
* i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
@@ -1786,7 +1732,7 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
d_devfunc);
} else {
ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
- "set IMMU context.");
+ "set iommu context.");
/*NOTREACHED*/
}
@@ -1798,16 +1744,11 @@ immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
/* ##################### MAPPING CODE ################################## */
+#ifdef DEBUG
static boolean_t
PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
dev_info_t *rdip, immu_flags_t immu_flags)
{
- if (immu_flags & IMMU_FLAGS_PAGE1) {
- ASSERT(paddr == 0);
- } else {
- ASSERT((next == NULL) ^ (paddr == 0));
- }
-
/* The PDTE must be set i.e. present bit is set */
if (!PDTE_P(pdte)) {
ddi_err(DER_MODE, rdip, "No present flag");
@@ -1904,6 +1845,8 @@ PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
return (B_TRUE);
}
+#endif
+
/*ARGSUSED*/
static void
PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
@@ -1915,23 +1858,13 @@ PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
hw_pdte_t *hwp;
hw_pdte_t *shwp;
int idx;
- hw_pdte_t pte;
-
- ASSERT(xlate->xlt_level == 1);
pgtable = xlate->xlt_pgtable;
idx = xlate->xlt_idx;
- ASSERT(pgtable);
- ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
-
dvma = *dvma_ptr;
npages = *npages_ptr;
- ASSERT(dvma);
- ASSERT(dvma % IMMU_PAGESIZE == 0);
- ASSERT(npages);
-
/*
* since a caller gets a unique dvma for a physical address,
* no other concurrent thread will be writing to the same
@@ -1941,45 +1874,23 @@ PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
hwp = shwp;
for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
-
- pte = *hwp;
-
-	/* Cannot clear a HW PTE that is already clear */
- ASSERT(PDTE_P(pte));
- PDTE_CLEAR_P(pte);
- *hwp = pte;
-
+ PDTE_CLEAR_P(*hwp);
dvma += IMMU_PAGESIZE;
npages--;
}
-
-#ifdef TEST
- /* dont need to flush write during unmap */
- immu_regs_cpu_flush(immu, (caddr_t)shwp,
- (hwp - shwp) * sizeof (hw_pdte_t));
-#endif
-
*dvma_ptr = dvma;
*npages_ptr = npages;
xlate->xlt_idx = idx;
}
-/*ARGSUSED*/
static void
-xlate_setup(immu_t *immu, uint64_t dvma, xlate_t *xlate,
- int nlevels, dev_info_t *rdip)
+xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
{
int level;
uint64_t offbits;
- /* level 0 is never used. Sanity check */
- ASSERT(xlate->xlt_level == 0);
- ASSERT(xlate->xlt_idx == 0);
- ASSERT(xlate->xlt_pgtable == NULL);
- ASSERT(dvma % IMMU_PAGESIZE == 0);
-
/*
* Skip the first 12 bits which is the offset into
* 4K PFN (phys page frame based on IMMU_PAGESIZE)
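
The reworked xlate_setup() derives one page-table index per level from a DVMA by first discarding the 4K page offset and then peeling off a fixed-width index for each level. A standalone sketch of that bit slicing follows; the 9-bit index width and the four-level walk are assumptions made for illustration, not values taken from immu.h:

#include <stdint.h>
#include <stdio.h>

#define	SKETCH_PAGESHIFT	12	/* 4K IOMMU pages (assumed) */
#define	SKETCH_IDX_BITS		9	/* 512 entries per table (assumed) */
#define	SKETCH_IDX_MASK		((1u << SKETCH_IDX_BITS) - 1)

int
main(void)
{
	uint64_t dvma = 0x1234ab000ULL;
	uint64_t off = dvma >> SKETCH_PAGESHIFT;	/* strip the page offset */
	int level;

	/* level 1 is the leaf page table; higher levels are directories */
	for (level = 1; level <= 4; level++) {
		(void) printf("level %d index = %llu\n", level,
		    (unsigned long long)(off & SKETCH_IDX_MASK));
		off >>= SKETCH_IDX_BITS;
	}
	return (0);
}
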
@@ -1999,41 +1910,28 @@ xlate_setup(immu_t *immu, uint64_t dvma, xlate_t *xlate,
/*
* Read the pgtables
*/
-static void
-PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
- dev_info_t *rdip)
+static boolean_t
+PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
{
pgtable_t *pgtable;
pgtable_t *next;
- hw_pdte_t pde;
uint_t idx;
- /* xlate should be at level 0 */
- ASSERT(xlate->xlt_level == 0);
- ASSERT(xlate->xlt_idx == 0);
-
/* start with highest level pgtable i.e. root */
xlate += nlevels;
- ASSERT(xlate->xlt_level == nlevels);
if (xlate->xlt_pgtable == NULL) {
xlate->xlt_pgtable = domain->dom_pgtable_root;
}
for (; xlate->xlt_level > 1; xlate--) {
-
idx = xlate->xlt_idx;
pgtable = xlate->xlt_pgtable;
- ASSERT(pgtable);
- ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
-
if ((xlate - 1)->xlt_pgtable) {
continue;
}
- /* xlate's leafier level is not set, set it now */
-
/* Lock the pgtable in read mode */
rw_enter(&(pgtable->swpg_rwlock), RW_READER);
@@ -2042,16 +1940,76 @@ PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
* already point to a leafier pgtable.
*/
next = *(pgtable->swpg_next_array + idx);
- ASSERT(next);
+ (xlate - 1)->xlt_pgtable = next;
+ rw_exit(&(pgtable->swpg_rwlock));
+ if (next == NULL)
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
- pde = *((hw_pdte_t *)(pgtable->hwpg_vaddr) + idx);
+static void
+immu_fault_walk(void *arg, void *base, size_t len)
+{
+ uint64_t dvma, start;
- ASSERT(PDTE_check(immu, pde, next, 0, rdip, 0) == B_TRUE);
+ dvma = *(uint64_t *)arg;
+ start = (uint64_t)(uintptr_t)base;
- (xlate - 1)->xlt_pgtable = next;
+ if (dvma >= start && dvma < (start + len)) {
+ ddi_err(DER_WARN, NULL,
+ "faulting DVMA address is in vmem arena "
+ "(%" PRIx64 "-%" PRIx64 ")",
+ start, start + len);
+ *(uint64_t *)arg = ~0ULL;
+ }
+}
- rw_exit(&(pgtable->swpg_rwlock));
+void
+immu_print_fault_info(uint_t sid, uint64_t dvma)
+{
+ int nlevels;
+ xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
+ xlate_t *xlatep;
+ hw_pdte_t pte;
+ domain_t *domain;
+ immu_t *immu;
+ uint64_t dvma_arg;
+
+ if (mod_hash_find(bdf_domain_hash,
+ (void *)(uintptr_t)sid, (void *)&domain) != 0) {
+ ddi_err(DER_WARN, NULL,
+ "no domain for faulting SID %08x", sid);
+ return;
}
+
+ immu = domain->dom_immu;
+
+ dvma_arg = dvma;
+ vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
+ (void *)&dvma_arg);
+ if (dvma_arg != ~0ULL)
+ ddi_err(DER_WARN, domain->dom_dip,
+ "faulting DVMA address is not in vmem arena");
+
+ nlevels = immu->immu_dvma_nlevels;
+ xlate_setup(dvma, xlate, nlevels);
+
+ if (!PDE_lookup(domain, xlate, nlevels)) {
+ ddi_err(DER_WARN, domain->dom_dip,
+ "pte not found in domid %d for faulting addr %" PRIx64,
+ domain->dom_did, dvma);
+ return;
+ }
+
+ xlatep = &xlate[1];
+ pte = *((hw_pdte_t *)
+ (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
+
+ ddi_err(DER_WARN, domain->dom_dip,
+ "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
+ (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
}
/*ARGSUSED*/
@@ -2061,17 +2019,11 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
{
hw_pdte_t pte;
- pte = *hwp;
-
#ifndef DEBUG
- /* Set paddr */
- ASSERT(paddr % IMMU_PAGESIZE == 0);
- pte = 0;
+ pte = immu->immu_ptemask;
PDTE_SET_PADDR(pte, paddr);
- PDTE_SET_READ(pte);
- PDTE_SET_WRITE(pte);
- *hwp = pte;
#else
+ pte = *hwp;
if (PDTE_P(pte)) {
if (PDTE_PADDR(pte) != paddr) {
@@ -2085,17 +2037,13 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
#endif
}
- /* Don't touch SW4. It is the present field */
-
/* clear TM field if not reserved */
if (immu->immu_TM_reserved == B_FALSE) {
PDTE_CLEAR_TM(pte);
}
-#ifdef DEBUG
/* Clear 3rd field for system software - not used */
PDTE_CLEAR_SW3(pte);
-#endif
/* Set paddr */
ASSERT(paddr % IMMU_PAGESIZE == 0);
@@ -2107,21 +2055,15 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
PDTE_CLEAR_SNP(pte);
}
-#ifdef DEBUG
/* Clear SW2 field available for software */
PDTE_CLEAR_SW2(pte);
-#endif
-#ifdef DEBUG
/* SP is don't care for PTEs. Clear it for cleanliness */
PDTE_CLEAR_SP(pte);
-#endif
-#ifdef DEBUG
/* Clear SW1 field available for software */
PDTE_CLEAR_SW1(pte);
-#endif
/*
* Now that we are done writing the PTE
@@ -2136,7 +2078,10 @@ PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
PDTE_SET_P(pte);
+ pte |= immu->immu_ptemask;
+
out:
+#endif /* DEBUG */
#ifdef BUGGY_DRIVERS
PDTE_SET_READ(pte);
PDTE_SET_WRITE(pte);
@@ -2145,16 +2090,15 @@ out:
PDTE_SET_READ(pte);
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pte);
-#endif
+#endif /* BUGGY_DRIVERS */
*hwp = pte;
-#endif
}
/*ARGSUSED*/
static void
PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
- uint64_t *dvma_ptr, uint64_t *nvpages_ptr, dcookie_t *dcookies,
+ uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
paddr_t paddr;
@@ -2164,23 +2108,15 @@ PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
pgtable_t *pgtable;
hw_pdte_t *hwp;
hw_pdte_t *shwp;
- int idx;
+ int idx, nset;
int j;
- ASSERT(xlate->xlt_level == 1);
-
pgtable = xlate->xlt_pgtable;
idx = xlate->xlt_idx;
- ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
- ASSERT(pgtable);
-
dvma = *dvma_ptr;
nvpages = *nvpages_ptr;
- ASSERT(dvma || (immu_flags & IMMU_FLAGS_PAGE1));
- ASSERT(nvpages);
-
/*
* since a caller gets a unique dvma for a physical address,
* no other concurrent thread will be writing to the same
@@ -2195,19 +2131,15 @@ PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
nvpages -= dcookies[j].dck_npages;
}
- ASSERT(j >= 0);
- ASSERT(nvpages);
- ASSERT(nvpages <= dcookies[j].dck_npages);
nppages = nvpages;
paddr = dcookies[j].dck_paddr +
(dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;
nvpages = *nvpages_ptr;
+ nset = 0;
for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
-
- ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1));
-
PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
+ nset++;
ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
== B_TRUE);
@@ -2220,22 +2152,15 @@ PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
j++;
}
- if (j == dcount) {
- ASSERT(nvpages == 0);
+ if (j == dcount)
break;
- }
- ASSERT(nvpages);
if (nppages == 0) {
nppages = dcookies[j].dck_npages;
paddr = dcookies[j].dck_paddr;
}
}
- /* flush writes to HW PTE table */
- immu_regs_cpu_flush(immu, (caddr_t)shwp, (hwp - shwp) *
- sizeof (hw_pdte_t));
-
if (nvpages) {
*dvma_ptr = dvma;
*nvpages_ptr = nvpages;
@@ -2274,7 +2199,6 @@ PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
PDTE_CLEAR_SW3(pde);
/* Set next level pgtable-paddr for PDE */
- ASSERT(next->hwpg_paddr % IMMU_PAGESIZE == 0);
PDTE_CLEAR_PADDR(pde);
PDTE_SET_PADDR(pde, next->hwpg_paddr);
@@ -2314,8 +2238,6 @@ out:
PDTE_SET_P(pde);
*hwp = pde;
-
- immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t));
}
/*
@@ -2334,42 +2256,20 @@ PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
krw_t rwtype;
boolean_t set = B_FALSE;
- /* xlate should be at level 0 */
- ASSERT(xlate->xlt_level == 0);
- ASSERT(xlate->xlt_idx == 0);
-
/* start with highest level pgtable i.e. root */
xlate += nlevels;
- ASSERT(xlate->xlt_level == nlevels);
new = NULL;
xlate->xlt_pgtable = domain->dom_pgtable_root;
for (level = nlevels; level > 1; level--, xlate--) {
-
- ASSERT(xlate->xlt_level == level);
-
idx = xlate->xlt_idx;
pgtable = xlate->xlt_pgtable;
- ASSERT(pgtable);
- ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
-
- /* speculative alloc */
- if (new == NULL) {
- new = pgtable_alloc(immu, immu_flags);
- if (new == NULL) {
- ddi_err(DER_PANIC, rdip, "pgtable alloc err");
- }
- }
-
/* Lock the pgtable in READ mode first */
rw_enter(&(pgtable->swpg_rwlock), RW_READER);
rwtype = RW_READER;
again:
hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
-
- ASSERT(pgtable->swpg_next_array);
-
next = (pgtable->swpg_next_array)[idx];
/*
@@ -2377,6 +2277,19 @@ again:
* if yes, verify
*/
if (next == NULL) {
+ if (new == NULL) {
+
+ IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
+ rdip, int, level);
+
+ new = pgtable_alloc(immu, immu_flags);
+ if (new == NULL) {
+ ddi_err(DER_PANIC, rdip,
+ "pgtable alloc err");
+ }
+ pgtable_zero(new);
+ }
+
/* Change to a write lock */
if (rwtype == RW_READER &&
rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
@@ -2386,18 +2299,18 @@ again:
goto again;
}
rwtype = RW_WRITER;
- pgtable_zero(immu, new);
next = new;
- new = NULL;
(pgtable->swpg_next_array)[idx] = next;
+ new = NULL;
PDE_set_one(immu, hwp, next, rdip, immu_flags);
set = B_TRUE;
rw_downgrade(&(pgtable->swpg_rwlock));
rwtype = RW_READER;
- } else {
+ }
+#ifndef BUGGY_DRIVERS
+ else {
hw_pdte_t pde = *hwp;
-#ifndef BUGGY_DRIVERS
/*
* If buggy driver we already set permission
* READ+WRITE so nothing to do for that case
@@ -2409,16 +2322,14 @@ again:
if (immu_flags & IMMU_FLAGS_WRITE)
PDTE_SET_WRITE(pde);
-#endif
-
*hwp = pde;
}
+#endif
ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
== B_TRUE);
(xlate - 1)->xlt_pgtable = next;
- ASSERT(rwtype == RW_READER);
rw_exit(&(pgtable->swpg_rwlock));
}
@@ -2442,24 +2353,22 @@ again:
* immu_flags: flags
*/
static boolean_t
-dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snvpages,
- dcookie_t *dcookies, int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
+dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
+ immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
+ immu_flags_t immu_flags)
{
uint64_t dvma;
uint64_t n;
+ immu_t *immu = domain->dom_immu;
int nlevels = immu->immu_dvma_nlevels;
xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
boolean_t pde_set = B_FALSE;
- ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS);
- ASSERT(sdvma % IMMU_PAGESIZE == 0);
- ASSERT(snvpages);
-
n = snvpages;
dvma = sdvma;
while (n > 0) {
- xlate_setup(immu, dvma, xlate, nlevels, rdip);
+ xlate_setup(dvma, xlate, nlevels);
/* Lookup or allocate PGDIRs and PGTABLEs if necessary */
if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
@@ -2487,28 +2396,27 @@ dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snvpages,
* rdip: requesting device
*/
static void
-dvma_unmap(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snpages,
+dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
dev_info_t *rdip)
{
+ immu_t *immu = domain->dom_immu;
int nlevels = immu->immu_dvma_nlevels;
xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
uint64_t n;
uint64_t dvma;
- ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS);
- ASSERT(sdvma != 0);
- ASSERT(sdvma % IMMU_PAGESIZE == 0);
- ASSERT(snpages);
-
dvma = sdvma;
n = snpages;
while (n > 0) {
/* setup the xlate array */
- xlate_setup(immu, dvma, xlate, nlevels, rdip);
+ xlate_setup(dvma, xlate, nlevels);
/* just lookup existing pgtables. Should never fail */
- PDE_lookup(immu, domain, xlate, nlevels, rdip);
+ if (!PDE_lookup(domain, xlate, nlevels))
+ ddi_err(DER_PANIC, rdip,
+ "PTE not found for addr %" PRIx64,
+ (unsigned long long)dvma);
/* clear all matching ptes that fit into this leaf pgtable */
PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
@@ -2518,24 +2426,17 @@ dvma_unmap(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snpages,
}
static uint64_t
-dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
+dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
{
- ddi_dma_attr_t *dma_attr;
uint64_t dvma;
size_t xsize, align;
uint64_t minaddr, maxaddr;
- ASSERT(domain->dom_maptype != IMMU_MAPTYPE_UNITY);
-
-	/* shortcuts */
- dma_attr = &(hp->dmai_attr);
-
/* parameters */
xsize = npages * IMMU_PAGESIZE;
align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
minaddr = dma_attr->dma_attr_addr_lo;
maxaddr = dma_attr->dma_attr_addr_hi + 1;
- /* nocross is checked in cookie_update() */
/* handle the rollover cases */
if (maxaddr < dma_attr->dma_attr_addr_hi) {
@@ -2547,426 +2448,324 @@ dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages)
*/
dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
- (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
-
- ASSERT(dvma);
- ASSERT(dvma >= minaddr);
- ASSERT(dvma + xsize - 1 < maxaddr);
+ (void *)(uintptr_t)maxaddr, kmf);
return (dvma);
}
static void
-dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
-{
- uint64_t size = npages * IMMU_PAGESIZE;
-
- ASSERT(domain);
- ASSERT(domain->dom_did > 0);
- ASSERT(dvma);
- ASSERT(npages);
-
- if (domain->dom_maptype != IMMU_MAPTYPE_XLATE) {
- ASSERT(domain->dom_maptype == IMMU_MAPTYPE_UNITY);
- return;
- }
-
- vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
-}
-/*ARGSUSED*/
-static void
-cookie_free(rootnex_dma_t *dma, immu_t *immu, domain_t *domain,
- dev_info_t *rdip)
+dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
{
- int i;
- uint64_t dvma;
- uint64_t npages;
- dvcookie_t *dvcookies = dma->dp_dvcookies;
-
- ASSERT(dma->dp_max_cookies);
- ASSERT(dma->dp_max_dcookies);
- ASSERT(dma->dp_dvmax < dma->dp_max_cookies);
- ASSERT(dma->dp_dmax < dma->dp_max_dcookies);
+ int nlevels;
+ xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
+ uint64_t dvma, n;
+ size_t xsize, align;
+ uint64_t minaddr, maxaddr, dmamax;
+ int on, npte, pindex;
+ hw_pdte_t *shwp;
+ immu_t *immu;
+ domain_t *domain;
- /*
- * we allocated DVMA in a single chunk. Calculate total number
- * of pages
- */
- for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) {
- npages += dvcookies[i].dvck_npages;
- }
- dvma = dvcookies[0].dvck_dvma;
-#ifdef DEBUG
- /* Unmap only in DEBUG mode */
- dvma_unmap(immu, domain, dvma, npages, rdip);
-#endif
- dvma_free(domain, dvma, npages);
+ /* parameters */
+ domain = IMMU_DEVI(rdip)->imd_domain;
+ immu = domain->dom_immu;
+ nlevels = immu->immu_dvma_nlevels;
+ xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
+ align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
+ minaddr = dma_attr->dma_attr_addr_lo;
+ if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
+ dmamax = dma_attr->dma_attr_seg;
+ else
+ dmamax = dma_attr->dma_attr_addr_hi;
+ maxaddr = dmamax + 1;
- kmem_free(dma->dp_dvcookies, sizeof (dvcookie_t) * dma->dp_max_cookies);
- dma->dp_dvcookies = NULL;
- kmem_free(dma->dp_dcookies, sizeof (dcookie_t) * dma->dp_max_dcookies);
- dma->dp_dcookies = NULL;
- if (dma->dp_need_to_free_cookie == B_TRUE) {
- kmem_free(dma->dp_cookies, sizeof (ddi_dma_cookie_t) *
- dma->dp_max_cookies);
- dma->dp_dcookies = NULL;
- dma->dp_need_to_free_cookie = B_FALSE;
- }
+ if (maxaddr < dmamax)
+ maxaddr = dmamax;
- dma->dp_max_cookies = 0;
- dma->dp_max_dcookies = 0;
- dma->dp_cookie_size = 0;
- dma->dp_dvmax = 0;
- dma->dp_dmax = 0;
-}
+ dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
+ xsize, align, 0, dma_attr->dma_attr_seg + 1,
+ (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);
-/*
- * cookie_alloc()
- */
-static int
-cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq,
- ddi_dma_attr_t *attr, uint_t prealloc)
-{
- int kmflag;
- rootnex_sglinfo_t *sinfo = &(dma->dp_sglinfo);
- dvcookie_t *dvcookies = dma->dp_dvcookies;
- dcookie_t *dcookies = dma->dp_dcookies;
- ddi_dma_cookie_t *cookies = dma->dp_cookies;
- uint64_t max_cookies;
- uint64_t max_dcookies;
- uint64_t cookie_size;
-
- /* we need to allocate new array */
- if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
- kmflag = KM_SLEEP;
- } else {
- kmflag = KM_NOSLEEP;
- }
+ ihp->ihp_predvma = dvma;
+ ihp->ihp_npremapped = 0;
+ if (dvma == 0)
+ return;
- /*
- * XXX make sure cookies size doen't exceed sinfo->si_max_cookie_size;
- */
+ n = IMMU_NPREPTES;
+ pindex = 0;
/*
- * figure out the rough estimate of array size
- * At a minimum, each cookie must hold 1 page.
- * At a maximum, it cannot exceed dma_attr_sgllen
+ * Set up a mapping at address 0, just so that all PDPs get allocated
+ * now. Although this initial mapping should never be used,
+ * explicitly set it to read-only, just to be safe.
*/
- max_dcookies = dmareq->dmar_object.dmao_size + IMMU_PAGEOFFSET;
- max_dcookies /= IMMU_PAGESIZE;
- max_dcookies++;
- max_cookies = MIN(max_dcookies, attr->dma_attr_sgllen);
-
- /* allocate the dvma cookie array */
- dvcookies = kmem_zalloc(sizeof (dvcookie_t) * max_cookies, kmflag);
- if (dvcookies == NULL) {
- return (DDI_FAILURE);
- }
+ while (n > 0) {
+ xlate_setup(dvma, xlate, nlevels);
- /* allocate the "phys" cookie array */
- dcookies = kmem_zalloc(sizeof (dcookie_t) * max_dcookies, kmflag);
- if (dcookies == NULL) {
- kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies);
- dvcookies = NULL;
- return (DDI_FAILURE);
- }
+ (void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
+ IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
- /* allocate the "real" cookie array - the one given to users */
- cookie_size = sizeof (ddi_dma_cookie_t) * max_cookies;
- if (max_cookies > prealloc) {
- cookies = kmem_zalloc(cookie_size, kmflag);
- if (cookies == NULL) {
- kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies);
- kmem_free(dcookies, sizeof (dcookie_t) * max_dcookies);
- goto fail;
- }
- dma->dp_need_to_free_cookie = B_TRUE;
- } else {
- /* the preallocated buffer fits this size */
- cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
- bzero(cookies, sizeof (ddi_dma_cookie_t)* max_cookies);
- dma->dp_need_to_free_cookie = B_FALSE;
- }
-
- dma->dp_dvcookies = dvcookies;
- dma->dp_dcookies = dcookies;
- dma->dp_cookies = cookies;
- dma->dp_cookie_size = cookie_size;
- dma->dp_max_cookies = max_cookies;
- dma->dp_max_dcookies = max_dcookies;
- dma->dp_dvmax = 0;
- dma->dp_dmax = 0;
- sinfo->si_max_pages = dma->dp_max_cookies;
+ xlp = &xlate[1];
+ shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
+ + xlp->xlt_idx;
+ on = n;
- return (DDI_SUCCESS);
+ PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
+ 1, rdip, IMMU_FLAGS_READ);
-fail:
- dma->dp_dvcookies = NULL;
- dma->dp_dcookies = NULL;
- dma->dp_cookies = NULL;
- dma->dp_cookie_size = 0;
- dma->dp_max_cookies = 0;
- dma->dp_max_dcookies = 0;
- dma->dp_dvmax = 0;
- dma->dp_dmax = 0;
- dma->dp_need_to_free_cookie = B_FALSE;
- sinfo->si_max_pages = 0;
+ npte = on - n;
- return (DDI_FAILURE);
+ while (npte > 0) {
+ ihp->ihp_preptes[pindex++] = shwp;
+#ifdef BUGGY_DRIVERS
+ PDTE_CLEAR_WRITE(*shwp);
+#endif
+ shwp++;
+ npte--;
+ }
+ }
}
-/*ARGSUSED*/
static void
-cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr,
- int64_t psize, uint64_t maxseg, size_t nocross)
+dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
{
- dvcookie_t *dvcookies = dma->dp_dvcookies;
- dcookie_t *dcookies = dma->dp_dcookies;
- ddi_dma_cookie_t *cookies = dma->dp_cookies;
- uint64_t dvmax = dma->dp_dvmax;
- uint64_t dmax = dma->dp_dmax;
-
- ASSERT(dvmax < dma->dp_max_cookies);
- ASSERT(dmax < dma->dp_max_dcookies);
-
- paddr &= IMMU_PAGEMASK;
-
- ASSERT(paddr);
- ASSERT(psize);
- ASSERT(maxseg);
-
- /*
- * check to see if this page would put us
- * over the max cookie size.
- */
- if (cookies[dvmax].dmac_size + psize > maxseg) {
- dvmax++; /* use the next dvcookie */
- dmax++; /* also means we use the next dcookie */
- ASSERT(dvmax < dma->dp_max_cookies);
- ASSERT(dmax < dma->dp_max_dcookies);
- }
+ domain_t *domain;
- /*
- * check to see if this page would make us larger than
- * the nocross boundary. If yes, create a new cookie
- * otherwise we will fail later with vmem_xalloc()
- * due to overconstrained alloc requests
- * nocross == 0 implies no nocross constraint.
- */
- if (nocross > 0) {
- ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE
- <= nocross);
- if ((dvcookies[dvmax].dvck_npages + 1) * IMMU_PAGESIZE
- > nocross) {
- dvmax++; /* use the next dvcookie */
- dmax++; /* also means we use the next dcookie */
- ASSERT(dvmax < dma->dp_max_cookies);
- ASSERT(dmax < dma->dp_max_dcookies);
- }
- ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE
- <= nocross);
- }
+ domain = IMMU_DEVI(rdip)->imd_domain;
- /*
- * If the cookie is empty
- */
- if (dvcookies[dvmax].dvck_npages == 0) {
- ASSERT(cookies[dvmax].dmac_size == 0);
- ASSERT(dvcookies[dvmax].dvck_dvma == 0);
- ASSERT(dvcookies[dvmax].dvck_npages
- == 0);
- ASSERT(dcookies[dmax].dck_paddr == 0);
- ASSERT(dcookies[dmax].dck_npages == 0);
-
- dvcookies[dvmax].dvck_dvma = 0;
- dvcookies[dvmax].dvck_npages = 1;
- dcookies[dmax].dck_paddr = paddr;
- dcookies[dmax].dck_npages = 1;
- cookies[dvmax].dmac_size = psize;
- } else {
- /* Cookie not empty. Add to it */
- cookies[dma->dp_dvmax].dmac_size += psize;
- ASSERT(dvcookies[dma->dp_dvmax].dvck_dvma == 0);
- dvcookies[dma->dp_dvmax].dvck_npages++;
- ASSERT(dcookies[dmax].dck_paddr != 0);
- ASSERT(dcookies[dmax].dck_npages != 0);
-
- /* Check if this paddr is contiguous */
- if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
- dcookies[dmax].dck_npages++;
- } else {
- /* No, we need a new dcookie */
- dmax++;
- ASSERT(dcookies[dmax].dck_paddr == 0);
- ASSERT(dcookies[dmax].dck_npages == 0);
- dcookies[dmax].dck_paddr = paddr;
- dcookies[dmax].dck_npages = 1;
- }
+ if (ihp->ihp_predvma != 0) {
+ dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
+ vmem_free(domain->dom_dvma_arena,
+ (void *)(uintptr_t)ihp->ihp_predvma,
+ IMMU_NPREPTES * IMMU_PAGESIZE);
}
-
- dma->dp_dvmax = dvmax;
- dma->dp_dmax = dmax;
}
static void
-cookie_finalize(ddi_dma_impl_t *hp, immu_t *immu, domain_t *domain,
- dev_info_t *rdip, immu_flags_t immu_flags)
+dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
{
- int i;
- rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
- dvcookie_t *dvcookies = dma->dp_dvcookies;
- dcookie_t *dcookies = dma->dp_dcookies;
- ddi_dma_cookie_t *cookies = dma->dp_cookies;
- uint64_t npages;
- uint64_t dvma;
- boolean_t pde_set;
-
- /* First calculate the total number of pages required */
- for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) {
- npages += dvcookies[i].dvck_npages;
- }
-
- /* Now allocate dvma */
- dvma = dvma_alloc(hp, domain, npages);
-
- /* Now map the dvma */
- pde_set = dvma_map(immu, domain, dvma, npages, dcookies,
- dma->dp_dmax + 1, rdip, immu_flags);
-
- /* Invalidate the IOTLB */
- immu_flush_iotlb_psi(immu, domain->dom_did, dvma, npages,
- pde_set == B_TRUE ? TLB_IVA_WHOLE : TLB_IVA_LEAF);
+ uint64_t size = npages * IMMU_PAGESIZE;
- /* Now setup dvcookies and real cookie addresses */
- for (i = 0; i <= dma->dp_dvmax; i++) {
- dvcookies[i].dvck_dvma = dvma;
- cookies[i].dmac_laddress = dvma;
- ASSERT(cookies[i].dmac_size != 0);
- cookies[i].dmac_type = 0;
- dvma += (dvcookies[i].dvck_npages * IMMU_PAGESIZE);
- }
+ if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
+ return;
-#ifdef TEST
- immu_flush_iotlb_dsi(immu, domain->dom_did);
-#endif
+ vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
}
-/*
- * cookie_create()
- */
static int
-cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
- ddi_dma_attr_t *a, immu_t *immu, domain_t *domain, dev_info_t *rdip,
- uint_t prealloc_count, immu_flags_t immu_flags)
+immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
+ immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
+ ddi_dma_obj_t *dma_out)
{
+ domain_t *domain;
+ immu_t *immu;
+ immu_flags_t immu_flags;
ddi_dma_atyp_t buftype;
- uint64_t offset;
+ ddi_dma_obj_t *dmar_object;
+ ddi_dma_attr_t *attrp;
+ uint64_t offset, paddr, dvma, sdvma, rwmask;
+ size_t npages, npgalloc;
+ uint_t psize, size, pcnt, dmax;
page_t **pparray;
- uint64_t paddr;
- uint_t psize;
- uint_t size;
- uint64_t maxseg;
caddr_t vaddr;
- uint_t pcnt;
page_t *page;
- rootnex_sglinfo_t *sglinfo;
- ddi_dma_obj_t *dmar_object;
- rootnex_dma_t *dma;
- size_t nocross;
+ struct as *vas;
+ immu_dcookie_t *dcookies;
+ int pde_set;
- dma = (rootnex_dma_t *)hp->dmai_private;
- sglinfo = &(dma->dp_sglinfo);
- dmar_object = &(dmareq->dmar_object);
- maxseg = sglinfo->si_max_cookie_size;
+ domain = IMMU_DEVI(rdip)->imd_domain;
+ immu = domain->dom_immu;
+ immu_flags = dma_to_immu_flags(dmareq);
+
+ attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
+
+ dmar_object = &dmareq->dmar_object;
pparray = dmar_object->dmao_obj.virt_obj.v_priv;
vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
buftype = dmar_object->dmao_type;
size = dmar_object->dmao_size;
- nocross = (size_t)(a->dma_attr_seg + 1);
- /*
- * Allocate cookie, dvcookie and dcookie
- */
- if (cookie_alloc(dma, dmareq, a, prealloc_count) != DDI_SUCCESS) {
- return (DDI_FAILURE);
- }
- hp->dmai_cookie = dma->dp_cookies;
+ IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
+ buftype, uint_t, size);
+
+ dcookies = &ihp->ihp_dcookies[0];
- pcnt = 0;
+ pcnt = dmax = 0;
/* retrieve paddr, psize, offset from dmareq */
if (buftype == DMA_OTYP_PAGES) {
page = dmar_object->dmao_obj.pp_obj.pp_pp;
- ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page));
offset = dmar_object->dmao_obj.pp_obj.pp_offset &
MMU_PAGEOFFSET;
paddr = pfn_to_pa(page->p_pagenum) + offset;
psize = MIN((MMU_PAGESIZE - offset), size);
- sglinfo->si_asp = NULL;
page = page->p_next;
+ vas = dmar_object->dmao_obj.virt_obj.v_as;
} else {
- ASSERT((buftype == DMA_OTYP_VADDR) ||
- (buftype == DMA_OTYP_BUFVADDR));
- sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
- if (sglinfo->si_asp == NULL) {
- sglinfo->si_asp = &kas;
+ if (vas == NULL) {
+ vas = &kas;
}
offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
if (pparray != NULL) {
- ASSERT(!PP_ISFREE(pparray[pcnt]));
paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
psize = MIN((MMU_PAGESIZE - offset), size);
pcnt++;
} else {
- paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat,
+ paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
vaddr)) + offset;
psize = MIN(size, (MMU_PAGESIZE - offset));
vaddr += psize;
}
}
- /* save the iommu page offset */
- sglinfo->si_buf_offset = offset & IMMU_PAGEOFFSET;
+ npgalloc = IMMU_BTOPR(size + offset);
- /*
- * setup dvcookie and dcookie for [paddr, paddr+psize)
- */
- cookie_update(domain, dma, paddr, psize, maxseg, nocross);
+ if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
+#ifdef BUGGY_DRIVERS
+ rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
+#else
+ rwmask = immu->immu_ptemask;
+ if (immu_flags & IMMU_FLAGS_READ)
+ rwmask |= PDTE_MASK_R;
+ if (immu_flags & IMMU_FLAGS_WRITE)
+ rwmask |= PDTE_MASK_W;
+#endif
+#ifdef DEBUG
+ rwmask |= PDTE_MASK_P;
+#endif
+ sdvma = ihp->ihp_predvma;
+ ihp->ihp_npremapped = npgalloc;
+ *ihp->ihp_preptes[0] =
+ PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
+ } else {
+ ihp->ihp_npremapped = 0;
+ sdvma = dvma_alloc(domain, attrp, npgalloc,
+ dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
+ if (sdvma == 0)
+ return (DDI_DMA_NORESOURCES);
+ dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
+ dcookies[0].dck_npages = 1;
+ }
+
+ IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
+ uint64_t, sdvma);
+
+ dvma = sdvma;
+ pde_set = 0;
+ npages = 1;
size -= psize;
while (size > 0) {
/* get the size for this page (i.e. partial or full page) */
psize = MIN(size, MMU_PAGESIZE);
if (buftype == DMA_OTYP_PAGES) {
/* get the paddr from the page_t */
- ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page));
paddr = pfn_to_pa(page->p_pagenum);
page = page->p_next;
} else if (pparray != NULL) {
/* index into the array of page_t's to get the paddr */
- ASSERT(!PP_ISFREE(pparray[pcnt]));
paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
pcnt++;
} else {
/* call into the VM to get the paddr */
- paddr = pfn_to_pa(hat_getpfnum
- (sglinfo->si_asp->a_hat, vaddr));
+ paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
vaddr += psize;
}
- /*
- * set dvcookie and dcookie for [paddr, paddr+psize)
- */
- cookie_update(domain, dma, paddr, psize, maxseg, nocross);
+
+ npages++;
+
+ if (ihp->ihp_npremapped > 0) {
+ *ihp->ihp_preptes[npages - 1] =
+ PDTE_PADDR(paddr) | rwmask;
+ } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
+ dcookies[dmax].dck_npages++;
+ } else {
+ /* No, we need a new dcookie */
+ if (dmax == (IMMU_NDCK - 1)) {
+ /*
+ * Ran out of dcookies. Map them now.
+ */
+ if (dvma_map(domain, dvma,
+ npages, dcookies, dmax + 1, rdip,
+ immu_flags))
+ pde_set++;
+
+ IMMU_DPROBE4(immu__dvmamap__early,
+ dev_info_t *, rdip, uint64_t, dvma,
+ uint_t, npages, uint_t, dmax+1);
+
+ dvma += (npages << IMMU_PAGESHIFT);
+ npages = 0;
+ dmax = 0;
+ } else
+ dmax++;
+ dcookies[dmax].dck_paddr = paddr;
+ dcookies[dmax].dck_npages = 1;
+ }
size -= psize;
}
- cookie_finalize(hp, immu, domain, rdip, immu_flags);
+ /*
+ * Finish up, mapping all, or all of the remaining,
+ * physical memory ranges.
+ */
+ if (ihp->ihp_npremapped == 0 && npages > 0) {
+		IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip,
+ uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
+
+ if (dvma_map(domain, dvma, npages, dcookies,
+ dmax + 1, rdip, immu_flags))
+ pde_set++;
+ }
+
+ /* Invalidate the IOTLB */
+ immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
+ pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
+ &ihp->ihp_inv_wait);
+
+ ihp->ihp_ndvseg = 1;
+ ihp->ihp_dvseg[0].dvs_start = sdvma;
+ ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
+
+ dma_out->dmao_size = dmar_object->dmao_size;
+ dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
+ dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
+ dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
+ dma_out->dmao_type = DMA_OTYP_DVADDR;
+
+ return (DDI_DMA_MAPPED);
+}
+
+static int
+immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
+{
+ uint64_t dvma, npages;
+ domain_t *domain;
+ struct dvmaseg *dvs;
+
+ domain = IMMU_DEVI(rdip)->imd_domain;
+ dvs = dmao->dmao_obj.dvma_obj.dv_seg;
- /* take account in the offset into the first page */
- dma->dp_cookies[0].dmac_laddress += sglinfo->si_buf_offset;
+ dvma = dvs[0].dvs_start;
+ npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
- /* save away how many cookies we have */
- sglinfo->si_sgl_size = dma->dp_dvmax + 1;
+#ifdef DEBUG
+ /* Unmap only in DEBUG mode */
+ dvma_unmap(domain, dvma, npages, rdip);
+#endif
+ dvma_free(domain, dvma, npages);
+
+ IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
+ uint64_t, dvma);
+
+#ifdef DEBUG
+ /*
+ * In the DEBUG case, the unmap was actually done,
+ * but an IOTLB flush was not done. So, an explicit
+ * write back flush is needed.
+ */
+ immu_regs_wbf_flush(domain->dom_immu);
+#endif
return (DDI_SUCCESS);
}
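
Both immu_map_dvmaseg() and immu_unmap_dvmaseg() size the DVMA region as IMMU_BTOPR(len + off): the byte length plus the offset into the first page, rounded up to whole IOMMU pages. A minimal sketch of that arithmetic, using a local macro in place of IMMU_BTOPR and assuming a 4K IOMMU page size:

#include <stdint.h>
#include <stdio.h>

#define	SKETCH_PAGESIZE	4096ULL		/* assumed IOMMU page size */
#define	SKETCH_BTOPR(x)	(((x) + SKETCH_PAGESIZE - 1) / SKETCH_PAGESIZE)

int
main(void)
{
	uint64_t off = 0x300;	/* offset into the first page */
	uint64_t len = 0x2100;	/* transfer length in bytes */

	/* 0x300 + 0x2100 = 0x2400 bytes, which spans 3 pages */
	(void) printf("pages = %llu\n",
	    (unsigned long long)SKETCH_BTOPR(len + off));
	return (0);
}
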
@@ -3001,7 +2800,6 @@ immu_dvma_setup(list_t *listp)
nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
KM_NOSLEEP);
- ASSERT(bdf_domain_hash);
immu = list_head(listp);
for (; immu; immu = list_next(listp, immu)) {
@@ -3018,9 +2816,6 @@ immu_dvma_setup(list_t *listp)
void
immu_dvma_startup(immu_t *immu)
{
- ASSERT(immu);
- ASSERT(immu->immu_dvma_running == B_FALSE);
-
if (immu_gfxdvma_enable == B_FALSE &&
immu->immu_dvma_gfx_only == B_TRUE) {
return;
@@ -3029,7 +2824,6 @@ immu_dvma_startup(immu_t *immu)
/*
* DVMA will start once IOMMU is "running"
*/
- ASSERT(immu->immu_dvma_running == B_FALSE);
immu->immu_dvma_running = B_TRUE;
}
@@ -3045,7 +2839,7 @@ immu_dvma_physmem_update(uint64_t addr, uint64_t size)
uint64_t start;
uint64_t npages;
int dcount;
- dcookie_t dcookies[1] = {0};
+ immu_dcookie_t dcookies[1] = {0};
domain_t *domain;
/*
@@ -3057,9 +2851,15 @@ immu_dvma_physmem_update(uint64_t addr, uint64_t size)
mutex_enter(&immu_domain_lock);
domain = list_head(&immu_unity_domain_list);
for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
+ /*
+ * Nothing to do if the IOMMU supports passthrough.
+ */
+ if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
+ continue;
/* There is no vmem_arena for unity domains. Just map it */
- ddi_err(DER_LOG, NULL, "IMMU: unity-domain: Adding map "
+ ddi_err(DER_LOG, domain->dom_dip,
+ "iommu: unity-domain: Adding map "
"[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
start = IMMU_ROUNDOWN(addr);
@@ -3068,46 +2868,21 @@ immu_dvma_physmem_update(uint64_t addr, uint64_t size)
dcookies[0].dck_paddr = start;
dcookies[0].dck_npages = npages;
dcount = 1;
- (void) dvma_map(domain->dom_immu, domain, start, npages,
+ (void) dvma_map(domain, start, npages,
dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
}
mutex_exit(&immu_domain_lock);
}
-
int
-immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
- uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags)
+immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
{
- ddi_dma_attr_t *attr;
- dev_info_t *ddip;
- domain_t *domain;
+ dev_info_t *ddip, *odip;
immu_t *immu;
- dcookie_t dcookies[1] = {0};
- int dcount = 0;
- boolean_t pde_set = B_TRUE;
- int r = DDI_FAILURE;
-
- ASSERT(immu_enable == B_TRUE);
- ASSERT(immu_running == B_TRUE || !(immu_flags & IMMU_FLAGS_DMAHDL));
- ASSERT(hp || !(immu_flags & IMMU_FLAGS_DMAHDL));
-
- /*
- * Intel IOMMU will only be turned on if IOMMU
- * page size is a multiple of IOMMU page size
- */
-
- /*LINTED*/
- ASSERT(MMU_PAGESIZE % IMMU_PAGESIZE == 0);
-
- /* Can only do DVMA if dip is attached */
- if (rdip == NULL) {
- ddi_err(DER_PANIC, rdip, "DVMA map: No device specified");
- /*NOTREACHED*/
- }
+ domain_t *domain;
- immu_flags |= dma_to_immu_flags(dmareq);
+ odip = rdip;
immu = immu_dvma_get_immu(rdip, immu_flags);
if (immu == NULL) {
@@ -3115,7 +2890,7 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
* possible that there is no IOMMU unit for this device
* - BIOS bugs are one example.
*/
- ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
+ ddi_err(DER_WARN, rdip, "No iommu unit found for device");
return (DDI_DMA_NORESOURCES);
}
@@ -3125,7 +2900,7 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
rdip = get_lpc_devinfo(immu, rdip, immu_flags);
if (rdip == NULL) {
- ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
+ ddi_err(DER_PANIC, rdip, "iommu redirect failed");
/*NOTREACHED*/
}
}
@@ -3139,7 +2914,7 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
rdip = get_gfx_devinfo(rdip);
if (rdip == NULL) {
- ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
+ ddi_err(DER_PANIC, rdip, "iommu redirect failed");
/*NOTREACHED*/
}
}
@@ -3152,11 +2927,12 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
ddip = NULL;
domain = device_domain(rdip, &ddip, immu_flags);
if (domain == NULL) {
- ASSERT(ddip == NULL);
ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
return (DDI_DMA_NORESOURCES);
}
+ immu = domain->dom_immu;
+
/*
* If a domain is found, we must also have a domain dip
* which is the topmost ancestor dip of rdip that shares
@@ -3168,45 +2944,8 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
return (DDI_DMA_NORESOURCES);
}
- immu = domain->dom_immu;
- ASSERT(immu);
- if (domain->dom_did == IMMU_UNITY_DID) {
- ASSERT(domain == immu->immu_unity_domain);
- /* mapping already done. Let rootnex create cookies */
- r = DDI_DMA_USE_PHYSICAL;
- } else if (immu_flags & IMMU_FLAGS_DMAHDL) {
-
- /* if we have a DMA handle, the IOMMUs must be running */
- ASSERT(immu->immu_regs_running == B_TRUE);
- ASSERT(immu->immu_dvma_running == B_TRUE);
-
- attr = &hp->dmai_attr;
- if (attr == NULL) {
- ddi_err(DER_PANIC, rdip,
- "DMA handle (%p): NULL attr", hp);
- /*NOTREACHED*/
- }
-
- if (cookie_create(hp, dmareq, attr, immu, domain, rdip,
- prealloc_count, immu_flags) != DDI_SUCCESS) {
- ddi_err(DER_MODE, rdip, "dvcookie_alloc: failed");
- return (DDI_DMA_NORESOURCES);
- }
- r = DDI_DMA_MAPPED;
- } else if (immu_flags & IMMU_FLAGS_MEMRNG) {
- dcookies[0].dck_paddr = mrng->mrng_start;
- dcookies[0].dck_npages = mrng->mrng_npages;
- dcount = 1;
- pde_set = dvma_map(immu, domain, mrng->mrng_start,
- mrng->mrng_npages, dcookies, dcount, rdip, immu_flags);
- immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
- mrng->mrng_npages, pde_set == B_TRUE ?
- TLB_IVA_WHOLE : TLB_IVA_LEAF);
- r = DDI_DMA_MAPPED;
- } else {
- ddi_err(DER_PANIC, rdip, "invalid flags for immu_dvma_map()");
- /*NOTREACHED*/
- }
+ if (odip != rdip)
+ set_domain(odip, ddip, domain);
/*
* Update the root and context entries
@@ -3217,133 +2956,251 @@ immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng,
return (DDI_DMA_NORESOURCES);
}
- immu_regs_wbf_flush(immu);
-
- return (r);
+ return (DDI_SUCCESS);
}
int
-immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip)
+immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
{
- ddi_dma_attr_t *attr;
- rootnex_dma_t *dma;
- domain_t *domain;
+ immu_dcookie_t dcookies[1] = {0};
+ boolean_t pde_set;
immu_t *immu;
- dev_info_t *ddip;
- immu_flags_t immu_flags;
+ domain_t *domain;
+ immu_inv_wait_t iw;
- ASSERT(immu_enable == B_TRUE);
- ASSERT(immu_running == B_TRUE);
- ASSERT(hp);
+ dcookies[0].dck_paddr = mrng->mrng_start;
+ dcookies[0].dck_npages = mrng->mrng_npages;
- /*
- * Intel IOMMU will only be turned on if IOMMU
- * page size is same as MMU page size
- */
- /*LINTED*/
- ASSERT(MMU_PAGESIZE == IMMU_PAGESIZE);
+ domain = IMMU_DEVI(rdip)->imd_domain;
+ immu = domain->dom_immu;
- /* rdip need not be attached */
- if (rdip == NULL) {
- ddi_err(DER_PANIC, rdip, "DVMA unmap: No device specified");
- return (DDI_DMA_NORESOURCES);
- }
+ pde_set = dvma_map(domain, mrng->mrng_start,
+ mrng->mrng_npages, dcookies, 1, rdip,
+ IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
- /*
- * Get the device domain, this should always
- * succeed since there had to be a domain to
- * setup DVMA.
- */
- dma = (rootnex_dma_t *)hp->dmai_private;
- attr = &hp->dmai_attr;
- if (attr == NULL) {
- ddi_err(DER_PANIC, rdip, "DMA handle (%p) has NULL attr", hp);
- /*NOTREACHED*/
- }
- immu_flags = dma->dp_sleep_flags;
+ immu_init_inv_wait(&iw, "memrange", B_TRUE);
- immu = immu_dvma_get_immu(rdip, immu_flags);
- if (immu == NULL) {
- /*
- * possible that there is no IOMMU unit for this device
- * - BIOS bugs are one example.
- */
- ddi_err(DER_WARN, rdip, "No IMMU unit found for device");
- return (DDI_DMA_NORESOURCES);
- }
+ immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
+ mrng->mrng_npages, pde_set == B_TRUE ?
+ TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
+
+ return (DDI_SUCCESS);
+}
+
+immu_devi_t *
+immu_devi_get(dev_info_t *rdip)
+{
+ immu_devi_t *immu_devi;
+ volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
+
+ /* Just want atomic reads. No need for lock */
+ immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
+ 0);
+ return (immu_devi);
+}
+
+/*ARGSUSED*/
+int
+immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
+{
+ immu_hdl_priv_t *ihp;
+
+ ihp = buf;
+ immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
+
+ return (0);
+}
+
+/*
+ * iommulib interface functions
+ */
+static int
+immu_probe(iommulib_handle_t handle, dev_info_t *dip)
+{
+ immu_devi_t *immu_devi;
+ int ret;
+ if (!immu_enable)
+ return (DDI_FAILURE);
/*
- * redirect isa devices attached under lpc to lpc dip
+ * Make sure the device has all the IOMMU structures
+ * initialized. If this device goes through an IOMMU
+	 * unit (i.e. this probe function returns success),
+ * this will be called at most N times, with N being
+ * the number of IOMMUs in the system.
+ *
+ * After that, when iommulib_nex_open succeeds,
+ * we can always assume that this device has all
+ * the structures initialized. IOMMU_USED(dip) will
+ * be true. There is no need to find the controlling
+ * IOMMU/domain again.
*/
- if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
- rdip = get_lpc_devinfo(immu, rdip, immu_flags);
- if (rdip == NULL) {
- ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
- /*NOTREACHED*/
- }
- }
+ ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
+ if (ret != DDI_SUCCESS)
+ return (ret);
- /* Reset immu, as redirection can change IMMU */
- immu = NULL;
+ immu_devi = IMMU_DEVI(dip);
/*
- * for gart, redirect to the real graphic devinfo
+ * For unity domains, there is no need to call in to
+ * the IOMMU code.
*/
- if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
- rdip = get_gfx_devinfo(rdip);
- if (rdip == NULL) {
- ddi_err(DER_PANIC, rdip, "IMMU redirect failed");
- /*NOTREACHED*/
+ if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
+ return (DDI_FAILURE);
+
+ if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
+ return (DDI_SUCCESS);
+
+ return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+immu_allochdl(iommulib_handle_t handle,
+ dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
+ int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
+{
+ int ret;
+ immu_hdl_priv_t *ihp;
+ immu_t *immu;
+
+ ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
+ arg, dma_handlep);
+ if (ret == DDI_SUCCESS) {
+ immu = IMMU_DEVI(rdip)->imd_immu;
+
+ ihp = kmem_cache_alloc(immu->immu_hdl_cache,
+ waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
+ if (ihp == NULL) {
+ (void) iommulib_iommu_dma_freehdl(dip, rdip,
+ *dma_handlep);
+ return (DDI_DMA_NORESOURCES);
}
- }
- ddip = NULL;
- domain = device_domain(rdip, &ddip, immu_flags);
- if (domain == NULL || domain->dom_did == 0 || ddip == NULL) {
- ddi_err(DER_MODE, rdip, "Attempt to unmap DVMA for "
- "a device without domain or with an uninitialized "
- "domain");
- return (DDI_DMA_NORESOURCES);
+ if (IMMU_DEVI(rdip)->imd_use_premap)
+ dvma_prealloc(rdip, ihp, attr);
+ else {
+ ihp->ihp_npremapped = 0;
+ ihp->ihp_predvma = 0;
+ }
+ ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
+ ihp);
}
+ return (ret);
+}
- /*
- * immu must be set in the domain.
- */
- immu = domain->dom_immu;
- ASSERT(immu);
- if (domain->dom_did == IMMU_UNITY_DID) {
- ASSERT(domain == immu->immu_unity_domain);
- /*
- * domain is unity, nothing to do here, let the rootnex
- * code free the cookies.
- */
- return (DDI_DMA_USE_PHYSICAL);
+/*ARGSUSED*/
+static int
+immu_freehdl(iommulib_handle_t handle,
+ dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
+{
+ immu_hdl_priv_t *ihp;
+
+ ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
+ if (ihp != NULL) {
+ if (IMMU_DEVI(rdip)->imd_use_premap)
+ dvma_prefree(rdip, ihp);
+ kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
}
- dma = hp->dmai_private;
- if (dma == NULL) {
- ddi_err(DER_PANIC, rdip, "DVMA unmap: DMA handle (%p) has "
- "no private dma structure", hp);
- /*NOTREACHED*/
+ return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
+}
+
+
+/*ARGSUSED*/
+static int
+immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
+ uint_t *ccountp)
+{
+ int ret;
+ immu_hdl_priv_t *ihp;
+
+ ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
+ dma_req, cookiep, ccountp);
+
+ if (ret == DDI_DMA_MAPPED) {
+ ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
+ immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
}
- cookie_free(dma, immu, domain, rdip);
+ return (ret);
+}
- /* No invalidation needed for unmap */
- immu_regs_wbf_flush(immu);
+/*ARGSUSED*/
+static int
+immu_unbindhdl(iommulib_handle_t handle,
+ dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
+{
+ return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
+}
- return (DDI_SUCCESS);
+/*ARGSUSED*/
+static int
+immu_sync(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
+ size_t len, uint_t cachefl)
+{
+ return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
+ cachefl));
}
-immu_devi_t *
-immu_devi_get(dev_info_t *rdip)
+/*ARGSUSED*/
+static int
+immu_win(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
+ off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
+ uint_t *ccountp)
{
- immu_devi_t *immu_devi;
- volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
+ return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
+ lenp, cookiep, ccountp));
+}
- /* Just want atomic reads. No need for lock */
- immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
- 0);
- return (immu_devi);
+/*ARGSUSED*/
+static int
+immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
+{
+ immu_hdl_priv_t *ihp;
+
+ ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
+
+ return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
+}
+
+/*ARGSUSED*/
+static int
+immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
+{
+ immu_hdl_priv_t *ihp;
+
+ ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
+ if (ihp->ihp_npremapped > 0)
+ return (DDI_SUCCESS);
+ return (immu_unmap_dvmaseg(rdip, dmao));
+}
+
+/*ARGSUSED*/
+static int
+immu_map(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, struct ddi_dma_req *dmareq,
+ ddi_dma_handle_t *dma_handle)
+{
+ ASSERT(0);
+ return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+immu_mctl(iommulib_handle_t handle, dev_info_t *dip,
+ dev_info_t *rdip, ddi_dma_handle_t dma_handle,
+ enum ddi_dma_ctlops request, off_t *offp, size_t *lenp,
+ caddr_t *objpp, uint_t cachefl)
+{
+ ASSERT(0);
+ return (DDI_FAILURE);
}
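
A pattern that recurs throughout this change is the immu_inv_wait_t handshake: software arms a status word (IMMU_INV_DATA_PENDING), queues an invalidation-wait descriptor, and the unit's completion performs a coherent DWORD write of IMMU_INV_DATA_DONE to that word, for which the waiter then waits. Below is a user-land sketch of the same producer/consumer idea, with a thread standing in for the IOMMU; every name and value in it is invented for illustration and is not part of the driver:

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define	SKETCH_PENDING	1u
#define	SKETCH_DONE	2u

static _Atomic uint32_t sketch_status;

/* stands in for the IOMMU completing a queued wait descriptor */
static void *
sketch_hw(void *arg)
{
	(void) arg;
	(void) usleep(1000);		/* pretend the flush takes a while */
	atomic_store(&sketch_status, SKETCH_DONE);	/* "coherent DWORD write" */
	return (NULL);
}

int
main(void)
{
	pthread_t hw;

	atomic_store(&sketch_status, SKETCH_PENDING);	/* arm the status word */
	(void) pthread_create(&hw, NULL, sketch_hw, NULL);

	while (atomic_load(&sketch_status) != SKETCH_DONE)
		;			/* spin, as the sync wait path does */

	(void) printf("invalidation complete\n");
	(void) pthread_join(&hw, NULL);
	return (0);
}

How a particular caller waits (spinning as in qinv_wait_sync(), or otherwise) is decided elsewhere in the driver; the sketch only illustrates the status-word protocol itself.
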
diff --git a/usr/src/uts/i86pc/io/immu_intrmap.c b/usr/src/uts/i86pc/io/immu_intrmap.c
index 8c98573bc0..ab9f9bcbe7 100644
--- a/usr/src/uts/i86pc/io/immu_intrmap.c
+++ b/usr/src/uts/i86pc/io/immu_intrmap.c
@@ -38,6 +38,7 @@
typedef struct intrmap_private {
immu_t *ir_immu;
+ immu_inv_wait_t ir_inv_wait;
uint16_t ir_idx;
uint32_t ir_sid_svt_sq;
} intrmap_private_t;
@@ -573,6 +574,7 @@ immu_intr_handler(immu_t *immu)
(sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
immu_dvma_faults[MIN(fault_reason,
DVMA_MAX_FAULTS)]);
+ immu_print_fault_info(sid, pg_addr);
} else if (fault_reason < 0x27) {
/* intr-remapping fault */
ddi_err(DER_WARN, idip,
@@ -666,6 +668,7 @@ immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
{
immu_t *immu;
intrmap_t *intrmap;
+ immu_inv_wait_t *iwp;
uint32_t idx, i;
uint32_t sid_svt_sq;
intrmap_private_t *intrmap_private;
@@ -702,10 +705,12 @@ immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
sid_svt_sq = intrmap_private->ir_sid_svt_sq =
get_sid(dip, type, ioapic_index);
+ iwp = &intrmap_private->ir_inv_wait;
+ immu_init_inv_wait(iwp, "intrmaplocal", B_TRUE);
if (count == 1) {
if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
- immu_qinv_intr_one_cache(immu, idx);
+ immu_qinv_intr_one_cache(immu, idx, iwp);
} else {
immu_regs_wbf_flush(immu);
}
@@ -723,7 +728,7 @@ immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
}
if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
- immu_qinv_intr_caches(immu, idx, count);
+ immu_qinv_intr_caches(immu, idx, count, iwp);
} else {
immu_regs_wbf_flush(immu);
}
@@ -742,6 +747,7 @@ immu_intrmap_map(void *intrmap_private, void *intrmap_data, uint16_t type,
int count)
{
immu_t *immu;
+ immu_inv_wait_t *iwp;
intrmap_t *intrmap;
ioapic_rdt_t *irdt = (ioapic_rdt_t *)intrmap_data;
msi_regs_t *mregs = (msi_regs_t *)intrmap_data;
@@ -755,6 +761,7 @@ immu_intrmap_map(void *intrmap_private, void *intrmap_data, uint16_t type,
idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;
immu = INTRMAP_PRIVATE(intrmap_private)->ir_immu;
+ iwp = &INTRMAP_PRIVATE(intrmap_private)->ir_inv_wait;
intrmap = immu->immu_intrmap;
sid_svt_sq = INTRMAP_PRIVATE(intrmap_private)->ir_sid_svt_sq;
@@ -795,7 +802,7 @@ immu_intrmap_map(void *intrmap_private, void *intrmap_data, uint16_t type,
idx * INTRMAP_RTE_SIZE,
INTRMAP_RTE_SIZE);
- immu_qinv_intr_one_cache(immu, idx);
+ immu_qinv_intr_one_cache(immu, idx, iwp);
} else {
for (i = 0; i < count; i++) {
@@ -810,7 +817,7 @@ immu_intrmap_map(void *intrmap_private, void *intrmap_data, uint16_t type,
idx++;
}
- immu_qinv_intr_caches(immu, idx, count);
+ immu_qinv_intr_caches(immu, idx, count, iwp);
}
}
@@ -819,6 +826,7 @@ static void
immu_intrmap_free(void **intrmap_privatep)
{
immu_t *immu;
+ immu_inv_wait_t *iwp;
intrmap_t *intrmap;
uint32_t idx;
@@ -828,13 +836,14 @@ immu_intrmap_free(void **intrmap_privatep)
}
immu = INTRMAP_PRIVATE(*intrmap_privatep)->ir_immu;
+ iwp = &INTRMAP_PRIVATE(*intrmap_privatep)->ir_inv_wait;
intrmap = immu->immu_intrmap;
idx = INTRMAP_PRIVATE(*intrmap_privatep)->ir_idx;
bzero(intrmap->intrmap_vaddr + idx * INTRMAP_RTE_SIZE,
INTRMAP_RTE_SIZE);
- immu_qinv_intr_one_cache(immu, idx);
+ immu_qinv_intr_one_cache(immu, idx, iwp);
mutex_enter(&intrmap->intrmap_lock);
bitset_del(&intrmap->intrmap_map, idx);
@@ -928,6 +937,8 @@ immu_intrmap_setup(list_t *listp)
mutex_init(&(immu->immu_intrmap_lock), NULL,
MUTEX_DEFAULT, NULL);
mutex_enter(&(immu->immu_intrmap_lock));
+ immu_init_inv_wait(&immu->immu_intrmap_inv_wait,
+ "intrmapglobal", B_TRUE);
immu->immu_intrmap_setup = B_TRUE;
mutex_exit(&(immu->immu_intrmap_lock));
}
diff --git a/usr/src/uts/i86pc/io/immu_qinv.c b/usr/src/uts/i86pc/io/immu_qinv.c
index a3384a7340..de25729482 100644
--- a/usr/src/uts/i86pc/io/immu_qinv.c
+++ b/usr/src/uts/i86pc/io/immu_qinv.c
@@ -32,6 +32,7 @@
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/types.h>
+#include <sys/cpu.h>
#include <sys/sysmacros.h>
#include <sys/immu.h>
@@ -44,10 +45,6 @@
/* status data size of invalidation wait descriptor */
#define QINV_SYNC_DATA_SIZE 0x4
-/* status data value of invalidation wait descriptor */
-#define QINV_SYNC_DATA_FENCE 1
-#define QINV_SYNC_DATA_UNFENCE 2
-
/* invalidation queue head and tail */
#define QINV_IQA_HEAD(QH) BITX((QH), 18, 4)
#define QINV_IQA_TAIL_SHIFT 4
@@ -58,38 +55,6 @@ typedef struct qinv_inv_dsc {
uint64_t hi;
} qinv_dsc_t;
-/*
- * struct iotlb_cache_node
- * the pending data for iotlb flush
- */
-typedef struct iotlb_pend_node {
- dvcookie_t *icn_dvcookies; /* ptr to dvma cookie array */
- uint_t icn_count; /* valid cookie count */
- uint_t icn_array_size; /* array size */
- list_node_t node;
-} qinv_iotlb_pend_node_t;
-
-/*
- * struct iotlb_cache_head
- * the pending head for the iotlb flush
- */
-typedef struct iotlb_pend_head {
- /* the pending node cache list */
- kmutex_t ich_mem_lock;
- list_t ich_mem_list;
-} qinv_iotlb_pend_head_t;
-
-/*
- * qinv_iotlb_t
- * pending data for qiueued invalidation iotlb flush
- */
-typedef struct qinv_iotlb {
- dvcookie_t *qinv_iotlb_dvcookies;
- uint_t qinv_iotlb_count;
- uint_t qinv_iotlb_size;
- list_node_t qinv_iotlb_node;
-} qinv_iotlb_t;
-
/* physical contiguous pages for invalidation queue */
typedef struct qinv_mem {
kmutex_t qinv_mem_lock;
@@ -111,22 +76,22 @@ typedef struct qinv_mem {
*
* qinv_table - invalidation queue table
* qinv_sync - sync status memory for invalidation wait descriptor
- * qinv_iotlb_pend_node - pending iotlb node
*/
typedef struct qinv {
qinv_mem_t qinv_table;
qinv_mem_t qinv_sync;
- qinv_iotlb_pend_head_t qinv_pend_head;
- qinv_iotlb_pend_node_t **qinv_iotlb_pend_node;
} qinv_t;
+static void immu_qinv_inv_wait(immu_inv_wait_t *iwp);
+
static struct immu_flushops immu_qinv_flushops = {
immu_qinv_context_fsi,
immu_qinv_context_dsi,
immu_qinv_context_gbl,
immu_qinv_iotlb_psi,
immu_qinv_iotlb_dsi,
- immu_qinv_iotlb_gbl
+ immu_qinv_iotlb_gbl,
+ immu_qinv_inv_wait
};
/* helper macro for making queue invalidation descriptor */
@@ -200,13 +165,8 @@ static void qinv_iotlb_common(immu_t *immu, uint_t domain_id,
uint64_t addr, uint_t am, uint_t hint, tlb_inv_g_t type);
static void qinv_iec_common(immu_t *immu, uint_t iidx,
uint_t im, uint_t g);
-static uint_t qinv_alloc_sync_mem_entry(immu_t *immu);
-static void qinv_wait_async_unfence(immu_t *immu,
- qinv_iotlb_pend_node_t *node);
-static void qinv_wait_sync(immu_t *immu);
-static int qinv_wait_async_finish(immu_t *immu, int *count);
-/*LINTED*/
-static void qinv_wait_async_fence(immu_t *immu);
+static void immu_qinv_inv_wait(immu_inv_wait_t *iwp);
+static void qinv_wait_sync(immu_t *immu, immu_inv_wait_t *iwp);
/*LINTED*/
static void qinv_dev_iotlb_common(immu_t *immu, uint16_t sid,
uint64_t addr, uint_t size, uint_t max_invs_pd);
@@ -219,6 +179,9 @@ qinv_submit_inv_dsc(immu_t *immu, qinv_dsc_t *dsc)
qinv_t *qinv;
qinv_mem_t *qinv_table;
uint_t tail;
+#ifdef DEBUG
+ uint_t count = 0;
+#endif
qinv = (qinv_t *)immu->immu_qinv;
qinv_table = &(qinv->qinv_table);
@@ -231,6 +194,9 @@ qinv_submit_inv_dsc(immu_t *immu, qinv_dsc_t *dsc)
qinv_table->qinv_mem_tail = 0;
while (qinv_table->qinv_mem_head == qinv_table->qinv_mem_tail) {
+#ifdef DEBUG
+ count++;
+#endif
/*
* inv queue table exhausted, wait hardware to fetch
* next descriptor
@@ -239,6 +205,9 @@ qinv_submit_inv_dsc(immu_t *immu, qinv_dsc_t *dsc)
immu_regs_get64(immu, IMMU_REG_INVAL_QH));
}
+ IMMU_DPROBE3(immu__qinv__sub, uint64_t, dsc->lo, uint64_t, dsc->hi,
+ uint_t, count);
+
bcopy(dsc, qinv_table->qinv_mem_vaddr + tail * QINV_ENTRY_SIZE,
QINV_ENTRY_SIZE);
@@ -331,162 +300,71 @@ qinv_iec_common(immu_t *immu, uint_t iidx, uint_t im, uint_t g)
}
/*
- * alloc free entry from sync status table
- */
-static uint_t
-qinv_alloc_sync_mem_entry(immu_t *immu)
-{
- qinv_mem_t *sync_mem;
- uint_t tail;
- qinv_t *qinv;
-
- qinv = (qinv_t *)immu->immu_qinv;
- sync_mem = &qinv->qinv_sync;
-
-sync_mem_exhausted:
- mutex_enter(&sync_mem->qinv_mem_lock);
- tail = sync_mem->qinv_mem_tail;
- sync_mem->qinv_mem_tail++;
- if (sync_mem->qinv_mem_tail == sync_mem->qinv_mem_size)
- sync_mem->qinv_mem_tail = 0;
-
- if (sync_mem->qinv_mem_head == sync_mem->qinv_mem_tail) {
- /* should never happen */
- ddi_err(DER_WARN, NULL, "sync mem exhausted");
- sync_mem->qinv_mem_tail = tail;
- mutex_exit(&sync_mem->qinv_mem_lock);
- delay(IMMU_ALLOC_RESOURCE_DELAY);
- goto sync_mem_exhausted;
- }
- mutex_exit(&sync_mem->qinv_mem_lock);
-
- return (tail);
-}
-
-/*
- * queued invalidation interface -- invalidation wait descriptor
- * fence flag not set, need status data to indicate the invalidation
- * wait descriptor completion
- */
-static void
-qinv_wait_async_unfence(immu_t *immu, qinv_iotlb_pend_node_t *node)
-{
- qinv_dsc_t dsc;
- qinv_mem_t *sync_mem;
- uint64_t saddr;
- uint_t tail;
- qinv_t *qinv;
-
- qinv = (qinv_t *)immu->immu_qinv;
- sync_mem = &qinv->qinv_sync;
- tail = qinv_alloc_sync_mem_entry(immu);
-
- /* plant an iotlb pending node */
- qinv->qinv_iotlb_pend_node[tail] = node;
-
- saddr = sync_mem->qinv_mem_paddr + tail * QINV_SYNC_DATA_SIZE;
-
- /*
- * sdata = QINV_SYNC_DATA_UNFENCE, fence = 0, sw = 1, if = 0
- * indicate the invalidation wait descriptor completion by
- * performing a coherent DWORD write to the status address,
- * not by generating an invalidation completion event
- */
- dsc.lo = INV_WAIT_DSC_LOW(QINV_SYNC_DATA_UNFENCE, 0, 1, 0);
- dsc.hi = INV_WAIT_DSC_HIGH(saddr);
-
- qinv_submit_inv_dsc(immu, &dsc);
-}
-
-/*
- * queued invalidation interface -- invalidation wait descriptor
- * fence flag set, indicate descriptors following the invalidation
- * wait descriptor must be processed by hardware only after the
- * invalidation wait descriptor completes.
- */
-static void
-qinv_wait_async_fence(immu_t *immu)
-{
- qinv_dsc_t dsc;
-
- /* sw = 0, fence = 1, iflag = 0 */
- dsc.lo = INV_WAIT_DSC_LOW(0, 1, 0, 0);
- dsc.hi = 0;
- qinv_submit_inv_dsc(immu, &dsc);
-}
-
-/*
* queued invalidation interface -- invalidation wait descriptor
* wait until the invalidation request finished
*/
static void
-qinv_wait_sync(immu_t *immu)
+qinv_wait_sync(immu_t *immu, immu_inv_wait_t *iwp)
{
qinv_dsc_t dsc;
- qinv_mem_t *sync_mem;
- uint64_t saddr;
- uint_t tail;
- qinv_t *qinv;
volatile uint32_t *status;
+ uint64_t paddr;
+#ifdef DEBUG
+ uint_t count;
+#endif
- qinv = (qinv_t *)immu->immu_qinv;
- sync_mem = &qinv->qinv_sync;
- tail = qinv_alloc_sync_mem_entry(immu);
- saddr = sync_mem->qinv_mem_paddr + tail * QINV_SYNC_DATA_SIZE;
- status = (uint32_t *)(sync_mem->qinv_mem_vaddr + tail *
- QINV_SYNC_DATA_SIZE);
+ status = &iwp->iwp_vstatus;
+ paddr = iwp->iwp_pstatus;
+
+ *status = IMMU_INV_DATA_PENDING;
+ membar_producer();
/*
- * sdata = QINV_SYNC_DATA_FENCE, fence = 1, sw = 1, if = 0
+ * sdata = IMMU_INV_DATA_DONE, fence = 1, sw = 1, if = 0
* indicate the invalidation wait descriptor completion by
* performing a coherent DWORD write to the status address,
* not by generating an invalidation completion event
*/
- dsc.lo = INV_WAIT_DSC_LOW(QINV_SYNC_DATA_FENCE, 1, 1, 0);
- dsc.hi = INV_WAIT_DSC_HIGH(saddr);
+ dsc.lo = INV_WAIT_DSC_LOW(IMMU_INV_DATA_DONE, 1, 1, 0);
+ dsc.hi = INV_WAIT_DSC_HIGH(paddr);
qinv_submit_inv_dsc(immu, &dsc);
- while ((*status) != QINV_SYNC_DATA_FENCE)
- iommu_cpu_nop();
- *status = QINV_SYNC_DATA_UNFENCE;
+ if (iwp->iwp_sync) {
+#ifdef DEBUG
+ count = 0;
+ while (*status != IMMU_INV_DATA_DONE) {
+ count++;
+ ht_pause();
+ }
+ DTRACE_PROBE2(immu__wait__sync, const char *, iwp->iwp_name,
+ uint_t, count);
+#else
+ while (*status != IMMU_INV_DATA_DONE)
+ ht_pause();
+#endif
+ }
}
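The reworked wait path replaces the per-IOMMU sync-status ring with a caller-supplied status word: software arms it with IMMU_INV_DATA_PENDING, queues a wait descriptor that makes the hardware write IMMU_INV_DATA_DONE to the word's physical address, and, for synchronous waits, spins until that store lands. A minimal stand-alone model of the same handshake, with the descriptor submission and pause instruction stubbed out (stub names are assumptions, not kernel interfaces):

#include <stdint.h>

#define INV_PENDING	1
#define INV_DONE	2

typedef struct {
	volatile uint32_t status;	/* written by "hardware" */
	int		 sync;		/* wait inline, or poll later */
} inv_wait_t;

/* Stubs for queueing the wait descriptor and for the cpu pause hint. */
extern void queue_wait_descriptor(uint64_t status_paddr, uint32_t done_value);
extern void cpu_pause(void);

static void
wait_sync(inv_wait_t *iwp, uint64_t status_paddr)
{
	iwp->status = INV_PENDING;		/* arm before queueing */
	queue_wait_descriptor(status_paddr, INV_DONE);

	if (iwp->sync) {
		while (iwp->status != INV_DONE)
			cpu_pause();		/* ht_pause() in the patch */
	}
	/* if !sync, the caller later polls iwp->status the same way */
}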
-/* get already completed invalidation wait requests */
-static int
-qinv_wait_async_finish(immu_t *immu, int *cnt)
+static void
+immu_qinv_inv_wait(immu_inv_wait_t *iwp)
{
- qinv_mem_t *sync_mem;
- int index;
- qinv_t *qinv;
- volatile uint32_t *value;
-
- ASSERT((*cnt) == 0);
-
- qinv = (qinv_t *)immu->immu_qinv;
- sync_mem = &qinv->qinv_sync;
-
- mutex_enter(&sync_mem->qinv_mem_lock);
- index = sync_mem->qinv_mem_head;
- value = (uint32_t *)(sync_mem->qinv_mem_vaddr + index
- * QINV_SYNC_DATA_SIZE);
- while (*value == QINV_SYNC_DATA_UNFENCE) {
- *value = 0;
- (*cnt)++;
- sync_mem->qinv_mem_head++;
- if (sync_mem->qinv_mem_head == sync_mem->qinv_mem_size) {
- sync_mem->qinv_mem_head = 0;
- value = (uint32_t *)(sync_mem->qinv_mem_vaddr);
- } else
- value = (uint32_t *)((char *)value +
- QINV_SYNC_DATA_SIZE);
+ volatile uint32_t *status = &iwp->iwp_vstatus;
+#ifdef DEBUG
+ uint_t count;
+
+ count = 0;
+ while (*status != IMMU_INV_DATA_DONE) {
+ count++;
+ ht_pause();
}
+ DTRACE_PROBE2(immu__wait__async, const char *, iwp->iwp_name,
+ uint_t, count);
+#else
- mutex_exit(&sync_mem->qinv_mem_lock);
- if ((*cnt) > 0)
- return (index);
- else
- return (-1);
+ while (*status != IMMU_INV_DATA_DONE)
+ ht_pause();
+#endif
}
/*
@@ -608,15 +486,6 @@ qinv_setup(immu_t *immu)
mutex_init(&(qinv->qinv_table.qinv_mem_lock), NULL, MUTEX_DRIVER, NULL);
mutex_init(&(qinv->qinv_sync.qinv_mem_lock), NULL, MUTEX_DRIVER, NULL);
- /*
- * init iotlb pend node for submitting invalidation iotlb
- * queue request
- */
- qinv->qinv_iotlb_pend_node = (qinv_iotlb_pend_node_t **)
- kmem_zalloc(qinv->qinv_sync.qinv_mem_size
- * sizeof (qinv_iotlb_pend_node_t *), KM_SLEEP);
-
- /* set invalidation queue structure */
immu->immu_qinv = qinv;
mutex_exit(&(immu->immu_qinv_lock));
@@ -698,11 +567,11 @@ immu_qinv_startup(immu_t *immu)
*/
void
immu_qinv_context_fsi(immu_t *immu, uint8_t function_mask,
- uint16_t source_id, uint_t domain_id)
+ uint16_t source_id, uint_t domain_id, immu_inv_wait_t *iwp)
{
qinv_context_common(immu, function_mask, source_id,
domain_id, CTT_INV_G_DEVICE);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
/*
@@ -710,10 +579,10 @@ immu_qinv_context_fsi(immu_t *immu, uint8_t function_mask,
* domain based context cache invalidation
*/
void
-immu_qinv_context_dsi(immu_t *immu, uint_t domain_id)
+immu_qinv_context_dsi(immu_t *immu, uint_t domain_id, immu_inv_wait_t *iwp)
{
qinv_context_common(immu, 0, 0, domain_id, CTT_INV_G_DOMAIN);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
/*
@@ -721,10 +590,10 @@ immu_qinv_context_dsi(immu_t *immu, uint_t domain_id)
* invalidation global context cache
*/
void
-immu_qinv_context_gbl(immu_t *immu)
+immu_qinv_context_gbl(immu_t *immu, immu_inv_wait_t *iwp)
{
qinv_context_common(immu, 0, 0, 0, CTT_INV_G_GLOBAL);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
/*
@@ -733,7 +602,7 @@ immu_qinv_context_gbl(immu_t *immu)
*/
void
immu_qinv_iotlb_psi(immu_t *immu, uint_t domain_id,
- uint64_t dvma, uint_t count, uint_t hint)
+ uint64_t dvma, uint_t count, uint_t hint, immu_inv_wait_t *iwp)
{
uint_t am = 0;
uint_t max_am;
@@ -761,6 +630,8 @@ immu_qinv_iotlb_psi(immu_t *immu, uint_t domain_id,
qinv_iotlb_common(immu, domain_id, dvma,
0, hint, TLB_INV_G_DOMAIN);
}
+
+ qinv_wait_sync(immu, iwp);
}
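Page-selective IOTLB invalidation describes its target as a base address plus an address mask: a mask of am covers 2^am pages and requires the base to be 2^am-page aligned, which is why the code above computes am against the hardware's MAMV limit and otherwise falls back to a coarser domain-wide flush. A hedged sketch of choosing such a mask for an aligned run of pages (my own illustration of the encoding, not the patch's helper):

#include <stdint.h>

/*
 * Pick the largest address mask 'am' such that the 2^am-page region
 * starting at 'dvma' is suitably aligned and stays within both the
 * 'count' pages being flushed and the hardware limit 'max_am'.
 */
static uint32_t
choose_addr_mask(uint64_t dvma, uint64_t count, uint32_t max_am,
    uint32_t pageshift)
{
	uint32_t am = 0;
	uint64_t pfn = dvma >> pageshift;

	while (am < max_am &&
	    (pfn & ((1ULL << (am + 1)) - 1)) == 0 &&	/* alignment holds */
	    (1ULL << (am + 1)) <= count)		/* fits in the range */
		am++;

	return (am);
}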
/*
@@ -768,10 +639,10 @@ immu_qinv_iotlb_psi(immu_t *immu, uint_t domain_id,
* domain based iotlb invalidation
*/
void
-immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id)
+immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id, immu_inv_wait_t *iwp)
{
qinv_iotlb_common(immu, domain_id, 0, 0, 0, TLB_INV_G_DOMAIN);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
/*
@@ -779,97 +650,32 @@ immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id)
* global iotlb invalidation
*/
void
-immu_qinv_iotlb_gbl(immu_t *immu)
+immu_qinv_iotlb_gbl(immu_t *immu, immu_inv_wait_t *iwp)
{
qinv_iotlb_common(immu, 0, 0, 0, 0, TLB_INV_G_GLOBAL);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
-
-
-/*
- * the plant wait operation for queued invalidation interface
- */
-void
-immu_qinv_plant(immu_t *immu, dvcookie_t *dvcookies,
- uint_t count, uint_t array_size)
-{
- qinv_t *qinv;
- qinv_iotlb_pend_node_t *node = NULL;
- qinv_iotlb_pend_head_t *head;
-
- qinv = (qinv_t *)immu->immu_qinv;
-
- head = &(qinv->qinv_pend_head);
- mutex_enter(&(head->ich_mem_lock));
- node = list_head(&(head->ich_mem_list));
- if (node) {
- list_remove(&(head->ich_mem_list), node);
- }
- mutex_exit(&(head->ich_mem_lock));
-
- /* no cache, alloc one */
- if (node == NULL) {
- node = kmem_zalloc(sizeof (qinv_iotlb_pend_node_t), KM_SLEEP);
- }
- node->icn_dvcookies = dvcookies;
- node->icn_count = count;
- node->icn_array_size = array_size;
-
- /* plant an invalidation wait descriptor, not wait its completion */
- qinv_wait_async_unfence(immu, node);
-}
-
-/*
- * the reap wait operation for queued invalidation interface
- */
-void
-immu_qinv_reap(immu_t *immu)
-{
- int index, cnt = 0;
- qinv_iotlb_pend_node_t *node;
- qinv_iotlb_pend_head_t *head;
- qinv_t *qinv;
-
- qinv = (qinv_t *)immu->immu_qinv;
- head = &(qinv->qinv_pend_head);
-
- index = qinv_wait_async_finish(immu, &cnt);
-
- while (cnt--) {
- node = qinv->qinv_iotlb_pend_node[index];
- if (node == NULL)
- continue;
- mutex_enter(&(head->ich_mem_lock));
- list_insert_head(&(head->ich_mem_list), node);
- mutex_exit(&(head->ich_mem_lock));
- qinv->qinv_iotlb_pend_node[index] = NULL;
- index++;
- if (index == qinv->qinv_sync.qinv_mem_size)
- index = 0;
- }
-}
-
-
/* queued invalidation interface -- global invalidate interrupt entry cache */
void
-immu_qinv_intr_global(immu_t *immu)
+immu_qinv_intr_global(immu_t *immu, immu_inv_wait_t *iwp)
{
qinv_iec_common(immu, 0, 0, IEC_INV_GLOBAL);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
/* queued invalidation interface -- invalidate single interrupt entry cache */
void
-immu_qinv_intr_one_cache(immu_t *immu, uint_t iidx)
+immu_qinv_intr_one_cache(immu_t *immu, uint_t iidx, immu_inv_wait_t *iwp)
{
qinv_iec_common(immu, iidx, 0, IEC_INV_INDEX);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
/* queued invalidation interface -- invalidate interrupt entry caches */
void
-immu_qinv_intr_caches(immu_t *immu, uint_t iidx, uint_t cnt)
+immu_qinv_intr_caches(immu_t *immu, uint_t iidx, uint_t cnt,
+ immu_inv_wait_t *iwp)
{
uint_t i, mask = 0;
@@ -880,7 +686,7 @@ immu_qinv_intr_caches(immu_t *immu, uint_t iidx, uint_t cnt)
for (i = 0; i < cnt; i++) {
qinv_iec_common(immu, iidx + cnt, 0, IEC_INV_INDEX);
}
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
return;
}
@@ -892,13 +698,13 @@ immu_qinv_intr_caches(immu_t *immu, uint_t iidx, uint_t cnt)
for (i = 0; i < cnt; i++) {
qinv_iec_common(immu, iidx + cnt, 0, IEC_INV_INDEX);
}
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
return;
}
qinv_iec_common(immu, iidx, mask, IEC_INV_INDEX);
- qinv_wait_sync(immu);
+ qinv_wait_sync(immu, iwp);
}
void
diff --git a/usr/src/uts/i86pc/io/immu_regs.c b/usr/src/uts/i86pc/io/immu_regs.c
index 97d56a3776..dc43b0f49a 100644
--- a/usr/src/uts/i86pc/io/immu_regs.c
+++ b/usr/src/uts/i86pc/io/immu_regs.c
@@ -33,6 +33,7 @@
#include <sys/spl.h>
#include <sys/sysmacros.h>
#include <sys/immu.h>
+#include <sys/cpu.h>
#define get_reg32(immu, offset) ddi_get32((immu)->immu_regs_handle, \
(uint32_t *)(immu->immu_regs_addr + (offset)))
@@ -45,13 +46,16 @@
((immu)->immu_regs_handle, \
(uint64_t *)(immu->immu_regs_addr + (offset)), val)
+static void immu_regs_inv_wait(immu_inv_wait_t *iwp);
+
struct immu_flushops immu_regs_flushops = {
immu_regs_context_fsi,
immu_regs_context_dsi,
immu_regs_context_gbl,
immu_regs_iotlb_psi,
immu_regs_iotlb_dsi,
- immu_regs_iotlb_gbl
+ immu_regs_iotlb_gbl,
+ immu_regs_inv_wait
};
/*
@@ -74,7 +78,7 @@ struct immu_flushops immu_regs_flushops = {
"immu wait completion time out"); \
/*NOTREACHED*/ \
} else { \
- iommu_cpu_nop();\
+ ht_pause();\
}\
}\
}
@@ -118,9 +122,6 @@ iotlb_flush(immu_t *immu, uint_t domain_id,
*/
switch (type) {
case IOTLB_PSI:
- ASSERT(IMMU_CAP_GET_PSI(immu->immu_regs_cap));
- ASSERT(am <= IMMU_CAP_GET_MAMV(immu->immu_regs_cap));
- ASSERT(!(addr & IMMU_PAGEOFFSET));
command |= TLB_INV_PAGE | TLB_INV_IVT |
TLB_INV_DID(domain_id);
iva = addr | am | TLB_IVA_HINT(hint);
@@ -149,9 +150,10 @@ iotlb_flush(immu_t *immu, uint_t domain_id,
* immu_regs_iotlb_psi()
* iotlb page specific invalidation
*/
+/*ARGSUSED*/
void
immu_regs_iotlb_psi(immu_t *immu, uint_t did, uint64_t dvma, uint_t snpages,
- uint_t hint)
+ uint_t hint, immu_inv_wait_t *iwp)
{
int dvma_am;
int npg_am;
@@ -163,12 +165,10 @@ immu_regs_iotlb_psi(immu_t *immu, uint_t did, uint64_t dvma, uint_t snpages,
int i;
if (!IMMU_CAP_GET_PSI(immu->immu_regs_cap)) {
- immu_regs_iotlb_dsi(immu, did);
+ immu_regs_iotlb_dsi(immu, did, iwp);
return;
}
- ASSERT(dvma % IMMU_PAGESIZE == 0);
-
max_am = IMMU_CAP_GET_MAMV(immu->immu_regs_cap);
mutex_enter(&(immu->immu_regs_lock));
@@ -210,8 +210,9 @@ immu_regs_iotlb_psi(immu_t *immu, uint_t did, uint64_t dvma, uint_t snpages,
* immu_regs_iotlb_dsi()
* domain specific invalidation
*/
+/*ARGSUSED*/
void
-immu_regs_iotlb_dsi(immu_t *immu, uint_t domain_id)
+immu_regs_iotlb_dsi(immu_t *immu, uint_t domain_id, immu_inv_wait_t *iwp)
{
mutex_enter(&(immu->immu_regs_lock));
iotlb_flush(immu, domain_id, 0, 0, 0, IOTLB_DSI);
@@ -222,8 +223,9 @@ immu_regs_iotlb_dsi(immu_t *immu, uint_t domain_id)
* immu_regs_iotlb_gbl()
* global iotlb invalidation
*/
+/*ARGSUSED*/
void
-immu_regs_iotlb_gbl(immu_t *immu)
+immu_regs_iotlb_gbl(immu_t *immu, immu_inv_wait_t *iwp)
{
mutex_enter(&(immu->immu_regs_lock));
iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL);
@@ -345,9 +347,6 @@ setup_regs(immu_t *immu)
{
int error;
- ASSERT(immu);
- ASSERT(immu->immu_name);
-
/*
* This lock may be acquired by the IOMMU interrupt handler
*/
@@ -382,7 +381,7 @@ setup_regs(immu_t *immu)
immu->immu_dvma_coherent = B_TRUE;
} else {
immu->immu_dvma_coherent = B_FALSE;
- if (!(x86_feature & X86_CLFSH)) {
+ if (!is_x86_feature(x86_featureset, X86FSET_CLFSH)) {
ddi_err(DER_WARN, NULL,
"immu unit %s can't be enabled due to "
"missing clflush functionality", immu->immu_name);
@@ -396,6 +395,11 @@ setup_regs(immu_t *immu)
immu->immu_SNP_reserved = immu_regs_is_SNP_reserved(immu);
immu->immu_TM_reserved = immu_regs_is_TM_reserved(immu);
+ if (IMMU_ECAP_GET_CH(immu->immu_regs_excap) && immu_use_tm)
+ immu->immu_ptemask = PDTE_MASK_TM;
+ else
+ immu->immu_ptemask = 0;
+
/*
* Check for Mobile 4 series chipset
*/
@@ -405,7 +409,6 @@ setup_regs(immu_t *immu)
"IMMU: Mobile 4 chipset quirk detected. "
"Force-setting RWBF");
IMMU_CAP_SET_RWBF(immu->immu_regs_cap);
- ASSERT(IMMU_CAP_GET_RWBF(immu->immu_regs_cap));
}
/*
@@ -514,10 +517,6 @@ immu_regs_startup(immu_t *immu)
return;
}
- ASSERT(immu->immu_regs_running == B_FALSE);
-
- ASSERT(MUTEX_HELD(&(immu->immu_lock)));
-
mutex_enter(&(immu->immu_regs_lock));
put_reg32(immu, IMMU_REG_GLOBAL_CMD,
immu->immu_regs_cmdval | IMMU_GCMD_TE);
@@ -527,7 +526,7 @@ immu_regs_startup(immu_t *immu)
immu->immu_regs_running = B_TRUE;
mutex_exit(&(immu->immu_regs_lock));
- ddi_err(DER_NOTE, NULL, "IMMU %s running", immu->immu_name);
+ ddi_err(DER_NOTE, NULL, "%s running", immu->immu_name);
}
/*
@@ -543,10 +542,6 @@ immu_regs_shutdown(immu_t *immu)
return;
}
- ASSERT(immu->immu_regs_setup == B_TRUE);
-
- ASSERT(MUTEX_HELD(&(immu->immu_lock)));
-
mutex_enter(&(immu->immu_regs_lock));
immu->immu_regs_cmdval &= ~IMMU_GCMD_TE;
put_reg32(immu, IMMU_REG_GLOBAL_CMD,
@@ -649,15 +644,17 @@ immu_regs_wbf_flush(immu_t *immu)
void
immu_regs_cpu_flush(immu_t *immu, caddr_t addr, uint_t size)
{
- uint64_t i;
-
- ASSERT(immu);
+ uintptr_t startline, endline;
if (immu->immu_dvma_coherent == B_TRUE)
return;
- for (i = 0; i < size; i += x86_clflush_size, addr += x86_clflush_size) {
- clflush_insn(addr);
+ startline = (uintptr_t)addr & ~(uintptr_t)(x86_clflush_size - 1);
+ endline = ((uintptr_t)addr + size - 1) &
+ ~(uintptr_t)(x86_clflush_size - 1);
+ while (startline <= endline) {
+ clflush_insn((caddr_t)startline);
+ startline += x86_clflush_size;
}
mfence_insn();
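The rewritten flush loop rounds both ends of the buffer down to cache-line boundaries before issuing clflush, so a range that straddles a line boundary is still flushed in whole lines rather than skipping its tail. A small stand-alone model of the same arithmetic, with the flush and fence instructions stubbed out:

#include <stddef.h>
#include <stdint.h>

extern void clflush_line(uintptr_t line);	/* stub for clflush_insn() */
extern void store_fence(void);			/* stub for mfence_insn() */

static void
flush_range(uintptr_t addr, size_t size, size_t clsize)
{
	uintptr_t start = addr & ~(uintptr_t)(clsize - 1);
	uintptr_t end = (addr + size - 1) & ~(uintptr_t)(clsize - 1);

	/*
	 * Example: addr = 0x1032, size = 0x40, clsize = 64.
	 * start = 0x1000, end = 0x1040: two whole lines are flushed,
	 * covering every byte of the 0x1032..0x1071 range.
	 */
	while (start <= end) {
		clflush_line(start);
		start += clsize;
	}
	store_fence();
}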
@@ -674,9 +671,6 @@ context_flush(immu_t *immu, uint8_t function_mask,
uint64_t command = 0;
uint64_t status;
- ASSERT(immu);
- ASSERT(rw_write_held(&(immu->immu_ctx_rwlock)));
-
/*
* define the command
*/
@@ -687,15 +681,10 @@ context_flush(immu_t *immu, uint8_t function_mask,
| CCMD_INV_SID(sid) | CCMD_INV_FM(function_mask);
break;
case CONTEXT_DSI:
- ASSERT(function_mask == 0);
- ASSERT(sid == 0);
command |= CCMD_INV_ICC | CCMD_INV_DOMAIN
| CCMD_INV_DID(did);
break;
case CONTEXT_GLOBAL:
- ASSERT(function_mask == 0);
- ASSERT(sid == 0);
- ASSERT(did == 0);
command |= CCMD_INV_ICC | CCMD_INV_GLOBAL;
break;
default:
@@ -706,32 +695,43 @@ context_flush(immu_t *immu, uint8_t function_mask,
}
mutex_enter(&(immu->immu_regs_lock));
- ASSERT(!(get_reg64(immu, IMMU_REG_CONTEXT_CMD) & CCMD_INV_ICC));
put_reg64(immu, IMMU_REG_CONTEXT_CMD, command);
wait_completion(immu, IMMU_REG_CONTEXT_CMD, get_reg64,
(!(status & CCMD_INV_ICC)), status);
mutex_exit(&(immu->immu_regs_lock));
}
+/*ARGSUSED*/
void
immu_regs_context_fsi(immu_t *immu, uint8_t function_mask,
- uint16_t source_id, uint_t domain_id)
+ uint16_t source_id, uint_t domain_id, immu_inv_wait_t *iwp)
{
context_flush(immu, function_mask, source_id, domain_id, CONTEXT_FSI);
}
+/*ARGSUSED*/
void
-immu_regs_context_dsi(immu_t *immu, uint_t domain_id)
+immu_regs_context_dsi(immu_t *immu, uint_t domain_id, immu_inv_wait_t *iwp)
{
context_flush(immu, 0, 0, domain_id, CONTEXT_DSI);
}
+/*ARGSUSED*/
void
-immu_regs_context_gbl(immu_t *immu)
+immu_regs_context_gbl(immu_t *immu, immu_inv_wait_t *iwp)
{
context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL);
}
+/*
+ * Nothing to do; all register operations are synchronous.
+ */
+/*ARGSUSED*/
+static void
+immu_regs_inv_wait(immu_inv_wait_t *iwp)
+{
+}
+
void
immu_regs_set_root_table(immu_t *immu)
{
@@ -797,8 +797,7 @@ immu_regs_intrmap_enable(immu_t *immu, uint64_t irta_reg)
mutex_exit(&(immu->immu_regs_lock));
/* global flush intr entry cache */
- if (immu_qinv_enable == B_TRUE)
- immu_qinv_intr_global(immu);
+ immu_qinv_intr_global(immu, &immu->immu_intrmap_inv_wait);
/* enable interrupt remapping */
mutex_enter(&(immu->immu_regs_lock));
diff --git a/usr/src/uts/i86pc/io/isa.c b/usr/src/uts/i86pc/io/isa.c
index aa5cea74f1..d2bb59ca99 100644
--- a/usr/src/uts/i86pc/io/isa.c
+++ b/usr/src/uts/i86pc/io/isa.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c
index 134a945207..29e371f000 100644
--- a/usr/src/uts/i86pc/io/mp_platform_common.c
+++ b/usr/src/uts/i86pc/io/mp_platform_common.c
@@ -45,6 +45,7 @@
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include <sys/apic.h>
+#include <sys/apic_timer.h>
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
diff --git a/usr/src/uts/i86pc/io/pci/pci_common.c b/usr/src/uts/i86pc/io/pci/pci_common.c
index ad689868bc..1cea07237f 100644
--- a/usr/src/uts/i86pc/io/pci/pci_common.c
+++ b/usr/src/uts/i86pc/io/pci/pci_common.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
diff --git a/usr/src/uts/i86pc/io/pci/pci_kstats.c b/usr/src/uts/i86pc/io/pci/pci_kstats.c
index ea7fcc9dc1..6a8c365c06 100644
--- a/usr/src/uts/i86pc/io/pci/pci_kstats.c
+++ b/usr/src/uts/i86pc/io/pci/pci_kstats.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Kstat support for X86 PCI driver
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c
index 22553d39d3..2a7ccb1080 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c
@@ -75,6 +75,7 @@
#include <sys/reboot.h>
#include <sys/hpet.h>
#include <sys/apic_common.h>
+#include <sys/apic_timer.h>
/*
* Local Function Prototypes
@@ -86,8 +87,6 @@ static void apic_init_intr(void);
*/
static int apic_probe(void);
static int apic_getclkirq(int ipl);
-static uint_t apic_calibrate(volatile uint32_t *addr,
- uint16_t *pit_ticks_adj);
static void apic_init(void);
static void apic_picinit(void);
static int apic_post_cpu_start(void);
@@ -371,7 +370,7 @@ apic_init_intr(void)
if (nlvt >= 5) {
/* Enable performance counter overflow interrupt */
- if ((x86_feature & X86_MSR) != X86_MSR)
+ if (!is_x86_feature(x86_featureset, X86FSET_MSR))
apic_enable_cpcovf_intr = 0;
if (apic_enable_cpcovf_intr) {
if (apic_cpcovf_vect == 0) {
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
index 79d24ed110..0cc45ff4e0 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
@@ -70,6 +70,7 @@
#include <sys/reboot.h>
#include <sys/hpet.h>
#include <sys/apic_common.h>
+#include <sys/apic_timer.h>
static void apic_record_ioapic_rdt(void *intrmap_private,
ioapic_rdt_t *irdt);
@@ -87,17 +88,11 @@ void apic_unset_idlecpu(processorid_t);
void apic_shutdown(int, int);
void apic_preshutdown(int, int);
processorid_t apic_get_next_processorid(processorid_t);
-void apic_timer_reprogram(hrtime_t);
-void apic_timer_enable(void);
-void apic_timer_disable(void);
hrtime_t apic_gettime();
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
-int apic_oneshot = 0;
-int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
-
/* Now the ones for Dynamic Interrupt distribution */
int apic_enable_dynamic_migration = 0;
@@ -146,9 +141,6 @@ int apic_panic_on_apic_error = 0;
int apic_verbose = 0; /* 0x1ff */
-/* minimum number of timer ticks to program to */
-int apic_min_timer_ticks = 1;
-
#ifdef DEBUG
int apic_debug = 0;
int apic_restrict_vector = 0;
@@ -158,8 +150,6 @@ int apic_debug_msgbufindex = 0;
#endif /* DEBUG */
-uint_t apic_nsec_per_intr = 0;
-
uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;
@@ -167,11 +157,6 @@ uint_t last_count_read = 0;
lock_t apic_gethrtime_lock;
volatile int apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;
-uint_t apic_hertz_count;
-
-uint64_t apic_ticks_per_SFnsecs; /* # of ticks in SF nsecs */
-
-static hrtime_t apic_nsec_max;
static hrtime_t apic_last_hrtime = 0;
int apic_hrtime_error = 0;
@@ -1075,7 +1060,7 @@ apic_cpu_remove(psm_cpu_request_t *reqp)
* Return the number of APIC clock ticks elapsed for 8245 to decrement
* (APIC_TIME_COUNT + pit_ticks_adj) ticks.
*/
-static uint_t
+uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
uint8_t pit_tick_lo;
@@ -1144,46 +1129,7 @@ apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
int
apic_clkinit(int hertz)
{
- uint_t apic_ticks = 0;
- uint_t pit_ticks;
int ret;
- uint16_t pit_ticks_adj;
- static int firsttime = 1;
-
- if (firsttime) {
- /* first time calibrate on CPU0 only */
-
- apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
- apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
- apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
-
- /* total number of PIT ticks corresponding to apic_ticks */
- pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
-
- /*
- * Determine the number of nanoseconds per APIC clock tick
- * and then determine how many APIC ticks to interrupt at the
- * desired frequency
- * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
- * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
- * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
- * pic_ticks_per_SFns =
- * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
- */
- apic_ticks_per_SFnsecs =
- ((SF * apic_ticks * PIT_HZ) /
- ((uint64_t)pit_ticks * NANOSEC));
-
- /* the interval timer initial count is 32 bit max */
- apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
- firsttime = 0;
- }
-
- if (hertz != 0) {
- /* periodic */
- apic_nsec_per_intr = NANOSEC / hertz;
- apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
- }
apic_int_busy_mark = (apic_int_busy_mark *
apic_sample_factor_redistribution) / 100;
@@ -1192,21 +1138,7 @@ apic_clkinit(int hertz)
apic_diff_for_redistribution = (apic_diff_for_redistribution *
apic_sample_factor_redistribution) / 100;
- if (hertz == 0) {
- /* requested one_shot */
- if (!tsc_gethrtime_enable || !apic_oneshot_enable)
- return (0);
- apic_oneshot = 1;
- ret = (int)APIC_TICKS_TO_NSECS(1);
- } else {
- /* program the local APIC to interrupt at the given frequency */
- apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
- apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
- (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
- apic_oneshot = 0;
- ret = NANOSEC / hertz;
- }
-
+ ret = apic_timer_init(hertz);
return (ret);
}
@@ -1419,137 +1351,6 @@ restart_sitka_bmc:
}
-/*
- * This function will reprogram the timer.
- *
- * When in oneshot mode the argument is the absolute time in future to
- * generate the interrupt at.
- *
- * When in periodic mode, the argument is the interval at which the
- * interrupts should be generated. There is no need to support the periodic
- * mode timer change at this time.
- */
-void
-apic_timer_reprogram(hrtime_t time)
-{
- hrtime_t now;
- uint_t ticks;
- int64_t delta;
-
- /*
- * We should be called from high PIL context (CBE_HIGH_PIL),
- * so kpreempt is disabled.
- */
-
- if (!apic_oneshot) {
- /* time is the interval for periodic mode */
- ticks = APIC_NSECS_TO_TICKS(time);
- } else {
- /* one shot mode */
-
- now = gethrtime();
- delta = time - now;
-
- if (delta <= 0) {
- /*
- * requested to generate an interrupt in the past
- * generate an interrupt as soon as possible
- */
- ticks = apic_min_timer_ticks;
- } else if (delta > apic_nsec_max) {
- /*
- * requested to generate an interrupt at a time
- * further than what we are capable of. Set to max
- * the hardware can handle
- */
-
- ticks = APIC_MAXVAL;
-#ifdef DEBUG
- cmn_err(CE_CONT, "apic_timer_reprogram, request at"
- " %lld too far in future, current time"
- " %lld \n", time, now);
-#endif
- } else
- ticks = APIC_NSECS_TO_TICKS(delta);
- }
-
- if (ticks < apic_min_timer_ticks)
- ticks = apic_min_timer_ticks;
-
- apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
-}
-
-/*
- * This function will enable timer interrupts.
- */
-void
-apic_timer_enable(void)
-{
- /*
- * We should be Called from high PIL context (CBE_HIGH_PIL),
- * so kpreempt is disabled.
- */
-
- if (!apic_oneshot) {
- apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
- (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
- } else {
- /* one shot */
- apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
- (apic_clkvect + APIC_BASE_VECT));
- }
-}
-
-/*
- * This function will disable timer interrupts.
- */
-void
-apic_timer_disable(void)
-{
- /*
- * We should be Called from high PIL context (CBE_HIGH_PIL),
- * so kpreempt is disabled.
- */
- apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
- (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
-}
-
-/*
- * Set timer far into the future and return timer
- * current Count in nanoseconds.
- */
-hrtime_t
-apic_timer_stop_count(void)
-{
- hrtime_t ns_val;
- int enable_val, count_val;
-
- /*
- * Should be called with interrupts disabled.
- */
- ASSERT(!interrupts_enabled());
-
- enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
- if ((enable_val & AV_MASK) == AV_MASK)
- return ((hrtime_t)-1); /* timer is disabled */
-
- count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
- ns_val = APIC_TICKS_TO_NSECS(count_val);
-
- apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
-
- return (ns_val);
-}
-
-/*
- * Reprogram timer after Deep C-State.
- */
-void
-apic_timer_restart(hrtime_t time)
-{
- apic_timer_reprogram(time);
-}
-
ddi_periodic_t apic_periodic_id;
/*
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c
index 46f1257ecd..1a1d72fccf 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c
@@ -75,7 +75,10 @@ apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
msi_regs.mr_data = vector;
msi_regs.mr_addr = target_apic_id;
- intrmap_tbl[0] = irq_ptr->airq_intrmap_private;
+ for (i = 0; i < count; i++) {
+ irqno = apic_vector_to_irq[vector + i];
+ intrmap_tbl[i] = apic_irq_table[irqno]->airq_intrmap_private;
+ }
apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
count, 0xff);
for (i = 0; i < count; i++) {
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_timer.c b/usr/src/uts/i86pc/io/pcplusmp/apic_timer.c
new file mode 100644
index 0000000000..ffc1e99f68
--- /dev/null
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic_timer.c
@@ -0,0 +1,399 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/time.h>
+#include <sys/psm.h>
+#include <sys/psm_common.h>
+#include <sys/apic.h>
+#include <sys/pit.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/cpuvar.h>
+#include <sys/clock.h>
+#include <sys/apic_timer.h>
+
+/*
+ * Preferred APIC timer mode; may be tuned via the /etc/system file.
+ */
+int apic_timer_preferred_mode = APIC_TIMER_MODE_DEADLINE;
+
+int apic_oneshot = 0;
+uint_t apic_hertz_count;
+uint_t apic_nsec_per_intr = 0;
+uint64_t apic_ticks_per_SFnsecs; /* # of ticks in SF nsecs */
+
+static int apic_min_timer_ticks = 1; /* minimum timer tick */
+static hrtime_t apic_nsec_max;
+
+static void periodic_timer_enable(void);
+static void periodic_timer_disable(void);
+static void periodic_timer_reprogram(hrtime_t);
+static void oneshot_timer_enable(void);
+static void oneshot_timer_disable(void);
+static void oneshot_timer_reprogram(hrtime_t);
+static void deadline_timer_enable(void);
+static void deadline_timer_disable(void);
+static void deadline_timer_reprogram(hrtime_t);
+
+extern int apic_clkvect;
+extern uint32_t apic_divide_reg_init;
+
+/*
+ * apic timer data structure
+ */
+typedef struct apic_timer {
+ int mode;
+ void (*apic_timer_enable_ops)(void);
+ void (*apic_timer_disable_ops)(void);
+ void (*apic_timer_reprogram_ops)(hrtime_t);
+} apic_timer_t;
+
+static apic_timer_t apic_timer;
+
+/*
+ * apic timer initialization
+ *
+ * For the one-shot mode request case, the function returns the
+ * resolution (in nanoseconds) for the hardware timer interrupt.
+ * If one-shot mode capability is not available, the return value
+ * will be 0.
+ */
+int
+apic_timer_init(int hertz)
+{
+ uint_t apic_ticks = 0;
+ uint_t pit_ticks;
+ int ret, timer_mode;
+ uint16_t pit_ticks_adj;
+ static int firsttime = 1;
+
+ if (firsttime) {
+ /* first time calibrate on CPU0 only */
+
+ apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
+ apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
+ apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
+
+ /* total number of PIT ticks corresponding to apic_ticks */
+ pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
+
+ /*
+ * Determine the number of nanoseconds per APIC clock tick
+ * and then determine how many APIC ticks to interrupt at the
+ * desired frequency
+ * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
+ * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
+ * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
+		 * apic_ticks_per_SFns =
+ * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
+ */
+ apic_ticks_per_SFnsecs = ((SF * apic_ticks * PIT_HZ) /
+ ((uint64_t)pit_ticks * NANOSEC));
+
+ /* the interval timer initial count is 32 bit max */
+ apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
+ firsttime = 0;
+ }
+
+ if (hertz == 0) {
+ /* requested one_shot */
+
+ /*
+ * return 0 if TSC is not supported.
+ */
+ if (!tsc_gethrtime_enable)
+ return (0);
+ /*
+ * return 0 if one_shot is not preferred.
+		 * Here, APIC_TIMER_MODE_DEADLINE is also a one_shot mode.
+ */
+ if ((apic_timer_preferred_mode != APIC_TIMER_MODE_ONESHOT) &&
+ (apic_timer_preferred_mode != APIC_TIMER_MODE_DEADLINE))
+ return (0);
+
+ apic_oneshot = 1;
+ ret = (int)APIC_TICKS_TO_NSECS(1);
+ if ((apic_timer_preferred_mode == APIC_TIMER_MODE_DEADLINE) &&
+ cpuid_deadline_tsc_supported()) {
+ timer_mode = APIC_TIMER_MODE_DEADLINE;
+ } else {
+ timer_mode = APIC_TIMER_MODE_ONESHOT;
+ }
+ } else {
+ /* periodic */
+ apic_nsec_per_intr = NANOSEC / hertz;
+ apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
+
+ /* program the local APIC to interrupt at the given frequency */
+ apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT) | AV_PERIODIC);
+ apic_oneshot = 0;
+ timer_mode = APIC_TIMER_MODE_PERIODIC;
+ ret = NANOSEC / hertz;
+ }
+
+ /*
+ * initialize apic_timer data structure, install the timer ops
+	 * initialize the apic_timer data structure and install the timer ops
+ apic_timer.mode = timer_mode;
+ switch (timer_mode) {
+ default:
+ /* FALLTHROUGH */
+ case APIC_TIMER_MODE_ONESHOT:
+ apic_timer.apic_timer_enable_ops = oneshot_timer_enable;
+ apic_timer.apic_timer_disable_ops = oneshot_timer_disable;
+ apic_timer.apic_timer_reprogram_ops = oneshot_timer_reprogram;
+ break;
+
+ case APIC_TIMER_MODE_PERIODIC:
+ apic_timer.apic_timer_enable_ops = periodic_timer_enable;
+ apic_timer.apic_timer_disable_ops = periodic_timer_disable;
+ apic_timer.apic_timer_reprogram_ops = periodic_timer_reprogram;
+ break;
+
+ case APIC_TIMER_MODE_DEADLINE:
+ apic_timer.apic_timer_enable_ops = deadline_timer_enable;
+ apic_timer.apic_timer_disable_ops = deadline_timer_disable;
+ apic_timer.apic_timer_reprogram_ops = deadline_timer_reprogram;
+ break;
+ }
+
+ return (ret);
+}
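apic_timer_init() folds the PIT-versus-APIC calibration into one scaled integer ratio, apic_ticks_per_SFnsecs, so later conversions between ticks and nanoseconds need only integer multiplies and divides. A rough worked example with invented measurement values (SF and PIT_HZ are left symbolic here; their real values come from apic.h and pit.h):

#include <stdint.h>

/*
 * ticks_per_SF_ns = (SF * apic_ticks * pit_hz) / (pit_ticks * 10^9)
 *
 * Invented example: if the APIC counted apic_ticks = 200000 while the
 * PIT counted pit_ticks = 16384 at pit_hz of roughly 1.19 MHz, the APIC
 * timer runs at about 200000 * 1190000 / 16384 ~= 14.5 MHz, i.e. about
 * 69 ns per tick.  Beware of overflow for large SF; this only mirrors
 * the shape of the kernel expression.
 */
static uint64_t
ticks_per_scaled_ns(uint64_t sf, uint64_t apic_ticks, uint64_t pit_hz,
    uint64_t pit_ticks)
{
	return ((sf * apic_ticks * pit_hz) / (pit_ticks * 1000000000ULL));
}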
+
+/*
+ * periodic timer mode ops
+ */
+/* periodic timer enable */
+static void
+periodic_timer_enable(void)
+{
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT) | AV_PERIODIC);
+}
+
+/* periodic timer disable */
+static void
+periodic_timer_disable(void)
+{
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
+}
+
+/* periodic timer reprogram */
+static void
+periodic_timer_reprogram(hrtime_t time)
+{
+ uint_t ticks;
+ /* time is the interval for periodic mode */
+ ticks = APIC_NSECS_TO_TICKS(time);
+
+ if (ticks < apic_min_timer_ticks)
+ ticks = apic_min_timer_ticks;
+
+ apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
+}
+
+/*
+ * oneshot timer mode ops
+ */
+/* oneshot timer enable */
+static void
+oneshot_timer_enable(void)
+{
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT));
+}
+
+/* oneshot timer disable */
+static void
+oneshot_timer_disable(void)
+{
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
+}
+
+/* oneshot timer reprogram */
+static void
+oneshot_timer_reprogram(hrtime_t time)
+{
+ hrtime_t now;
+ int64_t delta;
+ uint_t ticks;
+
+ now = gethrtime();
+ delta = time - now;
+
+ if (delta <= 0) {
+ /*
+ * requested to generate an interrupt in the past
+ * generate an interrupt as soon as possible
+ */
+ ticks = apic_min_timer_ticks;
+ } else if (delta > apic_nsec_max) {
+ /*
+ * requested to generate an interrupt at a time
+ * further than what we are capable of. Set to max
+ * the hardware can handle
+ */
+ ticks = APIC_MAXVAL;
+#ifdef DEBUG
+ cmn_err(CE_CONT, "apic_timer_reprogram, request at"
+ " %lld too far in future, current time"
+ " %lld \n", time, now);
+#endif
+ } else {
+ ticks = APIC_NSECS_TO_TICKS(delta);
+ }
+
+ if (ticks < apic_min_timer_ticks)
+ ticks = apic_min_timer_ticks;
+
+ apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
+}
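One-shot reprogramming converts an absolute deadline into an APIC tick count and clamps it to what the 32-bit initial-count register can represent. The clamping policy can be restated compactly like this (my own framing, with the tick conversion passed in as a callback rather than the APIC_NSECS_TO_TICKS macro):

#include <stdint.h>

static uint32_t
clamp_oneshot_ticks(int64_t delta_ns, int64_t max_ns, uint32_t min_ticks,
    uint32_t max_ticks, uint32_t (*ns_to_ticks)(int64_t))
{
	uint32_t ticks;

	if (delta_ns <= 0)
		ticks = min_ticks;	/* deadline already passed: fire ASAP */
	else if (delta_ns > max_ns)
		ticks = max_ticks;	/* beyond the 32-bit counter's reach */
	else
		ticks = ns_to_ticks(delta_ns);

	return (ticks < min_ticks ? min_ticks : ticks);
}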
+
+/*
+ * deadline timer mode ops
+ */
+/* deadline timer enable */
+static void
+deadline_timer_enable(void)
+{
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT) | AV_DEADLINE);
+}
+
+/* deadline timer disable */
+static void
+deadline_timer_disable(void)
+{
+ apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
+ (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
+}
+
+/* deadline timer reprogram */
+static void
+deadline_timer_reprogram(hrtime_t time)
+{
+ uint64_t ticks;
+
+ if (time <= 0) {
+ /*
+ * generate an immediate interrupt
+ */
+ ticks = (uint64_t)tsc_read();
+ } else {
+ ticks = unscalehrtime(time);
+ }
+
+ wrmsr(IA32_DEADLINE_TSC_MSR, ticks);
+}
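In TSC-deadline mode the target is absolute: the high-resolution deadline is converted to a TSC value (unscalehrtime() in the patch) and written to the deadline MSR, and writing a value at or below the current TSC, as the time <= 0 case does with tsc_read(), fires the timer immediately. A hedged sketch of that arm step with the TSC read, conversion, and MSR write stubbed out:

#include <stdint.h>

extern uint64_t read_tsc(void);			/* stub for tsc_read() */
extern uint64_t ns_to_tsc(int64_t ns);		/* stub for unscalehrtime() */
extern void write_deadline_msr(uint64_t tsc);	/* stub for the wrmsr */

static void
arm_deadline(int64_t deadline_ns)
{
	uint64_t target;

	if (deadline_ns <= 0)
		target = read_tsc();	/* writing "now" fires immediately */
	else
		target = ns_to_tsc(deadline_ns);

	write_deadline_msr(target);
}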
+
+/*
+ * This function will reprogram the timer.
+ *
+ * When in oneshot mode the argument is the absolute time in future to
+ * generate the interrupt at.
+ *
+ * When in periodic mode, the argument is the interval at which the
+ * interrupts should be generated. There is no need to support the periodic
+ * mode timer change at this time.
+ */
+void
+apic_timer_reprogram(hrtime_t time)
+{
+ /*
+	 * We should be called from high PIL context (CBE_HIGH_PIL),
+ * so kpreempt is disabled.
+ */
+ apic_timer.apic_timer_reprogram_ops(time);
+}
+
+/*
+ * This function will enable timer interrupts.
+ */
+void
+apic_timer_enable(void)
+{
+ /*
+	 * We should be called from high PIL context (CBE_HIGH_PIL),
+ * so kpreempt is disabled.
+ */
+ apic_timer.apic_timer_enable_ops();
+}
+
+/*
+ * This function will disable timer interrupts.
+ */
+void
+apic_timer_disable(void)
+{
+ /*
+	 * We should be called from high PIL context (CBE_HIGH_PIL),
+ * so kpreempt is disabled.
+ */
+ apic_timer.apic_timer_disable_ops();
+}
+
+/*
+ * Set the timer far into the future and return the timer's
+ * current count in nanoseconds.
+ */
+hrtime_t
+apic_timer_stop_count(void)
+{
+ hrtime_t ns_val;
+ int enable_val, count_val;
+
+ /*
+ * Should be called with interrupts disabled.
+ */
+ ASSERT(!interrupts_enabled());
+
+ enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
+ if ((enable_val & AV_MASK) == AV_MASK)
+ return ((hrtime_t)-1); /* timer is disabled */
+
+ count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
+ ns_val = APIC_TICKS_TO_NSECS(count_val);
+
+ apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
+
+ return (ns_val);
+}
+
+/*
+ * Reprogram timer after Deep C-State.
+ */
+void
+apic_timer_restart(hrtime_t time)
+{
+ apic_timer_reprogram(time);
+}
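apic_timer_stop_count() and apic_timer_restart() exist, per the comments above, so that deep C-state code can park the LAPIC timer before entering an idle state in which it may stop ticking, and rearm it on the way out. A sketch of the intended pairing, with stand-in names since the real callers live in the CPU power-management code and are not part of this diff:

typedef long long hrtime_sketch_t;

extern hrtime_sketch_t timer_stop_count(void);	/* apic_timer_stop_count() */
extern void timer_restart(hrtime_sketch_t);	/* apic_timer_restart() */
extern void enter_deep_cstate(void);		/* stub */
extern hrtime_sketch_t now_ns(void);		/* stub for gethrtime() */

static void
idle_with_lapic_parked(void)
{
	hrtime_sketch_t remaining;

	remaining = timer_stop_count();	/* -1 means the timer was disabled */
	enter_deep_cstate();		/* LAPIC timer may not tick in here */
	if (remaining != -1)
		timer_restart(now_ns() + remaining);
}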
diff --git a/usr/src/uts/i86pc/io/rootnex.c b/usr/src/uts/i86pc/io/rootnex.c
index 8416281fee..0f42739f23 100644
--- a/usr/src/uts/i86pc/io/rootnex.c
+++ b/usr/src/uts/i86pc/io/rootnex.c
@@ -134,8 +134,10 @@ int rootnex_prealloc_copybuf = 2;
/* driver global state */
static rootnex_state_t *rootnex_state;
+#ifdef DEBUG
/* shortcut to rootnex counters */
static uint64_t *rootnex_cnt;
+#endif
/*
* XXX - does x86 even need these or are they left over from the SPARC days?
@@ -149,12 +151,17 @@ static rootnex_intprop_t rootnex_intprp[] = {
};
#define NROOT_INTPROPS (sizeof (rootnex_intprp) / sizeof (rootnex_intprop_t))
+/*
+ * If we're dom0, we're using a real device so we need to load
+ * the cookies with MFNs instead of PFNs.
+ */
#ifdef __xpv
typedef maddr_t rootnex_addr_t;
-#define ROOTNEX_PADDR_TO_RBASE(xinfo, pa) \
- (DOMAIN_IS_INITDOMAIN(xinfo) ? pa_to_ma(pa) : (pa))
+#define ROOTNEX_PADDR_TO_RBASE(pa) \
+ (DOMAIN_IS_INITDOMAIN(xen_info) ? pa_to_ma(pa) : (pa))
#else
typedef paddr_t rootnex_addr_t;
+#define ROOTNEX_PADDR_TO_RBASE(pa) (pa)
#endif
#if !defined(__xpv)
@@ -244,6 +251,14 @@ static int rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip,
ddi_dma_handle_t handle, uint_t win, off_t *offp, size_t *lenp,
ddi_dma_cookie_t *cookiep, uint_t *ccountp);
+#if defined(__amd64) && !defined(__xpv)
+static int rootnex_coredma_hdl_setprivate(dev_info_t *dip, dev_info_t *rdip,
+ ddi_dma_handle_t handle, void *v);
+static void *rootnex_coredma_hdl_getprivate(dev_info_t *dip, dev_info_t *rdip,
+ ddi_dma_handle_t handle);
+#endif
+
+
static struct bus_ops rootnex_bus_ops = {
BUSO_REV,
rootnex_map,
@@ -324,7 +339,9 @@ static iommulib_nexops_t iommulib_nexops = {
rootnex_coredma_sync,
rootnex_coredma_win,
rootnex_dma_map,
- rootnex_dma_mctl
+ rootnex_dma_mctl,
+ rootnex_coredma_hdl_setprivate,
+ rootnex_coredma_hdl_getprivate
};
#endif
@@ -369,13 +386,15 @@ static int rootnex_valid_bind_parms(ddi_dma_req_t *dmareq,
ddi_dma_attr_t *attr);
static void rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
rootnex_sglinfo_t *sglinfo);
+static void rootnex_dvma_get_sgl(ddi_dma_obj_t *dmar_object,
+ ddi_dma_cookie_t *sgl, rootnex_sglinfo_t *sglinfo);
static int rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
- rootnex_dma_t *dma, ddi_dma_attr_t *attr, int kmflag);
+ rootnex_dma_t *dma, ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag);
static int rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
rootnex_dma_t *dma, ddi_dma_attr_t *attr);
static void rootnex_teardown_copybuf(rootnex_dma_t *dma);
static int rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
- ddi_dma_attr_t *attr, int kmflag);
+ ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag);
static void rootnex_teardown_windows(rootnex_dma_t *dma);
static void rootnex_init_win(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
rootnex_window_t *window, ddi_dma_cookie_t *cookie, off_t cur_offset);
@@ -397,6 +416,7 @@ static int rootnex_dma_check(dev_info_t *dip, const void *handle,
const void *comp_addr, const void *not_used);
static boolean_t rootnex_need_bounce_seg(ddi_dma_obj_t *dmar_object,
rootnex_sglinfo_t *sglinfo);
+static struct as *rootnex_get_as(ddi_dma_obj_t *dmar_object);
/*
* _init()
@@ -466,7 +486,9 @@ rootnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
rootnex_state->r_dip = dip;
rootnex_state->r_err_ibc = (ddi_iblock_cookie_t)ipltospl(15);
rootnex_state->r_reserved_msg_printed = B_FALSE;
+#ifdef DEBUG
rootnex_cnt = &rootnex_state->r_counters[0];
+#endif
/*
* Set minimum fm capability level for i86pc platforms and then
@@ -1723,13 +1745,8 @@ rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
* best we can do with the current bind interfaces.
*/
hp = kmem_cache_alloc(rootnex_state->r_dmahdl_cache, kmflag);
- if (hp == NULL) {
- if (waitfp != DDI_DMA_DONTWAIT) {
- ddi_set_callback(waitfp, arg,
- &rootnex_state->r_dvma_call_list_id);
- }
+ if (hp == NULL)
return (DDI_DMA_NORESOURCES);
- }
/* Do our pointer manipulation now, align the structures */
hp->dmai_private = (void *)(((uintptr_t)hp +
@@ -1743,13 +1760,32 @@ rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
hp->dmai_error.err_fep = NULL;
hp->dmai_error.err_cf = NULL;
dma->dp_dip = rdip;
+ dma->dp_sglinfo.si_flags = attr->dma_attr_flags;
dma->dp_sglinfo.si_min_addr = attr->dma_attr_addr_lo;
- dma->dp_sglinfo.si_max_addr = attr->dma_attr_addr_hi;
+
+ /*
+ * The BOUNCE_ON_SEG workaround is not needed when an IOMMU
+ * is being used. Set the upper limit to the seg value.
+ * There will be enough DVMA space to always get addresses
+ * that will match the constraints.
+ */
+ if (IOMMU_USED(rdip) &&
+ (attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)) {
+ dma->dp_sglinfo.si_max_addr = attr->dma_attr_seg;
+ dma->dp_sglinfo.si_flags &= ~_DDI_DMA_BOUNCE_ON_SEG;
+ } else
+ dma->dp_sglinfo.si_max_addr = attr->dma_attr_addr_hi;
+
hp->dmai_minxfer = attr->dma_attr_minxfer;
hp->dmai_burstsizes = attr->dma_attr_burstsizes;
hp->dmai_rdip = rdip;
hp->dmai_attr = *attr;
+ if (attr->dma_attr_seg >= dma->dp_sglinfo.si_max_addr)
+ dma->dp_sglinfo.si_cancross = B_FALSE;
+ else
+ dma->dp_sglinfo.si_cancross = B_TRUE;
+
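si_cancross records whether a single cookie could ever straddle the device's segment boundary: if dma_attr_seg is at least as large as the highest address the handle can use, nothing in range can cross it and the splitting logic can be skipped. A tiny restatement with example attribute values (the values are invented):

#include <stdint.h>

/*
 * Example: seg = 0xFFFFFFFF with max_addr = 0xFFFFFFFF means no cookie
 * below 4 GB can cross the boundary (returns 0); seg = 0x00FFFFFF with
 * max_addr = 0xFFFFFFFF means cookies may straddle 16 MB lines and must
 * be split (returns 1).
 */
static int
may_cross_segment(uint64_t dma_attr_seg, uint64_t si_max_addr)
{
	return (dma_attr_seg < si_max_addr);
}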
/* we don't need to worry about the SPL since we do a tryenter */
mutex_init(&dma->dp_mutex, NULL, MUTEX_DRIVER, NULL);
@@ -1812,13 +1848,12 @@ rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
}
dma->dp_sglinfo.si_max_cookie_size = maxsegmentsize;
dma->dp_sglinfo.si_segmask = attr->dma_attr_seg;
- dma->dp_sglinfo.si_flags = attr->dma_attr_flags;
/* check the ddi_dma_attr arg to make sure it makes a little sense */
if (rootnex_alloc_check_parms) {
e = rootnex_valid_alloc_parms(attr, maxsegmentsize);
if (e != DDI_SUCCESS) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ALLOC_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ALLOC_FAIL]);
(void) rootnex_dma_freehdl(dip, rdip,
(ddi_dma_handle_t)hp);
return (e);
@@ -1843,31 +1878,40 @@ static int
rootnex_dma_allochdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *handlep)
{
- int retval;
+ int retval = DDI_SUCCESS;
#if defined(__amd64) && !defined(__xpv)
- uint_t error = ENOTSUP;
- retval = iommulib_nex_open(rdip, &error);
+ if (IOMMU_UNITIALIZED(rdip)) {
+ retval = iommulib_nex_open(dip, rdip);
- if (retval != DDI_SUCCESS && error == ENOTSUP) {
- /* No IOMMU */
- return (rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
- handlep));
- } else if (retval != DDI_SUCCESS) {
- return (DDI_FAILURE);
+ if (retval != DDI_SUCCESS && retval != DDI_ENOTSUP)
+ return (retval);
}
- ASSERT(IOMMU_USED(rdip));
-
- /* has an IOMMU */
- retval = iommulib_nexdma_allochdl(dip, rdip, attr,
- waitfp, arg, handlep);
+ if (IOMMU_UNUSED(rdip)) {
+ retval = rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
+ handlep);
+ } else {
+ retval = iommulib_nexdma_allochdl(dip, rdip, attr,
+ waitfp, arg, handlep);
+ }
#else
retval = rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
handlep);
#endif
- if (retval == DDI_SUCCESS)
+ switch (retval) {
+ case DDI_DMA_NORESOURCES:
+ if (waitfp != DDI_DMA_DONTWAIT) {
+ ddi_set_callback(waitfp, arg,
+ &rootnex_state->r_dvma_call_list_id);
+ }
+ break;
+ case DDI_SUCCESS:
ndi_fmc_insert(rdip, DMA_HANDLE, *handlep, NULL);
+ break;
+ default:
+ break;
+ }
return (retval);
}
@@ -1893,9 +1937,6 @@ rootnex_coredma_freehdl(dev_info_t *dip, dev_info_t *rdip,
ROOTNEX_DPROBE1(rootnex__free__handle, uint64_t,
rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
- if (rootnex_state->r_dvma_call_list_id)
- ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
-
return (DDI_SUCCESS);
}
@@ -1906,13 +1947,20 @@ rootnex_coredma_freehdl(dev_info_t *dip, dev_info_t *rdip,
static int
rootnex_dma_freehdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle)
{
+ int ret;
+
ndi_fmc_remove(rdip, DMA_HANDLE, handle);
#if defined(__amd64) && !defined(__xpv)
- if (IOMMU_USED(rdip)) {
- return (iommulib_nexdma_freehdl(dip, rdip, handle));
- }
+ if (IOMMU_USED(rdip))
+ ret = iommulib_nexdma_freehdl(dip, rdip, handle);
+ else
#endif
- return (rootnex_coredma_freehdl(dip, rdip, handle));
+ ret = rootnex_coredma_freehdl(dip, rdip, handle);
+
+ if (rootnex_state->r_dvma_call_list_id)
+ ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
+
+ return (ret);
}
/*ARGSUSED*/
@@ -1922,21 +1970,29 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
ddi_dma_cookie_t *cookiep, uint_t *ccountp)
{
rootnex_sglinfo_t *sinfo;
+ ddi_dma_obj_t *dmao;
+#if defined(__amd64) && !defined(__xpv)
+ struct dvmaseg *dvs;
+ ddi_dma_cookie_t *cookie;
+#endif
ddi_dma_attr_t *attr;
ddi_dma_impl_t *hp;
rootnex_dma_t *dma;
int kmflag;
int e;
+ uint_t ncookies;
hp = (ddi_dma_impl_t *)handle;
dma = (rootnex_dma_t *)hp->dmai_private;
+ dmao = &dma->dp_dma;
sinfo = &dma->dp_sglinfo;
attr = &hp->dmai_attr;
+ /* convert the sleep flags */
if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
- dma->dp_sleep_flags = KM_SLEEP;
+ dma->dp_sleep_flags = kmflag = KM_SLEEP;
} else {
- dma->dp_sleep_flags = KM_NOSLEEP;
+ dma->dp_sleep_flags = kmflag = KM_NOSLEEP;
}
hp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
@@ -1953,12 +2009,12 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
*/
e = mutex_tryenter(&dma->dp_mutex);
if (e == 0) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
return (DDI_DMA_INUSE);
}
if (dma->dp_inuse) {
mutex_exit(&dma->dp_mutex);
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
return (DDI_DMA_INUSE);
}
dma->dp_inuse = B_TRUE;
@@ -1969,7 +2025,7 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
if (rootnex_bind_check_parms) {
e = rootnex_valid_bind_parms(dmareq, attr);
if (e != DDI_SUCCESS) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
rootnex_clean_dmahdl(hp);
return (e);
}
@@ -1979,30 +2035,72 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
dma->dp_dma = dmareq->dmar_object;
#if defined(__amd64) && !defined(__xpv)
- e = immu_map_sgl(hp, dmareq, rootnex_prealloc_cookies, rdip);
- switch (e) {
- case DDI_DMA_MAPPED:
- goto out;
- case DDI_DMA_USE_PHYSICAL:
- break;
- case DDI_DMA_PARTIAL:
- ddi_err(DER_PANIC, rdip, "Partial DVMA map");
- e = DDI_DMA_NORESOURCES;
- /*FALLTHROUGH*/
- default:
- ddi_err(DER_MODE, rdip, "DVMA map failed");
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
- rootnex_clean_dmahdl(hp);
- return (e);
+ if (IOMMU_USED(rdip)) {
+ dmao = &dma->dp_dvma;
+ e = iommulib_nexdma_mapobject(dip, rdip, handle, dmareq, dmao);
+ switch (e) {
+ case DDI_SUCCESS:
+ if (sinfo->si_cancross ||
+ dmao->dmao_obj.dvma_obj.dv_nseg != 1 ||
+ dmao->dmao_size > sinfo->si_max_cookie_size) {
+ dma->dp_dvma_used = B_TRUE;
+ break;
+ }
+ sinfo->si_sgl_size = 1;
+ hp->dmai_rflags |= DMP_NOSYNC;
+
+ dma->dp_dvma_used = B_TRUE;
+ dma->dp_need_to_free_cookie = B_FALSE;
+
+ dvs = &dmao->dmao_obj.dvma_obj.dv_seg[0];
+ cookie = hp->dmai_cookie = dma->dp_cookies =
+ (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
+ cookie->dmac_laddress = dvs->dvs_start +
+ dmao->dmao_obj.dvma_obj.dv_off;
+ cookie->dmac_size = dvs->dvs_len;
+ cookie->dmac_type = 0;
+
+ ROOTNEX_DPROBE1(rootnex__bind__dvmafast, dev_info_t *,
+ rdip);
+ goto fast;
+ case DDI_ENOTSUP:
+ break;
+ default:
+ rootnex_clean_dmahdl(hp);
+ return (e);
+ }
}
#endif
/*
- * Figure out a rough estimate of what maximum number of pages this
- * buffer could use (a high estimate of course).
+	 * Figure out a rough estimate of the maximum number of pages
+	 * this buffer could use (a high estimate, of course).
*/
sinfo->si_max_pages = mmu_btopr(dma->dp_dma.dmao_size) + 1;
+ if (dma->dp_dvma_used) {
+ /*
+ * The number of physical pages is the worst case.
+ *
+ * For DVMA, the worst case is the length divided
+ * by the maximum cookie length, plus 1. Add to that
+ * the number of segment boundaries potentially crossed, and
+ * the additional number of DVMA segments that was returned.
+ *
+ * In the normal case, for modern devices, si_cancross will
+ * be false, and dv_nseg will be 1, and the fast path will
+ * have been taken above.
+ */
+ ncookies = (dma->dp_dma.dmao_size / sinfo->si_max_cookie_size)
+ + 1;
+ if (sinfo->si_cancross)
+ ncookies +=
+ (dma->dp_dma.dmao_size / attr->dma_attr_seg) + 1;
+ ncookies += (dmao->dmao_obj.dvma_obj.dv_nseg - 1);
+
+ sinfo->si_max_pages = MIN(sinfo->si_max_pages, ncookies);
+ }
+
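The estimate above bounds the cookie array for a DVMA bind: the length divided by the maximum cookie size plus one, plus one per potential segment-boundary crossing when si_cancross is set, plus any DVMA segments beyond the first. A worked example with invented numbers:

#include <stdint.h>

/*
 * Example: a 1 MB (0x100000) object, a 64 KB maximum cookie size, a
 * 64 KB device segment boundary (dma_attr_seg ~ 0xFFFF, si_cancross
 * true), and a single DVMA segment:
 *   0x100000 / 0x10000 + 1 = 17
 * + 17 more for possible 64 KB boundary crossings
 * + (1 - 1) = 0 extra DVMA segments
 * = 34 cookies worst case, then capped by the physical-page estimate.
 */
static uint64_t
worst_case_cookies(uint64_t size, uint64_t max_cookie, uint64_t seg,
    uint32_t nseg, int cancross)
{
	uint64_t n = size / max_cookie + 1;

	if (cancross)
		n += size / seg + 1;
	return (n + (nseg - 1));
}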
/*
* We'll use the pre-allocated cookies for any bind that will *always*
* fit (more important to be consistent, we don't want to create
@@ -2011,7 +2109,7 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
if (sinfo->si_max_pages <= rootnex_state->r_prealloc_cookies) {
dma->dp_cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
dma->dp_need_to_free_cookie = B_FALSE;
- DTRACE_PROBE2(rootnex__bind__prealloc, dev_info_t *, rdip,
+ ROOTNEX_DPROBE2(rootnex__bind__prealloc, dev_info_t *, rdip,
uint_t, sinfo->si_max_pages);
/*
@@ -2024,13 +2122,6 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
* the bind interface would speed this case up.
*/
} else {
- /* convert the sleep flags */
- if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
- kmflag = KM_SLEEP;
- } else {
- kmflag = KM_NOSLEEP;
- }
-
/*
* Save away how much memory we allocated. If we're doing a
* nosleep, the alloc could fail...
@@ -2039,13 +2130,13 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
sizeof (ddi_dma_cookie_t);
dma->dp_cookies = kmem_alloc(dma->dp_cookie_size, kmflag);
if (dma->dp_cookies == NULL) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
rootnex_clean_dmahdl(hp);
return (DDI_DMA_NORESOURCES);
}
dma->dp_need_to_free_cookie = B_TRUE;
- DTRACE_PROBE2(rootnex__bind__alloc, dev_info_t *, rdip, uint_t,
- sinfo->si_max_pages);
+ ROOTNEX_DPROBE2(rootnex__bind__alloc, dev_info_t *, rdip,
+ uint_t, sinfo->si_max_pages);
}
hp->dmai_cookie = dma->dp_cookies;
@@ -2056,8 +2147,10 @@ rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
* the sgl clean, or do we need to do some munging; how many pages
* need to be copied, etc.)
*/
- rootnex_get_sgl(&dmareq->dmar_object, dma->dp_cookies,
- &dma->dp_sglinfo);
+ if (dma->dp_dvma_used)
+ rootnex_dvma_get_sgl(dmao, dma->dp_cookies, &dma->dp_sglinfo);
+ else
+ rootnex_get_sgl(dmao, dma->dp_cookies, &dma->dp_sglinfo);
out:
ASSERT(sinfo->si_sgl_size <= sinfo->si_max_pages);
@@ -2076,7 +2169,8 @@ out:
*/
if ((sinfo->si_copybuf_req == 0) &&
(sinfo->si_sgl_size <= attr->dma_attr_sgllen) &&
- (dma->dp_dma.dmao_size < dma->dp_maxxfer)) {
+ (dmao->dmao_size < dma->dp_maxxfer)) {
+fast:
/*
* If the driver supports FMA, insert the handle in the FMA DMA
* handle cache.
@@ -2094,10 +2188,10 @@ out:
*ccountp = sinfo->si_sgl_size;
hp->dmai_cookie++;
hp->dmai_rflags &= ~DDI_DMA_PARTIAL;
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
- DTRACE_PROBE3(rootnex__bind__fast, dev_info_t *, rdip,
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
+ ROOTNEX_DPROBE4(rootnex__bind__fast, dev_info_t *, rdip,
uint64_t, rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS],
- uint_t, dma->dp_dma.dmao_size);
+ uint_t, dmao->dmao_size, uint_t, *ccountp);
return (DDI_DMA_MAPPED);
@@ -2107,12 +2201,26 @@ out:
* go to the slow path, we may need to alloc more memory, create
* multiple windows, and munge up a sgl to make the device happy.
*/
- e = rootnex_bind_slowpath(hp, dmareq, dma, attr, kmflag);
+
+ /*
+ * With the IOMMU mapobject method used, we should never hit
+ * the slow path. If we do, something is seriously wrong.
+ * Clean up and return an error.
+ */
+
+ if (dma->dp_dvma_used) {
+ (void) iommulib_nexdma_unmapobject(dip, rdip, handle,
+ &dma->dp_dvma);
+ e = DDI_DMA_NOMAPPING;
+ } else {
+ e = rootnex_bind_slowpath(hp, dmareq, dma, attr, &dma->dp_dma,
+ kmflag);
+ }
if ((e != DDI_DMA_MAPPED) && (e != DDI_DMA_PARTIAL_MAP)) {
if (dma->dp_need_to_free_cookie) {
kmem_free(dma->dp_cookies, dma->dp_cookie_size);
}
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
rootnex_clean_dmahdl(hp); /* must be after free cookie */
return (e);
}
@@ -2150,9 +2258,9 @@ out:
hp->dmai_cookie++;
ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
- ROOTNEX_DPROBE3(rootnex__bind__slow, dev_info_t *, rdip, uint64_t,
+ ROOTNEX_DPROBE4(rootnex__bind__slow, dev_info_t *, rdip, uint64_t,
rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS], uint_t,
- dma->dp_dma.dmao_size);
+ dmao->dmao_size, uint_t, *ccountp);
return (e);
}
@@ -2165,14 +2273,22 @@ rootnex_dma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
ddi_dma_cookie_t *cookiep, uint_t *ccountp)
{
+ int ret;
#if defined(__amd64) && !defined(__xpv)
- if (IOMMU_USED(rdip)) {
- return (iommulib_nexdma_bindhdl(dip, rdip, handle, dmareq,
- cookiep, ccountp));
- }
+ if (IOMMU_USED(rdip))
+ ret = iommulib_nexdma_bindhdl(dip, rdip, handle, dmareq,
+ cookiep, ccountp);
+ else
#endif
- return (rootnex_coredma_bindhdl(dip, rdip, handle, dmareq,
- cookiep, ccountp));
+ ret = rootnex_coredma_bindhdl(dip, rdip, handle, dmareq,
+ cookiep, ccountp);
+
+ if (ret == DDI_DMA_NORESOURCES && dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
+ ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
+ &rootnex_state->r_dvma_call_list_id);
+ }
+
+ return (ret);
}
@@ -2212,15 +2328,9 @@ rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
rootnex_teardown_copybuf(dma);
rootnex_teardown_windows(dma);
-#if defined(__amd64) && !defined(__xpv)
- /*
- * Clean up the page tables and free the dvma
- */
- e = immu_unmap_sgl(hp, rdip);
- if (e != DDI_DMA_USE_PHYSICAL && e != DDI_SUCCESS) {
- return (e);
- }
-#endif
+ if (IOMMU_USED(rdip))
+ (void) iommulib_nexdma_unmapobject(dip, rdip, handle,
+ &dma->dp_dvma);
/*
* If we had to allocate space to for the worse case sgl (it didn't
@@ -2237,9 +2347,6 @@ rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
rootnex_clean_dmahdl(hp);
hp->dmai_error.err_cf = NULL;
- if (rootnex_state->r_dvma_call_list_id)
- ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
-
ROOTNEX_DPROF_DEC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
ROOTNEX_DPROBE1(rootnex__unbind, uint64_t,
rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
@@ -2256,12 +2363,19 @@ static int
rootnex_dma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
ddi_dma_handle_t handle)
{
+ int ret;
+
#if defined(__amd64) && !defined(__xpv)
- if (IOMMU_USED(rdip)) {
- return (iommulib_nexdma_unbindhdl(dip, rdip, handle));
- }
+ if (IOMMU_USED(rdip))
+ ret = iommulib_nexdma_unbindhdl(dip, rdip, handle);
+ else
#endif
- return (rootnex_coredma_unbindhdl(dip, rdip, handle));
+ ret = rootnex_coredma_unbindhdl(dip, rdip, handle);
+
+ if (rootnex_state->r_dvma_call_list_id)
+ ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
+
+ return (ret);
}
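
The bindhdl/unbindhdl wrappers above introduce a retry convention: a bind that fails with DDI_DMA_NORESOURCES registers the caller's wait function on r_dvma_call_list_id, and every unbind drains that list so waiters can retry. A minimal user-level sketch of that pattern (hypothetical helper names, not the DDI implementation) might look like this:

#include <stdio.h>
#include <stdlib.h>

typedef int (*retry_fn_t)(void *);

struct waiter {
	retry_fn_t	fn;
	void		*arg;
	struct waiter	*next;
};

static struct waiter *call_list;	/* stands in for r_dvma_call_list_id */

/* A bind that ran out of resources queues its retry function. */
static void
set_callback(retry_fn_t fn, void *arg)
{
	struct waiter *w = malloc(sizeof (*w));

	if (w == NULL)
		return;
	w->fn = fn;
	w->arg = arg;
	w->next = call_list;
	call_list = w;
}

/* Every unbind frees resources, so drain the queue and let waiters retry. */
static void
run_callbacks(void)
{
	while (call_list != NULL) {
		struct waiter *w = call_list;

		call_list = w->next;
		(void) w->fn(w->arg);
		free(w);
	}
}

static int
retry_bind(void *arg)
{
	printf("retrying bind for %s\n", (char *)arg);
	return (0);
}

int
main(void)
{
	set_callback(retry_bind, "devA");	/* bind failed: NORESOURCES */
	run_callbacks();			/* unbind: resources freed */
	return (0);
}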
#if defined(__amd64) && !defined(__xpv)
@@ -2417,6 +2531,25 @@ rootnex_coredma_clear_cookies(dev_info_t *dip, ddi_dma_handle_t handle)
#endif
+static struct as *
+rootnex_get_as(ddi_dma_obj_t *dmao)
+{
+ struct as *asp;
+
+ switch (dmao->dmao_type) {
+ case DMA_OTYP_VADDR:
+ case DMA_OTYP_BUFVADDR:
+ asp = dmao->dmao_obj.virt_obj.v_as;
+ if (asp == NULL)
+ asp = &kas;
+ break;
+ default:
+ asp = NULL;
+ break;
+ }
+ return (asp);
+}
+
/*
* rootnex_verify_buffer()
* verify buffer wasn't free'd
@@ -2472,7 +2605,7 @@ rootnex_verify_buffer(rootnex_dma_t *dma)
/* For a virtual address, try to peek at each page */
} else {
- if (dma->dp_sglinfo.si_asp == &kas) {
+ if (rootnex_get_as(&dma->dp_dma) == &kas) {
for (i = 0; i < pcnt; i++) {
if (ddi_peek8(NULL, vaddr, &b) ==
DDI_FAILURE)
@@ -2484,7 +2617,7 @@ rootnex_verify_buffer(rootnex_dma_t *dma)
break;
default:
- ASSERT(0);
+ cmn_err(CE_PANIC, "rootnex_verify_buffer: bad DMA object");
break;
}
@@ -2511,6 +2644,7 @@ rootnex_clean_dmahdl(ddi_dma_impl_t *hp)
dma->dp_window = NULL;
dma->dp_cbaddr = NULL;
dma->dp_inuse = B_FALSE;
+ dma->dp_dvma_used = B_FALSE;
dma->dp_need_to_free_cookie = B_FALSE;
dma->dp_need_to_switch_cookies = B_FALSE;
dma->dp_saved_cookies = NULL;
@@ -2660,15 +2794,7 @@ rootnex_need_bounce_seg(ddi_dma_obj_t *dmar_object, rootnex_sglinfo_t *sglinfo)
vaddr += psize;
}
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MFNs instead of PFNs.
- */
- raddr = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- raddr = paddr;
-#endif
+ raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
if ((raddr + psize) > sglinfo->si_segmask) {
upper_addr = B_TRUE;
@@ -2702,15 +2828,7 @@ rootnex_need_bounce_seg(ddi_dma_obj_t *dmar_object, rootnex_sglinfo_t *sglinfo)
vaddr += psize;
}
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MFNs instead of PFNs.
- */
- raddr = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- raddr = paddr;
-#endif
+ raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
if ((raddr + psize) > sglinfo->si_segmask) {
upper_addr = B_TRUE;
@@ -2734,7 +2852,6 @@ rootnex_need_bounce_seg(ddi_dma_obj_t *dmar_object, rootnex_sglinfo_t *sglinfo)
return (B_FALSE);
}
-
/*
* rootnex_get_sgl()
* Called in bind fastpath to get the sgl. Most of this will be replaced
@@ -2839,15 +2956,7 @@ rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
vaddr += psize;
}
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MFNs instead of PFNs.
- */
- raddr = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- raddr = paddr;
-#endif
+ raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
/*
* Setup the first cookie with the physical address of the page and the
@@ -2921,15 +3030,7 @@ rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
vaddr += psize;
}
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MFNs instead of PFNs.
- */
- raddr = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- raddr = paddr;
-#endif
+ raddr = ROOTNEX_PADDR_TO_RBASE(paddr);
/*
* If we are using the copy buffer for anything over the
@@ -3043,6 +3144,98 @@ rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
}
}
+static void
+rootnex_dvma_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
+ rootnex_sglinfo_t *sglinfo)
+{
+ uint64_t offset;
+ uint64_t maxseg;
+ uint64_t dvaddr;
+ struct dvmaseg *dvs;
+ uint64_t paddr;
+ uint32_t psize, ssize;
+ uint32_t size;
+ uint_t cnt;
+ int physcontig;
+
+ ASSERT(dmar_object->dmao_type == DMA_OTYP_DVADDR);
+
+ /* shortcuts */
+ maxseg = sglinfo->si_max_cookie_size;
+ size = dmar_object->dmao_size;
+
+ cnt = 0;
+ sglinfo->si_bounce_on_seg = B_FALSE;
+
+ dvs = dmar_object->dmao_obj.dvma_obj.dv_seg;
+ offset = dmar_object->dmao_obj.dvma_obj.dv_off;
+ ssize = dvs->dvs_len;
+ paddr = dvs->dvs_start;
+ paddr += offset;
+ psize = MIN(ssize, (maxseg - offset));
+ dvaddr = paddr + psize;
+ ssize -= psize;
+
+ sgl[cnt].dmac_laddress = paddr;
+ sgl[cnt].dmac_size = psize;
+ sgl[cnt].dmac_type = 0;
+
+ size -= psize;
+ while (size > 0) {
+ if (ssize == 0) {
+ dvs++;
+ ssize = dvs->dvs_len;
+ dvaddr = dvs->dvs_start;
+ physcontig = 0;
+ } else
+ physcontig = 1;
+
+ paddr = dvaddr;
+ psize = MIN(ssize, maxseg);
+ dvaddr += psize;
+ ssize -= psize;
+
+ if (!physcontig || !(paddr & sglinfo->si_segmask) ||
+ ((sgl[cnt].dmac_size + psize) > maxseg) ||
+ (sgl[cnt].dmac_size == 0)) {
+ /*
+ * if we're not already in a new cookie, go to the next
+ * cookie.
+ */
+ if (sgl[cnt].dmac_size != 0) {
+ cnt++;
+ }
+
+ /* save the cookie information */
+ sgl[cnt].dmac_laddress = paddr;
+ sgl[cnt].dmac_size = psize;
+ sgl[cnt].dmac_type = 0;
+ } else {
+ sgl[cnt].dmac_size += psize;
+
+ /*
+ * if this exactly == the maximum cookie size, and
+ * it isn't the last cookie, go to the next cookie.
+ */
+ if (((sgl[cnt].dmac_size + psize) == maxseg) &&
+ ((cnt + 1) < sglinfo->si_max_pages)) {
+ cnt++;
+ sgl[cnt].dmac_laddress = 0;
+ sgl[cnt].dmac_size = 0;
+ sgl[cnt].dmac_type = 0;
+ }
+ }
+ size -= psize;
+ }
+
+ /* we're done, save away how many cookies the sgl has */
+ if (sgl[cnt].dmac_size == 0) {
+ sglinfo->si_sgl_size = cnt;
+ } else {
+ sglinfo->si_sgl_size = cnt + 1;
+ }
+}
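
For illustration, here is a minimal user-level sketch of the coalescing idea used by rootnex_dvma_get_sgl(): walk the DVMA segments, start a new cookie whenever the address is not contiguous with the current cookie or the cookie would grow past the maximum cookie size, and otherwise extend the current cookie. The types and helper below are hypothetical simplifications, not the kernel's dvmaseg/ddi_dma_cookie_t handling (which also honors the segment mask and si_max_pages):

#include <stdio.h>
#include <stdint.h>

struct seg { uint64_t start; uint32_t len; };     /* stand-in for dvmaseg */
struct cookie { uint64_t addr; uint32_t size; };  /* stand-in for a DMA cookie */

static int
build_cookies(const struct seg *segs, int nsegs, uint32_t maxseg,
    struct cookie *out)
{
	int cnt = 0;

	out[0].addr = segs[0].start;
	out[0].size = 0;
	for (int i = 0; i < nsegs; i++) {
		uint64_t addr = segs[i].start;
		uint32_t left = segs[i].len;

		while (left > 0) {
			uint32_t chunk = (left < maxseg) ? left : maxseg;
			int contig = (out[cnt].size != 0 &&
			    out[cnt].addr + out[cnt].size == addr);

			if (!contig || out[cnt].size + chunk > maxseg) {
				/* start a new cookie */
				if (out[cnt].size != 0)
					cnt++;
				out[cnt].addr = addr;
				out[cnt].size = chunk;
			} else {
				/* extend the current cookie */
				out[cnt].size += chunk;
			}
			addr += chunk;
			left -= chunk;
		}
	}
	return ((out[cnt].size == 0) ? cnt : cnt + 1);
}

int
main(void)
{
	struct seg segs[2] = { { 0x1000, 0x1000 }, { 0x2000, 0x800 } };
	struct cookie c[8];
	int n = build_cookies(segs, 2, 0x10000, c);

	for (int i = 0; i < n; i++)
		printf("cookie %d: 0x%llx len 0x%x\n", i,
		    (unsigned long long)c[i].addr, (unsigned)c[i].size);
	return (0);
}

With the two contiguous segments above this prints a single coalesced cookie, which is exactly the behavior the real routine relies on to keep the cookie count within the device's sgllen.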
+
/*
* rootnex_bind_slowpath()
* Call in the bind path if the calling driver can't use the sgl without
@@ -3051,7 +3244,7 @@ rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
*/
static int
rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
- rootnex_dma_t *dma, ddi_dma_attr_t *attr, int kmflag)
+ rootnex_dma_t *dma, ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag)
{
rootnex_sglinfo_t *sinfo;
rootnex_window_t *window;
@@ -3088,7 +3281,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
* if we need to trim the buffers when we munge the sgl.
*/
if ((dma->dp_copybuf_size < sinfo->si_copybuf_req) ||
- (dma->dp_dma.dmao_size > dma->dp_maxxfer) ||
+ (dmao->dmao_size > dma->dp_maxxfer) ||
(attr->dma_attr_sgllen < sinfo->si_sgl_size)) {
dma->dp_partial_required = B_TRUE;
if (attr->dma_attr_granular != 1) {
@@ -3127,7 +3320,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
* we might need multiple windows, setup state to handle them. In this
* code path, we will have at least one window.
*/
- e = rootnex_setup_windows(hp, dma, attr, kmflag);
+ e = rootnex_setup_windows(hp, dma, attr, dmao, kmflag);
if (e != DDI_SUCCESS) {
rootnex_teardown_copybuf(dma);
return (e);
@@ -3137,7 +3330,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
cookie = &dma->dp_cookies[0];
cur_offset = 0;
rootnex_init_win(hp, dma, window, cookie, cur_offset);
- if (dmareq->dmar_object.dmao_type == DMA_OTYP_PAGES) {
+ if (dmao->dmao_type == DMA_OTYP_PAGES) {
cur_pp = dmareq->dmar_object.dmao_obj.pp_obj.pp_pp;
}
@@ -3149,7 +3342,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
* copy buffer, make sure we sync this window during dma_sync.
*/
if (dma->dp_copybuf_size > 0) {
- rootnex_setup_cookie(&dmareq->dmar_object, dma, cookie,
+ rootnex_setup_cookie(dmao, dma, cookie,
cur_offset, &copybuf_used, &cur_pp);
if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
window->wd_dosync = B_TRUE;
@@ -3181,7 +3374,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
window->wd_dosync = B_TRUE;
}
- DTRACE_PROBE1(rootnex__copybuf__window, dev_info_t *,
+ ROOTNEX_DPROBE1(rootnex__copybuf__window, dev_info_t *,
dma->dp_dip);
/* if the cookie cnt == max sgllen, move to the next window */
@@ -3203,7 +3396,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
window->wd_dosync = B_TRUE;
}
- DTRACE_PROBE1(rootnex__sgllen__window, dev_info_t *,
+ ROOTNEX_DPROBE1(rootnex__sgllen__window, dev_info_t *,
dma->dp_dip);
/* else if we will be over maxxfer */
@@ -3225,7 +3418,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
window->wd_dosync = B_TRUE;
}
- DTRACE_PROBE1(rootnex__maxxfer__window, dev_info_t *,
+ ROOTNEX_DPROBE1(rootnex__maxxfer__window, dev_info_t *,
dma->dp_dip);
/* else this cookie fits in the current window */
@@ -3235,7 +3428,7 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
}
/* track our offset into the buffer, go to the next cookie */
- ASSERT(dmac_size <= dma->dp_dma.dmao_size);
+ ASSERT(dmac_size <= dmao->dmao_size);
ASSERT(cookie->dmac_size <= dmac_size);
cur_offset += dmac_size;
cookie++;
@@ -3257,7 +3450,6 @@ rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
return (DDI_DMA_PARTIAL_MAP);
}
-
/*
* rootnex_setup_copybuf()
* Called in bind slowpath. Figures out if we're going to use the copy
@@ -3276,6 +3468,7 @@ rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
int vmflag;
#endif
+ ASSERT(!dma->dp_dvma_used);
sinfo = &dma->dp_sglinfo;
@@ -3353,7 +3546,7 @@ rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
return (DDI_DMA_NORESOURCES);
}
- DTRACE_PROBE2(rootnex__alloc__copybuf, dev_info_t *, dma->dp_dip,
+ ROOTNEX_DPROBE2(rootnex__alloc__copybuf, dev_info_t *, dma->dp_dip,
size_t, dma->dp_copybuf_size);
return (DDI_SUCCESS);
@@ -3367,7 +3560,7 @@ rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
*/
static int
rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
- ddi_dma_attr_t *attr, int kmflag)
+ ddi_dma_attr_t *attr, ddi_dma_obj_t *dmao, int kmflag)
{
rootnex_window_t *windowp;
rootnex_sglinfo_t *sinfo;
@@ -3436,8 +3629,8 @@ rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
* for remainder, and plus 2 to handle the extra pages on the
* trim (see above comment about trim)
*/
- if (dma->dp_dma.dmao_size > dma->dp_maxxfer) {
- maxxfer_win = (dma->dp_dma.dmao_size /
+ if (dmao->dmao_size > dma->dp_maxxfer) {
+ maxxfer_win = (dmao->dmao_size /
dma->dp_maxxfer) + 1 + 2;
} else {
maxxfer_win = 0;
@@ -3509,7 +3702,7 @@ rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
}
dma->dp_need_to_free_window = B_TRUE;
dma->dp_window_size = space_needed;
- DTRACE_PROBE2(rootnex__bind__sp__alloc, dev_info_t *,
+ ROOTNEX_DPROBE2(rootnex__bind__sp__alloc, dev_info_t *,
dma->dp_dip, size_t, space_needed);
}
@@ -3639,6 +3832,8 @@ rootnex_setup_cookie(ddi_dma_obj_t *dmar_object, rootnex_dma_t *dma,
page_t **pplist;
#endif
+ ASSERT(dmar_object->dmao_type != DMA_OTYP_DVADDR);
+
sinfo = &dma->dp_sglinfo;
/*
@@ -3706,15 +3901,7 @@ rootnex_setup_cookie(ddi_dma_obj_t *dmar_object, rootnex_dma_t *dma,
paddr = pfn_to_pa(hat_getpfnum(kas.a_hat,
dma->dp_pgmap[pidx].pm_cbaddr)) + poff;
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MAs instead of PAs.
- */
- cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- cookie->dmac_laddress = paddr;
-#endif
+ cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(paddr);
/* if we have a kernel VA, it's easy, just save that address */
if ((dmar_object->dmao_type != DMA_OTYP_PAGES) &&
@@ -4186,16 +4373,8 @@ rootnex_copybuf_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, dma->dp_cbaddr)) +
poff;
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MAs instead of PAs.
- */
(*windowp)->wd_trim.tr_first_paddr =
- ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- (*windowp)->wd_trim.tr_first_paddr = paddr;
-#endif
+ ROOTNEX_PADDR_TO_RBASE(paddr);
#if !defined(__amd64)
(*windowp)->wd_trim.tr_first_kaddr = dma->dp_kva;
@@ -4230,15 +4409,7 @@ rootnex_copybuf_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
paddr = pfn_to_pa(hat_getpfnum(kas.a_hat,
dma->dp_pgmap[pidx + 1].pm_cbaddr)) + poff;
-#ifdef __xpv
- /*
- * If we're dom0, we're using a real device so we need to load
- * the cookies with MAs instead of PAs.
- */
- cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
-#else
- cookie->dmac_laddress = paddr;
-#endif
+ cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(paddr);
#if !defined(__amd64)
ASSERT(dma->dp_pgmap[pidx + 1].pm_mapped == B_FALSE);
@@ -4392,7 +4563,7 @@ rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
e = rootnex_valid_sync_parms(hp, win, offset, size,
cache_flags);
if (e != DDI_SUCCESS) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_SYNC_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_SYNC_FAIL]);
return (DDI_FAILURE);
}
}
@@ -4439,7 +4610,7 @@ rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
if (cache_flags == DDI_DMA_SYNC_FORDEV) {
fromaddr = cbpage->pm_kaddr + poff;
toaddr = cbpage->pm_cbaddr + poff;
- DTRACE_PROBE2(rootnex__sync__dev,
+ ROOTNEX_DPROBE2(rootnex__sync__dev,
dev_info_t *, dma->dp_dip, size_t, psize);
/*
@@ -4450,7 +4621,7 @@ rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
} else {
fromaddr = cbpage->pm_cbaddr + poff;
toaddr = cbpage->pm_kaddr + poff;
- DTRACE_PROBE2(rootnex__sync__cpu,
+ ROOTNEX_DPROBE2(rootnex__sync__cpu,
dev_info_t *, dma->dp_dip, size_t, psize);
}
@@ -4554,6 +4725,7 @@ rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
rootnex_trim_t *trim;
ddi_dma_impl_t *hp;
rootnex_dma_t *dma;
+ ddi_dma_obj_t *dmao;
#if !defined(__amd64)
rootnex_sglinfo_t *sinfo;
rootnex_pgmap_t *pmap;
@@ -4572,10 +4744,12 @@ rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
/* If we try and get a window which doesn't exist, return failure */
if (win >= hp->dmai_nwin) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
+ ROOTNEX_DPROF_INC(&rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
return (DDI_FAILURE);
}
+ dmao = dma->dp_dvma_used ? &dma->dp_dvma : &dma->dp_dma;
+
/*
* if we don't have any windows, and they're asking for the first
* window, setup the cookie pointer to the first cookie in the bind.
@@ -4584,12 +4758,13 @@ rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
*/
if (dma->dp_window == NULL) {
if (win != 0) {
- ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
+ ROOTNEX_DPROF_INC(
+ &rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
return (DDI_FAILURE);
}
hp->dmai_cookie = dma->dp_cookies;
*offp = 0;
- *lenp = dma->dp_dma.dmao_size;
+ *lenp = dmao->dmao_size;
*ccountp = dma->dp_sglinfo.si_sgl_size;
*cookiep = hp->dmai_cookie[0];
hp->dmai_cookie++;
@@ -4781,6 +4956,37 @@ rootnex_dma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
cookiep, ccountp));
}
+#if defined(__amd64) && !defined(__xpv)
+/*ARGSUSED*/
+static int
+rootnex_coredma_hdl_setprivate(dev_info_t *dip, dev_info_t *rdip,
+ ddi_dma_handle_t handle, void *v)
+{
+ ddi_dma_impl_t *hp;
+ rootnex_dma_t *dma;
+
+ hp = (ddi_dma_impl_t *)handle;
+ dma = (rootnex_dma_t *)hp->dmai_private;
+ dma->dp_iommu_private = v;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static void *
+rootnex_coredma_hdl_getprivate(dev_info_t *dip, dev_info_t *rdip,
+ ddi_dma_handle_t handle)
+{
+ ddi_dma_impl_t *hp;
+ rootnex_dma_t *dma;
+
+ hp = (ddi_dma_impl_t *)handle;
+ dma = (rootnex_dma_t *)hp->dmai_private;
+
+ return (dma->dp_iommu_private);
+}
+#endif
+
/*
* ************************
* obsoleted dma routines
diff --git a/usr/src/uts/i86pc/ml/cpr_wakecode.s b/usr/src/uts/i86pc/ml/cpr_wakecode.s
index 917ce412aa..fc58cd5d2a 100644
--- a/usr/src/uts/i86pc/ml/cpr_wakecode.s
+++ b/usr/src/uts/i86pc/ml/cpr_wakecode.s
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/asm_linkage.h>
@@ -673,9 +672,8 @@ kernel_wc_code:
* Before proceeding, enable usage of the page table NX bit if
* that's how the page tables are set up.
*/
- movl x86_feature, %ecx
- andl $X86_NX, %ecx
- jz 1f
+ bt $X86FSET_NX, x86_featureset(%rip)
+ jnc 1f
movl $MSR_AMD_EFER, %ecx
rdmsr
orl $AMD_EFER_NXE, %eax
@@ -1092,9 +1090,8 @@ kernel_wc_code:
* Before proceeding, enable usage of the page table NX bit if
* that's how the page tables are set up.
*/
- movl x86_feature, %ecx
- andl $X86_NX, %ecx
- jz 1f
+ bt $X86FSET_NX, x86_featureset
+ jnc 1f
movl $MSR_AMD_EFER, %ecx
rdmsr
orl $AMD_EFER_NXE, %eax
diff --git a/usr/src/uts/i86pc/ml/genassym.c b/usr/src/uts/i86pc/ml/genassym.c
index 4836628401..a34ca50669 100644
--- a/usr/src/uts/i86pc/ml/genassym.c
+++ b/usr/src/uts/i86pc/ml/genassym.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _GENASSYM
@@ -123,6 +122,10 @@ main(int argc, char *argv[])
printf("#define\tFP_387 0x%x\n", FP_387);
printf("#define\t__FP_SSE 0x%x\n", __FP_SSE);
+ printf("#define\tFP_FNSAVE 0x%x\n", FP_FNSAVE);
+ printf("#define\tFP_FXSAVE 0x%x\n", FP_FXSAVE);
+ printf("#define\tFP_XSAVE 0x%x\n", FP_XSAVE);
+
printf("#define\tAV_INT_SPURIOUS 0x%x\n", AV_INT_SPURIOUS);
printf("#define\tCPU_READY 0x%x\n", CPU_READY);
diff --git a/usr/src/uts/i86pc/ml/interrupt.s b/usr/src/uts/i86pc/ml/interrupt.s
index 97f5acba2d..46cbf2f308 100644
--- a/usr/src/uts/i86pc/ml/interrupt.s
+++ b/usr/src/uts/i86pc/ml/interrupt.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s
index db016a55db..8aec1537e5 100644
--- a/usr/src/uts/i86pc/ml/locore.s
+++ b/usr/src/uts/i86pc/ml/locore.s
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -208,7 +207,7 @@ _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop)
/*
* (We just assert this works by virtue of being here)
*/
- orl $X86_CPUID, x86_feature(%rip)
+ bts $X86FSET_CPUID, x86_featureset(%rip)
/*
* mlsetup() gets called with a struct regs as argument, while
@@ -623,7 +622,7 @@ have_cpuid:
/*
* cpuid instruction present
*/
- orl $X86_CPUID, x86_feature
+ bts $X86FSET_CPUID, x86_featureset / Just to set; Ignore the CF
movl $0, %eax
cpuid
@@ -2340,7 +2339,7 @@ cpu_vendor:
.globl CyrixInstead
- .globl x86_feature
+ .globl x86_featureset
.globl x86_type
.globl x86_vendor
#endif
diff --git a/usr/src/uts/i86pc/ml/mpcore.s b/usr/src/uts/i86pc/ml/mpcore.s
index 25a943870e..3ff8aa9d6d 100644
--- a/usr/src/uts/i86pc/ml/mpcore.s
+++ b/usr/src/uts/i86pc/ml/mpcore.s
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -316,9 +315,8 @@ kernel_cs_code:
* Before going any further, enable usage of page table NX bit if
* that's how our page tables are set up.
*/
- movl x86_feature, %ecx
- andl $X86_NX, %ecx
- jz 1f
+ bt $X86FSET_NX, x86_featureset(%rip)
+ jnc 1f
movl $MSR_AMD_EFER, %ecx
rdmsr
orl $AMD_EFER_NXE, %eax
@@ -570,9 +568,8 @@ kernel_cs_code:
* Before going any further, enable usage of page table NX bit if
* that's how our page tables are set up.
*/
- movl x86_feature, %ecx
- andl $X86_NX, %ecx
- jz 1f
+ bt $X86FSET_NX, x86_featureset(%rip)
+ jnc 1f
movl $MSR_AMD_EFER, %ecx
rdmsr
orl $AMD_EFER_NXE, %eax
@@ -671,9 +668,8 @@ kernel_cs_code:
* Before going any further, enable usage of page table NX bit if
* that's how our page tables are set up.
*/
- movl x86_feature, %ecx
- andl $X86_NX, %ecx
- jz 1f
+ bt $X86FSET_NX, x86_featureset
+ jnc 1f
movl %cr4, %ecx
andl $CR4_PAE, %ecx
jz 1f
@@ -763,9 +759,8 @@ kernel_cs_code:
* Before going any farther, enable usage of page table NX bit if
* that's how our page tables are set up.
*/
- movl x86_feature, %ecx
- andl $X86_NX, %ecx
- jz 1f
+ bt $X86FSET_NX, x86_featureset
+ jnc 1f
movl %cr4, %ecx
andl $CR4_PAE, %ecx
jz 1f
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index ceefce6d3c..20e0c972d4 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -165,6 +165,7 @@ _klwp
fpu_ctx
fpu_regs FPU_CTX_FPU_REGS
fpu_flags FPU_CTX_FPU_FLAGS
+ fpu_xsave_mask FPU_CTX_FPU_XSAVE_MASK
fxsave_state FXSAVE_STATE_SIZE
fx_fsw FXSAVE_STATE_FSW
@@ -293,7 +294,6 @@ rm_platter
rm_pdbr CR3OFF
rm_cpu CPUNOFF
rm_cr4 CR4OFF
- rm_x86feature X86FEATURE
rm_cpu_halt_code CPUHALTCODEOFF
rm_cpu_halted CPUHALTEDOFF
diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c
index 103955a097..555ed9f842 100644
--- a/usr/src/uts/i86pc/os/cpr_impl.c
+++ b/usr/src/uts/i86pc/os/cpr_impl.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -66,6 +65,7 @@
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
+#include <sys/fp.h>
#define AFMT "%lx"
@@ -876,8 +876,6 @@ init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
real_mode_platter->rm_gdt_lim = gdt.limit;
#if defined(__amd64)
- real_mode_platter->rm_x86feature = x86_feature;
-
if (getcr3() > 0xffffffffUL)
panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
"located above 4G in physical memory (@ 0x%llx).",
@@ -943,10 +941,17 @@ i_cpr_start_cpu(void)
* We need to Sync PAT with cpu0's PAT. We have to do
* this with interrupts disabled.
*/
- if (x86_feature & X86_PAT)
+ if (is_x86_feature(x86_featureset, X86FSET_PAT))
pat_sync();
/*
+ * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
+ */
+ if (fp_save_mech == FP_XSAVE) {
+ setup_xfem();
+ }
+
+ /*
* Initialize this CPU's syscall handlers
*/
init_cpu_syscall(cp);
@@ -994,7 +999,7 @@ i_cpr_start_cpu(void)
* cmi already been init'd (during boot), so do not need to do it again
*/
#ifdef PM_REINITMCAONRESUME
- if (x86_feature & X86_MCA)
+ if (is_x86_feature(x86_featureset, X86FSET_MCA))
cmi_mca_init();
#endif
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 4a86da8c3b..1a472c9e93 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -22,7 +22,7 @@
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2010, Intel Corporation.
* All rights reserved.
*/
/*
@@ -48,8 +48,8 @@
#include <sys/pg.h>
#include <sys/fp.h>
#include <sys/controlregs.h>
-#include <sys/auxv_386.h>
#include <sys/bitmap.h>
+#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
@@ -67,7 +67,7 @@
*
* Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
* for the boot CPU and does the basic analysis that the early kernel needs.
- * x86_feature is set based on the return value of cpuid_pass1() of the boot
+ * x86_featureset is set based on the return value of cpuid_pass1() of the boot
* CPU.
*
* Pass 1 includes:
@@ -111,7 +111,6 @@
* to the accessor code.
*/
-uint_t x86_feature = 0;
uint_t x86_vendor = X86_VENDOR_IntelClone;
uint_t x86_type = X86_TYPE_OTHER;
uint_t x86_clflush_size = 0;
@@ -119,7 +118,98 @@ uint_t x86_clflush_size = 0;
uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;
+uchar_t x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
+
+static char *x86_feature_names[NUM_X86_FEATURES] = {
+ "lgpg",
+ "tsc",
+ "msr",
+ "mtrr",
+ "pge",
+ "de",
+ "cmov",
+ "mmx",
+ "mca",
+ "pae",
+ "cv8",
+ "pat",
+ "sep",
+ "sse",
+ "sse2",
+ "htt",
+ "asysc",
+ "nx",
+ "sse3",
+ "cx16",
+ "cmp",
+ "tscp",
+ "mwait",
+ "sse4a",
+ "cpuid",
+ "ssse3",
+ "sse4_1",
+ "sse4_2",
+ "1gpg",
+ "clfsh",
+ "64",
+ "aes",
+ "pclmulqdq",
+ "xsave",
+ "avx" };
+
+boolean_t
+is_x86_feature(void *featureset, uint_t feature)
+{
+ ASSERT(feature < NUM_X86_FEATURES);
+ return (BT_TEST((ulong_t *)featureset, feature));
+}
+
+void
+add_x86_feature(void *featureset, uint_t feature)
+{
+ ASSERT(feature < NUM_X86_FEATURES);
+ BT_SET((ulong_t *)featureset, feature);
+}
+
+void
+remove_x86_feature(void *featureset, uint_t feature)
+{
+ ASSERT(feature < NUM_X86_FEATURES);
+ BT_CLEAR((ulong_t *)featureset, feature);
+}
+
+boolean_t
+compare_x86_featureset(void *setA, void *setB)
+{
+ /*
+ * We assume that the unused bits of the bitmap are always zero.
+ */
+ if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
+ return (B_TRUE);
+ } else {
+ return (B_FALSE);
+ }
+}
+
+void
+print_x86_featureset(void *featureset)
+{
+ uint_t i;
+
+ for (i = 0; i < NUM_X86_FEATURES; i++) {
+ if (is_x86_feature(featureset, i)) {
+ cmn_err(CE_CONT, "?x86_feature: %s\n",
+ x86_feature_names[i]);
+ }
+ }
+}
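
The accessors above replace the old single x86_feature word with a bitmap wide enough for NUM_X86_FEATURES bits, manipulated through the kernel's BT_TEST/BT_SET/BT_CLEAR macros. A standalone sketch of the same idea in plain C (hypothetical names, byte-granular rather than ulong_t-granular) looks like this:

#include <stdio.h>
#include <string.h>

#define	NUM_FEATURES	35
#define	BITMAP_BYTES	((NUM_FEATURES + 7) / 8)

typedef unsigned char featureset_t[BITMAP_BYTES];

static void
add_feature(featureset_t set, unsigned f)
{
	set[f / 8] |= (unsigned char)(1u << (f % 8));
}

static int
has_feature(const featureset_t set, unsigned f)
{
	return ((set[f / 8] >> (f % 8)) & 1);
}

int
main(void)
{
	featureset_t fs;

	memset(fs, 0, sizeof (fs));
	add_feature(fs, 13);	/* e.g. an "sse" bit */
	printf("feature 13: %d, feature 17: %d\n",
	    has_feature(fs, 13), has_feature(fs, 17));
	return (0);
}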
+
uint_t enable486;
+
+static size_t xsave_state_size = 0;
+uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
+boolean_t xsave_force_disable = B_FALSE;
+
/*
 * This is set to the platform type Solaris is running on.
*/
@@ -150,6 +240,23 @@ struct mwait_info {
};
/*
+ * xsave/xrestor info.
+ *
+ * This structure contains HW feature bits and size of the xsave save area.
+ * Note: the kernel will use the maximum size required for all hardware
+ * features. It is not optimize for potential memory savings if features at
+ * the end of the save area are not enabled.
+ */
+struct xsave_info {
+ uint32_t xsav_hw_features_low; /* Supported HW features */
+ uint32_t xsav_hw_features_high; /* Supported HW features */
+ size_t xsav_max_size; /* max size save area for HW features */
+ size_t ymm_size; /* AVX: size of ymm save area */
+ size_t ymm_offset; /* AVX: offset for ymm save area */
+};
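
cpuid_pass2() (further below) fills this structure from CPUID leaf 0xD: sub-leaf 0 reports the supported XSAVE feature bits in EAX:EDX and the maximum save-area size in ECX, and sub-leaf 2 reports the size and offset of the AVX (ymm) state. A user-level sketch of that enumeration, assuming GCC/clang's <cpuid.h>, could be:

#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
	unsigned eax, ebx, ecx, edx;

	/* Make sure leaf 0xD exists before querying it. */
	if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) == 0 || eax < 0xD) {
		printf("CPUID leaf 0xD not available\n");
		return (1);
	}

	__cpuid_count(0xD, 0, eax, ebx, ecx, edx);
	printf("XSAVE features low 0x%x high 0x%x, max save size %u\n",
	    eax, edx, ecx);

	if (eax & 0x4) {	/* bit 2: AVX (XFEATURE_AVX) */
		__cpuid_count(0xD, 2, eax, ebx, ecx, edx);
		printf("ymm size %u, ymm offset %u\n", eax, ebx);
	}
	return (0);
}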
+
+
+/*
* These constants determine how many of the elements of the
* cpuid we cache in the cpuid_info data structure; the
* remaining elements are accessible via the cpuid instruction.
@@ -230,6 +337,8 @@ struct cpuid_info {
uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
/* Intel: 1 */
+
+ struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */
};
@@ -332,6 +441,12 @@ static struct cpuid_info cpuid_info0;
BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
/*
+ * XSAVE leaf 0xD enumeration
+ */
+#define CPUID_LEAFD_2_YMM_OFFSET 576
+#define CPUID_LEAFD_2_YMM_SIZE 256
+
+/*
 * Functions we consume from cpuid_subr.c; don't publish these in a header
* file to try and keep people using the expected cpuid_* interfaces.
*/
@@ -542,7 +657,7 @@ is_controldom(void)
#endif /* __xpv */
static void
-cpuid_intel_getids(cpu_t *cpu, uint_t feature)
+cpuid_intel_getids(cpu_t *cpu, void *feature)
{
uint_t i;
uint_t chipid_shift = 0;
@@ -555,7 +670,7 @@ cpuid_intel_getids(cpu_t *cpu, uint_t feature)
cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
- if (feature & X86_CMP) {
+ if (is_x86_feature(feature, X86FSET_CMP)) {
/*
* Multi-core (and possibly multi-threaded)
* processors.
@@ -591,7 +706,7 @@ cpuid_intel_getids(cpu_t *cpu, uint_t feature)
coreid_shift++;
cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
- } else if (feature & X86_HTT) {
+ } else if (is_x86_feature(feature, X86FSET_HTT)) {
/*
* Single-core multi-threaded processors.
*/
@@ -718,11 +833,31 @@ cpuid_amd_getids(cpu_t *cpu)
}
}
-uint_t
-cpuid_pass1(cpu_t *cpu)
+/*
+ * Setup XFeature_Enabled_Mask register. Required by xsave feature.
+ */
+void
+setup_xfem(void)
+{
+ uint64_t flags = XFEATURE_LEGACY_FP;
+
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
+
+ if (is_x86_feature(x86_featureset, X86FSET_SSE))
+ flags |= XFEATURE_SSE;
+
+ if (is_x86_feature(x86_featureset, X86FSET_AVX))
+ flags |= XFEATURE_AVX;
+
+ set_xcr(XFEATURE_ENABLED_MASK, flags);
+
+ xsave_bv_all = flags;
+}
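
setup_xfem() programs XCR0 (XFEATURE_ENABLED_MASK) with the x87, SSE and, when present, AVX bits. From user level the result can be observed with the usual CPUID/xgetbv recipe; the sketch below (GCC/clang inline asm, x86 only, hypothetical helper name) assumes the OSXSAVE bit in CPUID.1:ECX indicates the OS has enabled XSAVE:

#include <stdio.h>
#include <cpuid.h>

/* Read XCR0; only valid when CPUID.1:ECX reports OSXSAVE (bit 27). */
static unsigned long long
read_xcr0(void)
{
	unsigned lo, hi;

	__asm__ volatile ("xgetbv" : "=a" (lo), "=d" (hi) : "c" (0));
	return (((unsigned long long)hi << 32) | lo);
}

int
main(void)
{
	unsigned eax, ebx, ecx, edx;
	unsigned long long xcr0;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0 ||
	    (ecx & (1u << 27)) == 0) {		/* OSXSAVE */
		printf("XSAVE not enabled by the OS\n");
		return (1);
	}

	xcr0 = read_xcr0();
	printf("XCR0 = 0x%llx (x87 %d, SSE %d, AVX %d)\n", xcr0,
	    (int)(xcr0 & 0x1), (int)((xcr0 >> 1) & 1),
	    (int)((xcr0 >> 2) & 1));
	return (0);
}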
+
+void
+cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
{
uint32_t mask_ecx, mask_edx;
- uint_t feature = X86_CPUID;
struct cpuid_info *cpi;
struct cpuid_regs *cp;
int xcpuid;
@@ -730,15 +865,19 @@ cpuid_pass1(cpu_t *cpu)
extern int idle_cpu_prefer_mwait;
#endif
-
#if !defined(__xpv)
determine_platform();
#endif
/*
* Space statically allocated for BSP, ensure pointer is set
*/
- if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL)
- cpu->cpu_m.mcpu_cpi = &cpuid_info0;
+ if (cpu->cpu_id == 0) {
+ if (cpu->cpu_m.mcpu_cpi == NULL)
+ cpu->cpu_m.mcpu_cpi = &cpuid_info0;
+ }
+
+ add_x86_feature(featureset, X86FSET_CPUID);
+
cpi = cpu->cpu_m.mcpu_cpi;
ASSERT(cpi != NULL);
cp = &cpi->cpi_std[0];
@@ -977,8 +1116,18 @@ cpuid_pass1(cpu_t *cpu)
* Do not support MONITOR/MWAIT under a hypervisor
*/
mask_ecx &= ~CPUID_INTC_ECX_MON;
+ /*
+ * Do not support XSAVE under a hypervisor for now
+ */
+ xsave_force_disable = B_TRUE;
+
#endif /* __xpv */
+ if (xsave_force_disable) {
+ mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
+ mask_ecx &= ~CPUID_INTC_ECX_AVX;
+ }
+
/*
* Now we've figured out the masks that determine
* which bits we choose to believe, apply the masks
@@ -1004,58 +1153,91 @@ cpuid_pass1(cpu_t *cpu)
cp->cp_ecx |= cpuid_feature_ecx_include;
cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
- if (cp->cp_edx & CPUID_INTC_EDX_PSE)
- feature |= X86_LARGEPAGE;
- if (cp->cp_edx & CPUID_INTC_EDX_TSC)
- feature |= X86_TSC;
- if (cp->cp_edx & CPUID_INTC_EDX_MSR)
- feature |= X86_MSR;
- if (cp->cp_edx & CPUID_INTC_EDX_MTRR)
- feature |= X86_MTRR;
- if (cp->cp_edx & CPUID_INTC_EDX_PGE)
- feature |= X86_PGE;
- if (cp->cp_edx & CPUID_INTC_EDX_CMOV)
- feature |= X86_CMOV;
- if (cp->cp_edx & CPUID_INTC_EDX_MMX)
- feature |= X86_MMX;
+ if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
+ add_x86_feature(featureset, X86FSET_LARGEPAGE);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
+ add_x86_feature(featureset, X86FSET_TSC);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
+ add_x86_feature(featureset, X86FSET_MSR);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
+ add_x86_feature(featureset, X86FSET_MTRR);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
+ add_x86_feature(featureset, X86FSET_PGE);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
+ add_x86_feature(featureset, X86FSET_CMOV);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
+ add_x86_feature(featureset, X86FSET_MMX);
+ }
if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
- (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0)
- feature |= X86_MCA;
- if (cp->cp_edx & CPUID_INTC_EDX_PAE)
- feature |= X86_PAE;
- if (cp->cp_edx & CPUID_INTC_EDX_CX8)
- feature |= X86_CX8;
- if (cp->cp_ecx & CPUID_INTC_ECX_CX16)
- feature |= X86_CX16;
- if (cp->cp_edx & CPUID_INTC_EDX_PAT)
- feature |= X86_PAT;
- if (cp->cp_edx & CPUID_INTC_EDX_SEP)
- feature |= X86_SEP;
+ (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
+ add_x86_feature(featureset, X86FSET_MCA);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
+ add_x86_feature(featureset, X86FSET_PAE);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
+ add_x86_feature(featureset, X86FSET_CX8);
+ }
+ if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
+ add_x86_feature(featureset, X86FSET_CX16);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
+ add_x86_feature(featureset, X86FSET_PAT);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
+ add_x86_feature(featureset, X86FSET_SEP);
+ }
if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
/*
* In our implementation, fxsave/fxrstor
* are prerequisites before we'll even
* try and do SSE things.
*/
- if (cp->cp_edx & CPUID_INTC_EDX_SSE)
- feature |= X86_SSE;
- if (cp->cp_edx & CPUID_INTC_EDX_SSE2)
- feature |= X86_SSE2;
- if (cp->cp_ecx & CPUID_INTC_ECX_SSE3)
- feature |= X86_SSE3;
+ if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
+ add_x86_feature(featureset, X86FSET_SSE);
+ }
+ if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
+ add_x86_feature(featureset, X86FSET_SSE2);
+ }
+ if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
+ add_x86_feature(featureset, X86FSET_SSE3);
+ }
if (cpi->cpi_vendor == X86_VENDOR_Intel) {
- if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3)
- feature |= X86_SSSE3;
- if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1)
- feature |= X86_SSE4_1;
- if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2)
- feature |= X86_SSE4_2;
- if (cp->cp_ecx & CPUID_INTC_ECX_AES)
- feature |= X86_AES;
+ if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
+ add_x86_feature(featureset, X86FSET_SSSE3);
+ }
+ if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
+ add_x86_feature(featureset, X86FSET_SSE4_1);
+ }
+ if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
+ add_x86_feature(featureset, X86FSET_SSE4_2);
+ }
+ if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
+ add_x86_feature(featureset, X86FSET_AES);
+ }
+ if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
+ add_x86_feature(featureset, X86FSET_PCLMULQDQ);
+ }
+
+ if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
+ add_x86_feature(featureset, X86FSET_XSAVE);
+ /* We only test AVX when there is XSAVE */
+ if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
+ add_x86_feature(featureset,
+ X86FSET_AVX);
+ }
+ }
}
}
- if (cp->cp_edx & CPUID_INTC_EDX_DE)
- feature |= X86_DE;
+ if (cp->cp_edx & CPUID_INTC_EDX_DE) {
+ add_x86_feature(featureset, X86FSET_DE);
+ }
#if !defined(__xpv)
if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
@@ -1065,7 +1247,7 @@ cpuid_pass1(cpu_t *cpu)
*/
if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
cpi->cpi_mwait.support |= MWAIT_SUPPORT;
- feature |= X86_MWAIT;
+ add_x86_feature(featureset, X86FSET_MWAIT);
} else {
extern int idle_cpu_assert_cflush_monitor;
@@ -1086,11 +1268,10 @@ cpuid_pass1(cpu_t *cpu)
* we only capture this for the bootcpu.
*/
if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
- feature |= X86_CLFSH;
+ add_x86_feature(featureset, X86FSET_CLFSH);
x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
}
-
- if (feature & X86_PAE)
+ if (is_x86_feature(featureset, X86FSET_PAE))
cpi->cpi_pabits = 36;
/*
@@ -1105,7 +1286,7 @@ cpuid_pass1(cpu_t *cpu)
if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
if (cpi->cpi_ncpu_per_chip > 1)
- feature |= X86_HTT;
+ add_x86_feature(featureset, X86FSET_HTT);
} else {
cpi->cpi_ncpu_per_chip = 1;
}
@@ -1180,27 +1361,31 @@ cpuid_pass1(cpu_t *cpu)
/*
* Compute the additions to the kernel's feature word.
*/
- if (cp->cp_edx & CPUID_AMD_EDX_NX)
- feature |= X86_NX;
+ if (cp->cp_edx & CPUID_AMD_EDX_NX) {
+ add_x86_feature(featureset, X86FSET_NX);
+ }
/*
* Regardless whether or not we boot 64-bit,
* we should have a way to identify whether
* the CPU is capable of running 64-bit.
*/
- if (cp->cp_edx & CPUID_AMD_EDX_LM)
- feature |= X86_64;
+ if (cp->cp_edx & CPUID_AMD_EDX_LM) {
+ add_x86_feature(featureset, X86FSET_64);
+ }
#if defined(__amd64)
/* 1 GB large page - enable only for 64 bit kernel */
- if (cp->cp_edx & CPUID_AMD_EDX_1GPG)
- feature |= X86_1GPG;
+ if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
+ add_x86_feature(featureset, X86FSET_1GPG);
+ }
#endif
if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
(cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
- (cp->cp_ecx & CPUID_AMD_ECX_SSE4A))
- feature |= X86_SSE4A;
+ (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
+ add_x86_feature(featureset, X86FSET_SSE4A);
+ }
/*
* If both the HTT and CMP_LGCY bits are set,
@@ -1208,10 +1393,10 @@ cpuid_pass1(cpu_t *cpu)
* "AMD CPUID Specification" for more details.
*/
if (cpi->cpi_vendor == X86_VENDOR_AMD &&
- (feature & X86_HTT) &&
+ is_x86_feature(featureset, X86FSET_HTT) &&
(cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
- feature &= ~X86_HTT;
- feature |= X86_CMP;
+ remove_x86_feature(featureset, X86FSET_HTT);
+ add_x86_feature(featureset, X86FSET_CMP);
}
#if defined(__amd64)
/*
@@ -1220,19 +1405,22 @@ cpuid_pass1(cpu_t *cpu)
* instead. In the amd64 kernel, things are -way-
* better.
*/
- if (cp->cp_edx & CPUID_AMD_EDX_SYSC)
- feature |= X86_ASYSC;
+ if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
+ add_x86_feature(featureset, X86FSET_ASYSC);
+ }
/*
* While we're thinking about system calls, note
* that AMD processors don't support sysenter
* in long mode at all, so don't try to program them.
*/
- if (x86_vendor == X86_VENDOR_AMD)
- feature &= ~X86_SEP;
+ if (x86_vendor == X86_VENDOR_AMD) {
+ remove_x86_feature(featureset, X86FSET_SEP);
+ }
#endif
- if (cp->cp_edx & CPUID_AMD_EDX_TSCP)
- feature |= X86_TSCP;
+ if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
+ add_x86_feature(featureset, X86FSET_TSCP);
+ }
break;
default:
break;
@@ -1327,20 +1515,22 @@ cpuid_pass1(cpu_t *cpu)
/*
* If more than one core, then this processor is CMP.
*/
- if (cpi->cpi_ncore_per_chip > 1)
- feature |= X86_CMP;
+ if (cpi->cpi_ncore_per_chip > 1) {
+ add_x86_feature(featureset, X86FSET_CMP);
+ }
/*
* If the number of cores is the same as the number
* of CPUs, then we cannot have HyperThreading.
*/
- if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip)
- feature &= ~X86_HTT;
+ if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
+ remove_x86_feature(featureset, X86FSET_HTT);
+ }
cpi->cpi_apicid = CPI_APIC_ID(cpi);
cpi->cpi_procnodes_per_pkg = 1;
-
- if ((feature & (X86_HTT | X86_CMP)) == 0) {
+ if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
+ is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
/*
* Single-core single-threaded processors.
*/
@@ -1354,7 +1544,7 @@ cpuid_pass1(cpu_t *cpu)
cpi->cpi_procnodeid = cpi->cpi_chipid;
} else if (cpi->cpi_ncpu_per_chip > 1) {
if (cpi->cpi_vendor == X86_VENDOR_Intel)
- cpuid_intel_getids(cpu, feature);
+ cpuid_intel_getids(cpu, featureset);
else if (cpi->cpi_vendor == X86_VENDOR_AMD)
cpuid_amd_getids(cpu);
else {
@@ -1380,7 +1570,6 @@ cpuid_pass1(cpu_t *cpu)
pass1_done:
cpi->cpi_pass = 1;
- return (feature);
}
/*
@@ -1587,6 +1776,92 @@ cpuid_pass2(cpu_t *cpu)
cp = NULL;
}
+ /*
+ * XSAVE enumeration
+ */
+ if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) {
+ struct cpuid_regs regs;
+ boolean_t cpuid_d_valid = B_TRUE;
+
+ cp = &regs;
+ cp->cp_eax = 0xD;
+ cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
+
+ (void) __cpuid_insn(cp);
+
+ /*
+ * Sanity checks for debug
+ */
+ if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
+ (cp->cp_eax & XFEATURE_SSE) == 0) {
+ cpuid_d_valid = B_FALSE;
+ }
+
+ cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
+ cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
+ cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
+
+ /*
+ * If the hw supports AVX, get the size and offset in the save
+ * area for the ymm state.
+ */
+ if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
+ cp->cp_eax = 0xD;
+ cp->cp_ecx = 2;
+ cp->cp_edx = cp->cp_ebx = 0;
+
+ (void) __cpuid_insn(cp);
+
+ if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
+ cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
+ cpuid_d_valid = B_FALSE;
+ }
+
+ cpi->cpi_xsave.ymm_size = cp->cp_eax;
+ cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
+ }
+
+ if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+ xsave_state_size = 0;
+ } else if (cpuid_d_valid) {
+ xsave_state_size = cpi->cpi_xsave.xsav_max_size;
+ } else {
+ /* Broken CPUID 0xD, probably in HVM */
+ cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
+ "value: hw_low = %d, hw_high = %d, xsave_size = %d"
+ ", ymm_size = %d, ymm_offset = %d\n",
+ cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
+ cpi->cpi_xsave.xsav_hw_features_high,
+ (int)cpi->cpi_xsave.xsav_max_size,
+ (int)cpi->cpi_xsave.ymm_size,
+ (int)cpi->cpi_xsave.ymm_offset);
+
+ if (xsave_state_size != 0) {
+ /*
+ * This must be a non-boot CPU. We cannot
+ * continue, because boot cpu has already
+ * enabled XSAVE.
+ */
+ ASSERT(cpu->cpu_id != 0);
+ cmn_err(CE_PANIC, "cpu%d: we have already "
+ "enabled XSAVE on boot cpu, cannot "
+ "continue.", cpu->cpu_id);
+ } else {
+ /*
+ * Must be from boot CPU, OK to disable XSAVE.
+ */
+ ASSERT(cpu->cpu_id == 0);
+ remove_x86_feature(x86_featureset,
+ X86FSET_XSAVE);
+ remove_x86_feature(x86_featureset, X86FSET_AVX);
+ CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
+ CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
+ xsave_force_disable = B_TRUE;
+ }
+ }
+ }
+
+
if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
goto pass2_done;
@@ -1703,7 +1978,7 @@ intel_cpubrand(const struct cpuid_info *cpi)
{
int i;
- if ((x86_feature & X86_CPUID) == 0 ||
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
return ("i486");
@@ -1837,7 +2112,7 @@ intel_cpubrand(const struct cpuid_info *cpi)
static const char *
amd_cpubrand(const struct cpuid_info *cpi)
{
- if ((x86_feature & X86_CPUID) == 0 ||
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
return ("i486 compatible");
@@ -1907,7 +2182,7 @@ amd_cpubrand(const struct cpuid_info *cpi)
static const char *
cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
{
- if ((x86_feature & X86_CPUID) == 0 ||
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
type == X86_TYPE_CYRIX_486)
return ("i486 compatible");
@@ -2224,29 +2499,36 @@ cpuid_pass4(cpu_t *cpu)
/*
* [these require explicit kernel support]
*/
- if ((x86_feature & X86_SEP) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SEP))
*edx &= ~CPUID_INTC_EDX_SEP;
- if ((x86_feature & X86_SSE) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE))
*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
- if ((x86_feature & X86_SSE2) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
*edx &= ~CPUID_INTC_EDX_SSE2;
- if ((x86_feature & X86_HTT) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_HTT))
*edx &= ~CPUID_INTC_EDX_HTT;
- if ((x86_feature & X86_SSE3) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
*ecx &= ~CPUID_INTC_ECX_SSE3;
if (cpi->cpi_vendor == X86_VENDOR_Intel) {
- if ((x86_feature & X86_SSSE3) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
*ecx &= ~CPUID_INTC_ECX_SSSE3;
- if ((x86_feature & X86_SSE4_1) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
*ecx &= ~CPUID_INTC_ECX_SSE4_1;
- if ((x86_feature & X86_SSE4_2) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
*ecx &= ~CPUID_INTC_ECX_SSE4_2;
- if ((x86_feature & X86_AES) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_AES))
*ecx &= ~CPUID_INTC_ECX_AES;
+ if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
+ *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
+ if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
+ *ecx &= ~(CPUID_INTC_ECX_XSAVE |
+ CPUID_INTC_ECX_OSXSAVE);
+ if (!is_x86_feature(x86_featureset, X86FSET_AVX))
+ *ecx &= ~CPUID_INTC_ECX_AVX;
}
/*
@@ -2280,6 +2562,9 @@ cpuid_pass4(cpu_t *cpu)
hwcap_flags |= AV_386_AES;
if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
hwcap_flags |= AV_386_PCLMULQDQ;
+ if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
+ (*ecx & CPUID_INTC_ECX_OSXSAVE))
+ hwcap_flags |= AV_386_XSAVE;
}
if (*ecx & CPUID_INTC_ECX_POPCNT)
hwcap_flags |= AV_386_POPCNT;
@@ -2326,14 +2611,14 @@ cpuid_pass4(cpu_t *cpu)
*/
switch (cpi->cpi_vendor) {
case X86_VENDOR_Intel:
- if ((x86_feature & X86_TSCP) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
*edx &= ~CPUID_AMD_EDX_TSCP;
break;
case X86_VENDOR_AMD:
- if ((x86_feature & X86_TSCP) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
*edx &= ~CPUID_AMD_EDX_TSCP;
- if ((x86_feature & X86_SSE4A) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
*ecx &= ~CPUID_AMD_ECX_SSE4A;
break;
@@ -2349,7 +2634,7 @@ cpuid_pass4(cpu_t *cpu)
*edx &= ~(CPUID_AMD_EDX_MMXamd |
CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
- if ((x86_feature & X86_NX) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_NX))
*edx &= ~CPUID_AMD_EDX_NX;
#if !defined(__amd64)
*edx &= ~CPUID_AMD_EDX_LM;
@@ -3340,7 +3625,7 @@ intel_walk_cacheinfo(struct cpuid_info *cpi,
des_b1_ct.ct_code = 0xb1;
des_b1_ct.ct_assoc = 4;
des_b1_ct.ct_line_size = 0;
- if (x86_feature & X86_PAE) {
+ if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
des_b1_ct.ct_size = 8;
des_b1_ct.ct_label = itlb2M_str;
} else {
@@ -3687,7 +3972,7 @@ cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
"clock-frequency", (int)mul);
}
- if ((x86_feature & X86_CPUID) == 0) {
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
return;
}
@@ -4083,7 +4368,7 @@ cpuid_deep_cstates_supported(void)
cpi = CPU->cpu_m.mcpu_cpi;
- if (!(x86_feature & X86_CPUID))
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
return (0);
switch (cpi->cpi_vendor) {
@@ -4134,6 +4419,31 @@ post_startup_cpu_fixups(void)
}
/*
+ * Setup necessary registers to enable XSAVE feature on this processor.
+ * This function needs to be called early enough, so that no xsave/xrstor
+ * ops will execute on the processor before the MSRs are properly set up.
+ *
+ * Current implementation has the following assumption:
+ * - cpuid_pass1() is done, so that X86 features are known.
+ * - fpu_probe() is done, so that fp_save_mech is chosen.
+ */
+void
+xsave_setup_msr(cpu_t *cpu)
+{
+ ASSERT(fp_save_mech == FP_XSAVE);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
+
+ /* Enable OSXSAVE in CR4. */
+ setcr4(getcr4() | CR4_OSXSAVE);
+ /*
+ * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
+ * correct value.
+ */
+ cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
+ setup_xfem();
+}
+
+/*
* Starting with the Westmere processor the local
* APIC timer will continue running in all C-states,
* including the deepest C-states.
@@ -4145,7 +4455,7 @@ cpuid_arat_supported(void)
struct cpuid_regs regs;
ASSERT(cpuid_checkpass(CPU, 1));
- ASSERT(x86_feature & X86_CPUID);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
cpi = CPU->cpu_m.mcpu_cpi;
@@ -4178,7 +4488,8 @@ cpuid_iepb_supported(struct cpu *cp)
ASSERT(cpuid_checkpass(cp, 1));
- if (!(x86_feature & X86_CPUID) || !(x86_feature & X86_MSR)) {
+ if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
+ !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
return (0);
}
@@ -4194,6 +4505,38 @@ cpuid_iepb_supported(struct cpu *cp)
return (regs.cp_ecx & CPUID_EPB_SUPPORT);
}
+/*
+ * Check support for TSC deadline timer
+ *
+ * The TSC deadline timer provides a superior software programming
+ * model over the local APIC timer and eliminates "time drifts".
+ * Instead of specifying a relative time, software specifies an
+ * absolute time as the target at which the processor should
+ * generate a timer event.
+ */
+int
+cpuid_deadline_tsc_supported(void)
+{
+ struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
+ struct cpuid_regs regs;
+
+ ASSERT(cpuid_checkpass(CPU, 1));
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
+
+ switch (cpi->cpi_vendor) {
+ case X86_VENDOR_Intel:
+ if (cpi->cpi_maxeax >= 1) {
+ regs.cp_eax = 1;
+ (void) cpuid_insn(NULL, &regs);
+ return (regs.cp_ecx & CPUID_DEADLINE_TSC);
+ } else {
+ return (0);
+ }
+ default:
+ return (0);
+ }
+}
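
For comparison, the same check can be made from user level; the sketch below assumes GCC/clang's <cpuid.h> and that CPUID_DEADLINE_TSC corresponds to CPUID.1:ECX bit 24, the architectural TSC-deadline bit:

#include <stdio.h>
#include <cpuid.h>

#define	TSC_DEADLINE_BIT	(1u << 24)	/* CPUID.1:ECX bit 24 */

int
main(void)
{
	unsigned eax, ebx, ecx, edx;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
		return (1);

	printf("TSC deadline timer %ssupported\n",
	    (ecx & TSC_DEADLINE_BIT) ? "" : "not ");
	return (0);
}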
+
#if defined(__amd64) && !defined(__xpv)
/*
* Patch in versions of bcopy for high performance Intel Nhm processors
@@ -4205,7 +4548,8 @@ patch_memops(uint_t vendor)
size_t cnt, i;
caddr_t to, from;
- if ((vendor == X86_VENDOR_Intel) && ((x86_feature & X86_SSE4_2) != 0)) {
+ if ((vendor == X86_VENDOR_Intel) &&
+ is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
cnt = &bcopy_patch_end - &bcopy_patch_start;
to = &bcopy_ck_size;
from = &bcopy_patch_start;
diff --git a/usr/src/uts/i86pc/os/cpupm/pwrnow.c b/usr/src/uts/i86pc/os/cpupm/pwrnow.c
index 8840d8ce34..d403c69e34 100644
--- a/usr/src/uts/i86pc/os/cpupm/pwrnow.c
+++ b/usr/src/uts/i86pc/os/cpupm/pwrnow.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/x86_archext.h>
@@ -217,8 +216,8 @@ pwrnow_supported()
struct cpuid_regs cpu_regs;
/* Required features */
- if (!(x86_feature & X86_CPUID) ||
- !(x86_feature & X86_MSR)) {
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
+ !is_x86_feature(x86_featureset, X86FSET_MSR)) {
PWRNOW_DEBUG(("No CPUID or MSR support."));
return (B_FALSE);
}
diff --git a/usr/src/uts/i86pc/os/cpupm/speedstep.c b/usr/src/uts/i86pc/os/cpupm/speedstep.c
index 151fdc79ae..2be4529a83 100644
--- a/usr/src/uts/i86pc/os/cpupm/speedstep.c
+++ b/usr/src/uts/i86pc/os/cpupm/speedstep.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2009, Intel Corporation.
@@ -444,8 +443,8 @@ speedstep_supported(uint_t family, uint_t model)
struct cpuid_regs cpu_regs;
/* Required features */
- if (!(x86_feature & X86_CPUID) ||
- !(x86_feature & X86_MSR)) {
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
+ !is_x86_feature(x86_featureset, X86FSET_MSR)) {
return (B_FALSE);
}
@@ -476,8 +475,8 @@ turbo_supported(void)
struct cpuid_regs cpu_regs;
/* Required features */
- if (!(x86_feature & X86_CPUID) ||
- !(x86_feature & X86_MSR)) {
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
+ !is_x86_feature(x86_featureset, X86FSET_MSR)) {
return (B_FALSE);
}
diff --git a/usr/src/uts/i86pc/os/ddi_impl.c b/usr/src/uts/i86pc/os/ddi_impl.c
index c1a27cc466..a1ae318703 100644
--- a/usr/src/uts/i86pc/os/ddi_impl.c
+++ b/usr/src/uts/i86pc/os/ddi_impl.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -1557,7 +1556,8 @@ i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
* If write-combining is not supported, then it falls back
* to uncacheable.
*/
- if (cache_attr == IOMEM_DATA_UC_WR_COMBINE && !(x86_feature & X86_PAT))
+ if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
+ !is_x86_feature(x86_featureset, X86FSET_PAT))
cache_attr = IOMEM_DATA_UNCACHED;
/*
diff --git a/usr/src/uts/i86pc/os/fastboot.c b/usr/src/uts/i86pc/os/fastboot.c
index 4cedb0be28..1520a6653c 100644
--- a/usr/src/uts/i86pc/os/fastboot.c
+++ b/usr/src/uts/i86pc/os/fastboot.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -601,7 +600,7 @@ fastboot_build_mbi(char *mdep, fastboot_info_t *nk)
static void
fastboot_init_fields(fastboot_info_t *nk)
{
- if (x86_feature & X86_PAE) {
+ if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
nk->fi_has_pae = 1;
nk->fi_shift_amt = fastboot_shift_amt_pae;
nk->fi_ptes_per_table = 512;
@@ -1155,9 +1154,11 @@ load_kernel_retry:
goto err_out;
}
- if ((x86_feature & X86_64) == 0 ||
- (x86_feature & X86_PAE) == 0) {
- cmn_err(CE_NOTE, "!Fastboot: Cannot "
+ if (!is_x86_feature(x86_featureset,
+ X86FSET_64) ||
+ !is_x86_feature(x86_featureset,
+ X86FSET_PAE)) {
+ cmn_err(CE_NOTE, "Fastboot: Cannot "
"reboot to %s: "
"not a 64-bit capable system",
kern_bootfile);
diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c
index 11f226a1eb..0598b913f1 100644
--- a/usr/src/uts/i86pc/os/fpu_subr.c
+++ b/usr/src/uts/i86pc/os/fpu_subr.c
@@ -20,12 +20,9 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Floating point configuration.
*/
@@ -51,6 +48,15 @@ int fpu_exists = 1;
int fp_kind = FP_387;
/*
+ * Mechanism to save FPU state.
+ */
+#if defined(__amd64)
+int fp_save_mech = FP_FXSAVE;
+#elif defined(__i386)
+int fp_save_mech = FP_FNSAVE;
+#endif
+
+/*
* The variable fpu_ignored is provided to allow other code to
* determine whether emulation is being done because there is
* no FPU or because of an override requested via /etc/system.
@@ -142,24 +148,59 @@ fpu_probe(void)
*
* (Perhaps we should complain more about this case!)
*/
- if ((x86_feature & X86_SSE|X86_SSE2) == (X86_SSE|X86_SSE2)) {
- fp_kind = __FP_SSE;
+ if (is_x86_feature(x86_featureset, X86FSET_SSE) &&
+ is_x86_feature(x86_featureset, X86FSET_SSE2)) {
+ fp_kind |= __FP_SSE;
ENABLE_SSE();
+
+ if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
+ ASSERT(is_x86_feature(x86_featureset,
+ X86FSET_XSAVE));
+ fp_kind |= __FP_AVX;
+ }
+
+ if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+ fp_save_mech = FP_XSAVE;
+ fpsave_ctxt = xsave_ctxt;
+ patch_xsave();
+ }
}
#elif defined(__i386)
/*
* SSE and SSE2 are both optional, and we patch kernel
* code to exploit it when present.
*/
- if (x86_feature & X86_SSE) {
- fp_kind = __FP_SSE;
+ if (is_x86_feature(x86_featureset, X86FSET_SSE)) {
+ fp_kind |= __FP_SSE;
+ ENABLE_SSE();
+ fp_save_mech = FP_FXSAVE;
fpsave_ctxt = fpxsave_ctxt;
- patch_sse();
- if (x86_feature & X86_SSE2)
+
+ if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
patch_sse2();
- ENABLE_SSE();
+ }
+
+ if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
+ ASSERT(is_x86_feature(x86_featureset,
+ X86FSET_XSAVE));
+ fp_kind |= __FP_AVX;
+ }
+
+ if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+ fp_save_mech = FP_XSAVE;
+ fpsave_ctxt = xsave_ctxt;
+ patch_xsave();
+ } else {
+ patch_sse(); /* use fxrstor */
+ }
} else {
- x86_feature &= ~X86_SSE2;
+ remove_x86_feature(x86_featureset, X86FSET_SSE2);
+ /*
+ * We will not likely to have a chip with AVX but not
+ * SSE. But to be safe we disable AVX if SSE is not
+ * enabled.
+ */
+ remove_x86_feature(x86_featureset, X86FSET_AVX);
/*
* (Just in case the BIOS decided we wanted SSE
* enabled when we didn't. See 4965674.)
@@ -167,11 +208,11 @@ fpu_probe(void)
DISABLE_SSE();
}
#endif
- if (x86_feature & X86_SSE2) {
+ if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1;
}
- if (fp_kind == __FP_SSE) {
+ if (fp_kind & __FP_SSE) {
struct fxsave_state *fx;
uint8_t fxsave_state[sizeof (struct fxsave_state) +
XMM_ALIGN];
diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c
index faaecd20b6..91d7afcf36 100644
--- a/usr/src/uts/i86pc/os/intr.c
+++ b/usr/src/uts/i86pc/os/intr.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/cpuvar.h>
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index 02596478c0..ac647bea16 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -177,7 +176,7 @@
#include <sys/thread.h>
#include <sys/types.h>
#include <sys/var.h>
-#include <sys/x86_archext.h> /* for x86_feature and X86_AMD */
+#include <sys/x86_archext.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>
#include <vm/vm_dep.h>
diff --git a/usr/src/uts/i86pc/os/machdep.c b/usr/src/uts/i86pc/os/machdep.c
index 38c9f7159f..dcc82d6d9b 100644
--- a/usr/src/uts/i86pc/os/machdep.c
+++ b/usr/src/uts/i86pc/os/machdep.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -1161,7 +1161,7 @@ get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
int
checked_rdmsr(uint_t msr, uint64_t *value)
{
- if ((x86_feature & X86_MSR) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_MSR))
return (ENOTSUP);
*value = rdmsr(msr);
return (0);
@@ -1174,7 +1174,7 @@ checked_rdmsr(uint_t msr, uint64_t *value)
int
checked_wrmsr(uint_t msr, uint64_t value)
{
- if ((x86_feature & X86_MSR) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_MSR))
return (ENOTSUP);
wrmsr(msr, value);
return (0);
@@ -1226,9 +1226,9 @@ num_phys_pages()
/* cpu threshold for compressed dumps */
#ifdef _LP64
-uint_t dump_plat_mincpu = DUMP_PLAT_X86_64_MINCPU;
+uint_t dump_plat_mincpu_default = DUMP_PLAT_X86_64_MINCPU;
#else
-uint_t dump_plat_mincpu = DUMP_PLAT_X86_32_MINCPU;
+uint_t dump_plat_mincpu_default = DUMP_PLAT_X86_32_MINCPU;
#endif
int
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index bf57dd3c50..2f7e271c29 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -180,13 +179,13 @@ mlsetup(struct regs *rp)
* was done in locore before mlsetup was called. Do the next
* pass in C code.
*
- * The x86_feature bits are set here on the basis of the capabilities
+ * The x86_featureset is initialized here based on the capabilities
* of the boot CPU. Note that if we choose to support CPUs that have
* different feature sets (at which point we would almost certainly
* want to set the feature bits to correspond to the feature
* minimum) this value may be altered.
*/
- x86_feature = cpuid_pass1(cpu[0]);
+ cpuid_pass1(cpu[0], x86_featureset);
#if !defined(__xpv)
@@ -212,13 +211,16 @@ mlsetup(struct regs *rp)
* The Xen hypervisor does not correctly report whether rdtscp is
* supported or not, so we must assume that it is not.
*/
- if (get_hwenv() != HW_XEN_HVM && (x86_feature & X86_TSCP))
+ if (get_hwenv() != HW_XEN_HVM &&
+ is_x86_feature(x86_featureset, X86FSET_TSCP))
patch_tsc_read(X86_HAVE_TSCP);
else if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
- cpuid_getfamily(CPU) <= 0xf && (x86_feature & X86_SSE2) != 0)
+ cpuid_getfamily(CPU) <= 0xf &&
+ is_x86_feature(x86_featureset, X86FSET_SSE2))
patch_tsc_read(X86_TSC_MFENCE);
else if (cpuid_getvendor(CPU) == X86_VENDOR_Intel &&
- cpuid_getfamily(CPU) <= 6 && (x86_feature & X86_SSE2) != 0)
+ cpuid_getfamily(CPU) <= 6 &&
+ is_x86_feature(x86_featureset, X86FSET_SSE2))
patch_tsc_read(X86_TSC_LFENCE);
#endif /* !__xpv */
@@ -229,7 +231,7 @@ mlsetup(struct regs *rp)
* or at least they do not implement it correctly. Patch them to
* return 0.
*/
- if ((x86_feature & X86_TSC) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_TSC))
patch_tsc_read(X86_NO_TSC);
#endif /* __i386 && !__xpv */
@@ -246,13 +248,13 @@ mlsetup(struct regs *rp)
* (the cpuid) for the rdtscp instruction on appropriately
* capable hardware.
*/
- if (x86_feature & X86_TSC)
+ if (is_x86_feature(x86_featureset, X86FSET_TSC))
setcr4(getcr4() & ~CR4_TSD);
- if (x86_feature & X86_TSCP)
+ if (is_x86_feature(x86_featureset, X86FSET_TSCP))
(void) wrmsr(MSR_AMD_TSCAUX, 0);
- if (x86_feature & X86_DE)
+ if (is_x86_feature(x86_featureset, X86FSET_DE))
setcr4(getcr4() | CR4_DE);
#endif /* __xpv */
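
The mlsetup() hunks keep the same decision tree for patching tsc_read() and only convert the feature tests: use rdtscp when the CPU has TSCP and we are not under Xen HVM, an mfence-fenced rdtsc on AMD family <= 0xf with SSE2, an lfence-fenced rdtsc on Intel family <= 6 with SSE2, and a stub that returns 0 when there is no TSC at all. A hedged restatement of that selection as a plain function follows; the enum values stand in for the X86_* patch selectors and the check ordering is simplified.

    /* Stand-ins for the patch_tsc_read() selectors; values are illustrative. */
    enum tsc_patch { TSC_PLAIN, TSC_HAVE_TSCP, TSC_MFENCE, TSC_LFENCE, TSC_NONE };
    enum vendor { VENDOR_AMD, VENDOR_INTEL, VENDOR_OTHER };

    static enum tsc_patch
    choose_tsc_read(int xen_hvm, int has_tsc, int has_tscp, int has_sse2,
        enum vendor v, unsigned family)
    {
        if (!has_tsc)
            return (TSC_NONE);          /* patch tsc_read() to return 0 */
        if (!xen_hvm && has_tscp)
            return (TSC_HAVE_TSCP);     /* use rdtscp */
        if (v == VENDOR_AMD && family <= 0xf && has_sse2)
            return (TSC_MFENCE);        /* mfence; rdtsc */
        if (v == VENDOR_INTEL && family <= 6 && has_sse2)
            return (TSC_LFENCE);        /* lfence; rdtsc */
        return (TSC_PLAIN);
    }
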
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index 9f9c3aae4a..d7aab080a6 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -20,7 +20,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2009-2010, Intel Corporation.
@@ -237,7 +237,7 @@ pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw)
{
switch (hw) {
case PGHW_IPIPE:
- if (x86_feature & (X86_HTT)) {
+ if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
/*
* Hyper-threading is SMT
*/
@@ -251,7 +251,8 @@ pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw)
else
return (0);
case PGHW_CHIP:
- if (x86_feature & (X86_CMP|X86_HTT))
+ if (is_x86_feature(x86_featureset, X86FSET_CMP) ||
+ is_x86_feature(x86_featureset, X86FSET_HTT))
return (1);
else
return (0);
@@ -1017,7 +1018,8 @@ mach_init()
idle_cpu = cpu_idle_adaptive;
CPU->cpu_m.mcpu_idle_cpu = cpu_idle;
#ifndef __xpv
- if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
+ if (is_x86_feature(x86_featureset, X86FSET_MWAIT) &&
+ idle_cpu_prefer_mwait) {
CPU->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
/*
* Protect ourself from insane mwait size.
@@ -1130,7 +1132,8 @@ mach_smpinit(void)
if (idle_cpu_use_hlt) {
disp_enq_thread = cpu_wakeup;
#ifndef __xpv
- if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait)
+ if (is_x86_feature(x86_featureset, X86FSET_MWAIT) &&
+ idle_cpu_prefer_mwait)
disp_enq_thread = cpu_wakeup_mwait;
non_deep_idle_disp_enq_thread = disp_enq_thread;
#endif
@@ -1239,7 +1242,7 @@ mach_getcpufreq(void)
uint32_t pit_counter;
uint64_t processor_clks;
- if (x86_feature & X86_TSC) {
+ if (is_x86_feature(x86_featureset, X86FSET_TSC)) {
/*
* We have a TSC. freq_tsc() knows how to measure the number
* of clock cycles sampled against the PIT.
@@ -1411,7 +1414,7 @@ mach_clkinit(int preferred_mode, int *set_mode)
cpu_freq = machhztomhz(cpu_freq_hz);
- if (!(x86_feature & X86_TSC) || (cpu_freq == 0))
+ if (!is_x86_feature(x86_featureset, X86FSET_TSC) || (cpu_freq == 0))
tsc_gethrtime_enable = 0;
#ifndef __xpv
diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c
index bc21812187..0429b463f6 100644
--- a/usr/src/uts/i86pc/os/mp_pc.c
+++ b/usr/src/uts/i86pc/os/mp_pc.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -313,7 +312,6 @@ mach_cpucontext_xalloc(struct cpu *cp, int optype)
*/
rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
rm->rm_cpu = cp->cpu_id;
- rm->rm_x86feature = x86_feature;
/*
* For hot-adding CPU at runtime, Machine Check and Performance Counter
@@ -624,7 +622,7 @@ out_enable_cmi:
if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
- if (x86_feature & X86_MCA)
+ if (is_x86_feature(x86_featureset, X86FSET_MCA))
cmi_mca_init(hdl);
cp->cpu_m.mcpu_cmi_hdl = hdl;
}
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index 146f879a66..843c5ae73a 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -165,7 +164,8 @@ init_cpu_syscall(struct cpu *cp)
kpreempt_disable();
#if defined(__amd64)
- if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC)) {
+ if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
+ is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
#if !defined(__lint)
/*
@@ -205,7 +205,8 @@ init_cpu_syscall(struct cpu *cp)
* On 64-bit kernels on Nocona machines, the 32-bit syscall
* variant isn't available to 32-bit applications, but sysenter is.
*/
- if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP)) {
+ if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
+ is_x86_feature(x86_featureset, X86FSET_SEP)) {
#if !defined(__lint)
/*
@@ -415,7 +416,8 @@ mp_cpu_configure_common(int cpun, boolean_t boot)
*/
cpuid_alloc_space(cp);
#if !defined(__xpv)
- if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
+ if (is_x86_feature(x86_featureset, X86FSET_MWAIT) &&
+ idle_cpu_prefer_mwait) {
cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
} else
@@ -1142,7 +1144,7 @@ workaround_errata(struct cpu *cpu)
if (opteron_workaround_6323525) {
opteron_workaround_6323525++;
#if defined(__xpv)
- } else if (x86_feature & X86_SSE2) {
+ } else if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
if (DOMAIN_IS_INITDOMAIN(xen_info)) {
/*
* XXPV Use dom0_msr here when extended
@@ -1160,7 +1162,8 @@ workaround_errata(struct cpu *cpu)
opteron_workaround_6323525++;
}
#else /* __xpv */
- } else if ((x86_feature & X86_SSE2) && ((opteron_get_nnodes() *
+ } else if (is_x86_feature(x86_featureset, X86FSET_SSE2) &&
+ ((opteron_get_nnodes() *
cpuid_get_ncpu_per_chip(cpu)) > 1)) {
if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0)
opteron_workaround_6323525++;
@@ -1602,8 +1605,7 @@ static void
mp_startup_common(boolean_t boot)
{
cpu_t *cp = CPU;
- uint_t new_x86_feature;
- const char *fmt = "?cpu%d: %b\n";
+ uchar_t new_x86_featureset[BT_SIZEOFMAP(NUM_X86_FEATURES)];
extern void cpu_event_init_cpu(cpu_t *);
/*
@@ -1629,13 +1631,14 @@ mp_startup_common(boolean_t boot)
*/
(void) (*ap_mlsetup)();
- new_x86_feature = cpuid_pass1(cp);
+ bzero(new_x86_featureset, BT_SIZEOFMAP(NUM_X86_FEATURES));
+ cpuid_pass1(cp, new_x86_featureset);
#ifndef __xpv
/*
* Program this cpu's PAT
*/
- if (x86_feature & X86_PAT)
+ if (is_x86_feature(x86_featureset, X86FSET_PAT))
pat_sync();
#endif
@@ -1643,7 +1646,7 @@ mp_startup_common(boolean_t boot)
* Set up TSC_AUX to contain the cpuid for this processor
* for the rdtscp instruction.
*/
- if (x86_feature & X86_TSCP)
+ if (is_x86_feature(x86_featureset, X86FSET_TSCP))
(void) wrmsr(MSR_AMD_TSCAUX, cp->cpu_id);
/*
@@ -1671,9 +1674,10 @@ mp_startup_common(boolean_t boot)
* likely to happen once the number of processors in a configuration
* gets large enough.
*/
- if ((x86_feature & new_x86_feature) != x86_feature) {
- cmn_err(CE_CONT, fmt, cp->cpu_id, new_x86_feature,
- FMT_X86_FEATURE);
+ if (compare_x86_featureset(x86_featureset, new_x86_featureset) ==
+ B_FALSE) {
+ cmn_err(CE_CONT, "cpu%d: featureset\n", cp->cpu_id);
+ print_x86_featureset(new_x86_featureset);
cmn_err(CE_WARN, "cpu%d feature mismatch", cp->cpu_id);
}
@@ -1681,7 +1685,8 @@ mp_startup_common(boolean_t boot)
* We do not support cpus with mixed monitor/mwait support if the
* boot cpu supports monitor/mwait.
*/
- if ((x86_feature & ~new_x86_feature) & X86_MWAIT)
+ if (is_x86_feature(x86_featureset, X86FSET_MWAIT) !=
+ is_x86_feature(new_x86_featureset, X86FSET_MWAIT))
panic("unsupported mixed cpu monitor/mwait support detected");
/*
@@ -1705,6 +1710,13 @@ mp_startup_common(boolean_t boot)
*/
cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
+ /*
+ * Setup this processor for XSAVE.
+ */
+ if (fp_save_mech == FP_XSAVE) {
+ xsave_setup_msr(cp);
+ }
+
cpuid_pass2(cp);
cpuid_pass3(cp);
(void) cpuid_pass4(cp);
@@ -1775,7 +1787,7 @@ mp_startup_common(boolean_t boot)
if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
- if (x86_feature & X86_MCA)
+ if (is_x86_feature(x86_featureset, X86FSET_MCA))
cmi_mca_init(hdl);
cp->cpu_m.mcpu_cmi_hdl = hdl;
}
@@ -1934,19 +1946,21 @@ mp_cpu_faulted_exit(struct cpu *cp)
* The following two routines are used as context operators on threads belonging
* to processes with a private LDT (see sysi86). Due to the rarity of such
* processes, these routines are currently written for best code readability and
- * organization rather than speed. We could avoid checking x86_feature at every
- * context switch by installing different context ops, depending on the
- * x86_feature flags, at LDT creation time -- one for each combination of fast
- * syscall feature flags.
+ * organization rather than speed. We could avoid checking x86_featureset at
+ * every context switch by installing different context ops, depending on
+ * x86_featureset, at LDT creation time -- one for each combination of fast
+ * syscall features.
*/
/*ARGSUSED*/
void
cpu_fast_syscall_disable(void *arg)
{
- if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
+ if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
+ is_x86_feature(x86_featureset, X86FSET_SEP))
cpu_sep_disable();
- if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
+ if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
+ is_x86_feature(x86_featureset, X86FSET_ASYSC))
cpu_asysc_disable();
}
@@ -1954,16 +1968,18 @@ cpu_fast_syscall_disable(void *arg)
void
cpu_fast_syscall_enable(void *arg)
{
- if ((x86_feature & (X86_MSR | X86_SEP)) == (X86_MSR | X86_SEP))
+ if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
+ is_x86_feature(x86_featureset, X86FSET_SEP))
cpu_sep_enable();
- if ((x86_feature & (X86_MSR | X86_ASYSC)) == (X86_MSR | X86_ASYSC))
+ if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
+ is_x86_feature(x86_featureset, X86FSET_ASYSC))
cpu_asysc_enable();
}
static void
cpu_sep_enable(void)
{
- ASSERT(x86_feature & X86_SEP);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP));
ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
wrmsr(MSR_INTC_SEP_CS, (uint64_t)(uintptr_t)KCS_SEL);
@@ -1972,7 +1988,7 @@ cpu_sep_enable(void)
static void
cpu_sep_disable(void)
{
- ASSERT(x86_feature & X86_SEP);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_SEP));
ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
/*
@@ -1985,7 +2001,7 @@ cpu_sep_disable(void)
static void
cpu_asysc_enable(void)
{
- ASSERT(x86_feature & X86_ASYSC);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC));
ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
wrmsr(MSR_AMD_EFER, rdmsr(MSR_AMD_EFER) |
@@ -1995,7 +2011,7 @@ cpu_asysc_enable(void)
static void
cpu_asysc_disable(void)
{
- ASSERT(x86_feature & X86_ASYSC);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_ASYSC));
ASSERT(curthread->t_preempt || getpil() >= LOCK_LEVEL);
/*
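
mp_startup_common() now collects the new CPU's features with cpuid_pass1() into a local bitmap and compares it against the boot CPU's set: a mismatch is reported, and mixed monitor/mwait support across CPUs remains fatal. A rough sketch of those two checks, with NWORDS and the helper names invented for illustration:

    #include <stdint.h>
    #include <string.h>

    #define NWORDS  8       /* illustrative featureset size in words */

    static int
    featuresets_equal(const uint32_t *boot_fs, const uint32_t *new_fs)
    {
        return (memcmp(boot_fs, new_fs, sizeof (uint32_t) * NWORDS) == 0);
    }

    static int
    check_ap_features(const uint32_t *boot_fs, const uint32_t *new_fs,
        int boot_has_mwait, int new_has_mwait)
    {
        if (!featuresets_equal(boot_fs, new_fs))
            return (-1);    /* warn: cpu feature mismatch */
        if (boot_has_mwait != new_has_mwait)
            return (-2);    /* panic: mixed monitor/mwait support */
        return (0);
    }
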
diff --git a/usr/src/uts/i86pc/os/pci_mech1_amd.c b/usr/src/uts/i86pc/os/pci_mech1_amd.c
index d45408731b..3b6eb918fe 100644
--- a/usr/src/uts/i86pc/os/pci_mech1_amd.c
+++ b/usr/src/uts/i86pc/os/pci_mech1_amd.c
@@ -42,7 +42,7 @@ pci_check_amd_ioecs(void)
struct cpuid_regs cp;
int family;
- if ((x86_feature & X86_CPUID) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
return (B_FALSE);
/*
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index f69b37a9f2..d8facc92e7 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -1320,7 +1320,6 @@ static void
startup_kmem(void)
{
extern void page_set_colorequiv_arr(void);
- const char *fmt = "?features: %b\n";
PRM_POINT("startup_kmem() starting...");
@@ -1429,7 +1428,7 @@ startup_kmem(void)
/*
* print this out early so that we know what's going on
*/
- cmn_err(CE_CONT, fmt, x86_feature, FMT_X86_FEATURE);
+ print_x86_featureset(x86_featureset);
/*
* Initialize bp_mapin().
@@ -1651,7 +1650,7 @@ startup_modules(void)
if ((hdl = cmi_init(CMI_HDL_SOLARIS_xVM_MCA,
xen_physcpu_chipid(cpi), xen_physcpu_coreid(cpi),
xen_physcpu_strandid(cpi))) != NULL &&
- (x86_feature & X86_MCA))
+ is_x86_feature(x86_featureset, X86FSET_MCA))
cmi_mca_init(hdl);
}
}
@@ -1663,7 +1662,7 @@ startup_modules(void)
if ((get_hwenv() != HW_XEN_HVM) &&
(hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL &&
- (x86_feature & X86_MCA)) {
+ is_x86_feature(x86_featureset, X86FSET_MCA)) {
cmi_mca_init(hdl);
CPU->cpu_m.mcpu_cmi_hdl = hdl;
}
@@ -2194,6 +2193,13 @@ startup_end(void)
PRM_POINT("configure() done");
/*
+ * We can now setup for XSAVE because fpu_probe is done in configure().
+ */
+ if (fp_save_mech == FP_XSAVE) {
+ xsave_setup_msr(CPU);
+ }
+
+ /*
* Set the isa_list string to the defined instruction sets we
* support.
*/
@@ -2670,7 +2676,7 @@ pat_sync(void)
{
ulong_t cr0, cr0_orig, cr4;
- if (!(x86_feature & X86_PAT))
+ if (!is_x86_feature(x86_featureset, X86FSET_PAT))
return;
cr0_orig = cr0 = getcr0();
cr4 = getcr4();
@@ -2993,12 +2999,13 @@ setx86isalist(void)
case X86_VENDOR_Intel:
case X86_VENDOR_AMD:
case X86_VENDOR_TM:
- if (x86_feature & X86_CMOV) {
+ if (is_x86_feature(x86_featureset, X86FSET_CMOV)) {
/*
* Pentium Pro or later
*/
(void) strcat(tp, "pentium_pro");
- (void) strcat(tp, x86_feature & X86_MMX ?
+ (void) strcat(tp,
+ is_x86_feature(x86_featureset, X86FSET_MMX) ?
"+mmx pentium_pro " : " ");
}
/*FALLTHROUGH*/
@@ -3007,9 +3014,10 @@ setx86isalist(void)
* The Cyrix 6x86 does not have any Pentium features
* accessible while not at privilege level 0.
*/
- if (x86_feature & X86_CPUID) {
+ if (is_x86_feature(x86_featureset, X86FSET_CPUID)) {
(void) strcat(tp, "pentium");
- (void) strcat(tp, x86_feature & X86_MMX ?
+ (void) strcat(tp,
+ is_x86_feature(x86_featureset, X86FSET_MMX) ?
"+mmx pentium " : " ");
}
break;
diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c
index a004b73055..71995cc992 100644
--- a/usr/src/uts/i86pc/os/trap.c
+++ b/usr/src/uts/i86pc/os/trap.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -256,17 +256,17 @@ instr_is_other_syscall(caddr_t pc, int which)
{
uchar_t instr[FAST_SCALL_SIZE];
- ASSERT(which == X86_SEP || which == X86_ASYSC || which == 0xCD);
+ ASSERT(which == X86FSET_SEP || which == X86FSET_ASYSC || which == 0xCD);
if (copyin_nowatch(pc, (caddr_t)instr, FAST_SCALL_SIZE) != 0)
return (0);
switch (which) {
- case X86_SEP:
+ case X86FSET_SEP:
if (instr[0] == 0x0F && instr[1] == 0x34)
return (1);
break;
- case X86_ASYSC:
+ case X86FSET_ASYSC:
if (instr[0] == 0x0F && instr[1] == 0x05)
return (1);
break;
@@ -283,9 +283,9 @@ static const char *
syscall_insn_string(int syscall_insn)
{
switch (syscall_insn) {
- case X86_SEP:
+ case X86FSET_SEP:
return ("sysenter");
- case X86_ASYSC:
+ case X86FSET_ASYSC:
return ("syscall");
case 0xCD:
return ("int");
@@ -916,7 +916,7 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
* be to emulate that particular instruction.
*/
if (p->p_ldt != NULL &&
- ldt_rewrite_syscall(rp, p, X86_ASYSC))
+ ldt_rewrite_syscall(rp, p, X86FSET_ASYSC))
goto out;
#ifdef __amd64
@@ -1018,7 +1018,8 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
case T_SIMDFPE + USER: /* SSE and SSE2 exceptions */
if (tudebug && tudebugsse)
showregs(type, rp, addr);
- if ((x86_feature & (X86_SSE|X86_SSE2)) == 0) {
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE) &&
+ !is_x86_feature(x86_featureset, X86FSET_SSE2)) {
/*
* There are rumours that some user instructions
* on older CPUs can cause this trap to occur; in
@@ -1268,7 +1269,7 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
* this will be to emulate that particular instruction.
*/
if (p->p_ldt != NULL &&
- ldt_rewrite_syscall(rp, p, X86_SEP))
+ ldt_rewrite_syscall(rp, p, X86FSET_SEP))
goto out;
/*FALLTHROUGH*/
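
trap.c keys the fast-syscall emulation paths off the same selectors (now X86FSET_SEP and X86FSET_ASYSC) and recognizes the instructions by their opcode bytes: sysenter is 0x0F 0x34, syscall is 0x0F 0x05, and a software interrupt begins with 0xCD. A small sketch of that opcode test, with the WHICH_* values as illustrative stand-ins for the selectors:

    #include <stdint.h>

    enum { WHICH_SEP = 1, WHICH_ASYSC = 2, WHICH_INT = 0xCD };  /* illustrative */

    static int
    instr_matches(const uint8_t instr[2], int which)
    {
        switch (which) {
        case WHICH_SEP:                 /* sysenter */
            return (instr[0] == 0x0F && instr[1] == 0x34);
        case WHICH_ASYSC:               /* syscall */
            return (instr[0] == 0x0F && instr[1] == 0x05);
        case WHICH_INT:                 /* int $imm8 */
            return (instr[0] == 0xCD);
        default:
            return (0);
        }
    }
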
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index 1f177556ea..b632cea09c 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -22,6 +22,11 @@
* Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
#ifndef _SYS_APIC_APIC_H
#define _SYS_APIC_APIC_H
@@ -74,9 +79,6 @@ extern "C" {
#define APIC_INT_CMD1 0xc0
#define APIC_INT_CMD2 0xc4
-/* Timer Vector Table register */
-#define APIC_LOCAL_TIMER 0xc8
-
/* Local Interrupt Vector registers */
#define APIC_CMCI_VECT 0xbc
#define APIC_THERM_VECT 0xcc
@@ -344,9 +346,6 @@ struct apic_io_intr {
/* spurious interrupt vector register */
#define AV_UNIT_ENABLE 0x100
-/* timer vector table */
-#define AV_TIME 0x20000 /* Set timer mode to periodic */
-
#define APIC_MAXVAL 0xffffffffUL
#define APIC_TIME_MIN 0x5000
#define APIC_TIME_COUNT 0x4000
@@ -665,19 +664,6 @@ typedef struct {
#define PSMGI_INTRBY_DEFAULT 0x4000 /* PSM specific default value */
#define PSMGI_INTRBY_FLAGS 0xc000 /* Mask for this flag */
-/*
- * Use scaled-fixed-point arithmetic to calculate apic ticks.
- * Round when dividing (by adding half of divisor to dividend)
- * for one extra bit of precision.
- */
-
-#define SF (1ULL<<20) /* Scaling Factor: scale by 2^20 */
-#define APIC_TICKS_TO_NSECS(ticks) ((((int64_t)(ticks) * SF) + \
- apic_ticks_per_SFnsecs / 2) / \
- apic_ticks_per_SFnsecs);
-#define APIC_NSECS_TO_TICKS(nsecs) (((int64_t)(nsecs) * \
- apic_ticks_per_SFnsecs + (SF/2)) / SF)
-
extern int apic_verbose;
/* Flag definitions for apic_verbose */
@@ -839,6 +825,7 @@ extern int apic_local_mode();
extern void apic_change_eoi();
extern void apic_send_EOI(uint32_t);
extern void apic_send_directed_EOI(uint32_t);
+extern uint_t apic_calibrate(volatile uint32_t *, uint16_t *);
extern volatile uint32_t *apicadr; /* virtual addr of local APIC */
extern int apic_forceload;
diff --git a/usr/src/uts/i86pc/sys/apic_common.h b/usr/src/uts/i86pc/sys/apic_common.h
index cd259d7f62..c7b6d925f1 100644
--- a/usr/src/uts/i86pc/sys/apic_common.h
+++ b/usr/src/uts/i86pc/sys/apic_common.h
@@ -111,9 +111,6 @@ extern int apic_panic_on_apic_error;
extern int apic_verbose;
-/* minimum number of timer ticks to program to */
-extern int apic_min_timer_ticks;
-
#ifdef DEBUG
extern int apic_debug;
extern int apic_restrict_vector;
@@ -180,9 +177,7 @@ extern void apic_unset_idlecpu(processorid_t cpun);
extern void apic_shutdown(int cmd, int fcn);
extern void apic_preshutdown(int cmd, int fcn);
extern processorid_t apic_get_next_processorid(processorid_t cpun);
-extern void apic_timer_reprogram(hrtime_t time);
-extern void apic_timer_enable(void);
-extern void apic_timer_disable(void);
+extern uint_t apic_calibrate(volatile uint32_t *, uint16_t *);
extern int apic_error_intr();
extern void apic_cpcovf_mask_clear(void);
diff --git a/usr/src/uts/i86pc/sys/apic_timer.h b/usr/src/uts/i86pc/sys/apic_timer.h
new file mode 100644
index 0000000000..7f7285fb83
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/apic_timer.h
@@ -0,0 +1,78 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_APIC_TIMER_H
+#define _SYS_APIC_TIMER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/time.h>
+
+#define IA32_DEADLINE_TSC_MSR 0x6E0
+
+/* Timer Vector Table register */
+#define APIC_LOCAL_TIMER 0xc8
+
+/* timer vector table */
+#define AV_PERIODIC 0x20000 /* Set timer mode to periodic */
+#define AV_DEADLINE 0x40000 /* Set timer mode to deadline */
+
+#define APIC_TIMER_MODE_ONESHOT 0x0
+#define APIC_TIMER_MODE_PERIODIC 0x1
+#define APIC_TIMER_MODE_DEADLINE 0x2 /* TSC-Deadline timer mode */
+
+extern int apic_oneshot;
+extern uint_t apic_nsec_per_intr;
+extern uint_t apic_hertz_count;
+extern uint64_t apic_ticks_per_SFnsecs;
+
+/*
+ * Use scaled-fixed-point arithmetic to calculate apic ticks.
+ * Round when dividing (by adding half of divisor to dividend)
+ * for one extra bit of precision.
+ */
+
+#define SF (1ULL<<20) /* Scaling Factor: scale by 2^20 */
+#define APIC_TICKS_TO_NSECS(ticks) ((((int64_t)(ticks) * SF) + \
+ apic_ticks_per_SFnsecs / 2) / \
+ apic_ticks_per_SFnsecs);
+#define APIC_NSECS_TO_TICKS(nsecs) (((int64_t)(nsecs) * \
+ apic_ticks_per_SFnsecs + (SF/2)) / SF)
+
+extern int apic_timer_init(int);
+extern void apic_timer_reprogram(hrtime_t);
+extern void apic_timer_enable(void);
+extern void apic_timer_disable(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_APIC_TIMER_H */
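
The scaled fixed-point macros moved into apic_timer.h convert between APIC timer ticks and nanoseconds by scaling with SF = 2^20 and adding half the divisor before dividing, which rounds to nearest instead of truncating. Below is a small standalone example of the conversion; the calibration value chosen for apic_ticks_per_SFnsecs is purely illustrative, and the macros here follow the header's definitions rather than being the header itself.

    #include <stdint.h>
    #include <stdio.h>

    #define SF  (1ULL << 20)    /* Scaling Factor: scale by 2^20 */
    #define APIC_TICKS_TO_NSECS(ticks)  ((((int64_t)(ticks) * SF) + \
            apic_ticks_per_SFnsecs / 2) / apic_ticks_per_SFnsecs)
    #define APIC_NSECS_TO_TICKS(nsecs)  (((int64_t)(nsecs) * \
            apic_ticks_per_SFnsecs + (SF / 2)) / SF)

    /* Ticks per 2^20 ns; normally from calibration, ~200 MHz assumed here. */
    static uint64_t apic_ticks_per_SFnsecs = 200 * SF / 1000;

    int
    main(void)
    {
        int64_t nsecs = 10000000;       /* 10 ms */
        int64_t ticks = APIC_NSECS_TO_TICKS(nsecs);
        int64_t back = APIC_TICKS_TO_NSECS(ticks);

        /* Round-trip error is on the order of one timer tick. */
        printf("%lld ns -> %lld ticks -> %lld ns\n",
            (long long)nsecs, (long long)ticks, (long long)back);
        return (0);
    }
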
diff --git a/usr/src/uts/i86pc/sys/apix.h b/usr/src/uts/i86pc/sys/apix.h
index 3db39b4021..8237d2e455 100644
--- a/usr/src/uts/i86pc/sys/apix.h
+++ b/usr/src/uts/i86pc/sys/apix.h
@@ -30,6 +30,7 @@
#include <sys/traptrace.h>
#include <sys/apic.h>
#include <sys/apic_common.h>
+#include <sys/apic_timer.h>
#ifdef __cplusplus
extern "C" {
diff --git a/usr/src/uts/i86pc/sys/hpet_acpi.h b/usr/src/uts/i86pc/sys/hpet_acpi.h
index 078f4e73b3..e60ebe4bba 100644
--- a/usr/src/uts/i86pc/sys/hpet_acpi.h
+++ b/usr/src/uts/i86pc/sys/hpet_acpi.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _HPET_ACPI_H
diff --git a/usr/src/uts/i86pc/sys/immu.h b/usr/src/uts/i86pc/sys/immu.h
index db9732c8a7..70193d26e6 100644
--- a/usr/src/uts/i86pc/sys/immu.h
+++ b/usr/src/uts/i86pc/sys/immu.h
@@ -42,8 +42,11 @@ extern "C" {
#include <sys/types.h>
#include <sys/bitset.h>
#include <sys/kstat.h>
+#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/rootnex.h>
+#include <sys/iommulib.h>
+#include <sys/sdt.h>
/*
* Some ON drivers have bugs. Keep this define until all such drivers
@@ -63,6 +66,7 @@ typedef uint64_t hw_pdte_t;
#define IMMU_PAGEMASK (~IMMU_PAGEOFFSET)
#define IMMU_BTOP(b) (((uint64_t)b) >> IMMU_PAGESHIFT)
#define IMMU_PTOB(p) (((uint64_t)p) << IMMU_PAGESHIFT)
+#define IMMU_BTOPR(x) ((((x) + IMMU_PAGEOFFSET) >> IMMU_PAGESHIFT))
#define IMMU_PGTABLE_MAX_LEVELS (6)
#define IMMU_ROUNDUP(size) (((size) + IMMU_PAGEOFFSET) & ~IMMU_PAGEOFFSET)
#define IMMU_ROUNDOWN(addr) ((addr) & ~IMMU_PAGEOFFSET)
@@ -71,9 +75,6 @@ typedef uint64_t hw_pdte_t;
#define IMMU_PGTABLE_OFFSHIFT (IMMU_PAGESHIFT - IMMU_PGTABLE_LEVEL_STRIDE)
#define IMMU_PGTABLE_MAXIDX ((IMMU_PAGESIZE / sizeof (hw_pdte_t)) - 1)
-#define IMMU_ROUNDUP(size) (((size) + IMMU_PAGEOFFSET) & ~IMMU_PAGEOFFSET)
-#define IMMU_ROUNDOWN(addr) ((addr) & ~IMMU_PAGEOFFSET)
-
/*
* DMAR global defines
*/
@@ -145,6 +146,8 @@ typedef struct rmrr {
list_node_t rm_node;
} rmrr_t;
+#define IMMU_UNIT_NAME "iommu"
+
/*
* Macros based on PCI spec
*/
@@ -460,6 +463,12 @@ typedef struct hw_rce {
#define CONT_GET_P(hcent) ((hcent)->lo & 0x1)
#define CONT_SET_P(hcent) ((hcent)->lo |= 0x1)
+#define CONT_GET_ALH(hcent) ((hcent)->lo & 0x20)
+#define CONT_SET_ALH(hcent) ((hcent)->lo |= 0x20)
+
+#define CONT_GET_EH(hcent) ((hcent)->lo & 0x10)
+#define CONT_SET_EH(hcent) ((hcent)->lo |= 0x10)
+
/* we use the bit 63 (available for system SW) as a present bit */
#define PDTE_SW4(hw_pdte) ((hw_pdte) & ((uint64_t)1<<63))
@@ -507,8 +516,41 @@ typedef struct hw_rce {
#define PDTE_CLEAR_READ(hw_pdte) ((hw_pdte) &= ~(0x1))
#define PDTE_SET_READ(hw_pdte) ((hw_pdte) |= (0x1))
+#define PDTE_MASK_R ((uint64_t)1 << 0)
+#define PDTE_MASK_W ((uint64_t)1 << 1)
+#define PDTE_MASK_SNP ((uint64_t)1 << 11)
+#define PDTE_MASK_TM ((uint64_t)1 << 62)
+#define PDTE_MASK_P ((uint64_t)1 << 63)
+
struct immu_flushops;
+/*
+ * Used to wait for invalidation completion.
+ * vstatus is the virtual address of the status word that will be written
+ * pstatus is the physical address.
+ * If sync is true, then the operation will be waited on for
+ * completion immediately. Else, the wait interface can be called
+ * to wait for completion later.
+ */
+
+#define IMMU_INV_DATA_PENDING 1
+#define IMMU_INV_DATA_DONE 2
+
+typedef struct immu_inv_wait {
+ volatile uint32_t iwp_vstatus;
+ uint64_t iwp_pstatus;
+ boolean_t iwp_sync;
+ const char *iwp_name; /* ID for debugging/statistics */
+} immu_inv_wait_t;
+
+/*
+ * Used to batch IOMMU pagetable writes.
+ */
+typedef struct immu_dcookie {
+ paddr_t dck_paddr;
+ uint64_t dck_npages;
+} immu_dcookie_t;
+
typedef struct immu {
kmutex_t immu_lock;
char *immu_name;
@@ -547,10 +589,12 @@ typedef struct immu {
boolean_t immu_dvma_coherent;
boolean_t immu_TM_reserved;
boolean_t immu_SNP_reserved;
+ uint64_t immu_ptemask;
/* DVMA context related */
krwlock_t immu_ctx_rwlock;
pgtable_t *immu_ctx_root;
+ immu_inv_wait_t immu_ctx_inv_wait;
/* DVMA domain related */
int immu_max_domains;
@@ -569,6 +613,7 @@ typedef struct immu {
boolean_t immu_intrmap_running;
intrmap_t *immu_intrmap;
uint64_t immu_intrmap_irta_reg;
+ immu_inv_wait_t immu_intrmap_inv_wait;
/* queued invalidation related */
kmutex_t immu_qinv_lock;
@@ -582,8 +627,19 @@ typedef struct immu {
list_node_t immu_node;
struct immu_flushops *immu_flushops;
+
+ kmem_cache_t *immu_hdl_cache;
+ kmem_cache_t *immu_pgtable_cache;
+
+ iommulib_handle_t immu_iommulib_handle;
} immu_t;
+/*
+ * Enough space to hold the decimal number of any device instance.
+ * Used for device/cache names.
+ */
+#define IMMU_ISTRLEN 11 /* log10(2^31) + 1 */
+
/* properties that control DVMA */
#define DDI_DVMA_MAPTYPE_ROOTNEX_PROP "immu-dvma-mapping"
@@ -622,6 +678,9 @@ typedef struct domain {
list_node_t dom_immu_node;
mod_hash_t *dom_cookie_hash;
+
+ /* topmost device in domain; usually the device itself (non-shared) */
+ dev_info_t *dom_dip;
} domain_t;
typedef enum immu_pcib {
@@ -652,6 +711,9 @@ typedef struct immu_devi {
boolean_t imd_display;
boolean_t imd_lpc;
+ /* set if premapped DVMA space is used */
+ boolean_t imd_use_premap;
+
/* dmar unit to which this dip belongs */
immu_t *imd_immu;
@@ -686,6 +748,21 @@ typedef struct immu_arg {
dev_info_t *ima_ddip;
} immu_arg_t;
+#define IMMU_NDVSEG 8
+#define IMMU_NDCK 64
+#define IMMU_NPREPTES 8
+
+typedef struct immu_hdl_private {
+ immu_inv_wait_t ihp_inv_wait;
+ size_t ihp_ndvseg;
+ struct dvmaseg ihp_dvseg[IMMU_NDVSEG];
+ immu_dcookie_t ihp_dcookies[IMMU_NDCK];
+
+ hw_pdte_t *ihp_preptes[IMMU_NPREPTES];
+ uint64_t ihp_predvma;
+ int ihp_npremapped;
+} immu_hdl_priv_t;
+
/*
* Invalidation operation function pointers for context and IOTLB.
* These will be set to either the register or the queue invalidation
@@ -693,28 +770,35 @@ typedef struct immu_arg {
* both at the same time.
*/
struct immu_flushops {
- void (*imf_context_fsi)(immu_t *, uint8_t, uint16_t, uint_t);
- void (*imf_context_dsi)(immu_t *, uint_t);
- void (*imf_context_gbl)(immu_t *);
+ void (*imf_context_fsi)(immu_t *, uint8_t, uint16_t, uint_t,
+ immu_inv_wait_t *);
+ void (*imf_context_dsi)(immu_t *, uint_t, immu_inv_wait_t *);
+ void (*imf_context_gbl)(immu_t *, immu_inv_wait_t *);
- void (*imf_iotlb_psi)(immu_t *, uint_t, uint64_t, uint_t, uint_t);
- void (*imf_iotlb_dsi)(immu_t *, uint_t);
- void (*imf_iotlb_gbl)(immu_t *);
+ void (*imf_iotlb_psi)(immu_t *, uint_t, uint64_t, uint_t, uint_t,
+ immu_inv_wait_t *);
+ void (*imf_iotlb_dsi)(immu_t *, uint_t, immu_inv_wait_t *);
+ void (*imf_iotlb_gbl)(immu_t *, immu_inv_wait_t *);
+
+ void (*imf_wait)(immu_inv_wait_t *);
};
-#define immu_flush_context_fsi(i, f, s, d) \
- (i)->immu_flushops->imf_context_fsi(i, f, s, d)
-#define immu_flush_context_dsi(i, d) \
- (i)->immu_flushops->imf_context_dsi(i, d)
-#define immu_flush_context_gbl(i) \
- (i)->immu_flushops->imf_context_gbl(i)
+#define immu_flush_context_fsi(i, f, s, d, w) \
+ (i)->immu_flushops->imf_context_fsi(i, f, s, d, w)
+#define immu_flush_context_dsi(i, d, w) \
+ (i)->immu_flushops->imf_context_dsi(i, d, w)
+#define immu_flush_context_gbl(i, w) \
+ (i)->immu_flushops->imf_context_gbl(i, w)
+
+#define immu_flush_iotlb_psi(i, d, v, c, h, w) \
+ (i)->immu_flushops->imf_iotlb_psi(i, d, v, c, h, w)
+#define immu_flush_iotlb_dsi(i, d, w) \
+ (i)->immu_flushops->imf_iotlb_dsi(i, d, w)
+#define immu_flush_iotlb_gbl(i, w) \
+ (i)->immu_flushops->imf_iotlb_gbl(i, w)
-#define immu_flush_iotlb_psi(i, d, v, c, h) \
- (i)->immu_flushops->imf_iotlb_psi(i, d, v, c, h)
-#define immu_flush_iotlb_dsi(i, d) \
- (i)->immu_flushops->imf_iotlb_dsi(i, d)
-#define immu_flush_iotlb_gbl(i) \
- (i)->immu_flushops->imf_iotlb_gbl(i)
+#define immu_flush_wait(i, w) \
+ (i)->immu_flushops->imf_wait(w)
/*
* Globals used by IOMMU code
@@ -723,11 +807,11 @@ struct immu_flushops {
extern dev_info_t *root_devinfo;
extern kmutex_t immu_lock;
extern list_t immu_list;
-extern void *immu_pgtable_cache;
extern boolean_t immu_setup;
extern boolean_t immu_running;
extern kmutex_t ioapic_drhd_lock;
extern list_t ioapic_drhd_list;
+extern struct iommulib_ops immulib_ops;
/* switches */
@@ -751,6 +835,9 @@ extern int64_t immu_flush_gran;
extern immu_flags_t immu_global_dvma_flags;
+extern int immu_use_tm;
+extern int immu_use_alh;
+
/* ################### Interfaces exported outside IOMMU code ############## */
void immu_init(void);
void immu_startup(void);
@@ -766,12 +853,6 @@ int immu_unquiesce(void);
/* ######################################################################### */
/* ################# Interfaces used within IOMMU code #################### */
-
-/* functions in rootnex.c */
-int rootnex_dvcookies_alloc(ddi_dma_impl_t *hp,
- struct ddi_dma_req *dmareq, dev_info_t *rdip, void *arg);
-void rootnex_dvcookies_free(dvcookie_t *dvcookies, void *arg);
-
/* immu_dmar.c interfaces */
int immu_dmar_setup(void);
int immu_dmar_parse(void);
@@ -780,7 +861,6 @@ void immu_dmar_shutdown(void);
void immu_dmar_destroy(void);
boolean_t immu_dmar_blacklisted(char **strings_array, uint_t nstrings);
immu_t *immu_dmar_get_immu(dev_info_t *rdip);
-char *immu_dmar_unit_name(void *dmar_unit);
dev_info_t *immu_dmar_unit_dip(void *dmar_unit);
void immu_dmar_set_immu(void *dmar_unit, immu_t *immu);
void *immu_dmar_walk_units(int seg, void *dmar_unit);
@@ -793,6 +873,7 @@ void immu_dmar_rmrr_map(void);
int immu_walk_ancestor(dev_info_t *rdip, dev_info_t *ddip,
int (*func)(dev_info_t *, void *arg), void *arg,
int *level, immu_flags_t immu_flags);
+void immu_init_inv_wait(immu_inv_wait_t *iwp, const char *s, boolean_t sync);
/* immu_regs.c interfaces */
void immu_regs_setup(list_t *immu_list);
@@ -813,13 +894,14 @@ void immu_regs_wbf_flush(immu_t *immu);
void immu_regs_cpu_flush(immu_t *immu, caddr_t addr, uint_t size);
void immu_regs_context_fsi(immu_t *immu, uint8_t function_mask,
- uint16_t source_id, uint_t domain_id);
-void immu_regs_context_dsi(immu_t *immu, uint_t domain_id);
-void immu_regs_context_gbl(immu_t *immu);
+ uint16_t source_id, uint_t domain_id, immu_inv_wait_t *iwp);
+void immu_regs_context_dsi(immu_t *immu, uint_t domain_id,
+ immu_inv_wait_t *iwp);
+void immu_regs_context_gbl(immu_t *immu, immu_inv_wait_t *iwp);
void immu_regs_iotlb_psi(immu_t *immu, uint_t domain_id,
- uint64_t dvma, uint_t count, uint_t hint);
-void immu_regs_iotlb_dsi(immu_t *immu, uint_t domain_id);
-void immu_regs_iotlb_gbl(immu_t *immu);
+ uint64_t dvma, uint_t count, uint_t hint, immu_inv_wait_t *iwp);
+void immu_regs_iotlb_dsi(immu_t *immu, uint_t domain_id, immu_inv_wait_t *iwp);
+void immu_regs_iotlb_gbl(immu_t *immu, immu_inv_wait_t *iwp);
void immu_regs_set_root_table(immu_t *immu);
void immu_regs_qinv_enable(immu_t *immu, uint64_t qinv_reg_value);
@@ -838,17 +920,22 @@ void immu_dvma_shutdown(immu_t *immu);
void immu_dvma_destroy(list_t *immu_list);
void immu_dvma_physmem_update(uint64_t addr, uint64_t size);
-int immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *,
- uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags);
+int immu_map_memrange(dev_info_t *, memrng_t *);
+int immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
+ uint_t prealloc_count, dev_info_t *rdip);
int immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip);
-int immu_dvma_alloc(dvcookie_t *first_dvcookie, void *arg);
-void immu_dvma_free(dvcookie_t *first_dvcookie, void *arg);
int immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags);
immu_devi_t *immu_devi_get(dev_info_t *dip);
immu_t *immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags);
int pgtable_ctor(void *buf, void *arg, int kmflag);
void pgtable_dtor(void *buf, void *arg);
+int immu_hdl_priv_ctor(void *buf, void *arg, int kmf);
+
+int immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags);
+
+void immu_print_fault_info(uint_t sid, uint64_t dvma);
+
/* immu_intrmap.c interfaces */
void immu_intrmap_setup(list_t *immu_list);
void immu_intrmap_startup(immu_t *immu);
@@ -867,19 +954,37 @@ void immu_qinv_shutdown(immu_t *immu);
void immu_qinv_destroy(list_t *immu_list);
void immu_qinv_context_fsi(immu_t *immu, uint8_t function_mask,
- uint16_t source_id, uint_t domain_id);
-void immu_qinv_context_dsi(immu_t *immu, uint_t domain_id);
-void immu_qinv_context_gbl(immu_t *immu);
+ uint16_t source_id, uint_t domain_id, immu_inv_wait_t *iwp);
+void immu_qinv_context_dsi(immu_t *immu, uint_t domain_id,
+ immu_inv_wait_t *iwp);
+void immu_qinv_context_gbl(immu_t *immu, immu_inv_wait_t *iwp);
void immu_qinv_iotlb_psi(immu_t *immu, uint_t domain_id,
- uint64_t dvma, uint_t count, uint_t hint);
-void immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id);
-void immu_qinv_iotlb_gbl(immu_t *immu);
-
-void immu_qinv_intr_global(immu_t *immu);
-void immu_qinv_intr_one_cache(immu_t *immu, uint_t idx);
-void immu_qinv_intr_caches(immu_t *immu, uint_t idx, uint_t cnt);
+ uint64_t dvma, uint_t count, uint_t hint, immu_inv_wait_t *iwp);
+void immu_qinv_iotlb_dsi(immu_t *immu, uint_t domain_id, immu_inv_wait_t *iwp);
+void immu_qinv_iotlb_gbl(immu_t *immu, immu_inv_wait_t *iwp);
+
+void immu_qinv_intr_global(immu_t *immu, immu_inv_wait_t *iwp);
+void immu_qinv_intr_one_cache(immu_t *immu, uint_t idx, immu_inv_wait_t *iwp);
+void immu_qinv_intr_caches(immu_t *immu, uint_t idx, uint_t cnt,
+ immu_inv_wait_t *);
void immu_qinv_report_fault(immu_t *immu);
+#ifdef DEBUG
+#define IMMU_DPROBE1(name, type1, arg1) \
+ DTRACE_PROBE1(name, type1, arg1)
+#define IMMU_DPROBE2(name, type1, arg1, type2, arg2) \
+ DTRACE_PROBE2(name, type1, arg1, type2, arg2)
+#define IMMU_DPROBE3(name, type1, arg1, type2, arg2, type3, arg3) \
+ DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3)
+#define IMMU_DPROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \
+ DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4)
+#else
+#define IMMU_DPROBE1(name, type1, arg1)
+#define IMMU_DPROBE2(name, type1, arg1, type2, arg2)
+#define IMMU_DPROBE3(name, type1, arg1, type2, arg2, type3, arg3)
+#define IMMU_DPROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4)
+#endif
+
#ifdef __cplusplus
}
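
Most of the immu.h changes thread an immu_inv_wait_t through the context and IOTLB flush operations so an invalidation can either be waited on synchronously or completed later through the new imf_wait hook, with the status word moving from IMMU_INV_DATA_PENDING to IMMU_INV_DATA_DONE. The sketch below shows the general shape of such a wait descriptor; the field names and the busy-wait loop are illustrative, not the driver's actual implementation.

    #include <stdint.h>

    #define INV_PENDING 1   /* mirrors IMMU_INV_DATA_PENDING */
    #define INV_DONE    2   /* mirrors IMMU_INV_DATA_DONE */

    typedef struct inv_wait {
        volatile uint32_t iw_vstatus;   /* status word, written on completion */
        uint64_t iw_pstatus;            /* its physical address, for hardware */
        int iw_sync;                    /* wait immediately vs. wait later */
        const char *iw_name;            /* ID for debugging/statistics */
    } inv_wait_t;

    /* Arm the descriptor before queueing an invalidation. */
    static void
    inv_wait_init(inv_wait_t *iwp, const char *name, int sync)
    {
        iwp->iw_vstatus = INV_PENDING;
        iwp->iw_name = name;
        iwp->iw_sync = sync;
    }

    /* Poll until the completion write flips the status to DONE. */
    static void
    inv_wait(inv_wait_t *iwp)
    {
        while (iwp->iw_vstatus != INV_DONE)
            ;   /* a real kernel would pause or yield, not spin hot */
    }
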
diff --git a/usr/src/uts/i86pc/sys/machsystm.h b/usr/src/uts/i86pc/sys/machsystm.h
index a783e942f7..e61f1baa84 100644
--- a/usr/src/uts/i86pc/sys/machsystm.h
+++ b/usr/src/uts/i86pc/sys/machsystm.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
diff --git a/usr/src/uts/i86pc/sys/rm_platter.h b/usr/src/uts/i86pc/sys/rm_platter.h
index 48e141126b..9ca3a4908d 100644
--- a/usr/src/uts/i86pc/sys/rm_platter.h
+++ b/usr/src/uts/i86pc/sys/rm_platter.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -70,7 +69,7 @@ typedef struct rm_platter {
gate_desc_t *rm_idt_base;
uint_t rm_pdbr; /* cr3 value */
uint_t rm_cpu; /* easy way to know which CPU we are */
- uint_t rm_x86feature; /* X86 supported features */
+ uint_t rm_filler3;
uint_t rm_cr4; /* cr4 value on cpu0 */
#if defined(__amd64)
/*
diff --git a/usr/src/uts/i86pc/sys/rootnex.h b/usr/src/uts/i86pc/sys/rootnex.h
index c0e3199828..859157d1c8 100644
--- a/usr/src/uts/i86pc/sys/rootnex.h
+++ b/usr/src/uts/i86pc/sys/rootnex.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ROOTNEX_H
@@ -55,16 +54,22 @@ extern "C" {
#define ROOTNEX_DPROF_DEC(addr) atomic_add_64(addr, -1)
#define ROOTNEX_DPROBE1(name, type1, arg1) \
DTRACE_PROBE1(name, type1, arg1)
+#define ROOTNEX_DPROBE2(name, type1, arg1, type2, arg2) \
+ DTRACE_PROBE2(name, type1, arg1, type2, arg2)
#define ROOTNEX_DPROBE3(name, type1, arg1, type2, arg2, type3, arg3) \
DTRACE_PROBE3(name, type1, arg1, type2, arg2, type3, arg3)
+#define ROOTNEX_DPROBE4(name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4) \
+ DTRACE_PROBE4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4)
#else
#define ROOTNEX_DPROF_INC(addr)
#define ROOTNEX_DPROF_DEC(addr)
#define ROOTNEX_DPROBE1(name, type1, arg1)
+#define ROOTNEX_DPROBE2(name, type1, arg1, type2, arg2)
#define ROOTNEX_DPROBE3(name, type1, arg1, type2, arg2, type3, arg3)
+#define ROOTNEX_DPROBE4(name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4)
#endif
-#define ROOTNEX_PROF_INC(addr) atomic_inc_64(addr)
-#define ROOTNEX_PROF_DEC(addr) atomic_add_64(addr, -1)
/* set in dmac_type to signify that this cookie uses the copy buffer */
#define ROOTNEX_USES_COPYBUF 0x80000000
@@ -84,6 +89,12 @@ typedef struct rootnex_intprop_s {
*/
typedef struct rootnex_sglinfo_s {
/*
+ * Used to simplify calculations to get the maximum number
+ * of cookies.
+ */
+ boolean_t si_cancross;
+
+ /*
* These are passed into rootnex_get_sgl().
*
* si_min_addr - the minimum physical address
@@ -210,16 +221,6 @@ typedef struct rootnex_window_s {
#endif
} rootnex_window_t;
-typedef struct dvcookie {
- uint64_t dvck_dvma;
- uint64_t dvck_npages;
-} dvcookie_t;
-
-typedef struct dcookie {
- paddr_t dck_paddr;
- uint64_t dck_npages;
-} dcookie_t;
-
/* per dma handle private state */
typedef struct rootnex_dma_s {
/*
@@ -241,7 +242,10 @@ typedef struct rootnex_dma_s {
boolean_t dp_trim_required;
boolean_t dp_granularity_power_2;
uint64_t dp_maxxfer;
+
+ boolean_t dp_dvma_used;
ddi_dma_obj_t dp_dma;
+ ddi_dma_obj_t dp_dvma;
rootnex_sglinfo_t dp_sglinfo;
/*
@@ -315,6 +319,8 @@ typedef struct rootnex_dma_s {
ddi_dma_cookie_t *dp_saved_cookies;
boolean_t dp_need_to_switch_cookies;
+ void *dp_iommu_private;
+
/*
* pre allocated space for the bind state, allocated during alloc
* handle. For a lot of devices, this will save us from having to do
@@ -325,18 +331,6 @@ typedef struct rootnex_dma_s {
uchar_t *dp_prealloc_buffer;
/*
- * Intel IOMMU (immu) related state
- * dv_cookies saves the dvma allocated for this handler
- * max index of dvcookies in dvmax
- */
- dvcookie_t *dp_dvcookies;
- uint64_t dp_dvmax;
- dcookie_t *dp_dcookies;
- uint64_t dp_dmax;
- uint64_t dp_max_cookies;
- uint64_t dp_max_dcookies;
-
- /*
* sleep flags set on bind and unset on unbind
*/
int dp_sleep_flags;
diff --git a/usr/src/uts/i86pc/sys/smp_impldefs.h b/usr/src/uts/i86pc/sys/smp_impldefs.h
index 6afce7fd6c..77a203042c 100644
--- a/usr/src/uts/i86pc/sys/smp_impldefs.h
+++ b/usr/src/uts/i86pc/sys/smp_impldefs.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_SMP_IMPLDEFS_H
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 092cd1659b..bc8e3e197f 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -491,7 +491,7 @@ set_max_page_level()
if (!kbm_largepage_support) {
lvl = 0;
} else {
- if (x86_feature & X86_1GPG) {
+ if (is_x86_feature(x86_featureset, X86FSET_1GPG)) {
lvl = 2;
if (chk_optimal_1gtlb &&
cpuid_opteron_erratum(CPU, 6671130)) {
@@ -528,7 +528,8 @@ mmu_init(void)
* If CPU enabled the page table global bit, use it for the kernel
* This is bit 7 in CR4 (PGE - Page Global Enable).
*/
- if ((x86_feature & X86_PGE) != 0 && (getcr4() & CR4_PGE) != 0)
+ if (is_x86_feature(x86_featureset, X86FSET_PGE) &&
+ (getcr4() & CR4_PGE) != 0)
mmu.pt_global = PT_GLOBAL;
/*
@@ -576,10 +577,10 @@ mmu_init(void)
mmu.pte_size_shift = 2;
}
- if (mmu.pae_hat && (x86_feature & X86_PAE) == 0)
+ if (mmu.pae_hat && !is_x86_feature(x86_featureset, X86FSET_PAE))
panic("Processor does not support PAE");
- if ((x86_feature & X86_CX8) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_CX8))
panic("Processor does not support cmpxchg8b instruction");
#if defined(__amd64)
@@ -1095,7 +1096,7 @@ hati_mkpte(pfn_t pfn, uint_t attr, level_t level, uint_t flags)
/* nothing to set */;
} else if (cache_attr & (HAT_MERGING_OK | HAT_LOADCACHING_OK)) {
PTE_SET(pte, PT_NOCACHE);
- if (x86_feature & X86_PAT)
+ if (is_x86_feature(x86_featureset, X86FSET_PAT))
PTE_SET(pte, (level == 0) ? PT_PAT_4K : PT_PAT_LARGE);
else
PTE_SET(pte, PT_WRITETHRU);
diff --git a/usr/src/uts/i86pc/vm/htable.c b/usr/src/uts/i86pc/vm/htable.c
index 3bc7eb254d..bcb2b117a3 100644
--- a/usr/src/uts/i86pc/vm/htable.c
+++ b/usr/src/uts/i86pc/vm/htable.c
@@ -2416,7 +2416,7 @@ x86pte_zero(htable_t *dest, uint_t entry, uint_t count)
size = count << mmu.pte_size_shift;
ASSERT(size > BLOCKZEROALIGN);
#ifdef __i386
- if ((x86_feature & X86_SSE2) == 0)
+ if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
bzero(dst_va, size);
else
#endif
diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h
index 753aa9d146..dbad783383 100644
--- a/usr/src/uts/i86pc/vm/vm_dep.h
+++ b/usr/src/uts/i86pc/vm/vm_dep.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -55,7 +54,7 @@ extern "C" {
* correct tick value. The proper routine to use is tsc_read().
*/
-extern hrtime_t randtick();
+extern u_longlong_t randtick();
extern uint_t page_create_update_flags_x86(uint_t);
extern size_t plcnt_sz(size_t);