diff options
author | Richard Lowe <richlowe@richlowe.net> | 2021-06-04 15:15:12 -0500 |
---|---|---|
committer | Richard Lowe <richlowe@richlowe.net> | 2021-08-16 12:46:39 -0500 |
commit | f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988 (patch) | |
tree | c4ac2f5e703ed459d50bcee7ddb38a993d961520 /usr/src/uts/intel/ia32/os/desctbls.c | |
parent | d083fed0c91296a88878f7a468910ad5b5c888ea (diff) | |
download | illumos-joyent-f0089e391b2bc4be2755f1a1b51fb4cd9b8f3988.tar.gz |
13941 intel code and headers should not look ia32 specific
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Patrick Mooney <pmooney@pfmooney.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/intel/ia32/os/desctbls.c')
-rw-r--r-- | usr/src/uts/intel/ia32/os/desctbls.c | 1218 |
1 files changed, 0 insertions, 1218 deletions
diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c deleted file mode 100644 index 35345c3fe8..0000000000 --- a/usr/src/uts/intel/ia32/os/desctbls.c +++ /dev/null @@ -1,1218 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - */ - -/* - * Copyright 2018 Joyent, Inc. All rights reserved. - */ - -/* - * Copyright (c) 1992 Terrence R. Lambert. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - */ - -#include <sys/types.h> -#include <sys/sysmacros.h> -#include <sys/tss.h> -#include <sys/segments.h> -#include <sys/trap.h> -#include <sys/cpuvar.h> -#include <sys/bootconf.h> -#include <sys/x86_archext.h> -#include <sys/controlregs.h> -#include <sys/archsystm.h> -#include <sys/machsystm.h> -#include <sys/kobj.h> -#include <sys/cmn_err.h> -#include <sys/reboot.h> -#include <sys/kdi.h> -#include <sys/mach_mmu.h> -#include <sys/systm.h> -#include <sys/note.h> - -#ifdef __xpv -#include <sys/hypervisor.h> -#include <vm/as.h> -#endif - -#include <sys/promif.h> -#include <sys/bootinfo.h> -#include <vm/kboot_mmu.h> -#include <vm/hat_pte.h> - -/* - * cpu0 and default tables and structures. 
- */ -user_desc_t *gdt0; -#if !defined(__xpv) -desctbr_t gdt0_default_r; -#endif - -gate_desc_t *idt0; /* interrupt descriptor table */ - -tss_t *ktss0; /* kernel task state structure */ - - -user_desc_t zero_udesc; /* base zero user desc native procs */ -user_desc_t null_udesc; /* null user descriptor */ -system_desc_t null_sdesc; /* null system descriptor */ - -user_desc_t zero_u32desc; /* 32-bit compatibility procs */ - -user_desc_t ucs_on; -user_desc_t ucs_off; -user_desc_t ucs32_on; -user_desc_t ucs32_off; - -/* - * If the size of this is changed, you must update hat_pcp_setup() and the - * definitions in exception.s - */ -extern char dblfault_stack0[DEFAULTSTKSZ]; -extern char nmi_stack0[DEFAULTSTKSZ]; -extern char mce_stack0[DEFAULTSTKSZ]; - -extern void fast_null(void); -extern hrtime_t get_hrtime(void); -extern hrtime_t gethrvtime(void); -extern hrtime_t get_hrestime(void); -extern uint64_t getlgrp(void); - -void (*(fasttable[]))(void) = { - fast_null, /* T_FNULL routine */ - fast_null, /* T_FGETFP routine (initially null) */ - fast_null, /* T_FSETFP routine (initially null) */ - (void (*)())(uintptr_t)get_hrtime, /* T_GETHRTIME */ - (void (*)())(uintptr_t)gethrvtime, /* T_GETHRVTIME */ - (void (*)())(uintptr_t)get_hrestime, /* T_GETHRESTIME */ - (void (*)())(uintptr_t)getlgrp /* T_GETLGRP */ -}; - -/* - * Structure containing pre-computed descriptors to allow us to temporarily - * interpose on a standard handler. - */ -struct interposing_handler { - int ih_inum; - gate_desc_t ih_interp_desc; - gate_desc_t ih_default_desc; -}; - -/* - * The brand infrastructure interposes on one handler (T_SYSCALLINT), and we - * use the second table entry as a NULL signpost. - */ -static struct interposing_handler brand_tbl[2]; - -/* - * software prototypes for default local descriptor table - */ - -/* - * Routines for loading segment descriptors in a format the hardware - * can understand. - */ - -/* - * In long mode we have the new L or long mode attribute bit - * for code segments. 
Only the conforming bit in type is used along - * with descriptor priority and present bits. Default operand size must - * be zero when in long mode. In 32-bit compatibility mode all fields - * are treated as in legacy mode. For data segments while in long mode - * only the present bit is loaded. - */ -void -set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size, - uint_t type, uint_t dpl, uint_t gran, uint_t defopsz) -{ - ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG); - /* This should never be a "system" segment. */ - ASSERT3U(type & SDT_S, !=, 0); - - /* - * 64-bit long mode. - */ - if (lmode == SDP_LONG) - dp->usd_def32 = 0; /* 32-bit operands only */ - else - /* - * 32-bit compatibility mode. - */ - dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */ - - /* - * We should always set the "accessed" bit (SDT_A), otherwise the CPU - * will write to the GDT whenever we change segment registers around. - * With KPTI on, the GDT is read-only in the user page table, which - * causes crashes if we don't set this. - */ - ASSERT3U(type & SDT_A, !=, 0); - - dp->usd_long = lmode; /* 64-bit mode */ - dp->usd_type = type; - dp->usd_dpl = dpl; - dp->usd_p = 1; - dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ - - dp->usd_lobase = (uintptr_t)base; - dp->usd_midbase = (uintptr_t)base >> 16; - dp->usd_hibase = (uintptr_t)base >> (16 + 8); - dp->usd_lolimit = size; - dp->usd_hilimit = (uintptr_t)size >> 16; -} - -/* - * Install system segment descriptor for LDT and TSS segments. 
- */ - -void -set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, - uint_t dpl) -{ - dp->ssd_lolimit = size; - dp->ssd_hilimit = (uintptr_t)size >> 16; - - dp->ssd_lobase = (uintptr_t)base; - dp->ssd_midbase = (uintptr_t)base >> 16; - dp->ssd_hibase = (uintptr_t)base >> (16 + 8); - dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8); - - dp->ssd_type = type; - dp->ssd_zero1 = 0; /* must be zero */ - dp->ssd_zero2 = 0; - dp->ssd_dpl = dpl; - dp->ssd_p = 1; - dp->ssd_gran = 0; /* force byte units */ -} - -void * -get_ssd_base(system_desc_t *dp) -{ - uintptr_t base; - - base = (uintptr_t)dp->ssd_lobase | - (uintptr_t)dp->ssd_midbase << 16 | - (uintptr_t)dp->ssd_hibase << (16 + 8) | - (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8); - return ((void *)base); -} - -/* - * Install gate segment descriptor for interrupt, trap, call and task gates. - * - * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on - * all interrupts. We have different ISTs for each class of exceptions that are - * most likely to occur while handling an existing exception; while many of - * these are just going to panic, it's nice not to trample on the existing - * exception state for debugging purposes. - * - * Normal interrupts are all redirected unconditionally to the KPTI trampoline - * stack space. This unifies the trampoline handling between user and kernel - * space (and avoids the need to touch %gs). - * - * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when - * we do a read from KMDB that causes another #PF. Without its own IST, this - * would stomp on the kernel's mcpu_kpti_flt frame. - */ -uint_t -idt_vector_to_ist(uint_t vector) -{ -#if defined(__xpv) - _NOTE(ARGUNUSED(vector)); - return (IST_NONE); -#else - switch (vector) { - /* These should always use IST even without KPTI enabled. 
*/ - case T_DBLFLT: - return (IST_DF); - case T_NMIFLT: - return (IST_NMI); - case T_MCE: - return (IST_MCE); - - case T_BPTFLT: - case T_SGLSTP: - if (kpti_enable == 1) { - return (IST_DBG); - } - return (IST_NONE); - case T_STKFLT: - case T_GPFLT: - case T_PGFLT: - if (kpti_enable == 1) { - return (IST_NESTABLE); - } - return (IST_NONE); - default: - if (kpti_enable == 1) { - return (IST_DEFAULT); - } - return (IST_NONE); - } -#endif -} - -void -set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, - uint_t type, uint_t dpl, uint_t ist) -{ - dp->sgd_looffset = (uintptr_t)func; - dp->sgd_hioffset = (uintptr_t)func >> 16; - dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16); - dp->sgd_selector = (uint16_t)sel; - dp->sgd_ist = ist; - dp->sgd_type = type; - dp->sgd_dpl = dpl; - dp->sgd_p = 1; -} - -/* - * Updates a single user descriptor in the GDT of the current cpu. - * Caller is responsible for preventing cpu migration. - */ - -void -gdt_update_usegd(uint_t sidx, user_desc_t *udp) -{ -#if defined(DEBUG) - /* This should never be a "system" segment, but it might be null. */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_S, !=, 0); - } - /* - * We should always set the "accessed" bit (SDT_A), otherwise the CPU - * will write to the GDT whenever we change segment registers around. - * With KPTI on, the GDT is read-only in the user page table, which - * causes crashes if we don't set this. - */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_A, !=, 0); - } -#endif - -#if defined(__xpv) - uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx; - - if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp)) - panic("gdt_update_usegd: HYPERVISOR_update_descriptor"); - -#else /* __xpv */ - CPU->cpu_gdt[sidx] = *udp; -#endif /* __xpv */ -} - -/* - * Writes single descriptor pointed to by udp into a process's - * LDT entry pointed to by ldp. 
- */ -int -ldt_update_segd(user_desc_t *ldp, user_desc_t *udp) -{ -#if defined(DEBUG) - /* This should never be a "system" segment, but it might be null. */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_S, !=, 0); - } - /* - * We should always set the "accessed" bit (SDT_A), otherwise the CPU - * will write to the LDT whenever we change segment registers around. - * With KPTI on, the LDT is read-only in the user page table, which - * causes crashes if we don't set this. - */ - if (udp->usd_p != 0 || udp->usd_type != 0) { - ASSERT3U(udp->usd_type & SDT_A, !=, 0); - } -#endif - -#if defined(__xpv) - uint64_t dpa; - - dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) | - ((uintptr_t)ldp & PAGEOFFSET); - - /* - * The hypervisor is a little more restrictive about what it - * supports in the LDT. - */ - if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0) - return (EINVAL); - -#else /* __xpv */ - *ldp = *udp; - -#endif /* __xpv */ - return (0); -} - -#if defined(__xpv) - -/* - * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor. - * Returns true if a valid entry was written. - */ -int -xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg) -{ - trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */ - - /* - * skip holes in the IDT - */ - if (GATESEG_GETOFFSET(sgd) == 0) - return (0); - - ASSERT(sgd->sgd_type == SDT_SYSIGT); - ti->vector = vec; - TI_SET_DPL(ti, sgd->sgd_dpl); - - /* - * Is this an interrupt gate? - */ - if (sgd->sgd_type == SDT_SYSIGT) { - /* LINTED */ - TI_SET_IF(ti, 1); - } - ti->cs = sgd->sgd_selector; - ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */ - ti->address = GATESEG_GETOFFSET(sgd); - return (1); -} - -/* - * Convert a single hw format gate descriptor and write it into our virtual IDT. 
- */ -void -xen_idt_write(gate_desc_t *sgd, uint_t vec) -{ - trap_info_t trapinfo[2]; - - bzero(trapinfo, sizeof (trapinfo)); - if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0) - return; - if (xen_set_trap_table(trapinfo) != 0) - panic("xen_idt_write: xen_set_trap_table() failed"); -} - -#endif /* __xpv */ - - -/* - * Build kernel GDT. - */ - -static void -init_gdt_common(user_desc_t *gdt) -{ - int i; - - /* - * 64-bit kernel code segment. - */ - set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL, - SDP_PAGES, SDP_OP32); - - /* - * 64-bit kernel data segment. The limit attribute is ignored in 64-bit - * mode, but we set it here to 0xFFFF so that we can use the SYSRET - * instruction to return from system calls back to 32-bit applications. - * SYSRET doesn't update the base, limit, or attributes of %ss or %ds - * descriptors. We therefore must ensure that the kernel uses something, - * though it will be ignored by hardware, that is compatible with 32-bit - * apps. For the same reason we must set the default op size of this - * descriptor to 32-bit operands. - */ - set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA, - SEL_KPL, SDP_PAGES, SDP_OP32); - gdt[GDT_KDATA].usd_def32 = 1; - - /* - * 64-bit user code segment. - */ - set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL, - SDP_PAGES, SDP_OP32); - - /* - * 32-bit user code segment. - */ - set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA, - SEL_UPL, SDP_PAGES, SDP_OP32); - - /* - * See gdt_ucode32() and gdt_ucode_native(). - */ - ucs_on = ucs_off = gdt[GDT_UCODE]; - ucs_off.usd_p = 0; /* forces #np fault */ - - ucs32_on = ucs32_off = gdt[GDT_U32CODE]; - ucs32_off.usd_p = 0; /* forces #np fault */ - - /* - * 32 and 64 bit data segments can actually share the same descriptor. - * In long mode only the present bit is checked but all other fields - * are loaded. 
But in compatibility mode all fields are interpreted - * as in legacy mode so they must be set correctly for a 32-bit data - * segment. - */ - set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL, - SDP_PAGES, SDP_OP32); - -#if !defined(__xpv) - - /* - * The 64-bit kernel has no default LDT. By default, the LDT descriptor - * in the GDT is 0. - */ - - /* - * Kernel TSS - */ - set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, - sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); - -#endif /* !__xpv */ - - /* - * Initialize fs and gs descriptors for 32 bit processes. - * Only attributes and limits are initialized, the effective - * base address is programmed via fsbase/gsbase. - */ - set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA, - SEL_UPL, SDP_PAGES, SDP_OP32); - set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA, - SEL_UPL, SDP_PAGES, SDP_OP32); - - /* - * Initialize the descriptors set aside for brand usage. - * Only attributes and limits are initialized. - */ - for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) - set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA, - SEL_UPL, SDP_PAGES, SDP_OP32); - - /* - * Initialize convenient zero base user descriptors for clearing - * lwp private %fs and %gs descriptors in GDT. See setregs() for - * an example. - */ - set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL, - SDP_BYTES, SDP_OP32); - set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL, - SDP_PAGES, SDP_OP32); -} - -#if defined(__xpv) - -static user_desc_t * -init_gdt(void) -{ - uint64_t gdtpa; - ulong_t ma[1]; /* XXPV should be a memory_t */ - ulong_t addr; - -#if !defined(__lint) - /* - * Our gdt is never larger than a single page. 
- */ - ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); -#endif - gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, - PAGESIZE, PAGESIZE); - bzero(gdt0, PAGESIZE); - - init_gdt_common(gdt0); - - /* - * XXX Since we never invoke kmdb until after the kernel takes - * over the descriptor tables why not have it use the kernel's - * selectors? - */ - if (boothowto & RB_DEBUG) { - set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, - SEL_KPL, SDP_PAGES, SDP_OP32); - set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, - SEL_KPL, SDP_PAGES, SDP_OP32); - } - - /* - * Clear write permission for page containing the gdt and install it. - */ - gdtpa = pfn_to_pa(va_to_pfn(gdt0)); - ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); - kbm_read_only((uintptr_t)gdt0, gdtpa); - xen_set_gdt(ma, NGDT); - - /* - * Reload the segment registers to use the new GDT. - * On 64-bit, fixup KCS_SEL to be in ring 3. - * See KCS_SEL in segments.h. - */ - load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL); - - /* - * setup %gs for kernel - */ - xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]); - - /* - * XX64 We should never dereference off "other gsbase" or - * "fsbase". So, we should arrange to point FSBASE and - * KGSBASE somewhere truly awful e.g. point it at the last - * valid address below the hole so that any attempts to index - * off them cause an exception. - * - * For now, point it at 8G -- at least it should be unmapped - * until some 64-bit processes run. - */ - addr = 0x200000000ul; - xen_set_segment_base(SEGBASE_FS, addr); - xen_set_segment_base(SEGBASE_GS_USER, addr); - xen_set_segment_base(SEGBASE_GS_USER_SEL, 0); - - return (gdt0); -} - -#else /* __xpv */ - -static user_desc_t * -init_gdt(void) -{ - desctbr_t r_bgdt, r_gdt; - user_desc_t *bgdt; - -#if !defined(__lint) - /* - * Our gdt is never larger than a single page. 
- */ - ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); -#endif - gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, - PAGESIZE, PAGESIZE); - bzero(gdt0, PAGESIZE); - - init_gdt_common(gdt0); - - /* - * Copy in from boot's gdt to our gdt. - * Entry 0 is the null descriptor by definition. - */ - rd_gdtr(&r_bgdt); - bgdt = (user_desc_t *)r_bgdt.dtr_base; - if (bgdt == NULL) - panic("null boot gdt"); - - gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; - gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; - gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; - gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; - gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE]; - - /* - * Install our new GDT - */ - r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; - r_gdt.dtr_base = (uintptr_t)gdt0; - wr_gdtr(&r_gdt); - - /* - * Reload the segment registers to use the new GDT - */ - load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); - - /* - * setup %gs for kernel - */ - wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]); - - /* - * XX64 We should never dereference off "other gsbase" or - * "fsbase". So, we should arrange to point FSBASE and - * KGSBASE somewhere truly awful e.g. point it at the last - * valid address below the hole so that any attempts to index - * off them cause an exception. - * - * For now, point it at 8G -- at least it should be unmapped - * until some 64-bit processes run. - */ - wrmsr(MSR_AMD_FSBASE, 0x200000000ul); - wrmsr(MSR_AMD_KGSBASE, 0x200000000ul); - return (gdt0); -} - -#endif /* __xpv */ - - -/* - * Build kernel IDT. - * - * Note that for amd64 we pretty much require every gate to be an interrupt - * gate which blocks interrupts atomically on entry; that's because of our - * dependency on using 'swapgs' every time we come into the kernel to find - * the cpu structure. If we get interrupted just before doing that, %cs could - * be in kernel mode (so that the trap prolog doesn't do a swapgs), but - * %gsbase is really still pointing at something in userland. Bad things will - * ensue. 
We also use interrupt gates for i386 as well even though this is not - * required for some traps. - * - * Perhaps they should have invented a trap gate that does an atomic swapgs? - */ -static void -init_idt_common(gate_desc_t *idt) -{ - set_gatesegd(&idt[T_ZERODIV], - (kpti_enable == 1) ? &tr_div0trap : &div0trap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV)); - set_gatesegd(&idt[T_SGLSTP], - (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP)); - set_gatesegd(&idt[T_NMIFLT], - (kpti_enable == 1) ? &tr_nmiint : &nmiint, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT)); - set_gatesegd(&idt[T_BPTFLT], - (kpti_enable == 1) ? &tr_brktrap : &brktrap, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT)); - set_gatesegd(&idt[T_OVFLW], - (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW)); - set_gatesegd(&idt[T_BOUNDFLT], - (kpti_enable == 1) ? &tr_boundstrap : &boundstrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT)); - set_gatesegd(&idt[T_ILLINST], - (kpti_enable == 1) ? &tr_invoptrap : &invoptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST)); - set_gatesegd(&idt[T_NOEXTFLT], - (kpti_enable == 1) ? &tr_ndptrap : &ndptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT)); - - /* - * double fault handler. - * - * Note that on the hypervisor a guest does not receive #df faults. - * Instead a failsafe event is injected into the guest if its selectors - * and/or stack is in a broken state. See xen_failsafe_callback. - */ -#if !defined(__xpv) - set_gatesegd(&idt[T_DBLFLT], - (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT)); -#endif /* !__xpv */ - - /* - * T_EXTOVRFLT coprocessor-segment-overrun not supported. - */ - set_gatesegd(&idt[T_TSSFLT], - (kpti_enable == 1) ? 
&tr_invtsstrap : &invtsstrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT)); - set_gatesegd(&idt[T_SEGFLT], - (kpti_enable == 1) ? &tr_segnptrap : &segnptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT)); - set_gatesegd(&idt[T_STKFLT], - (kpti_enable == 1) ? &tr_stktrap : &stktrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT)); - set_gatesegd(&idt[T_GPFLT], - (kpti_enable == 1) ? &tr_gptrap : &gptrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT)); - set_gatesegd(&idt[T_PGFLT], - (kpti_enable == 1) ? &tr_pftrap : &pftrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT)); - set_gatesegd(&idt[T_EXTERRFLT], - (kpti_enable == 1) ? &tr_ndperr : &ndperr, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT)); - set_gatesegd(&idt[T_ALIGNMENT], - (kpti_enable == 1) ? &tr_achktrap : &achktrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT)); - set_gatesegd(&idt[T_MCE], - (kpti_enable == 1) ? &tr_mcetrap : &mcetrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE)); - set_gatesegd(&idt[T_SIMDFPE], - (kpti_enable == 1) ? &tr_xmtrap : &xmtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE)); - - /* - * install fast trap handler at 210. - */ - set_gatesegd(&idt[T_FASTTRAP], - (kpti_enable == 1) ? &tr_fasttrap : &fasttrap, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP)); - - /* - * System call handler. - */ - set_gatesegd(&idt[T_SYSCALLINT], - (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT)); - - /* - * Install the DTrace interrupt handler for the pid provider. - */ - set_gatesegd(&idt[T_DTRACE_RET], - (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret, - KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET)); - - /* - * Prepare interposing descriptor for the syscall handler - * and cache copy of the default descriptor. 
- */ - brand_tbl[0].ih_inum = T_SYSCALLINT; - brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT]; - - set_gatesegd(&(brand_tbl[0].ih_interp_desc), - (kpti_enable == 1) ? &tr_brand_sys_syscall_int : - &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL, - idt_vector_to_ist(T_SYSCALLINT)); - - brand_tbl[1].ih_inum = 0; -} - -#if defined(__xpv) - -static void -init_idt(gate_desc_t *idt) -{ - init_idt_common(idt); -} - -#else /* __xpv */ - -static void -init_idt(gate_desc_t *idt) -{ - char ivctname[80]; - void (*ivctptr)(void); - int i; - - /* - * Initialize entire table with 'reserved' trap and then overwrite - * specific entries. T_EXTOVRFLT (9) is unsupported and reserved - * since it can only be generated on a 386 processor. 15 is also - * unsupported and reserved. - */ -#if !defined(__xpv) - for (i = 0; i < NIDT; i++) { - set_gatesegd(&idt[i], - (kpti_enable == 1) ? &tr_resvtrap : &resvtrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, - idt_vector_to_ist(T_RESVTRAP)); - } -#else - for (i = 0; i < NIDT; i++) { - set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, - IST_NONE); - } -#endif - - /* - * 20-31 reserved - */ -#if !defined(__xpv) - for (i = 20; i < 32; i++) { - set_gatesegd(&idt[i], - (kpti_enable == 1) ? &tr_invaltrap : &invaltrap, - KCS_SEL, SDT_SYSIGT, TRP_KPL, - idt_vector_to_ist(T_INVALTRAP)); - } -#else - for (i = 20; i < 32; i++) { - set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, - IST_NONE); - } -#endif - - /* - * interrupts 32 - 255 - */ - for (i = 32; i < 256; i++) { -#if !defined(__xpv) - (void) snprintf(ivctname, sizeof (ivctname), - (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i); -#else - (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i); -#endif - ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0); - if (ivctptr == NULL) - panic("kobj_getsymvalue(%s) failed", ivctname); - - set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, - idt_vector_to_ist(i)); - } - - /* - * Now install the common ones. 
Note that it will overlay some - * entries installed above like T_SYSCALLINT, T_FASTTRAP etc. - */ - init_idt_common(idt); -} - -#endif /* __xpv */ - -/* - * The kernel does not deal with LDTs unless a user explicitly creates - * one. Under normal circumstances, the LDTR contains 0. Any process attempting - * to reference the LDT will therefore cause a #gp. System calls made via the - * obsolete lcall mechanism are emulated by the #gp fault handler. - */ -static void -init_ldt(void) -{ -#if defined(__xpv) - xen_set_ldt(NULL, 0); -#else - wr_ldtr(0); -#endif -} - -#if !defined(__xpv) - -static void -init_tss(void) -{ - extern struct cpu cpus[]; - - /* - * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each - * context switch but it'll be overwritten with this same value anyway. - */ - if (kpti_enable == 1) { - ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp; - } - - /* Set up the IST stacks for double fault, NMI, MCE. */ - ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)]; - ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)]; - ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)]; - - /* - * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is - * enabled), and also for KDI (always). - */ - ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp; - - if (kpti_enable == 1) { - /* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */ - ktss0->tss_ist5 = - (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp; - - /* This IST stack is used for all other intrs (for KPTI). */ - ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp; - } - - /* - * Set I/O bit map offset equal to size of TSS segment limit - * for no I/O permission map. This will force all user I/O - * instructions to generate #gp fault. - */ - ktss0->tss_bitmapbase = sizeof (*ktss0); - - /* - * Point %tr to descriptor for ktss0 in gdt. 
- */ - wr_tsr(KTSS_SEL); -} - -#endif /* !__xpv */ - -#if defined(__xpv) - -void -init_desctbls(void) -{ - uint_t vec; - user_desc_t *gdt; - - /* - * Setup and install our GDT. - */ - gdt = init_gdt(); - - /* - * Store static pa of gdt to speed up pa_to_ma() translations - * on lwp context switches. - */ - ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); - CPU->cpu_gdt = gdt; - CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt)); - - /* - * Setup and install our IDT. - */ -#if !defined(__lint) - ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); -#endif - idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, - PAGESIZE, PAGESIZE); - bzero(idt0, PAGESIZE); - init_idt(idt0); - for (vec = 0; vec < NIDT; vec++) - xen_idt_write(&idt0[vec], vec); - - CPU->cpu_idt = idt0; - - /* - * set default kernel stack - */ - xen_stack_switch(KDS_SEL, - (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]); - - xen_init_callbacks(); - - init_ldt(); -} - -#else /* __xpv */ - -void -init_desctbls(void) -{ - user_desc_t *gdt; - desctbr_t idtr; - - /* - * Allocate IDT and TSS structures on unique pages for better - * performance in virtual machines. - */ -#if !defined(__lint) - ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); -#endif - idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, - PAGESIZE, PAGESIZE); - bzero(idt0, PAGESIZE); -#if !defined(__lint) - ASSERT(sizeof (*ktss0) <= PAGESIZE); -#endif - ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA, - PAGESIZE, PAGESIZE); - bzero(ktss0, PAGESIZE); - - - /* - * Setup and install our GDT. - */ - gdt = init_gdt(); - ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); - CPU->cpu_gdt = gdt; - - /* - * Initialize this CPU's LDT. - */ - CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA, - LDT_CPU_SIZE, PAGESIZE); - bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE); - CPU->cpu_m.mcpu_ldt_len = 0; - - /* - * Setup and install our IDT. 
- */ - init_idt(idt0); - - idtr.dtr_base = (uintptr_t)idt0; - idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1; - wr_idtr(&idtr); - CPU->cpu_idt = idt0; - - - init_tss(); - CPU->cpu_tss = ktss0; - init_ldt(); - - /* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */ - kpti_safe_cr3 = (uint64_t)getcr3(); -} - -#endif /* __xpv */ - -#ifndef __xpv -/* - * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 64Kb on a VM exit, so - * we have to manually fix it up ourselves. - * - * The caller may still need to make sure that it can't go off-CPU with the - * incorrect limit, before calling this (such as disabling pre-emption). - */ -void -reset_gdtr_limit(void) -{ - ulong_t flags = intr_clear(); - desctbr_t gdtr; - - rd_gdtr(&gdtr); - gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1; - wr_gdtr(&gdtr); - - intr_restore(flags); -} -#endif /* __xpv */ - -/* - * In the early kernel, we need to set up a simple GDT to run on. - * - * XXPV Can dboot use this too? See dboot_gdt.s - */ -void -init_boot_gdt(user_desc_t *bgdt) -{ - set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL, - SDP_PAGES, SDP_OP32); - set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL, - SDP_PAGES, SDP_OP32); -} - -/* - * Enable interpositioning on the system call path by rewriting the - * sys{call|enter} MSRs and the syscall-related entries in the IDT to use - * the branded entry points. - */ -void -brand_interpositioning_enable(void) -{ - gate_desc_t *idt = CPU->cpu_idt; - int i; - - ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); - - for (i = 0; brand_tbl[i].ih_inum; i++) { - idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc; -#if defined(__xpv) - xen_idt_write(&idt[brand_tbl[i].ih_inum], - brand_tbl[i].ih_inum); -#endif - } - -#if defined(__xpv) - - /* - * Currently the hypervisor only supports 64-bit syscalls via - * syscall instruction. The 32-bit syscalls are handled by - * interrupt gate above. 
- */ - xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall, - CALLBACKF_mask_events); - -#else - - if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { - if (kpti_enable == 1) { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32); - } else { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32); - } - } - -#endif - - if (is_x86_feature(x86_featureset, X86FSET_SEP)) { - if (kpti_enable == 1) { - wrmsr(MSR_INTC_SEP_EIP, - (uintptr_t)tr_brand_sys_sysenter); - } else { - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter); - } - } -} - -/* - * Disable interpositioning on the system call path by rewriting the - * sys{call|enter} MSRs and the syscall-related entries in the IDT to use - * the standard entry points, which bypass the interpositioning hooks. - */ -void -brand_interpositioning_disable(void) -{ - gate_desc_t *idt = CPU->cpu_idt; - int i; - - ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); - - for (i = 0; brand_tbl[i].ih_inum; i++) { - idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc; -#if defined(__xpv) - xen_idt_write(&idt[brand_tbl[i].ih_inum], - brand_tbl[i].ih_inum); -#endif - } - -#if defined(__xpv) - - /* - * See comment above in brand_interpositioning_enable. - */ - xen_set_callback(sys_syscall, CALLBACKTYPE_syscall, - CALLBACKF_mask_events); - -#else - - if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { - if (kpti_enable == 1) { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32); - } else { - wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall); - wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32); - } - } - -#endif - - if (is_x86_feature(x86_featureset, X86FSET_SEP)) { - if (kpti_enable == 1) { - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter); - } else { - wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter); - } - } -} |