diff options
Diffstat (limited to 'usr/src/uts/intel')
105 files changed, 11480 insertions, 610 deletions
diff --git a/usr/src/uts/intel/Makefile b/usr/src/uts/intel/Makefile index 7fbdb31cb8..9a1f819983 100644 --- a/usr/src/uts/intel/Makefile +++ b/usr/src/uts/intel/Makefile @@ -71,7 +71,7 @@ install_h.prereq := TARGET= install_h .PARALLEL: $(PARALLEL_KMODS) $(XMODS) config $(LINT_DEPS) -def all install clean clobber modlist: $(KMODS) $(XMODS) config +def all install clean clobber modlist: genassym $(KMODS) $(XMODS) config clobber: clobber.targ @@ -113,7 +113,7 @@ CLOBBERFILES += $(PRIVS_C) # intel/dtrace depends on i86pc/genassym, so we need to build both # i86pc/genassym and intel/genassym. # -all.prereq install.prereq def.prereq: genunix FRC +all.prereq install.prereq def.prereq: genassym genunix FRC @cd ../i86pc/genassym; pwd; $(MAKE) $(@:%.prereq=%) # @@ -131,7 +131,7 @@ genunix: $(PRIVS_C) modlintlib clean.lint: $(LINT_KMODS) $(XMODS) -$(KMODS) $(SUBDIRS) config: FRC +genassym $(KMODS) $(SUBDIRS) config: FRC @cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET) $(XMODS): FRC diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index 5b0396e1b2..b2ad69e8c1 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -21,7 +21,7 @@ # # Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2013, Joyent, Inc. All rights reserved. +# Copyright 2019, Joyent, Inc. # Copyright 2018 Nexenta Systems, Inc. 
# @@ -102,6 +102,30 @@ GENUNIX_OBJS += \ CORE_OBJS += \ prmachdep.o +LX_CGROUP_OBJS += \ + cgrps_node.o \ + cgrps_vfsops.o \ + cgrps_vnops.o + +LX_DEVFS_OBJS += \ + lxd_attrdb.o \ + lxd_node.o \ + lxd_vfsops.o \ + lxd_vnops.o + +LX_PROC_OBJS += \ + lx_prsubr.o \ + lx_prvfsops.o \ + lx_prvnops.o + +LX_SYS_OBJS += \ + lx_syssubr.o \ + lx_sysvfsops.o \ + lx_sysvnops.o + +LX_AUTOFS_OBJS += \ + lx_autofs.o + # # ZFS file system module # @@ -272,6 +296,74 @@ IOMMULIB_OBJS = iommulib.o # SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o S10_BRAND_OBJS = s10_brand.o s10_brand_asm.o +LX_BRAND_OBJS = \ + lx_access.o \ + lx_acct.o \ + lx_acl.o \ + lx_aio.o \ + lx_archdep.o \ + lx_audit.o \ + lx_auxv.o \ + lx_brand.o \ + lx_brk.o \ + lx_chmod.o \ + lx_chown.o \ + lx_clone.o \ + lx_close.o \ + lx_cpu.o \ + lx_dup.o \ + lx_errno.o \ + lx_epoll.o \ + lx_eventfd.o \ + lx_fadvise.o \ + lx_fallocate.o \ + lx_fcntl.o \ + lx_futex.o \ + lx_getcwd.o \ + lx_getdents.o \ + lx_getpid.o \ + lx_getrandom.o \ + lx_id.o \ + lx_ioctl.o \ + lx_ioprio.o \ + lx_kill.o \ + lx_link.o \ + lx_lseek.o \ + lx_mem.o \ + lx_misc.o \ + lx_miscsys.o \ + lx_mkdir.o \ + lx_modify_ldt.o \ + lx_mount.o \ + lx_lockd.o \ + lx_open.o \ + lx_personality.o \ + lx_pgrp.o \ + lx_pid.o \ + lx_pipe.o \ + lx_poll.o \ + lx_prctl.o \ + lx_priority.o \ + lx_ptrace.o \ + lx_rename.o \ + lx_rlimit.o \ + lx_rw.o \ + lx_sched.o \ + lx_signal.o \ + lx_signum.o \ + lx_socket.o \ + lx_splice.o \ + lx_stat.o \ + lx_sync.o \ + lx_syscall.o \ + lx_sysinfo.o \ + lx_thread_area.o \ + lx_time.o \ + lx_timer.o \ + lx_umask.o \ + lx_uname.o \ + lx_wait.o \ + lx_xattr.o # # special files @@ -331,3 +423,13 @@ VMXNET3S_OBJS = vmxnet3_main.o \ # VMware PVSCSI SCSI Controller # PVSCSI_OBJS = pvscsi.o + +# +# Intel Temperature Module +# +CORETEMP_OBJS = coretemp.o + +# +# AMD Family 17 northbridge driver +# +AMDF17NBDF_OBJS = amdf17nbdf.o diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 83da92e201..e23797aeac 
100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -21,7 +21,7 @@ # # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Andrew Stormont. All rights reserved. -# Copyright 2016 Joyent, Inc. +# Copyright 2019, Joyent, Inc. # Copyright 2016 Garrett D'Amore <garrett@damore.org> # Copyright 2018 Nexenta Systems, Inc. # @@ -46,6 +46,7 @@ PLATFORM = i86pc # UNIX_DIR = $(UTSBASE)/i86pc/unix GENLIB_DIR = $(UTSBASE)/intel/genunix +GENASSYM_DIR = $(UTSBASE)/intel/genassym IPDRV_DIR = $(UTSBASE)/intel/ip MODSTUBS_DIR = $(UNIX_DIR) DSF_DIR = $(UTSBASE)/$(PLATFORM)/genassym @@ -139,6 +140,7 @@ ASFLAGS_XARCH_64 = $(amd64_ASFLAGS) ASFLAGS_XARCH = $(ASFLAGS_XARCH_$(CLASS)) ASFLAGS += $(ASFLAGS_XARCH) +AS_INC_PATH += -I$(GENASSYM_DIR)/$(OBJS_DIR) # # Define the base directory for installation. @@ -212,7 +214,7 @@ DRV_KMODS += audiopci DRV_KMODS += audiosolo DRV_KMODS += audiots DRV_KMODS += audiovia823x -DRV_KMODS += bl +DRV_KMODS += bl DRV_KMODS += blkdev DRV_KMODS += bge DRV_KMODS += bofi @@ -239,6 +241,7 @@ DRV_KMODS += devinfo DRV_KMODS += dld DRV_KMODS += dlpistub DRV_KMODS += dnet +DRV_KMODS += dr_sas DRV_KMODS += dump DRV_KMODS += ecpp DRV_KMODS += emlxs @@ -252,6 +255,7 @@ DRV_KMODS += i8042 DRV_KMODS += i915 DRV_KMODS += icmp DRV_KMODS += icmp6 +DRV_KMODS += inotify DRV_KMODS += intel_nb5000 DRV_KMODS += intel_nhm DRV_KMODS += ip @@ -287,6 +291,7 @@ DRV_KMODS += mpt_sas DRV_KMODS += mr_sas DRV_KMODS += mwl DRV_KMODS += nca +DRV_KMODS += nfp DRV_KMODS += nsmb DRV_KMODS += nulldriver DRV_KMODS += nv_sata @@ -355,6 +360,8 @@ DRV_KMODS += ural DRV_KMODS += uath DRV_KMODS += urtw DRV_KMODS += vgatext +DRV_KMODS += vmxnet +DRV_KMODS += vnd DRV_KMODS += vnic DRV_KMODS += vscan DRV_KMODS += wc @@ -363,8 +370,10 @@ DRV_KMODS += wpi DRV_KMODS += xge DRV_KMODS += yge DRV_KMODS += zcons +DRV_KMODS += zfd DRV_KMODS += zyd DRV_KMODS += simnet +DRV_KMODS += smrt DRV_KMODS += stmf DRV_KMODS += stmf_sbd 
DRV_KMODS += fct @@ -497,9 +506,9 @@ DRV_KMODS += xhci # DRV_KMODS += usbgem DRV_KMODS += axf -DRV_KMODS += udmf +DRV_KMODS += udmf DRV_KMODS += upf -DRV_KMODS += urf +DRV_KMODS += urf # # 1394 modules @@ -513,12 +522,13 @@ DRV_KMODS += dcam1394 # InfiniBand pseudo drivers # DRV_KMODS += ib ibp eibnx eoib rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs -DRV_KMODS += sol_umad +DRV_KMODS += sol_umad # # Brand modules # -BRAND_KMODS += sn1_brand s10_brand +BRAND_KMODS += sn1_brand s10_brand lx_brand +DRV_KMODS += lx_systrace lx_ptm lx_netlink # # Exec Class Modules (/kernel/exec): @@ -533,10 +543,10 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL SDC # # File System Modules (/kernel/fs): # -FS_KMODS += autofs ctfs dcfs dev devfs fdfs fifofs hsfs lofs -FS_KMODS += mntfs namefs nfs objfs zfs zut -FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs -FS_KMODS += smbfs bootfs +FS_KMODS += autofs ctfs dcfs dev devfs fdfs fifofs hsfs hyprlofs +FS_KMODS += lofs lxautofs lx_proc lxprocfs mntfs namefs nfs objfs zfs zut +FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs lx_sysfs +FS_KMODS += smbfs bootfs lx_cgroup lx_devfs # # Streams Modules (/kernel/strmod): @@ -596,6 +606,7 @@ MISC_KMODS += drm MISC_KMODS += fssnap_if MISC_KMODS += gda MISC_KMODS += gld +MISC_KMODS += gsqueue MISC_KMODS += hidparser MISC_KMODS += hook MISC_KMODS += hpcsvc @@ -711,6 +722,12 @@ MAC_KMODS += mac_wifi MAC_KMODS += mac_ib # +# Overlay related modules (/kernel/overlay) +# +DRV_KMODS += overlay +OVERLAY_KMODS += vxlan + +# # socketmod (kernel/socketmod) # SOCKET_KMODS += sockpfp @@ -718,6 +735,7 @@ SOCKET_KMODS += socksctp SOCKET_KMODS += socksdp SOCKET_KMODS += sockrds SOCKET_KMODS += ksslf +SOCKET_KMODS += datafilt # # kiconv modules (/kernel/kiconv): @@ -735,3 +753,9 @@ DACF_KMODS += net_dacf # global cross check. 
# LINTFLAGS += -D_MACHDEP -I$(UTSBASE)/i86pc + +# +# Sensor related drivers +# +DRV_KMODS += amdf17nbdf +DRV_KMODS += coretemp diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules index 5829d88c32..723cd2fd84 100644 --- a/usr/src/uts/intel/Makefile.rules +++ b/usr/src/uts/intel/Makefile.rules @@ -21,7 +21,7 @@ # # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# Copyright 2019 Joyent, Inc. All rights reserved. +# Copyright 2019, Joyent, Inc. # Copyright 2017 Nexenta Systems, Inc. # @@ -154,10 +154,18 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/amd8111s/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/amdf17nbdf/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/amr/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/coretemp/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/drm/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -174,6 +182,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/intel_nb5000/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/vmxnet/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/intel_nhm/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -451,6 +463,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/scsi/targets/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vgatext/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vmxnet/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vmxnet3s/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/amd64/krtld/kobj_reloc.c b/usr/src/uts/intel/amd64/krtld/kobj_reloc.c index 56c8087baa..9d34ec1310 100644 --- a/usr/src/uts/intel/amd64/krtld/kobj_reloc.c +++ b/usr/src/uts/intel/amd64/krtld/kobj_reloc.c @@ -23,6 +23,9 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ +/* + * Copyright (c) 2017 Joyent, Inc. + */ #pragma ident "%Z%%M% %I% %E% SMI" @@ -119,6 +122,57 @@ sdt_reloc_resolve(struct module *mp, char *symname, uint8_t *instr) return (0); } + +/* + * We're relying on the fact that the call we're replacing is + * call (e8) plus 4 bytes of address, making a 5 byte instruction + */ +#define NOP_INSTR 0x90 +#define SMAP_NOPS 5 + +/* + * Currently the only calls replaced as hot inlines + * are smap_enable() and smap_disable(). If more are needed + * we should probably come up with an sdt-probe-like prefix + * and look for those instead of exact call names. + */ +static int +smap_reloc_resolve(struct module *mp, char *symname, uint8_t *instr) +{ + uint_t symlen; + hotinline_desc_t *hid; + + if (strcmp(symname, "smap_enable") == 0 || + strcmp(symname, "smap_disable") == 0) { + +#ifdef KOBJ_DEBUG + if (kobj_debug & D_RELOCATIONS) { + _kobj_printf(ops, "smap_reloc_resolve: %s relocating " + "enable/disable_smap\n", mp->filename); + } +#endif + + hid = kobj_alloc(sizeof (hotinline_desc_t), KM_WAIT); + symlen = strlen(symname) + 1; + hid->hid_symname = kobj_alloc(symlen, KM_WAIT); + bcopy(symname, hid->hid_symname, symlen); + + /* + * We backtrack one byte here to consume the call + * instruction itself.
+ */ + hid->hid_instr_offset = (uintptr_t)instr - 1; + hid->hid_next = mp->hi_calls; + mp->hi_calls = hid; + + memset((void *)hid->hid_instr_offset, NOP_INSTR, SMAP_NOPS); + + return (0); + } + + return (1); +} + int /* ARGSUSED2 */ do_relocate(struct module *mp, char *reltbl, Word relshtype, int nreloc, @@ -223,6 +277,11 @@ do_relocate(struct module *mp, char *reltbl, Word relshtype, int nreloc, continue; if (symref->st_shndx == SHN_UNDEF && + smap_reloc_resolve(mp, mp->strings + + symref->st_name, (uint8_t *)off) == 0) + continue; + + if (symref->st_shndx == SHN_UNDEF && tnf_reloc_resolve(mp->strings + symref->st_name, &symref->st_value, &addend, off, &probelist, &taglist) != 0) { diff --git a/usr/src/uts/intel/amdf17nbdf/Makefile b/usr/src/uts/intel/amdf17nbdf/Makefile new file mode 100644 index 0000000000..a5543f176f --- /dev/null +++ b/usr/src/uts/intel/amdf17nbdf/Makefile @@ -0,0 +1,47 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent, Inc. +# + +UTSBASE = ../.. 
+ +MODULE = amdf17nbdf +OBJECTS = $(AMDF17NBDF_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/intel/io/amdf17nb + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) $(CONFMOD) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/bpf/Makefile b/usr/src/uts/intel/bpf/Makefile index e96a03d04b..08e92c6fc7 100644 --- a/usr/src/uts/intel/bpf/Makefile +++ b/usr/src/uts/intel/bpf/Makefile @@ -62,7 +62,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # # CFLAGS += $(CCVERBOSE) -LDFLAGS += -dy -Nmisc/mac -Nmisc/dls -Ndrv/ipnet -Nmisc/neti +LDFLAGS += -dy -Nmisc/mac -Nmisc/dls -Ndrv/ipnet -Nmisc/neti -Ndrv/ip INC_PATH += -I$(UTSBASE)/common/io/bpf # diff --git a/usr/src/uts/intel/brand/lx/lx_archdep.c b/usr/src/uts/intel/brand/lx/lx_archdep.c new file mode 100644 index 0000000000..24f3d2c446 --- /dev/null +++ b/usr/src/uts/intel/brand/lx/lx_archdep.c @@ -0,0 +1,1720 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +/* + * LX brand Intel-specific routines. 
+ */ + +#include <sys/types.h> +#include <sys/sunddi.h> +#include <sys/ddi.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> +#include <sys/privregs.h> +#include <sys/pcb.h> +#include <sys/archsystm.h> +#include <sys/stack.h> +#include <sys/sdt.h> +#include <sys/sysmacros.h> +#include <sys/psw.h> +#include <lx_errno.h> + +/* + * Argument constants for fix_segreg. + * See usr/src/uts/intel/ia32/os/archdep.c for the originals. + */ +#define IS_CS 1 +#define IS_NOT_CS 0 + +extern greg_t fix_segreg(greg_t, int, model_t); + + +#define LX_REG(ucp, r) ((ucp)->uc_mcontext.gregs[(r)]) + +#define PSLMERGE(oldval, newval) \ + (((oldval) & ~PSL_USERMASK) | ((newval) & PSL_USERMASK)) + +#ifdef __amd64 +/* 64-bit native user_regs_struct */ +typedef struct lx_user_regs64 { + int64_t lxur_r15; + int64_t lxur_r14; + int64_t lxur_r13; + int64_t lxur_r12; + int64_t lxur_rbp; + int64_t lxur_rbx; + int64_t lxur_r11; + int64_t lxur_r10; + int64_t lxur_r9; + int64_t lxur_r8; + int64_t lxur_rax; + int64_t lxur_rcx; + int64_t lxur_rdx; + int64_t lxur_rsi; + int64_t lxur_rdi; + int64_t lxur_orig_rax; + int64_t lxur_rip; + int64_t lxur_xcs; + int64_t lxur_rflags; + int64_t lxur_rsp; + int64_t lxur_xss; + int64_t lxur_xfs_base; + int64_t lxur_xgs_base; + int64_t lxur_xds; + int64_t lxur_xes; + int64_t lxur_xfs; + int64_t lxur_xgs; +} lx_user_regs64_t; + +/* 64-bit native user_fpregs_struct */ +typedef struct lx_user_fpregs64 { + uint16_t lxufp_cwd; + uint16_t lxufp_swd; + uint16_t lxufp_ftw; + uint16_t lxufp_fop; + uint64_t lxufp_rip; + uint64_t lxufp_rdp; + uint32_t lxufp_mxcsr; + uint32_t lxufp_mxcr_mask; + /* 8*16 bytes for each FP-reg = 128 bytes */ + uint32_t lxufp_st_space[32]; + /* 16*16 bytes for each XMM-reg = 256 bytes */ + uint32_t lxufp_xmm_space[64]; + uint32_t lxufp_padding[24]; +} lx_user_fpregs64_t; + +/* 64-bit native user_struct */ +typedef struct lx_user64 { + lx_user_regs64_t lxu_regs; + int32_t lxu_fpvalid; + int32_t lxu_pad0; + 
lx_user_fpregs64_t lxu_i387; + uint64_t lxu_tsize; + uint64_t lxu_dsize; + uint64_t lxu_ssize; + uint64_t lxu_start_code; + uint64_t lxu_start_stack; + int64_t lxu_signal; + int32_t lxu_reserved; + int32_t lxu_pad1; + /* help gdb to locate user_regs structure */ + caddr_t lxu_ar0; + /* help gdb to locate user_fpregs structure */ + caddr_t lxu_fpstate; + uint64_t lxu_magic; + char lxu_comm[32]; + uint64_t lxu_debugreg[8]; + uint64_t lxu_error_code; + uint64_t lxu_fault_address; +} lx_user64_t; + +#endif /* __amd64 */ + +/* 32-bit native user_regs_struct */ +typedef struct lx_user_regs32 { + int32_t lxur_ebx; + int32_t lxur_ecx; + int32_t lxur_edx; + int32_t lxur_esi; + int32_t lxur_edi; + int32_t lxur_ebp; + int32_t lxur_eax; + int32_t lxur_xds; + int32_t lxur_xes; + int32_t lxur_xfs; + int32_t lxur_xgs; + int32_t lxur_orig_eax; + int32_t lxur_eip; + int32_t lxur_xcs; + int32_t lxur_eflags; + int32_t lxur_esp; + int32_t lxur_xss; +} lx_user_regs32_t; + +/* 32-bit native user_fpregs_struct */ +typedef struct lx_user_fpregs32 { + int32_t lxufp_cwd; + int32_t lxufp_swd; + int32_t lxufp_twd; + int32_t lxufp_fip; + int32_t lxufp_fcs; + int32_t lxufp_foo; + int32_t lxufp_fos; + int32_t lxufp_st_space[20]; +} lx_user_fpregs32_t; + +/* 32-bit native user_fpxregs_struct */ +typedef struct lx_user_fpxregs32 { + uint16_t lxufpx_cwd; + uint16_t lxufpx_swd; + uint16_t lxufpx_twd; + uint16_t lxufpx_fop; + int32_t lxufpx_fip; + int32_t lxufpx_fcs; + int32_t lxufpx_foo; + int32_t lxufpx_fos; + int32_t lxufpx_mxcsr; + int32_t lxufpx_reserved; + /* 8*16 bytes for each FP-reg = 128 bytes */ + int32_t lxufpx_st_space[32]; + /* 8*16 bytes for each XMM-reg = 128 bytes */ + int32_t lxufpx_xmm_space[32]; + int32_t lxufpx_padding[56]; +} lx_user_fpxregs32_t; + +/* 32-bit native user_struct */ +typedef struct lx_user32 { + lx_user_regs32_t lxu_regs; + int32_t lxu_fpvalid; + lx_user_fpregs32_t lxu_i387; + uint32_t lxu_tsize; + uint32_t lxu_dsize; + uint32_t lxu_ssize; + uint32_t 
lxu_start_code; + uint32_t lxu_start_stack; + int32_t lxu_signal; + int32_t lxu_reserved; + caddr32_t lxu_ar0; + caddr32_t lxu_fpstate; + uint32_t lxu_magic; + char lxu_comm[32]; + int32_t lxu_debugreg[8]; +} lx_user32_t; + +/* + * Certain versions of strace (on centos6 for example) use the %cs value to + * determine what kind of process is being traced. Here is a sample comment: + * Check CS register value. On x86-64 linux it is: + * 0x33 for long mode (64 bit and x32)) + * 0x23 for compatibility mode (32 bit) + * %ds = 0x2b for x32 mode (x86-64 in 32 bit) + * We can't change the %cs value in the ucp (see setgregs and _sys_rtt) so we + * emulate the expected value for ptrace use. + */ +#define LX_CS_64BIT 0x33 +#define LX_CS_32BIT 0x23 + +extern int getsetcontext(int, void *); +#if defined(_SYSCALL32_IMPL) +extern int getsetcontext32(int, void *); +#endif + +static int +lx_rw_uc(proc_t *p, void *ucp, void *kucp, size_t ucsz, boolean_t writing) +{ + int error = 0; + size_t rem = ucsz; + off_t pos = 0; + + VERIFY(MUTEX_HELD(&p->p_lock)); + + /* + * Grab P_PR_LOCK so that we can drop p_lock while doing I/O. + */ + sprlock_proc(p); + + /* + * Drop p_lock while we do I/O to avoid deadlock with the clock thread. + */ + mutex_exit(&p->p_lock); + while (rem != 0) { + uintptr_t addr = (uintptr_t)ucp + pos; + size_t len = MIN(rem, PAGESIZE - (addr & PAGEOFFSET)); + + if (writing) { + error = uwrite(p, (caddr_t)kucp + pos, len, addr); + } else { + error = uread(p, (caddr_t)kucp + pos, len, addr); + } + + if (error != 0) { + break; + } + + rem -= len; + pos += len; + } + mutex_enter(&p->p_lock); + + sprunlock(p); + mutex_enter(&p->p_lock); + + return (error); +} + +/* + * Read a ucontext_t from the target process, which may or may not be + * the current process.
+ */ +static int +lx_read_uc(proc_t *p, void *ucp, void *kucp, size_t ucsz) +{ + return (lx_rw_uc(p, ucp, kucp, ucsz, B_FALSE)); +} + +/* + * Write a ucontext_t to the target process, which may or may not be + * the current process. + */ +static int +lx_write_uc(proc_t *p, void *ucp, void *kucp, size_t ucsz) +{ + return (lx_rw_uc(p, ucp, kucp, ucsz, B_TRUE)); +} + +static void +lx_getfpregs32(lx_lwp_data_t *lwpd, lx_user_fpregs32_t *lfp) +{ +#ifdef __amd64 + fpregset32_t fp; + getfpregs32(lwpd->br_lwp, &fp); +#else /* __i386 */ + fpregset_t fp; + getfpregs(lwpd->br_lwp, &fp); +#endif /* __amd64 */ + + /* + * The fpchip_state.state field should correspond to all 27 fields in + * the 32-bit structure. + */ + bcopy(&fp.fp_reg_set.fpchip_state.state, lfp, sizeof (*lfp)); +} + +static void +lx_setfpregs32(lx_lwp_data_t *lwpd, lx_user_fpregs32_t *lfp) +{ +#ifdef __amd64 + fpregset32_t fp; +#else /* __i386 */ + fpregset_t fp; +#endif /* __amd64 */ + + /* + * The fpchip_state field should correspond to all 27 fields in the + * native 32-bit structure. 
+ */ + bcopy(lfp, &fp.fp_reg_set.fpchip_state.state, sizeof (*lfp)); + +#ifdef __amd64 + setfpregs32(lwpd->br_lwp, &fp); +#else /* __i386 */ + setfpregs(lwpd->br_lwp, &fp); +#endif /* __amd64 */ +} + +static int +lx_get_user_regs32_uc(klwp_t *lwp, void *ucp, lx_user_regs32_t *lxrp) +{ + proc_t *p = lwptoproc(lwp); + ucontext32_t uc; + + if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + lxrp->lxur_ebx = LX_REG(&uc, EBX); + lxrp->lxur_ecx = LX_REG(&uc, ECX); + lxrp->lxur_edx = LX_REG(&uc, EDX); + lxrp->lxur_esi = LX_REG(&uc, ESI); + lxrp->lxur_edi = LX_REG(&uc, EDI); + lxrp->lxur_ebp = LX_REG(&uc, EBP); + lxrp->lxur_eax = LX_REG(&uc, EAX); + lxrp->lxur_orig_eax = 0; + + lxrp->lxur_eip = LX_REG(&uc, EIP); + lxrp->lxur_eflags = LX_REG(&uc, EFL); + lxrp->lxur_esp = LX_REG(&uc, UESP); + lxrp->lxur_xss = LX_REG(&uc, SS); + + /* emulated %cs, see defines */ + lxrp->lxur_xcs = LX_CS_32BIT; + lxrp->lxur_xds = LX_REG(&uc, DS); + lxrp->lxur_xes = LX_REG(&uc, ES); + lxrp->lxur_xfs = LX_REG(&uc, FS); + lxrp->lxur_xgs = LX_REG(&uc, GS); + return (0); +} + +static int +lx_get_user_regs32(lx_lwp_data_t *lwpd, lx_user_regs32_t *lxrp) +{ + klwp_t *lwp = lwpd->br_lwp; + struct regs *rp = lwptoregs(lwp); + void *ucp; +#ifdef __amd64 + struct pcb *pcb = &lwp->lwp_pcb; +#endif + + VERIFY(lwp_getdatamodel(lwp) == DATAMODEL_ILP32); + + switch (lx_regs_location(lwpd, &ucp, B_FALSE)) { + case LX_REG_LOC_UNAVAIL: + return (-1); + + case LX_REG_LOC_UCP: + return (lx_get_user_regs32_uc(lwp, ucp, lxrp)); + + case LX_REG_LOC_LWP: + /* transformation below */ + break; + + default: + VERIFY(0); + break; + } + +#ifdef __amd64 + lxrp->lxur_ebx = (int32_t)rp->r_rbx; + lxrp->lxur_ecx = (int32_t)rp->r_rcx; + lxrp->lxur_edx = (int32_t)rp->r_rdx; + lxrp->lxur_esi = (int32_t)rp->r_rsi; + lxrp->lxur_edi = (int32_t)rp->r_rdi; + lxrp->lxur_ebp = (int32_t)rp->r_rbp; + lxrp->lxur_eax = (int32_t)rp->r_rax; + lxrp->lxur_orig_eax = 0; + lxrp->lxur_eip = (int32_t)rp->r_rip; + lxrp->lxur_eflags 
= (int32_t)rp->r_rfl; + lxrp->lxur_esp = (int32_t)rp->r_rsp; + lxrp->lxur_xss = (int32_t)rp->r_ss; + + kpreempt_disable(); + if (PCB_NEED_UPDATE_SEGS(pcb)) { + lxrp->lxur_xds = pcb->pcb_ds; + lxrp->lxur_xes = pcb->pcb_es; + lxrp->lxur_xfs = pcb->pcb_fs; + lxrp->lxur_xgs = pcb->pcb_gs; + } else { + lxrp->lxur_xds = rp->r_ds; + lxrp->lxur_xes = rp->r_es; + lxrp->lxur_xfs = rp->r_fs; + lxrp->lxur_xgs = rp->r_gs; + } + kpreempt_enable(); +#else /* __i386 */ + lxrp->lxur_ebx = rp->r_ebx; + lxrp->lxur_ecx = rp->r_ecx; + lxrp->lxur_edx = rp->r_edx; + lxrp->lxur_esi = rp->r_esi; + lxrp->lxur_edi = rp->r_edi; + lxrp->lxur_ebp = rp->r_ebp; + lxrp->lxur_eax = rp->r_eax; + lxrp->lxur_orig_eax = 0; + lxrp->lxur_eip = rp->r_eip; + lxrp->lxur_eflags = rp->r_efl; + lxrp->lxur_esp = rp->r_esp; + lxrp->lxur_xss = rp->r_ss; + + lxrp->lxur_xds = rp->r_ds; + lxrp->lxur_xes = rp->r_es; + lxrp->lxur_xfs = rp->r_fs; + lxrp->lxur_xgs = rp->r_gs; +#endif /* __amd64 */ + + /* emulated %cs, see defines */ + lxrp->lxur_xcs = LX_CS_32BIT; + + if (lwpd->br_ptrace_whatstop == LX_PR_SYSENTRY) { + lxrp->lxur_eax = (int32_t)-lx_errno(ENOTSUP, EINVAL); + lxrp->lxur_orig_eax = (int32_t)lwpd->br_syscall_num; + } else if (lwpd->br_ptrace_whatstop == LX_PR_SYSEXIT) { + lxrp->lxur_orig_eax = (int32_t)lwpd->br_syscall_num; + } + + return (0); +} + +static int +lx_set_user_regs32_uc(klwp_t *lwp, void *ucp, lx_user_regs32_t *lxrp) +{ + proc_t *p = lwptoproc(lwp); + ucontext32_t uc; + + if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + /* + * Note: we currently ignore "lxur_orig_eax" here since this + * path should not be used for system call stops.
+ */ + LX_REG(&uc, EBP) = lxrp->lxur_ebp; + LX_REG(&uc, EBX) = lxrp->lxur_ebx; + LX_REG(&uc, EAX) = lxrp->lxur_eax; + LX_REG(&uc, ECX) = lxrp->lxur_ecx; + LX_REG(&uc, EDX) = lxrp->lxur_edx; + LX_REG(&uc, ESI) = lxrp->lxur_esi; + LX_REG(&uc, EDI) = lxrp->lxur_edi; + LX_REG(&uc, EIP) = lxrp->lxur_eip; + LX_REG(&uc, EFL) = PSLMERGE(LX_REG(&uc, EFL), lxrp->lxur_eflags); + LX_REG(&uc, UESP) = lxrp->lxur_esp; + LX_REG(&uc, SS) = fix_segreg(lxrp->lxur_xss, IS_NOT_CS, + DATAMODEL_ILP32); + + /* %cs is ignored because of our lies */ + LX_REG(&uc, DS) = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, + DATAMODEL_ILP32); + LX_REG(&uc, ES) = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, + DATAMODEL_ILP32); + LX_REG(&uc, FS) = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, + DATAMODEL_ILP32); + LX_REG(&uc, GS) = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, + DATAMODEL_ILP32); + + if (lx_write_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + return (0); +} + +static int +lx_set_user_regs32(lx_lwp_data_t *lwpd, lx_user_regs32_t *lxrp) +{ + klwp_t *lwp = lwpd->br_lwp; + struct regs *rp = lwptoregs(lwp); + void *ucp; +#ifdef __amd64 + struct pcb *pcb = &lwp->lwp_pcb; +#endif + + VERIFY(lwp_getdatamodel(lwp) == DATAMODEL_ILP32); + + switch (lx_regs_location(lwpd, &ucp, B_TRUE)) { + case LX_REG_LOC_UNAVAIL: + return (-1); + + case LX_REG_LOC_UCP: + return (lx_set_user_regs32_uc(lwp, ucp, lxrp)); + + case LX_REG_LOC_LWP: + /* transformation below */ + break; + + default: + VERIFY(0); + break; + } + +#ifdef __amd64 + rp->r_rbx = (int32_t)lxrp->lxur_ebx; + rp->r_rcx = (int32_t)lxrp->lxur_ecx; + rp->r_rdx = (int32_t)lxrp->lxur_edx; + rp->r_rsi = (int32_t)lxrp->lxur_esi; + rp->r_rdi = (int32_t)lxrp->lxur_edi; + rp->r_rbp = (int32_t)lxrp->lxur_ebp; + rp->r_rax = (int32_t)lxrp->lxur_eax; + lwpd->br_syscall_num = (int)lxrp->lxur_orig_eax; + rp->r_rip = (int32_t)lxrp->lxur_eip; + rp->r_rfl = (int32_t)PSLMERGE(rp->r_rfl, lxrp->lxur_eflags); + rp->r_rsp = (int32_t)lxrp->lxur_esp; + rp->r_ss = 
(int32_t)fix_segreg(lxrp->lxur_xss, IS_NOT_CS, + DATAMODEL_ILP32); + + kpreempt_disable(); + PCB_SET_UPDATE_SEGS(pcb); + pcb->pcb_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_ILP32); + pcb->pcb_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_ILP32); + pcb->pcb_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_ILP32); + pcb->pcb_gs = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, DATAMODEL_ILP32); + kpreempt_enable(); +#else /* __i386 */ + rp->r_ebx = lxrp->lxur_ebx; + rp->r_ecx = lxrp->lxur_ecx; + rp->r_edx = lxrp->lxur_edx; + rp->r_esi = lxrp->lxur_esi; + rp->r_edi = lxrp->lxur_edi; + rp->r_ebp = lxrp->lxur_ebp; + rp->r_eax = lxrp->lxur_eax; + lwpd->br_syscall_num = (int)lxrp->lxur_orig_eax; + rp->r_eip = lxrp->lxur_eip; + rp->r_efl = PSLMERGE(rp->r_efl, lxrp->lxur_eflags); + rp->r_esp = lxrp->lxur_esp; + rp->r_ss = fix_segreg(lxrp->lxur_xss, IS_NOT_CS, DATAMODEL_ILP32); + + rp->r_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_ILP32); + rp->r_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_ILP32); + rp->r_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_ILP32); + rp->r_gs = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, DATAMODEL_ILP32); +#endif /* __amd64 */ + + return (0); +} + +#ifdef __amd64 + +static void +lx_getfpregs64(lx_lwp_data_t *lwpd, lx_user_fpregs64_t *lfp) +{ + fpregset_t fp; + + getfpregs(lwpd->br_lwp, &fp); + /* Drop the extra illumos status/xstatus fields when copying state */ + bcopy(&fp.fp_reg_set.fpchip_state, lfp, sizeof (*lfp)); +} + +static void +lx_setfpregs64(lx_lwp_data_t *lwpd, lx_user_fpregs64_t *lfp) +{ + fpregset_t fp; + + /* + * Since the Linux fpregs structure does not contain the same + * additional status register which illumos contains, we simply + * preserve the existing values when setting fp state. 
+ */ + getfpregs(lwpd->br_lwp, &fp); + + /* Copy the identically formatted state */ + bcopy(lfp, &fp.fp_reg_set.fpchip_state, sizeof (*lfp)); + + setfpregs(lwpd->br_lwp, &fp); +} + +static int +lx_get_user_regs64_uc(klwp_t *lwp, void *ucp, lx_user_regs64_t *lxrp) +{ + proc_t *p = lwptoproc(lwp); + + switch (lwp_getdatamodel(lwp)) { + case DATAMODEL_LP64: { + ucontext_t uc; + + if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + lxrp->lxur_r15 = LX_REG(&uc, REG_R15); + lxrp->lxur_r14 = LX_REG(&uc, REG_R14); + lxrp->lxur_r13 = LX_REG(&uc, REG_R13); + lxrp->lxur_r12 = LX_REG(&uc, REG_R12); + lxrp->lxur_rbp = LX_REG(&uc, REG_RBP); + lxrp->lxur_rbx = LX_REG(&uc, REG_RBX); + lxrp->lxur_r11 = LX_REG(&uc, REG_R11); + lxrp->lxur_r10 = LX_REG(&uc, REG_R10); + lxrp->lxur_r9 = LX_REG(&uc, REG_R9); + lxrp->lxur_r8 = LX_REG(&uc, REG_R8); + lxrp->lxur_rax = LX_REG(&uc, REG_RAX); + lxrp->lxur_rcx = LX_REG(&uc, REG_RCX); + lxrp->lxur_rdx = LX_REG(&uc, REG_RDX); + lxrp->lxur_rsi = LX_REG(&uc, REG_RSI); + lxrp->lxur_rdi = LX_REG(&uc, REG_RDI); + lxrp->lxur_orig_rax = 0; + lxrp->lxur_rip = LX_REG(&uc, REG_RIP); + lxrp->lxur_rflags = LX_REG(&uc, REG_RFL); + lxrp->lxur_rsp = LX_REG(&uc, REG_RSP); + lxrp->lxur_xss = LX_REG(&uc, REG_SS); + lxrp->lxur_xfs_base = LX_REG(&uc, REG_FSBASE); + lxrp->lxur_xgs_base = LX_REG(&uc, REG_GSBASE); + + lxrp->lxur_xds = LX_REG(&uc, REG_DS); + lxrp->lxur_xes = LX_REG(&uc, REG_ES); + lxrp->lxur_xfs = LX_REG(&uc, REG_FS); + lxrp->lxur_xgs = LX_REG(&uc, REG_GS); + + /* emulated %cs, see defines */ + lxrp->lxur_xcs = LX_CS_64BIT; + return (0); + } + + case DATAMODEL_ILP32: { + ucontext32_t uc; + + if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + lxrp->lxur_r15 = 0; + lxrp->lxur_r14 = 0; + lxrp->lxur_r13 = 0; + lxrp->lxur_r12 = 0; + lxrp->lxur_r11 = 0; + lxrp->lxur_r10 = 0; + lxrp->lxur_r9 = 0; + lxrp->lxur_r8 = 0; + lxrp->lxur_rbp = LX_REG(&uc, EBP); + lxrp->lxur_rbx = LX_REG(&uc, EBX); + lxrp->lxur_rax = LX_REG(&uc, 
EAX); + lxrp->lxur_orig_rax = 0; + lxrp->lxur_rcx = LX_REG(&uc, ECX); + lxrp->lxur_rdx = LX_REG(&uc, EDX); + lxrp->lxur_rsi = LX_REG(&uc, ESI); + lxrp->lxur_rdi = LX_REG(&uc, EDI); + lxrp->lxur_rip = LX_REG(&uc, EIP); + + lxrp->lxur_rflags = LX_REG(&uc, EFL); + lxrp->lxur_rsp = LX_REG(&uc, UESP); + lxrp->lxur_xss = LX_REG(&uc, SS); + lxrp->lxur_xfs_base = 0; + lxrp->lxur_xgs_base = 0; + + lxrp->lxur_xds = LX_REG(&uc, DS); + lxrp->lxur_xes = LX_REG(&uc, ES); + lxrp->lxur_xfs = LX_REG(&uc, FS); + lxrp->lxur_xgs = LX_REG(&uc, GS); + + /* See comment above re: %cs register */ + lxrp->lxur_xcs = LX_CS_32BIT; + return (0); + } + + default: + break; + } + + return (-1); +} + +static int +lx_get_user_regs64(lx_lwp_data_t *lwpd, lx_user_regs64_t *lxrp) +{ + klwp_t *lwp = lwpd->br_lwp; + struct regs *rp = lwptoregs(lwp); + struct pcb *pcb = &lwp->lwp_pcb; + void *ucp; + + switch (lx_regs_location(lwpd, &ucp, B_FALSE)) { + case LX_REG_LOC_UNAVAIL: + return (-1); + + case LX_REG_LOC_UCP: + return (lx_get_user_regs64_uc(lwp, ucp, lxrp)); + + case LX_REG_LOC_LWP: + /* transformation below */ + break; + + default: + VERIFY(0); + break; + } + + lxrp->lxur_r15 = rp->r_r15; + lxrp->lxur_r14 = rp->r_r14; + lxrp->lxur_r13 = rp->r_r13; + lxrp->lxur_r12 = rp->r_r12; + lxrp->lxur_rbp = rp->r_rbp; + lxrp->lxur_rbx = rp->r_rbx; + lxrp->lxur_r11 = rp->r_r11; + lxrp->lxur_r10 = rp->r_r10; + lxrp->lxur_r9 = rp->r_r9; + lxrp->lxur_r8 = rp->r_r8; + lxrp->lxur_rax = rp->r_rax; + lxrp->lxur_rcx = rp->r_rcx; + lxrp->lxur_rdx = rp->r_rdx; + lxrp->lxur_rsi = rp->r_rsi; + lxrp->lxur_rdi = rp->r_rdi; + lxrp->lxur_orig_rax = 0; + lxrp->lxur_rip = rp->r_rip; + + lxrp->lxur_rflags = rp->r_rfl; + lxrp->lxur_rsp = rp->r_rsp; + lxrp->lxur_xss = rp->r_ss; + lxrp->lxur_xfs_base = pcb->pcb_fsbase; + lxrp->lxur_xgs_base = pcb->pcb_gsbase; + + /* emulated %cs, see defines */ + switch (lwp_getdatamodel(lwp)) { + case DATAMODEL_LP64: + lxrp->lxur_xcs = LX_CS_64BIT; + break; + case DATAMODEL_ILP32: + lxrp->lxur_xcs 
= LX_CS_32BIT; + break; + default: + VERIFY(0); + break; + } + + kpreempt_disable(); + if (PCB_NEED_UPDATE_SEGS(pcb)) { + lxrp->lxur_xds = pcb->pcb_ds; + lxrp->lxur_xes = pcb->pcb_es; + lxrp->lxur_xfs = pcb->pcb_fs; + lxrp->lxur_xgs = pcb->pcb_gs; + } else { + lxrp->lxur_xds = rp->r_ds; + lxrp->lxur_xes = rp->r_es; + lxrp->lxur_xfs = rp->r_fs; + lxrp->lxur_xgs = rp->r_gs; + } + kpreempt_enable(); + + if (lwpd->br_ptrace_whatstop == LX_PR_SYSENTRY) { + lxrp->lxur_rax = -lx_errno(ENOTSUP, EINVAL); + lxrp->lxur_orig_rax = lwpd->br_syscall_num; + } else if (lwpd->br_ptrace_whatstop == LX_PR_SYSEXIT) { + lxrp->lxur_orig_rax = lwpd->br_syscall_num; + } + + return (0); +} + +static int +lx_set_user_regs64_uc(klwp_t *lwp, void *ucp, lx_user_regs64_t *lxrp) +{ + proc_t *p = lwptoproc(lwp); + + switch (lwp_getdatamodel(lwp)) { + case DATAMODEL_LP64: { + ucontext_t uc; + + if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + /* + * Note: we currently ignore "lxur_orig_rax" here since this + * path should not be used for system call stops. 
+ */ + LX_REG(&uc, REG_R15) = lxrp->lxur_r15; + LX_REG(&uc, REG_R14) = lxrp->lxur_r14; + LX_REG(&uc, REG_R13) = lxrp->lxur_r13; + LX_REG(&uc, REG_R12) = lxrp->lxur_r12; + LX_REG(&uc, REG_RBP) = lxrp->lxur_rbp; + LX_REG(&uc, REG_RBX) = lxrp->lxur_rbx; + LX_REG(&uc, REG_R11) = lxrp->lxur_r11; + LX_REG(&uc, REG_R10) = lxrp->lxur_r10; + LX_REG(&uc, REG_R9) = lxrp->lxur_r9; + LX_REG(&uc, REG_R8) = lxrp->lxur_r8; + LX_REG(&uc, REG_RAX) = lxrp->lxur_rax; + LX_REG(&uc, REG_RCX) = lxrp->lxur_rcx; + LX_REG(&uc, REG_RDX) = lxrp->lxur_rdx; + LX_REG(&uc, REG_RSI) = lxrp->lxur_rsi; + LX_REG(&uc, REG_RDI) = lxrp->lxur_rdi; + LX_REG(&uc, REG_RIP) = lxrp->lxur_rip; + LX_REG(&uc, REG_RFL) = PSLMERGE(LX_REG(&uc, REG_RFL), + lxrp->lxur_rflags); + LX_REG(&uc, REG_RSP) = lxrp->lxur_rsp; + LX_REG(&uc, REG_SS) = fix_segreg(lxrp->lxur_xss, IS_NOT_CS, + DATAMODEL_LP64); + LX_REG(&uc, REG_FSBASE) = lxrp->lxur_xfs_base; + LX_REG(&uc, REG_GSBASE) = lxrp->lxur_xgs_base; + + /* %cs is ignored because of our lies */ + LX_REG(&uc, REG_DS) = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, + DATAMODEL_LP64); + LX_REG(&uc, REG_ES) = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, + DATAMODEL_LP64); + LX_REG(&uc, REG_FS) = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, + DATAMODEL_LP64); + LX_REG(&uc, REG_GS) = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, + DATAMODEL_LP64); + + if (lx_write_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + return (0); + } + + case DATAMODEL_ILP32: { + ucontext32_t uc; + + if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + + /* + * Note: we currently ignore "lxur_orig_rax" here since this + * path should not be used for system call stops. 
+ */ + LX_REG(&uc, EBP) = (int32_t)lxrp->lxur_rbp; + LX_REG(&uc, EBX) = (int32_t)lxrp->lxur_rbx; + LX_REG(&uc, EAX) = (int32_t)lxrp->lxur_rax; + LX_REG(&uc, ECX) = (int32_t)lxrp->lxur_rcx; + LX_REG(&uc, EDX) = (int32_t)lxrp->lxur_rdx; + LX_REG(&uc, ESI) = (int32_t)lxrp->lxur_rsi; + LX_REG(&uc, EDI) = (int32_t)lxrp->lxur_rdi; + LX_REG(&uc, EIP) = (int32_t)lxrp->lxur_rip; + LX_REG(&uc, EFL) = (int32_t)PSLMERGE(LX_REG(&uc, EFL), + lxrp->lxur_rflags); + LX_REG(&uc, UESP) = (int32_t)lxrp->lxur_rsp; + LX_REG(&uc, SS) = (int32_t)fix_segreg(lxrp->lxur_xss, + IS_NOT_CS, DATAMODEL_ILP32); + + /* %cs is ignored because of our lies */ + LX_REG(&uc, DS) = (int32_t)fix_segreg(lxrp->lxur_xds, + IS_NOT_CS, DATAMODEL_ILP32); + LX_REG(&uc, ES) = (int32_t)fix_segreg(lxrp->lxur_xes, + IS_NOT_CS, DATAMODEL_ILP32); + LX_REG(&uc, FS) = (int32_t)fix_segreg(lxrp->lxur_xfs, + IS_NOT_CS, DATAMODEL_ILP32); + LX_REG(&uc, GS) = (int32_t)fix_segreg(lxrp->lxur_xgs, + IS_NOT_CS, DATAMODEL_ILP32); + + if (lx_write_uc(p, ucp, &uc, sizeof (uc)) != 0) { + return (-1); + } + return (0); + } + + default: + break; + } + + return (-1); +} + +static int +lx_set_user_regs64(lx_lwp_data_t *lwpd, lx_user_regs64_t *lxrp) +{ + klwp_t *lwp = lwpd->br_lwp; + struct regs *rp = lwptoregs(lwp); + struct pcb *pcb = &lwp->lwp_pcb; + void *ucp; + + switch (lx_regs_location(lwpd, &ucp, B_TRUE)) { + case LX_REG_LOC_UNAVAIL: + return (-1); + + case LX_REG_LOC_UCP: + return (lx_set_user_regs64_uc(lwp, ucp, lxrp)); + + case LX_REG_LOC_LWP: + /* transformation below */ + break; + + default: + VERIFY(0); + break; + } + + rp->r_r15 = lxrp->lxur_r15; + rp->r_r14 = lxrp->lxur_r14; + rp->r_r13 = lxrp->lxur_r13; + rp->r_r12 = lxrp->lxur_r12; + rp->r_rbp = lxrp->lxur_rbp; + rp->r_rbx = lxrp->lxur_rbx; + rp->r_r11 = lxrp->lxur_r11; + rp->r_r10 = lxrp->lxur_r10; + rp->r_r9 = lxrp->lxur_r9; + rp->r_r8 = lxrp->lxur_r8; + rp->r_rax = lxrp->lxur_rax; + rp->r_rcx = lxrp->lxur_rcx; + rp->r_rdx = lxrp->lxur_rdx; + rp->r_rsi = 
lxrp->lxur_rsi; + rp->r_rdi = lxrp->lxur_rdi; + lwpd->br_syscall_num = (int)lxrp->lxur_orig_rax; + rp->r_rip = lxrp->lxur_rip; + rp->r_rfl = PSLMERGE(rp->r_rfl, lxrp->lxur_rflags); + rp->r_rsp = lxrp->lxur_rsp; + rp->r_ss = fix_segreg(lxrp->lxur_xss, IS_NOT_CS, DATAMODEL_LP64); + pcb->pcb_fsbase = lxrp->lxur_xfs_base; + pcb->pcb_gsbase = lxrp->lxur_xgs_base; + + kpreempt_disable(); + PCB_SET_UPDATE_SEGS(pcb); + pcb->pcb_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_LP64); + pcb->pcb_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_LP64); + pcb->pcb_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_LP64); + pcb->pcb_gs = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, DATAMODEL_LP64); + kpreempt_enable(); + + return (0); +} + +#endif /* __amd64 */ + +static int +lx_peekuser32(lx_lwp_data_t *lwpd, uintptr_t offset, uint32_t *res) +{ + lx_user32_t lxu; + boolean_t valid = B_FALSE; + + bzero(&lxu, sizeof (lxu)); + if (offset < sizeof (lx_user_regs32_t)) { + if (lx_get_user_regs32(lwpd, &lxu.lxu_regs) == 0) { + valid = B_TRUE; + } + } + if (valid) { + uint32_t *data = (uint32_t *)&lxu; + *res = data[offset / sizeof (uint32_t)]; + return (0); + } + return (-1); +} + +#ifdef __amd64 +static int +lx_peekuser64(lx_lwp_data_t *lwpd, uintptr_t offset, uintptr_t *res) +{ + lx_user64_t lxu; + boolean_t valid = B_FALSE; + + bzero(&lxu, sizeof (lxu)); + if (offset < sizeof (lx_user_regs64_t)) { + if (lx_get_user_regs64(lwpd, &lxu.lxu_regs) == 0) { + valid = B_TRUE; + } + } + if (valid) { + uintptr_t *data = (uintptr_t *)&lxu; + *res = data[offset / sizeof (uintptr_t)]; + return (0); + } + return (-1); +} +#endif /* __amd64 */ + +int +lx_user_regs_copyin(lx_lwp_data_t *lwpd, void *uregsp) +{ + model_t target_model = lwp_getdatamodel(lwpd->br_lwp); + + switch (get_udatamodel()) { + case DATAMODEL_ILP32: + if (target_model == DATAMODEL_ILP32) { + lx_user_regs32_t regs; + + if (copyin(uregsp, &regs, sizeof (regs)) != 0) { + return (EFAULT); + } + if (lx_set_user_regs32(lwpd, &regs) != 
0) { + return (EIO); + } + return (0); + } + break; + +#ifdef __amd64 + case DATAMODEL_LP64: + if (target_model == DATAMODEL_ILP32 || + target_model == DATAMODEL_LP64) { + lx_user_regs64_t regs; + + if (copyin(uregsp, &regs, sizeof (regs)) != 0) { + return (EFAULT); + } + if (lx_set_user_regs64(lwpd, &regs) != 0) { + return (EIO); + } + return (0); + } + break; +#endif /* __amd64 */ + + default: + break; + } + return (EIO); +} + +int +lx_user_regs_copyout(lx_lwp_data_t *lwpd, void *uregsp) +{ + model_t target_model = lwp_getdatamodel(lwpd->br_lwp); + + switch (get_udatamodel()) { + case DATAMODEL_ILP32: + if (target_model == DATAMODEL_ILP32) { + lx_user_regs32_t regs; + + if (lx_get_user_regs32(lwpd, &regs) != 0) { + return (EIO); + } + if (copyout(&regs, uregsp, sizeof (regs)) != 0) { + return (EFAULT); + } + return (0); + } + break; + +#ifdef __amd64 + case DATAMODEL_LP64: + if (target_model == DATAMODEL_ILP32 || + target_model == DATAMODEL_LP64) { + lx_user_regs64_t regs; + + if (lx_get_user_regs64(lwpd, &regs) != 0) { + return (EIO); + } + if (copyout(&regs, uregsp, sizeof (regs)) != 0) { + return (EFAULT); + } + return (0); + } + break; +#endif /* __amd64 */ + + default: + break; + } + return (EIO); +} + +int +lx_user_fpregs_copyin(lx_lwp_data_t *lwpd, void *uregsp) +{ + model_t target_model = lwp_getdatamodel(lwpd->br_lwp); + + switch (get_udatamodel()) { + case DATAMODEL_ILP32: + if (target_model == DATAMODEL_ILP32) { + lx_user_fpregs32_t regs; + + if (copyin(uregsp, &regs, sizeof (regs)) != 0) { + return (EFAULT); + } + lx_setfpregs32(lwpd, &regs); + return (0); + } + break; + +#ifdef __amd64 + case DATAMODEL_LP64: + if (target_model == DATAMODEL_ILP32 || + target_model == DATAMODEL_LP64) { + lx_user_fpregs64_t regs; + + if (copyin(uregsp, &regs, sizeof (regs)) != 0) { + return (EFAULT); + } + lx_setfpregs64(lwpd, &regs); + return (0); + } + break; +#endif /* __amd64 */ + + default: + break; + } + return (EIO); +} + +int +lx_user_fpregs_copyout(lx_lwp_data_t *lwpd, void *uregsp) +{ + 
model_t target_model = lwp_getdatamodel(lwpd->br_lwp); + + switch (get_udatamodel()) { + case DATAMODEL_ILP32: + if (target_model == DATAMODEL_ILP32) { + lx_user_fpregs32_t regs; + + lx_getfpregs32(lwpd, &regs); + if (copyout(&regs, uregsp, sizeof (regs)) != 0) { + return (EFAULT); + } + return (0); + } + break; + +#ifdef __amd64 + case DATAMODEL_LP64: + if (target_model == DATAMODEL_ILP32 || + target_model == DATAMODEL_LP64) { + lx_user_fpregs64_t regs; + + lx_getfpregs64(lwpd, &regs); + if (copyout(&regs, uregsp, sizeof (regs)) != 0) { + return (EFAULT); + } + return (0); + } + break; +#endif /* __amd64 */ + + default: + break; + } + return (EIO); +} + +/* ARGSUSED */ +int +lx_user_fpxregs_copyin(lx_lwp_data_t *lwpd, void *uregsp) +{ + /* Punt on fpxregs for now */ + return (EIO); +} + +/* ARGSUSED */ +int +lx_user_fpxregs_copyout(lx_lwp_data_t *lwpd, void *uregsp) +{ + /* Punt on fpxregs for now */ + return (EIO); +} + +int +lx_ptrace_peekuser(lx_lwp_data_t *lwpd, uintptr_t offset, void *uptr) +{ + model_t target_model = lwp_getdatamodel(lwpd->br_lwp); + + switch (get_udatamodel()) { + case DATAMODEL_ILP32: + if ((offset & (sizeof (uint32_t) - 1)) != 0) { + /* Must be aligned to 32bit boundary */ + break; + } + if (target_model == DATAMODEL_ILP32) { + uint32_t res; + + if (lx_peekuser32(lwpd, offset, &res) != 0) { + return (EIO); + } + if (copyout(&res, uptr, sizeof (res)) != 0) { + return (EFAULT); + } + return (0); + } + break; + +#ifdef __amd64 + case DATAMODEL_LP64: + if ((offset & (sizeof (uintptr_t) - 1)) != 0) { + /* Must be aligned to 64bit boundary */ + break; + } + if (target_model == DATAMODEL_ILP32 || + target_model == DATAMODEL_LP64) { + uintptr_t res; + + if (lx_peekuser64(lwpd, offset, &res) != 0) { + return (EIO); + } + if (copyout(&res, uptr, sizeof (res)) != 0) { + return (EFAULT); + } + return (0); + } + break; +#endif /* __amd64 */ + + default: + break; + } + return (EIO); +} + +/* ARGSUSED */ +int +lx_ptrace_pokeuser(lx_lwp_data_t *lwpd, uintptr_t 
offset, void *uptr) +{ + return (EIO); +} + + +/* + * Load registers and repoint the stack and program counter. This function is + * used by the B_JUMP_TO_LINUX brand system call to revector to a Linux + * entrypoint. + */ +int +lx_runexe(klwp_t *lwp, void *ucp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + /* + * We should only make it here when transitioning to Linux from + * the NATIVE or INIT mode. + */ + VERIFY(lwpd->br_stack_mode == LX_STACK_MODE_NATIVE || + lwpd->br_stack_mode == LX_STACK_MODE_INIT); + +#if defined(__amd64) + if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { + struct pcb *pcb = &lwp->lwp_pcb; + + /* + * Preserve the %fs/%gsbase value for this LWP, as set and used + * by native illumos code. + */ + lwpd->br_ntv_fsbase = pcb->pcb_fsbase; + lwpd->br_ntv_gsbase = pcb->pcb_gsbase; + + return (getsetcontext(SETCONTEXT, ucp)); + } else { + return (getsetcontext32(SETCONTEXT, ucp)); + } +#else + return (getsetcontext(SETCONTEXT, ucp)); +#endif +} + +/* + * The usermode emulation code is illumos library code. This routine ensures + * the segment registers are set up correctly for native illumos code. It + * should be called _after_ we have stored the outgoing Linux machine state + * but _before_ we return from the kernel to any illumos native code; e.g. the + * usermode emulation library, or any interposed signal handlers. + * + * See the comment on lwp_segregs_save() for how we handle the usermode + * registers when we come into the kernel and see update_sregs() for how we + * restore. + */ +void +lx_switch_to_native(klwp_t *lwp) +{ +#if defined(__amd64) + model_t datamodel = lwp_getdatamodel(lwp); + + switch (datamodel) { + case DATAMODEL_ILP32: { + struct pcb *pcb = &lwp->lwp_pcb; + + /* + * For 32-bit processes, we ensure that the correct %gs value + * is loaded: + */ + kpreempt_disable(); + if (PCB_NEED_UPDATE_SEGS(pcb)) { + /* + * If we are already flushing the segment registers, + * then ensure we are flushing the native %gs. 
+ */ + pcb->pcb_gs = LWPGS_SEL; + } else { + struct regs *rp = lwptoregs(lwp); + + /* + * If we are not flushing the segment registers yet, + * only do so if %gs is not correct already: + */ + if (rp->r_gs != LWPGS_SEL) { + pcb->pcb_gs = LWPGS_SEL; + + /* + * Ensure we go out via update_sregs. + */ + PCB_SET_UPDATE_SEGS(pcb); + } + } + kpreempt_enable(); + break; + } + + case DATAMODEL_LP64: { + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + + /* + * For 64-bit processes we ensure that the correct %fsbase + * value is loaded: + */ + if (lwpd->br_ntv_fsbase != 0) { + struct pcb *pcb = &lwp->lwp_pcb; + + kpreempt_disable(); + if (pcb->pcb_fsbase != lwpd->br_ntv_fsbase) { + pcb->pcb_fsbase = lwpd->br_ntv_fsbase; + + /* + * Ensure we go out via update_sregs. + */ + PCB_SET_UPDATE_SEGS(pcb); + } + kpreempt_enable(); + } + /* + * ... and the correct %gsbase + */ + if (lwpd->br_ntv_gsbase != 0) { + struct pcb *pcb = &lwp->lwp_pcb; + + kpreempt_disable(); + if (pcb->pcb_gsbase != lwpd->br_ntv_gsbase) { + pcb->pcb_gsbase = lwpd->br_ntv_gsbase; + + /* + * Ensure we go out via update_sregs. + */ + PCB_SET_UPDATE_SEGS(pcb); + } + kpreempt_enable(); + } + break; + } + + default: + cmn_err(CE_PANIC, "unknown data model: %d", datamodel); + } +#elif defined(__i386) + struct regs *rp = lwptoregs(lwp); + + rp->r_gs = LWPGS_SEL; +#else +#error "unknown x86" +#endif +} + +#if defined(__amd64) +/* + * Call frame for the 64-bit usermode emulation handler: + * lx_emulate(ucontext_t *ucp, int syscall_num, uintptr_t *args) + * + * old sp: -------------------------------------------------------------- + * | - ucontext_t (register state for emulation) + * | - uintptr_t[6] (system call arguments array) + * V -------------------------------------------------------------- + * new sp: - bogus return address + * + * Arguments are passed in registers, per the AMD64 ABI: %rdi, %rsi and %rdx. 
+ */ +void +lx_emulate_user(klwp_t *lwp, int syscall_num, uintptr_t *args) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + struct regs *rp = lwptoregs(lwp); + label_t lab; + uintptr_t uc_addr; + uintptr_t args_addr; + uintptr_t top; + /* + * Variables used after on_fault() returns for a fault + * must be volatile. + */ + volatile size_t frsz; + volatile uintptr_t sp; + volatile proc_t *p = lwptoproc(lwp); + volatile int watched; + + /* + * We should not be able to get here unless we are running Linux + * code for a system call we cannot emulate in the kernel. + */ + VERIFY(lwpd->br_stack_mode == LX_STACK_MODE_BRAND); + + /* + * The AMD64 ABI requires us to align the return address on the stack + * so that when the called function pushes %rbp, the stack is 16-byte + * aligned. + * + * This routine, like the amd64 version of sendsig(), depends on + * STACK_ALIGN being 16 and STACK_ENTRY_ALIGN being 8. + */ +#if STACK_ALIGN != 16 || STACK_ENTRY_ALIGN != 8 +#error "lx_emulate_user() amd64 did not find the expected stack alignments" +#endif + + /* + * We begin at the current native stack pointer, and reserve space for + * the ucontext_t we are copying onto the stack, as well as the call + * arguments for the usermode emulation handler. + * + * We 16-byte align the entire frame, and then unalign it again by + * adding space for the return address. + */ + frsz = SA(sizeof (ucontext_t)) + SA(6 * sizeof (uintptr_t)) + + sizeof (uintptr_t); + VERIFY((frsz & (STACK_ALIGN - 1UL)) == 8); + VERIFY((frsz & (STACK_ENTRY_ALIGN - 1UL)) == 0); + + if (lwpd->br_ntv_stack == lwpd->br_ntv_stack_current) { + /* + * Nobody else is using the stack right now, so start at the + * top. + */ + top = lwpd->br_ntv_stack_current; + } else { + /* + * Drop below the 128-byte reserved region of the stack frame + * we are interrupting. 
+ */ + top = lwpd->br_ntv_stack_current - STACK_RESERVE; + } + top = top & ~(STACK_ALIGN - 1); + sp = top - frsz; + + uc_addr = top - SA(sizeof (ucontext_t)); + args_addr = uc_addr - SA(6 * sizeof (uintptr_t)); + + watched = watch_disable_addr((caddr_t)sp, frsz, S_WRITE); + + /* + * Save the register state we preserved on the way into this brand + * system call and drop it on the native stack. + */ + { + /* + * Note: the amd64 ucontext_t is 864 bytes. + */ + ucontext_t uc; + + /* + * We do not want to save the signal mask for an emulation + * context. Some emulated system calls alter the signal mask; + * restoring it when the emulation is complete would clobber + * those intentional side effects. + */ + savecontext(&uc, NULL); + + if (on_fault(&lab)) { + goto badstack; + } + + /* + * Mark this as a system call emulation context: + */ + uc.uc_brand_data[0] = (void *)((uintptr_t) + uc.uc_brand_data[0] | LX_UC_FRAME_IS_SYSCALL); + + copyout_noerr(&uc, (void *)(uintptr_t)uc_addr, sizeof (uc)); + } + + DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp, + uintptr_t, lwp->lwp_oldcontext, uintptr_t, uc_addr); + lwp->lwp_oldcontext = (uintptr_t)uc_addr; + + /* + * Copy the system call arguments out to userland: + */ + copyout_noerr(args, (void *)(uintptr_t)args_addr, + 6 * sizeof (uintptr_t)); + + /* + * Drop the bogus return address on the stack. + */ + suword64_noerr((void *)sp, 0); + + no_fault(); + if (watched) { + watch_enable_addr((caddr_t)sp, frsz, S_WRITE); + } + + /* + * Pass the arguments to lx_emulate() in the appropriate registers. + */ + rp->r_rdi = uc_addr; + rp->r_rsi = syscall_num; + rp->r_rdx = args_addr; + + /* + * In order to be able to restore %edx, we need to JUSTRETURN. 
+ */ + lwp->lwp_eosys = JUSTRETURN; + curthread->t_post_sys = 1; + aston(curthread); + + /* + * Set stack pointer and return address to the usermode emulation + * handler: + */ + lwpd->br_stack_mode = LX_STACK_MODE_NATIVE; + lx_lwp_set_native_stack_current(lwpd, sp); + + /* + * Divert execution, on our return, to the usermode emulation stack + * and handler: + */ + rp->r_fp = 0; + rp->r_sp = sp; + rp->r_pc = ptolxproc(p)->l_handler; + + /* + * Fix up segment registers, etc. + */ + lx_switch_to_native(lwp); + + return; + +badstack: + no_fault(); + if (watched) { + watch_enable_addr((caddr_t)sp, frsz, S_WRITE); + } + +#ifdef DEBUG + printf("lx_emulate_user: bad native stack cmd=%s, pid=%d, sp=0x%lx\n", + PTOU(p)->u_comm, p->p_pid, sp); +#endif + + exit(CLD_KILLED, SIGSEGV); +} + +#if defined(_SYSCALL32_IMPL) +/* + * Call frame for the 32-bit usermode emulation handler: + * lx_emulate(ucontext_t *ucp, int syscall_num, uintptr_t *args) + * + * old sp: -------------------------------------------------------------- + * | - ucontext_t (register state for emulation) + * | - uintptr_t[6] (system call arguments array) + * | -------------------------------------------------------------- + * | - arg2: uintptr_t * (pointer to arguments array above) + * | - arg1: int (system call number) + * V - arg0: ucontext_t * (pointer to context above) + * new sp: - bogus return address + */ +struct lx_emu_frame32 { + caddr32_t retaddr; /* 0 */ + caddr32_t ucontextp; /* 4 */ + int32_t syscall_num; /* 8 */ + caddr32_t argsp; /* c */ +}; + +/* + * This function arranges for the lwp to execute the usermode emulation handler + * for this system call. The mechanism is similar to signal handling, and this + * function is modelled on sendsig32(). 
+ */ +void +lx_emulate_user32(klwp_t *lwp, int syscall_num, uintptr_t *args) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + struct regs *rp = lwptoregs(lwp); + label_t lab; + caddr32_t uc_addr; + caddr32_t args_addr; + caddr32_t top; + /* + * Variables used after on_fault() returns for a fault + * must be volatile. + */ + volatile size_t frsz; + volatile caddr32_t sp; + volatile proc_t *p = lwptoproc(lwp); + volatile int watched; + + /* + * We should not be able to get here unless we are running Linux + * code for a system call we cannot emulate in the kernel. + */ + VERIFY(lwpd->br_stack_mode == LX_STACK_MODE_BRAND); + + /* + * We begin at the current native stack pointer, and reserve space for + * the ucontext_t we are copying onto the stack, as well as the call + * arguments for the usermode emulation handler. + */ + frsz = SA32(sizeof (ucontext32_t)) + SA32(6 * sizeof (uint32_t)) + + SA32(sizeof (struct lx_emu_frame32)); + VERIFY((frsz & (STACK_ALIGN32 - 1)) == 0); + + top = (caddr32_t)(lwpd->br_ntv_stack_current & ~(STACK_ALIGN32 - 1)); + sp = top - frsz; + + uc_addr = top - SA32(sizeof (ucontext32_t)); + args_addr = uc_addr - SA32(6 * sizeof (uint32_t)); + + watched = watch_disable_addr((caddr_t)(uintptr_t)sp, frsz, S_WRITE); + + /* + * Save the register state we preserved on the way into this brand + * system call and drop it on the native stack. + */ + { + /* + * Note: ucontext32_t is 512 bytes. + */ + ucontext32_t uc; + + /* + * We do not want to save the signal mask for an emulation + * context. Some emulated system calls alter the signal mask; + * restoring it when the emulation is complete would clobber + * those intentional side effects. 
+ */ + savecontext32(&uc, NULL); + + if (on_fault(&lab)) { + goto badstack; + } + + /* + * Mark this as a system call emulation context: + */ + uc.uc_brand_data[0] |= LX_UC_FRAME_IS_SYSCALL; + copyout_noerr(&uc, (void *)(uintptr_t)uc_addr, sizeof (uc)); + } + + DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp, + uintptr_t, lwp->lwp_oldcontext, uintptr_t, uc_addr); + lwp->lwp_oldcontext = (uintptr_t)uc_addr; + + /* + * Copy the system call arguments out to userland: + */ + { + uint32_t args32[6]; + + args32[0] = args[0]; + args32[1] = args[1]; + args32[2] = args[2]; + args32[3] = args[3]; + args32[4] = args[4]; + args32[5] = args[5]; + + copyout_noerr(&args32, (void *)(uintptr_t)args_addr, + sizeof (args32)); + } + + /* + * Assemble the call frame on the stack. + */ + { + struct lx_emu_frame32 frm; + + frm.retaddr = 0; + frm.ucontextp = uc_addr; + frm.argsp = args_addr; + frm.syscall_num = syscall_num; + + copyout_noerr(&frm, (void *)(uintptr_t)sp, sizeof (frm)); + } + + no_fault(); + if (watched) { + watch_enable_addr((caddr_t)(uintptr_t)sp, frsz, S_WRITE); + } + + /* + * Set stack pointer and return address to the usermode emulation + * handler: + */ + lwpd->br_stack_mode = LX_STACK_MODE_NATIVE; + lx_lwp_set_native_stack_current(lwpd, sp); + + /* + * Divert execution, on our return, to the usermode emulation stack + * and handler: + */ + rp->r_fp = 0; + rp->r_sp = sp; + rp->r_pc = ptolxproc(p)->l_handler; + + /* + * Fix up segment registers, etc. 
+ */ + lx_switch_to_native(lwp); + + return; + +badstack: + no_fault(); + if (watched) { + watch_enable_addr((caddr_t)(uintptr_t)sp, frsz, S_WRITE); + } + +#ifdef DEBUG + printf("lx_emulate_user32: bad native stack cmd=%s, pid=%d, sp=0x%x\n", + PTOU(p)->u_comm, p->p_pid, sp); +#endif + + exit(CLD_KILLED, SIGSEGV); +} +#endif /* _SYSCALL32_IMPL */ + +#else /* !__amd64 (__i386) */ + +/* ARGSUSED */ +void +lx_emulate_user(klwp_t *lwp, int syscall_num, uintptr_t *args) +{ + cmn_err(CE_WARN, "%s: no 32-bit kernel support", __FUNCTION__); + exit(CLD_KILLED, SIGSYS); +} + +#endif /* __amd64 */ diff --git a/usr/src/uts/intel/chxge/Makefile b/usr/src/uts/intel/chxge/Makefile index fd082067f7..1e2c3636d4 100644 --- a/usr/src/uts/intel/chxge/Makefile +++ b/usr/src/uts/intel/chxge/Makefile @@ -21,6 +21,7 @@ # # Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. +# Copyright 2018 Joyent, Inc. # # Copyright (c) 2018, Joyent, Inc. @@ -70,9 +71,9 @@ CFLAGS += -DSUN_KSTATS -DHOST_PAUSE -DTX_CKSUM_FIX -DTX_THREAD_RECLAIM # CFLAGS += -DCH_DEBUG=1 -DPE_DBGOUT_ENABLED=1 # -# Driver depends on GLD & IP +# Driver depends on GLD, IP, and MAC # -LDFLAGS += -dy -N misc/gld -N drv/ip +LDFLAGS += -dy -N misc/gld -N drv/ip -N misc/mac # Lint flag # diff --git a/usr/src/uts/intel/core_pcbe/Makefile b/usr/src/uts/intel/core_pcbe/Makefile index 2834b97621..6286d956a9 100644 --- a/usr/src/uts/intel/core_pcbe/Makefile +++ b/usr/src/uts/intel/core_pcbe/Makefile @@ -67,7 +67,7 @@ MODULE = pcbe.GenuineIntel.6.15 OBJECTS = $(CORE_PCBE_OBJS:%=$(OBJS_DIR)/%) OBJECTS += $(CPCGEN_OBJS:%=$(OBJS_DIR)/%) LINTS = $(CORE_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE) +ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE) # # This order matches the families declared in uts/intel/sys/x86_archext.h. 
@@ -111,7 +111,7 @@ SOFTLINKS = \ pcbe.GenuineIntel.6.95 \ pcbe.GenuineIntel.6.122 -ROOTSOFTLINKS = $(SOFTLINKS:%=$(USR_PCBE_DIR)/%) +ROOTSOFTLINKS = $(SOFTLINKS:%=$(ROOT_PSM_PCBE_DIR)/%) # # Include common rules. diff --git a/usr/src/uts/intel/coretemp/Makefile b/usr/src/uts/intel/coretemp/Makefile new file mode 100644 index 0000000000..9ce4a8ab56 --- /dev/null +++ b/usr/src/uts/intel/coretemp/Makefile @@ -0,0 +1,54 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent, Inc. +# + +UTSBASE = ../.. + +MODULE = coretemp +OBJECTS = $(CORETEMP_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/intel/io/coretemp + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) $(CONFMOD) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Because we need to use cross calls directly, we must include the +# definitions below. Once CMI rdmsr routines have been fixed, we can +# remove this and move out of the platform specific driver world. 
+# +CPPFLAGS += -I$(UTSBASE)/i86pc/ + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/datafilt/Makefile b/usr/src/uts/intel/datafilt/Makefile new file mode 100644 index 0000000000..bc72416406 --- /dev/null +++ b/usr/src/uts/intel/datafilt/Makefile @@ -0,0 +1,74 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2011, OmniTI Computer Consulting, Inc. All rights reserved. +# Copyright 2012, Nexenta Systems, Inc. All rights reserved. +# + + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = datafilt +OBJECTS = $(DATAFILT_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(DATAFILT_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# lint pass one enforcement and OS version +# +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/dev/Makefile b/usr/src/uts/intel/dev/Makefile index c34feb9c18..00c885fc3a 100644 --- a/usr/src/uts/intel/dev/Makefile +++ b/usr/src/uts/intel/dev/Makefile @@ -71,6 +71,7 @@ LINTTAGS += -erroff=E_STATIC_UNUSED CERRWARN += -_gcc=-Wno-parentheses CERRWARN += -_gcc=-Wno-unused-label CERRWARN += -_gcc=-Wno-uninitialized +CERRWARN += -_gcc=-Wno-unused-function # # Default build targets. diff --git a/usr/src/uts/intel/dld/Makefile b/usr/src/uts/intel/dld/Makefile index a46b6046e9..d2bf772b82 100644 --- a/usr/src/uts/intel/dld/Makefile +++ b/usr/src/uts/intel/dld/Makefile @@ -56,7 +56,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -N misc/dls -N misc/mac -INC_PATH += -I$(UTSBASE)/common/io/bpf # # For now, disable these lint checks; maintainers should endeavor diff --git a/usr/src/uts/intel/dls/Makefile b/usr/src/uts/intel/dls/Makefile index 8a267c7a1a..5bf2bdbcf8 100644 --- a/usr/src/uts/intel/dls/Makefile +++ b/usr/src/uts/intel/dls/Makefile @@ -54,7 +54,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -N misc/mac -INC_PATH += -I$(UTSBASE)/common/io/bpf # # For now, disable these lint checks; maintainers should endeavor diff --git a/usr/src/uts/intel/dr_sas/Makefile b/usr/src/uts/intel/dr_sas/Makefile new file mode 100644 index 0000000000..f4871b694a --- /dev/null +++ b/usr/src/uts/intel/dr_sas/Makefile @@ -0,0 +1,90 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# uts/intel/dr_sas/Makefile +# +# This makefile drives the production of the dr_sas driver kernel module. +# +# intel implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = dr_sas +OBJECTS = $(DR_SAS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(DR_SAS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/io/dr_sas + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(CONFMOD) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Kernel Module Dependencies +# +LDFLAGS += -dy -Nmisc/scsi + +CERRWARN += -_gcc=-Wno-unused-label +CERRWARN += -_gcc=-Wno-switch +CERRWARN += -_gcc=-Wno-uninitialized + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/dtrace/fasttrap_isa.c b/usr/src/uts/intel/dtrace/fasttrap_isa.c index 1b93869a73..f9eba2876c 100644 --- a/usr/src/uts/intel/dtrace/fasttrap_isa.c +++ b/usr/src/uts/intel/dtrace/fasttrap_isa.c @@ -24,7 +24,9 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2015, Joyent, Inc. All rights reserved. + */ #include <sys/fasttrap_isa.h> #include <sys/fasttrap_impl.h> @@ -38,6 +40,9 @@ #include <sys/sysmacros.h> #include <sys/trap.h> #include <sys/archsystm.h> +#include <sys/proc.h> +#include <sys/brand.h> +#include <sys/machbrand.h> /* * Lossless User-Land Tracing on x86 @@ -1394,6 +1399,14 @@ fasttrap_pid_probe(struct regs *rp) #if defined(__amd64) if (p->p_model == DATAMODEL_LP64) { addr = lwp->lwp_pcb.pcb_fsbase; + + /* + * If we're branded, convert the fsbase from the + * brand's fsbase to the native fsbase. + */ + if (PROC_IS_BRANDED(p) && BRMOP(p)->b_fsbase != NULL) + addr = BRMOP(p)->b_fsbase(lwp, addr); + addr += sizeof (void *); } else { addr = lwp->lwp_pcb.pcb_gsbase; diff --git a/usr/src/uts/intel/e1000g/Makefile b/usr/src/uts/intel/e1000g/Makefile index 9a82a0e698..d48b8f77a0 100644 --- a/usr/src/uts/intel/e1000g/Makefile +++ b/usr/src/uts/intel/e1000g/Makefile @@ -80,6 +80,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # Driver depends on MAC # LDFLAGS += -dy -N misc/mac +MAPFILES += ddi mac # # Default build targets. @@ -106,4 +107,5 @@ install: $(INSTALL_DEPS) # # Include common targets. # +include $(UTSBASE)/Makefile.mapfile include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/genassym/Makefile b/usr/src/uts/intel/genassym/Makefile new file mode 100644 index 0000000000..ce01dc8610 --- /dev/null +++ b/usr/src/uts/intel/genassym/Makefile @@ -0,0 +1,85 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of genassym.h through +# compile time initialized data. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +GENASSYM_H = $(GENASSYM_DIR)/$(OBJS_DIR)/genassym.h +OFFSETS_SRC = $(GENASSYM_DIR)/offsets.in + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(GENASSYM_H) + +INC_PATH += -I$(UTSBASE)/common/brand/lx + +# +# Overrides +# +CLEANFILES = Nothing_to_remove +CLOBBERFILES = $(GENASSYM_H) Nothing_to_remove + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +clean.lint: + +install: def + +# +# Create genassym.h +# +$(GENASSYM_H): $(OFFSETS_SRC) + $(OFFSETS_CREATE) <$(OFFSETS_SRC) >$@ + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/genassym/offsets.in b/usr/src/uts/intel/genassym/offsets.in new file mode 100644 index 0000000000..70221c02f9 --- /dev/null +++ b/usr/src/uts/intel/genassym/offsets.in @@ -0,0 +1,43 @@ +\ +\ CDDL HEADER START +\ +\ The contents of this file are subject to the terms of the +\ Common Development and Distribution License (the "License"). +\ You may not use this file except in compliance with the License.
+\ +\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +\ or http://www.opensolaris.org/os/licensing. +\ See the License for the specific language governing permissions +\ and limitations under the License. +\ +\ When distributing Covered Code, include this CDDL HEADER in each +\ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +\ If applicable, add the following below this CDDL HEADER, with the +\ fields enclosed by brackets "[]" replaced with your own identifying +\ information: Portions Copyright [yyyy] [name of copyright owner] +\ +\ CDDL HEADER END +\ +\ +\ Copyright 2010 Sun Microsystems, Inc. All rights reserved. +\ Use is subject to license terms. +\ Copyright 2015 Joyent, Inc. +\ + +\ +\ offsets.in: input file to produce the architecture-dependent genassym.h +\ using the ctfstabs program +\ + +#ifndef _GENASSYM +#define _GENASSYM +#endif + +#include <sys/lx_brand.h> + +lx_proc_data + l_handler + +lx_lwp_data + br_lx_fsbase + br_ntv_fsbase diff --git a/usr/src/uts/intel/gsqueue/Makefile b/usr/src/uts/intel/gsqueue/Makefile new file mode 100644 index 0000000000..411e384309 --- /dev/null +++ b/usr/src/uts/intel/gsqueue/Makefile @@ -0,0 +1,49 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +UTSBASE = ../.. 
+ +MODULE = gsqueue +OBJECTS = $(GSQUEUE_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(GSQUEUE_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +LDFLAGS += -dy -Ndrv/ip + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/hyprlofs/Makefile b/usr/src/uts/intel/hyprlofs/Makefile new file mode 100644 index 0000000000..919b045617 --- /dev/null +++ b/usr/src/uts/intel/hyprlofs/Makefile @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/hyprlofs/Makefile +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the hyprlofs file system +# kernel module. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. 
+ +# +# Define the module and object file sets. +# +MODULE = hyprlofs +OBJECTS = $(HYPRLOFS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(HYPRLOFS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/i40e/Makefile b/usr/src/uts/intel/i40e/Makefile index dd7a4940a6..06cb47cba3 100644 --- a/usr/src/uts/intel/i40e/Makefile +++ b/usr/src/uts/intel/i40e/Makefile @@ -48,6 +48,8 @@ SMOFF += all_func_returns LDFLAGS += -dy -N misc/mac +MAPFILES += ddi mac random + .KEEP_STATE: def: $(DEF_DEPS) @@ -66,4 +68,5 @@ clean.lint: $(CLEAN_LINT_DEPS) install: $(INSTALL_DEPS) +include $(UTSBASE)/Makefile.mapfile include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/ia32/krtld/kobj_reloc.c b/usr/src/uts/intel/ia32/krtld/kobj_reloc.c index befd15853a..46b878e2af 100644 --- a/usr/src/uts/intel/ia32/krtld/kobj_reloc.c +++ b/usr/src/uts/intel/ia32/krtld/kobj_reloc.c @@ -117,6 +117,43 @@ sdt_reloc_resolve(struct module *mp, char *symname, uint8_t *instr) return (0); } +/* + * We're relying on the fact that the call we're replacing is + * call (e8) plus 4 bytes of address, making a 5 byte instruction + */ +#define NOP_INSTR 0x90 +#define SMAP_NOPS 5 + +/* + * Note that SMAP is only supported on amd64. In the context of + * ia32 this function only serves to NOP out calls to smap_enable() or + * smap_disable(). 
+ */ +static int +smap_reloc_resolve(struct module *mp, char *symname, uint8_t *instr) +{ + if (strcmp(symname, "smap_enable") == 0 || + strcmp(symname, "smap_disable") == 0) { + +#ifdef KOBJ_DEBUG + if (kobj_debug & D_RELOCATIONS) { + _kobj_printf(ops, "smap_reloc_resolve: %s relocating " + "enable/disable_smap\n", mp->filename); + } +#endif + + /* + * We backtrack one byte here to consume the call + * instruction itself. + */ + memset((void *)instr - 1, NOP_INSTR, SMAP_NOPS); + + return (0); + } + + return (1); +} + int /* ARGSUSED2 */ do_relocate(struct module *mp, char *reltbl, Word relshtype, int nreloc, @@ -218,6 +255,11 @@ do_relocate(struct module *mp, char *reltbl, Word relshtype, int nreloc, continue; if (symref->st_shndx == SHN_UNDEF && + smap_reloc_resolve(mp, mp->strings + + symref->st_name, (uint8_t *)off) == 0) + continue; + + if (symref->st_shndx == SHN_UNDEF && tnf_reloc_resolve(mp->strings + symref->st_name, &symref->st_value, off, &probelist, &taglist) != 0) { diff --git a/usr/src/uts/intel/ia32/ml/copy.s b/usr/src/uts/intel/ia32/ml/copy.s index 7593de374e..f76a8a43cb 100644 --- a/usr/src/uts/intel/ia32/ml/copy.s +++ b/usr/src/uts/intel/ia32/ml/copy.s @@ -36,7 +36,7 @@ /* All Rights Reserved */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018 Joyent, Inc. */ #include <sys/errno.h> @@ -866,8 +866,8 @@ bcopy_patch_start: bcopy_patch_end: .p2align 4 - .globl bcopy_ck_size -bcopy_ck_size: + ALTENTRY(bcopy_ck_size) + cmpq $BCOPY_DFLT_REP, %rdx jae L(use_rep) @@ -956,6 +956,7 @@ L(use_rep): jnz L(do_remainder) ret #undef L + SET_SIZE(bcopy_ck_size) #ifdef DEBUG /* @@ -3138,47 +3139,6 @@ ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied) #endif /* __lint */ -/* - * These functions are used for SMAP, supervisor mode access protection. They - * are hotpatched to become real instructions when the system starts up which is - * done in mlsetup() as a part of enabling the other CR4 related features. 
- * - * Generally speaking, smap_disable() is a stac instruction and smap_enable is a - * clac instruction. It's safe to call these any number of times, and in fact, - * out of paranoia, the kernel will likely call it at several points. - */ - -#if defined(__lint) - -void -smap_enable(void) -{} - -void -smap_disable(void) -{} - -#else - -#if defined (__amd64) || defined(__i386) - ENTRY(smap_disable) - nop - nop - nop - ret - SET_SIZE(smap_disable) - - ENTRY(smap_enable) - nop - nop - nop - ret - SET_SIZE(smap_enable) - -#endif /* __amd64 || __i386 */ - -#endif /* __lint */ - #ifndef __lint .data diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 466aa4f4b5..9ee2ba6908 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -50,7 +50,7 @@ char stubs_base[1], stubs_end[1]; * NOTE: Use NO_UNLOAD_STUBs if the module is NOT unloadable once it is * loaded. */ -#define MAXNARG 10 +#define MAXNARG 12 /* * WARNING: there is no check for forgetting to write END_MODULE, @@ -184,7 +184,7 @@ fcnname/**/_info: \ pushq %rcx pushq %r8 pushq %r9 - /* (next 4 args, if any, are already on the stack above %rbp) */ + /* (next 6 args, if any, are already on the stack above %rbp) */ movq %r15, %rdi call mod_hold_stub /* mod_hold_stub(mod_stub_info *) */ cmpl $-1, %eax /* error? */ @@ -195,7 +195,7 @@ fcnname/**/_info: \ jmp .L2 .L1: /* - * copy MAXNARG == 10 incoming arguments + * copy MAXNARG == 12 incoming arguments */ popq %r9 popq %r8 @@ -219,8 +219,10 @@ fcnname/**/_info: \ pushq (%rsp, %r11, 8) pushq (%rsp, %r11, 8) pushq (%rsp, %r11, 8) + pushq (%rsp, %r11, 8) + pushq (%rsp, %r11, 8) call *(%r15) /* call the stub fn(arg, ..) 
*/ - addq $0x20, %rsp /* pop off last 4 args */ + addq $0x30, %rsp /* pop off last 6 args */ pushq %rax /* save any return values */ pushq %rdx movq %r15, %rdi @@ -345,6 +347,8 @@ fcnname/**/_info: \ pushl (%esp, %ecx, 4) pushl (%esp, %ecx, 4) pushl (%esp, %ecx, 4) + pushl (%esp, %ecx, 4) + pushl (%esp, %ecx, 4) call *(%esi) / call the stub function(arg1,arg2, ...) add $_MUL(MAXNARG, 4), %esp / pop off MAXNARG arguments pushl %eax / save any return values from the stub @@ -1426,6 +1430,23 @@ fcnname/**/_info: \ END_MODULE(apix); #endif +/* + * Stubs for ppt module (bhyve PCI passthrough driver) + */ +#ifndef PPT_MODULE + MODULE(ppt,drv); + WSTUB(ppt, ppt_unassign_all, nomod_zero); + WSTUB(ppt, ppt_map_mmio, nomod_einval); + WSTUB(ppt, ppt_setup_msi, nomod_einval); + WSTUB(ppt, ppt_setup_msix, nomod_einval); + WSTUB(ppt, ppt_assigned_devices, nomod_zero); + WSTUB(ppt, ppt_is_mmio, nomod_zero); + WSTUB(ppt, ppt_assign_device, nomod_einval); + WSTUB(ppt, ppt_unassign_device, nomod_einval); + WSTUB(ppt, ppt_get_limits, nomod_einval); + END_MODULE(ppt); +#endif + / this is just a marker for the area of text that contains stubs ENTRY_NP(stubs_end) diff --git a/usr/src/uts/intel/ia32/ml/swtch.s b/usr/src/uts/intel/ia32/ml/swtch.s index dd0db7b29b..c2c9fd9bd2 100644 --- a/usr/src/uts/intel/ia32/ml/swtch.s +++ b/usr/src/uts/intel/ia32/ml/swtch.s @@ -31,14 +31,6 @@ * Process switching routines. */ -#if defined(__lint) -#include <sys/thread.h> -#include <sys/systm.h> -#include <sys/time.h> -#else /* __lint */ -#include "assym.h" -#endif /* __lint */ - #include <sys/asm_linkage.h> #include <sys/asm_misc.h> #include <sys/regset.h> @@ -47,6 +39,9 @@ #include <sys/segments.h> #include <sys/psw.h> +#if !defined(__lint) +#include "assym.h" + /* * resume(thread_id_t t); * @@ -74,16 +69,10 @@ * off the stack. 
*/ -#if !defined(__lint) - #if LWP_PCB_FPU != 0 #error LWP_PCB_FPU MUST be defined as 0 for code in swtch.s to work #endif /* LWP_PCB_FPU != 0 */ -#endif /* !__lint */ - -#if defined(__amd64) - /* * Save non-volatile regs other than %rsp (%rbx, %rbp, and %r12 - %r15) * @@ -153,88 +142,6 @@ jnz 0b; \ 1: -#elif defined (__i386) - -/* - * Save non-volatile registers (%ebp, %esi, %edi and %ebx) - * - * The stack frame must be created before the save of %esp so that tracebacks - * of swtch()ed-out processes show the process as having last called swtch(). - */ -#define SAVE_REGS(thread_t, retaddr) \ - movl %ebp, T_EBP(thread_t); \ - movl %ebx, T_EBX(thread_t); \ - movl %esi, T_ESI(thread_t); \ - movl %edi, T_EDI(thread_t); \ - pushl %ebp; \ - movl %esp, %ebp; \ - movl %esp, T_SP(thread_t); \ - movl retaddr, T_PC(thread_t); \ - movl 8(%ebp), %edi; \ - pushl %edi; \ - call __dtrace_probe___sched_off__cpu; \ - addl $CLONGSIZE, %esp - -/* - * Restore non-volatile registers (%ebp, %esi, %edi and %ebx) - * - * We don't do a 'leave,' because reloading %rsp/%rbp from the label_t - * already has the effect of putting the stack back the way it was when - * we came in. - */ -#define RESTORE_REGS(scratch_reg) \ - movl %gs:CPU_THREAD, scratch_reg; \ - movl T_EBP(scratch_reg), %ebp; \ - movl T_EBX(scratch_reg), %ebx; \ - movl T_ESI(scratch_reg), %esi; \ - movl T_EDI(scratch_reg), %edi - -/* - * Get pointer to a thread's hat structure - */ -#define GET_THREAD_HATP(hatp, thread_t, scratch_reg) \ - movl T_PROCP(thread_t), hatp; \ - movl P_AS(hatp), scratch_reg; \ - movl A_HAT(scratch_reg), hatp - -/* - * If we are resuming an interrupt thread, store a timestamp in the thread - * structure. If an interrupt occurs between tsc_read() and its subsequent - * store, the timestamp will be stale by the time it is stored. 
We can detect - * this by doing a compare-and-swap on the thread's timestamp, since any - * interrupt occurring in this window will put a new timestamp in the thread's - * t_intr_start field. - */ -#define STORE_INTR_START(thread_t) \ - testw $T_INTR_THREAD, T_FLAGS(thread_t); \ - jz 1f; \ - pushl %ecx; \ -0: \ - pushl T_INTR_START(thread_t); \ - pushl T_INTR_START+4(thread_t); \ - call tsc_read; \ - movl %eax, %ebx; \ - movl %edx, %ecx; \ - popl %edx; \ - popl %eax; \ - cmpxchg8b T_INTR_START(thread_t); \ - jnz 0b; \ - popl %ecx; \ -1: - -#endif /* __amd64 */ - -#if defined(__lint) - -/* ARGSUSED */ -void -resume(kthread_t *t) -{} - -#else /* __lint */ - -#if defined(__amd64) - .global kpti_enable ENTRY(resume) @@ -436,6 +343,8 @@ resume(kthread_t *t) call smap_disable .nosmap: + call ht_mark + /* * Restore non-volatile registers, then have spl0 return to the * resuming thread's PC after first setting the priority as low as @@ -456,202 +365,6 @@ resume_return: SET_SIZE(_resume_from_idle) SET_SIZE(resume) -#elif defined (__i386) - - ENTRY(resume) - movl %gs:CPU_THREAD, %eax - movl $resume_return, %ecx - - /* - * Save non-volatile registers, and set return address for current - * thread to resume_return. - * - * %edi = t (new thread) when done. - */ - SAVE_REGS(%eax, %ecx) - - LOADCPU(%ebx) /* %ebx = CPU */ - movl CPU_THREAD(%ebx), %esi /* %esi = curthread */ - -#ifdef DEBUG - call assert_ints_enabled /* panics if we are cli'd */ -#endif - /* - * Call savectx if thread has installed context ops. - * - * Note that if we have floating point context, the save op - * (either fpsave_begin or fpxsave_begin) will issue the - * async save instruction (fnsave or fxsave respectively) - * that we fwait for below. - */ - movl T_CTX(%esi), %eax /* should current thread savectx? 
*/ - testl %eax, %eax - jz .nosavectx /* skip call when zero */ - pushl %esi /* arg = thread pointer */ - call savectx /* call ctx ops */ - addl $4, %esp /* restore stack pointer */ -.nosavectx: - - /* - * Call savepctx if process has installed context ops. - */ - movl T_PROCP(%esi), %eax /* %eax = proc */ - cmpl $0, P_PCTX(%eax) /* should current thread savectx? */ - je .nosavepctx /* skip call when zero */ - pushl %eax /* arg = proc pointer */ - call savepctx /* call ctx ops */ - addl $4, %esp -.nosavepctx: - - /* - * Temporarily switch to the idle thread's stack - */ - movl CPU_IDLE_THREAD(%ebx), %eax /* idle thread pointer */ - - /* - * Set the idle thread as the current thread - */ - movl T_SP(%eax), %esp /* It is safe to set esp */ - movl %eax, CPU_THREAD(%ebx) - - /* switch in the hat context for the new thread */ - GET_THREAD_HATP(%ecx, %edi, %ecx) - pushl %ecx - call hat_switch - addl $4, %esp - - /* - * Clear and unlock previous thread's t_lock - * to allow it to be dispatched by another processor. - */ - movb $0, T_LOCK(%esi) - - /* - * IMPORTANT: Registers at this point must be: - * %edi = new thread - * - * Here we are in the idle thread, have dropped the old thread. - */ - ALTENTRY(_resume_from_idle) - /* - * spin until dispatched thread's mutex has - * been unlocked. this mutex is unlocked when - * it becomes safe for the thread to run. - */ -.L4: - lock - btsl $0, T_LOCK(%edi) /* lock new thread's mutex */ - jc .L4_2 /* lock did not succeed */ - - /* - * Fix CPU structure to indicate new running thread. - * Set pointer in new thread to the CPU structure. - */ - LOADCPU(%esi) /* load current CPU pointer */ - movl T_STACK(%edi), %eax /* here to use v pipeline of */ - /* Pentium. Used few lines below */ - cmpl %esi, T_CPU(%edi) - jne .L5_2 -.L5_1: - /* - * Setup esp0 (kernel stack) in TSS to curthread's stack. - * (Note: Since we don't have saved 'regs' structure for all - * the threads we can't easily determine if we need to - * change esp0. 
So, we simply change the esp0 to bottom - * of the thread stack and it will work for all cases.) - */ - movl CPU_TSS(%esi), %ecx - addl $REGSIZE+MINFRAME, %eax /* to the bottom of thread stack */ -#if !defined(__xpv) - movl %eax, TSS_ESP0(%ecx) -#else - pushl %eax - pushl $KDS_SEL - call HYPERVISOR_stack_switch - addl $8, %esp -#endif /* __xpv */ - - movl %edi, CPU_THREAD(%esi) /* set CPU's thread pointer */ - mfence /* synchronize with mutex_exit() */ - xorl %ebp, %ebp /* make $<threadlist behave better */ - movl T_LWP(%edi), %eax /* set associated lwp to */ - movl %eax, CPU_LWP(%esi) /* CPU's lwp ptr */ - - movl T_SP(%edi), %esp /* switch to outgoing thread's stack */ - movl T_PC(%edi), %esi /* saved return addr */ - - /* - * Call restorectx if context ops have been installed. - */ - movl T_CTX(%edi), %eax /* should resumed thread restorectx? */ - testl %eax, %eax - jz .norestorectx /* skip call when zero */ - pushl %edi /* arg = thread pointer */ - call restorectx /* call ctx ops */ - addl $4, %esp /* restore stack pointer */ -.norestorectx: - - /* - * Call restorepctx if context ops have been installed for the proc. - */ - movl T_PROCP(%edi), %eax - cmpl $0, P_PCTX(%eax) - je .norestorepctx - pushl %eax /* arg = proc pointer */ - call restorepctx - addl $4, %esp /* restore stack pointer */ -.norestorepctx: - - STORE_INTR_START(%edi) - - /* - * Restore non-volatile registers, then have spl0 return to the - * resuming thread's PC after first setting the priority as low as - * possible and blocking all interrupt threads that may be active. 
- */ - movl %esi, %eax /* save return address */ - RESTORE_REGS(%ecx) - pushl %eax /* push return address for spl0() */ - call __dtrace_probe___sched_on__cpu - jmp spl0 - -resume_return: - /* - * Remove stack frame created in SAVE_REGS() - */ - addl $CLONGSIZE, %esp - ret - -.L4_2: - pause - cmpb $0, T_LOCK(%edi) - je .L4 - jmp .L4_2 - -.L5_2: - /* cp->cpu_stats.sys.cpumigrate++ */ - addl $1, CPU_STATS_SYS_CPUMIGRATE(%esi) - adcl $0, CPU_STATS_SYS_CPUMIGRATE+4(%esi) - movl %esi, T_CPU(%edi) /* set new thread's CPU pointer */ - jmp .L5_1 - - SET_SIZE(_resume_from_idle) - SET_SIZE(resume) - -#endif /* __amd64 */ -#endif /* __lint */ - -#if defined(__lint) - -/* ARGSUSED */ -void -resume_from_zombie(kthread_t *t) -{} - -#else /* __lint */ - -#if defined(__amd64) - ENTRY(resume_from_zombie) movq %gs:CPU_THREAD, %rax leaq resume_from_zombie_return(%rip), %r11 @@ -726,88 +439,6 @@ resume_from_zombie_return: ret SET_SIZE(resume_from_zombie) -#elif defined (__i386) - - ENTRY(resume_from_zombie) - movl %gs:CPU_THREAD, %eax - movl $resume_from_zombie_return, %ecx - - /* - * Save non-volatile registers, and set return address for current - * thread to resume_from_zombie_return. - * - * %edi = t (new thread) when done. - */ - SAVE_REGS(%eax, %ecx) - -#ifdef DEBUG - call assert_ints_enabled /* panics if we are cli'd */ -#endif - movl %gs:CPU_THREAD, %esi /* %esi = curthread */ - - /* clean up the fp unit. It might be left enabled */ - - movl %cr0, %eax - testl $CR0_TS, %eax - jnz .zfpu_disabled /* if TS already set, nothing to do */ - fninit /* init fpu & discard pending error */ - orl $CR0_TS, %eax - movl %eax, %cr0 -.zfpu_disabled: - - /* - * Temporarily switch to the idle thread's stack so that the zombie - * thread's stack can be reclaimed by the reaper. - */ - movl %gs:CPU_IDLE_THREAD, %eax /* idle thread pointer */ - movl T_SP(%eax), %esp /* get onto idle thread stack */ - - /* - * Set the idle thread as the current thread. 
- */ - movl %eax, %gs:CPU_THREAD - - /* - * switch in the hat context for the new thread - */ - GET_THREAD_HATP(%ecx, %edi, %ecx) - pushl %ecx - call hat_switch - addl $4, %esp - - /* - * Put the zombie on death-row. - */ - pushl %esi - call reapq_add - addl $4, %esp - jmp _resume_from_idle /* finish job of resume */ - -resume_from_zombie_return: - RESTORE_REGS(%ecx) /* restore non-volatile registers */ - call __dtrace_probe___sched_on__cpu - - /* - * Remove stack frame created in SAVE_REGS() - */ - addl $CLONGSIZE, %esp - ret - SET_SIZE(resume_from_zombie) - -#endif /* __amd64 */ -#endif /* __lint */ - -#if defined(__lint) - -/* ARGSUSED */ -void -resume_from_intr(kthread_t *t) -{} - -#else /* __lint */ - -#if defined(__amd64) - ENTRY(resume_from_intr) movq %gs:CPU_THREAD, %rax leaq resume_from_intr_return(%rip), %r11 @@ -834,6 +465,8 @@ resume_from_intr(kthread_t *t) STORE_INTR_START(%r12) + call ht_mark + /* * Restore non-volatile registers, then have spl0 return to the * resuming thread's PC after first setting the priority as low as @@ -853,69 +486,6 @@ resume_from_intr_return: ret SET_SIZE(resume_from_intr) -#elif defined (__i386) - - ENTRY(resume_from_intr) - movl %gs:CPU_THREAD, %eax - movl $resume_from_intr_return, %ecx - - /* - * Save non-volatile registers, and set return address for current - * thread to resume_return. - * - * %edi = t (new thread) when done. - */ - SAVE_REGS(%eax, %ecx) - -#ifdef DEBUG - call assert_ints_enabled /* panics if we are cli'd */ -#endif - movl %gs:CPU_THREAD, %esi /* %esi = curthread */ - movl %edi, %gs:CPU_THREAD /* set CPU's thread pointer */ - mfence /* synchronize with mutex_exit() */ - movl T_SP(%edi), %esp /* restore resuming thread's sp */ - xorl %ebp, %ebp /* make $<threadlist behave better */ - - /* - * Unlock outgoing thread's mutex dispatched by another processor. 
- xorl %eax,%eax - xchgb %al, T_LOCK(%esi) - - STORE_INTR_START(%edi) - - /* - * Restore non-volatile registers, then have spl0 return to the - * resuming thread's PC after first setting the priority as low as - * possible and blocking all interrupt threads that may be active. - */ - movl T_PC(%edi), %eax /* saved return addr */ - RESTORE_REGS(%ecx) - pushl %eax /* push return address for spl0() */ - call __dtrace_probe___sched_on__cpu - jmp spl0 - -resume_from_intr_return: - /* - * Remove stack frame created in SAVE_REGS() - */ - addl $CLONGSIZE, %esp - ret - SET_SIZE(resume_from_intr) - -#endif /* __amd64 */ -#endif /* __lint */ - -#if defined(__lint) - -void -thread_start(void) -{} - -#else /* __lint */ - -#if defined(__amd64) - ENTRY(thread_start) popq %rax /* start() */ popq %rdi /* arg */ @@ -926,18 +496,42 @@ thread_start(void) /*NOTREACHED*/ SET_SIZE(thread_start) -#elif defined(__i386) - - ENTRY(thread_start) - popl %eax - movl %esp, %ebp - addl $8, %ebp - call *%eax - addl $8, %esp - call thread_exit /* destroy thread if it returns. */ - /*NOTREACHED*/ - SET_SIZE(thread_start) - -#endif /* __i386 */ + ENTRY(thread_splitstack_run) + pushq %rbp /* push base pointer */ + movq %rsp, %rbp /* construct frame */ + movq %rdi, %rsp /* set stack pointer */ + movq %rdx, %rdi /* load arg */ + call *%rsi /* call specified function */ + leave /* pop base pointer */ + ret + SET_SIZE(thread_splitstack_run) + + /* + * Once we're back on our own stack, we need to be sure to set the + * value of rsp0 in the TSS back to our original stack: if we gave + * up the CPU at all while on our split stack, the rsp0 will point + * to that stack from resume (above); if we were to try to return to + * userland in that state, we will die absolutely horribly (namely, + * trying to iretq back to registers in a bunch of freed segkp). We + * are expecting this to be called after T_STACK has been restored, + * but before we return.
It's okay if we are preempted in this code: + * when the new CPU picks us up, they will automatically set rsp0 + * correctly, which is all we're trying to do here. + */ + ENTRY(thread_splitstack_cleanup) + LOADCPU(%r8) + movq CPU_TSS(%r8), %r9 + cmpq $1, kpti_enable + jne 1f + leaq CPU_KPTI_TR_RSP(%r8), %rax + jmp 2f +1: + movq CPU_THREAD(%r8), %r10 + movq T_STACK(%r10), %rax + addq $REGSIZE+MINFRAME, %rax +2: + movq %rax, TSS_RSP0(%r9) + ret + SET_SIZE(thread_splitstack_cleanup) -#endif /* __lint */ +#endif /* !__lint */ diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index 99b9777858..c918f63bd2 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -573,6 +573,13 @@ ucontext_32ton(const ucontext32_t *src, ucontext_t *dst) if (src->uc_flags & UC_FPU) fpregset_32ton(&src->uc_mcontext.fpregs, &dst->uc_mcontext.fpregs); + + /* + * Copy the brand-private data: + */ + dst->uc_brand_data[0] = (void *)(uintptr_t)src->uc_brand_data[0]; + dst->uc_brand_data[1] = (void *)(uintptr_t)src->uc_brand_data[1]; + dst->uc_brand_data[2] = (void *)(uintptr_t)src->uc_brand_data[2]; } #endif /* _SYSCALL32_IMPL */ @@ -627,9 +634,11 @@ getuserpc() #define IS_NOT_CS 0 /*ARGSUSED*/ -static greg_t +greg_t fix_segreg(greg_t sr, int iscs, model_t datamodel) { + kthread_t *t = curthread; + switch (sr &= 0xffff) { case 0: @@ -666,6 +675,19 @@ fix_segreg(greg_t sr, int iscs, model_t datamodel) } /* + * Allow this process's brand to do any necessary segment register + * manipulation. 
+ */ + if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) { + greg_t bsr = BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel); + + if (bsr == 0 && iscs == IS_CS) + return (0 | SEL_UPL); + else + return (bsr); + } + + /* * Force it into the LDT in ring 3 for 32-bit processes, which by * default do not have an LDT, so that any attempt to use an invalid * selector will reference the (non-existant) LDT, and cause a #gp diff --git a/usr/src/uts/intel/ia32/os/comm_page_util.c b/usr/src/uts/intel/ia32/os/comm_page_util.c index 4150853813..14fcf9ca57 100644 --- a/usr/src/uts/intel/ia32/os/comm_page_util.c +++ b/usr/src/uts/intel/ia32/os/comm_page_util.c @@ -39,12 +39,12 @@ comm_page_mapin() { #if defined(__amd64) && !defined(__xpv) proc_t *p = curproc; - caddr_t addr = NULL; + caddr_t addr = (caddr_t)COMM_PAGE_ALIGN; size_t len = COMM_PAGE_SIZE; uint_t prot = PROT_USER | PROT_READ; segumap_crargs_t suarg; - map_addr(&addr, len, (offset_t)0, 1, 0); + map_addr(&addr, len, (offset_t)0, 1, MAP_ALIGN); if (addr == NULL || valid_usr_range(addr, len, prot, p->p_as, p->p_as->a_userlimit) != RANGE_OKAY) { return (NULL); diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c index 8a6ae25a70..8e0a4edd61 100644 --- a/usr/src/uts/intel/ia32/os/desctbls.c +++ b/usr/src/uts/intel/ia32/os/desctbls.c @@ -167,7 +167,7 @@ struct interposing_handler { * The brand infrastructure interposes on two handlers, and we use one as a * NULL signpost. */ -static struct interposing_handler brand_tbl[2]; +static struct interposing_handler brand_tbl[3]; /* * software prototypes for default local descriptor table @@ -984,6 +984,13 @@ init_idt_common(gate_desc_t *idt) KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE)); /* + * install "int80" handler at, well, 0x80. + */ + set_gatesegd(&idt0[T_INT80], + (kpti_enable == 1) ? &tr_sys_int80 : &sys_int80, + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_INT80)); + + /* * install fast trap handler at 210. 
*/ set_gatesegd(&idt[T_FASTTRAP], @@ -1005,18 +1012,25 @@ init_idt_common(gate_desc_t *idt) KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET)); /* - * Prepare interposing descriptor for the syscall handler - * and cache copy of the default descriptor. + * Prepare interposing descriptors for the branded "int80" + * and syscall handlers and cache copies of the default + * descriptors. */ - brand_tbl[0].ih_inum = T_SYSCALLINT; - brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT]; - + brand_tbl[0].ih_inum = T_INT80; + brand_tbl[0].ih_default_desc = idt0[T_INT80]; set_gatesegd(&(brand_tbl[0].ih_interp_desc), + (kpti_enable == 1) ? &tr_brand_sys_int80 : &brand_sys_int80, + KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_INT80)); + + brand_tbl[1].ih_inum = T_SYSCALLINT; + brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT]; + + set_gatesegd(&(brand_tbl[1].ih_interp_desc), (kpti_enable == 1) ? &tr_brand_sys_syscall_int : &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT)); - brand_tbl[1].ih_inum = 0; + brand_tbl[2].ih_inum = 0; } #if defined(__xpv) diff --git a/usr/src/uts/intel/ia32/os/sendsig.c b/usr/src/uts/intel/ia32/os/sendsig.c index b7b79f38ca..cf6c623b7a 100644 --- a/usr/src/uts/intel/ia32/os/sendsig.c +++ b/usr/src/uts/intel/ia32/os/sendsig.c @@ -20,6 +20,9 @@ */ /* + * Copyright 2015 Joyent, Inc. + */ +/* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -87,6 +90,8 @@ #include <sys/kdi.h> #include <sys/contract_impl.h> #include <sys/x86_archext.h> +#include <sys/brand.h> +#include <sys/sdt.h> /* * Construct the execution environment for the user's signal @@ -186,7 +191,18 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) && !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE)); - if (newstack) { + /* + * If this is a branded process, the brand may provide an alternate + * stack pointer for signal delivery: + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig_stack != NULL) { + /* + * Use the stack pointer value provided by the brand, + * accounting for the 128-byte reserved region. + */ + newstack = 0; + fp = BROP(p)->b_sendsig_stack(sig) - STACK_RESERVE; + } else if (newstack) { fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) + SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN); } else { @@ -296,6 +312,8 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) kmem_free(tuc, sizeof (*tuc)); tuc = NULL; + DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp, + uintptr_t, lwp->lwp_oldcontext, uintptr_t, (uintptr_t)uc); lwp->lwp_oldcontext = (uintptr_t)uc; if (newstack) { @@ -345,6 +363,14 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) } /* + * Allow the brand to perform additional book-keeping once the signal + * handling frame has been fully assembled: + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig != NULL) { + BROP(p)->b_sendsig(sig); + } + + /* * Don't set lwp_eosys here. sendsig() is called via psig() after * lwp_eosys is handled, so setting it here would affect the next * system call. 
@@ -420,7 +446,17 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)()) newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) && !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE)); - if (newstack) { + /* + * If this is a branded process, the brand may provide an alternate + * stack pointer for signal delivery: + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig_stack != NULL) { + /* + * Use the stack pointer value provided by the brand: + */ + newstack = 0; + fp = BROP(p)->b_sendsig_stack(sig); + } else if (newstack) { fp = (caddr_t)(SA32((uintptr_t)lwp->lwp_sigaltstack.ss_sp) + SA32(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN32); } else if ((rp->r_ss & 0xffff) != UDS_SEL) { @@ -435,8 +471,9 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)()) USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]); else fp = (caddr_t)rp->r_sp; - } else + } else { fp = (caddr_t)rp->r_sp; + } /* * Force proper stack pointer alignment, even in the face of a @@ -517,6 +554,8 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)()) kmem_free(tuc, sizeof (*tuc)); tuc = NULL; + DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp, + uintptr_t, lwp->lwp_oldcontext, uintptr_t, (uintptr_t)uc); lwp->lwp_oldcontext = (uintptr_t)uc; if (newstack) { @@ -566,6 +605,14 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)()) } /* + * Allow the brand to perform additional book-keeping once the signal + * handling frame has been fully assembled: + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig != NULL) { + BROP(p)->b_sendsig(sig); + } + + /* * Don't set lwp_eosys here. sendsig() is called via psig() after * lwp_eosys is handled, so setting it here would affect the next * system call. 
@@ -643,7 +690,17 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) && !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE)); - if (newstack) { + /* + * If this is a branded process, the brand may provide an alternate + * stack pointer for signal delivery: + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig_stack != NULL) { + /* + * Use the stack pointer value provided by the brand: + */ + newstack = 0; + fp = BROP(p)->b_sendsig_stack(sig); + } else if (newstack) { fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) + SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN); } else if ((rp->r_ss & 0xffff) != UDS_SEL) { @@ -658,8 +715,9 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]); else fp = (caddr_t)rp->r_sp; - } else + } else { fp = (caddr_t)rp->r_sp; + } /* * Force proper stack pointer alignment, even in the face of a @@ -737,6 +795,8 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) kmem_free(tuc, sizeof (*tuc)); tuc = NULL; + DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp, + uintptr_t, lwp->lwp_oldcontext, uintptr_t, (uintptr_t)uc); lwp->lwp_oldcontext = (uintptr_t)uc; if (newstack) { @@ -774,6 +834,14 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) } /* + * Allow the brand to perform additional book-keeping once the signal + * handling frame has been fully assembled: + */ + if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig != NULL) { + BROP(p)->b_sendsig(sig); + } + + /* * Don't set lwp_eosys here. sendsig() is called via psig() after * lwp_eosys is handled, so setting it here would affect the next * system call. diff --git a/usr/src/uts/intel/ia32/syscall/getcontext.c b/usr/src/uts/intel/ia32/syscall/getcontext.c index d5dfd5a9cd..38e908acaf 100644 --- a/usr/src/uts/intel/ia32/syscall/getcontext.c +++ b/usr/src/uts/intel/ia32/syscall/getcontext.c @@ -20,6 +20,9 @@ */ /* + * Copyright 2015 Joyent, Inc. 
+ */ +/* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -46,6 +49,7 @@ #include <sys/schedctl.h> #include <sys/debug.h> #include <sys/sysmacros.h> +#include <sys/sdt.h> /* * Save user context. @@ -125,7 +129,23 @@ savecontext(ucontext_t *ucp, const k_sigset_t *mask) else ucp->uc_flags &= ~UC_FPU; - sigktou(mask, &ucp->uc_sigmask); + if (mask != NULL) { + /* + * Save signal mask. + */ + sigktou(mask, &ucp->uc_sigmask); + } else { + ucp->uc_flags &= ~UC_SIGMASK; + bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask)); + } + + if (PROC_IS_BRANDED(p) && BROP(p)->b_savecontext != NULL) { + /* + * Allow the brand the chance to modify the context we + * saved: + */ + BROP(p)->b_savecontext(ucp); + } } /* @@ -136,7 +156,19 @@ restorecontext(ucontext_t *ucp) { kthread_t *t = curthread; klwp_t *lwp = ttolwp(t); + proc_t *p = lwptoproc(lwp); + if (PROC_IS_BRANDED(p) && BROP(p)->b_restorecontext != NULL) { + /* + * Allow the brand the chance to modify the context before + * we restore it: + */ + BROP(p)->b_restorecontext(ucp); + } + + DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp, + uintptr_t, lwp->lwp_oldcontext, + uintptr_t, (uintptr_t)ucp->uc_link); lwp->lwp_oldcontext = (uintptr_t)ucp->uc_link; if (ucp->uc_flags & UC_STACK) { @@ -184,6 +216,7 @@ getsetcontext(int flag, void *arg) ucontext_t *ucp; klwp_t *lwp = ttolwp(curthread); stack_t dummy_stk; + proc_t *p = lwptoproc(lwp); /* * In future releases, when the ucontext structure grows, @@ -228,6 +261,15 @@ getsetcontext(int flag, void *arg) return (set_errno(EFAULT)); } + /* + * If this is a branded process, copy in the brand-private + * data: + */ + if (PROC_IS_BRANDED(p) && copyin(&ucp->uc_brand_data, + &uc.uc_brand_data, sizeof (uc.uc_brand_data)) != 0) { + return (set_errno(EFAULT)); + } + restorecontext(&uc); if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0)) @@ -311,7 +353,23 @@ savecontext32(ucontext32_t *ucp, const k_sigset_t *mask) else ucp->uc_flags &= 
~UC_FPU; - sigktou(mask, &ucp->uc_sigmask); + if (mask != NULL) { + /* + * Save signal mask. + */ + sigktou(mask, &ucp->uc_sigmask); + } else { + ucp->uc_flags &= ~UC_SIGMASK; + bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask)); + } + + if (PROC_IS_BRANDED(p) && BROP(p)->b_savecontext32 != NULL) { + /* + * Allow the brand the chance to modify the context we + * saved: + */ + BROP(p)->b_savecontext32(ucp); + } } int @@ -323,6 +381,7 @@ getsetcontext32(int flag, void *arg) klwp_t *lwp = ttolwp(curthread); caddr32_t ustack32; stack32_t dummy_stk32; + proc_t *p = lwptoproc(lwp); switch (flag) { default: @@ -354,6 +413,15 @@ getsetcontext32(int flag, void *arg) return (set_errno(EFAULT)); } + /* + * If this is a branded process, copy in the brand-private + * data: + */ + if (PROC_IS_BRANDED(p) && copyin(&ucp->uc_brand_data, + &uc.uc_brand_data, sizeof (uc.uc_brand_data)) != 0) { + return (set_errno(EFAULT)); + } + ucontext_32ton(&uc, &ucnat); restorecontext(&ucnat); diff --git a/usr/src/uts/intel/icmp/Makefile b/usr/src/uts/intel/icmp/Makefile index 4a88d2a885..f2a07516b8 100644 --- a/usr/src/uts/intel/icmp/Makefile +++ b/usr/src/uts/intel/icmp/Makefile @@ -64,6 +64,8 @@ ALL_TARGET = $(BINARY) $(SRC_CONFFILE) LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) +INC_PATH += -I$(UTSBASE)/common/io/bpf + # # depends on ip and sockfs # diff --git a/usr/src/uts/intel/igb/Makefile b/usr/src/uts/intel/igb/Makefile index da31ce8a0a..1f6864efbe 100644 --- a/usr/src/uts/intel/igb/Makefile +++ b/usr/src/uts/intel/igb/Makefile @@ -69,6 +69,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # Driver depends on MAC # LDFLAGS += -dy -N misc/mac +MAPFILES += ddi mac random # # Default build targets. @@ -94,4 +95,5 @@ install: $(INSTALL_DEPS) # # Include common targets. 
# +include $(UTSBASE)/Makefile.mapfile include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/inotify/Makefile b/usr/src/uts/intel/inotify/Makefile new file mode 100644 index 0000000000..80e7a80404 --- /dev/null +++ b/usr/src/uts/intel/inotify/Makefile @@ -0,0 +1,70 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = inotify +OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR +CERRWARN += -_gcc=-Wno-parentheses +LDFLAGS += -dy -Nfs/specfs + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets.
+# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c new file mode 100644 index 0000000000..11bddfa515 --- /dev/null +++ b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c @@ -0,0 +1,1015 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * AMD Family 17 Northbridge and Data Fabric Driver + * + * This driver attaches to the AMD Family 17h northbridge and data fabric bus. + * Each Zeppelin die ('processor node' in cpuid.c parlance) has its own + * northbridge and access to the data fabric bus. The northbridge and data + * fabric both provide access to various features such as: + * + * - The System Management Network (SMN) + * - Data Fabric via Fabric Indirect Config Access (FICAA) + * + * These are required to access things such as temperature sensors or memory + * controller configuration registers. + * + * In AMD Family 17h systems, the 'northbridge' is an ASIC that is part of the + * package that contains many I/O capabilities related to things like PCI + * express, etc. The 'data fabric' is the means by which different components + * both inside the socket and multiple sockets are connected together. Both the + * northbridge and the data fabric have dedicated PCI devices which the + * operating system can use to interact with them. 
+ * + * ------------------------ + * Mapping Devices Together + * ------------------------ + * + * The operating system needs to expose things like temperature sensors and DRAM + * configuration registers in terms that are meaningful to the system such as + * logical CPUs, cores, etc. This driver attaches to the PCI IDs that represent + * the northbridge and data fabric; however, there are multiple PCI devices (one + * per die) that exist. This driver does manage to map all of these three things + * together; however, it requires some acrobatics. Unfortunately, there's no + * direct way to map a northbridge to its corresponding die. However, we can map + * a CPU die to a data fabric PCI device and a data fabric PCI device to a + * corresponding northbridge PCI device. + * + * In current Zen based products, there is a direct mapping between processor + * nodes and a data fabric PCI device. All of the devices are on PCI Bus 0 and + * start from Device 0x18. Device 0x18 maps to processor node 0, 0x19 to + * processor node 1, etc. This means that to map a logical CPU to a data fabric + * device, we take its processor node id, add it to 0x18 and find the PCI device + * that is on bus 0 at the resulting device number. As each data fabric device + * is attached based on its PCI ID, we add it to the global list, amd_nbdf_dfs, + * that is in the amdf17nbdf_t structure. + * + * The northbridge PCI device has a defined device and function, but the PCI bus + * that it's on can vary. Each die has its own series of PCI buses that are + * assigned to it and the northbridge PCI device is on the first die-specific + * PCI bus for each die. This also means that the northbridge will not show up + * on PCI bus 0, which is the PCI bus that all of the data fabric devices are + * on. While conventionally the northbridge with the lowest PCI bus value + * would correspond to processor node zero, hardware does not guarantee that at + * all.
Because we don't want to be at the mercy of firmware, we don't rely on + * this ordering, even though we have yet to find a system that deviates from + * this scheme. + * + * One of the registers in the data fabric device's function 0 + * (AMDF17_DF_CFG_ADDR_CTL), happens to have the first PCI bus that is + * associated with the processor node. This means, that we can map a data fabric + * device to a northbridge by finding the northbridge whose PCI bus matches the + * value in the corresponding data fabric's AMDF17_DF_CFG_ADDR_CTL. + * + * This means that we can map a northbridge to a data fabric device and a data + * fabric device to a die. Because these are 1:1 mappings, there is a transitive + * relationship and therefore we know which northbridge is associated with which + * processor die. This is summarized in the following image: + * + * +-------+ +----------------------------+ +--------------+ + * | Die 0 | ---> | Data Fabric PCI BDF 0/18/0 |-------> | Northbridge | + * +-------+ | AMDF17_DF_CFG_ADDR: bus 10 | | PCI 10/0/0 | + * ... +----------------------------+ +--------------+ + * +-------+ +------------------------------+ +--------------+ + * | Die n | ---> | Data Fabric PCI BDF 0/18+n/0 |-------> | Northbridge | + * +-------+ | AMDF17_DF_CFG_ADDR: bus 133 | | PCI 133/0/0 | + * +------------------------------+ +--------------+ + * + * Note, the PCI buses used by the northbridges here are arbitrary. They do not + * reflect the actual values by hardware; however, the bus/device/function (BDF) + * of the data fabric accurately models hardware. All of the BDF values are in + * hex. + * + * ------------------------------- + * Attach and Detach Complications + * ------------------------------- + * + * Because we need to map different PCI devices together, this means that we + * have multiple dev_info_t structures that we need to manage. Each of these is + * independently attached and detached. While this is easily managed for attach, + * it is not for detach. 
+ * + * Once a device has been detached it will only come back if we have an active + * minor node that will be accessed. While we have minor nodes associated with + * the northbridges, we don't with the data fabric devices. This means that if + * they are detached, nothing would ever cause them to be reattached. The system + * also doesn't provide us a way or any guarantees around making sure that we're + * attached to all such devices before we detach. As a result, unfortunately, + * it's easier to basically have detach always fail. + * + * To deal with both development and if issues arise in the field, there is a + * knob, amdf17nbdf_allow_detach, which if set to a non-zero value, will allow + * instances to detach. + * + * --------------- + * Exposed Devices + * --------------- + * + * Currently we expose a single set of character devices which represent + * temperature sensors for this family of processors. Because temperature + * sensors exist on a per-processor node basis, we create a single minor node + * for each one. Because our naming matches the cpuid naming, FMA can match that + * up to logical CPUs and take care of matching the sensors appropriately. We + * internally rate limit the sensor updates to 100ms, which is controlled by the + * global amdf17nbdf_cache_ms. + */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/open.h> +#include <sys/cred.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/cmn_err.h> +#include <sys/list.h> +#include <sys/pci.h> +#include <sys/stddef.h> +#include <sys/stat.h> +#include <sys/x86_archext.h> +#include <sys/cpuvar.h> +#include <sys/sensors.h> + +/* + * The range of minors that we'll allow. + */ +#define AMDF17_MINOR_LOW 1 +#define AMDF17_MINOR_HIGH INT32_MAX + +/* + * This is the value of the first PCI data fabric device that globally exists. + * It always maps to AMD's first nodeid (what we call cpi_procnodeid).
+ */ +#define AMDF17_DF_FIRST_DEVICE 0x18 + +/* + * The data fabric devices are defined to always be on PCI bus zero. + */ +#define AMDF17_DF_BUSNO 0x00 + +/* + * This register contains the BUS A of the processor node that corresponds + * to the data fabric device. + */ +#define AMDF17_DF_CFG_ADDR_CTL 0x84 +#define AMDF17_DF_CFG_ADDR_CTL_MASK 0xff + +/* + * Northbridge registers that are related to accessing the SMN. One writes to + * the SMN address register and then can read from the SMN data register. + */ +#define AMDF17_NB_SMN_ADDR 0x60 +#define AMDF17_NB_SMN_DATA 0x64 + +/* + * The following are register offsets and the meaning of their bits related to + * temperature. These addresses are addresses in the System Management Network + * which is accessed through the northbridge. They are not addresses in PCI + * configuration space. + */ +#define AMDF17_SMU_THERMAL_CURTEMP 0x00059800 +#define AMDF17_SMU_THERMAL_CURTEMP_TEMPERATURE(x) ((x) >> 21) +#define AMDF17_SMU_THERMAL_CURTEMP_RANGE_SEL (1 << 19) + +#define AMDF17_SMU_THERMAL_CURTEMP_RANGE_ADJ (-49) +#define AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS 3 +#define AMDF17_SMU_THERMAL_CURTEMP_BITS_MASK 0x7 + +/* + * The temperature sensor in family 17 is measured in terms of 0.125 C steps.
+ */ +#define AMDF17_THERMAL_GRANULARITY 8 + +struct amdf17nb; +struct amdf17df; + +typedef struct amdf17nb { + list_node_t amd_nb_link; + dev_info_t *amd_nb_dip; + ddi_acc_handle_t amd_nb_cfgspace; + uint_t amd_nb_bus; + uint_t amd_nb_dev; + uint_t amd_nb_func; + struct amdf17df *amd_nb_df; + uint_t amd_nb_procnodeid; + id_t amd_nb_temp_minor; + hrtime_t amd_nb_temp_last_read; + int amd_nb_temp_off; + uint32_t amd_nb_temp_reg; + /* Values derived from the above */ + int64_t amd_nb_temp; +} amdf17nb_t; + +typedef struct amdf17df { + list_node_t amd_df_link; + dev_info_t *amd_df_f0_dip; + ddi_acc_handle_t amd_df_f0_cfgspace; + uint_t amd_df_procnodeid; + uint_t amd_df_iobus; + amdf17nb_t *amd_df_nb; +} amdf17df_t; + +typedef struct amdf17nbdf { + kmutex_t amd_nbdf_lock; + id_space_t *amd_nbdf_minors; + list_t amd_nbdf_nbs; + list_t amd_nbdf_dfs; +} amdf17nbdf_t; + +typedef enum { + AMD_NBDF_TYPE_UNKNOWN, + AMD_NBDF_TYPE_NORTHBRIDGE, + AMD_NBDF_TYPE_DATA_FABRIC +} amdf17nbdf_type_t; + +typedef struct { + uint16_t amd_nbdft_pci_did; + amdf17nbdf_type_t amd_nbdft_type; +} amdf17nbdf_table_t; + +static const amdf17nbdf_table_t amdf17nbdf_dev_map[] = { + /* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */ + { 0x1450, AMD_NBDF_TYPE_NORTHBRIDGE }, + { 0x1460, AMD_NBDF_TYPE_DATA_FABRIC }, + { PCI_EINVAL16 } +}; + +typedef struct { + const char *amd_nbdfo_brand; + uint_t amd_nbdfo_family; + int amd_nbdfo_off; +} amdf17nbdf_offset_t; + +/* + * AMD processors report a control temperature (called Tctl) which may be + * different from the junction temperature, which is the value that is actually + * measured from the die (sometimes called Tdie or Tjct). This is done so that + * socket-based environmental monitoring can be consistent from a platform + * perspective, but doesn't help us. 
Unfortunately, these values aren't in + * datasheets that we can find, but have been documented partially in a series + * of blog posts by AMD when discussing their 'Ryzen Master' monitoring software + * for Windows. + * + * The brand strings below may contain partial matches, such as in the + * Threadripper cases, so we can match the entire family of processors. The + * offset value is the quantity in degrees that we should adjust Tctl to reach + * Tdie. + */ +static const amdf17nbdf_offset_t amdf17nbdf_offsets[] = { + { "AMD Ryzen 5 1600X", 0x17, -20 }, + { "AMD Ryzen 7 1700X", 0x17, -20 }, + { "AMD Ryzen 7 1800X", 0x17, -20 }, + { "AMD Ryzen 7 2700X", 0x17, -10 }, + { "AMD Ryzen Threadripper 19", 0x17, -27 }, + { "AMD Ryzen Threadripper 29", 0x17, -27 }, + { NULL } +}; + +/* + * This indicates a number of milliseconds that we should wait between reads. + * This is somewhat arbitrary, but the goal is to reduce cross call activity + * and reflect that the sensor may not update all the time. + */ +uint_t amdf17nbdf_cache_ms = 100; + +/* + * This indicates whether detach is allowed. It is not by default. See the + * theory statement section 'Attach and Detach Complications' for more + * information. + */ +uint_t amdf17nbdf_allow_detach = 0; + +/* + * Global data that we keep regarding the device.
+ */ +amdf17nbdf_t *amdf17nbdf; + +static amdf17nb_t * +amdf17nbdf_lookup_nb(amdf17nbdf_t *nbdf, minor_t minor) +{ + ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock)); + + if (minor < AMDF17_MINOR_LOW || minor > AMDF17_MINOR_HIGH) { + return (NULL); + } + + for (amdf17nb_t *nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL; + nb = list_next(&nbdf->amd_nbdf_nbs, nb)) { + if ((id_t)minor == nb->amd_nb_temp_minor) { + return (nb); + } + } + + return (NULL); +} + +static void +amdf17nbdf_cleanup_nb(amdf17nbdf_t *nbdf, amdf17nb_t *nb) +{ + if (nb == NULL) + return; + + ddi_remove_minor_node(nb->amd_nb_dip, NULL); + if (nb->amd_nb_temp_minor > 0) { + id_free(nbdf->amd_nbdf_minors, nb->amd_nb_temp_minor); + } + if (nb->amd_nb_cfgspace != NULL) { + pci_config_teardown(&nb->amd_nb_cfgspace); + } + kmem_free(nb, sizeof (amdf17nb_t)); +} + +static void +amdf17nbdf_cleanup_df(amdf17df_t *df) +{ + if (df == NULL) + return; + + if (df->amd_df_f0_cfgspace != NULL) { + pci_config_teardown(&df->amd_df_f0_cfgspace); + } + kmem_free(df, sizeof (amdf17df_t)); +} + +static int +amdf17nbdf_smn_read(amdf17nbdf_t *nbdf, amdf17nb_t *nb, uint32_t addr, + uint32_t *valp) +{ + VERIFY(MUTEX_HELD(&nbdf->amd_nbdf_lock)); + + pci_config_put32(nb->amd_nb_cfgspace, AMDF17_NB_SMN_ADDR, addr); + *valp = pci_config_get32(nb->amd_nb_cfgspace, AMDF17_NB_SMN_DATA); + + return (0); +} + +static int +amdf17nbdf_temp_read(amdf17nbdf_t *nbdf, amdf17nb_t *nb) +{ + int ret; + uint32_t reg, rawtemp, decimal; + + ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock)); + + /* + * Update the last read time first. Even if this fails, we want to make + * sure that we latch the fact that we tried. + */ + nb->amd_nb_temp_last_read = gethrtime(); + if ((ret = amdf17nbdf_smn_read(nbdf, nb, AMDF17_SMU_THERMAL_CURTEMP, + &reg)) != 0) { + return (ret); + } + + nb->amd_nb_temp_reg = reg; + + /* + * Take the primary temperature value and break apart its decimal value + * from its main value.
+ */ + rawtemp = AMDF17_SMU_THERMAL_CURTEMP_TEMPERATURE(reg); + decimal = rawtemp & AMDF17_SMU_THERMAL_CURTEMP_BITS_MASK; + rawtemp = rawtemp >> AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS; + + if ((reg & AMDF17_SMU_THERMAL_CURTEMP_RANGE_SEL) != 0) { + rawtemp += AMDF17_SMU_THERMAL_CURTEMP_RANGE_ADJ; + } + rawtemp += nb->amd_nb_temp_off; + nb->amd_nb_temp = rawtemp << AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS; + nb->amd_nb_temp += decimal; + + return (0); +} + +static int +amdf17nbdf_temp_init(amdf17nbdf_t *nbdf, amdf17nb_t *nb) +{ + uint_t i, family; + char buf[256]; + + if (cpuid_getbrandstr(CPU, buf, sizeof (buf)) >= sizeof (buf)) { + dev_err(nb->amd_nb_dip, CE_WARN, "!failed to read processor " + "brand string, brand larger than internal buffer"); + return (EOVERFLOW); + } + + family = cpuid_getfamily(CPU); + + for (i = 0; amdf17nbdf_offsets[i].amd_nbdfo_brand != NULL; i++) { + if (family != amdf17nbdf_offsets[i].amd_nbdfo_family) + continue; + if (strncmp(buf, amdf17nbdf_offsets[i].amd_nbdfo_brand, + strlen(amdf17nbdf_offsets[i].amd_nbdfo_brand)) == 0) { + nb->amd_nb_temp_off = + amdf17nbdf_offsets[i].amd_nbdfo_off; + break; + } + } + + return (amdf17nbdf_temp_read(nbdf, nb)); +} + +static amdf17nbdf_type_t +amdf17nbdf_dip_type(uint16_t dev) +{ + uint_t i; + const amdf17nbdf_table_t *tp = amdf17nbdf_dev_map; + + for (i = 0; tp[i].amd_nbdft_pci_did != PCI_EINVAL16; i++) { + if (tp[i].amd_nbdft_pci_did == dev) { + return (tp[i].amd_nbdft_type); + } + } + + return (AMD_NBDF_TYPE_UNKNOWN); +} + +static boolean_t +amdf17nbdf_map(amdf17nbdf_t *nbdf, amdf17nb_t *nb, amdf17df_t *df) +{ + int ret; + char buf[128]; + + ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock)); + + /* + * This means that we encountered a duplicate. We're going to stop + * processing, but we're not going to fail its attach at this point. 
+ */ + if (nb->amd_nb_df != NULL) { + dev_err(nb->amd_nb_dip, CE_WARN, "!trying to map NB %u/%u/%u " + "to DF procnode %u, but NB is already mapped to DF " + "procnode %u!", + nb->amd_nb_bus, nb->amd_nb_dev, nb->amd_nb_func, + df->amd_df_procnodeid, nb->amd_nb_df->amd_df_procnodeid); + return (B_TRUE); + } + + /* + * Now that we have found a mapping, initialize our temperature + * information and create the minor node. + */ + nb->amd_nb_procnodeid = df->amd_df_procnodeid; + nb->amd_nb_temp_minor = id_alloc(nbdf->amd_nbdf_minors); + + if ((ret = amdf17nbdf_temp_init(nbdf, nb)) != 0) { + dev_err(nb->amd_nb_dip, CE_WARN, "!failed to init SMN " + "temperature data on node %u: %d", nb->amd_nb_procnodeid, + ret); + return (B_FALSE); + } + + if (snprintf(buf, sizeof (buf), "procnode.%u", nb->amd_nb_procnodeid) >= + sizeof (buf)) { + dev_err(nb->amd_nb_dip, CE_WARN, "!unexpected buffer name " + "overrun assembling temperature minor %u", + nb->amd_nb_procnodeid); + return (B_FALSE); + } + + if (ddi_create_minor_node(nb->amd_nb_dip, buf, S_IFCHR, + nb->amd_nb_temp_minor, DDI_NT_SENSOR_TEMP_CPU, 0) != DDI_SUCCESS) { + dev_err(nb->amd_nb_dip, CE_WARN, "!failed to create minor node " + "%s", buf); + return (B_FALSE); + } + + /* + * Now that it's all done, note that they're mapped to each other.
+ */ + nb->amd_nb_df = df; + df->amd_df_nb = nb; + + return (B_TRUE); +} + +/* + * Add a northbridge to the global list and, if the data fabric device that + * owns its PCI bus has already attached, map the two together. Returns B_FALSE + * if the mapping failed, in which case nb is no longer on the list. + */ +static boolean_t +amdf17nbdf_add_nb(amdf17nbdf_t *nbdf, amdf17nb_t *nb) +{ + amdf17df_t *df; + boolean_t ret = B_TRUE; + + mutex_enter(&nbdf->amd_nbdf_lock); + list_insert_tail(&nbdf->amd_nbdf_nbs, nb); + for (df = list_head(&nbdf->amd_nbdf_dfs); df != NULL; + df = list_next(&nbdf->amd_nbdf_dfs, df)) { + if (nb->amd_nb_bus == df->amd_df_iobus) { + ret = amdf17nbdf_map(nbdf, nb, df); + break; + } + } + /* + * Our caller frees nb when we report failure, so it must not be left + * on the global list where later walks would touch freed memory. + */ + if (!ret) { + list_remove(&nbdf->amd_nbdf_nbs, nb); + } + mutex_exit(&nbdf->amd_nbdf_lock); + + return (ret); +} + +/* + * Add a data fabric device to the global list and, if the northbridge on its + * I/O bus has already attached, map the two together. Returns B_FALSE if the + * mapping failed, in which case df is no longer on the list. + */ +static boolean_t +amdf17nbdf_add_df(amdf17nbdf_t *nbdf, amdf17df_t *df) +{ + amdf17nb_t *nb; + boolean_t ret = B_TRUE; + + mutex_enter(&nbdf->amd_nbdf_lock); + list_insert_tail(&nbdf->amd_nbdf_dfs, df); + for (nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL; + nb = list_next(&nbdf->amd_nbdf_nbs, nb)) { + if (nb->amd_nb_bus == df->amd_df_iobus) { + /* + * The mapping is 1:1, so stop at the first match just + * as amdf17nbdf_add_nb() does. + */ + ret = amdf17nbdf_map(nbdf, nb, df); + break; + } + } + /* + * Our caller frees df when we report failure, so it must not be left + * on the global list where later walks would touch freed memory. + */ + if (!ret) { + list_remove(&nbdf->amd_nbdf_dfs, df); + } + mutex_exit(&nbdf->amd_nbdf_lock); + + return (ret); +} + +static boolean_t +amdf17nbdf_attach_nb(amdf17nbdf_t *nbdf, dev_info_t *dip, ddi_acc_handle_t hdl, + uint_t bus, uint_t dev, uint_t func) +{ + amdf17nb_t *nb; + + nb = kmem_zalloc(sizeof (amdf17nb_t), KM_SLEEP); + nb->amd_nb_dip = dip; + nb->amd_nb_cfgspace = hdl; + nb->amd_nb_bus = bus; + nb->amd_nb_dev = dev; + nb->amd_nb_func = func; + /* + * Set this to a value we won't get from the processor.
+ */ + nb->amd_nb_procnodeid = UINT_MAX; + + if (!amdf17nbdf_add_nb(nbdf, nb)) { + amdf17nbdf_cleanup_nb(nbdf, nb); + return (B_FALSE); + } + + return (B_TRUE); +} + +static boolean_t +amdf17nbdf_attach_df(amdf17nbdf_t *nbdf, dev_info_t *dip, ddi_acc_handle_t hdl, + uint_t bus, uint_t dev, uint_t func) +{ + amdf17df_t *df; + + if (bus != AMDF17_DF_BUSNO) { + dev_err(dip, CE_WARN, "!encountered data fabric device with " + "unexpected PCI bus assignment, found 0x%x, expected 0x%x", + bus, AMDF17_DF_BUSNO); + return (B_FALSE); + } + + if (dev < AMDF17_DF_FIRST_DEVICE) { + dev_err(dip, CE_WARN, "!encountered data fabric device with " + "PCI device assignment below the first minimum device " + "(0x%x): 0x%x", AMDF17_DF_FIRST_DEVICE, dev); + return (B_FALSE); + } + + /* + * At the moment we only care about function 0. However, we may care + * about Function 4 in the future which has access to the FICAA. + * However, only function zero should ever be attached, so this is just + * an extra precaution. 
+ */ + if (func != 0) { + dev_err(dip, CE_WARN, "!encountered data fabric device with " + "unxpected PCI function assignment, found 0x%x, expected " + "0x0", func); + return (B_FALSE); + } + + df = kmem_zalloc(sizeof (amdf17df_t), KM_SLEEP); + df->amd_df_f0_dip = dip; + df->amd_df_f0_cfgspace = hdl; + df->amd_df_procnodeid = dev - AMDF17_DF_FIRST_DEVICE; + df->amd_df_iobus = pci_config_get32(hdl, AMDF17_DF_CFG_ADDR_CTL) & + AMDF17_DF_CFG_ADDR_CTL_MASK; + + if (!amdf17nbdf_add_df(nbdf, df)) { + amdf17nbdf_cleanup_df(df); + return (B_FALSE); + } + + return (B_TRUE); +} + +static int +amdf17nbdf_open(dev_t *devp, int flags, int otype, cred_t *credp) +{ + amdf17nbdf_t *nbdf = amdf17nbdf; + minor_t m; + + if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp)) { + return (EPERM); + } + + if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0) { + return (EINVAL); + } + + if (otype != OTYP_CHR) { + return (EINVAL); + } + + m = getminor(*devp); + + /* + * Sanity check the minor + */ + mutex_enter(&nbdf->amd_nbdf_lock); + if (amdf17nbdf_lookup_nb(nbdf, m) == NULL) { + mutex_exit(&nbdf->amd_nbdf_lock); + return (ENXIO); + } + mutex_exit(&nbdf->amd_nbdf_lock); + + return (0); +} + +static int +amdf17nbdf_ioctl_kind(intptr_t arg, int mode) +{ + sensor_ioctl_kind_t kind; + + bzero(&kind, sizeof (sensor_ioctl_kind_t)); + kind.sik_kind = SENSOR_KIND_TEMPERATURE; + + if (ddi_copyout((void *)&kind, (void *)arg, + sizeof (sensor_ioctl_kind_t), mode & FKIOCTL) != 0) { + return (EFAULT); + } + + return (0); +} + +static int +amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, int mode) +{ + amdf17nb_t *nb; + hrtime_t diff; + sensor_ioctl_temperature_t temp; + + bzero(&temp, sizeof (temp)); + + mutex_enter(&nbdf->amd_nbdf_lock); + nb = amdf17nbdf_lookup_nb(nbdf, minor); + if (nb == NULL) { + mutex_exit(&nbdf->amd_nbdf_lock); + return (ENXIO); + } + + diff = NSEC2MSEC(gethrtime() - nb->amd_nb_temp_last_read); + if (diff > 0 && diff > (hrtime_t)amdf17nbdf_cache_ms) { + int 
ret; + + ret = amdf17nbdf_temp_read(nbdf, nb); + if (ret != 0) { + mutex_exit(&nbdf->amd_nbdf_lock); + return (ret); + } + } + + temp.sit_unit = SENSOR_UNIT_CELSIUS; + temp.sit_temp = nb->amd_nb_temp; + temp.sit_gran = AMDF17_THERMAL_GRANULARITY; + mutex_exit(&nbdf->amd_nbdf_lock); + + if (ddi_copyout(&temp, (void *)arg, sizeof (temp), + mode & FKIOCTL) != 0) { + return (EFAULT); + } + + return (0); +} + +static int +amdf17nbdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + minor_t m; + amdf17nbdf_t *nbdf = amdf17nbdf; + + if ((mode & FREAD) == 0) { + return (EINVAL); + } + + m = getminor(dev); + + switch (cmd) { + case SENSOR_IOCTL_TYPE: + return (amdf17nbdf_ioctl_kind(arg, mode)); + case SENSOR_IOCTL_TEMPERATURE: + return (amdf17nbdf_ioctl_temp(nbdf, m, arg, mode)); + default: + return (ENOTTY); + } +} + +/* + * We don't really do any state tracking on close, so for now, just allow it to + * always succeed. + */ +static int +amdf17nbdf_close(dev_t dev, int flags, int otype, cred_t *credp) +{ + return (0); +} + +static int +amdf17nbdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + uint_t nregs; + int *regs; + uint_t bus, dev, func; + uint16_t pci_did; + ddi_acc_handle_t pci_hdl; + amdf17nbdf_type_t type; + amdf17nbdf_t *nbdf = amdf17nbdf; + + if (cmd == DDI_RESUME) + return (DDI_SUCCESS); + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, 0, "reg", + &regs, &nregs) != DDI_PROP_SUCCESS) { + dev_err(dip, CE_WARN, "!failed to find pci 'reg' property"); + return (DDI_FAILURE); + } + + if (nregs < 1) { + ddi_prop_free(regs); + return (DDI_FAILURE); + } + + bus = PCI_REG_BUS_G(regs[0]); + dev = PCI_REG_DEV_G(regs[0]); + func = PCI_REG_FUNC_G(regs[0]); + + ddi_prop_free(regs); + + if (pci_config_setup(dip, &pci_hdl) != DDI_SUCCESS) { + dev_err(dip, CE_WARN, "!failed to map pci devices"); + return (DDI_FAILURE); + } + + pci_did = pci_config_get16(pci_hdl, PCI_CONF_DEVID); + + type =
amdf17nbdf_dip_type(pci_did);
+	switch (type) {
+	case AMD_NBDF_TYPE_NORTHBRIDGE:
+		if (!amdf17nbdf_attach_nb(nbdf, dip, pci_hdl, bus, dev, func)) {
+			return (DDI_FAILURE);
+		}
+		break;
+	case AMD_NBDF_TYPE_DATA_FABRIC:
+		if (!amdf17nbdf_attach_df(nbdf, dip, pci_hdl, bus, dev, func)) {
+			return (DDI_FAILURE);
+		}
+		break;
+	default:
+		pci_config_teardown(&pci_hdl);
+		return (DDI_FAILURE);
+	}
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Unfortunately, it's hard for us to really support detach here. The problem is
+ * that we need both the data fabric devices and the northbridges to make sure
+ * that we map everything. However, only the northbridges actually create minor
+ * nodes that'll be opened and thus trigger them to reattach when accessed. What
+ * we should probably look at doing in the future is making this into a nexus
+ * driver that enumerates children like a temperature driver.
+ *
+ * As a result, detach is refused unless amdf17nbdf_allow_detach is non-zero.
+ * When it is allowed, we look for the dip first among the northbridges and
+ * then among the data fabric devices, unlink the matching entry from its
+ * peer, and clean it up. A dip that matches neither list fails the detach.
+ */
+static int
+amdf17nbdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	amdf17nbdf_t *nbdf = amdf17nbdf;
+
+	if (cmd == DDI_SUSPEND)
+		return (DDI_SUCCESS);
+
+	/* Never tear down state for a command we don't understand. */
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	if (nbdf == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	if (amdf17nbdf_allow_detach == 0) {
+		return (DDI_FAILURE);
+	}
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	for (amdf17nb_t *nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL;
+	    nb = list_next(&nbdf->amd_nbdf_nbs, nb)) {
+		if (dip == nb->amd_nb_dip) {
+			list_remove(&nbdf->amd_nbdf_nbs, nb);
+			if (nb->amd_nb_df != NULL) {
+				ASSERT3P(nb->amd_nb_df->amd_df_nb, ==, nb);
+				nb->amd_nb_df->amd_df_nb = NULL;
+			}
+			amdf17nbdf_cleanup_nb(nbdf, nb);
+			mutex_exit(&nbdf->amd_nbdf_lock);
+			return (DDI_SUCCESS);
+		}
+	}
+
+	/*
+	 * The data fabric entries live on their own list, so we must advance
+	 * with that same list; the two lists are created with different node
+	 * types and link offsets.
+	 */
+	for (amdf17df_t *df = list_head(&nbdf->amd_nbdf_dfs); df != NULL;
+	    df = list_next(&nbdf->amd_nbdf_dfs, df)) {
+		if (dip == df->amd_df_f0_dip) {
+			list_remove(&nbdf->amd_nbdf_dfs, df);
+			if (df->amd_df_nb != NULL) {
+				ASSERT3P(df->amd_df_nb->amd_nb_df, ==, df);
+				df->amd_df_nb->amd_nb_df = NULL;
+			}
+			amdf17nbdf_cleanup_df(df);
+			mutex_exit(&nbdf->amd_nbdf_lock);
+
return (DDI_SUCCESS); + } + } + mutex_exit(&nbdf->amd_nbdf_lock); + + return (DDI_FAILURE); +} + +static int +amdf17nbdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, + void **resultp) +{ + dev_t dev; + minor_t minor; + amdf17nbdf_t *nbdf; + amdf17nb_t *nb; + + switch (cmd) { + case DDI_INFO_DEVT2DEVINFO: + case DDI_INFO_DEVT2INSTANCE: + break; + default: + return (DDI_FAILURE); + } + + dev = (dev_t)arg; + minor = getminor(dev); + nbdf = amdf17nbdf; + + mutex_enter(&nbdf->amd_nbdf_lock); + nb = amdf17nbdf_lookup_nb(nbdf, (id_t)minor); + if (nb == NULL) { + mutex_exit(&nbdf->amd_nbdf_lock); + return (DDI_FAILURE); + } + if (cmd == DDI_INFO_DEVT2DEVINFO) { + *resultp = nb->amd_nb_dip; + } else { + int inst = ddi_get_instance(nb->amd_nb_dip); + *resultp = (void *)(uintptr_t)inst; + } + mutex_exit(&nbdf->amd_nbdf_lock); + + return (DDI_SUCCESS); +} + +static void +amdf17nbdf_destroy(amdf17nbdf_t *nbdf) +{ + amdf17nb_t *nb; + amdf17df_t *df; + + while ((nb = list_remove_head(&nbdf->amd_nbdf_nbs)) != NULL) { + amdf17nbdf_cleanup_nb(nbdf, nb); + } + list_destroy(&nbdf->amd_nbdf_nbs); + + while ((df = list_remove_head(&nbdf->amd_nbdf_dfs)) != NULL) { + amdf17nbdf_cleanup_df(df); + } + list_destroy(&nbdf->amd_nbdf_dfs); + + if (nbdf->amd_nbdf_minors != NULL) { + id_space_destroy(nbdf->amd_nbdf_minors); + } + + mutex_destroy(&nbdf->amd_nbdf_lock); + kmem_free(nbdf, sizeof (amdf17nbdf_t)); +} + +static amdf17nbdf_t * +amdf17nbdf_create(void) +{ + amdf17nbdf_t *nbdf; + + nbdf = kmem_zalloc(sizeof (amdf17nbdf_t), KM_SLEEP); + mutex_init(&nbdf->amd_nbdf_lock, NULL, MUTEX_DRIVER, NULL); + list_create(&nbdf->amd_nbdf_nbs, sizeof (amdf17nb_t), + offsetof(amdf17nb_t, amd_nb_link)); + list_create(&nbdf->amd_nbdf_dfs, sizeof (amdf17df_t), + offsetof(amdf17df_t, amd_df_link)); + if ((nbdf->amd_nbdf_minors = id_space_create("amdf17nbdf_minors", + AMDF17_MINOR_LOW, AMDF17_MINOR_HIGH)) == NULL) { + amdf17nbdf_destroy(nbdf); + return (NULL); + } + + return (nbdf); +} + +static 
struct cb_ops amdf17nbdf_cb_ops = {
+	.cb_open = amdf17nbdf_open,
+	.cb_close = amdf17nbdf_close,
+	.cb_strategy = nodev,
+	.cb_print = nodev,
+	.cb_dump = nodev,
+	.cb_read = nodev,
+	.cb_write = nodev,
+	.cb_ioctl = amdf17nbdf_ioctl,
+	.cb_devmap = nodev,
+	.cb_mmap = nodev,
+	.cb_segmap = nodev,
+	.cb_chpoll = nochpoll,
+	.cb_prop_op = ddi_prop_op,
+	.cb_flag = D_MP,
+	.cb_rev = CB_REV,
+	.cb_aread = nodev,
+	.cb_awrite = nodev
+};
+
+static struct dev_ops amdf17nbdf_dev_ops = {
+	.devo_rev = DEVO_REV,
+	.devo_refcnt = 0,
+	.devo_getinfo = amdf17nbdf_getinfo,
+	.devo_identify = nulldev,
+	.devo_probe = nulldev,
+	.devo_attach = amdf17nbdf_attach,
+	.devo_detach = amdf17nbdf_detach,
+	.devo_reset = nodev,
+	.devo_power = ddi_power,
+	.devo_quiesce = ddi_quiesce_not_needed,
+	.devo_cb_ops = &amdf17nbdf_cb_ops
+};
+
+static struct modldrv amdf17nbdf_modldrv = {
+	.drv_modops = &mod_driverops,
+	.drv_linkinfo = "AMD Family 17h Driver",
+	.drv_dev_ops = &amdf17nbdf_dev_ops
+};
+
+static struct modlinkage amdf17nbdf_modlinkage = {
+	.ml_rev = MODREV_1,
+	.ml_linkage = { &amdf17nbdf_modldrv, NULL }
+};
+
+/*
+ * Module load: allocate the global driver state and install the module. On
+ * any failure nothing is left allocated and the global is left NULL.
+ */
+int
+_init(void)
+{
+	int ret;
+	amdf17nbdf_t *nbdf;
+
+	if ((nbdf = amdf17nbdf_create()) == NULL) {
+		return (ENOMEM);
+	}
+
+	/*
+	 * Publish the state before installing the module. The attach, detach,
+	 * and getinfo entry points all read the global, so it should already
+	 * be valid in case one of them runs as soon as we're installed.
+	 */
+	amdf17nbdf = nbdf;
+
+	if ((ret = mod_install(&amdf17nbdf_modlinkage)) != 0) {
+		/*
+		 * Tear down the state we just created. Note that this must use
+		 * our local pointer: passing a NULL global to
+		 * amdf17nbdf_destroy() would dereference NULL and leak nbdf.
+		 */
+		amdf17nbdf = NULL;
+		amdf17nbdf_destroy(nbdf);
+		return (ret);
+	}
+
+	return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&amdf17nbdf_modlinkage, modinfop));
+}
+
+/*
+ * Module unload: only once the module has been successfully removed do we
+ * free the global driver state.
+ */
+int
+_fini(void)
+{
+	int ret;
+
+	if ((ret = mod_remove(&amdf17nbdf_modlinkage)) != 0) {
+		return (ret);
+	}
+
+	amdf17nbdf_destroy(amdf17nbdf);
+	amdf17nbdf = NULL;
+	return (ret);
+}
diff --git a/usr/src/uts/intel/io/coretemp/coretemp.c b/usr/src/uts/intel/io/coretemp/coretemp.c
new file mode 100644
index 0000000000..e21d385991
--- /dev/null
+++ b/usr/src/uts/intel/io/coretemp/coretemp.c
@@ -0,0 +1,784 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common
Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * Intel CPU Thermal sensor driver + * + * The MSRs used here were introduced with the 'Core' family processors + * and have since spread beyond there, even to the Atom line. Currently, + * temperature sensors exist on a per-core basis and optionally on a per-package + * basis. The temperature sensor exposes a reading that's relative to the + * processor's maximum junction temperature, often referred to as Tj. We + * currently only support models where we can determine that junction + * temperature programmatically. For older processors, we would need to track + * down the datasheet. Unfortunately, the values here are often on a per-brand + * string basis. That is, two CPUs with the same model and stepping that have + * been binned differently may have different junction temperatures. + * + * The temperature is exposed through /dev and uses a semi-standard sensor + * framework. We expose one minor node per CPU core and one minor node per CPU + * package, if that is supported. Reads are rate-limited in the driver at 100ms + * by default per the global variable coretemp_cache_ms. 
+ */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/open.h> +#include <sys/stat.h> +#include <sys/cred.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/list.h> +#include <sys/stddef.h> +#include <sys/cmn_err.h> +#include <sys/id_space.h> +#include <sys/x86_archext.h> +#include <sys/cpu_module.h> +#include <sys/ontrap.h> +#include <sys/cpuvar.h> +#include <sys/x_call.h> +#include <sys/sensors.h> + +#define CORETEMP_MINOR_MIN 1 +#define CORETEMP_MINOR_MAX INT32_MAX + +typedef struct coretemp_core { + list_node_t ctc_link; + id_t ctc_core_minor; + id_t ctc_pkg_minor; + enum cmi_hdl_class ctc_class; + uint_t ctc_chip; + uint_t ctc_core; + uint_t ctc_strand; + uint_t ctc_tjmax; + hrtime_t ctc_last_read; + uint64_t ctc_core_status; + uint64_t ctc_core_intr; + uint64_t ctc_pkg_status; + uint64_t ctc_pkg_intr; + uint64_t ctc_invalid_reads; + /* The following fields are derived from above */ + uint_t ctc_temperature; + uint_t ctc_resolution; + uint_t ctc_pkg_temperature; +} coretemp_core_t; + +typedef struct coretemp { + dev_info_t *coretemp_dip; + id_space_t *coretemp_ids; + cpuset_t *coretemp_cpuset; + boolean_t coretemp_pkg; + kmutex_t coretemp_mutex; + list_t coretemp_cores; +} coretemp_t; + +coretemp_t *coretemp; + +/* + * This indicates a number of milliseconds that we should wait between reads. + * This is somewhat arbitrary, but the goal is to reduce cross call activity + * and reflect that the sensor may not update all the time. 
+ */ +uint_t coretemp_cache_ms = 100; + +static int +coretemp_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) +{ + uint_t msr = (uint_t)arg1; + uint64_t *valp = (uint64_t *)arg2; + cmi_errno_t *errp = (cmi_errno_t *)arg3; + + on_trap_data_t otd; + + if (on_trap(&otd, OT_DATA_ACCESS) == 0) { + if (checked_rdmsr(msr, valp) == 0) { + *errp = CMI_SUCCESS; + } else { + *errp = CMIERR_NOTSUP; + } + } else { + *errp = CMIERR_MSRGPF; + } + no_trap(); + + return (0); +} + +/* + * This really should just be a call to the CMI handle to provide us the MSR. + * However, that routine, cmi_hdl_rdmsr(), cannot be safely used until it is + * fixed for use outside of a panic-like context. + */ +static int +coretemp_rdmsr(coretemp_t *ct, cmi_hdl_t hdl, uint_t msr, uint64_t *valp) +{ + id_t cpu = cmi_hdl_logical_id(hdl); + int ret = CMI_SUCCESS; + + ASSERT(MUTEX_HELD(&ct->coretemp_mutex)); + kpreempt_disable(); + if (CPU->cpu_id == cpu) { + (void) coretemp_rdmsr_xc((xc_arg_t)msr, (xc_arg_t)valp, + (xc_arg_t)&ret); + } else { + cpuset_only(ct->coretemp_cpuset, (uint_t)cpu); + xc_call((xc_arg_t)msr, (xc_arg_t)valp, (xc_arg_t)&ret, + (ulong_t *)ct->coretemp_cpuset, coretemp_rdmsr_xc); + } + kpreempt_enable(); + + return (ret); +} + +static int +coretemp_cmi_errno(cmi_errno_t e) +{ + switch (e) { + case CMIERR_NOTSUP: + return (ENOTSUP); + default: + return (EIO); + } +} + +/* + * Answer the question of whether or not the driver can support the CPU in + * question. Right now we have the following constraints for supporting the CPU: + * + * o The CPU is made by Intel + * o The CPU has the Digital Thermal Sensor + * o The CPU family is 6, which is usually implicit from the above + * o We can determine its junction temperature through an MSR + * + * If we can't determine the junction temperature programatically, then we need + * to set up tables of CPUs to do so. This can be fleshed out and improved. 
+ */ +static boolean_t +coretemp_supported(void) +{ + uint_t model; + + if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) { + return (B_FALSE); + } + + if (!is_x86_feature(x86_featureset, X86FSET_CORE_THERMAL)) { + return (B_FALSE); + } + + if (cpuid_getfamily(CPU) != 6) { + return (B_FALSE); + } + + model = cpuid_getmodel(CPU); + if (model <= INTC_MODEL_PENRYN || model == INTC_MODEL_SILVERTHORNE || + model == INTC_MODEL_LINCROFT || model == INTC_MODEL_PENWELL || + model == INTC_MODEL_CLOVERVIEW || model == INTC_MODEL_CEDARVIEW) { + return (B_FALSE); + } + + return (B_TRUE); +} + +static coretemp_core_t * +coretemp_lookup_core(coretemp_t *ct, minor_t minor) +{ + coretemp_core_t *ctc; + + ASSERT(MUTEX_HELD(&ct->coretemp_mutex)); + + if (minor < CORETEMP_MINOR_MIN || minor > CORETEMP_MINOR_MAX) { + return (NULL); + } + + for (ctc = list_head(&ct->coretemp_cores); ctc != NULL; + ctc = list_next(&ct->coretemp_cores, ctc)) { + if (ctc->ctc_core_minor == (id_t)minor || + (ctc->ctc_pkg_minor >= CORETEMP_MINOR_MIN && + ctc->ctc_pkg_minor == (id_t)minor)) { + return (ctc); + } + } + + return (NULL); +} + + +/* + * We need to determine the value of Tj Max as all temperature sensors are + * derived from this value. The ease of this depends on how old the processor in + * question is. The Core family processors after Penryn have support for an MSR + * that tells us what to go for. In the Atom family, processors starting with + * Silvermont have support for an MSR that documents this value. For older + * processors, one needs to track down the datasheet for a specific processor. + * Two processors in the same family/model may have different values of Tj Max. + * At the moment, we only support this on processors that have that MSR. 
+ */
+static int
+coretemp_calculate_tjmax(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl)
+{
+	cmi_errno_t e;
+	int err = 0;
+	uint64_t val = 0;
+
+	/*
+	 * Check the status of the read before looking at the value. val is
+	 * only meaningful if the read succeeded, and a failed read should be
+	 * reported with its own errno rather than being mistaken for an MSR
+	 * that read back as zero.
+	 */
+	e = coretemp_rdmsr(ct, hdl, MSR_TEMPERATURE_TARGET, &val);
+	if (e != CMI_SUCCESS) {
+		err = coretemp_cmi_errno(e);
+	} else if (val == 0) {
+		err = EINVAL;
+	} else {
+		ctc->ctc_tjmax = MSR_TEMPERATURE_TARGET_TARGET(val);
+	}
+
+	return (err);
+}
+
+/*
+ * Refresh the cached thermal state for a core: the raw status and interrupt
+ * MSRs, the derived temperature and resolution, and, for core zero on systems
+ * with package support, the package values as well. Returns zero on success
+ * or an errno if one of the MSRs could not be read.
+ */
+static int
+coretemp_read(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl)
+{
+	cmi_errno_t e;
+	int err = 0;
+	uint64_t val = 0;
+
+	ctc->ctc_last_read = gethrtime();
+
+	e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_STATUS, &val);
+	if (e == CMI_SUCCESS) {
+		ctc->ctc_core_status = val;
+	} else {
+		err = coretemp_cmi_errno(e);
+		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core "
+		    "thermal status on %u/%u: %d", ctc->ctc_chip, ctc->ctc_core,
+		    err);
+		return (err);
+	}
+
+	e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_INTERRUPT, &val);
+	if (e == CMI_SUCCESS) {
+		ctc->ctc_core_intr = val;
+	} else {
+		err = coretemp_cmi_errno(e);
+		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core "
+		    "thermal interrupt on %u/%u: %d", ctc->ctc_chip,
+		    ctc->ctc_core, err);
+		return (err);
+	}
+
+	/*
+	 * If the last read wasn't valid, then we should keep the current state.
+	 */
+	if ((ctc->ctc_core_status & IA32_THERM_STATUS_READ_VALID) != 0) {
+		uint_t diff;
+		diff = IA32_THERM_STATUS_READING(ctc->ctc_core_status);
+
+		if (diff >= ctc->ctc_tjmax) {
+			dev_err(ct->coretemp_dip, CE_WARN, "!found invalid "
+			    "core temperature on %u/%u: readout: %u, Tjmax: "
+			    "%u, raw: 0x%" PRIx64, ctc->ctc_chip,
+			    ctc->ctc_core, diff, ctc->ctc_tjmax,
+			    ctc->ctc_core_status);
+			ctc->ctc_invalid_reads++;
+		} else {
+			ctc->ctc_temperature = ctc->ctc_tjmax - diff;
+		}
+	} else {
+		ctc->ctc_invalid_reads++;
+	}
+
+	ctc->ctc_resolution =
+	    IA32_THERM_STATUS_RESOLUTION(ctc->ctc_core_status);
+
+	/*
+	 * If we have package support and this is core zero, then update the
+	 * package data.
+ */ + if (ct->coretemp_pkg && ctc->ctc_core == 0) { + uint_t diff; + + e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_STATUS, + &val); + if (e == CMI_SUCCESS) { + ctc->ctc_pkg_status = val; + } else { + err = coretemp_cmi_errno(e); + dev_err(ct->coretemp_dip, CE_WARN, "!failed to get " + "package thermal status on %u: %d", ctc->ctc_chip, + err); + return (err); + } + + e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_INTERRUPT, + &val); + if (e == CMI_SUCCESS) { + ctc->ctc_pkg_intr = val; + } else { + err = coretemp_cmi_errno(e); + dev_err(ct->coretemp_dip, CE_WARN, "!failed to get " + "package thermal interrupt on %u: %d", + ctc->ctc_chip, err); + return (err); + } + + diff = IA32_PKG_THERM_STATUS_READING(ctc->ctc_pkg_status); + if (diff >= ctc->ctc_tjmax) { + dev_err(ct->coretemp_dip, CE_WARN, "!found invalid " + "package temperature on %u: readout: %u, tjmax: " + "%u, raw: 0x%" PRIx64, ctc->ctc_chip, diff, + ctc->ctc_tjmax, ctc->ctc_pkg_status); + ctc->ctc_invalid_reads++; + + } else { + ctc->ctc_pkg_temperature = ctc->ctc_tjmax - diff; + } + } + + return (0); +} + +static int +coretemp_open(dev_t *devp, int flags, int otype, cred_t *credp) +{ + coretemp_t *ct = coretemp; + + if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp)) { + return (EPERM); + } + + if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0) { + return (EINVAL); + } + + if (otype != OTYP_CHR) { + return (EINVAL); + } + + /* + * Sanity check the minor + */ + mutex_enter(&ct->coretemp_mutex); + if (coretemp_lookup_core(ct, getminor(*devp)) == NULL) { + mutex_exit(&ct->coretemp_mutex); + return (ENXIO); + } + mutex_exit(&ct->coretemp_mutex); + + return (0); +} + +static int +coretemp_ioctl_kind(intptr_t arg, int mode) +{ + sensor_ioctl_kind_t kind; + + bzero(&kind, sizeof (kind)); + kind.sik_kind = SENSOR_KIND_TEMPERATURE; + + if (ddi_copyout((void *)&kind, (void *)arg, sizeof (kind), + mode & FKIOCTL) != 0) { + return (EFAULT); + } + + return (0); +} + +static int 
+coretemp_ioctl_temp(coretemp_t *ct, minor_t minor, intptr_t arg, int mode) +{ + coretemp_core_t *ctc; + hrtime_t diff; + sensor_ioctl_temperature_t temp; + + bzero(&temp, sizeof (temp)); + + mutex_enter(&ct->coretemp_mutex); + ctc = coretemp_lookup_core(ct, minor); + if (ctc == NULL) { + mutex_exit(&ct->coretemp_mutex); + return (ENXIO); + } + + diff = NSEC2MSEC(gethrtime() - ctc->ctc_last_read); + if (diff > 0 && diff > (hrtime_t)coretemp_cache_ms) { + int ret; + cmi_hdl_t hdl; + + if ((hdl = cmi_hdl_lookup(ctc->ctc_class, ctc->ctc_chip, + ctc->ctc_core, ctc->ctc_strand)) == NULL) { + mutex_exit(&ct->coretemp_mutex); + return (ENXIO); + } + ret = coretemp_read(ct, ctc, hdl); + cmi_hdl_rele(hdl); + if (ret != 0) { + mutex_exit(&ct->coretemp_mutex); + return (ret); + } + } + + temp.sit_unit = SENSOR_UNIT_CELSIUS; + if ((id_t)minor == ctc->ctc_core_minor) { + temp.sit_temp = ctc->ctc_temperature; + } else { + temp.sit_temp = ctc->ctc_pkg_temperature; + } + + /* + * The resolution field is in whole units of degrees Celsius. + */ + temp.sit_gran = ctc->ctc_resolution; + if (ctc->ctc_resolution > 1) { + temp.sit_gran *= -1; + } + mutex_exit(&ct->coretemp_mutex); + + if (ddi_copyout(&temp, (void *)arg, sizeof (temp), + mode & FKIOCTL) != 0) { + return (EFAULT); + } + + return (0); +} + +static int +coretemp_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + coretemp_t *ct = coretemp; + + if ((mode & FREAD) == 0) { + return (EINVAL); + } + + switch (cmd) { + case SENSOR_IOCTL_TYPE: + return (coretemp_ioctl_kind(arg, mode)); + case SENSOR_IOCTL_TEMPERATURE: + return (coretemp_ioctl_temp(ct, getminor(dev), arg, mode)); + default: + return (ENOTTY); + } +} + +/* + * We don't really do any state tracking on close, so for now, just allow it to + * always succeed. 
+ */ +static int +coretemp_close(dev_t dev, int flags, int otype, cred_t *credp) +{ + return (0); +} + +static void +coretemp_fini_core(coretemp_t *ct, coretemp_core_t *ctc) +{ + if (ctc->ctc_core_minor > 0) + id_free(ct->coretemp_ids, ctc->ctc_core_minor); + if (ctc->ctc_pkg_minor > 0) + id_free(ct->coretemp_ids, ctc->ctc_pkg_minor); + kmem_free(ctc, sizeof (coretemp_core_t)); +} + +static void +coretemp_destroy(coretemp_t *ct) +{ + coretemp_core_t *ctc; + + ddi_remove_minor_node(ct->coretemp_dip, NULL); + + while ((ctc = list_remove_head(&ct->coretemp_cores)) != NULL) { + coretemp_fini_core(ct, ctc); + } + list_destroy(&ct->coretemp_cores); + + if (ct->coretemp_cpuset != NULL) { + cpuset_free(ct->coretemp_cpuset); + } + + if (ct->coretemp_ids != NULL) { + id_space_destroy(ct->coretemp_ids); + } + + mutex_destroy(&ct->coretemp_mutex); + kmem_free(ct, sizeof (coretemp_t)); +} + +static int +coretemp_init_core(cmi_hdl_t hdl, void *arg1, void *arg2, void *arg3) +{ + coretemp_t *ct = arg1; + boolean_t *walkerr = arg2; + coretemp_core_t *ctc; + uint_t chip, core; + int err; + + chip = cmi_hdl_chipid(hdl); + core = cmi_hdl_coreid(hdl); + + /* + * The temperature sensor only exists on a per-core basis. Therefore we + * ignore any non-zero strand. 
+	 */
+	if (cmi_hdl_strandid(hdl) != 0) {
+		return (CMI_HDL_WALK_NEXT);
+	}
+
+	ctc = kmem_zalloc(sizeof (coretemp_core_t), KM_SLEEP);
+	ctc->ctc_class = cmi_hdl_class(hdl);
+	ctc->ctc_chip = chip;
+	ctc->ctc_core = core;
+	ctc->ctc_strand = 0;
+	ctc->ctc_core_minor = id_alloc(ct->coretemp_ids);
+	if (ct->coretemp_pkg && ctc->ctc_core == 0) {
+		ctc->ctc_pkg_minor = id_alloc(ct->coretemp_ids);
+	}
+
+	if ((err = coretemp_calculate_tjmax(ct, ctc, hdl)) != 0) {
+		dev_err(ct->coretemp_dip, CE_WARN,
+		    "failed to read Tj Max on %u/%u: %d", chip, core, err);
+		*walkerr = B_TRUE;
+		coretemp_fini_core(ct, ctc);
+		return (CMI_HDL_WALK_DONE);
+	}
+
+	if ((err = coretemp_read(ct, ctc, hdl)) != 0) {
+		dev_err(ct->coretemp_dip, CE_WARN,
+		    "failed to take initial temperature reading on %u/%u: %d",
+		    chip, core, err);
+		*walkerr = B_TRUE;
+		coretemp_fini_core(ct, ctc);
+		return (CMI_HDL_WALK_DONE);
+	}
+
+	list_insert_tail(&ct->coretemp_cores, ctc);
+
+	return (CMI_HDL_WALK_NEXT);
+}
+
+/*
+ * Create the minor nodes for every core that we've discovered: one
+ * "chipN.coreM" node per core and, where a package minor was allocated, one
+ * "chipN" node for the package. Returns B_FALSE if any node could not be
+ * created; nodes created before the failure are not removed here.
+ */
+static boolean_t
+coretemp_create_minors(coretemp_t *ct)
+{
+	coretemp_core_t *ctc;
+
+	for (ctc = list_head(&ct->coretemp_cores); ctc != NULL;
+	    ctc = list_next(&ct->coretemp_cores, ctc)) {
+		int ret;
+		char buf[128];
+
+		if (snprintf(buf, sizeof (buf), "chip%u.core%u", ctc->ctc_chip,
+		    ctc->ctc_core) >= sizeof (buf)) {
+			return (B_FALSE);
+		}
+		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR,
+		    ctc->ctc_core_minor, DDI_NT_SENSOR_TEMP_CPU, 0);
+		if (ret != DDI_SUCCESS) {
+			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create "
+			    "minor node %s", buf);
+			return (B_FALSE);
+		}
+
+		/*
+		 * A package minor is only allocated for core zero when the
+		 * package sensor is supported. Use the same condition here so
+		 * that we never create a package node with an unallocated
+		 * minor number.
+		 */
+		if (!ct->coretemp_pkg || ctc->ctc_core != 0)
+			continue;
+
+		if (snprintf(buf, sizeof (buf), "chip%u", ctc->ctc_chip) >=
+		    sizeof (buf)) {
+			return (B_FALSE);
+		}
+
+		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR,
+		    ctc->ctc_pkg_minor, DDI_NT_SENSOR_TEMP_CPU, 0);
+		if (ret != DDI_SUCCESS) {
+			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create "
+			    "minor node %s", buf);
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * attach entry point. Only a single instance is supported; its state is
+ * published through the global coretemp pointer once every core has been
+ * initialized and all of the minor nodes exist. On any failure the partially
+ * constructed state is destroyed.
+ */
+static int
+coretemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	boolean_t walkerr;
+	coretemp_t *ct = NULL;
+
+	if (cmd == DDI_RESUME) {
+		/*
+		 * Currently suspend and resume for this driver are nops.
+		 */
+		return (DDI_SUCCESS);
+	}
+
+	if (cmd != DDI_ATTACH) {
+		return (DDI_FAILURE);
+	}
+
+	if (coretemp != NULL) {
+		return (DDI_FAILURE);
+	}
+
+	ct = kmem_zalloc(sizeof (coretemp_t), KM_SLEEP);
+	ct->coretemp_dip = dip;
+	ct->coretemp_pkg = is_x86_feature(x86_featureset, X86FSET_PKG_THERMAL);
+	list_create(&ct->coretemp_cores, sizeof (coretemp_core_t),
+	    offsetof(coretemp_core_t, ctc_link));
+	mutex_init(&ct->coretemp_mutex, NULL, MUTEX_DRIVER, NULL);
+	ct->coretemp_cpuset = cpuset_alloc(KM_SLEEP);
+	/*
+	 * Use the same bounds that coretemp_lookup_core() validates minors
+	 * against.
+	 */
+	if ((ct->coretemp_ids = id_space_create("coretemp_minors",
+	    CORETEMP_MINOR_MIN, CORETEMP_MINOR_MAX)) == NULL) {
+		goto fail;
+	}
+
+	mutex_enter(&ct->coretemp_mutex);
+	walkerr = B_FALSE;
+	cmi_hdl_walk(coretemp_init_core, ct, &walkerr, NULL);
+
+	if (walkerr) {
+		mutex_exit(&ct->coretemp_mutex);
+		goto fail;
+	}
+
+	if (!coretemp_create_minors(ct)) {
+		mutex_exit(&ct->coretemp_mutex);
+		goto fail;
+	}
+
+	coretemp = ct;
+	mutex_exit(&ct->coretemp_mutex);
+	return (DDI_SUCCESS);
+fail:
+	coretemp = NULL;
+	coretemp_destroy(ct);
+	return (DDI_FAILURE);
+}
+
+/*
+ * getinfo entry point. There is only ever instance zero. The dev_info_t can
+ * only be reported while the driver is attached, as that is the only time the
+ * global state exists.
+ */
+static int
+coretemp_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
+    void **resultp)
+{
+	int ret;
+
+	switch (cmd) {
+	case DDI_INFO_DEVT2DEVINFO:
+		if (coretemp == NULL) {
+			ret = DDI_FAILURE;
+			break;
+		}
+		*resultp = coretemp->coretemp_dip;
+		ret = DDI_SUCCESS;
+		break;
+	case DDI_INFO_DEVT2INSTANCE:
+		*resultp = (void *)0;
+		ret = DDI_SUCCESS;
+		break;
+	default:
+		ret = DDI_FAILURE;
+		break;
+	}
+
+	return (ret);
+}
+
+/*
+ * detach entry point. DDI_SUSPEND is a no-op. For DDI_DETACH we clear the
+ * global pointer and then destroy the driver state.
+ */
+static int
+coretemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	coretemp_t *ct;
+
+	if (cmd == DDI_SUSPEND) {
+		return (DDI_SUCCESS);
+	}
+
+	if (cmd != DDI_DETACH) {
+		return (DDI_FAILURE);
+	}
+
+	if (coretemp == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	ct = coretemp;
+	coretemp = NULL;
+	coretemp_destroy(ct);
+
+	return (DDI_SUCCESS);
+}
+
+static struct cb_ops coretemp_cb_ops = { + .cb_open = coretemp_open, + .cb_close = coretemp_close, + .cb_strategy = nodev, + .cb_print = nodev, + .cb_dump = nodev, + .cb_read = nodev, + .cb_write = nodev, + .cb_ioctl = coretemp_ioctl, + .cb_devmap = nodev, + .cb_mmap = nodev, + .cb_segmap = nodev, + .cb_chpoll = nochpoll, + .cb_prop_op = ddi_prop_op, + .cb_flag = D_MP, + .cb_rev = CB_REV, + .cb_aread = nodev, + .cb_awrite = nodev +}; + +static struct dev_ops coretemp_dev_ops = { + .devo_rev = DEVO_REV, + .devo_refcnt = 0, + .devo_getinfo = coretemp_getinfo, + .devo_identify = nulldev, + .devo_probe = nulldev, + .devo_attach = coretemp_attach, + .devo_detach = coretemp_detach, + .devo_reset = nodev, + .devo_power = ddi_power, + .devo_quiesce = ddi_quiesce_not_needed, + .devo_cb_ops = &coretemp_cb_ops +}; + +static struct modldrv coretemp_modldrv = { + .drv_modops = &mod_driverops, + .drv_linkinfo = "Intel CPU/Package thermal sensor", + .drv_dev_ops = &coretemp_dev_ops +}; + +static struct modlinkage coretemp_modlinkage = { + .ml_rev = MODREV_1, + .ml_linkage = { &coretemp_modldrv, NULL } +}; + +int +_init(void) +{ + if (!coretemp_supported()) { + return (ENOTSUP); + } + + return (mod_install(&coretemp_modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&coretemp_modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&coretemp_modlinkage)); +} diff --git a/usr/src/uts/intel/io/coretemp/coretemp.conf b/usr/src/uts/intel/io/coretemp/coretemp.conf new file mode 100644 index 0000000000..1880a2fa16 --- /dev/null +++ b/usr/src/uts/intel/io/coretemp/coretemp.conf @@ -0,0 +1,16 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent, Inc. +# + +name="coretemp" parent="pseudo" instance=0; diff --git a/usr/src/uts/intel/io/dktp/dcdev/dadk.c b/usr/src/uts/intel/io/dktp/dcdev/dadk.c index 35f97482b8..f74a0d4137 100644 --- a/usr/src/uts/intel/io/dktp/dcdev/dadk.c +++ b/usr/src/uts/intel/io/dktp/dcdev/dadk.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ /* @@ -170,6 +171,8 @@ static int dadk_debug = DGEOM; #endif /* DADK_DEBUG */ +#define ONE_MIN ((longlong_t)60 * NANOSEC) + static int dadk_check_media_time = 3000000; /* 3 Second State Check */ static int dadk_dk_maxphys = 0x80000; @@ -1376,6 +1379,47 @@ static struct dadkio_derr dadk_errtab[] = { {COMMAND_DONE_ERROR, GDA_FATAL}, /* 23 DERR_RESV */ }; +/* + * A bad disk can result in a large number of errors spewed to the log. + * This can in turn lead to /var/adm/messages filling up the file system on + * a machine with a small root or /var file system. + * + * Instead of logging every error, if we're seeing repeated errors on a disk + * only log them periodically. 
+ */ +static void +dadk_logerr(struct dadk *dadkp, struct cmpkt *pktp, char *label, + int severity, daddr_t blkno, daddr_t err_blkno, + char **cmdvec, char **senvec) +{ + hrtime_t now; + + now = gethrtime(); + if ((now - dadkp->dad_last_log) < ONE_MIN) { + atomic_add_32(&dadkp->dad_err_cnt, 1); + return; + } + + if (dadkp->dad_err_cnt > 0) { + dev_info_t *dev = dadkp->dad_sd->sd_dev; + char name[256], buf[256]; + + if (dev) + (void) snprintf(name, sizeof (name), "%s (%s%d)", + ddi_pathname(dev, buf), label, + ddi_get_instance(dev)); + else + (void) strlcpy(name, label, sizeof (name)); + cmn_err(CE_WARN, "%s: %d additional unlogged errors\n", + name, dadkp->dad_err_cnt); + } + + gda_errmsg(dadkp->dad_sd, pktp, label, severity, blkno, err_blkno, + cmdvec, senvec); + dadkp->dad_err_cnt = 0; + dadkp->dad_last_log = now; +} + static int dadk_chkerr(struct cmpkt *pktp) { @@ -1462,7 +1506,7 @@ dadk_chkerr(struct cmpkt *pktp) return (COMMAND_DONE); } if (pktp->cp_passthru == NULL) { - gda_errmsg(dadkp->dad_sd, pktp, dadk_name, + dadk_logerr(dadkp, pktp, dadk_name, dadk_errtab[scb].d_severity, pktp->cp_srtsec, err_blkno, dadk_cmds, dadk_sense); } @@ -1519,7 +1563,7 @@ dadk_recorderr(struct cmpkt *pktp, struct dadkio_rwcmd *rwcmdp) if (rwcmdp->flags & DADKIO_FLAG_SILENT) return; - gda_errmsg(dadkp->dad_sd, pktp, dadk_name, dadk_errtab[scb].d_severity, + dadk_logerr(dadkp, pktp, dadk_name, dadk_errtab[scb].d_severity, rwcmdp->blkaddr, rwcmdp->status.failed_blk, dadk_cmds, dadk_sense); } diff --git a/usr/src/uts/intel/io/hotplug/pcicfg/pcicfg.c b/usr/src/uts/intel/io/hotplug/pcicfg/pcicfg.c index 09cf261d9b..b482117c7c 100644 --- a/usr/src/uts/intel/io/hotplug/pcicfg/pcicfg.c +++ b/usr/src/uts/intel/io/hotplug/pcicfg/pcicfg.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, Joyent, Inc. 
*/ /* @@ -557,7 +558,7 @@ pcicfg_configure(dev_info_t *devi, uint_t device, uint_t function, pci_bus_range_t pci_bus_range; int rv; int circ; - uint_t highest_bus; + uint_t highest_bus, visited = 0; int ari_mode = B_FALSE; int max_function = PCI_MAX_FUNCTIONS; int trans_device; @@ -669,6 +670,11 @@ pcicfg_configure(dev_info_t *devi, uint_t device, uint_t function, goto cleanup; } + /* + * Note that we've successfully gone through and visited at + * least one node. + */ + visited++; next: /* * Determine if ARI Forwarding should be enabled. @@ -696,7 +702,7 @@ next: goto cleanup; /* - * Check if there are more fucntions to probe. + * Check if there are more functions to probe. */ if (next_function == 0) { DEBUG0("Next Function - " @@ -712,7 +718,7 @@ next: ndi_devi_exit(devi, circ); - if (func == 0) + if (visited == 0) return (PCICFG_FAILURE); /* probe failed */ else return (PCICFG_SUCCESS); diff --git a/usr/src/uts/intel/io/ipmi/ipmivars.h b/usr/src/uts/intel/io/ipmi/ipmivars.h index 7fd819cd3d..f547d6f043 100644 --- a/usr/src/uts/intel/io/ipmi/ipmivars.h +++ b/usr/src/uts/intel/io/ipmi/ipmivars.h @@ -78,6 +78,7 @@ struct ipmi_request { #define SMIC_CTL_STS 1 #define SMIC_FLAGS 2 +struct ipmi_softc; #define IPMI_BUSY 0x1 #define IPMI_CLOSING 0x2 diff --git a/usr/src/uts/intel/io/pci/pci_boot.c b/usr/src/uts/intel/io/pci/pci_boot.c index 1af4db0659..0bf28143cf 100644 --- a/usr/src/uts/intel/io/pci/pci_boot.c +++ b/usr/src/uts/intel/io/pci/pci_boot.c @@ -2873,7 +2873,7 @@ add_ppb_props(dev_info_t *dip, uchar_t bus, uchar_t dev, uchar_t func, * If it is unset, we disable i/o and mark it for reconfiguration in * later passes by setting the base > limit */ - val = (uint_t)pci_getw(bus, dev, func, PCI_CONF_COMM); + val = (uint64_t)pci_getw(bus, dev, func, PCI_CONF_COMM); if (val & PCI_COMM_IO) { val = (uint_t)pci_getb(bus, dev, func, PCI_BCNF_IO_BASE_LOW); io_range[0] = ((val & PCI_BCNF_IO_MASK) << PCI_BCNF_IO_SHIFT); diff --git a/usr/src/uts/intel/io/vmxnet/buildNumber.h 
b/usr/src/uts/intel/io/vmxnet/buildNumber.h new file mode 100644 index 0000000000..97f18a3cbc --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/buildNumber.h @@ -0,0 +1,12 @@ +#define BUILD_NUMBER \ + "build-425873" +#define BUILD_NUMBER_NUMERIC \ + 425873 +#define BUILD_NUMBER_NUMERIC_STRING \ + "425873" +#define PRODUCT_BUILD_NUMBER \ + "product-build-6261" +#define PRODUCT_BUILD_NUMBER_NUMERIC \ + 6261 +#define PRODUCT_BUILD_NUMBER_NUMERIC_STRING \ + "6261" diff --git a/usr/src/uts/intel/io/vmxnet/includeCheck.h b/usr/src/uts/intel/io/vmxnet/includeCheck.h new file mode 100644 index 0000000000..c414d6daf5 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/includeCheck.h @@ -0,0 +1,159 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation version 2.1 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + *********************************************************/ + +/********************************************************* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of VMware Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission of VMware Inc. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. + * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +/* + * includeCheck.h -- + * + * Restrict include file use. 
+ * + * In every .h file, define one or more of these + * + * INCLUDE_ALLOW_VMX + * INCLUDE_ALLOW_USERLEVEL + * INCLUDE_ALLOW_VMCORE + * INCLUDE_ALLOW_MODULE + * INCLUDE_ALLOW_VMKERNEL + * INCLUDE_ALLOW_DISTRIBUTE + * INCLUDE_ALLOW_VMK_MODULE + * INCLUDE_ALLOW_VMKDRIVERS + * INCLUDE_ALLOW_VMIROM + * + * Then include this file. + * + * Any file that has INCLUDE_ALLOW_DISTRIBUTE defined will potentially + * be distributed in source form along with GPLed code. Ensure + * that this is acceptable. + */ + + +/* + * Declare a VMCORE-only variable to help classify object + * files. The variable goes in the common block and does + * not create multiple definition link-time conflicts. + */ + +#if defined VMCORE && defined VMX86_DEVEL && defined VMX86_DEBUG && \ + defined linux && !defined MODULE && \ + !defined COMPILED_WITH_VMCORE +#define COMPILED_WITH_VMCORE compiled_with_vmcore +#ifdef ASM + .comm compiled_with_vmcore, 0 +#else + asm(".comm compiled_with_vmcore, 0"); +#endif /* ASM */ +#endif + + +#if defined VMCORE && \ + !(defined VMX86_VMX || defined VMM || \ + defined MONITOR_APP || defined VMMON) +#error "Makefile problem: VMCORE without VMX86_VMX or \ + VMM or MONITOR_APP or MODULE." +#endif + +#if defined VMCORE && !defined INCLUDE_ALLOW_VMCORE +#error "The surrounding include file is not allowed in vmcore." +#endif +#undef INCLUDE_ALLOW_VMCORE + +#if defined VMX86_VMX && !defined VMCORE && \ + !(defined INCLUDE_ALLOW_VMX || defined INCLUDE_ALLOW_USERLEVEL) +#error "The surrounding include file is not allowed in the VMX." +#endif +#undef INCLUDE_ALLOW_VMX + +#if defined USERLEVEL && !defined VMX86_VMX && !defined VMCORE && \ + !defined INCLUDE_ALLOW_USERLEVEL +#error "The surrounding include file is not allowed at userlevel." +#endif +#undef INCLUDE_ALLOW_USERLEVEL + +#if defined MODULE && !defined VMKERNEL_MODULE && \ + !defined VMMON && !defined INCLUDE_ALLOW_MODULE +#error "The surrounding include file is not allowed in driver modules." 
+#endif +#undef INCLUDE_ALLOW_MODULE + +#if defined VMMON && !defined INCLUDE_ALLOW_VMMON +#error "The surrounding include file is not allowed in vmmon." +#endif +#undef INCLUDE_ALLOW_VMMON + +#if defined VMKERNEL && !defined INCLUDE_ALLOW_VMKERNEL +#error "The surrounding include file is not allowed in the vmkernel." +#endif +#undef INCLUDE_ALLOW_VMKERNEL + +#if defined GPLED_CODE && !defined INCLUDE_ALLOW_DISTRIBUTE +#error "The surrounding include file is not allowed in GPL code." +#endif +#undef INCLUDE_ALLOW_DISTRIBUTE + +#if defined VMKERNEL_MODULE && !defined VMKERNEL && \ + !defined INCLUDE_ALLOW_VMK_MODULE && !defined INCLUDE_ALLOW_VMKDRIVERS +#error "The surrounding include file is not allowed in vmkernel modules." +#endif +#undef INCLUDE_ALLOW_VMK_MODULE +#undef INCLUDE_ALLOW_VMKDRIVERS + +#if defined VMIROM && ! defined INCLUDE_ALLOW_VMIROM +#error "The surrounding include file is not allowed in vmirom." +#endif +#undef INCLUDE_ALLOW_VMIROM diff --git a/usr/src/uts/intel/io/vmxnet/net.h b/usr/src/uts/intel/io/vmxnet/net.h new file mode 100644 index 0000000000..41b6eb1d14 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/net.h @@ -0,0 +1,220 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. + * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +/************************************************************ + * + * net.h + * + * This file should contain all network global defines. + * No vlance/vmxnet/vnet/vmknet specific stuff should be + * put here only defines used/usable by all network code. 
+ * --gustav + * + ************************************************************/ + +#ifndef VMWARE_DEVICES_NET_H +#define VMWARE_DEVICES_NET_H + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMCORE + +#include "includeCheck.h" +#include "vm_device_version.h" + +#ifdef VMCORE +#include "config.h" +#include "str.h" +#include "strutil.h" +#endif + +#define ETHERNET_MTU 1518 +#define ETH_MIN_FRAME_LEN 60 + +#ifndef ETHER_ADDR_LEN +#define ETHER_ADDR_LEN 6 /* length of MAC address */ +#endif +#define ETH_HEADER_LEN 14 /* length of Ethernet header */ +#define IP_ADDR_LEN 4 /* length of IPv4 address */ +#define IP_HEADER_LEN 20 /* minimum length of IPv4 header */ + +#define ETHER_MAX_QUEUED_PACKET 1600 + + +/* + * States that a NIC can be in. Currently we only use this + * in VLance, but if we implement/emulate new adapters that + * we also want to be able to morph, a new corresponding + * state should be added. + */ + +#define LANCE_CHIP 0x2934 +#define VMXNET_CHIP 0x4392 + +/* + * Size of reserved IO space needed by the LANCE adapter and + * the VMXNET adapter. If you add more ports to Vmxnet than + * there is reserved space you must bump VMXNET_CHIP_IO_RESV_SIZE. + * The sizes must be powers of 2. + */ + +#define LANCE_CHIP_IO_RESV_SIZE 0x20 +#define VMXNET_CHIP_IO_RESV_SIZE 0x40 + +#define MORPH_PORT_SIZE 4 + +#ifdef VMCORE +typedef struct Net_AdapterCount { + uint8 vlance; + uint8 vmxnet2; + uint8 vmxnet3; + uint8 e1000; + uint8 e1000e; +} Net_AdapterCount; +#endif + +#ifdef USERLEVEL + +/* + *---------------------------------------------------------------------------- + * + * Net_AddAddrToLadrf -- + * + * Given a MAC address, sets the corresponding bit in the LANCE style + * Logical Address Filter 'ladrf'. + * The caller should have initialized the ladrf to all 0's, as this + * function only ORs on a bit in the array. + * 'addr' is presumed to be ETHER_ADDR_LEN in size; + * 'ladrf' is presumed to point to a 64-bit vector.
+ * + * Derived from a long history of derivations, originally inspired by + * sample code from the AMD "Network Products: Ethernet Controllers 1998 + * Data Book, Book 2", pages 1-53..1-55. + * + * Returns: + * None. + * + * Side effects: + * Updates 'ladrf'. + * + *---------------------------------------------------------------------------- + */ + +static INLINE void +Net_AddAddrToLadrf(const uint8 *addr, // IN: pointer to MAC address + uint8 *ladrf) // IN/OUT: pointer to ladrf +{ +#define CRC_POLYNOMIAL_BE 0x04c11db7UL /* Ethernet CRC, big endian */ + + uint16 hashcode; + int32 crc = 0xffffffff; /* init CRC for each address */ + int32 j; + int32 bit; + int32 byte; + + ASSERT(addr); + ASSERT(ladrf); + + for (byte = 0; byte < ETHER_ADDR_LEN; byte++) { /* for each address byte */ + /* process each address bit */ + for (bit = *addr++, j = 0; + j < 8; + j++, bit >>= 1) { + crc = (crc << 1) ^ ((((crc < 0 ? 1 : 0) ^ bit) & 0x01) ? + CRC_POLYNOMIAL_BE : 0); + } + } + hashcode = (crc & 1); /* hashcode is 6 LSb of CRC ... */ + for (j = 0; j < 5; j++) { /* ... in reverse order. */ + hashcode = (hashcode << 1) | ((crc>>=1) & 1); + } + + ladrf[hashcode >> 3] |= 1 << (hashcode & 0x07); +} +#endif // USERLEVEL + +#ifdef VMCORE +/* + *---------------------------------------------------------------------- + * + * Net_GetNumAdapters -- + * + * Returns the number of each type of network adapter configured in this + * VM. + * + * Results: + * None. + * + * Side effects: + * None. 
+ * + *---------------------------------------------------------------------- + */ + +static INLINE void +Net_GetNumAdapters(Net_AdapterCount *counts) +{ + uint32 i; + + counts->vlance = 0; + counts->vmxnet2 = 0; + counts->vmxnet3 = 0; + counts->e1000 = 0; + counts->e1000e = 0; + + for (i = 0; i < MAX_ETHERNET_CARDS; i++) { + char* adapterStr; + + if (!Config_GetBool(FALSE, "ethernet%d.present", i)) { + continue; + } + adapterStr = Config_GetString("vlance", "ethernet%d.virtualDev", i); + if (Str_Strcasecmp(adapterStr, "vmxnet3") == 0) { + counts->vmxnet3++; + } else if (Str_Strcasecmp(adapterStr, "vlance") == 0) { + counts->vlance++; + } else if (Str_Strcasecmp(adapterStr, "vmxnet") == 0) { + counts->vmxnet2++; + } else if (Str_Strcasecmp(adapterStr, "e1000") == 0) { + counts->e1000++; + } else if (Str_Strcasecmp(adapterStr, "e1000e") == 0) { + counts->e1000e++; + } else { + LOG_ONCE(("%s: unknown adapter: %s\n", __FUNCTION__, adapterStr)); + } + free(adapterStr); + } +} + +#endif // VMCORE + +#endif // VMWARE_DEVICES_NET_H diff --git a/usr/src/uts/intel/io/vmxnet/net_sg.h b/usr/src/uts/intel/io/vmxnet/net_sg.h new file mode 100644 index 0000000000..f6c30fb2b5 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/net_sg.h @@ -0,0 +1,84 @@ +/********************************************************* + * Copyright (C) 2000 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. + * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +/* + * net_sg.h -- + * + * Network packet scatter gather structure. + */ + + +#ifndef _NET_SG_H +#define _NET_SG_H + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#include "includeCheck.h" + +#define NET_SG_DEFAULT_LENGTH 16 + +/* + * A single scatter-gather element for a network packet. + * The address is split into low and high to save space. + * If we make it 64 bits then Windows pads things out such that + * we lose a lot of space for each scatter gather array. + * This adds up when you have embedded scatter-gather + * arrays for transmit and receive ring buffers. 
+ */ +typedef struct NetSG_Elem { + uint32 addrLow; + uint16 addrHi; + uint16 length; +} NetSG_Elem; + +typedef enum NetSG_AddrType { + NET_SG_MACH_ADDR, + NET_SG_PHYS_ADDR, + NET_SG_VIRT_ADDR, +} NetSG_AddrType; + +typedef struct NetSG_Array { + uint16 addrType; + uint16 length; + NetSG_Elem sg[NET_SG_DEFAULT_LENGTH]; +} NetSG_Array; + +#define NET_SG_SIZE(len) (sizeof(NetSG_Array) + (len - NET_SG_DEFAULT_LENGTH) * sizeof(NetSG_Elem)) + +#define NET_SG_MAKE_PA(elem) (PA)QWORD(elem.addrHi, elem.addrLow) +#define NET_SG_MAKE_PTR(elem) (char *)(uintptr_t)QWORD(elem.addrHi, elem.addrLow) + +#endif diff --git a/usr/src/uts/intel/io/vmxnet/vm_basic_types.h b/usr/src/uts/intel/io/vmxnet/vm_basic_types.h new file mode 100644 index 0000000000..adeac1b708 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vm_basic_types.h @@ -0,0 +1,1037 @@ +/********************************************************* + * Copyright (C) 1998-2009 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation version 2.1 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + *********************************************************/ + +/********************************************************* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of VMware Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission of VMware Inc. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. + * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + *********************************************************/ + +/* + * + * vm_basic_types.h -- + * + * basic data types. + */ + + +#ifndef _VM_BASIC_TYPES_H_ +#define _VM_BASIC_TYPES_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMMON +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMKDRIVERS +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_DISTRIBUTE +#define INCLUDE_ALLOW_VMCORE +#define INCLUDE_ALLOW_VMIROM +#include "includeCheck.h" + +/* STRICT ANSI means the Xserver build and X defines Bool differently. */ +#if !defined(_XTYPEDEF_BOOL) && \ + (!defined(__STRICT_ANSI__) || defined(__FreeBSD__) || defined(__MINGW32__)) +#define _XTYPEDEF_BOOL +typedef char Bool; +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +#define IsBool(x) (((x) & ~1) == 0) +#define IsBool2(x, y) ((((x) | (y)) & ~1) == 0) + +/* + * Macros __i386__ and __ia64 are intrinsically defined by GCC + */ +#if defined _MSC_VER && defined _M_X64 +# define __x86_64__ +#elif defined _MSC_VER && defined _M_IX86 +# define __i386__ +#endif + +#ifdef __i386__ +#define VM_I386 +#endif + +#ifdef __x86_64__ +#define VM_X86_64 +#define VM_I386 +#define vm_x86_64 (1) +#else +#define vm_x86_64 (0) +#endif + + +#ifdef _MSC_VER + +#pragma warning (3 :4505) // unreferenced local function +#pragma warning (disable :4018) // signed/unsigned mismatch +#pragma warning (disable :4761) // integral size mismatch in argument; conversion supplied +#pragma warning (disable :4305) // truncation from 'const int' to 'short' +#pragma warning (disable :4244) // conversion from 'unsigned short' to 'unsigned char' +#pragma warning (disable :4267) // truncation of 'size_t' +#pragma warning (disable :4146) // unary minus operator applied to unsigned type, result still unsigned +#pragma warning (disable :4142) // benign redefinition of type + +#endif + +#if defined(__APPLE__) || defined(HAVE_STDINT_H) + +/* + * TODO: This is a 
C99 standard header. We should be able to test for + * #if __STDC_VERSION__ >= 199901L, but that breaks the Netware build + * (which doesn't have stdint.h). + */ + +#include <stdint.h> + +typedef uint64_t uint64; +typedef int64_t int64; +typedef uint32_t uint32; +typedef int32_t int32; +typedef uint16_t uint16; +typedef int16_t int16; +typedef uint8_t uint8; +typedef int8_t int8; + +/* + * Note: C does not specify whether char is signed or unsigned, and + * both gcc and msvc implement processor-specific signedness. With + * three types: + * typeof(char) != typeof(signed char) != typeof(unsigned char) + * + * Be careful here, because gcc (4.0.1 and others) likes to warn about + * conversions between signed char * and char *. + */ + +#else /* !HAVE_STDINT_H */ + +#ifdef _MSC_VER + +typedef unsigned __int64 uint64; +typedef signed __int64 int64; + +#elif defined(__GNUC__) || defined(__SUNPRO_C) +/* The Xserver source compiles with -ansi -pedantic */ +# if !defined(__STRICT_ANSI__) || defined(__FreeBSD__) +# if defined(VM_X86_64) +typedef unsigned long uint64; +typedef long int64; +# else +typedef unsigned long long uint64; +typedef long long int64; +# endif +# endif +#else +# error - Need compiler define for int64/uint64 +#endif /* _MSC_VER */ + +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; + +typedef int int32; +typedef short int16; +typedef signed char int8; + +#endif /* HAVE_STDINT_H */ + +/* + * FreeBSD (for the tools build) unconditionally defines these in + * sys/inttypes.h so don't redefine them if this file has already + * been included. [greg] + * + * This applies to Solaris as well.
+ */ + +/* + * Before trying to do the includes based on OS defines, see if we can use + * feature-based defines to get as much functionality as possible + */ + +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_INTTYPES_H +#include <sys/inttypes.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#ifdef __FreeBSD__ +#include <sys/param.h> /* For __FreeBSD_version */ +#endif + +#if !defined(USING_AUTOCONF) +# if defined(__FreeBSD__) || defined(sun) +# ifdef KLD_MODULE +# include <sys/types.h> +# else +# if __FreeBSD_version >= 500043 +# if !defined(VMKERNEL) +# include <inttypes.h> +# endif +# include <sys/types.h> +# else +# include <sys/inttypes.h> +# endif +# endif +# elif defined __APPLE__ +# if KERNEL +# include <sys/unistd.h> +# include <sys/types.h> /* mostly for size_t */ +# include <stdint.h> +# else +# include <unistd.h> +# include <inttypes.h> +# include <stdlib.h> +# include <stdint.h> +# endif +# else +# if !defined(__intptr_t_defined) && !defined(intptr_t) +# ifdef VM_I386 +# define __intptr_t_defined +# ifdef VM_X86_64 +typedef int64 intptr_t; +# else +typedef int32 intptr_t; +# endif +# elif defined(__arm__) +typedef int32 intptr_t; +# endif +# endif + +# ifndef _STDINT_H +# ifdef VM_I386 +# ifdef VM_X86_64 +typedef uint64 uintptr_t; +# else +typedef uint32 uintptr_t; +# endif +# elif defined(__arm__) +typedef uint32 uintptr_t; +# endif +# endif +# endif +#endif + + +/* + * Time + * XXX These should be cleaned up. -- edward + */ + +typedef int64 VmTimeType; /* Time in microseconds */ +typedef int64 VmTimeRealClock; /* Real clock kept in microseconds */ +typedef int64 VmTimeVirtualClock; /* Virtual Clock kept in CPU cycles */ + +/* + * Printf format specifiers for size_t and 64-bit number. + * Use them like this: + * printf("%"FMT64"d\n", big); + * + * FMTH is for handles/fds. 
+ */ + +#ifdef _MSC_VER + #define FMT64 "I64" + #ifdef VM_X86_64 + #define FMTSZ "I64" + #define FMTPD "I64" + #define FMTH "I64" + #else + #define FMTSZ "I" + #define FMTPD "I" + #define FMTH "I" + #endif +#elif defined __APPLE__ + /* Mac OS hosts use the same formatters for 32- and 64-bit. */ + #define FMT64 "ll" + #if KERNEL + #define FMTSZ "l" + #else + #define FMTSZ "z" + #endif + #define FMTPD "l" + #define FMTH "" +#elif defined(__GNUC__) || defined(__SUNPRO_C) + #define FMTH "" + #if defined(N_PLAT_NLM) || defined(sun) || \ + (defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) < 5)) + /* + * Why (__FreeBSD__ + 0)? See bug 141008. + * Yes, we really need to test both (__FreeBSD__ + 0) and + * ((__FreeBSD__ + 0) < 5). No, we can't remove "+ 0" from + * ((__FreeBSD__ + 0) < 5). + */ + #ifdef VM_X86_64 + #define FMTSZ "l" + #define FMTPD "l" + #else + #define FMTSZ "" + #define FMTPD "" + #endif + #elif defined(__linux__) \ + || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \ + || (defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112L) \ + || (defined(_POSIX2_VERSION) && _POSIX2_VERSION >= 200112L) + /* BSD, Linux */ + #define FMTSZ "z" + + #if defined(VM_X86_64) + #define FMTPD "l" + #else + #define FMTPD "" + #endif + #else + /* Systems with a pre-C99 libc */ + #define FMTSZ "Z" + #ifdef VM_X86_64 + #define FMTPD "l" + #else + #define FMTPD "" + #endif + #endif + #ifdef VM_X86_64 + #define FMT64 "l" + #elif defined(sun) || defined(__FreeBSD__) + #define FMT64 "ll" + #else + #define FMT64 "L" + #endif +#else + #error - Need compiler define for FMT64 and FMTSZ +#endif + +/* + * Suffix for 64-bit constants. Use it like this: + * CONST64(0x7fffffffffffffff) for signed or + * CONST64U(0x7fffffffffffffff) for unsigned. + * + * 2004.08.30(thutt): + * The vmcore/asm64/gen* programs are compiled as 32-bit + * applications, but must handle 64 bit constants. 
If the + * 64-bit-constant defining macros are already defined, the + * definition will not be overwritten. + */ + +#if !defined(CONST64) || !defined(CONST64U) +#ifdef _MSC_VER +#define CONST64(c) c##I64 +#define CONST64U(c) c##uI64 +#elif defined __APPLE__ +#define CONST64(c) c##LL +#define CONST64U(c) c##uLL +#elif defined(__GNUC__) || defined(__SUNPRO_C) +#ifdef VM_X86_64 +#define CONST64(c) c##L +#define CONST64U(c) c##uL +#else +#define CONST64(c) c##LL +#define CONST64U(c) c##uLL +#endif +#else +#error - Need compiler define for CONST64 +#endif +#endif + +/* + * Use CONST3264/CONST3264U if you want a constant to be + * treated as a 32-bit number on 32-bit compiles and + * a 64-bit number on 64-bit compiles. Useful in the case + * of shifts, like (CONST3264U(1) << x), where x could be + * more than 31 on a 64-bit compile. + */ + +#ifdef VM_X86_64 + #define CONST3264(a) CONST64(a) + #define CONST3264U(a) CONST64U(a) +#else + #define CONST3264(a) (a) + #define CONST3264U(a) (a) +#endif + +#define MIN_INT8 ((int8)0x80) +#define MAX_INT8 ((int8)0x7f) + +#define MIN_UINT8 ((uint8)0) +#define MAX_UINT8 ((uint8)0xff) + +#define MIN_INT16 ((int16)0x8000) +#define MAX_INT16 ((int16)0x7fff) + +#define MIN_UINT16 ((uint16)0) +#define MAX_UINT16 ((uint16)0xffff) + +#define MIN_INT32 ((int32)0x80000000) +#define MAX_INT32 ((int32)0x7fffffff) + +#define MIN_UINT32 ((uint32)0) +#define MAX_UINT32 ((uint32)0xffffffff) + +#define MIN_INT64 (CONST64(0x8000000000000000)) +#define MAX_INT64 (CONST64(0x7fffffffffffffff)) + +#define MIN_UINT64 (CONST64U(0)) +#define MAX_UINT64 (CONST64U(0xffffffffffffffff)) + +typedef uint8 *TCA; /* Pointer into TC (usually). 
*/ + +/* + * Type big enough to hold an integer between 0..100 + */ +typedef uint8 Percent; +#define AsPercent(v) ((Percent)(v)) +#define CHOOSE_PERCENT AsPercent(101) + + +typedef uintptr_t VA; +typedef uintptr_t VPN; + +typedef uint64 PA; +typedef uint32 PPN; + +typedef uint64 PhysMemOff; +typedef uint64 PhysMemSize; + +/* The Xserver source compiles with -ansi -pedantic */ +#ifndef __STRICT_ANSI__ +typedef uint64 BA; +#endif +typedef uint32 BPN; +typedef uint32 PageNum; +typedef unsigned MemHandle; +typedef int32 World_ID; + +/* !! do not alter the definition of INVALID_WORLD_ID without ensuring + * that the values defined in both bora/public/vm_basic_types.h and + * lib/vprobe/vm_basic_types.h are the same. Additionally, the definition + * of VMK_INVALID_WORLD_ID in vmkapi_world.h also must be defined with + * the same value + */ + +#define INVALID_WORLD_ID ((World_ID)0) + +typedef World_ID User_CartelID; +#define INVALID_CARTEL_ID INVALID_WORLD_ID + +typedef User_CartelID User_SessionID; +#define INVALID_SESSION_ID INVALID_CARTEL_ID + +typedef User_CartelID User_CartelGroupID; +#define INVALID_CARTELGROUP_ID INVALID_CARTEL_ID + +typedef uint32 Worldlet_ID; +#define INVALID_WORLDLET_ID ((Worldlet_ID)-1) + +/* The Xserver source compiles with -ansi -pedantic */ +#ifndef __STRICT_ANSI__ +typedef uint64 MA; +typedef uint32 MPN; +#endif + +/* + * This type should be used for variables that contain sector + * position/quantity. + */ +typedef uint64 SectorType; + +/* + * Linear address + */ + +typedef uintptr_t LA; +typedef uintptr_t LPN; +#define LA_2_LPN(_la) ((_la) >> PAGE_SHIFT) +#define LPN_2_LA(_lpn) ((_lpn) << PAGE_SHIFT) + +#define LAST_LPN ((((LA) 1) << (8 * sizeof(LA) - PAGE_SHIFT)) - 1) +#define LAST_LPN32 ((((LA32)1) << (8 * sizeof(LA32) - PAGE_SHIFT)) - 1) +#define LAST_LPN64 ((((LA64)1) << (8 * sizeof(LA64) - PAGE_SHIFT)) - 1) + +/* Valid bits in a LPN.
*/ +#define LPN_MASK LAST_LPN +#define LPN_MASK32 LAST_LPN32 +#define LPN_MASK64 LAST_LPN64 + +/* + * On 64 bit platform, address and page number types default + * to 64 bit. When we need to represent a 32 bit address, we use + * types defined below. + * + * On 32 bit platform, the following types are the same as the + * default types. + */ +typedef uint32 VA32; +typedef uint32 VPN32; +typedef uint32 LA32; +typedef uint32 LPN32; +typedef uint32 PA32; +typedef uint32 PPN32; +typedef uint32 MA32; +typedef uint32 MPN32; + +/* + * On 64 bit platform, the following types are the same as the + * default types. + */ +typedef uint64 VA64; +typedef uint64 VPN64; +typedef uint64 LA64; +typedef uint64 LPN64; +typedef uint64 PA64; +typedef uint64 PPN64; +typedef uint64 MA64; +typedef uint64 MPN64; + +/* + * VA typedefs for user world apps. + */ +typedef VA32 UserVA32; +typedef VA64 UserVA64; +typedef UserVA64 UserVAConst; /* Userspace ptr to data that we may only read. */ +typedef UserVA32 UserVA32Const; /* Userspace ptr to data that we may only read. */ +typedef UserVA64 UserVA64Const; /* Used by 64-bit syscalls until conversion is finished. */ +#ifdef VMKERNEL +typedef UserVA64 UserVA; +#else +typedef void * UserVA; +#endif + + +/* + * Maximal possible PPN value (errors too) that PhysMem can handle. + * Must be at least as large as MAX_PPN which is the maximum PPN + * for any region other than buserror. + */ +#define PHYSMEM_MAX_PPN ((PPN)0xffffffff) +#define MAX_PPN ((PPN)0x1fffffff) /* Maximal observable PPN value. */ +#define INVALID_PPN ((PPN)0xffffffff) + +#define INVALID_BPN ((BPN)0x1fffffff) + +#define RESERVED_MPN ((MPN) 0) +#define INVALID_MPN ((MPN)-1) +#define MEMREF_MPN ((MPN)-2) +#define RELEASED_MPN ((MPN)-3) +#define MAX_MPN ((MPN)0x7fffffff) /* 43 bits of address space. 
*/ + +#define INVALID_LPN ((LPN)-1) +#define INVALID_VPN ((VPN)-1) +#define INVALID_LPN64 ((LPN64)-1) +#define INVALID_PAGENUM ((PageNum)-1) + + +/* + * Format modifier for printing VA, LA, and VPN. + * Use them like this: Log("%#"FMTLA"x\n", laddr) + */ + +#if defined(VMM) || defined(FROBOS64) || vm_x86_64 || defined __APPLE__ +# define FMTLA "l" +# define FMTVA "l" +# define FMTVPN "l" +#else +# define FMTLA "" +# define FMTVA "" +# define FMTVPN "" +#endif + +#ifndef EXTERN +#define EXTERN extern +#endif +#define CONST const + + +#ifndef INLINE +# ifdef _MSC_VER +# define INLINE __inline +# else +# define INLINE inline +# endif +#endif + + +/* + * Annotation for data that may be exported into a DLL and used by other + * apps that load that DLL and import the data. + */ +#if defined(_WIN32) && defined(VMX86_IMPORT_DLLDATA) +# define VMX86_EXTERN_DATA extern __declspec(dllimport) +#else // !_WIN32 +# define VMX86_EXTERN_DATA extern +#endif + +#if defined(_WIN32) && !defined(VMX86_NO_THREADS) +#define THREADSPECIFIC __declspec(thread) +#else +#define THREADSPECIFIC +#endif + +/* + * Due to the wonderful "registry redirection" feature introduced in + * 64-bit Windows, if you access any key under HKLM\Software in 64-bit + * code, you need to open/create/delete that key with + * VMW_KEY_WOW64_32KEY if you want a consistent view with 32-bit code. + */ + +#ifdef _WIN32 +#ifdef _WIN64 +#define VMW_KEY_WOW64_32KEY KEY_WOW64_32KEY +#else +#define VMW_KEY_WOW64_32KEY 0x0 +#endif +#endif + + +/* + * Consider the following reasons functions are inlined: + * + * 1) inlined for performance reasons + * 2) inlined because it's a single-use function + * + * Functions which meet only condition 2 should be marked with this + * inline macro; It is not critical to be inlined (but there is a + * code-space & runtime savings by doing so), so when other callers + * are added the inline-ness should be removed.
+ */ + +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +/* + * Starting at version 3.3, gcc does not always inline functions marked + * 'inline' (it depends on their size). To force gcc to do so, one must use the + * extra __always_inline__ attribute. + */ +# define INLINE_SINGLE_CALLER INLINE __attribute__((__always_inline__)) +#else +# define INLINE_SINGLE_CALLER INLINE +#endif + +/* + * Used when a hard guarantee of no inlining is needed. Very few + * instances need this since the absence of INLINE is a good hint + * that gcc will not do inlining. + */ + +#if defined(__GNUC__) && defined(VMM) +#define ABSOLUTELY_NOINLINE __attribute__((__noinline__)) +#endif + +/* + * Attributes placed on function declarations to tell the compiler + * that the function never returns. + */ + +#ifdef _MSC_VER +#define NORETURN __declspec(noreturn) +#elif __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 9) +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN +#endif + +/* + * GCC 3.2 inline asm needs the + constraint for input/output memory operands. + * Older GCCs don't know about it --hpreg + */ + +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2) +# define VM_ASM_PLUS 1 +#else +# define VM_ASM_PLUS 0 +#endif + +/* + * Branch prediction hints: + * LIKELY(exp) - Expression exp is likely TRUE. + * UNLIKELY(exp) - Expression exp is likely FALSE. + * Usage example: + * if (LIKELY(excCode == EXC_NONE)) { + * or + * if (UNLIKELY(REAL_MODE(vc))) { + * + * We know how to predict branches on gcc3 and later (hopefully), + * all others we don't so we do nothing. + */ + +#if (__GNUC__ >= 3) +/* + * gcc3 uses __builtin_expect() to inform the compiler of an expected value. + * We use this to inform the static branch predictor. The '!!' in LIKELY + * and UNLIKELY converts any !=0 to a 1, so pointers and values wider than + * the 'long' taken by __builtin_expect() keep their truth value.
+ */ +#define LIKELY(_exp) __builtin_expect(!!(_exp), 1) +#define UNLIKELY(_exp) __builtin_expect(!!(_exp), 0) +#else +#define LIKELY(_exp) (_exp) +#define UNLIKELY(_exp) (_exp) +#endif + +/* + * GCC's argument checking for printf-like functions + * This is conditional until we have replaced all `"%x", void *' + * with `"0x%08x", (uint32) void *'. Note that %p prints different things + * on different platforms. Argument checking is enabled for the + * vmkernel, which has already been cleansed. + * + * fmtPos is the position of the format string argument, beginning at 1 + * varPos is the position of the variable argument, beginning at 1 + */ + +#if defined(__GNUC__) +# define PRINTF_DECL(fmtPos, varPos) __attribute__((__format__(__printf__, fmtPos, varPos))) +#else +# define PRINTF_DECL(fmtPos, varPos) +#endif + +#if defined(__GNUC__) +# define SCANF_DECL(fmtPos, varPos) __attribute__((__format__(__scanf__, fmtPos, varPos))) +#else +# define SCANF_DECL(fmtPos, varPos) +#endif + +/* + * UNUSED_PARAM should surround the parameter name and type declaration, + * e.g. "int MyFunction(int var1, UNUSED_PARAM(int var2))" + * + */ + +#ifndef UNUSED_PARAM +# if defined(__GNUC__) +# define UNUSED_PARAM(_parm) _parm __attribute__((__unused__)) +# else +# define UNUSED_PARAM(_parm) _parm +# endif +#endif + +/* + * REGPARM defaults to REGPARM3; i.e., a request that gcc + * put the first three arguments in registers. (It is fine + * if the function has fewer than three arguments.) Gcc only. + * Syntactically, put REGPARM where you'd put INLINE or NORETURN. + * + * Note that 64-bit code already puts the first six arguments in + * registers, so these attributes are only useful for 32-bit code.
+ */ + +#if defined(__GNUC__) +# define REGPARM0 __attribute__((regparm(0))) +# define REGPARM1 __attribute__((regparm(1))) +# define REGPARM2 __attribute__((regparm(2))) +# define REGPARM3 __attribute__((regparm(3))) +# define REGPARM REGPARM3 +#else +# define REGPARM0 +# define REGPARM1 +# define REGPARM2 +# define REGPARM3 +# define REGPARM +#endif + +/* + * ALIGNED specifies minimum alignment in "n" bytes. + */ + +#ifdef __GNUC__ +#define ALIGNED(n) __attribute__((__aligned__(n))) +#else +#define ALIGNED(n) +#endif + +/* + * __func__ is a stringified function name that is part of the C99 standard. The block + * below defines __func__ on older systems where the compiler does not support that + * macro. + */ +#if defined(__GNUC__) \ + && ((__GNUC__ == 2 && __GNUC_MINOR < 96) \ + || (__GNUC__ < 2)) +# define __func__ __FUNCTION__ +#endif + +/* + * Once upon a time, this was used to silence compiler warnings that + * get generated when the compiler thinks that a function returns + * when it is marked noreturn. Don't do it. Use NOT_REACHED(). + */ + +#define INFINITE_LOOP() do { } while (1) + +/* + * On FreeBSD (for the tools build), size_t is typedef'd if _BSD_SIZE_T_ + * is defined. Use the same logic here so we don't define it twice. 
[greg] + */ +#ifdef __FreeBSD__ +# ifdef _BSD_SIZE_T_ +# undef _BSD_SIZE_T_ +# ifdef VM_I386 +# ifdef VM_X86_64 + typedef uint64 size_t; +# else + typedef uint32 size_t; +# endif +# endif /* VM_I386 */ +# endif + +# ifdef _BSD_SSIZE_T_ +# undef _BSD_SSIZE_T_ +# ifdef VM_I386 +# ifdef VM_X86_64 + typedef int64 ssize_t; +# else + typedef int32 ssize_t; +# endif +# endif /* VM_I386 */ +# endif + +#else +# ifndef _SIZE_T +# ifdef VM_I386 +# define _SIZE_T +# ifdef VM_X86_64 + typedef uint64 size_t; +# else + typedef uint32 size_t; +# endif +# elif defined(__arm__) +# define _SIZE_T + typedef uint32 size_t; +# endif +# endif + +# if !defined(FROBOS) && !defined(_SSIZE_T) && !defined(_SSIZE_T_) && \ + !defined(ssize_t) && !defined(__ssize_t_defined) && \ + !defined(_SSIZE_T_DECLARED) +# ifdef VM_I386 +# define _SSIZE_T +# define __ssize_t_defined +# define _SSIZE_T_DECLARED +# ifdef VM_X86_64 + typedef int64 ssize_t; +# else + typedef int32 ssize_t; +# endif +# elif defined(__arm__) +# define _SSIZE_T +# define __ssize_t_defined +# define _SSIZE_T_DECLARED + typedef int32 ssize_t; +# endif +# endif + +#endif + +/* + * Format modifier for printing pid_t. On sun the pid_t is a ulong, but on + * Linux it's an int. + * Use this like this: printf("The pid is %"FMTPID".\n", pid); + */ +#ifdef sun +# ifdef VM_X86_64 +# define FMTPID "d" +# else +# define FMTPID "lu" +# endif +#else +# define FMTPID "d" +#endif + +/* + * Format modifier for printing uid_t. On Solaris 10 and earlier, uid_t + * is a ulong, but on other platforms it's an unsigned int. + * Use this like this: printf("The uid is %"FMTUID".\n", uid); + */ +#if defined(sun) && !defined(SOL11) +# ifdef VM_X86_64 +# define FMTUID "u" +# else +# define FMTUID "lu" +# endif +#else +# define FMTUID "u" +#endif + +/* + * Format modifier for printing mode_t. On sun the mode_t is a ulong, but on + * Linux it's an int. 
+ * Use this like this: printf("The mode is %"FMTMODE".\n", mode); + */ +#ifdef sun +# ifdef VM_X86_64 +# define FMTMODE "o" +# else +# define FMTMODE "lo" +# endif +#else +# define FMTMODE "o" +#endif + +/* + * Format modifier for printing time_t. Most platforms define a time_t to be + * a long int, but on FreeBSD (as of 5.0, it seems), the time_t is a signed + * size quantity. Refer to the definition of FMTSZ to see why we need silly + * preprocessor arithmetic. + * Use this like this: printf("The time is %"FMTTIME".\n", time); + */ +#if defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) >= 5) +# define FMTTIME FMTSZ"d" +#else +# if defined(_MSC_VER) +# ifndef _SAFETIME_H_ +# if (_MSC_VER < 1400) || defined(_USE_32BIT_TIME_T) +# define FMTTIME "ld" +# else +# define FMTTIME FMT64"d" +# endif +# else +# ifndef FMTTIME +# error "safetime.h did not define FMTTIME" +# endif +# endif +# else +# define FMTTIME "ld" +# endif +#endif + +#ifdef __APPLE__ +/* + * Format specifier for all these annoying types such as {S,U}Int32 + * which are 'long' in 32-bit builds + * and 'int' in 64-bit builds. + */ +# ifdef __LP64__ +# define FMTLI "" +# else +# define FMTLI "l" +# endif + +/* + * Format specifier for all these annoying types such as NS[U]Integer + * which are 'int' in 32-bit builds + * and 'long' in 64-bit builds. + */ +# ifdef __LP64__ +# define FMTIL "l" +# else +# define FMTIL "" +# endif +#endif + + +/* + * Define MXSemaHandle here so both vmmon and vmx see this definition. + */ + +#ifdef _WIN32 +typedef uintptr_t MXSemaHandle; +#else +typedef int MXSemaHandle; +#endif + +/* + * Define type for poll device handles. + */ + +typedef int64 PollDevHandle; + +/* + * Define the utf16_t type. + */ + +#if defined(_WIN32) && defined(_NATIVE_WCHAR_T_DEFINED) +typedef wchar_t utf16_t; +#else +typedef uint16 utf16_t; +#endif + +/* + * Definitions for point and rectangle types. Defined here so they + * can be used by other externally facing headers in bora/public.
+ */ + +typedef struct VMPoint { + int x, y; +} VMPoint; + +#if defined _WIN32 && defined USERLEVEL +struct tagRECT; +typedef struct tagRECT VMRect; +#else +typedef struct VMRect { + int left; + int top; + int right; + int bottom; +} VMRect; +#endif + +/* + * ranked locks "everywhere" + */ + +typedef uint32 MX_Rank; + +#endif /* _VM_BASIC_TYPES_H_ */ diff --git a/usr/src/uts/intel/io/vmxnet/vm_device_version.h b/usr/src/uts/intel/io/vmxnet/vm_device_version.h new file mode 100644 index 0000000000..7046594a6c --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vm_device_version.h @@ -0,0 +1,246 @@ +/********************************************************* + * Copyright (C) 1998 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation version 2.1 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + *********************************************************/ + +#ifndef VM_DEVICE_VERSION_H +#define VM_DEVICE_VERSION_H + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_VMCORE +#include "includeCheck.h" + +#ifdef _WIN32 +#ifdef __MINGW32__ +#include "initguid.h" +#else +#include "guiddef.h" +#endif +#endif + +/* LSILogic 53C1030 Parallel SCSI controller + * LSILogic SAS1068 SAS controller + */ +#define PCI_VENDOR_ID_LSILOGIC 0x1000 +#define PCI_DEVICE_ID_LSI53C1030 0x0030 +#define PCI_DEVICE_ID_LSISAS1068 0x0054 + +/* Our own PCI IDs + * VMware SVGA II (Unified VGA) + * VMware SVGA (PCI Accelerator) + * VMware vmxnet (Idealized NIC) + * VMware vmxscsi (Abortive idealized SCSI controller) + * VMware chipset (Subsystem ID for our motherboards) + * VMware e1000 (Subsystem ID) + * VMware vmxnet3 (Uniform Pass Through NIC) + * VMware HD Audio codec + * VMware HD Audio controller + */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405 +#define PCI_DEVICE_ID_VMWARE_SVGA 0x0710 +#define PCI_DEVICE_ID_VMWARE_NET 0x0720 +#define PCI_DEVICE_ID_VMWARE_SCSI 0x0730 +#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740 +#define PCI_DEVICE_ID_VMWARE_CHIPSET 0x1976 +#define PCI_DEVICE_ID_VMWARE_82545EM 0x0750 /* single port */ +#define PCI_DEVICE_ID_VMWARE_82546EB 0x0760 /* dual port */ +#define PCI_DEVICE_ID_VMWARE_EHCI 0x0770 +#define PCI_DEVICE_ID_VMWARE_UHCI 0x0774 +#define PCI_DEVICE_ID_VMWARE_XHCI 0x0778 +#define PCI_DEVICE_ID_VMWARE_1394 0x0780 +#define PCI_DEVICE_ID_VMWARE_BRIDGE 0x0790 +#define PCI_DEVICE_ID_VMWARE_ROOTPORT 0x07A0 +#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0 +#define PCI_DEVICE_ID_VMWARE_VMXWIFI 0x07B8 +#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07C0 +#define PCI_DEVICE_ID_VMWARE_82574 0x07D0 +#define PCI_DEVICE_ID_VMWARE_HDAUDIO_CODEC 0x1975 +#define PCI_DEVICE_ID_VMWARE_HDAUDIO_CONTROLLER 0x1977 + +/* The hypervisor device might grow. 
Please leave room + * for 7 more subfunctions. + */ +#define PCI_DEVICE_ID_VMWARE_HYPER 0x0800 +#define PCI_DEVICE_ID_VMWARE_VMI 0x0801 + +#define PCI_DEVICE_VMI_CLASS 0x05 +#define PCI_DEVICE_VMI_SUBCLASS 0x80 +#define PCI_DEVICE_VMI_INTERFACE 0x00 +#define PCI_DEVICE_VMI_REVISION 0x01 + +/* From linux/pci_ids.h: + * AMD Lance Ethernet controller + * BusLogic SCSI controller + * Ensoniq ES1371 sound controller + */ +#define PCI_VENDOR_ID_AMD 0x1022 +#define PCI_DEVICE_ID_AMD_VLANCE 0x2000 +#define PCI_VENDOR_ID_BUSLOGIC 0x104B +#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140 +#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040 +#define PCI_VENDOR_ID_ENSONIQ 0x1274 +#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371 + +/* From linux/pci_ids.h: + * Intel 82439TX (430 HX North Bridge) + * Intel 82371AB (PIIX4 South Bridge) + * Intel 82443BX (440 BX North Bridge and AGP Bridge) + * Intel 82545EM (e1000, server adapter, single port) + * Intel 82546EB (e1000, server adapter, dual port) + * Intel HECI (as embedded in ich9m) + */ +#define PCI_VENDOR_ID_INTEL 0x8086 +#define PCI_DEVICE_ID_INTEL_82439TX 0x7100 +#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110 +#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112 +#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113 +#define PCI_DEVICE_ID_INTEL_82371AB 0x7111 +#define PCI_DEVICE_ID_INTEL_82443BX 0x7190 +#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191 +#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192 /* Used when no AGP support */ +#define PCI_DEVICE_ID_INTEL_82545EM 0x100f +#define PCI_DEVICE_ID_INTEL_82546EB 0x1010 +#define PCI_DEVICE_ID_INTEL_82574 0x10d3 +#define PCI_DEVICE_ID_INTEL_82574_APPLE 0x10f6 +#define PCI_DEVICE_ID_INTEL_HECI 0x2a74 + +#define E1000E_PCI_DEVICE_ID_CONFIG_STR "e1000e.pci.deviceID" +#define E1000E_PCI_SUB_VENDOR_ID_CONFIG_STR "e1000e.pci.subVendorID" +#define E1000E_PCI_SUB_DEVICE_ID_CONFIG_STR "e1000e.pci.subDeviceID" + +/* + * Intel HD Audio controller and Realtek ALC885 codec. 
+ */ +#define PCI_DEVICE_ID_INTEL_631XESB_632XESB 0x269a +#define PCI_VENDOR_ID_REALTEK 0x10ec +#define PCI_DEVICE_ID_REALTEK_ALC885 0x0885 + + +/* + * Fresco Logic xHCI (USB 3.0) Controller + */ +#define PCI_VENDOR_ID_FRESCO 0x1B73 +#define PCI_DEVICE_ID_FRESCO_FL1000 0x1000 // Original 1-port chip +#define PCI_DEVICE_ID_FRESCO_FL1009 0x1009 // New 2-port chip (Driver 3.0.98+) +#define PCI_DEVICE_ID_FRESCO_FL1400 0x1400 // Unknown (4-port? Dev hardware?) + +/* + * NEC/Renesas xHCI (USB 3.0) Controller + */ +#define PCI_VENDOR_ID_NEC 0x1033 +#define PCI_DEVICE_ID_NEC_UPD720200 0x0194 +#define PCI_REVISION_NEC_UPD720200 0x03 +#define PCI_FIRMWARE_NEC_UPD720200 0x3015 + + +/************* Strings for IDE Identity Fields **************************/ +#define VIDE_ID_SERIAL_STR "00000000000000000001" /* Must be 20 Bytes */ +#define VIDE_ID_FIRMWARE_STR "00000001" /* Must be 8 Bytes */ + +/* No longer than 40 Bytes */ +#define VIDE_ATA_MODEL_STR PRODUCT_GENERIC_NAME " Virtual IDE Hard Drive" +#define VIDE_ATAPI_MODEL_STR PRODUCT_GENERIC_NAME " Virtual IDE CDROM Drive" + +#define ATAPI_VENDOR_ID "NECVMWar" /* Must be 8 Bytes */ +#define ATAPI_PRODUCT_ID PRODUCT_GENERIC_NAME " IDE CDROM" /* Must be 16 Bytes */ +#define ATAPI_REV_LEVEL "1.00" /* Must be 4 Bytes */ + +#define IDE_NUM_INTERFACES 2 /* support for two interfaces */ +#define IDE_DRIVES_PER_IF 2 + +/************* Strings for SCSI Identity Fields **************************/ +#define SCSI_DISK_MODEL_STR PRODUCT_GENERIC_NAME " Virtual SCSI Hard Drive" +#define SCSI_DISK_VENDOR_NAME COMPANY_NAME +#define SCSI_DISK_REV_LEVEL "1.0" +#define SCSI_CDROM_MODEL_STR PRODUCT_GENERIC_NAME " Virtual SCSI CDROM Drive" +#define SCSI_CDROM_VENDOR_NAME COMPANY_NAME +#define SCSI_CDROM_REV_LEVEL "1.0" + +/************* SCSI implementation limits ********************************/ +#define SCSI_MAX_CONTROLLERS 4 // Need more than 1 for MSCS clustering +#define SCSI_MAX_DEVICES 16 // BT-958 emulates only 16 +#define PVSCSI_MAX_DEVICES 
255 // 255 (including the controller) +/* + * VSCSI_BV_INTS is the number of uint32's needed for a bit vector + * to cover all scsi devices per target. + */ +#define VSCSI_BV_INTS CEILING(PVSCSI_MAX_DEVICES, 8 * sizeof (uint32)) +#define SCSI_IDE_CHANNEL SCSI_MAX_CONTROLLERS +#define SCSI_IDE_HOSTED_CHANNEL (SCSI_MAX_CONTROLLERS + 1) +#define SCSI_MAX_CHANNELS (SCSI_MAX_CONTROLLERS + 2) + +/************* Strings for the VESA BIOS Identity Fields *****************/ +#define VBE_OEM_STRING COMPANY_NAME " SVGA" +#define VBE_VENDOR_NAME COMPANY_NAME +#define VBE_PRODUCT_NAME PRODUCT_GENERIC_NAME + +/************* PCI implementation limits ********************************/ +#define PCI_MAX_BRIDGES 15 + +/************* Ethernet implementation limits ***************************/ +#define MAX_ETHERNET_CARDS 10 + +/********************** Floppy limits ***********************************/ +#define MAX_FLOPPY_DRIVES 2 + +/************* PCI Passthrough implementation limits ********************/ +#define MAX_PCI_PASSTHRU_DEVICES 6 + +/************* USB implementation limits ********************************/ +#define MAX_USB_DEVICES_PER_HOST_CONTROLLER 127 + +/************* Strings for Host USB Driver *******************************/ + +#ifdef _WIN32 + +/* + * Globally unique ID for the VMware device interface. Define INITGUID before including + * this header file to instantiate the variable. + */ +DEFINE_GUID(GUID_DEVICE_INTERFACE_VMWARE_USB_DEVICES, +0x2da1fe75, 0xaab3, 0x4d2c, 0xac, 0xdf, 0x39, 0x8, 0x8c, 0xad, 0xa6, 0x65); + +/* + * Globally unique ID for the VMware device setup class. + */ +DEFINE_GUID(GUID_CLASS_VMWARE_USB_DEVICES, +0x3b3e62a5, 0x3556, 0x4d7e, 0xad, 0xad, 0xf5, 0xfa, 0x3a, 0x71, 0x2b, 0x56); + +/* + * This string defines the device ID string of a VMware USB device. + * The format is USB\Vid_XXXX&Pid_YYYY, where XXXX and YYYY are the + * hexadecimal representations of the vendor and product ids, respectively. + * + * The official vendor ID for VMware, Inc. 
is 0x0E0F. + * The product id for USB generic devices is 0x0001. + */ +#define USB_VMWARE_DEVICE_ID_WIDE L"USB\\Vid_0E0F&Pid_0001" +#define USB_DEVICE_ID_LENGTH (sizeof(USB_VMWARE_DEVICE_ID_WIDE) / sizeof(WCHAR)) + +#ifdef UNICODE +#define USB_PNP_SETUP_CLASS_NAME L"VMwareUSBDevices" +#define USB_PNP_DRIVER_NAME L"vmusb" +#else +#define USB_PNP_SETUP_CLASS_NAME "VMwareUSBDevices" +#define USB_PNP_DRIVER_NAME "vmusb" +#endif +#endif + +#endif /* VM_DEVICE_VERSION_H */ diff --git a/usr/src/uts/intel/io/vmxnet/vmnet_def.h b/usr/src/uts/intel/io/vmxnet/vmnet_def.h new file mode 100644 index 0000000000..6e44aea2bb --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vmnet_def.h @@ -0,0 +1,91 @@ +/********************************************************* + * Copyright (C) 2004 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. 
+ * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +/* + * vmnet_def.h + * + * - definitions which are (mostly) not vmxnet or vlance specific + */ + +#ifndef _VMNET_DEF_H_ +#define _VMNET_DEF_H_ + +#define INCLUDE_ALLOW_USERLEVEL +#define INCLUDE_ALLOW_VMCORE + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#include "includeCheck.h" + +#define VMNET_NAME_BUFFER_LEN 128 /* Increased for i18n. */ +#define VMNET_COAL_SCHEME_NAME_LEN 16 + + +/* + * capabilities - not all of these are implemented in the virtual HW + * (eg VLAN support is in the virtual switch) so even vlance + * can use them + */ +#define VMNET_CAP_SG 0x0001 /* Can do scatter-gather transmits. */ +#define VMNET_CAP_IP4_CSUM 0x0002 /* Can checksum only TCP/UDP over IPv4. */ +#define VMNET_CAP_HW_CSUM 0x0004 /* Can checksum all packets. */ +#define VMNET_CAP_HIGH_DMA 0x0008 /* Can DMA to high memory. */ +#define VMNET_CAP_TOE 0x0010 /* Supports TCP/IP offload. */ +#define VMNET_CAP_TSO 0x0020 /* Supports TCP Segmentation offload */ +#define VMNET_CAP_SW_TSO 0x0040 /* Supports SW TCP Segmentation */ +#define VMNET_CAP_VMXNET_APROM 0x0080 /* Vmxnet APROM support */ +#define VMNET_CAP_HW_TX_VLAN 0x0100 /* Can we do VLAN tagging in HW */ +#define VMNET_CAP_HW_RX_VLAN 0x0200 /* Can we do VLAN untagging in HW */ +#define VMNET_CAP_SW_VLAN 0x0400 /* Can we do VLAN tagging/untagging in SW */ +#define VMNET_CAP_WAKE_PCKT_RCV 0x0800 /* Can wake on network packet recv? 
*/ +#define VMNET_CAP_ENABLE_INT_INLINE 0x1000 /* Enable Interrupt Inline */ +#define VMNET_CAP_ENABLE_HEADER_COPY 0x2000 /* copy header for vmkernel */ +#define VMNET_CAP_TX_CHAIN 0x4000 /* Guest can use multiple tx entries for a pkt */ +#define VMNET_CAP_RX_CHAIN 0x8000 /* a pkt can span multiple rx entries */ +#define VMNET_CAP_LPD 0x10000 /* large pkt delivery */ +#define VMNET_CAP_BPF 0x20000 /* BPF Support in VMXNET Virtual Hardware */ +#define VMNET_CAP_SG_SPAN_PAGES 0x40000 /* Can do scatter-gather span multiple pages transmits. */ +#define VMNET_CAP_IP6_CSUM 0x80000 /* Can do IPv6 csum offload. */ +#define VMNET_CAP_TSO6 0x100000 /* Can do TSO segmentation offload for IPv6 pkts. */ +#define VMNET_CAP_TSO256k 0x200000 /* Can do TSO segmentation offload for pkts up to 256kB. */ +#define VMNET_CAP_UPT 0x400000 /* Support UPT */ +#define VMNET_CAP_RDONLY_INETHDRS 0x800000 /* Modifies inet headers for TSO/CSUm */ +#define VMNET_CAP_NPA 0x1000000 /* Support NPA */ +#define VMNET_CAP_DCB 0x2000000 /* Support DCB */ +#define VMNET_CAP_OFFLOAD_8OFFSET 0x4000000 /* supports 8bit parameterized offsets */ +#define VMNET_CAP_OFFLOAD_16OFFSET 0x8000000 /* supports 16bit parameterized offsets */ +#define VMNET_CAP_IP6_CSUM_EXT_HDRS 0x10000000 /* support csum of ip6 ext hdrs */ +#define VMNET_CAP_TSO6_EXT_HDRS 0x20000000 /* support TSO for ip6 ext hdrs */ +#define VMNET_CAP_SCHED 0x40000000 /* compliant with network scheduling */ +#endif // _VMNET_DEF_H_ diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet.c b/usr/src/uts/intel/io/vmxnet/vmxnet.c new file mode 100644 index 0000000000..e170f049d9 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vmxnet.c @@ -0,0 +1,2442 @@ +/********************************************************* + * Copyright (C) 2004 VMware, Inc. All rights reserved. + * + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. 
You may not use this file except in + * compliance with the License. + * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +/* + * Copyright 2018 Joyent, Inc. + */ + +#include <sys/types.h> +#include <sys/conf.h> +#include <sys/debug.h> +#include <sys/stropts.h> +#include <sys/stream.h> +#include <sys/strlog.h> +#include <sys/kmem.h> +#include <sys/stat.h> +#include <sys/kstat.h> +#include <sys/vtrace.h> +#include <sys/dlpi.h> +#include <sys/strsun.h> +#include <sys/ethernet.h> +#include <sys/modctl.h> +#include <sys/errno.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/ddi_impldefs.h> +#include <sys/gld.h> +#include <sys/pci.h> +#include <sys/strsubr.h> + +/* + * This used to be defined in sys/gld.h, but was flagged as private, + * and we used it anyway. Now it no longer exists, and we're stuck + * with it for the time being. 
+ */ +#ifndef GLD_MAX_MULTICAST +#define GLD_MAX_MULTICAST 64 +#endif + +#define __intptr_t_defined +#define _STDINT_H +#include "vm_basic_types.h" +#include "vmxnet2_def.h" +#include "vm_device_version.h" +#include "net.h" +#include "buildNumber.h" + +#define SOLVMXNET_SUCCESS 1 +#define SOLVMXNET_FAILURE 0 + +/* + * DPRINTF(level, (cmn_err argument list)); prints only when vxn_debug is + * greater than level. Wrapped in do/while (0) so that it is a single + * statement and cannot capture a following 'else'. + */ +#ifdef SOLVMXNET_DEBUG_LEVEL +static int vxn_debug = SOLVMXNET_DEBUG_LEVEL; +#define DPRINTF(n, args) do { if (vxn_debug > (n)) cmn_err args; } while (0) +#else +#define DPRINTF(n, args) +#endif + +static char ident[] = "VMware Ethernet Adapter b" BUILD_NUMBER_NUMERIC_STRING; +char _depends_on[] = {"misc/gld"}; + +#define MAX_NUM_RECV_BUFFERS 128 +#define DEFAULT_NUM_RECV_BUFFERS 100 +#define MAX_NUM_XMIT_BUFFERS 128 +#define DEFAULT_NUM_XMIT_BUFFERS 100 +#define CRC_POLYNOMIAL_LE 0xedb88320UL +#define SOLVMXNET_MAXNAME 20 +#define MAX_TX_WAIT_ON_STOP 2000 + +#define ETHERALIGN 2 +#define SLACKBYTES 4 +#define MAXPKTBUF (14 + ETHERALIGN + ETHERMTU + SLACKBYTES) + + +#define QHIWATER (MAX_NUM_RECV_BUFFERS*ETHERMTU) + +#define OUTB(dp, p, v) \ + ddi_put8((dp)->vxnIOHdl, \ + (uint8_t *)((caddr_t)((dp)->vxnIOp) + (p)), v) +#define OUTW(dp, p, v) \ + ddi_put16((dp)->vxnIOHdl, \ + (uint16_t *)((caddr_t)((dp)->vxnIOp) + (p)), v) +#define OUTL(dp, p, v) \ + ddi_put32((dp)->vxnIOHdl, \ + (uint32_t *)((caddr_t)((dp)->vxnIOp) + (p)), v) +#define INB(dp, p) \ + ddi_get8((dp)->vxnIOHdl, \ + (uint8_t *)(((caddr_t)(dp)->vxnIOp) + (p))) +#define INW(dp, p) \ + ddi_get16((dp)->vxnIOHdl, \ + (uint16_t *)(((caddr_t)(dp)->vxnIOp) + (p))) +#define INL(dp, p) \ + ddi_get32((dp)->vxnIOHdl, \ + (uint32_t *)(((caddr_t)(dp)->vxnIOp) + (p))) + +/* + * Advance the ring index 'val' by one, wrapping to 0 when it reaches 'max'. + * 'val' is evaluated more than once, so it must not have side effects. + * Wrapped in do/while (0) so the two statements stay together under an + * unbraced if/else; invoke it with a trailing semicolon. + */ +#define VMXNET_INC(val, max) \ + do { \ + (val)++; \ + if (UNLIKELY((val) == (max))) { \ + (val) = 0; \ + } \ + } while (0) + +#define TX_RINGBUF_MBLK(dp, idx) ((dp)->txRingBuf[(idx)].mblk) +#define TX_RINGBUF_DMAMEM(dp, idx) ((dp)->txRingBuf[(idx)].dmaMem) + +typedef struct { + caddr_t buf; /* Virtual address */ + uint32_t phyBuf; /* Physical address */ + size_t bufLen; /* Buffer length */ +
ddi_dma_cookie_t cookie; /* Dma cookie */ + uint_t cookieCount; /* Cookie count */ + ddi_dma_handle_t dmaHdl; /* Dma handle */ + ddi_acc_handle_t dataAccHdl; /* Data access handle */ +} dma_buf_t; + +typedef struct rx_dma_buf { + dma_buf_t dmaDesc; /* Dma descriptor */ + mblk_t *mblk; /* Streams message block */ + frtn_t freeCB; /* Free callback */ + struct vxn_softc *softc; /* Back pointer to softc */ + struct rx_dma_buf *next; /* Next one in list */ +} rx_dma_buf_t; + +typedef struct vxn_stats { + uint32_t errxmt; /* Transmit errors */ + uint32_t errrcv; /* Receive errors */ + uint32_t runt; /* Runt packets */ + uint32_t norcvbuf; /* Buffer alloc errors */ + uint32_t interrupts; /* Interrupts */ + uint32_t defer; /* Deferred transmits */ +} vxn_stats_t; + +typedef struct tx_ring_buf { + mblk_t *mblk; /* Streams message block */ + dma_buf_t dmaMem; /* Dma buffer descriptor */ +} tx_ring_buf_t; + +typedef struct vxn_softc { + char drvName[SOLVMXNET_MAXNAME]; /* Driver name string */ + int unit; /* Driver instance */ + vxn_stats_t stats; /* Stats */ + + dev_info_t *dip; /* Info pointer */ + ddi_iblock_cookie_t iblockCookie; /* Interrupt block cookie */ + gld_mac_info_t *macInfo; /* GLD mac info */ + ddi_acc_handle_t confHdl; /* Configuration space handle */ + ddi_acc_handle_t vxnIOHdl; /* I/O space handle */ + caddr_t vxnIOp; /* I/O space pointer */ + boolean_t morphed; /* Adapter morphed ?
*/ + + kmutex_t intrlock; /* Interrupt lock */ + kmutex_t xmitlock; /* Transmit lock */ + kmutex_t rxlistlock; /* Rx free pool lock */ + + boolean_t nicActive; /* NIC active flag */ + boolean_t inIntr; /* Interrupt processing flag */ + + struct ether_addr devAddr; /* MAC address */ + + uint32_t vxnNumRxBufs; /* Number of receive buffers */ + uint32_t vxnNumTxBufs; /* Number of transmit buffers */ + + dma_buf_t driverDataDmaMem; /* Driver Data (dma handle) */ + Vmxnet2_DriverData *driverData; /* Driver Data */ + void *driverDataPhy; /* Driver Data busaddr pointer */ + Vmxnet2_RxRingEntry *rxRing; /* Receive ring */ + Vmxnet2_TxRingEntry *txRing; /* Transmit ring */ + ddi_dma_handle_t txDmaHdl; /* Tx buffers dma handle */ + rx_dma_buf_t *rxRingBuffPtr[MAX_NUM_RECV_BUFFERS]; + /* DMA buffers associated with rxRing */ + tx_ring_buf_t txRingBuf[MAX_NUM_XMIT_BUFFERS]; /* tx Ring buffers */ + + rx_dma_buf_t *rxFreeBufList; + uint32_t rxNumFreeBufs; /* current # of buffers in pool */ + uint32_t rxMaxFreeBufs; /* max # of buffers in pool */ + + uint32_t txPending; /* Pending transmits */ + uint32_t maxTxFrags; /* Max Tx fragments */ + + int multiCount; /* Multicast address count */ + struct ether_addr multicastList[GLD_MAX_MULTICAST]; /* Multicast list */ + + struct vxn_softc *next; /* Circular list of instances */ + struct vxn_softc *prev; +} vxn_softc_t; + +/* used for rx buffers or buffers allocated by ddi_dma_mem_alloc() */ +static ddi_dma_attr_t vxn_dma_attrs = { + DMA_ATTR_V0, /* dma_attr version */ + 0, /* dma_attr_addr_lo */ + (uint64_t)0xFFFFFFFF, /* dma_attr_addr_hi */ + 0x7FFFFFFF, /* dma_attr_count_max */ + 4, /* dma_attr_align */ + 0x3F, /* dma_attr_burstsizes */ + 1, /* dma_attr_minxfer */ + (uint64_t)0xFFFFFFFF, /* dma_attr_maxxfer */ + (uint64_t)0xFFFFFFFF, /* dma_attr_seg */ + 1, /* dma_attr_sgllen */ + 1, /* dma_attr_granular */ + 0, /* dma_attr_flags */ +}; + +/* used for tx buffers */ +static ddi_dma_attr_t vxn_dma_attrs_tx = { + DMA_ATTR_V0, /* dma_attr
version */ + 0, /* dma_attr_addr_lo */ + (uint64_t)0xFFFFFFFF, /* dma_attr_addr_hi */ + 0x7FFFFFFF, /* dma_attr_count_max */ + 1, /* dma_attr_align */ + 0x3F, /* dma_attr_burstsizes */ + 1, /* dma_attr_minxfer */ + (uint64_t)0xFFFFFFFF, /* dma_attr_maxxfer */ + (uint64_t)0xFFFFFFFF, /* dma_attr_seg */ + 1, /* dma_attr_sgllen */ + 1, /* dma_attr_granular */ + 0, /* dma_attr_flags */ +}; + + +static struct ether_addr etherbroadcastaddr = { + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} +}; + +static struct ddi_device_acc_attr vxn_buf_attrs = { + DDI_DEVICE_ATTR_V0, + DDI_STRUCTURE_LE_ACC, + DDI_STRICTORDER_ACC +}; + +static struct ddi_device_acc_attr dev_attr = { + DDI_DEVICE_ATTR_V0, + DDI_STRUCTURE_LE_ACC, + DDI_STRICTORDER_ACC +}; + +static vxn_softc_t vxnList; /* for debugging */ +static kmutex_t vxnListLock; + +static void *Vxn_Memset(void *s, int c, size_t n); +static int Vxn_Reset(gld_mac_info_t *macInfo); +static int Vxn_SetPromiscuous(gld_mac_info_t *macInfo, int flag); +static int Vxn_GetStats(gld_mac_info_t *macInfo, struct gld_stats *gs); +static void Vxn_ApplyAddressFilter(vxn_softc_t *dp); +static int Vxn_SetMulticast(gld_mac_info_t *macinfo, uint8_t *ep, int flag); +static int Vxn_SetMacAddress(gld_mac_info_t *macInfo, uint8_t *mac); +static int Vxn_Start(gld_mac_info_t *macInfo); +static int Vxn_Stop(gld_mac_info_t *macInfo); +static void Vxn_FreeTxBuf(vxn_softc_t *dp, int idx); +static int Vxn_EncapTxBuf(vxn_softc_t *dp, mblk_t *mp, Vmxnet2_TxRingEntry *xre, + tx_ring_buf_t *txBuf); +static int Vxn_Send(gld_mac_info_t *macinfo, mblk_t *mp); +static boolean_t Vxn_TxComplete(vxn_softc_t *dp, boolean_t *reschedp); +static boolean_t Vxn_Receive(vxn_softc_t *dp); +static u_int Vxn_Interrupt(gld_mac_info_t *macInfo); +static void Vxn_ReclaimRxBuf(rx_dma_buf_t *rxDesc); +static void Vxn_FreeRxBuf(rx_dma_buf_t *rxDesc); +static rx_dma_buf_t *Vxn_AllocRxBuf(vxn_softc_t *dp, int cansleep); +static void Vxn_FreeInitBuffers(vxn_softc_t *dp); +static int 
Vxn_AllocInitBuffers(vxn_softc_t *dp); +static void Vxn_FreeDmaMem(dma_buf_t *dma); +static int Vxn_AllocDmaMem(vxn_softc_t *dp, int size, int cansleep, dma_buf_t *dma); +static void Vxn_FreeDriverData(vxn_softc_t *dp); +static int Vxn_AllocDriverData(vxn_softc_t *dp); +static int Vxn_Attach(dev_info_t *dip, ddi_attach_cmd_t cmd); +static int Vxn_Detach(dev_info_t *dip, ddi_detach_cmd_t cmd); +static int Vxn_AllocRxBufPool(vxn_softc_t *dp); +static void Vxn_FreeRxBufPool(vxn_softc_t *dp); +static rx_dma_buf_t * Vxn_AllocRxBufFromPool(vxn_softc_t *dp); +static void Vxn_FreeRxBufToPool(rx_dma_buf_t *rxDesc); + +/* + *----------------------------------------------------------------------------- + * Vxn_Memset -- + * memset() (Because bzero does not get resolved by module loader) + * + * Results: + * pointer to the memory area s + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void * +Vxn_Memset(void *s, int c, size_t n) +{ + while (n--) { + ((uint8_t *)s)[n] = c; + } + + return s; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_Reset -- + * Stub routine to reset hardware. Presently does nothing. Start/Stop should + * take care of resets. 
+ * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_Reset(gld_mac_info_t *macInfo) +{ + return GLD_SUCCESS; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_SetPromiscuous -- + * Set/Reset NIC to/from promiscuous mode + * + * Results: + * GLD_SUCCESS + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_SetPromiscuous(gld_mac_info_t *macInfo, int flag) +{ + vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private; + Vmxnet2_DriverData *dd = dp->driverData; + + mutex_enter(&dp->intrlock); + if (flag == GLD_MAC_PROMISC_PHYS) { + dd->ifflags |= VMXNET_IFF_PROMISC; + } else if (flag == GLD_MAC_PROMISC_MULTI) { + /* + * This should really set VMXNET_IFF_ALLMULTI, + * but unfortunately it doesn't exist. The next + * best thing would be to set the LADRFs to all + * 0xFFs and set VMXNET_IFF_MULTICAST, but that + * opens up a whole new set of potential pitfalls, + * so this is a reasonable temporary solution. + */ + dd->ifflags |= VMXNET_IFF_PROMISC; + } else if (flag == GLD_MAC_PROMISC_NONE) { + dd->ifflags &= ~VMXNET_IFF_PROMISC; + } else { + /* This could be GLD_MAC_PROMISC_NOOP? 
*/ + mutex_exit(&dp->intrlock); + cmn_err(CE_WARN, "%s%d: Vxn_SetPromiscuous: Unexpected mode flag: 0x%x", + dp->drvName, dp->unit, flag); + + return GLD_FAILURE; + } + + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF); + mutex_exit(&dp->intrlock); + + return GLD_SUCCESS; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_GetStats -- + * Get driver specific stats + * + * Results: + * GLD_SUCCESS + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_GetStats(gld_mac_info_t *macInfo, struct gld_stats *gs) +{ + vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private; + + gs->glds_errxmt = dp->stats.errxmt; + gs->glds_errrcv = dp->stats.errrcv; + gs->glds_short = dp->stats.runt; + gs->glds_norcvbuf = dp->stats.norcvbuf; + gs->glds_intr = dp->stats.interrupts; + gs->glds_defer = dp->stats.defer; + + return GLD_SUCCESS; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_ApplyAddressFilter -- + * Go over multicast list and compute/apply address filter + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_ApplyAddressFilter(vxn_softc_t *dp) +{ + uint8_t *ep; + int i, j, bit, byte; + uint32_t crc, poly = CRC_POLYNOMIAL_LE; + Vmxnet2_DriverData *dd = dp->driverData; + volatile uint16_t *mcastTable = (uint16_t *)dd->LADRF; + + ASSERT(MUTEX_HELD(&dp->intrlock)); + + /* clear the multicast filter */ + dd->LADRF[0] = 0; + dd->LADRF[1] = 0; + + for (i = 0; i < dp->multiCount; i++) { + crc = 0xffffffff; + ep = (uint8_t *)&dp->multicastList[i].ether_addr_octet; + + for (byte = 0; byte < 6; byte++) { + for (bit = *ep++, j = 0; j < 8; j++, bit >>= 1) { + int test; + + test = ((bit ^ crc) & 0x01); + crc >>= 1; + + if (test) { + crc = crc ^ poly; + } + } + } + + crc = crc >> 26; + mcastTable[crc >> 4] |= 1 
<< (crc & 0xf); + } +} + +/* + *----------------------------------------------------------------------------- + * Vxn_SetMulticast -- + * Add delete entry from multicast list + * + * Results: + * GLD_FAILURE on failure + * GLD_SUCCESS on success + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_SetMulticast(gld_mac_info_t *macinfo, uint8_t *ep, int flag) +{ + int i; + int copyLen; + vxn_softc_t *dp = (vxn_softc_t *)macinfo->gldm_private; + Vmxnet2_DriverData *dd = dp->driverData; + + if (flag == GLD_MULTI_ENABLE) { + /* + * Exceeded multicast address limit + */ + if (dp->multiCount >= GLD_MAX_MULTICAST) { + return GLD_FAILURE; + } + + /* + * Add mac address to multicast list + */ + bcopy(ep, dp->multicastList[dp->multiCount].ether_addr_octet, + ETHERADDRL); + dp->multiCount++; + } + else { + for (i=0; i<dp->multiCount; i++) { + if (bcmp(ep, dp->multicastList[i].ether_addr_octet, ETHERADDRL) == 0) { + goto found; + } + } + return GLD_FAILURE; + + found: + /* + * Delete mac address from multicast list + */ + copyLen = (dp->multiCount - (i+1)) * sizeof(struct ether_addr); + if (copyLen > 0) { + bcopy(&dp->multicastList[i+1], &dp->multicastList[i], copyLen); + } + dp->multiCount--; + } + + /* + * Compute address filter from list of addressed and apply it + */ + mutex_enter(&dp->intrlock); + Vxn_ApplyAddressFilter(dp); + + if (dp->multiCount) { + ASSERT(dd->LADRF[0] || dd->LADRF[1]); + dd->ifflags |= VMXNET_IFF_MULTICAST; + } else { + ASSERT(!(dd->LADRF[0] || dd->LADRF[1])); + dd->ifflags &= ~VMXNET_IFF_MULTICAST; + } + + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF); + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_LADRF); + mutex_exit(&dp->intrlock); + + return GLD_SUCCESS; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_SetMacAddress -- + * Change device MAC address + * + * Results: + * GLD_SUCCESS + * GLD_FAILURE + * + * Side 
effects:
+ *    None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetMacAddress(gld_mac_info_t *macInfo, uint8_t *mac)
+{
+   int i;
+   int err = GLD_SUCCESS;
+   vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+
+   /* Lock order used throughout this file: intrlock first, then xmitlock */
+   mutex_enter(&dp->intrlock);
+   mutex_enter(&dp->xmitlock);
+
+   /*
+    * Don't change MAC address on a running NIC
+    */
+   if (dp->nicActive) {
+      err = GLD_FAILURE;
+      goto out;
+   }
+
+   /*
+    * Save new MAC address
+    */
+   for (i = 0; i < 6; i++) {
+      dp->devAddr.ether_addr_octet[i] = mac[i];
+   }
+
+   /*
+    * Push new MAC address down into hardware
+    */
+   for (i = 0; i < 6; i++) {
+      OUTB(dp, VMXNET_MAC_ADDR + i, mac[i]);
+   }
+
+out:
+   mutex_exit(&dp->xmitlock);
+   mutex_exit(&dp->intrlock);
+   return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Start --
+ *    Device start routine. Called on "ifconfig plumb"
+ *
+ *    Does nothing if the NIC is already active. Otherwise registers the
+ *    shared driver data with the device, queries capabilities/features to
+ *    size the tx scatter/gather list, allocates the tx DMA handle and
+ *    enables interrupts.
+ *
+ * Results:
+ *    GLD_SUCCESS
+ *    GLD_FAILURE (ring registration or tx DMA handle allocation failed;
+ *                 the NIC is left inactive and unregistered)
+ *
+ * Side effects:
+ *    Updates vxn_dma_attrs_tx.dma_attr_sgllen, dp->maxTxFrags,
+ *    dp->txDmaHdl and dp->nicActive.
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Start(gld_mac_info_t *macInfo)
+{
+   int err = GLD_SUCCESS;
+   uint32_t r, capabilities, features;
+   vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+
+   mutex_enter(&dp->intrlock);
+   mutex_enter(&dp->xmitlock);
+
+   if (!dp->nicActive) {
+      /*
+       * Register ring structure with hardware
+       *
+       * This downcast is OK because we requested a 32-bit physical address
+       */
+      OUTL(dp, VMXNET_INIT_ADDR, (uint32_t)(uintptr_t)dp->driverDataPhy);
+      OUTL(dp, VMXNET_INIT_LENGTH, dp->driverData->length);
+
+      /*
+       * Make sure registration succeeded
+       */
+      r = INL(dp, VMXNET_INIT_LENGTH);
+      if (!r) {
+         cmn_err(CE_WARN, "%s%d: Vxn_Start: failed to register ring",
+                 dp->drvName, dp->unit);
+         err = GLD_FAILURE;
+         goto out;
+      }
+
+      /*
+       * Get maximum tx fragments supported
+       */
+      OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_CAPABILITIES);
+      capabilities = INL(dp, VMXNET_COMMAND_ADDR);
+
+      OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_FEATURES);
+      features = INL(dp, VMXNET_COMMAND_ADDR);
+
+      DPRINTF(3, (CE_CONT, "%s%d: chip capabilities=0x%x features=0x%x\n",
+                  dp->drvName, dp->unit, capabilities, features));
+
+      if ((capabilities & VMNET_CAP_SG) &&
+          (features & VMXNET_FEATURE_ZERO_COPY_TX)) {
+         dp->maxTxFrags = VMXNET2_SG_DEFAULT_LENGTH;
+      } else {
+         dp->maxTxFrags = 1;
+      }
+      ASSERT(dp->maxTxFrags >= 1);
+
+      /*
+       * Alloc Tx DMA handle
+       */
+      vxn_dma_attrs_tx.dma_attr_sgllen = dp->maxTxFrags;
+      if (ddi_dma_alloc_handle(dp->dip, &vxn_dma_attrs_tx, DDI_DMA_SLEEP,
+                               NULL, &dp->txDmaHdl) != DDI_SUCCESS) {
+         cmn_err(CE_WARN, "%s%d: Vxn_Start: failed to alloc tx dma handle",
+                 dp->drvName, dp->unit);
+
+         /*
+          * The ring was registered with the device above but the NIC is
+          * not going active. Unregister it the same way Vxn_Stop does, so
+          * the device is not left pointing at our driver data while
+          * nicActive is FALSE, and make sure no stale handle is kept.
+          */
+         OUTL(dp, VMXNET_INIT_ADDR, 0);
+         dp->txDmaHdl = NULL;
+
+         err = GLD_FAILURE;
+         goto out;
+      }
+
+      /*
+       * Enable interrupts on the card
+       */
+      dp->driverData->ifflags |= VMXNET_IFF_BROADCAST | VMXNET_IFF_DIRECTED;
+
+      OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_ENABLE);
+      OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+      OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_LADRF);
+
+      dp->nicActive = TRUE;
+   }
+
+out:
+   mutex_exit(&dp->xmitlock);
+   mutex_exit(&dp->intrlock);
+   return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Stop --
+ *    Device stop routine.
Called on "ifconfig unplumb" + * + * Results: + * GLD_SUCCESS + * GLD_FAILURE + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_Stop(gld_mac_info_t *macInfo) +{ + int i; + int err = GLD_SUCCESS; + vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private; + boolean_t resched; + + mutex_enter(&dp->intrlock); + mutex_enter(&dp->xmitlock); + + if (!dp->nicActive) { + goto out; + } + + /* + * Disable interrupts + */ + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_DISABLE); + + /* + * Wait for pending transmits + */ + if (dp->txPending) { + for (i=0; i < MAX_TX_WAIT_ON_STOP && dp->txPending; i++) { + delay(drv_usectohz(1000)); + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_CHECK_TX_DONE); + (void) Vxn_TxComplete(dp, &resched); + /* + * Don't worry about rescheduling transmits - GLD handles + * this automatically. + */ + } + } + if (dp->txPending) { + cmn_err(CE_WARN, "%s%d: Vxn_Stop: giving up on %d pending transmits", + dp->drvName, dp->unit, dp->txPending); + } + + OUTL(dp, VMXNET_INIT_ADDR, 0); + dp->nicActive = FALSE; + + /* + * Free Tx DMA handle + * + * The ddi_dma_free_handle() man page says that ddi_dma_unbind_handle() must be called + * prior to calling ddi_dma_free_handle(). + * However, call to ddi_dma_unbind_handle() is not required here, because + * ddi_dma_addr_bind_handle() and matching ddi_dma_unbind_handle() are called from + * Vxn_EncapTxBuf(). + * xmitlock is held in Vxn_EncapTxBuf() as well as acquired above in Vxn_Stop(). 
+ */ + ddi_dma_free_handle(&dp->txDmaHdl); + dp->txDmaHdl = NULL; + +out: + mutex_exit(&dp->xmitlock); + mutex_exit(&dp->intrlock); + return err; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_FreeTxBuf -- + * Free transmit buffer + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_FreeTxBuf(vxn_softc_t *dp, int idx) +{ + mblk_t **txMblkp = &TX_RINGBUF_MBLK(dp, idx); + dma_buf_t *dmaMem = &TX_RINGBUF_DMAMEM(dp, idx); + + if (*txMblkp) { + freemsg(*txMblkp); + *txMblkp = NULL; + } + + if (dmaMem->buf) { + Vxn_FreeDmaMem(dmaMem); + ASSERT(dmaMem->buf == NULL); + } +} + +/* + *----------------------------------------------------------------------------- + * Vxn_EncapTxBuf -- + * Go over dma mappings of Tx buffers and drop buffer physical address + * into ring entry + * + * Results: + * SOLVMXNET_SUCCESS on success + * SOLVMXNET_FAILURE on failure + * + * Side effects: + * None + *---------------- ------------------------------------------------------------- + */ +static int +Vxn_EncapTxBuf(vxn_softc_t *dp, + mblk_t *mp, + Vmxnet2_TxRingEntry *xre, + tx_ring_buf_t *txBuf) +{ + int frag; + int fragcount; + int rval; + mblk_t *tp; + mblk_t *mblk; + boolean_t needPullup = FALSE; + boolean_t dmaMemAlloced = FALSE; + + ASSERT(txBuf); + ASSERT(txBuf->mblk == NULL); + ASSERT(MUTEX_HELD(&dp->xmitlock)); + + xre->sg.length = 0; + xre->flags = 0; + + fragcount = 0; + for (tp = mp; tp != NULL; tp = tp->b_cont) { + fragcount++; + } + if (fragcount > dp->maxTxFrags) { + needPullup = TRUE; + } + +pullup: + frag = 0; + if (needPullup) { + if (!(mblk = msgpullup(mp, -1))) { + cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: msgpullup failed", + dp->drvName, dp->unit); + goto err; + } + } else { + mblk = mp; + } + + /* + * Go through message chain and drop packet pointers into ring + * scatter/gather array + */ + for (tp = mblk; tp != 
NULL; tp = tp->b_cont) { + + uint_t nCookies; + ddi_dma_cookie_t dmaCookie; + int len = tp->b_wptr - tp->b_rptr; + + if (len) { + /* + * Associate tx buffer with dma handle + */ + ASSERT(dp->txDmaHdl); + if ((rval = ddi_dma_addr_bind_handle(dp->txDmaHdl, NULL, (caddr_t)tp->b_rptr, + len, DDI_DMA_RDWR | DDI_DMA_STREAMING, + DDI_DMA_DONTWAIT, NULL, + &dmaCookie, &nCookies)) + != DDI_DMA_MAPPED) { + + /* + * Try to handle bind failure caused by a page boundary spill + * by allocating a private dma buffer and copying data into it + */ + if ((rval == DDI_DMA_TOOBIG) && !dmaMemAlloced ) { + /* + * Force pullup + */ + if (!needPullup && (dp->maxTxFrags > 1)) { + needPullup = TRUE; + goto pullup; + } + + if (Vxn_AllocDmaMem(dp, len, FALSE, &txBuf->dmaMem) + != SOLVMXNET_SUCCESS) { + goto err; + } + + dmaMemAlloced = TRUE; + + /* + * Copy data into DMA capable buffer + */ + bcopy(tp->b_rptr, txBuf->dmaMem.buf, len); + + /* + * Stick buffer physical addr in the ring + */ + xre->sg.sg[frag].addrLow = txBuf->dmaMem.phyBuf; + xre->sg.sg[frag].length = len; + frag++; + + continue; + + } else { + cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: failed (%d) to bind dma " + "handle for len %d. 
[dmaMemAlloced=%d]", + dp->drvName, dp->unit, rval, len, dmaMemAlloced); + goto err; + } + } + + /* + * Extract tx buffer physical addresses from cookie + */ + while (nCookies) { + if (UNLIKELY(frag == dp->maxTxFrags)) { + (void)ddi_dma_unbind_handle(dp->txDmaHdl); + + if (!needPullup) { + ASSERT(!dmaMemAlloced); + needPullup = TRUE; + goto pullup; + } else { + cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: " + "exceeded max (%d) fragments in message", + dp->drvName, dp->unit, dp->maxTxFrags); + goto err; + } + } + + /* + * Stick it in the ring + */ + xre->sg.sg[frag].addrLow = dmaCookie.dmac_address; + xre->sg.sg[frag].length = dmaCookie.dmac_size; + frag++; + + if (--nCookies) { + ddi_dma_nextcookie(dp->txDmaHdl, &dmaCookie); + } + } + + (void)ddi_dma_unbind_handle(dp->txDmaHdl); + } + } + + if (frag > 0) { + xre->sg.length = frag; + + /* Give ownership to NIC */ + xre->sg.addrType = NET_SG_PHYS_ADDR; + xre->ownership = VMXNET2_OWNERSHIP_NIC; + xre->flags |= VMXNET2_TX_CAN_KEEP; + txBuf->mblk = mblk; + + /* + * If we called msgpullup to concatenate fragments, free + * original mblk now since we're going to return success. + */ + if (mblk != mp) { + freemsg(mp); + } + + return SOLVMXNET_SUCCESS; + } + +err: + if (mblk != NULL && mblk != mp) { + /* + * Free mblk allocated by msgpullup. + */ + freemsg(mblk); + } + + if (dmaMemAlloced) { + ASSERT(txBuf->dmaMem.buf); + Vxn_FreeDmaMem(&txBuf->dmaMem); + } + + return SOLVMXNET_FAILURE; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_Send -- + * GLD Transmit routine. Starts packet hard tx. 
+ * + * Results: + * GLD_SUCCESS on success + * GLD_FAILURE on failure + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_Send(gld_mac_info_t *macinfo, mblk_t *mp) +{ + Vmxnet2_TxRingEntry *xre; + int err = GLD_SUCCESS; + vxn_softc_t *dp = (vxn_softc_t *)macinfo->gldm_private; + Vmxnet2_DriverData *dd = dp->driverData; + boolean_t resched = FALSE; + + mutex_enter(&dp->xmitlock); + + /* + * Check if ring entry at drop pointer is available + */ + if (TX_RINGBUF_MBLK(dp, dd->txDriverNext) != NULL) { + DPRINTF(3, (CE_NOTE, "%s%d: Vxn_Send: tx ring full", + dp->drvName, dp->unit)); + err = GLD_NORESOURCES; + dd->txStopped = TRUE; + dp->stats.defer++; + goto out; + } + + xre = &dp->txRing[dd->txDriverNext]; + + /* + * Drop packet into ring entry + */ + if (Vxn_EncapTxBuf(dp, mp, xre, &dp->txRingBuf[dd->txDriverNext]) + != SOLVMXNET_SUCCESS) { + err = GLD_FAILURE; + dp->stats.errxmt++; + goto out; + } + + /* + * Increment drop pointer + */ + VMXNET_INC(dd->txDriverNext, dd->txRingLength); + dd->txNumDeferred++; + dp->txPending++; + + /* + * Transmit, if number of pending packets > tx cluster length + */ + if (dd->txNumDeferred >= dd->txClusterLength) { + dd->txNumDeferred = 0; + + /* + * Call hardware transmit + */ + INL(dp, VMXNET_TX_ADDR); + } + + /* + * Clean up transmit ring. TX completion interrupts are not guaranteed + */ + (void) Vxn_TxComplete(dp, &resched); + +out: + mutex_exit(&dp->xmitlock); + if (resched) { + /* Tell GLD to retry any deferred packets */ + gld_sched(dp->macInfo); + } + return err; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_TxComplete -- + * Scan Tx ring for completed transmits. Reclaim Tx buffers. + * + * Results: + * Returns TRUE if it found a completed transmit, FALSE otherwise. + * Also sets *reschedp to TRUE if the caller should call gld_sched + * to reschedule transmits (once all locks are dropped). 
+ * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static boolean_t +Vxn_TxComplete(vxn_softc_t *dp, boolean_t *reschedp) +{ + Vmxnet2_DriverData *dd = dp->driverData; + boolean_t found = FALSE; + boolean_t needresched = FALSE; + + ASSERT(MUTEX_HELD(&dp->xmitlock)); + + while (1) { + Vmxnet2_TxRingEntry *xre = &dp->txRing[dd->txDriverCur]; + + if (xre->ownership != VMXNET2_OWNERSHIP_DRIVER || + (TX_RINGBUF_MBLK(dp, dd->txDriverCur) == NULL)) { + break; + } + + found = TRUE; + Vxn_FreeTxBuf(dp, dd->txDriverCur); + + dp->txPending--; + VMXNET_INC(dd->txDriverCur, dd->txRingLength); + if (dd->txStopped) { + needresched = TRUE; + dd->txStopped = FALSE; + } + } + + *reschedp = needresched; + return found; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_Receive -- + * Rx handler. First assembles the packets into a chain of mblks, + * then drops locks and passes them up the stack to GLD. + * + * Results: + * Returns TRUE if it find a packet ready for processing, FALSE + * otherwise. + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static boolean_t +Vxn_Receive(vxn_softc_t *dp) +{ + int ringnext; + short pktlen; + Vmxnet2_DriverData *dd = dp->driverData; + rx_dma_buf_t *rxDesc; + rx_dma_buf_t *newRxDesc; + mblk_t *mblk; + mblk_t *head = NULL; + mblk_t **tail = &head; + mblk_t *next; + boolean_t found = FALSE; /* Did we find at least one packet? 
*/ + + ASSERT(MUTEX_HELD(&dp->intrlock)); + + /* + * Walk receive ring looking for entries with ownership + * reverted back to driver + */ + while (1) { + Vmxnet2_RxRingEntry *rre; + rx_dma_buf_t **rbuf; + + ringnext = dd->rxDriverNext; + rre = &dp->rxRing[ringnext]; + rbuf = &dp->rxRingBuffPtr[ringnext]; + + if (rre->ownership != VMXNET2_OWNERSHIP_DRIVER) { + break; + } + + found = TRUE; + + pktlen = rre->actualLength; + + if (pktlen < (60 - 4)) { + /* + * Ethernet header vlan tags are 4 bytes. Some vendors generate + * 60byte frames including vlan tags. When vlan tag + * is stripped, such frames become 60 - 4. (PR106153) + */ + dp->stats.errrcv++; + if (pktlen != 0) { + DPRINTF(3, (CE_CONT, "%s%d: runt packet\n", dp->drvName, dp->unit)); + dp->stats.runt++; + } + } else { + /* + * Alloc new Rx buffer to replace current one + */ + newRxDesc = Vxn_AllocRxBufFromPool(dp); + + if (newRxDesc) { + rxDesc = *rbuf; + mblk = rxDesc->mblk; + + *rbuf = newRxDesc; + rre->paddr = newRxDesc->dmaDesc.phyBuf + ETHERALIGN; + rre->bufferLength = MAXPKTBUF - ETHERALIGN; + rre->actualLength = 0; + + /* + * Advance write pointer past packet length + */ + mblk->b_wptr = mblk->b_rptr + pktlen; + + /* + * Add to end of chain. + */ + mblk->b_next = NULL; + *tail = mblk; + tail = &mblk->b_next; + } else { + dp->stats.errrcv++; + dp->stats.norcvbuf++; + } + } + + /* Give the descriptor back to NIC */ + rre->ownership = VMXNET2_OWNERSHIP_NIC; + VMXNET_INC(dd->rxDriverNext, dd->rxRingLength); + } + + /* + * Walk chain and pass mblks up to gld_recv one by one. + */ + mutex_exit(&dp->intrlock); + for (mblk = head; mblk != NULL; mblk = next) { + next = mblk->b_next; + mblk->b_next = NULL; + gld_recv(dp->macInfo, mblk); + } + mutex_enter(&dp->intrlock); + + return (found); +} + +/* + *----------------------------------------------------------------------------- + * Vxn_Interrupt -- + * GLD interrupt handler. 
Scan: Rx ring for received packets, Tx ring for + * completed transmits + * + * Results: + * - DDI_INTR_CLAIMED (if we found something to do) + * - DDI_INTR_UNCLAIMED (if not) + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static u_int +Vxn_Interrupt(gld_mac_info_t *macInfo) +{ + u_int ret = DDI_INTR_UNCLAIMED; + vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private; + boolean_t foundRx, foundTx; + boolean_t resched = FALSE; + + mutex_enter(&dp->intrlock); + dp->inIntr = TRUE; + + if (!dp->nicActive) { + goto out; + } + + /* + * Ack interrupt + */ + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_ACK); + + foundRx = Vxn_Receive(dp); + + mutex_enter(&dp->xmitlock); + foundTx = Vxn_TxComplete(dp, &resched); + mutex_exit(&dp->xmitlock); + + if (foundRx || foundTx) { + ret = DDI_INTR_CLAIMED; + dp->stats.interrupts++; + } + +out: + dp->inIntr = FALSE; + mutex_exit(&dp->intrlock); + + if (resched) { + gld_sched(dp->macInfo); + } + + return ret; +} + + +/* + *----------------------------------------------------------------------------- + * Vxn_ReclaimRxBuf -- + * Callback handler invoked by freemsg(). 
Frees Rx buffer memory and mappings + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_ReclaimRxBuf(rx_dma_buf_t *rxDesc) +{ + Vxn_FreeRxBufToPool(rxDesc); +} + +/* + *----------------------------------------------------------------------------- + * Vxn_FreeRxBuf -- + * Free allocated Rx buffer + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_FreeRxBuf(rx_dma_buf_t *rxDesc) +{ + ASSERT(rxDesc); + + if (rxDesc->mblk) { + freemsg(rxDesc->mblk); + } else { + Vxn_FreeDmaMem(&rxDesc->dmaDesc); + kmem_free(rxDesc, sizeof(rx_dma_buf_t)); + } +} + + +/* + *----------------------------------------------------------------------------- + * Vxn_AllocRxBuf -- + * Allocate Rx buffer + * + * Results: + * Pointer to Rx buffer descriptor - on success + * NULL - on failure + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static rx_dma_buf_t * +Vxn_AllocRxBuf(vxn_softc_t *dp, int cansleep) +{ + rx_dma_buf_t *rxDesc; + + rxDesc = (rx_dma_buf_t *)kmem_zalloc(sizeof(rx_dma_buf_t), + cansleep ? 
KM_SLEEP : KM_NOSLEEP); + if (!rxDesc) { + cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBuf: kmem_zalloc failed", + dp->drvName, dp->unit); + return NULL; + } + + rxDesc->softc = dp; + + /* + * Alloc dma-able packet memory + */ + if (Vxn_AllocDmaMem(dp, MAXPKTBUF, cansleep, &rxDesc->dmaDesc) + != SOLVMXNET_SUCCESS) { + kmem_free(rxDesc, sizeof(rx_dma_buf_t)); + return NULL; + } + + /* + * Fill in free callback; fired by freemsg() + */ + rxDesc->freeCB.free_func = &Vxn_ReclaimRxBuf; + rxDesc->freeCB.free_arg = (caddr_t) rxDesc; + + rxDesc->mblk = NULL; + return rxDesc; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_FreeInitBuffers -- + * Free allocated Tx and Rx buffers + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_FreeInitBuffers(vxn_softc_t *dp) +{ + int i; + + for (i=0; i<dp->vxnNumRxBufs; i++) { + if (dp->rxRingBuffPtr[i]) { + Vxn_FreeRxBuf(dp->rxRingBuffPtr[i]); + dp->rxRingBuffPtr[i] = NULL; + } + } + + for (i=0; i<dp->vxnNumTxBufs; i++) { + if (TX_RINGBUF_MBLK(dp, i)) { + Vxn_FreeTxBuf(dp, i); + } + } + + /* + * Rx pool must get freed last. Rx buffers above will + * show up on the pool when freemsg callback fires. + */ + Vxn_FreeRxBufPool(dp); +} + + +/* + *----------------------------------------------------------------------------- + * Vxn_AllocRxBufPool -- + * Allocate pool of rx buffers - 3 * configured Rx buffers + * + * Results: + * SOLVMXNET_SUCCESS/SOLVMXNET_FAILURE + * + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_AllocRxBufPool(vxn_softc_t *dp) +{ + int i; + + dp->rxFreeBufList = NULL; + + // Allow list to double in size if needed. Any additional buffers + // that are allocated on the fly will be freed back to main memory. 
+ dp->rxMaxFreeBufs = dp->vxnNumRxBufs * 6; + + for (i = 0; i < dp->vxnNumRxBufs * 3; i++) { + rx_dma_buf_t *rxDesc; + + /* + * Alloc rx buffer + */ + if (!(rxDesc = Vxn_AllocRxBuf(dp, TRUE))) { + cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBufPool: failed to allocate memory", + dp->drvName, dp->unit); + dp->rxNumFreeBufs = i; + return SOLVMXNET_FAILURE; + } + /* + * Add to free list + */ + rxDesc->next = dp->rxFreeBufList; + dp->rxFreeBufList = rxDesc; + } + + dp->rxNumFreeBufs = i; + return SOLVMXNET_SUCCESS; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_FreeRxBufPool -- + * Free rx buffers pool + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_FreeRxBufPool(vxn_softc_t *dp) +{ + while (dp->rxFreeBufList) { + rx_dma_buf_t *rxDesc = dp->rxFreeBufList; + + /* unlink */ + dp->rxFreeBufList = rxDesc->next; + + ASSERT(rxDesc->mblk == NULL); + Vxn_FreeDmaMem(&rxDesc->dmaDesc); + kmem_free(rxDesc, sizeof(rx_dma_buf_t)); + } + dp->rxNumFreeBufs = 0; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_AllocRxBufFromPool -- + * Allocate Rx buffer from free pool + * + * Results: + * Pointer to Rx buffer descriptor - on success + * NULL - on failure + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static rx_dma_buf_t * +Vxn_AllocRxBufFromPool(vxn_softc_t *dp) +{ + rx_dma_buf_t *rxDesc = NULL; + + mutex_enter(&dp->rxlistlock); + if (dp->rxFreeBufList) { + rxDesc = dp->rxFreeBufList; + dp->rxFreeBufList = rxDesc->next; + ASSERT(dp->rxNumFreeBufs >= 1); + dp->rxNumFreeBufs--; + } + mutex_exit(&dp->rxlistlock); + + if (!rxDesc) { + /* + * Try to allocate new descriptor from memory. Can't block here + * since we could be being called from interrupt context. 
+       */
+      DPRINTF(5, (CE_NOTE, "%s%d: allocating rx buf from memory",
+                  dp->drvName, dp->unit));
+      if (!(rxDesc = Vxn_AllocRxBuf(dp, FALSE))) {
+         cmn_err(CE_WARN,
+                 "%s%d: Vxn_AllocRxBufFromPool : pool rx alloc failed",
+                 dp->drvName, dp->unit);
+         return NULL;
+      }
+   }
+
+   /*
+    * Allocate new message block for this buffer
+    */
+   rxDesc->mblk = desballoc((uchar_t *)rxDesc->dmaDesc.buf + ETHERALIGN,
+                            rxDesc->dmaDesc.bufLen - ETHERALIGN,
+                            BPRI_MED, &rxDesc->freeCB);
+   if (!rxDesc->mblk) {
+      cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBufFromPool : desballoc failed",
+              dp->drvName, dp->unit);
+
+      /* put back on free list */
+      Vxn_FreeRxBufToPool(rxDesc);
+      return NULL;
+   }
+
+   return rxDesc;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBufToPool --
+ *    Return rx buffer to free pool
+ *
+ * Results:
+ *    None
+ *
+ * Side effects:
+ *    Clears rxDesc->mblk. If the pool already holds rxMaxFreeBufs entries
+ *    the buffer is freed instead of being queued.
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBufToPool(rx_dma_buf_t *rxDesc)
+{
+   vxn_softc_t *dp = rxDesc->softc;
+
+   rxDesc->mblk = NULL;
+
+   /*
+    * Insert on free list, or free if the list is full
+    */
+   mutex_enter(&dp->rxlistlock);
+   if (dp->rxNumFreeBufs >= dp->rxMaxFreeBufs) {
+      DPRINTF(5, (CE_NOTE, "%s%d: freeing rx buf to memory",
+                  dp->drvName, dp->unit));
+      /* mblk is NULL here, so this releases the DMA memory and descriptor */
+      Vxn_FreeRxBuf(rxDesc);
+   } else {
+      rxDesc->next = dp->rxFreeBufList;
+      dp->rxFreeBufList = rxDesc;
+      dp->rxNumFreeBufs++;
+   }
+   mutex_exit(&dp->rxlistlock);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocInitBuffers --
+ *    Allocate Rx buffers and init ring entries
+ *
+ *    Lays out the rx ring, the one-entry dummy second rx ring and the tx
+ *    ring inside the shared driver data area, fills the rx buffer pool,
+ *    attaches one pool buffer to every rx ring entry and hands tx ring
+ *    ownership to the driver.
+ *
+ * Results:
+ *    SOLVMXNET_SUCCESS - on success
+ *    SOLVMXNET_FAILURE - on failure (rx buffers and pool are released)
+ *
+ * Side effects:
+ *    Resets dp->txDmaHdl to NULL and the ring indices in the driver data.
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocInitBuffers(vxn_softc_t *dp)
+{
+   Vmxnet2_DriverData *dd;
+   uint32_t i, offset;
+
+   dd = dp->driverData;
+   offset = sizeof(*dd);
+
+   /*
+    * Init shared structures
+    */
+   dd->rxRingLength = dp->vxnNumRxBufs;
+   dd->rxRingOffset = offset;
+   dp->rxRing = (Vmxnet2_RxRingEntry *)((uintptr_t)dd + offset);
+   offset += dp->vxnNumRxBufs * sizeof(Vmxnet2_RxRingEntry);
+
+   dd->rxRingLength2 = 1;
+   dd->rxRingOffset2 = offset;
+   offset += sizeof(Vmxnet2_RxRingEntry);
+
+   dd->txRingLength = dp->vxnNumTxBufs;
+   dd->txRingOffset = offset;
+   dp->txRing = (Vmxnet2_TxRingEntry *)((uintptr_t)dd + offset);
+   offset += dp->vxnNumTxBufs * sizeof(Vmxnet2_TxRingEntry);
+
+   /*
+    * Alloc Rx buffers pool
+    */
+   if ( Vxn_AllocRxBufPool(dp) != SOLVMXNET_SUCCESS) {
+      cmn_err(CE_WARN, "%s%d: Vxn_AllocInitBuffers: failed to alloc buf pool",
+              dp->drvName, dp->unit);
+      /*
+       * Vxn_AllocRxBufPool may have queued some buffers before failing;
+       * release them. This is a no-op when the pool is already empty.
+       */
+      Vxn_FreeRxBufPool(dp);
+      return SOLVMXNET_FAILURE;
+   }
+
+   /*
+    * Allocate receive buffers
+    */
+   for (i = 0; i < dp->vxnNumRxBufs; i++) {
+      rx_dma_buf_t *rxDesc;
+      Vmxnet2_RxRingEntry *rre = &dp->rxRing[i];
+
+      if (!(rxDesc = Vxn_AllocRxBufFromPool(dp))) {
+         cmn_err(CE_WARN, "%s%d: Vxn_AllocInitBuffers: "
+                 "failed to alloc buf from pool", dp->drvName, dp->unit);
+         goto err;
+      }
+
+      /*
+       * Init ring entries
+       */
+      rre->paddr = rxDesc->dmaDesc.phyBuf + ETHERALIGN;
+      rre->bufferLength = MAXPKTBUF - ETHERALIGN;
+      rre->actualLength = 0;
+      dp->rxRingBuffPtr[i] = rxDesc;
+      rre->ownership = VMXNET2_OWNERSHIP_NIC;
+   }
+
+   dp->txDmaHdl = NULL;
+
+   /*
+    * Dummy recvRing2 tacked on to the end, with a single unusable entry
+    */
+   dp->rxRing[i].paddr = 0;
+   dp->rxRing[i].bufferLength = 0;
+   dp->rxRing[i].actualLength = 0;
+   /*
+    * rxRingBuffPtr has only MAX_NUM_RECV_BUFFERS slots, so when the device
+    * is configured with exactly that many rx buffers there is no slot for
+    * the dummy entry. It never carries a buffer, so skipping the store is
+    * safe and avoids writing one element past the end of the array.
+    */
+   if (i < MAX_NUM_RECV_BUFFERS) {
+      dp->rxRingBuffPtr[i] = NULL;
+   }
+   dp->rxRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
+
+   dd->rxDriverNext = 0;
+
+   /*
+    * Give xmit ring ownership to DRIVER
+    */
+   for (i = 0; i < dp->vxnNumTxBufs; i++) {
+      dp->txRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
+      dp->txRingBuf[i].mblk = NULL;
+      dp->txRingBuf[i].dmaMem.buf = NULL;
+      dp->txRing[i].sg.sg[0].addrHi = 0;
+   }
+
+   dd->txDriverCur = dd->txDriverNext = 0;
+   dd->txStopped = FALSE;
+
+   return SOLVMXNET_SUCCESS;
+
+err:
+   /*
+    * Release every rx buffer attached so far; freeing goes through
+    * freemsg(), which returns the buffer to the pool, so the pool must be
+    * freed afterwards.
+    */
+   for (i=0; i<dp->vxnNumRxBufs; i++) {
+      if (dp->rxRingBuffPtr[i]) {
+         Vxn_FreeRxBuf(dp->rxRingBuffPtr[i]);
+         dp->rxRingBuffPtr[i] = NULL;
+      }
+   }
+
+   Vxn_FreeRxBufPool(dp);
+   return SOLVMXNET_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeDmaMem --
+ *    Free allocated dma memory
+ *
+ *    Unbinds the handle, frees the memory and frees the handle
+ *    unconditionally, so the descriptor must be fully set up (as left by
+ *    a successful bind in Vxn_AllocDmaMem).
+ *
+ * Results:
+ *    None
+ *
+ * Side effects:
+ *    Clears buf, phyBuf and bufLen in the descriptor.
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeDmaMem(dma_buf_t *dma)
+{
+   ddi_dma_unbind_handle(dma->dmaHdl);
+   ddi_dma_mem_free(&dma->dataAccHdl);
+   ddi_dma_free_handle(&dma->dmaHdl);
+
+   dma->buf = NULL;
+   dma->phyBuf = NULL;
+   dma->bufLen = 0;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocDmaMem --
+ *    Allocate dma-able memory and fill passed in dma descriptor pointer
+ *    if successful
+ *
+ * Results:
+ *    SOLVMXNET_SUCCESS on success
+ *    SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ *    None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocDmaMem(vxn_softc_t *dp, int size, int cansleep, dma_buf_t *dma)
+{
+   /*
+    * Allocate handle
+    */
+   if (ddi_dma_alloc_handle(dp->dip, &vxn_dma_attrs,
+                            cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
+                            NULL, &dma->dmaHdl) != DDI_SUCCESS) {
+      cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: failed to allocate handle",
+              dp->drvName, dp->unit);
+      return SOLVMXNET_FAILURE;
+   }
+
+   /*
+    * Allocate memory
+    */
+   if (ddi_dma_mem_alloc(dma->dmaHdl, size, &vxn_buf_attrs, DDI_DMA_CONSISTENT,
+                         cansleep ?
DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, NULL, + &dma->buf, &dma->bufLen, &dma->dataAccHdl) + != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: " + "ddi_dma_mem_alloc %d bytes failed", + dp->drvName, dp->unit, size); + ddi_dma_free_handle(&dma->dmaHdl); + return SOLVMXNET_FAILURE; + } + + /* + * Mapin memory + */ + if (ddi_dma_addr_bind_handle(dma->dmaHdl, NULL, dma->buf, dma->bufLen, + DDI_DMA_RDWR | DDI_DMA_STREAMING, + cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, + NULL, &dma->cookie, &dma->cookieCount) + != DDI_DMA_MAPPED) { + cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: failed to bind handle", + dp->drvName, dp->unit); + ddi_dma_mem_free(&dma->dataAccHdl); + ddi_dma_free_handle(&dma->dmaHdl); + return SOLVMXNET_FAILURE; + } + + if (dma->cookieCount != 1) { + cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: too many DMA cookies", + dp->drvName, dp->unit); + Vxn_FreeDmaMem(dma); + return SOLVMXNET_FAILURE; + } + + /* + * Save physical address (for easy use) + */ + dma->phyBuf = dma->cookie.dmac_address; + + return SOLVMXNET_SUCCESS; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_FreeDriverData -- + * Free driver data structures and Tx Rx buffers + * + * Results: + * None + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static void +Vxn_FreeDriverData(vxn_softc_t *dp) +{ + Vxn_FreeInitBuffers(dp); + Vxn_FreeDmaMem(&dp->driverDataDmaMem); +} + +/* + *----------------------------------------------------------------------------- + * Vxn_AllocDriverData -- + * Allocate driver data structures and Tx Rx buffers on init + * + * Results: + * SOLVMXNET_SUCCESS on success + * SOLVMXNET_FAILURE on failure + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_AllocDriverData(vxn_softc_t *dp) +{ + uint32_t r, driverDataSize; + + /* + * Get configured receive buffers + */ + OUTL(dp, 
VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_NUM_RX_BUFFERS); + r = INL(dp, VMXNET_COMMAND_ADDR); + if (r == 0 || r > MAX_NUM_RECV_BUFFERS) { + r = DEFAULT_NUM_RECV_BUFFERS; + } + dp->vxnNumRxBufs = r; + + /* + * Get configured transmit buffers + */ + OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_NUM_TX_BUFFERS); + r = INL(dp, VMXNET_COMMAND_ADDR); + if (r == 0 || r > MAX_NUM_XMIT_BUFFERS) { + r = DEFAULT_NUM_XMIT_BUFFERS; + } + dp->vxnNumTxBufs = r; + + /* + * Calculate shared data size and allocate memory for it + */ + driverDataSize = + sizeof(Vmxnet2_DriverData) + + /* numRecvBuffers + 1 for the dummy recvRing2 (used only by Windows) */ + (dp->vxnNumRxBufs + 1) * sizeof(Vmxnet2_RxRingEntry) + + dp->vxnNumTxBufs * sizeof(Vmxnet2_TxRingEntry); + + if (Vxn_AllocDmaMem(dp, driverDataSize, TRUE, &dp->driverDataDmaMem) + != SOLVMXNET_SUCCESS) { + return SOLVMXNET_FAILURE; + } + + /* + * Clear memory (bzero isn't resolved by module loader for some reason) + */ + ASSERT(dp->driverDataDmaMem.buf && dp->driverDataDmaMem.bufLen); + Vxn_Memset(dp->driverDataDmaMem.buf, 0, dp->driverDataDmaMem.bufLen); + + dp->driverData = (Vmxnet2_DriverData *)dp->driverDataDmaMem.buf; + dp->driverDataPhy = (void *)(uintptr_t)dp->driverDataDmaMem.phyBuf; + + /* So that the vmkernel can check it is compatible */ + dp->driverData->magic = VMXNET2_MAGIC; + dp->driverData->length = driverDataSize; + + /* + * Alloc rx/tx buffers, init ring, register with hardware etc. 
+ */ + if (Vxn_AllocInitBuffers(dp) != SOLVMXNET_SUCCESS) { + Vxn_FreeDmaMem(&dp->driverDataDmaMem); + return SOLVMXNET_FAILURE; + } + + DPRINTF(3, (CE_CONT, "%s%d: numRxBufs=(%d*%"FMT64"d) numTxBufs=(%d*%"FMT64"d)" + " driverDataSize=%d driverDataPhy=0x%p\n", + dp->drvName, dp->unit, + dp->vxnNumRxBufs, (uint64_t)sizeof(Vmxnet2_RxRingEntry), + dp->vxnNumTxBufs, (uint64_t)sizeof(Vmxnet2_TxRingEntry), + driverDataSize, dp->driverDataPhy)); + + return SOLVMXNET_SUCCESS; +} + + +/* + *----------------------------------------------------------------------------- + * Vxn_Attach -- + * Probe and attach driver to stack + * + * Results: + * DDI_SUCCESS + * DDI_FAILURE + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_Attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int i, ret, len, unit; + const char *drvName; + ddi_acc_handle_t confHdl; + uint16_t vid, did; + uint8_t revid __unused; + struct pci_phys_spec *regs; + caddr_t vxnIOp; + ddi_acc_handle_t vxnIOHdl; + uint32_t vLow, vHigh; + gld_mac_info_t *macInfo; + vxn_softc_t *dp; + boolean_t morphed = FALSE; + uint_t regSpaceSize; + uint_t chip; + uint_t vxnIOSize; + + if (cmd != DDI_ATTACH) { + return DDI_FAILURE; + } + + unit = ddi_get_instance(dip); + drvName = ddi_driver_name(dip); + + /* + * Check if chip is supported. 
+ */ + if (pci_config_setup(dip, &confHdl) != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s%d: pci_config_setup() failed", drvName, unit); + return DDI_FAILURE; + } + + vid = pci_config_get16(confHdl, PCI_CONF_VENID); + did = pci_config_get16(confHdl, PCI_CONF_DEVID); + revid = pci_config_get8(confHdl, PCI_CONF_REVID); + + if (vid == PCI_VENDOR_ID_VMWARE && did == PCI_DEVICE_ID_VMWARE_NET) { + /* Found vmxnet */ + chip = VMXNET_CHIP; + } + else if (vid == PCI_VENDOR_ID_AMD && did == PCI_DEVICE_ID_AMD_VLANCE) { + /* Found vlance (maybe a vmxnet disguise) */ + chip = LANCE_CHIP; + } + else { + /* Not Found */ + DPRINTF(3, (CE_WARN, "%s: Vxn_Attach: wrong PCI venid/devid (0x%x, 0x%x)", + drvName, vid, did)); + goto err; + } + + DPRINTF(3, (CE_CONT, "%s%d: (vid: 0x%04x, did: 0x%04x, revid: 0x%02x)\n", + drvName, unit, vid, did, revid)); + + /* + * Get device properties; the "reg" array returned through &regs is + * released with kmem_free() once the IO space search below is done + */ + regs = NULL; + len = 0; + if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + "reg", (caddr_t)&regs, &len) != DDI_PROP_SUCCESS) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: failed to get reg property", + drvName, unit); + goto err; + } + + ASSERT(regs != NULL && len > 0); + + /* + * Search device properties for IO-space + */ + for (i = 0; i <len / sizeof(struct pci_phys_spec); i++) { + if ((regs[i].pci_phys_hi & PCI_REG_ADDR_M) == PCI_ADDR_IO) { + regSpaceSize = regs[i].pci_size_low; + DPRINTF(5, (CE_CONT, "%s%d: Vxn_Attach: regSpaceSize=%d\n", + drvName, unit, regSpaceSize)); + kmem_free(regs, len); + goto map_space_found; + } + } + + cmn_err(CE_WARN, "%s%d: Vxn_Attach: failed to find IO space", drvName, unit); + kmem_free(regs, len); + goto err; + +map_space_found: + + /* + * Ensure we can access registers through IO space.
+ */ + ret = pci_config_get16(confHdl, PCI_CONF_COMM); + ret |= PCI_COMM_IO | PCI_COMM_ME; + pci_config_put16(confHdl, PCI_CONF_COMM, ret); + + if (ddi_regs_map_setup(dip, i, (caddr_t *)&vxnIOp, 0, 0, &dev_attr, + &vxnIOHdl) != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: ddi_regs_map_setup failed", + drvName, unit); + goto err; + } + + if (chip == VMXNET_CHIP) { + vxnIOSize = VMXNET_CHIP_IO_RESV_SIZE; + } + else { + /* + * Since this is a vlance adapter we can only use it if + * its I/0 space is big enough for the adapter to be + * capable of morphing. This is the first requirement + * for this adapter to potentially be morphable. The + * layout of a morphable LANCE adapter is + * + * I/O space: + * + * |------------------| + * | LANCE IO PORTS | + * |------------------| + * | MORPH PORT | + * |------------------| + * | VMXNET IO PORTS | + * |------------------| + * + * VLance has 8 ports of size 4 bytes, the morph port is 4 bytes, and + * Vmxnet has 10 ports of size 4 bytes. + * + * We shift up the ioaddr with the size of the LANCE I/O space since + * we want to access the vmxnet ports. We also shift the ioaddr up by + * the MORPH_PORT_SIZE so other port access can be independent of + * whether we are Vmxnet or a morphed VLance. This means that when + * we want to access the MORPH port we need to subtract the size + * from ioaddr to get to it. 
+ */ + vxnIOp += LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE; + vxnIOSize = LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE + + VMXNET_CHIP_IO_RESV_SIZE; + } + + /* + * Do not attempt to morph non-morphable AMD PCnet + */ + if (vxnIOSize > regSpaceSize) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: " + "vlance device is not supported by this driver", drvName, unit); + goto err_free_regs_map; + } + + /* + * Morph, if we found a vlance adapter + */ + if (chip == LANCE_CHIP) { + uint16_t magic; + + /* Read morph port to verify that we can morph the adapter */ + magic = ddi_get16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE)); + if (magic != LANCE_CHIP && magic != VMXNET_CHIP) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: Invalid magic, read: 0x%08X", + drvName, unit, magic); + goto err_free_regs_map; + } + + /* Morph */ + ddi_put16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE), VMXNET_CHIP); + morphed = TRUE; + + /* Verify that we morphed correctly */ + magic = ddi_get16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE)); + if (magic != VMXNET_CHIP) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: Couldn't morph adapter." 
+ " Invalid magic, read:: 0x%08X", drvName, unit, magic); + goto err_morph_back; + } + } + + /* + * Check the version number of the device implementation + */ + vLow = (uint32_t)ddi_get32(vxnIOHdl, + (uint32_t *)(vxnIOp+VMXNET_LOW_VERSION)); + vHigh = (uint32_t)ddi_get32(vxnIOHdl, + (uint32_t *)(vxnIOp+VMXNET_HIGH_VERSION)); + + if ((vLow & 0xffff0000) != (VMXNET2_MAGIC & 0xffff0000) || + ((VMXNET2_MAGIC < vLow) || (VMXNET2_MAGIC > vHigh))) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: driver version 0x%08X doesn't " + "match device 0x%08X:0x%08X", + drvName, unit, VMXNET2_MAGIC, vLow, vHigh); + goto err_version_mismatch; + } + + /* + * Alloc soft state + */ + macInfo = gld_mac_alloc(dip); + if (!macInfo) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: gld_mac_alloc failed", + drvName, unit); + goto err_gld_mac_alloc; + } + + dp = (vxn_softc_t *) kmem_zalloc(sizeof(vxn_softc_t), KM_SLEEP); + ASSERT(dp); + + /* + * Get interrupt cookie + */ + if (ddi_get_iblock_cookie(dip, 0, &dp->iblockCookie) != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s%d: Vxn_Attach: ddi_get_iblock_cookie failed", + drvName, unit); + goto err_get_iblock_cookie; + } + + strncpy(dp->drvName, drvName, SOLVMXNET_MAXNAME); + dp->unit = unit; + dp->dip = dip; + dp->macInfo = macInfo; + dp->confHdl = confHdl; + dp->vxnIOHdl = vxnIOHdl; + dp->vxnIOp = vxnIOp; + dp->morphed = morphed; + dp->nicActive = FALSE; + dp->txPending = 0; + dp->maxTxFrags = 1; + + /* + * Initialize mutexes + */ + mutex_init(&dp->intrlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie); + mutex_init(&dp->xmitlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie); + mutex_init(&dp->rxlistlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie); + + /* + * Allocate and initialize our private and shared data structures + */ + if (Vxn_AllocDriverData(dp) != SOLVMXNET_SUCCESS) { + goto err_alloc_driverdata; + } + + /* + * Read the MAC address from the device + */ + for (i = 0; i < 6; i++) { + dp->devAddr.ether_addr_octet[i] = + (uint8_t)ddi_get8(vxnIOHdl, 
(uint8_t *)(vxnIOp + VMXNET_MAC_ADDR + i)); + } + macInfo->gldm_vendor_addr = dp->devAddr.ether_addr_octet; + macInfo->gldm_broadcast_addr = etherbroadcastaddr.ether_addr_octet; + + DPRINTF(3, (CE_CONT, + "MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n", + dp->devAddr.ether_addr_octet[0], + dp->devAddr.ether_addr_octet[1], + dp->devAddr.ether_addr_octet[2], + dp->devAddr.ether_addr_octet[3], + dp->devAddr.ether_addr_octet[4], + dp->devAddr.ether_addr_octet[5])); + + /* + * Configure GLD entry points + */ + macInfo->gldm_devinfo = dip; + macInfo->gldm_private = (caddr_t)dp; + macInfo->gldm_cookie = dp->iblockCookie; + macInfo->gldm_reset = Vxn_Reset; + macInfo->gldm_start = Vxn_Start; + macInfo->gldm_stop = Vxn_Stop; + macInfo->gldm_set_mac_addr = Vxn_SetMacAddress; + macInfo->gldm_send = Vxn_Send; + macInfo->gldm_set_promiscuous = Vxn_SetPromiscuous; + macInfo->gldm_get_stats = Vxn_GetStats; + macInfo->gldm_ioctl = NULL; + macInfo->gldm_set_multicast= Vxn_SetMulticast; + macInfo->gldm_intr = Vxn_Interrupt; + macInfo->gldm_mctl = NULL; + + macInfo->gldm_ident = (char *)ddi_driver_name(dip); + macInfo->gldm_type = DL_ETHER; + macInfo->gldm_minpkt = 0; + macInfo->gldm_maxpkt = ETHERMTU; + macInfo->gldm_addrlen = ETHERADDRL; + macInfo->gldm_saplen = -2; + macInfo->gldm_ppa = unit; + + /* + * Register with GLD (Generic Lan Driver) framework + */ + if (gld_register(dip, + (char *)ddi_driver_name(dip), macInfo) != DDI_SUCCESS) { + goto err_gld_register; + } + + /* + * Add interrupt to system. + */ + if (ddi_add_intr(dip, 0, NULL, NULL, gld_intr, + (caddr_t)macInfo) != DDI_SUCCESS) { + cmn_err(CE_WARN, "%s%d: ddi_add_intr failed", drvName, unit); + goto err_ddi_add_intr; + } + + /* + * Add to list of interfaces. 
+ */ + mutex_enter(&vxnListLock); + dp->next = &vxnList; + dp->prev = vxnList.prev; + vxnList.prev->next = dp; + vxnList.prev = dp; + mutex_exit(&vxnListLock); + + /* + * Success + */ + return DDI_SUCCESS; + +err_ddi_add_intr: + gld_unregister(macInfo); + +err_gld_register: + Vxn_FreeDriverData(dp); + +err_alloc_driverdata: + /* Destroy all three locks set up by mutex_init() in this function */ + mutex_destroy(&dp->intrlock); + mutex_destroy(&dp->xmitlock); + mutex_destroy(&dp->rxlistlock); + +err_get_iblock_cookie: + kmem_free(dp, sizeof(*dp)); + gld_mac_free(macInfo); + +err_gld_mac_alloc: +err_version_mismatch: +err_morph_back: + if (morphed) { + ddi_put16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE), LANCE_CHIP); + } + +err_free_regs_map: + ddi_regs_map_free(&vxnIOHdl); + +err: + pci_config_teardown(&confHdl); + return DDI_FAILURE; +} + +/* + *----------------------------------------------------------------------------- + * Vxn_Detach -- + * Called on module unload + * + * Results: + * DDI_SUCCESS + * DDI_FAILURE + * + * Side effects: + * None + *----------------------------------------------------------------------------- + */ +static int +Vxn_Detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + gld_mac_info_t *macInfo; + vxn_softc_t *dp; + + macInfo = (gld_mac_info_t *)ddi_get_driver_private(dip); + dp = (vxn_softc_t *)macInfo->gldm_private; + + if (cmd == DDI_DETACH) { + /* + * Tear down interrupt + */ + ddi_remove_intr(dip, 0, macInfo->gldm_cookie); + gld_unregister(macInfo); + + /* + * Quiesce hardware + */ + Vxn_Stop(macInfo); + + /* + * Free driver-data, tx/rx buffers etc + */ + Vxn_FreeDriverData(dp); + + /* + * Destroy locks (intrlock, xmitlock and rxlistlock) + */ + mutex_destroy(&dp->intrlock); + mutex_destroy(&dp->xmitlock); + mutex_destroy(&dp->rxlistlock); + + /* + * Unmorph + */ + if (dp->morphed) { + uint16_t magic; + + /* Verify that we had morphed earlier */ + magic = ddi_get16(dp->vxnIOHdl, + (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE)); + if (magic != VMXNET_CHIP) { + cmn_err(CE_WARN, "%s%d: Vxn_Detach: Adapter not morphed" + " magic=0x%08X", dp->drvName, dp->unit, magic); + } + else { + /* Unmorph */ +
ddi_put16(dp->vxnIOHdl, + (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE), LANCE_CHIP); + + /* Verify */ + magic = ddi_get16(dp->vxnIOHdl, + (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE)); + if (magic != LANCE_CHIP) { + cmn_err(CE_WARN, "%s%d: Vxn_Detach: Unable to unmorph adapter" + " magic=0x%08X", dp->drvName, dp->unit, magic); + } + } + } + + /* + * Release register mappings + */ + ddi_regs_map_free(&dp->vxnIOHdl); + pci_config_teardown(&dp->confHdl); + + /* + * Remove from list of interfaces. + */ + mutex_enter(&vxnListLock); + ASSERT(dp != &vxnList); + dp->prev->next = dp->next; + dp->next->prev = dp->prev; + mutex_exit(&vxnListLock); + + /* + * Release memory + */ + kmem_free(dp, sizeof(*dp)); + gld_mac_free(macInfo); + + return DDI_SUCCESS; + } + else { + return DDI_FAILURE; + } +} + +static struct module_info vxnminfo = { + 0, /* mi_idnum */ + "vmxnet", /* mi_idname */ + 0, /* mi_minpsz */ + ETHERMTU, /* mi_maxpsz */ + QHIWATER, /* mi_hiwat */ + 1, /* mi_lowat */ +}; + +static struct qinit vxnrinit = { + NULL, /* qi_putp */ + gld_rsrv, /* qi_srvp */ + gld_open, /* qi_qopen */ + gld_close, /* qi_qclose */ + NULL, /* qi_qadmin */ + &vxnminfo, /* qi_minfo */ + NULL /* qi_mstat */ +}; + +static struct qinit vxnwinit = { + gld_wput, /* qi_putp */ + gld_wsrv, /* qi_srvp */ + NULL, /* qi_qopen */ + NULL, /* qi_qclose */ + NULL, /* qi_qadmin */ + &vxnminfo, /* qi_minfo */ + NULL /* qi_mstat */ +}; + +static struct streamtab vxn_info = { + &vxnrinit, /* st_rdinit */ + &vxnwinit, /* st_wrinit */ + NULL, /* st_muxrinit */ + NULL /* st_muxwrinit */ +}; + +static struct cb_ops cb_vxn_ops = { + nulldev, /* cb_open */ + nulldev, /* cb_close */ + nodev, /* cb_strategy */ + nodev, /* cb_print */ + nodev, /* cb_dump */ + nodev, /* cb_read */ + nodev, /* cb_write */ + nodev, /* cb_ioctl */ + nodev, /* cb_devmap */ + nodev, /* cb_mmap */ + nodev, /* cb_segmap */ + nochpoll, /* cb_chpoll */ + ddi_prop_op, /* cb_prop_op */ + &vxn_info, /* cb_stream */ + D_NEW|D_MP /* cb_flag */ +}; +
+static struct dev_ops vxn_ops = { + DEVO_REV, /* devo_rev */ + 0, /* devo_refcnt */ + gld_getinfo, /* devo_getinfo */ + nulldev, /* devo_identify */ + nulldev, /* devo_probe */ + Vxn_Attach, /* devo_attach */ + Vxn_Detach, /* devo_detach */ + nodev, /* devo_reset */ + &cb_vxn_ops, /* devo_cb_ops */ + NULL, /* devo_bus_ops */ + ddi_power /* devo_power */ +}; + +static struct modldrv modldrv = { + &mod_driverops, + ident, + &vxn_ops, +}; + +static struct modlinkage modlinkage = { + MODREV_1, {&modldrv, NULL,} +}; + + +/* + * Module load entry point + */ +int +_init(void) +{ + int err; + + DPRINTF(5, (CE_CONT, "vxn: _init:\n")); + /* Initialize interface list */ + vxnList.next = vxnList.prev = &vxnList; + mutex_init(&vxnListLock, NULL, MUTEX_DRIVER, NULL); + if ((err = mod_install(&modlinkage)) != 0) { + mutex_destroy(&vxnListLock); + } + return err; +} + +/* + * Module unload entry point + */ +int +_fini(void) +{ + int err; + + DPRINTF(5, (CE_CONT, "vxn: _fini:\n")); + if ((err = mod_remove(&modlinkage)) == 0) { + mutex_destroy(&vxnListLock); + } + return err; +} + +/* + * Module info entry point + */ +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet.conf b/usr/src/uts/intel/io/vmxnet/vmxnet.conf new file mode 100644 index 0000000000..eb3b160412 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vmxnet.conf @@ -0,0 +1,24 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2012, Joyent, Inc. All rights reserved. +# Use is subject to license terms. +# diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h b/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h new file mode 100644 index 0000000000..5ea437df72 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h @@ -0,0 +1,436 @@ +/********************************************************* + * Copyright (C) 2004 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. 
+ * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +#ifndef _VMXNET2_DEF_H_ +#define _VMXNET2_DEF_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#include "includeCheck.h" + +#include "net_sg.h" +#include "vmxnet_def.h" + + +/* + * Magic number that identifies this version of the vmxnet protocol. + */ +#define VMXNET2_MAGIC 0xbabe864f + +/* size of the rx ring */ +#define VMXNET2_MAX_NUM_RX_BUFFERS 128 +#define VMXNET2_DEFAULT_NUM_RX_BUFFERS 100 + + +/* size of the rx ring when enhanced vmxnet is used */ +#define ENHANCED_VMXNET2_MAX_NUM_RX_BUFFERS 512 +#define ENHANCED_VMXNET2_DEFAULT_NUM_RX_BUFFERS 150 + +/* size of the 2nd rx ring */ +#define VMXNET2_MAX_NUM_RX_BUFFERS2 2048 +#define VMXNET2_DEFAULT_NUM_RX_BUFFERS2 512 + +/* size of the tx ring */ +#define VMXNET2_MAX_NUM_TX_BUFFERS 128 +#define VMXNET2_DEFAULT_NUM_TX_BUFFERS 100 + +/* size of the tx ring when tso/jf is used */ +#define VMXNET2_MAX_NUM_TX_BUFFERS_TSO 512 +#define VMXNET2_DEFAULT_NUM_TX_BUFFERS_TSO 256 + +enum { + VMXNET2_OWNERSHIP_DRIVER, + VMXNET2_OWNERSHIP_DRIVER_PENDING, + VMXNET2_OWNERSHIP_NIC, + VMXNET2_OWNERSHIP_NIC_PENDING, + VMXNET2_OWNERSHIP_NIC_FRAG, + VMXNET2_OWNERSHIP_DRIVER_FRAG, +}; + +#define VMXNET2_SG_DEFAULT_LENGTH 6 + +typedef struct Vmxnet2_SG_Array { + uint16 addrType; + uint16 length; + NetSG_Elem sg[VMXNET2_SG_DEFAULT_LENGTH]; +} Vmxnet2_SG_Array; + +typedef struct Vmxnet2_RxRingEntry { + uint64 paddr; /* Physical address of the packet data. */ + uint32 bufferLength; /* The length of the data at paddr. */ + uint32 actualLength; /* The actual length of the received data. */ + uint16 ownership; /* Who owns the packet. 
*/ + uint16 flags; /* Flags as defined below. */ + uint32 index; /* + * Currently: + * + * This is being used as an packet index to + * rx buffers. + * + * Originally: + * + * was void* driverData ("Driver specific data.") + * which was used for sk_buf**s in Linux and + * VmxnetRxBuff*s in Windows. It could not be + * here because the structure needs to be the + * same size between architectures, and it was + * not used on the device side, anyway. Look + * for its replacement in + * Vmxnet_Private.rxRingBuffPtr on Linux and + * VmxnetAdapter.rxRingBuffPtr on Windows. + */ +} Vmxnet2_RxRingEntry; + +/* + * Vmxnet2_RxRingEntry flags: + * + * VMXNET2_RX_HW_XSUM_OK The hardware verified the TCP/UDP checksum. + * VMXNET2_RX_WITH_FRAG More data is in the 2nd ring + * VMXNET2_RX_FRAG_EOP This is the last frag, the only valid flag for + * 2nd ring entry + * + */ +#define VMXNET2_RX_HW_XSUM_OK 0x01 +#define VMXNET2_RX_WITH_FRAG 0x02 +#define VMXNET2_RX_FRAG_EOP 0x04 + +typedef struct Vmxnet2_TxRingEntry { + uint16 flags; /* Flags as defined below. */ + uint16 ownership; /* Who owns this packet. */ + uint32 extra; /* + * was void* driverData ("Driver specific data.") + * which was used for sk_buf*s in Linux and + * VmxnetTxInfo*s in Windows. It could not be + * here because the structure needs to be the + * same size between architectures, and it was + * not used on the device side, anyway. Look + * for its replacement in + * Vmxnet_Private.txRingBuffPtr on Linux and + * VmxnetAdapter.txRingBuffPtr on Windows. + */ + uint32 tsoMss; /* TSO pkt MSS */ + Vmxnet2_SG_Array sg; /* Packet data. */ +} Vmxnet2_TxRingEntry; + +/* + * Vmxnet2_TxRingEntry flags: + * + * VMXNET2_TX_CAN_KEEP The implementation can return the tx ring entry + * to the driver when it is ready as opposed to + * before the transmit call from the driver completes. + * VMXNET2_TX_RING_LOW The driver's transmit ring buffer is low on free + * slots. 
+ * VMXNET2_TX_HW_XSUM The hardware should perform the TCP/UDP checksum + * VMXNET2_TX_TSO The hardware should do TCP segmentation. + * VMXNET2_TX_PINNED_BUFFER The driver used one of the preallocated vmkernel + * buffers *and* it has been pinned with Net_PinTxBuffers. + * VMXNET2_TX_MORE This is *not* the last tx entry for the pkt. + * All flags except VMXNET2_TX_MORE are ignored + * for the subsequent tx entries. + */ +#define VMXNET2_TX_CAN_KEEP 0x0001 +#define VMXNET2_TX_RING_LOW 0x0002 +#define VMXNET2_TX_HW_XSUM 0x0004 +#define VMXNET2_TX_TSO 0x0008 +#define VMXNET2_TX_PINNED_BUFFER 0x0010 +#define VMXNET2_TX_MORE 0x0020 + +/* + * Structure used by implementations. This structure allows the inline + * functions below to be used. + */ +typedef struct Vmxnet2_RxRingInfo { + Vmxnet2_RxRingEntry *base; /* starting addr of the ring */ + uint32 nicNext; /* next entry to use in the ring */ + uint32 ringLength; /* # of entries in the ring */ + PA startPA; /* PA of the starting addr of the ring */ +#ifdef VMX86_DEBUG + const char *name; +#endif +} Vmxnet2_RxRingInfo; + +typedef struct Vmxnet2_TxRingInfo { + Vmxnet2_TxRingEntry *base; /* starting addr of the ring */ + uint32 nicNext; /* next entry to use in the ring */ + uint32 ringLength; /* # of entries in the ring */ + PA startPA; /* PA of the starting addr of the ring */ +#ifdef VMX86_DEBUG + const char *name; +#endif +} Vmxnet2_TxRingInfo; + +typedef struct Vmxnet2_ImplData { + Vmxnet2_RxRingInfo rxRing; + Vmxnet2_RxRingInfo rxRing2; + Vmxnet2_TxRingInfo txRing; + + struct PhysMem_Token *ddPhysMemToken; +} Vmxnet2_ImplData; + +/* + * Used internally for performance studies. By default this will be off so there + * should be no compatibilty or other interferences. 
+ */ + +/* #define ENABLE_VMXNET2_PROFILING */ + + +#ifdef ENABLE_VMXNET2_PROFILING +typedef struct Vmxnet2_VmmStats { + uint64 vIntTSC; /* the time that virtual int was posted */ + uint64 actionsCount; /* Number of actions received */ + uint64 numWasteActions; /* Number of non-productive actions */ +} Vmxnet2_VmmStats; +#endif + +typedef struct Vmxnet2_DriverStats { + uint32 transmits; /* # of times that the driver's transmit function */ + /* is called. The driver could transmit more */ + /* than one packet per call. */ + uint32 pktsTransmitted; /* # of packets transmitted. */ + uint32 noCopyTransmits; /* # of packets that are transmitted without */ + /* copying any data. */ + uint32 copyTransmits; /* # of packets that are transmitted by copying */ + /* the data into a buffer. */ + uint32 maxTxsPending; /* Max # of transmits outstanding. */ + uint32 txStopped; /* # of times that transmits got stopped because */ + /* the tx ring was full. */ + uint32 txRingOverflow; /* # of times that transmits got deferred bc */ + /* the tx ring was full. This must be >= */ + /* txStopped since there will be one */ + /* txStopped when the ring fills up and then */ + /* one txRingOverflow for each packet that */ + /* gets deferred until there is space. */ + uint32 interrupts; /* # of times interrupted. */ + uint32 pktsReceived; /* # of packets received. */ + uint32 rxBuffersLow; /* # of times that the driver was low on */ + /* receive buffers. */ +#ifdef ENABLE_VMXNET2_PROFILING + Vmxnet2_VmmStats vmmStats; /* vmm related stats for perf study */ +#endif +} Vmxnet2_DriverStats; + +/* + * Shared data structure between the vm, the vmm, and the vmkernel. + * This structure was originally arranged to try to group common data + * on 32-byte cache lines, but bit rot and the fact that we no longer + * run on many CPUs with that cacheline size killed that optimization. + * vmxnet3 should target 128 byte sizes and alignments to optimize for + * the 64 byte cacheline pairs on P4.
+ */ +typedef struct Vmxnet2_DriverData { + /* + * Magic must be first. + */ + Vmxnet_DDMagic magic; + + /* + * Receive fields. + */ + uint32 rxRingLength; /* Length of the receive ring. */ + uint32 rxDriverNext; /* Index of the next packet that will */ + /* be filled in by the impl */ + + uint32 rxRingLength2; /* Length of the 2nd receive ring. */ + uint32 rxDriverNext2; /* Index of the next packet that will */ + /* be filled in by the impl */ + + uint32 notUsed1; /* was "irq" */ + + /* + * Interface flags and multicast filter. + */ + uint32 ifflags; + uint32 LADRF[VMXNET_MAX_LADRF]; + + /* + * Transmit fields + */ + uint32 txDontClusterSize; /* All packets <= this will be transmitted */ + /* immediately, regardless of clustering */ + /* settings [was fill[1]] */ + uint32 txRingLength; /* Length of the transmit ring. */ + uint32 txDriverCur; /* Index of the next packet to be */ + /* returned by the implementation.*/ + uint32 txDriverNext; /* Index of the entry in the ring */ + /* buffer to use for the next packet.*/ + uint32 txStopped; /* The driver has stopped transmitting */ + /* because its ring buffer is full.*/ + uint32 txClusterLength; /* Maximum number of packets to */ + /* put in the ring buffer before */ + /* asking the implementation to */ + /* transmit the packets in the buffer.*/ + uint32 txNumDeferred; /* Number of packets that have been */ + /* queued in the ring buffer since */ + /* the last time the implementation */ + /* was asked to transmit. */ + uint32 notUsed3; /* This field is deprecated but still used */ + /* as minXmitPhysLength on the escher branch. */ + /* It cannot be used for other purposes */ + /* until escher vms no longer are allowed */ + /* to install this driver. */ + + uint32 totalRxBuffers; /* used by esx for max rx buffers */ + uint64 rxBufferPhysStart; /* used by esx for pinng rx buffers */ + /* + * Extra fields for future expansion. 
+ */ + uint32 extra[2]; + + uint16 maxFrags; /* # of frags the driver can handle */ + uint16 featureCtl; /* for driver to enable some feature */ + + /* + * The following fields are used to save the nicNext indexes part + * of implData in the vmkernel when disconnecting the adapter, we + * need them when we reconnect. This mechanism is used for + * checkpointing as well. + */ + uint32 savedRxNICNext; + uint32 savedRxNICNext2; + uint32 savedTxNICNext; + + /* + * Fields used during initialization or debugging. + */ + uint32 length; + uint32 rxRingOffset; + uint32 rxRingOffset2; + uint32 txRingOffset; + uint32 debugLevel; + uint32 txBufferPhysStart; + uint32 txBufferPhysLength; + uint32 txPktMaxSize; + + /* + * Driver statistics. + */ + Vmxnet2_DriverStats stats; +} Vmxnet2_DriverData; + +/* + * Shared between VMM and Vmkernel part of vmxnet2 to optimize action posting + * VMM writes 1 (don't post) or 0 (okay to post) and vmk reads this. + */ +typedef struct VmxnetVMKShared { + uint32 dontPostActions; +} VmxnetVMKShared; + +#if defined VMX86_VMX || defined VMKERNEL + +/* + * Inline functions used to assist the implementation of the vmxnet interface. + */ + +/* + * Get the next empty packet out of the receive ring and move to + * the next packet. + */ +static INLINE Vmxnet2_RxRingEntry * +Vmxnet2_GetNextRx(Vmxnet2_RxRingInfo *ri, uint16 ownership) +{ + Vmxnet2_RxRingEntry *rre = ri->base + ri->nicNext; + if (rre->ownership == ownership) { + VMXNET_INC(ri->nicNext, ri->ringLength); + } else { + rre = NULL; + } + + return rre; +} + +/* + * Return ownership of a packet in the receive ring to the driver. + */ +static INLINE void +Vmxnet2_PutRx(Vmxnet2_RxRingEntry *rre, uint32 pktLength, uint16 ownership) +{ + rre->actualLength = pktLength; + COMPILER_MEM_BARRIER(); + rre->ownership = ownership; +} + +/* + * Get the next pending packet out of the transmit ring. 
+ */ +static INLINE Vmxnet2_TxRingEntry * +Vmxnet2_GetNextTx(Vmxnet2_TxRingInfo *ri) +{ + Vmxnet2_TxRingEntry *txre = ri->base + ri->nicNext; + if (txre->ownership == VMXNET2_OWNERSHIP_NIC) { + return txre; + } else { + return NULL; + } +} + +/* + * Move to the next entry in the transmit ring. + */ +static INLINE unsigned int +Vmxnet2_IncNextTx(Vmxnet2_TxRingInfo *ri) +{ + unsigned int prev = ri->nicNext; + Vmxnet2_TxRingEntry *txre = ri->base + ri->nicNext; + + txre->ownership = VMXNET2_OWNERSHIP_NIC_PENDING; + + VMXNET_INC(ri->nicNext, ri->ringLength); + return prev; +} + +/* + * Get the indicated entry from transmit ring. + */ +static INLINE Vmxnet2_TxRingEntry * +Vmxnet2_GetTxEntry(Vmxnet2_TxRingInfo *ri, unsigned int idx) +{ + return ri->base + idx; +} + +/* + * Get the indicated entry from the given rx ring + */ +static INLINE Vmxnet2_RxRingEntry * +Vmxnet2_GetRxEntry(Vmxnet2_RxRingInfo *ri, unsigned int idx) +{ + return ri->base + idx; +} + +#endif /* defined VMX86_VMX || defined VMKERNEL */ + +#endif + diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet_def.h b/usr/src/uts/intel/io/vmxnet/vmxnet_def.h new file mode 100644 index 0000000000..703466c995 --- /dev/null +++ b/usr/src/uts/intel/io/vmxnet/vmxnet_def.h @@ -0,0 +1,184 @@ +/********************************************************* + * Copyright (C) 1999 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + *********************************************************/ + +/********************************************************* + * The contents of this file are subject to the terms of the Common + * Development and Distribution License (the "License") version 1.0 + * and no later version. You may not use this file except in + * compliance with the License. + * + * You can obtain a copy of the License at + * http://www.opensource.org/licenses/cddl1.php + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + *********************************************************/ + +#ifndef _VMXNET_DEF_H_ +#define _VMXNET_DEF_H_ + +#define INCLUDE_ALLOW_USERLEVEL + +#define INCLUDE_ALLOW_MODULE +#define INCLUDE_ALLOW_VMK_MODULE +#define INCLUDE_ALLOW_VMKERNEL +#define INCLUDE_ALLOW_DISTRIBUTE +#include "includeCheck.h" + +#include "net_sg.h" +#include "vmnet_def.h" + + +/* + * Vmxnet I/O ports, used by both the vmxnet driver and + * the device emulation code. + */ + +#define VMXNET_INIT_ADDR 0x00 +#define VMXNET_INIT_LENGTH 0x04 +#define VMXNET_TX_ADDR 0x08 +#define VMXNET_COMMAND_ADDR 0x0c +#define VMXNET_MAC_ADDR 0x10 +#define VMXNET_LOW_VERSION 0x18 +#define VMXNET_HIGH_VERSION 0x1c +#define VMXNET_STATUS_ADDR 0x20 +#define VMXNET_TOE_INIT_ADDR 0x24 +#define VMXNET_APROM_ADDR 0x28 +#define VMXNET_INT_ENABLE_ADDR 0x30 +#define VMXNET_WAKE_PKT_PATTERNS 0x34 + +/* + * Vmxnet command register values. 
+ */ +#define VMXNET_CMD_INTR_ACK 0x0001 +#define VMXNET_CMD_UPDATE_LADRF 0x0002 +#define VMXNET_CMD_UPDATE_IFF 0x0004 +#define VMXNET_CMD_UNUSED 1 0x0008 +#define VMXNET_CMD_UNUSED_2 0x0010 +#define VMXNET_CMD_INTR_DISABLE 0x0020 +#define VMXNET_CMD_INTR_ENABLE 0x0040 +#define VMXNET_CMD_UNUSED_3 0x0080 +#define VMXNET_CMD_CHECK_TX_DONE 0x0100 +#define VMXNET_CMD_GET_NUM_RX_BUFFERS 0x0200 +#define VMXNET_CMD_GET_NUM_TX_BUFFERS 0x0400 +#define VMXNET_CMD_PIN_TX_BUFFERS 0x0800 +#define VMXNET_CMD_GET_CAPABILITIES 0x1000 +#define VMXNET_CMD_GET_FEATURES 0x2000 +#define VMXNET_CMD_SET_POWER_FULL 0x4000 +#define VMXNET_CMD_SET_POWER_LOW 0x8000 + +/* + * Vmxnet status register values. + */ +#define VMXNET_STATUS_CONNECTED 0x0001 +#define VMXNET_STATUS_ENABLED 0x0002 +#define VMXNET_STATUS_TX_PINNED 0x0004 + +/* + * Values for the interface flags. + */ +#define VMXNET_IFF_PROMISC 0x01 +#define VMXNET_IFF_BROADCAST 0x02 +#define VMXNET_IFF_MULTICAST 0x04 +#define VMXNET_IFF_DIRECTED 0x08 + +/* + * Length of the multicast address filter. + */ +#define VMXNET_MAX_LADRF 2 + +/* + * Size of Vmxnet APROM. + */ +#define VMXNET_APROM_SIZE 6 + +/* + * An invalid ring index. + */ +#define VMXNET_INVALID_RING_INDEX (-1) + +/* + * Features that are implemented by the driver. These are driver + * specific so not all features will be listed here. In addition not all + * drivers have to pay attention to these feature flags. + * + * VMXNET_FEATURE_ZERO_COPY_TX The driver won't do any copies as long as + * the packet length is > + * Vmxnet_DriverData.minTxPhysLength. + * + * VMXNET_FEATURE_TSO The driver will use the TSO capabilities + * of the underlying hardware if available + * and enabled. 
+ * + * VMXNET_FEATURE_JUMBO_FRAME The driver can send/rcv jumbo frame + * + * VMXNET_FEATURE_LPD The backend can deliver large pkts + */ +#define VMXNET_FEATURE_ZERO_COPY_TX 0x01 +#define VMXNET_FEATURE_TSO 0x02 +#define VMXNET_FEATURE_JUMBO_FRAME 0x04 +#define VMXNET_FEATURE_LPD 0x08 + +/* + * Define the set of capabilities required by each feature above + */ +#define VMXNET_FEATURE_ZERO_COPY_TX_CAPS VMXNET_CAP_SG +#define VMXNET_FEATURE_TSO_CAPS VMXNET_CAP_TSO +#define VMXNET_HIGHEST_FEATURE_BIT VMXNET_FEATURE_TSO + +#define VMXNET_INC(val, max) \ + val++; \ + if (UNLIKELY(val == max)) { \ + val = 0; \ + } + +/* + * code that just wants to switch on the different versions of the + * guest<->implementation protocol can cast driver data to this. + */ +typedef uint32 Vmxnet_DDMagic; + +/* + * Wake packet pattern commands sent through VMXNET_WAKE_PKT_PATTERNS port + */ + +#define VMXNET_PM_OPCODE_START 3 /* args: cnt of wake packet patterns */ +#define VMXNET_PM_OPCODE_LEN 2 /* args: index of wake packet pattern */ + /* number of pattern byte values */ +#define VMXNET_PM_OPCODE_DATA 1 /* args: index of wake packet pattern */ + /* offset in pattern byte values list */ + /* packet byte offset */ + /* packet byte value */ +#define VMXNET_PM_OPCODE_END 0 /* args: <none> */ + +typedef union Vmxnet_WakePktCmd { + uint32 pktData : 32; + struct { + unsigned cmd : 2; /* wake packet pattern cmd [from list above] */ + unsigned cnt : 3; /* cnt wk pkt pttrns 1..MAX_NUM_FILTER_PTTRNS */ + unsigned ind : 3; /* ind wk pkt pttrn 0..MAX_NUM_FILTER_PTTRNS-1 */ + unsigned lenOff : 8; /* num pttrn byte vals 1..MAX_PKT_FILTER_SIZE */ + /* OR offset in pattern byte values list */ + /* 0..MAX_PKT_FILTER_SIZE-1 */ + unsigned byteOff : 8; /* pkt byte offset 0..MAX_PKT_FILTER_SIZE-1 */ + unsigned byteVal : 8; /* packet byte value 0..255 */ + } pktPttrn; +} Vmxnet_WakePktCmd; + +#endif /* _VMXNET_DEF_H_ */ diff --git a/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c 
b/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c index 8004a3eb3e..8dd039e8cc 100644 --- a/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c +++ b/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c @@ -14,6 +14,7 @@ */ /* * Copyright (c) 2013, 2016 by Delphix. All rights reserved. + * Copyright 2018 Joyent, Inc. */ #include <vmxnet3.h> @@ -322,7 +323,7 @@ vmxnet3_rx_hwcksum(vmxnet3_softc_t *dp, mblk_t *mp, VMXNET3_DEBUG(dp, 3, "rx cksum flags = 0x%x\n", flags); - (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); + mac_hcksum_set(mp, 0, 0, 0, 0, flags); } } diff --git a/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c b/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c index 8a9f05e690..8769d938ab 100644 --- a/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c +++ b/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c @@ -15,6 +15,7 @@ /* * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright 2018 Joyent, Inc. */ #include <vmxnet3.h> @@ -79,7 +80,7 @@ vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol, ol->hlen = 0; ol->msscof = 0; - hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags); + mac_hcksum_get(mp, &start, &stuff, NULL, &value, &flags); mac_lso_get(mp, &mss, &lso_flag); diff --git a/usr/src/uts/intel/ip/ip.global-objs.debug64 b/usr/src/uts/intel/ip/ip.global-objs.debug64 index 6aa8cc87d3..691b7da537 100644 --- a/usr/src/uts/intel/ip/ip.global-objs.debug64 +++ b/usr/src/uts/intel/ip/ip.global-objs.debug64 @@ -145,7 +145,6 @@ ip_squeue_create_callback ip_squeue_enter ip_squeue_fanout ip_squeue_flag -ip_squeue_worker_wait ip_thread_data ip_thread_list ip_thread_rwlock @@ -257,8 +256,6 @@ squeue_drain_ms squeue_drain_ns squeue_drain_stack_needed squeue_drain_stack_toodeep -squeue_workerwait_ms -squeue_workerwait_tick tcp_acceptor_rinit tcp_acceptor_winit tcp_conn_cache diff --git a/usr/src/uts/intel/ip/ip.global-objs.obj64 b/usr/src/uts/intel/ip/ip.global-objs.obj64 index 7e7d1a2833..624f9984f0 100644 --- 
a/usr/src/uts/intel/ip/ip.global-objs.obj64 +++ b/usr/src/uts/intel/ip/ip.global-objs.obj64 @@ -145,7 +145,6 @@ ip_squeue_create_callback ip_squeue_enter ip_squeue_fanout ip_squeue_flag -ip_squeue_worker_wait ip_thread_data ip_thread_list ip_thread_rwlock @@ -254,8 +253,6 @@ squeue_drain_ms squeue_drain_ns squeue_drain_stack_needed squeue_drain_stack_toodeep -squeue_workerwait_ms -squeue_workerwait_tick tcp_acceptor_rinit tcp_acceptor_winit tcp_conn_cache diff --git a/usr/src/uts/intel/ipf/Makefile b/usr/src/uts/intel/ipf/Makefile index 78cd65310f..7e0da79ba3 100644 --- a/usr/src/uts/intel/ipf/Makefile +++ b/usr/src/uts/intel/ipf/Makefile @@ -21,6 +21,7 @@ # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. +# Copyright 2018 Joyent, Inc. # # Copyright (c) 2018, Joyent, Inc. @@ -52,7 +53,8 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) CPPFLAGS += -DIPFILTER_LKM -DIPFILTER_LOG -DIPFILTER_LOOKUP -DUSE_INET6 CPPFLAGS += -DSUNDDI -DSOLARIS2=$(RELEASE_MINOR) -DIRE_ILL_CN -LDFLAGS += -dy -Ndrv/ip -Nmisc/md5 -Nmisc/neti -Nmisc/hook -Nmisc/kcf +LDFLAGS += -dy -Ndrv/ip -Nmisc/md5 -Nmisc/neti -Nmisc/hook -Nmisc/kcf -Ndrv/vnd +LDFLAGS += -Nmisc/mac INC_PATH += -I$(UTSBASE)/common/inet/ipf diff --git a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 b/usr/src/uts/intel/ipf/ipf.global-objs.debug64 index 663613cee3..5ebc7eed2b 100644 --- a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 +++ b/usr/src/uts/intel/ipf/ipf.global-objs.debug64 @@ -22,13 +22,17 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# Copyright 2013 Joyent, Inc. All rights reserved +# Copyright 2018 Joyent, Inc. 
All rights reserved # fr_availfuncs fr_features fr_objbytes hdrsizes +hook_viona_in +hook_viona_in_gz +hook_viona_out +hook_viona_out_gz hook4_in hook4_in_gz hook4_loop_in @@ -39,6 +43,10 @@ hook4_nicevents hook4_nicevents_gz hook4_out hook4_out_gz +hook4_vnd_in +hook4_vnd_in_gz +hook4_vnd_out +hook4_vnd_out_gz hook6_in hook6_in_gz hook6_loop_in @@ -49,6 +57,10 @@ hook6_nicevents hook6_nicevents_gz hook6_out hook6_out_gz +hook6_vnd_in +hook6_vnd_in_gz +hook6_vnd_out +hook6_vnd_out_gz icmpreplytype4 icmpreplytype6 icmptoicmp6types @@ -58,6 +70,9 @@ ip6exthdr ipf_cb_ops ipf_dev_info ipf_devfiles +ipf_eth_bcast_addr +ipf_eth_ipv4_mcast +ipf_eth_ipv6_mcast ipf_kstat_tmp ipf_minor ipf_ops diff --git a/usr/src/uts/intel/iptun/Makefile b/usr/src/uts/intel/iptun/Makefile index 24fb7f9fe8..c71c39f911 100644 --- a/usr/src/uts/intel/iptun/Makefile +++ b/usr/src/uts/intel/iptun/Makefile @@ -54,7 +54,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -Ndrv/dld -Nmisc/dls -Nmisc/mac -Ndrv/ip -INC_PATH += -I$(UTSBASE)/common/io/bpf LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW diff --git a/usr/src/uts/intel/ixgbe/Makefile b/usr/src/uts/intel/ixgbe/Makefile index 4b4cc45f6e..f19f151d60 100644 --- a/usr/src/uts/intel/ixgbe/Makefile +++ b/usr/src/uts/intel/ixgbe/Makefile @@ -75,6 +75,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # Driver depends on MAC # LDFLAGS += -dy -N misc/mac +MAPFILES += ddi mac random kernel # # Default build targets. @@ -100,4 +101,5 @@ install: $(INSTALL_DEPS) # # Include common targets. 
# +include $(UTSBASE)/Makefile.mapfile include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/lx_brand/Makefile b/usr/src/uts/intel/lx_brand/Makefile new file mode 100644 index 0000000000..4eff474a49 --- /dev/null +++ b/usr/src/uts/intel/lx_brand/Makefile @@ -0,0 +1,114 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# Copyright 2017 Joyent, Inc. +# +# This makefile drives the production of the kernel component of +# the lx brand +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Path to where brand common sources live +# +LX_CMN = $(SRC)/common/brand/lx + +# +# Define the module and object file sets. +# +MODULE = lx_brand +OBJECTS = $(LX_BRAND_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_BRAND_DIR)/$(MODULE) + +# +# Include common rules. 
+# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) -I$(SRC)/common +INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf +INC_PATH += -I$(UTSBASE)/common/fs/sockfs +INC_PATH += -I$(UTSBASE)/common/fs/zfs +AS_INC_PATH += -I$(UTSBASE)/i86pc/genassym/$(OBJS_DIR) + +# +# lint pass one enforcement +# +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nexec/elfexec -Nfs/fifofs -Nfs/sockfs -Ndrv/ip \ + -Nfs/zfs -Nmisc/klmmod -Nsys/sysacct + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON +# Due to zfs_ioctl.h inlines. gcc catches non-inline ones during compilation. +LINTTAGS += -erroff=E_STATIC_UNUSED + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_brand/Makefile.rules diff --git a/usr/src/uts/intel/lx_brand/Makefile.rules b/usr/src/uts/intel/lx_brand/Makefile.rules new file mode 100644 index 0000000000..1f641cea05 --- /dev/null +++ b/usr/src/uts/intel/lx_brand/Makefile.rules @@ -0,0 +1,100 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# Copyright 2016 Joyent, Inc. +# +# + +# +# Section 1a: C object build rules +# +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -I$(UTSBASE)/i86pc -o $@ $< + +$(OBJS_DIR_OBJ64)/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -I$(UTSBASE)/i86pc -o $@ $< + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(LX_CMN)/%.c + 
$(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -I$(UTSBASE)/i86pc -o $@ $< + +# +# Section 1b: Lint `object' build rules. +# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/os/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/syscall/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(LX_CMN)/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.s + @($(LHEAD) $(LINT.s) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_cgroup/Makefile b/usr/src/uts/intel/lx_cgroup/Makefile new file mode 100644 index 0000000000..6f9116f32a --- /dev/null +++ b/usr/src/uts/intel/lx_cgroup/Makefile @@ -0,0 +1,57 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +UTSBASE = ../.. 
+ +LX_CMN = $(SRC)/common/brand/lx + +MODULE = lx_cgroup +OBJECTS = $(LX_CGROUP_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_CGROUP_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nbrand/lx_brand + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ + +include $(UTSBASE)/intel/lx_cgroup/Makefile.rules diff --git a/usr/src/uts/intel/lx_cgroup/Makefile.rules b/usr/src/uts/intel/lx_cgroup/Makefile.rules new file mode 100644 index 0000000000..6d4c7c4060 --- /dev/null +++ b/usr/src/uts/intel/lx_cgroup/Makefile.rules @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. All rights reserved. 
+# + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/cgroups/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/cgroups/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_devfs/Makefile b/usr/src/uts/intel/lx_devfs/Makefile new file mode 100644 index 0000000000..1254f596eb --- /dev/null +++ b/usr/src/uts/intel/lx_devfs/Makefile @@ -0,0 +1,57 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +UTSBASE = ../.. + +LX_CMN = $(SRC)/common/brand/lx + +MODULE = lx_devfs +OBJECTS = $(LX_DEVFS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_DEVFS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nbrand/lx_brand + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ + +include $(UTSBASE)/intel/lx_devfs/Makefile.rules diff --git a/usr/src/uts/intel/lx_devfs/Makefile.rules b/usr/src/uts/intel/lx_devfs/Makefile.rules new file mode 100644 index 0000000000..4b9748314c --- /dev/null +++ b/usr/src/uts/intel/lx_devfs/Makefile.rules @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), 
version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. All rights reserved. +# + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/devfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/devfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_netlink/Makefile b/usr/src/uts/intel/lx_netlink/Makefile new file mode 100644 index 0000000000..2ada8e28a6 --- /dev/null +++ b/usr/src/uts/intel/lx_netlink/Makefile @@ -0,0 +1,77 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = lx_netlink +OBJECTS = $(LX_NETLINK_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_NETLINK_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx +LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs -Nbrand/lx_brand + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_proc/Makefile b/usr/src/uts/intel/lx_proc/Makefile new file mode 100644 index 0000000000..9ec70e5adb --- /dev/null +++ b/usr/src/uts/intel/lx_proc/Makefile @@ -0,0 +1,117 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_proc/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright 2017 Joyent, Inc. +# +# This makefile drives the production of the lxproc file system +# kernel module. +# +# i86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. 
+ +# +# Path to where brand common sources live +# +LX_CMN = $(SRC)/common/brand/lx + +# +# Define the module and object file sets. +# +MODULE = lx_proc +OBJECTS = $(LX_PROC_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_PROC_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) +INC_PATH += -I$(UTSBASE)/common/fs/zfs + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) + +# +# Depends on procfs and lx_brand +# +LDFLAGS += -dy -Nfs/procfs -Nbrand/lx_brand -Ndrv/inotify -Ndrv/ip +LDFLAGS += -Nfs/sockfs -Ncrypto/swrand + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_proc/Makefile.rules diff --git a/usr/src/uts/intel/lx_proc/Makefile.rules b/usr/src/uts/intel/lx_proc/Makefile.rules new file mode 100644 index 0000000000..b8592d2fdd --- /dev/null +++ b/usr/src/uts/intel/lx_proc/Makefile.rules @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# Section 1a: C object build rules +# +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/procfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules. +# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/procfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_ptm/Makefile b/usr/src/uts/intel/lx_ptm/Makefile new file mode 100644 index 0000000000..dcead27da7 --- /dev/null +++ b/usr/src/uts/intel/lx_ptm/Makefile @@ -0,0 +1,91 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_ptm/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the lx_ptm driver +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = lx_ptm +OBJECTS = $(LX_PTM_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_PTM_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_sysfs/Makefile b/usr/src/uts/intel/lx_sysfs/Makefile new file mode 100644 index 0000000000..14e0603533 --- /dev/null +++ b/usr/src/uts/intel/lx_sysfs/Makefile @@ -0,0 +1,66 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2016 Joyent, Inc. +# + +UTSBASE = ../.. + +LX_CMN = $(SRC)/common/brand/lx + +MODULE = lx_sysfs +OBJECTS = $(LX_SYS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_SYS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nbrand/lx_brand -Ndrv/ip + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +# XXX JJ +# LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +# LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ + +include $(UTSBASE)/intel/lx_sysfs/Makefile.rules diff --git a/usr/src/uts/intel/lx_sysfs/Makefile.rules b/usr/src/uts/intel/lx_sysfs/Makefile.rules new file mode 100644 index 0000000000..c9d4c28f85 --- /dev/null +++ b/usr/src/uts/intel/lx_sysfs/Makefile.rules @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. All rights reserved. +# + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/sysfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/sysfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_systrace/Makefile b/usr/src/uts/intel/lx_systrace/Makefile new file mode 100644 index 0000000000..20c4a6a3a3 --- /dev/null +++ b/usr/src/uts/intel/lx_systrace/Makefile @@ -0,0 +1,80 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +UTSBASE = ../.. 
+ +MODULE = lx_systrace +OBJECTS = $(LX_SYSTRACE_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_SYSTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +ROOTLINK = $(USR_DTRACE_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/dtrace + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +LDFLAGS += -dy -Ndrv/dtrace -Nbrand/lx_brand + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_STATIC_UNUSED + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +$(ROOTLINK): $(USR_DTRACE_DIR) $(ROOTMODULE) + -$(RM) $@; ln $(ROOTMODULE) $@ + +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/dtrace/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/dtrace/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lxautofs/Makefile b/usr/src/uts/intel/lxautofs/Makefile new file mode 100644 index 0000000000..5de66af48f --- /dev/null +++ b/usr/src/uts/intel/lxautofs/Makefile @@ -0,0 +1,114 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# Copyright 2016 Joyent, Inc. +# + +# +# This makefile drives the production of the lxautofs file system +# kernel module. +# +# i86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +# Note that the name of the actual filesystem is lxautofs and +# not lx_autofs. This is becase filesystem names are stupidly +# limited to 8 characters. +# +MODULE = lxautofs +OBJECTS = $(LX_AUTOFS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_AUTOFS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +ROOTLINK = $(USR_FS_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/autofs + +INC_PATH += -I$(UTSBASE)/common/brand/lx + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy -Nfs/nfs + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +$(ROOTLINK): $(ROOT_FS_DIR) $(ROOTMODULE) + -$(RM) $@; ln $(ROOTMODULE) $@ + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lxautofs/Makefile.rules diff --git a/usr/src/uts/intel/lxautofs/Makefile.rules b/usr/src/uts/intel/lxautofs/Makefile.rules new file mode 100644 index 0000000000..474743ce9d --- /dev/null +++ b/usr/src/uts/intel/lxautofs/Makefile.rules @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Section 1a: C object build rules +# +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/autofs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules. 
+# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/autofs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lxprocfs/Makefile b/usr/src/uts/intel/lxprocfs/Makefile new file mode 100644 index 0000000000..c6ffec0199 --- /dev/null +++ b/usr/src/uts/intel/lxprocfs/Makefile @@ -0,0 +1,88 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lxprocfs/Makefile +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the lxprocfs file system +# kernel module. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = lxprocfs +OBJECTS = $(LXPROC_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LXPROC_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Depends on procfs +# +LDFLAGS += -dy -Nfs/procfs + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/mac/Makefile b/usr/src/uts/intel/mac/Makefile index 652e90aa67..6766bd933b 100644 --- a/usr/src/uts/intel/mac/Makefile +++ b/usr/src/uts/intel/mac/Makefile @@ -54,7 +54,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -INC_PATH += -I$(UTSBASE)/common/io/bpf LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN diff --git a/usr/src/uts/intel/mac_ether/Makefile b/usr/src/uts/intel/mac_ether/Makefile index 889f7a73de..c58051a7b1 100644 --- a/usr/src/uts/intel/mac_ether/Makefile +++ b/usr/src/uts/intel/mac_ether/Makefile @@ -56,7 +56,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -N misc/mac -INC_PATH += -I$(UTSBASE)/common/io/bpf # # Default build targets. diff --git a/usr/src/uts/intel/mac_ib/Makefile b/usr/src/uts/intel/mac_ib/Makefile index 5045d1bbbf..ebcef78eaa 100644 --- a/usr/src/uts/intel/mac_ib/Makefile +++ b/usr/src/uts/intel/mac_ib/Makefile @@ -56,7 +56,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -N misc/mac -INC_PATH += -I$(UTSBASE)/common/io/bpf # # Default build targets. diff --git a/usr/src/uts/intel/mac_wifi/Makefile b/usr/src/uts/intel/mac_wifi/Makefile index 11f22cfca7..b05cb5d627 100644 --- a/usr/src/uts/intel/mac_wifi/Makefile +++ b/usr/src/uts/intel/mac_wifi/Makefile @@ -58,7 +58,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # CFLAGS += $(CCVERBOSE) LDFLAGS += -dy -Nmisc/mac -INC_PATH += -I$(UTSBASE)/common/io/bpf # # Default build targets. 
diff --git a/usr/src/uts/intel/nfp/Makefile b/usr/src/uts/intel/nfp/Makefile new file mode 100644 index 0000000000..3acbc6a725 --- /dev/null +++ b/usr/src/uts/intel/nfp/Makefile @@ -0,0 +1,82 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014, Joyent, Inc +# + +# +# uts/intel/nfp/Makefile +# +# This makefile drives the production of the nfp +# driver kernel module. +# +# intel architecture dependent +# + +# +# Paths to the base of the uts directory trees +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = nfp +OBJECTS = $(NFP_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(NFP_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Driver-specific flags +# +CPPFLAGS += -DCH_KERNELVER=270 +LDFLAGS += -dy +CERRWARN += -_gcc=-Wno-unused-variable +CERRWARN += -_gcc=-Wno-unused-function + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/opteron_pcbe/Makefile b/usr/src/uts/intel/opteron_pcbe/Makefile index 6ddb761ccd..2d45ea3801 100644 --- a/usr/src/uts/intel/opteron_pcbe/Makefile +++ b/usr/src/uts/intel/opteron_pcbe/Makefile @@ -47,7 +47,7 @@ MODULE = pcbe.AuthenticAMD OBJECTS = $(OPTERON_PCBE_OBJS:%=$(OBJS_DIR)/%) OBJECTS += $(CPCGEN_OBJS:%=$(OBJS_DIR)/%) LINTS = $(OPTERON_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE) +ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE) # # Include common rules. diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases index 3116819932..1cea287121 100644 --- a/usr/src/uts/intel/os/driver_aliases +++ b/usr/src/uts/intel/os/driver_aliases @@ -1 +1,2 @@ asy "pci11c1,480" +vmxnet "pci15ad,720" diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major index c5ad4c9bf0..1fb86f9a50 100644 --- a/usr/src/uts/intel/os/name_to_major +++ b/usr/src/uts/intel/os/name_to_major @@ -2,3 +2,4 @@ md 85 devinfo 88 asy 106 did 239 +vmxnet 270 diff --git a/usr/src/uts/intel/overlay/Makefile b/usr/src/uts/intel/overlay/Makefile new file mode 100644 index 0000000000..645b888cbc --- /dev/null +++ b/usr/src/uts/intel/overlay/Makefile @@ -0,0 +1,54 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +UTSBASE = ../.. 
+ +MODULE = overlay +OBJECTS = $(OVERLAY_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(OVERLAY_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) +CONF_SRCDIR = $(UTSBASE)/common/io/overlay +MAPFILE = $(UTSBASE)/common/io/overlay/overlay.mapfile + +LDFLAGS += -dy -Nmisc/mac -Ndrv/dld -Nmisc/dls -Nmisc/ksocket + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/p4_pcbe/Makefile b/usr/src/uts/intel/p4_pcbe/Makefile index 07689646c1..42dc040eee 100644 --- a/usr/src/uts/intel/p4_pcbe/Makefile +++ b/usr/src/uts/intel/p4_pcbe/Makefile @@ -35,7 +35,7 @@ UTSBASE = ../.. MODULE = pcbe.GenuineIntel.15 OBJECTS = $(P4_PCBE_OBJS:%=$(OBJS_DIR)/%) LINTS = $(P4_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE) +ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE) # # Include common rules. diff --git a/usr/src/uts/intel/pcbe/core_pcbe.c b/usr/src/uts/intel/pcbe/core_pcbe.c index 2b29ed6d94..7424e2526b 100644 --- a/usr/src/uts/intel/pcbe/core_pcbe.c +++ b/usr/src/uts/intel/pcbe/core_pcbe.c @@ -819,7 +819,7 @@ core_pcbe_init(void) for (i = 0; i < num_gpc; i++) { /* - * Determine length of all supported event names + * determine length of all supported event names * (architectural + non-architectural) */ size = arch_events_string_length; diff --git a/usr/src/uts/intel/promif/prom_emul.c b/usr/src/uts/intel/promif/prom_emul.c index 5497d9eab8..cdf190ec6a 100644 --- a/usr/src/uts/intel/promif/prom_emul.c +++ b/usr/src/uts/intel/promif/prom_emul.c @@ -24,7 +24,9 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2015 Joyent, Inc. All rights reserved. + */ #include <sys/promif.h> #include <sys/promimpl.h> @@ -46,7 +48,7 @@ promif_create_prop(prom_node_t *pnp, char *name, void *val, int len, int flags) q = kmem_zalloc(sizeof (*q), KM_SLEEP); q->pp_name = kmem_zalloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(q->pp_name, name); - q->pp_val = kmem_alloc(len, KM_SLEEP); + q->pp_val = len > 0 ? kmem_alloc(len, KM_SLEEP) : NULL; q->pp_len = len; switch (flags) { case DDI_PROP_TYPE_INT: diff --git a/usr/src/uts/intel/smrt/Makefile b/usr/src/uts/intel/smrt/Makefile new file mode 100644 index 0000000000..1d9d8969ed --- /dev/null +++ b/usr/src/uts/intel/smrt/Makefile @@ -0,0 +1,72 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017, Joyent, Inc. +# + +# +# Path to the base of the uts directory tree +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = smrt +OBJECTS = $(SMRT_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(SMRT_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/io/scsi/adapters/smrt + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(CONFMOD) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Kernel Module Dependencies +# +LDFLAGS += -dy -Nmisc/scsi + + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/sockpfp/Makefile b/usr/src/uts/intel/sockpfp/Makefile index 2179fd50ff..7b54398af3 100644 --- a/usr/src/uts/intel/sockpfp/Makefile +++ b/usr/src/uts/intel/sockpfp/Makefile @@ -54,7 +54,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # CFLAGS += $(CCVERBOSE) -LDFLAGS += -dy -Nfs/sockfs -Nmisc/dls -Nmisc/mac -Ndrv/bpf -Ndrv/ip +LDFLAGS += -dy -Nfs/sockfs -Nmisc/dls -Nmisc/mac -Ndrv/ip INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf # diff --git a/usr/src/uts/intel/spdsock/Makefile b/usr/src/uts/intel/spdsock/Makefile index 63a2aeb834..cae0da72f8 100644 --- a/usr/src/uts/intel/spdsock/Makefile +++ b/usr/src/uts/intel/spdsock/Makefile @@ -56,8 +56,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # LDFLAGS += -dy -Ndrv/ip -INC_PATH += -I$(UTSBASE)/common/io/bpf - # # For now, disable these lint checks; maintainers should endeavor # to investigate and remove these for maximum lint coverage. diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h index 93fed4e87d..0c9ceac7be 100644 --- a/usr/src/uts/intel/sys/archsystm.h +++ b/usr/src/uts/intel/sys/archsystm.h @@ -185,13 +185,17 @@ extern void fakesoftint(void); extern void *plat_traceback(void *); /* - * The following two macros are the four byte instruction sequence of stac, ret - * and clac, ret. These are used in startup_smap() as a part of properly setting - * up the valid instructions. For more information on SMAP, see - * uts/intel/ia32/ml/copy.s. + * The following two macros are the four byte instruction sequence of stac, nop + * and clac, nop. 
These are used in startup_smap() and hotinline_smap() as a + * part of properly setting up the valid instructions. For more information on + * SMAP, see uts/intel/ia32/ml/copy.s, uts/i86pc/os/machdep.c and + * uts/common/os/modctl.c. + * + * Note that smap_disable and smap_enable are resolved to stubs at compile time, + * but inlined at runtime by do_hotinlines() in uts/i86pc/os/machdep.c. */ -#define SMAP_CLAC_INSTR 0xc3ca010f -#define SMAP_STAC_INSTR 0xc3cb010f +#define SMAP_CLAC_INSTR 0x90ca010f +#define SMAP_STAC_INSTR 0x90cb010f extern void smap_disable(void); extern void smap_enable(void); diff --git a/usr/src/uts/intel/sys/controlregs.h b/usr/src/uts/intel/sys/controlregs.h index fe0cf687b4..0be7b3b650 100644 --- a/usr/src/uts/intel/sys/controlregs.h +++ b/usr/src/uts/intel/sys/controlregs.h @@ -86,8 +86,8 @@ extern "C" { /* CR3 Register */ -#define CR3_PCD 0x00000010 /* cache disable */ -#define CR3_PWT 0x00000008 /* write through */ +#define CR3_PCD 0x00000010 /* cache disable */ +#define CR3_PWT 0x00000008 /* write through */ #if defined(_ASM) #define CR3_NOINVL_BIT 0x8000000000000000 #else @@ -110,18 +110,22 @@ extern "C" { #define CR4_PCE 0x0100 /* perf-monitoring counter enable */ #define CR4_OSFXSR 0x0200 /* OS fxsave/fxrstor support */ #define CR4_OSXMMEXCPT 0x0400 /* OS unmasked exception support */ - /* 0x0800 reserved */ +#define CR4_UMIP 0x0800 /* user-mode instruction prevention */ /* 0x1000 reserved */ -#define CR4_VMXE 0x2000 -#define CR4_SMXE 0x4000 +#define CR4_VMXE 0x2000 /* VMX enable */ +#define CR4_SMXE 0x4000 /* SMX enable */ + /* 0x8000 reserved */ +#define CR4_FSGSBASE 0x10000 /* FSGSBASE enable */ #define CR4_PCIDE 0x20000 /* PCID enable */ #define CR4_OSXSAVE 0x40000 /* OS xsave/xrestore support */ #define CR4_SMEP 0x100000 /* NX for user pages in kernel */ #define CR4_SMAP 0x200000 /* kernel can't access user pages */ +#define CR4_PKE 0x400000 /* protection key enable */ #define FMT_CR4 \ - "\20\26smap\25smep\23osxsav\22pcide" \ - 
"\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge" \ + "\20\27pke\26smap\25smep\23osxsav" \ + "\22pcide\20fsgsbase\17smxe\16vmxe" \ + "\14umip\13xmme\12fxsr\11pce\10pge" \ "\7mce\6pae\5pse\4de\3tsd\2pvi\1vme" /* @@ -158,7 +162,9 @@ extern "C" { #define MSR_AMD_EFER 0xc0000080 /* extended feature enable MSR */ +#define AMD_EFER_TCE 0x8000 /* translation cache extension */ #define AMD_EFER_FFXSR 0x4000 /* fast fxsave/fxrstor */ +#define AMD_EFER_LMSLE 0x2000 /* long mode segment limit enable */ #define AMD_EFER_SVME 0x1000 /* svm enable */ #define AMD_EFER_NXE 0x0800 /* no-execute enable */ #define AMD_EFER_LMA 0x0400 /* long mode active (read-only) */ @@ -166,7 +172,7 @@ extern "C" { #define AMD_EFER_SCE 0x0001 /* system call extensions */ #define FMT_AMD_EFER \ - "\20\17ffxsr\15svme\14nxe\13lma\11lme\1sce" + "\20\20tce\17ffxsr\16lmsle\15svme\14nxe\13lma\11lme\1sce" /* AMD's SYSCFG register */ @@ -194,6 +200,18 @@ extern "C" { #define MSR_AMD_KGSBASE 0xc0000102 /* swapgs swaps this with gsbase */ #define MSR_AMD_TSCAUX 0xc0000103 /* %ecx value on rdtscp insn */ + +/* AMD's SVM MSRs */ + +#define MSR_AMD_VM_CR 0xc0010114 /* SVM global control */ +#define MSR_AMD_VM_HSAVE_PA 0xc0010117 /* SVM host save area address */ + +#define AMD_VM_CR_DPD (1 << 0) +#define AMD_VM_CR_R_INIT (1 << 1) +#define AMD_VM_CR_DIS_A20M (1 << 2) +#define AMD_VM_CR_LOCK (1 << 3) +#define AMD_VM_CR_SVMDIS (1 << 4) + /* AMD's configuration MSRs, weakly documented in the revision guide */ #define MSR_AMD_DC_CFG 0xc0011022 diff --git a/usr/src/uts/intel/sys/debugreg.h b/usr/src/uts/intel/sys/debugreg.h index b537076d26..8528a293ab 100644 --- a/usr/src/uts/intel/sys/debugreg.h +++ b/usr/src/uts/intel/sys/debugreg.h @@ -26,6 +26,9 @@ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ /* All Rights Reserved */ +/* + * Copyright (c) 2018, Joyent, Inc. All rights reserved. 
+ */ #ifndef _SYS_DEBUGREG_H #define _SYS_DEBUGREG_H @@ -57,6 +60,7 @@ extern "C" { #define DR_ICEALSO 0x2000 /* Flag bit reserved for in-circuit-emulator */ #define DR_SINGLESTEP 0x4000 /* Trap resulting from the single-step flag */ #define DR_TASKSWITCH 0x8000 /* Trap resulting from a task-switch */ +#define DR_IN_RTM 0x10000 /* Trap inside an RTM region */ /* * dr7 controls the rest of the debug registers. @@ -73,6 +77,8 @@ extern "C" { #define DR_CONTROL_RESERVED 0xFC00 /* Bits reserved by Intel */ #define DR_LOCAL_SLOWDOWN 0x100 /* Slow the pipeline for ldt addrs */ #define DR_GLOBAL_SLOWDOWN 0x200 /* Slow the pipeline for gdt addrs */ +#define DR_RTM 0x800 /* Restricted Transactional Memory */ +#define DR_GENERAL_DETECT 0x2000 /* General Detect Enable */ #define DR_LOCAL_ENABLE_SHIFT 0 /* Additional shift: local enable */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Additional shift: global enable */ @@ -95,6 +101,7 @@ extern "C" { #define DR_LEN_1 0x0 /* Settings for data length */ #define DR_LEN_2 0x4 #define DR_LEN_4 0xC +#define DR_LEN_8 0x8 #ifdef __cplusplus } diff --git a/usr/src/uts/intel/sys/machbrand.h b/usr/src/uts/intel/sys/machbrand.h index 3f9ebdb6b7..ad7f631649 100644 --- a/usr/src/uts/intel/sys/machbrand.h +++ b/usr/src/uts/intel/sys/machbrand.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2016 Joyent, Inc. 
*/ #ifndef _SYS_MACHBRAND_H @@ -32,20 +33,25 @@ extern "C" { #ifndef _ASM #include <sys/model.h> +#include <sys/thread.h> struct brand_mach_ops { void (*b_sysenter)(void); + void (*b_int80)(void); void (*b_int91)(void); void (*b_syscall)(void); void (*b_syscall32)(void); + greg_t (*b_fixsegreg)(greg_t, model_t); + uintptr_t (*b_fsbase)(klwp_t *, uintptr_t); }; #endif /* _ASM */ #define BRAND_CB_SYSENTER 0 -#define BRAND_CB_INT91 1 -#define BRAND_CB_SYSCALL 2 -#define BRAND_CB_SYSCALL32 3 +#define BRAND_CB_INT80 1 +#define BRAND_CB_INT91 2 +#define BRAND_CB_SYSCALL 3 +#define BRAND_CB_SYSCALL32 4 #ifdef __cplusplus } diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h index fc2f1847cd..6bf18b3082 100644 --- a/usr/src/uts/intel/sys/segments.h +++ b/usr/src/uts/intel/sys/segments.h @@ -695,6 +695,8 @@ extern void _start(), cmnint(); extern void achktrap(), mcetrap(); extern void xmtrap(); extern void fasttrap(); +extern void sys_int80(); +extern void brand_sys_int80(); extern void dtrace_ret(); /* KPTI trampolines */ @@ -710,6 +712,8 @@ extern void tr_overrun(), tr_resvtrap(); extern void tr_achktrap(), tr_mcetrap(); extern void tr_xmtrap(); extern void tr_fasttrap(); +extern void tr_sys_int80(); +extern void tr_brand_sys_int80(); extern void tr_dtrace_ret(); #if !defined(__amd64) diff --git a/usr/src/uts/intel/sys/ucontext.h b/usr/src/uts/intel/sys/ucontext.h index 66300e71a1..2d4e39b3e8 100644 --- a/usr/src/uts/intel/sys/ucontext.h +++ b/usr/src/uts/intel/sys/ucontext.h @@ -20,6 +20,7 @@ */ /* + * Copyright 2015 Joyent, Inc. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 
@@ -84,9 +85,16 @@ struct __ucontext { sigset_t uc_sigmask; stack_t uc_stack; mcontext_t uc_mcontext; - long uc_filler[5]; /* see ABI spec for Intel386 */ + /* + * The Intel386 ABI specification includes a 5-element array of longs + * called "uc_filler", padding the size of the struct to 512 bytes. To + * allow zone brands to communicate extra data right the way through + * the signal handling process, from sigacthandler to setcontext, we + * steal the first three of these longs as a brand-private member. + */ + void *uc_brand_data[3]; + long uc_filler[2]; }; - #if defined(_SYSCALL32) /* Kernel view of user ILP32 ucontext structure */ @@ -97,7 +105,8 @@ typedef struct ucontext32 { sigset_t uc_sigmask; stack32_t uc_stack; mcontext32_t uc_mcontext; - int32_t uc_filler[5]; + caddr32_t uc_brand_data[3]; + int32_t uc_filler[2]; } ucontext32_t; #if defined(_KERNEL) diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 585a6576bc..0545633682 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -214,6 +214,18 @@ extern "C" { #define CPUID_AMD_EBX_SSB_NO 0x004000000 /* AMD: SSB Fixed */ /* + * AMD SVM features (extended function 0x8000000A). + */ +#define CPUID_AMD_EDX_NESTED_PAGING 0x000000001 /* AMD: SVM NP */ +#define CPUID_AMD_EDX_LBR_VIRT 0x000000002 /* AMD: LBR virt. */ +#define CPUID_AMD_EDX_SVML 0x000000004 /* AMD: SVM lock */ +#define CPUID_AMD_EDX_NRIPS 0x000000008 /* AMD: NRIP save */ +#define CPUID_AMD_EDX_TSC_RATE_MSR 0x000000010 /* AMD: MSR TSC ctrl */ +#define CPUID_AMD_EDX_VMCB_CLEAN 0x000000020 /* AMD: VMCB clean bits */ +#define CPUID_AMD_EDX_FLUSH_ASID 0x000000040 /* AMD: flush by ASID */ +#define CPUID_AMD_EDX_DECODE_ASSISTS 0x000000080 /* AMD: decode assists */ + +/* * Intel now seems to have claimed part of the "extended" function * space that we previously for non-Intel implementors to use. 
* More excitingly still, they've claimed bit 20 to mean LAHF/SAHF @@ -223,6 +235,38 @@ extern "C" { #define CPUID_INTC_ECX_AHF64 0x00100000 /* LAHF and SAHF in long mode */ /* + * Intel uses cpuid leaf 6 to cover various thermal and power control + * operations. + */ +#define CPUID_INTC_EAX_DTS 0x00000001 /* Digital Thermal Sensor */ +#define CPUID_INTC_EAX_TURBO 0x00000002 /* Turboboost */ +#define CPUID_INTC_EAX_ARAT 0x00000004 /* APIC-Timer-Always-Running */ +/* bit 3 is reserved */ +#define CPUID_INTC_EAX_PLN 0x00000010 /* Power limit notification */ +#define CPUID_INTC_EAX_ECMD 0x00000020 /* Clock mod. duty cycle */ +#define CPUID_INTC_EAX_PTM 0x00000040 /* Package thermal management */ +#define CPUID_INTC_EAX_HWP 0x00000080 /* HWP base registers */ +#define CPUID_INTC_EAX_HWP_NOT 0x00000100 /* HWP Notification */ +#define CPUID_INTC_EAX_HWP_ACT 0x00000200 /* HWP Activity Window */ +#define CPUID_INTC_EAX_HWP_EPR 0x00000400 /* HWP Energy Perf. Pref. */ +#define CPUID_INTC_EAX_HWP_PLR 0x00000800 /* HWP Package Level Request */ +/* bit 12 is reserved */ +#define CPUID_INTC_EAX_HDC 0x00002000 /* Hardware Duty Cycling (HDC) */ +#define CPUID_INTC_EAX_TURBO3 0x00004000 /* Turbo Boost Max Tech 3.0 */ +#define CPUID_INTC_EAX_HWP_CAP 0x00008000 /* HWP Capabilities */ +#define CPUID_INTC_EAX_HWP_PECI 0x00010000 /* HWP PECI override */ +#define CPUID_INTC_EAX_HWP_FLEX 0x00020000 /* Flexible HWP */ +#define CPUID_INTC_EAX_HWP_FAST 0x00040000 /* Fast IA32_HWP_REQUEST */ +/* bit 19 is reserved */ +#define CPUID_INTC_EAX_HWP_IDLE 0x00100000 /* Ignore Idle Logical HWP */ + +#define CPUID_INTC_EBX_DTS_NTRESH(x) ((x) & 0xf) /* low 4 bits of EBX; presumably the DTS interrupt threshold count - confirm against the SDM */ + +#define CPUID_INTC_ECX_MAPERF 0x00000001 /* IA32_MPERF / IA32_APERF */ +/* bits 1-2 are reserved */ +#define CPUID_INTC_ECX_PERFBIAS 0x00000008 /* IA32_ENERGY_PERF_BIAS */ + +/* * Intel also uses cpuid leaf 7 to have additional instructions and features.
* Like some other leaves, but unlike the current ones we care about, it * requires us to specify both a leaf in %eax and a sub-leaf in %ecx. To deal @@ -444,6 +488,99 @@ extern "C" { #define MSR_IA32_FLUSH_CMD 0x10b #define IA32_FLUSH_CMD_L1D 0x01 +/* + * Intel VMX related MSRs + */ +#define MSR_IA32_FEAT_CTRL 0x03a +#define IA32_FEAT_CTRL_LOCK 0x1 +#define IA32_FEAT_CTRL_SMX_EN 0x2 +#define IA32_FEAT_CTRL_VMX_EN 0x4 + +#define MSR_IA32_VMX_BASIC 0x480 +#define IA32_VMX_BASIC_INS_OUTS (1UL << 54) +#define IA32_VMX_BASIC_TRUE_CTRLS (1UL << 55) /* bits 54 and 55 only fit when unsigned long is 64 bits wide */ + +#define MSR_IA32_VMX_PROCBASED_CTLS 0x482 +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x48e +#define IA32_VMX_PROCBASED_2ND_CTLS (1UL << 31) + +#define MSR_IA32_VMX_PROCBASED2_CTLS 0x48b +#define IA32_VMX_PROCBASED2_EPT (1UL << 1) +#define IA32_VMX_PROCBASED2_VPID (1UL << 5) + +#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c +#define IA32_VMX_EPT_VPID_INVEPT (1UL << 20) +#define IA32_VMX_EPT_VPID_INVEPT_SINGLE (1UL << 25) +#define IA32_VMX_EPT_VPID_INVEPT_ALL (1UL << 26) + +/* + * Intel Thermal MSRs + */ +#define MSR_IA32_THERM_INTERRUPT 0x19b +#define IA32_THERM_INTERRUPT_HIGH_IE 0x00000001 +#define IA32_THERM_INTERRUPT_LOW_IE 0x00000002 +#define IA32_THERM_INTERRUPT_PROCHOT_IE 0x00000004 +#define IA32_THERM_INTERRUPT_FORCEPR_IE 0x00000008 +#define IA32_THERM_INTERRUPT_CRIT_IE 0x00000010 +#define IA32_THERM_INTERRUPT_TR1_VAL(x) (((x) >> 8) & 0x7f) +#define IA32_THERM_INTTERUPT_TR1_IE 0x00008000 /* NOTE(review): name spells INTTERUPT while its siblings spell INTERRUPT; renaming needs every user updated */ +#define IA32_THERM_INTTERUPT_TR2_VAL(x) (((x) >> 16) & 0x7f) /* NOTE(review): same INTTERUPT misspelling as TR1_IE */ +#define IA32_THERM_INTERRUPT_TR2_IE 0x00800000 +#define IA32_THERM_INTERRUPT_PL_NE 0x01000000 + +#define MSR_IA32_THERM_STATUS 0x19c +#define IA32_THERM_STATUS_STATUS 0x00000001 +#define IA32_THERM_STATUS_STATUS_LOG 0x00000002 +#define IA32_THERM_STATUS_PROCHOT 0x00000004 +#define IA32_THERM_STATUS_PROCHOT_LOG 0x00000008 +#define IA32_THERM_STATUS_CRIT_STATUS 0x00000010 +#define IA32_THERM_STATUS_CRIT_LOG 0x00000020 +#define IA32_THERM_STATUS_TR1_STATUS 0x00000040 +#define
IA32_THERM_STATUS_TR1_LOG 0x00000080 +#define IA32_THERM_STATUS_TR2_STATUS 0x00000100 +#define IA32_THERM_STATUS_TR2_LOG 0x00000200 +#define IA32_THERM_STATUS_POWER_LIMIT_STATUS 0x00000400 +#define IA32_THERM_STATUS_POWER_LIMIT_LOG 0x00000800 +#define IA32_THERM_STATUS_CURRENT_STATUS 0x00001000 +#define IA32_THERM_STATUS_CURRENT_LOG 0x00002000 +#define IA32_THERM_STATUS_CROSS_DOMAIN_STATUS 0x00004000 +#define IA32_THERM_STATUS_CROSS_DOMAIN_LOG 0x00008000 +#define IA32_THERM_STATUS_READING(x) (((x) >> 16) & 0x7f) +#define IA32_THERM_STATUS_RESOLUTION(x) (((x) >> 27) & 0x0f) +#define IA32_THERM_STATUS_READ_VALID 0x80000000 + +#define MSR_TEMPERATURE_TARGET 0x1a2 +#define MSR_TEMPERATURE_TARGET_TARGET(x) (((x) >> 16) & 0xff) +/* + * Not all models support the offset. Refer to the Intel SDM Volume 4 for a list + * of which models have support for which bits. + */ +#define MSR_TEMPERATURE_TARGET_OFFSET(x) (((x) >> 24) & 0x0f) + +#define MSR_IA32_PACKAGE_THERM_STATUS 0x1b1 +#define IA32_PKG_THERM_STATUS_STATUS 0x00000001 +#define IA32_PKG_THERM_STATUS_STATUS_LOG 0x00000002 +#define IA32_PKG_THERM_STATUS_PROCHOT 0x00000004 +#define IA32_PKG_THERM_STATUS_PROCHOT_LOG 0x00000008 +#define IA32_PKG_THERM_STATUS_CRIT_STATUS 0x00000010 +#define IA32_PKG_THERM_STATUS_CRIT_LOG 0x00000020 +#define IA32_PKG_THERM_STATUS_TR1_STATUS 0x00000040 +#define IA32_PKG_THERM_STATUS_TR1_LOG 0x00000080 +#define IA32_PKG_THERM_STATUS_TR2_STATUS 0x00000100 +#define IA32_PKG_THERM_STATUS_TR2_LOG 0x00000200 +#define IA32_PKG_THERM_STATUS_READING(x) (((x) >> 16) & 0x7f) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x1b2 +#define IA32_PKG_THERM_INTERRUPT_HIGH_IE 0x00000001 +#define IA32_PKG_THERM_INTERRUPT_LOW_IE 0x00000002 +#define IA32_PKG_THERM_INTERRUPT_PROCHOT_IE 0x00000004 +#define IA32_PKG_THERM_INTERRUPT_OVERHEAT_IE 0x00000010 +#define IA32_PKG_THERM_INTERRUPT_TR1_VAL(x) (((x) >> 8) & 0x7f) +#define IA32_PKG_THERM_INTTERUPT_TR1_IE 0x00008000 /* NOTE(review): INTTERUPT is misspelled here and in the TR2_VAL macro that follows; siblings spell INTERRUPT */ +#define IA32_PKG_THERM_INTTERUPT_TR2_VAL(x) (((x)
>> 16) & 0x7f) +#define IA32_PKG_THERM_INTERRUPT_TR2_IE 0x00800000 +#define IA32_PKG_THERM_INTERRUPT_PL_NE 0x01000000 + #define MCI_CTL_VALUE 0xffffffff #define MTRR_TYPE_UC 0 @@ -568,6 +705,8 @@ extern "C" { #define X86FSET_TBM 90 #define X86FSET_AVX512VNNI 91 #define X86FSET_AMD_PCEC 92 +#define X86FSET_CORE_THERMAL 93 +#define X86FSET_PKG_THERMAL 94 /* * Intel Deep C-State invariant TSC in leaf 0x80000007. @@ -575,16 +714,6 @@ extern "C" { #define CPUID_TSC_CSTATE_INVARIANCE (0x100) /* - * Intel Deep C-state always-running local APIC timer - */ -#define CPUID_CSTATE_ARAT (0x4) - -/* - * Intel ENERGY_PERF_BIAS MSR indicated by feature bit CPUID.6.ECX[3]. - */ -#define CPUID_EPB_SUPPORT (1 << 3) - -/* * Intel TSC deadline timer */ #define CPUID_DEADLINE_TSC (1 << 24) @@ -851,7 +980,9 @@ extern "C" { * Definitions for Intel processor models. These are all for Family 6 * processors. This list and the Atom set below it are not exhaustive. */ +#define INTC_MODEL_YONAH 0x0e #define INTC_MODEL_MEROM 0x0f +#define INTC_MODEL_MEROM_L 0x16 #define INTC_MODEL_PENRYN 0x17 #define INTC_MODEL_DUNNINGTON 0x1d @@ -937,7 +1068,7 @@ extern "C" { #if defined(_KERNEL) || defined(_KMEMUSER) -#define NUM_X86_FEATURES 93 +#define NUM_X86_FEATURES 95 extern uchar_t x86_featureset[]; extern void free_x86_featureset(void *featureset); @@ -956,6 +1087,8 @@ extern uint_t pentiumpro_bug4046376; extern const char CyrixInstead[]; +extern void (*spec_l1d_flush)(void); + #endif #if defined(_KERNEL) diff --git a/usr/src/uts/intel/usba/Makefile b/usr/src/uts/intel/usba/Makefile index ae74e680d1..37c0664abe 100644 --- a/usr/src/uts/intel/usba/Makefile +++ b/usr/src/uts/intel/usba/Makefile @@ -23,7 +23,7 @@ # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc. # # Path to the base of the uts directory tree (usually /usr/src/uts).
@@ -43,16 +43,6 @@ ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE) # include $(UTSBASE)/intel/Makefile.intel -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-parentheses -CERRWARN += -_gcc=-Wno-switch -CERRWARN += -_gcc=-Wno-unused-label -CERRWARN += -_gcc=-Wno-unused-value -CERRWARN += -_gcc=-Wno-unused-variable - -# needs work -SMOFF += all_func_returns,deref_check - # # Define targets # diff --git a/usr/src/uts/intel/vmxnet/Makefile b/usr/src/uts/intel/vmxnet/Makefile new file mode 100644 index 0000000000..4f3ebcf5af --- /dev/null +++ b/usr/src/uts/intel/vmxnet/Makefile @@ -0,0 +1,93 @@ +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2012, Joyent, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# VMware Ethernet Adapter (vmxnet) driver module +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. +UTSCLOSED = ../../../../closed/uts + +# +# Define the module and object file sets. +# +MODULE = vmxnet +# +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/intel/io/vmxnet + +# +# Include common rules.
+# +include $(UTSBASE)/intel/Makefile.intel + +# +# The list of object files is defined here, rather than in Makefile.files, +# because the "$(CLOSED_BUILD)" macro has not been defined at the time +# Makefile.files is processed. +# +VMXNET_OBJS += vmxnet.o + +OBJECTS = $(VMXNET_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(VMXNET_OBJS:%.o=$(LINTS_DIR)/%.ln) + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(LINT_MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/i86pc +LDFLAGS += -dy -N misc/gld + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +# +# Intentionally don't build lint libraries to minimize divergence with +# the upstream source. The three lint targets below are therefore empty. +# +lint: +modlintlib: +clean.lint: + +install: $(INSTALL_DEPS) + +# +# Include common targets. From this directory ../Makefile.targ is the +# same file as $(UTSBASE)/intel/Makefile.targ. +# +include ../Makefile.targ diff --git a/usr/src/uts/intel/vnd/Makefile b/usr/src/uts/intel/vnd/Makefile new file mode 100644 index 0000000000..b94d014eb7 --- /dev/null +++ b/usr/src/uts/intel/vnd/Makefile @@ -0,0 +1,59 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2018 Joyent, Inc. +# + +UTSBASE = ../..
+ +MODULE = vnd +OBJECTS = $(VND_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(VND_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) + +include $(UTSBASE)/intel/Makefile.intel + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +ALL_TARGET = $(BINARY) $(SRC_CONFFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) +CONF_SRCDIR = $(UTSBASE)/common/io/vnd + +CPPFLAGS += -I$(UTSBASE)/i86pc +# +# Kernel module dependencies, recorded by the linker via -N. +# +LDFLAGS += -dy -Nmisc/neti -Nmisc/hook -Nfs/dev -Nmisc/gsqueue + +# +# We use <sys/ctype.h> which causes gcc to think that all of its inline +# functions are defined and unused. +# +CERRWARN += -_gcc=-Wno-unused-function + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/vxlan/Makefile b/usr/src/uts/intel/vxlan/Makefile new file mode 100644 index 0000000000..89a24c17f5 --- /dev/null +++ b/usr/src/uts/intel/vxlan/Makefile @@ -0,0 +1,51 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +UTSBASE = ../..
+ +MODULE = vxlan +OBJECTS = $(OVERLAY_VXLAN_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(OVERLAY_VXLAN_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_OVERLAY_DIR)/$(MODULE) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +LDFLAGS += -dy -Ndrv/overlay -Ndrv/ip + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/zfd/Makefile b/usr/src/uts/intel/zfd/Makefile new file mode 100644 index 0000000000..c270466d08 --- /dev/null +++ b/usr/src/uts/intel/zfd/Makefile @@ -0,0 +1,48 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2014 Joyent, Inc. All rights reserved. +# +# uts/intel/zfd/Makefile +# +# Builds the zfd module from $(ZFD_OBJS); it installs under +# $(USR_DRV_DIR) and has no conf file among its targets. + +UTSBASE = ../..
+ +MODULE = zfd +OBJECTS = $(ZFD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(ZFD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/zfs/Makefile b/usr/src/uts/intel/zfs/Makefile index b5955cfe7a..f1715120dd 100644 --- a/usr/src/uts/intel/zfs/Makefile +++ b/usr/src/uts/intel/zfs/Makefile @@ -73,6 +73,7 @@ INC_PATH += -I$(UTSBASE)/common/fs/zfs/lua INC_PATH += -I$(SRC)/common INC_PATH += -I$(COMMONBASE)/zfs +CPPFLAGS += -I$(UTSBASE)/i86pc C99LMODE= -Xc99=%all # |