diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/Makefile.lint | 1 | ||||
-rw-r--r-- | usr/src/cmd/fs.d/Makefile | 15 | ||||
-rw-r--r-- | usr/src/cmd/fs.d/xmemfs/Makefile | 37 | ||||
-rw-r--r-- | usr/src/cmd/fs.d/xmemfs/mount.c | 330 | ||||
-rw-r--r-- | usr/src/cmd/zoneadm/zoneadm.c | 3 | ||||
-rw-r--r-- | usr/src/pkgdefs/SUNWcsu/prototype_i386 | 6 | ||||
-rw-r--r-- | usr/src/pkgdefs/SUNWhea/prototype_i386 | 4 | ||||
-rw-r--r-- | usr/src/tools/scripts/bfu.sh | 9 | ||||
-rw-r--r-- | usr/src/uts/common/sys/mntent.h | 3 | ||||
-rw-r--r-- | usr/src/uts/intel/Makefile.files | 9 | ||||
-rw-r--r-- | usr/src/uts/intel/Makefile.intel.shared | 2 | ||||
-rw-r--r-- | usr/src/uts/intel/Makefile.rules | 9 | ||||
-rw-r--r-- | usr/src/uts/intel/fs/xmemfs/seg_xmem.c | 823 | ||||
-rw-r--r-- | usr/src/uts/intel/fs/xmemfs/xmem_dir.c | 1025 | ||||
-rw-r--r-- | usr/src/uts/intel/fs/xmemfs/xmem_subr.c | 566 | ||||
-rw-r--r-- | usr/src/uts/intel/fs/xmemfs/xmem_vfsops.c | 810 | ||||
-rw-r--r-- | usr/src/uts/intel/fs/xmemfs/xmem_vnops.c | 1736 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/Makefile | 15 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/fs/seg_xmem.h | 108 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/fs/xmem.h | 282 | ||||
-rw-r--r-- | usr/src/uts/intel/xmemfs/Makefile | 94 |
21 files changed, 26 insertions, 5861 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index 9198b7d694..50773f685d 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -421,7 +421,6 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \ i386_SUBDIRS= \ cmd/biosdev \ - cmd/fs.d/xmemfs \ cmd/rtc \ lib/brand/lx \ lib/cfgadm_plugins/sata diff --git a/usr/src/cmd/fs.d/Makefile b/usr/src/cmd/fs.d/Makefile index d14b141a07..7027a0e299 100644 --- a/usr/src/cmd/fs.d/Makefile +++ b/usr/src/cmd/fs.d/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -46,11 +46,8 @@ include ../Makefile.cmd SUBDIR1= lofs zfs SUBDIR2= dev fd pcfs nfs hsfs proc ctfs udfs ufs tmpfs cachefs autofs mntfs objfs -i386_SUBDIRS= xmemfs -i386_I18NDIRS= xmemfs -SUBDIRS= $(SUBDIR1) $(SUBDIR2) $($(MACH)_SUBDIRS) -ALL_SUBDIRS= $(SUBDIR1) $(SUBDIR2) $(i386_SUBDIRS) -I18NDIRS= $(SUBDIR2) $(i386_I18NDIRS) +SUBDIRS= $(SUBDIR1) $(SUBDIR2) +I18NDIRS= $(SUBDIR2) CLOBBERFILES += $(POFILES_XPG4) @@ -195,11 +192,11 @@ fs.dfl: $(ROOTUSRSBINFF): $(ROOTUSRSBIN)/ff -$(RM) $@; $(SYMLINK) ./ff $@ -clean: $(ALL_SUBDIRS) .WAIT clean_local +clean: $(SUBDIRS) .WAIT clean_local clean_local: -clobber: $(ALL_SUBDIRS) .WAIT clobber_local +clobber: $(SUBDIRS) .WAIT clobber_local clobber_local: clean_local $(RM) $(PROG) $(ROOTFS_PROG) $(SPPROG) $(MNTTAB) $(DEFAULTFILES) \ @@ -207,7 +204,7 @@ clobber_local: clean_local lint: -$(ALL_SUBDIRS): FRC +$(SUBDIRS): FRC @cd $@; pwd; $(MAKE) $(MFLAGS) $(TARGET) FRC: diff --git a/usr/src/cmd/fs.d/xmemfs/Makefile b/usr/src/cmd/fs.d/xmemfs/Makefile deleted file mode 100644 index b2e9e8c85d..0000000000 --- a/usr/src/cmd/fs.d/xmemfs/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#ident "%Z%%M% %I% %E% SMI" -# -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# - -FSTYPE= xmemfs -LIBPROG= mount - -include ../Makefile.fstype -include ../Makefile.mount - -CPPFLAGS += -I../../../uts/intel -I../../../uts/i86pc - -include ../Makefile.mount.targ diff --git a/usr/src/cmd/fs.d/xmemfs/mount.c b/usr/src/cmd/fs.d/xmemfs/mount.c deleted file mode 100644 index 9cc7b5f950..0000000000 --- a/usr/src/cmd/fs.d/xmemfs/mount.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <stdio.h> -#include <signal.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/mntent.h> -#include <sys/mnttab.h> -#include <sys/mntent.h> -#include <sys/mount.h> -#include <sys/fs/xmem.h> -#include <sys/types.h> -#include <locale.h> -#include <sys/stat.h> -#include <sys/statvfs.h> -#include <fslib.h> -#include <stdlib.h> - -enum { - FSSIZE, - VERBOSE, - LARGEBSIZE, -#ifdef DEBUG - NOLARGEBSIZE, - BSIZE, - RESERVEMEM, - NORESERVEMEM, -#endif - XOPTSZ -}; - -static char *myopts[] = { - "size", /* required */ - "vb", - "largebsize", -#ifdef DEBUG - "nolargebsize", /* default */ - "bsize", /* internal use only */ - "reservemem", /* default */ - "noreservemem", -#endif - NULL -}; - -static offset_t -atosz(char *optarg) -{ - offset_t off; - char *endptr; - - off = strtoll(optarg, &endptr, 0); - - switch (*endptr) { - case 't': case 'T': - off *= 1024; - /* FALLTHROUGH */ - case 'g': case 'G': - off *= 1024; - /* FALLTHROUGH */ - case 'm': case 'M': - off *= 1024; - /* FALLTHROUGH */ - case 'k': case 'K': - off *= 1024; - /* FALLTHROUGH */ - default: - break; - } - return (off); -} - - -int -main(int argc, char *argv[]) -{ - struct mnttab mnt; - int c; - char *myname; - char optbuf[MAX_MNTOPT_STR]; - char typename[64]; - char *options, *value; - int error = 0; - int verbose = 0; - int nmflg = 0; - offset_t fssize = 0; - offset_t bsize = 0; - int optsize = sizeof (struct xmemfs_args); - int mflg = 0; - int optcnt = 0; - int qflg = 0; - char *saveopt; - struct xmemfs_args xargs = { - 0, /* xa_fssize - file system sz */ - 0, /* xa_bsize - blk sz */ - XARGS_RESERVEMEM /* xa_flags */ - }; - - (void) setlocale(LC_ALL, ""); - -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) textdomain(TEXT_DOMAIN); - - myname = strrchr(argv[0], '/'); - myname = myname ? myname + 1 : argv[0]; - (void) snprintf(typename, sizeof (typename), "%s_%s", MNTTYPE_XMEMFS, - myname); - argv[0] = typename; - - /* RO xmemfs not supported... */ - (void) strlcpy(optbuf, "rw", sizeof (optbuf)); - - while ((c = getopt(argc, argv, "Vqo:mO")) != EOF) { - switch (c) { - case 'q': - qflg++; - break; - case 'V': - verbose++; - break; - case 'm': - nmflg++; - mflg |= MS_NOMNTTAB; - break; - case 'O': - mflg |= MS_OVERLAY; - break; - case 'o': - options = optarg; - while (*options != '\0') { - saveopt = options; - - switch (getsubopt(&options, myopts, &value)) { - case LARGEBSIZE: - xargs.xa_flags |= XARGS_LARGEPAGES; - break; - case FSSIZE: - if (value) { - fssize = atosz(value); - if (!fssize) { - (void) fprintf(stderr, -gettext("%s: value %s for option \"%s\" is invalid\n"), -typename, value, myopts[FSSIZE]); - error++; - break; - } - xargs.xa_fssize = fssize; - optcnt++; - if (verbose) - (void) fprintf(stderr, -gettext("setting fssize to %d\n"), fssize); - } else { - (void) fprintf(stderr, -gettext("%s: option \"%s\" requires value\n"), typename, myopts[FSSIZE]); - error++; - } - break; -#ifdef DEBUG - case RESERVEMEM: - xargs.xa_flags |= XARGS_RESERVEMEM; - break; - case NORESERVEMEM: - xargs.xa_flags &= ~XARGS_RESERVEMEM; - break; - case NOLARGEBSIZE: - xargs.xa_flags &= ~XARGS_LARGEPAGES; - break; - case BSIZE: /* file system block size */ - if (value) { - bsize = atosz(value); - if (!bsize) { - (void) fprintf(stderr, -gettext("%s: value %s for option \"%s\" is invalid\n"), -typename, value, myopts[FSSIZE]); - error++; - break; - } - xargs.xa_bsize = bsize; - optcnt++; - if (verbose) - (void) fprintf(stderr, -gettext("setting bsize to %d\n"), bsize); - } else { - (void) fprintf(stderr, -gettext("%s: option \"%s\" requires value\n"), typename, myopts[BSIZE]); - error++; - } - break; -#endif - - case VERBOSE: - verbose++; - break; - default: - if (fsisstdopt(saveopt)) { - (void) strlcat(optbuf, ",", - sizeof (optbuf)); - (void) strlcat(optbuf, - saveopt, sizeof (optbuf)); - break; - } - if (!qflg) { - (void) fprintf(stderr, gettext( - "%s: WARNING: ignoring " - "option \"%s\"\n"), - typename, saveopt); - } - - break; - } - } - if (bsize) { - (void) snprintf(optbuf, sizeof (optbuf), - "%s,bsize=%lld", optbuf, bsize); - if (--optcnt) - (void) strlcat(optbuf, ",", - sizeof (optbuf)); - if (verbose) - (void) fprintf(stderr, "optbuf:%s\n", - optbuf); - } - if (fssize) { - (void) snprintf(optbuf, sizeof (optbuf), - "%s,size=%lld", optbuf, fssize); - if (--optcnt) - (void) strlcat(optbuf, ",", - sizeof (optbuf)); - if (verbose) - (void) fprintf(stderr, "optbuf:%s\n", - optbuf); - } else { - error++; - } - if (options[0] && !error) { - (void) strlcat(optbuf, options, - sizeof (optbuf)); - if (verbose) - (void) fprintf(stderr, "optbuf:%s\n", - optbuf); - } - if (verbose) - (void) fprintf(stderr, "optsize:%d optbuf:%s\n", - optsize, optbuf); - break; - default: - error++; - break; - } - } - - if (verbose && !error) { - char *optptr; - - (void) fprintf(stderr, "%s", typename); - for (optcnt = 1; optcnt < argc; optcnt++) { - optptr = argv[optcnt]; - if (optptr) - (void) fprintf(stderr, " %s", optptr); - } - (void) fprintf(stderr, "\n"); - } - - if (argc - optind != 2 || error) { - (void) fprintf(stderr, - gettext("Usage: %s -o[largebsize,]size=sz" - " xmem mount_point\n"), typename); - exit(1); - } - - mnt.mnt_special = argv[optind++]; - mnt.mnt_mountp = argv[optind++]; - mnt.mnt_fstype = MNTTYPE_XMEMFS; - mflg |= MS_DATA | MS_OPTIONSTR; - mnt.mnt_mntopts = optbuf; - - saveopt = strdup(optbuf); - - if (verbose) { - (void) fprintf(stderr, "mount(%s, \"%s\", %d, %s", - mnt.mnt_special, mnt.mnt_mountp, mflg, MNTTYPE_XMEMFS); - if (optsize) - (void) fprintf(stderr, ", \"%s\", %d)\n", - optbuf, strlen(optbuf)); - else - (void) fprintf(stderr, ")\n"); - } - if (mount(mnt.mnt_special, mnt.mnt_mountp, mflg, MNTTYPE_XMEMFS, - &xargs, optsize, optbuf, MAX_MNTOPT_STR)) { - if (errno == EBUSY) - (void) fprintf(stderr, - gettext("mount: %s already mounted\n"), - mnt.mnt_mountp); - else - perror("mount"); - exit(1); - } - - if (!qflg && saveopt != NULL) - cmp_requested_to_actual_options(saveopt, optbuf, - mnt.mnt_special, mnt.mnt_mountp); - - return (0); -} diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index 5d4f180419..9da3182f85 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -926,8 +926,7 @@ validate_zonepath(char *path, int cmd_num) rpath); return (Z_ERR); } - if ((strcmp(stbuf.st_fstype, MNTTYPE_TMPFS) == 0) || - (strcmp(stbuf.st_fstype, MNTTYPE_XMEMFS) == 0)) { + if (strcmp(stbuf.st_fstype, MNTTYPE_TMPFS) == 0) { (void) printf(gettext("WARNING: %s is on a temporary " "file system.\n"), rpath); } diff --git a/usr/src/pkgdefs/SUNWcsu/prototype_i386 b/usr/src/pkgdefs/SUNWcsu/prototype_i386 index 1523bf5b68..7dffa22aad 100644 --- a/usr/src/pkgdefs/SUNWcsu/prototype_i386 +++ b/usr/src/pkgdefs/SUNWcsu/prototype_i386 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # #ident "%Z%%M% %I% %E% SMI" @@ -69,7 +69,6 @@ f none usr/kernel/drv/pts 755 root sys f none usr/kernel/exec/javaexec 755 root sys f none usr/kernel/fs/fdfs 755 root sys f none usr/kernel/fs/pcfs 755 root sys -f none usr/kernel/fs/xmemfs 755 root sys f none usr/kernel/sched/FX 755 root sys f none usr/kernel/sched/FX_DPTBL 755 root sys f none usr/kernel/sched/IA 755 root sys @@ -82,8 +81,6 @@ f none usr/kernel/sys/acctctl 755 root sys f none usr/kernel/sys/exacctsys 755 root sys f none usr/kernel/sys/sysacct 755 root sys f none usr/lib/devfsadm/linkmod/SUNW_misc_link_i386.so 755 root sys -d none usr/lib/fs/xmemfs 755 root sys -f none usr/lib/fs/xmemfs/mount 555 root bin s none usr/sbin/installgrub=../../sbin/installgrub f none usr/sbin/rtc 555 root bin d none usr/sbin/i86 755 root bin @@ -126,7 +123,6 @@ f none usr/kernel/exec/amd64/javaexec 755 root sys d none usr/kernel/fs/amd64 755 root sys f none usr/kernel/fs/amd64/fdfs 755 root sys f none usr/kernel/fs/amd64/pcfs 755 root sys -f none usr/kernel/fs/amd64/xmemfs 755 root sys d none usr/kernel/pcbe/amd64 755 root sys d none usr/kernel/sched/amd64 755 root sys f none usr/kernel/sched/amd64/FX 755 root sys diff --git a/usr/src/pkgdefs/SUNWhea/prototype_i386 b/usr/src/pkgdefs/SUNWhea/prototype_i386 index 35b0b72129..3d7a74517e 100644 --- a/usr/src/pkgdefs/SUNWhea/prototype_i386 +++ b/usr/src/pkgdefs/SUNWhea/prototype_i386 @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -83,8 +83,6 @@ f none usr/include/sys/dktp/dadkio.h 644 root bin f none usr/include/sys/dktp/fdisk.h 644 root bin f none usr/include/sys/dma_engine.h 644 root bin f none usr/include/sys/fp.h 644 root bin -f none usr/include/sys/fs/seg_xmem.h 644 root bin -f none usr/include/sys/fs/xmem.h 644 root bin d none usr/include/sys/i2o 755 root bin f none usr/include/sys/i2o/i2omsg.h 644 root bin f none usr/include/sys/i2o/i2outil.h 644 root bin diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 6e67adaa67..5712e4f5a6 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -6184,6 +6184,15 @@ mondo_loop() { # Remove audit_record_attr. Moved to /usr/lib/security rm -f $root/etc/security/audit_record_attr + # + # Remove xmemfs altogether. + # + rm -f $usr/include/sys/fs/xmem.h + rm -f $usr/include/sys/fs/seg_xmem.h + rm -f $usr/kernel/fs/xmemfs + rm -f $usr/kernel/fs/amd64/xmemfs + rm -rf $usr/lib/fs/xmemfs + # End of pre-archive extraction hacks. if [ $diskless = no -a $zone = global ]; then diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h index 1b1fd119aa..0cda0fd6d4 100644 --- a/usr/src/uts/common/sys/mntent.h +++ b/usr/src/uts/common/sys/mntent.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T @@ -54,7 +54,6 @@ extern "C" { #define MNTTYPE_TMPFS "tmpfs" /* Tmp volatile file system */ #define MNTTYPE_AUTOFS "autofs" /* Automounter ``file'' system */ #define MNTTYPE_MNTFS "mntfs" /* In-kernel mnttab */ -#define MNTTYPE_XMEMFS "xmemfs" /* Extended memory FS, IA32 only */ #define MNTTYPE_DEV "dev" /* /dev file system */ #define MNTTYPE_CTFS "ctfs" /* Contract file system */ #define MNTTYPE_OBJFS "objfs" /* Kernel object file system */ diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index d2ab9a3290..2cdbe02a14 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -22,7 +22,7 @@ # # uts/intel/Makefile.files # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -101,13 +101,6 @@ GENUNIX_OBJS += \ CORE_OBJS += \ prmachdep.o -XMEMFS_OBJS += \ - seg_xmem.o \ - xmem_dir.o \ - xmem_subr.o \ - xmem_vfsops.o \ - xmem_vnops.o - LX_PROC_OBJS += \ lx_prsubr.o \ lx_prvfsops.o \ diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index 853410e89a..22f043eaeb 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -431,7 +431,7 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL # FS_KMODS += autofs cachefs ctfs dev devfs fdfs fifofs hsfs lofs FS_KMODS += lx_afs lx_proc mntfs namefs nfs objfs zfs -FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs xmemfs +FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs # # Streams Modules (/kernel/strmod): diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules index ccdce16e4c..3770e4e9dd 100644 --- a/usr/src/uts/intel/Makefile.rules +++ b/usr/src/uts/intel/Makefile.rules @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -60,10 +60,6 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/fs/proc/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) -$(OBJS_DIR)/%.o: $(UTSBASE)/intel/fs/xmemfs/%.c - $(COMPILE.c) -o $@ $< - $(CTFCONVERT_O) - $(OBJS_DIR)/%.o: $(UTSBASE)/intel/ia32/ml/%.s $(COMPILE.s) -o $@ $< @@ -160,9 +156,6 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/dtrace/%.s $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/fs/proc/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) -$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/fs/xmemfs/%.c - @($(LHEAD) $(LINT.c) $< $(LTAIL)) - $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/ia32/ml/%.s @($(LHEAD) $(LINT.s) $< $(LTAIL)) diff --git a/usr/src/uts/intel/fs/xmemfs/seg_xmem.c b/usr/src/uts/intel/fs/xmemfs/seg_xmem.c deleted file mode 100644 index 12c0fffddd..0000000000 --- a/usr/src/uts/intel/fs/xmemfs/seg_xmem.c +++ /dev/null @@ -1,823 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -/* - * Portions of this source code were derived from Berkeley 4.3 BSD - * under license from the Regents of the University of California. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * The segxmem driver is used by the xmemfs to get faster (than seg_map) - * mappings [lower routine overhead] to random vnode/offsets. - * Mappings are made to a very limited kernel address range and to a - * potentially much larger user address range. It is the speed of mmap - * and munmaps to the user address space that we are concerned with. - * We also need to ensure very low overhead for I/O similar to seg_spt - */ - -#include <sys/types.h> -#include <sys/t_lock.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/buf.h> -#include <sys/systm.h> -#include <sys/vnode.h> -#include <sys/mman.h> -#include <sys/errno.h> -#include <sys/cred.h> -#include <sys/kmem.h> -#include <sys/vtrace.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/thread.h> -#include <sys/dumphdr.h> -#include <sys/map.h> -#include <sys/atomic.h> - -#include <vm/seg_kmem.h> -#include <vm/seg_vn.h> -#include <vm/hat.h> -#include <vm/as.h> -#include <vm/seg.h> -#include <vm/page.h> -#include <vm/pvn.h> -#include <vm/rm.h> -#include <sys/vfs.h> -#include <sys/fs/seg_xmem.h> -#include <sys/fs/xmem.h> -#include <sys/lgrp.h> - -/* - * Private seg op routines. - */ -static void segxmem_free(struct seg *seg); -static int segxmem_dup(struct seg *seg, struct seg *newseg); -static int segxmem_unmap(struct seg *seg, caddr_t raddr, size_t ssize); -static faultcode_t segxmem_fault(struct hat *hat, struct seg *seg, caddr_t addr, - size_t len, enum fault_type type, enum seg_rw rw); -static int segxmem_setprot(struct seg *seg, caddr_t addr, size_t len, - uint_t prot); -static int segxmem_checkprot(struct seg *seg, caddr_t addr, size_t len, - uint_t prot); -static size_t segxmem_incore(struct seg *seg, caddr_t addr, size_t len, - register char *vec); -static int segxmem_sync(struct seg *seg, register caddr_t addr, size_t len, - int attr, uint_t flags); -static int segxmem_lockop(struct seg *seg, caddr_t addr, size_t len, - int attr, int op, ulong_t *lockmap, size_t pos); -static int segxmem_getprot(struct seg *seg, caddr_t addr, size_t len, - uint_t *protv); -static u_offset_t segxmem_getoffset(struct seg *seg, caddr_t addr); -static int segxmem_gettype(struct seg *seg, caddr_t addr); -static int segxmem_getvp(struct seg *, caddr_t, struct vnode **); -static int segxmem_advise(struct seg *seg, caddr_t addr, size_t len, - uint_t behav); -static void segxmem_dump(struct seg *seg); -static int segxmem_pagelock(struct seg *seg, caddr_t addr, size_t len, - struct page ***ppp, enum lock_type type, - enum seg_rw rw); -static int segxmem_setpgsz(struct seg *, caddr_t, size_t, uint_t); -static int segxmem_getmemid(struct seg *, caddr_t, memid_t *); - -#define SEGXMEM_NULLOP(t) (t(*)())NULL - -static struct seg_ops segxmem_ops = { - segxmem_dup, /* dup */ - segxmem_unmap, - segxmem_free, - segxmem_fault, /* Change if HAT_DYNAMIC_ISM_UNMAP suported */ - SEGXMEM_NULLOP(int), /* faulta */ - segxmem_setprot, - segxmem_checkprot, - SEGXMEM_NULLOP(int), /* kluster */ - SEGXMEM_NULLOP(size_t), /* swapout */ - segxmem_sync, /* sync */ - segxmem_incore, /* incore */ - segxmem_lockop, /* lockop */ - segxmem_getprot, - segxmem_getoffset, - segxmem_gettype, - segxmem_getvp, - segxmem_advise, /* advise */ - segxmem_dump, - segxmem_pagelock, /* pagelock */ - segxmem_setpgsz, - segxmem_getmemid, /* getmemid */ - SEGXMEM_NULLOP(lgrp_mem_policy_info_t *), /* getpolicy */ -}; - - -/* - * Statistics for segxmem operations. - * - * No explicit locking to protect these stats. - */ -struct segxmemcnt segxmemcnt = { - { "fault", KSTAT_DATA_ULONG }, - { "getmap", KSTAT_DATA_ULONG }, - { "pagecreate", KSTAT_DATA_ULONG } -}; - -kstat_named_t *segxmemcnt_ptr = (kstat_named_t *)&segxmemcnt; -uint_t segxmemcnt_ndata = sizeof (segxmemcnt) / sizeof (kstat_named_t); - - -int segxmem_DR = -1; /* Indicate if hat supports DR */ - -int remap_broken = 0; - - -int -segxmem_create(struct seg *seg, struct segxmem_crargs *xmem_a) -{ - struct segxmem_data *sxd; - uint_t prot; - caddr_t taddr; - uint_t blocknumber, lastblock; - page_t ***ppa; - struct hat *hat; - size_t tlen; - - ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); - - if (((uintptr_t)seg->s_base | seg->s_size) & PAGEOFFSET) - panic("segxmem not PAGESIZE aligned"); - - sxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); - - seg->s_data = (void *)sxd; - seg->s_ops = &segxmem_ops; - - sxd->sxd_prot = xmem_a->xma_prot; - sxd->sxd_vp = xmem_a->xma_vp; - sxd->sxd_offset = xmem_a->xma_offset; - sxd->sxd_bshift = xmem_a->xma_bshift; - sxd->sxd_bsize = 1 << xmem_a->xma_bshift; - - blocknumber = 0; - lastblock = (seg->s_size - 1) >> sxd->sxd_bshift; - taddr = seg->s_base; - tlen = sxd->sxd_bsize; - ppa = xmem_a->xma_ppa; - hat = seg->s_as->a_hat; - prot = xmem_a->xma_prot; - while (blocknumber <= lastblock) { - page_t **ppp; - - if (VTOXM(sxd->sxd_vp)->xm_ppb == 1) - ppp = (page_t **)ppa; - else - ppp = *ppa; - - hat_memload_array(hat, taddr, tlen, ppp, prot | HAT_NOSYNC, - HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); - - blocknumber++; - ppa++; - taddr += tlen; - } - - return (0); -} - -static void -segxmem_free(seg) - struct seg *seg; -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); - kmem_free(sxd, sizeof (struct segxmem_data)); - -} - -static int -segxmem_dup(struct seg *seg, struct seg *newseg) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - struct segxmem_data *newsxd; - caddr_t vaddr; - ulong_t pfn; - page_t *pp, **ppa; - int i; - int ppb; - - newsxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); - - newsxd->sxd_vp = sxd->sxd_vp; - newsxd->sxd_offset = sxd->sxd_offset; - newsxd->sxd_bsize = sxd->sxd_bsize; - newsxd->sxd_bshift = sxd->sxd_bshift; - newsxd->sxd_prot = sxd->sxd_prot; - - newsxd->sxd_softlockcnt = sxd->sxd_softlockcnt; - - newseg->s_ops = &segxmem_ops; - newseg->s_data = (void *)newsxd; - - ppb = btop(sxd->sxd_bsize); - if (ppb > 1) - ppa = kmem_alloc(ppb * sizeof (page_t *), KM_SLEEP); - else - ppa = &pp; - - for (vaddr = seg->s_base; vaddr < seg->s_base + seg->s_size; - vaddr += sxd->sxd_bsize) { - - /* ### sxd->sxd_vp->xn_ppa[(vaddr - s_base)]->p_pagenum */ - - pfn = hat_getpfnum(seg->s_as->a_hat, vaddr); - - if (pfn == PFN_INVALID) - continue; - - for (i = 0; i < ppb; i++) { - ppa[i] = page_numtopp_nolock(pfn); - pfn++; - } - hat_memload_array(newseg->s_as->a_hat, vaddr, sxd->sxd_bsize, - ppa, sxd->sxd_prot | HAT_NOSYNC, - HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); - } - if (ppb > 1) - kmem_free(ppa, ppb * sizeof (page_t *)); - - return (0); -} - -/* - * This routine is called via a machine specific fault handling - * routine. It is also called by software routines wishing to - * lock or unlock a range of addresses. - */ -static faultcode_t -segxmem_fault( - struct hat *hat, - struct seg *seg, - caddr_t addr, - size_t len, - enum fault_type type, - enum seg_rw rw) -{ - struct segxmem_data *sxd; - size_t npages = btopr(len); - -#ifdef lint - hat = hat; - addr = addr; -#endif - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(addr >= seg->s_base); - ASSERT(((addr + len) - seg->s_base) <= seg->s_size); - - switch (type) { - - case F_SOFTLOCK: - - /* - * Because we know that every shared memory is - * already locked and called in the same context. - */ - atomic_add_long(&sxd->sxd_softlockcnt, npages); - return (0); - - case F_SOFTUNLOCK: - - atomic_add_long(&sxd->sxd_softlockcnt, -npages); - - /* - * Check for softlock - */ - if (sxd->sxd_softlockcnt == 0) { - /* - * All SOFTLOCKS are gone. Wakeup any waiting - * unmappers so they can try again to unmap. - * As an optimization check for waiters first - * without the mutex held, so we're not always - * grabbing it on softunlocks. - */ - if (AS_ISUNMAPWAIT(seg->s_as)) { - mutex_enter(&seg->s_as->a_contents); - if (AS_ISUNMAPWAIT(seg->s_as)) { - AS_CLRUNMAPWAIT(seg->s_as); - cv_broadcast(&seg->s_as->a_cv); - } - mutex_exit(&seg->s_as->a_contents); - } - } - return (0); - - case F_INVAL: - - if ((rw == S_EXEC) && !(sxd->sxd_prot & PROT_EXEC)) - return (FC_NOMAP); - - /* - * all xmem pages should already be mapped - desired mapping - * unknown - */ - - panic("xmem page fault"); - /*NOTREACHED*/ - - case F_PROT: - /* - * We can get away with this because ISM segments are - * always rw. Other than this unusual case, there - * should be no instances of protection violations. - */ - return (0); - - default: - XMEMPRINTF(8, ("segxmem_fault: type %x\n", type)); - return (FC_NOMAP); - } -} - -static int -segxmem_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - if (seg->s_base == addr && seg->s_size == len) { - sxd->sxd_prot = prot; - hat_chgprot(seg->s_as->a_hat, addr, len, prot); - } else { - return (IE_NOMEM); - } - return (0); -} - -/*ARGSUSED*/ -static int -segxmem_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - /* - * Need not acquire the segment lock since - * "sxd_prot" is a read-only field. - */ - return (((sxd->sxd_prot & prot) != prot) ? EACCES : 0); -} - -static int -segxmem_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - if (pgno != 0) { - do - protv[--pgno] = sxd->sxd_prot; - while (pgno != 0); - } - return (0); -} - -static u_offset_t -segxmem_getoffset(struct seg *seg, caddr_t addr) -{ - register struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - return ((u_offset_t)sxd->sxd_offset + (addr - seg->s_base)); -} - -/*ARGSUSED*/ -static int -segxmem_gettype(struct seg *seg, caddr_t addr) -{ - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - return (MAP_SHARED); -} - -/*ARGSUSED*/ -static int -segxmem_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) -{ - register struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock)); - - *vpp = sxd->sxd_vp; - return (0); -} - -#ifndef lint /* currently unused */ -/* - * Check to see if it makes sense to do kluster/read ahead to - * addr + delta relative to the mapping at addr. We assume here - * that delta is a signed PAGESIZE'd multiple (which can be negative). - * - * For segxmem we always "approve" of this action from our standpoint. - */ -/*ARGSUSED*/ -static int -segxmem_kluster(struct seg *seg, caddr_t addr, ssize_t delta) -{ - return (0); -} - -static void -segxmem_badop() -{ - panic("segxmem_badop"); - /*NOTREACHED*/ -} - -#endif - -/* - * Special public segxmem operations - */ - - -void -segxmem_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) -{ - page_t *pp; - struct segxmem_data *sxd = (struct segxmem_data *)(seg->s_data); - struct vnode *vp = sxd->sxd_vp; - u_offset_t off = sxd->sxd_offset; - caddr_t eaddr; - - ASSERT(seg->s_as == &kas); - - panic("segxmem_pageunlock"); - - eaddr = addr + len; - addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); - - for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) { - hat_unlock(kas.a_hat, addr, PAGESIZE); - - /* - * Use page_find() instead of page_lookup() to - * find the page since we know that it has - * "exclusive" lock. - */ - pp = page_find(vp, off); - if (pp == NULL) - panic("segxmem_pageunlock"); - if (rw == S_WRITE) { - hat_setrefmod(pp); - } else if (rw != S_OTHER) { - hat_setref(pp); - } - - page_unlock(pp); - } -} - -/* - * segxmem_getmap allocates from the map an address range to map the vnode vp - * in the range <off, off + len). - * - * If pagecreate is nonzero, segxmem_getmap will create the page(s). - * calls hat_memload_array to load the translations. - * **ppa can be NULL if pagecreate is 0. - */ -caddr_t -segxmem_getmap(struct map *map, struct vnode *vp, u_offset_t off, size_t len, - page_t **ppa, enum seg_rw rw) -{ - caddr_t baseaddr; - uint_t attr = (rw == S_WRITE)?PROT_WRITE|PROT_READ:PROT_READ; - -#ifdef lint - vp = vp; - off = off; -#endif - - segxmemcnt.sx_getmapflt.value.ul++; - - baseaddr = (caddr_t)rmalloc_wait(map, len); - - hat_memload_array(kas.a_hat, baseaddr, len, ppa, attr | HAT_NOSYNC, - HAT_LOAD); - - return (baseaddr); -} - -void -segxmem_release(struct map *map, caddr_t addr, size_t len) -{ - - hat_unload(kas.a_hat, addr, len, HAT_UNLOAD_NOSYNC); - rmfree(map, len, (ulong_t)addr); -} - -int -segxmem_remap(struct seg *seg, struct vnode *vp, caddr_t addr, size_t len, - page_t ***ppa, uchar_t prot) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - uint_t blocknumber, lastblock, flags; - caddr_t taddr; - size_t tlen; - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size || - (seg->s_ops != &segxmem_ops) || (sxd->sxd_vp != vp)) - return (1); /* Fail */ - - ASSERT(sxd->sxd_prot == prot); /* remove this later */ - - /* aligned addr and length */ - - blocknumber = (addr - seg->s_base) >> sxd->sxd_bshift; - lastblock = (addr + len - 1 - seg->s_base) >> sxd->sxd_bshift; - taddr = addr; - tlen = sxd->sxd_bsize; - while (blocknumber <= lastblock) { - - /* - * entire xmem segment mapped on mmap() call - if in the - * segment range(checked above), there should be a mapping - * therefore flags always HAT_LOAD_REMAP. - * - */ - if (hat_getpfnum(seg->s_as->a_hat, taddr) != PFN_INVALID) { -#ifdef DEBUG - if (remap_broken) - hat_unload(seg->s_as->a_hat, taddr, - tlen, HAT_UNLOAD); -#endif - - /* - * assume the hat would leave mapping HAT_LOAD_LOCK'ed - * on REMAP. - */ - flags = HAT_LOAD | HAT_LOAD_NOCONSIST | HAT_LOAD_REMAP; - } else { - XMEMPRINTF(4, - ("segxmem_remap: taddr %p pfn inv\n", - (void *)taddr)); - flags = HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST; - } - - prot |= HAT_NOSYNC; - - if (btop(sxd->sxd_bsize) == 1) - hat_memload_array(seg->s_as->a_hat, taddr, tlen, - (page_t **)ppa, prot, flags); - else - hat_memload_array(seg->s_as->a_hat, taddr, tlen, *ppa, - prot, flags); - - blocknumber++; - ppa++; - taddr += tlen; - } - return (0); -} - -/* ARGSUSED */ -static int -segxmem_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) -{ - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - - return (0); -} - -/* - * segxmem pages are always "in core" since the memory is locked down. - */ -/* ARGSUSED */ -static size_t -segxmem_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) -{ - - caddr_t eo_seg; - - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); -#ifdef lint - seg = seg; -#endif - - eo_seg = addr + len; - while (addr < eo_seg) { - /* page exist, and it's locked. */ - *vec++ = (char)0x9; - addr += PAGESIZE; - } - return (len); -} - -static int segxmem_advise(struct seg *seg, caddr_t addr, size_t len, - uint_t behav) -{ -#ifdef lint - seg = seg; - addr = addr; - len = len; - behav = behav; -#endif - return (0); -} - -/* - * called from as_ctl(, MC_LOCK,) - * - */ -/* ARGSUSED */ -static int -segxmem_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, - int op, ulong_t *lockmap, size_t pos) -{ - ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - /* - * for spt, as->a_paglck is never set - * so this routine should not be called. - */ - return (0); -} - -static int -segxmem_unmap(struct seg *seg, caddr_t addr, size_t ssize) -{ - struct segxmem_data *sxd, *nsxd; - struct seg *nseg; - caddr_t segend, delsegend; - - XMEMPRINTF(1, ("segxmem_unmap: seg %p addr %p size %lx\n", - (void *)seg, (void *)addr, ssize)); - - ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); - - hat_unload(seg->s_as->a_hat, addr, ssize, HAT_UNLOAD_UNLOCK); - if (addr == seg->s_base && ssize == seg->s_size) { - seg_free(seg); - return (0); - } - sxd = (struct segxmem_data *)seg->s_data; - - /* partial unmap of the segment - begin, end and middle */ - - /* check for deleting at the beginning */ - - if (addr == seg->s_base) { - seg->s_base += ssize; - seg->s_size -= ssize; - return (0); - } - delsegend = addr + ssize; - segend = seg->s_base + seg->s_size; - - /* check for deleting at the end */ - if (delsegend == segend) { - seg->s_size -= ssize; - return (0); - } - - /* Now for the tough one. Make a new one at end and cut the current */ - - seg->s_size = addr - seg->s_base; /* adjust original segment */ - - nseg = seg_alloc(seg->s_as, delsegend, segend - delsegend); - if (nseg == NULL) - panic("segxmem seg_alloc"); - - nsxd = kmem_zalloc(sizeof (struct segxmem_data), KM_SLEEP); - - nsxd->sxd_vp = sxd->sxd_vp; - nsxd->sxd_offset = sxd->sxd_offset; /* unused */ - nsxd->sxd_bsize = sxd->sxd_bsize; - nsxd->sxd_bshift = sxd->sxd_bshift; - nsxd->sxd_prot = sxd->sxd_prot; - nsxd->sxd_softlockcnt = sxd->sxd_softlockcnt; /* ### */ - - nseg->s_ops = &segxmem_ops; - nseg->s_data = (void *)nsxd; - - return (0); -} - -/* - * Dump the pages belonging to this segxmem segment. - */ -static void -segxmem_dump(struct seg *seg) -{ - struct segxmem_data *sxd; - caddr_t addr; - int i, j; - uint_t nblocks; - pgcnt_t npages; - - sxd = (struct segxmem_data *)seg->s_data; - nblocks = howmany(seg->s_size, sxd->sxd_bsize); - npages = nblocks << (sxd->sxd_bshift - PAGESHIFT); - addr = seg->s_base; - - /* XXX figure out if we need something else here */ - for (i = 0; i < nblocks; i++) { - pfn_t pfn = hat_getpfnum(seg->s_as->a_hat, addr); - - for (j = 0; j < npages; j++) { - dump_addpage(seg->s_as, addr, pfn); - pfn++; - addr += PAGESIZE; - } - } -} -/*ARGSUSED*/ -static int -segxmem_setpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc) -{ - return (ENOTSUP); -} - -static int -segxmem_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) -{ - struct segxmem_data *sxd = (struct segxmem_data *)seg->s_data; - - memidp->val[0] = (uintptr_t)sxd->sxd_vp; - memidp->val[1] = sxd->sxd_offset + (uintptr_t)(addr - seg->s_base); - return (0); -} - -/*ARGSUSED*/ -static int -segxmem_pagelock(struct seg *seg, caddr_t addr, size_t len, - struct page ***ppp, enum lock_type type, enum seg_rw rw) -{ - return (ENOTSUP); -} - -#define XMEMBUFSZ 16384 -#define XMEMPAD 128 /* larger than max len xmem string */ - -char xmembuf[XMEMBUFSZ + XMEMPAD]; -uint_t xmembufi; -int xmemlevel = 4; - -void -xmemprintf(const char *fmt, ...) -{ - va_list args; - int len; - char localbuf[XMEMPAD]; - uint_t newval, oldxmembufi; - - va_start(args, fmt); - - len = snprintf(localbuf, INT_MAX, "%d: ", (int)CPU->cpu_id); - len += vsnprintf(localbuf + len, INT_MAX, fmt, args); - - ASSERT(len < XMEMPAD); - - do { - oldxmembufi = xmembufi; - newval = oldxmembufi + len; - if (newval > XMEMBUFSZ) - newval = 0; - } while (cas32(&xmembufi, oldxmembufi, newval) != oldxmembufi); - - bcopy(localbuf, xmembuf + oldxmembufi, len); - - va_end(args); -} diff --git a/usr/src/uts/intel/fs/xmemfs/xmem_dir.c b/usr/src/uts/intel/fs/xmemfs/xmem_dir.c deleted file mode 100644 index a0ccd4c92b..0000000000 --- a/usr/src/uts/intel/fs/xmemfs/xmem_dir.c +++ /dev/null @@ -1,1025 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/systm.h> -#include <sys/time.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/cred.h> -#include <sys/stat.h> -#include <sys/debug.h> -#include <sys/policy.h> -#include <sys/fs/xmem.h> - -static int xdircheckpath(struct xmemnode *, struct xmemnode *, struct cred *); -static int xdirrename(struct xmemnode *, struct xmemnode *, struct xmemnode *, - char *, struct xmemnode *, struct xdirent *, struct cred *); -static void xdirfixdotdot(struct xmemnode *, struct xmemnode *, - struct xmemnode *); -static int xdirmakexnode(struct xmemnode *, struct xmount *, - struct vattr *, enum de_op, struct xmemnode **, struct cred *); -static int xdiraddentry(struct xmemnode *, struct xmemnode *, char *, - enum de_op, struct xmemnode *); - - -#define X_HASH_SIZE 8192 /* must be power of 2 */ -#define X_MUTEX_SIZE 64 - -static struct xdirent *x_hashtable[X_HASH_SIZE]; -static kmutex_t x_hashmutex[X_MUTEX_SIZE]; - -#define X_HASH_INDEX(a) ((a) & (X_HASH_SIZE-1)) -#define X_MUTEX_INDEX(a) ((a) & (X_MUTEX_SIZE-1)) - -#define XMEMFS_HASH(xp, name, hash) \ - { \ - char Xc, *Xcp; \ - hash = ((uintptr_t)(xp)) >> 8; \ - for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \ - hash = (hash << 4) + hash + (uint_t)Xc; \ - } - -void -xmemfs_hash_init(void) -{ - int ix; - - for (ix = 0; ix < X_MUTEX_SIZE; ix++) - mutex_init(&x_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL); -} - -/* - * This routine is where the rubber meets the road for identities. - */ -static void -xmemfs_hash_in(struct xdirent *x) -{ - uint_t hash; - struct xdirent **prevpp; - kmutex_t *t_hmtx; - - XMEMFS_HASH(x->xd_parent, x->xd_name, hash); - x->xd_hash = hash; - prevpp = &x_hashtable[X_HASH_INDEX(hash)]; - t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; - mutex_enter(t_hmtx); - x->xd_link = *prevpp; - *prevpp = x; - mutex_exit(t_hmtx); -} - -/* - * Remove xdirent *t from the hash list. - */ -static void -xmemfs_hash_out(struct xdirent *x) -{ - uint_t hash; - struct xdirent **prevpp; - kmutex_t *t_hmtx; - - hash = x->xd_hash; - prevpp = &x_hashtable[X_HASH_INDEX(hash)]; - t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; - mutex_enter(t_hmtx); - while (*prevpp != x) - prevpp = &(*prevpp)->xd_link; - *prevpp = x->xd_link; - mutex_exit(t_hmtx); -} - -static struct xdirent * -xmemfs_hash_lookup(char *name, struct xmemnode *parent, uint_t hold, - struct xmemnode **found) -{ - struct xdirent *l; - uint_t hash; - kmutex_t *t_hmtx; - struct xmemnode *xp; - - XMEMFS_HASH(parent, name, hash); - t_hmtx = &x_hashmutex[X_MUTEX_INDEX(hash)]; - mutex_enter(t_hmtx); - l = x_hashtable[X_HASH_INDEX(hash)]; - while (l) { - if ((l->xd_hash == hash) && - (l->xd_parent == parent) && - (strcmp(l->xd_name, name) == 0)) { - /* - * We need to make sure that the xmemnode that - * we put a hold on is the same one that we pass back. - * Hence, temporary variable xp is necessary. - * The right way to fix this would be to add the t_hmtx - * lock acquisition to callers like tdirrename, so - * that this race condition doesn't occur. But - * this "fix" is simpler, and less of a performance - * impact. - */ - xp = l->xd_xmemnode; - if (hold) { - ASSERT(xp); - xmemnode_hold(xp); - } - if (found) - *found = xp; - mutex_exit(t_hmtx); - return (l); - } else { - l = l->xd_link; - } - } - mutex_exit(t_hmtx); - return (NULL); -} - -/* - * Search directory 'parent' for entry 'name'. - * - * The calling thread can't hold the write version - * of the rwlock for the directory being searched - * - * 0 is returned on success and *foundxp points - * to the found xmemnode with its vnode held. - */ -int -xdirlookup( - struct xmemnode *parent, - char *name, - struct xmemnode **foundxp, - struct cred *cred) -{ - int error; - - *foundxp = NULL; - if (parent->xn_type != VDIR) - return (ENOTDIR); - - if ((error = xmem_xaccess(parent, VEXEC, cred))) - return (error); - - if (*name == '\0') { - xmemnode_hold(parent); - *foundxp = parent; - return (0); - } - - /* - * Search the directory for the matching name - * We need the lock protecting the xn_dir list - * so that it doesn't change out from underneath us. - * xmemfs_hash_lookup() will pass back the xmemnode - * with a hold on it. - */ - - if (xmemfs_hash_lookup(name, parent, 1, foundxp) != NULL) { - ASSERT(*foundxp); - return (0); - } - - return (ENOENT); -} - -/* - * Enter a directory entry for 'name' and 'xp' into directory 'dir' - * - * Returns 0 on success. - */ -int -xdirenter( - struct xmount *xm, - struct xmemnode *dir, /* target directory to make entry in */ - char *name, /* name of entry */ - enum de_op op, /* entry operation */ - struct xmemnode *fromparent, /* source directory if rename */ - struct xmemnode *xp, /* source xmemnode, if link/rename */ - struct vattr *va, - struct xmemnode **xpp, /* return xmemnode, if create/mkdir */ - struct cred *cred) -{ - struct xdirent *xdp; - struct xmemnode *found = NULL; - int error = 0; - char *s; - - /* - * xn_rwlock is held to serialize direnter and dirdeletes - */ - ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - /* - * Don't allow '/' characters in pathname component - * (thus in ufs_direnter()). - */ - for (s = name; *s; s++) - if (*s == '/') - return (EACCES); - - ASSERT(name[0] != '\0'); - - /* - * For link and rename lock the source entry and check the link count - * to see if it has been removed while it was unlocked. - */ - if (op == DE_LINK || op == DE_RENAME) { - mutex_enter(&xp->xn_tlock); - if (xp->xn_nlink == 0) { - mutex_exit(&xp->xn_tlock); - return (ENOENT); - } - - if (xp->xn_nlink == MAXLINK) { - mutex_exit(&xp->xn_tlock); - return (EMLINK); - } - xp->xn_nlink++; - mutex_exit(&xp->xn_tlock); - gethrestime(&xp->xn_ctime); - } - - /* - * This might be a "dangling detached directory". - * it could have been removed, but a reference - * to it kept in u_cwd. don't bother searching - * it, and with any luck the user will get tired - * of dealing with us and cd to some absolute - * pathway. *sigh*, thus in ufs, too. - */ - if (dir->xn_nlink == 0) { - error = ENOENT; - goto out; - } - - /* - * If this is a rename of a directory and the parent is - * different (".." must be changed), then the source - * directory must not be in the directory hierarchy - * above the target, as this would orphan everything - * below the source directory. - */ - if (op == DE_RENAME) { - if (xp == dir) { - error = EINVAL; - goto out; - } - if (xp->xn_type == VDIR) { - if ((fromparent != dir) && - (error = xdircheckpath(xp, dir, cred))) { - goto out; - } - } - } - - /* - * Search for the entry. Return "found" if it exists. - */ - xdp = xmemfs_hash_lookup(name, dir, 1, &found); - - if (xdp) { - ASSERT(found); - switch (op) { - case DE_CREATE: - case DE_MKDIR: - if (xpp) { - *xpp = found; - error = EEXIST; - } else { - xmemnode_rele(found); - } - break; - - case DE_RENAME: - error = xdirrename(fromparent, xp, - dir, name, found, xdp, cred); - xmemnode_rele(found); - break; - - case DE_LINK: - /* - * Can't link to an existing file. - */ - error = EEXIST; - xmemnode_rele(found); - break; - } - } else { - - /* - * The entry does not exist. Check write permission in - * directory to see if entry can be created. - */ - if (error = xmem_xaccess(dir, VWRITE, cred)) - goto out; - if (op == DE_CREATE || op == DE_MKDIR) { - /* - * Make new xmemnode and directory entry as required. - */ - error = xdirmakexnode(dir, xm, va, op, &xp, cred); - if (error) - goto out; - } - if (error = xdiraddentry(dir, xp, name, op, fromparent)) { - if (op == DE_CREATE || op == DE_MKDIR) { - /* - * Unmake the inode we just made. - */ - rw_enter(&xp->xn_rwlock, RW_WRITER); - if ((xp->xn_type) == VDIR) { - ASSERT(xdp == NULL); - /* - * cleanup allocs made by xdirinit() - */ - xdirtrunc(xp); - } - mutex_enter(&xp->xn_tlock); - xp->xn_nlink = 0; - mutex_exit(&xp->xn_tlock); - gethrestime(&xp->xn_ctime); - rw_exit(&xp->xn_rwlock); - xmemnode_rele(xp); - xp = NULL; - } - } else if (xpp) { - *xpp = xp; - } else if (op == DE_CREATE || op == DE_MKDIR) { - xmemnode_rele(xp); - } - } -out: - if (error && (op == DE_LINK || op == DE_RENAME)) { - /* - * Undo bumped link count. - */ - DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock); - gethrestime(&xp->xn_ctime); - } - return (error); -} - -/* - * Delete entry xp of name "nm" from dir. - * Free dir entry space and decrement link count on xmemnode(s). - * - * Return 0 on success. - */ -int -xdirdelete( - struct xmemnode *dir, - struct xmemnode *xp, - char *nm, - enum dr_op op, - struct cred *cred) -{ - register struct xdirent *tpdp; - int error; - size_t namelen; - struct xmemnode *xptmp; - timestruc_t now; - - ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - ASSERT(nm[0] != '\0'); - - /* - * return error when removing . and .. - */ - if (nm[0] == '.') { - if (nm[1] == '\0') - return (EINVAL); - if (nm[1] == '.' && nm[2] == '\0') - return (EEXIST); /* thus in ufs */ - } - - if (error = xmem_xaccess(dir, VEXEC|VWRITE, cred)) - return (error); - - /* - * If the parent directory is "sticky", then the user must - * own the parent directory or the file in it, or else must - * have permission to write the file. Otherwise it may not - * be deleted (except by privileged users). Same as ufs_dirremove. - */ - if (error = xmem_sticky_remove_access(dir, xp, cred)) - return (error); - - if (dir->xn_dir == NULL) - return (ENOENT); - - tpdp = xmemfs_hash_lookup(nm, dir, 0, &xptmp); - if (tpdp == NULL) { - /* - * If it is gone, some other thread got here first! - * Return error ENOENT. - */ - return (ENOENT); - } - - /* - * If the xmemnode in the xdirent changed, we were probably - * the victim of a concurrent rename operation. The original - * is gone, so return that status (same as UFS). - */ - if (xp != xptmp) - return (ENOENT); - - xmemfs_hash_out(tpdp); - - /* - * Take tpdp out of the directory list. - */ - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - if (tpdp->xd_prev) { - tpdp->xd_prev->xd_next = tpdp->xd_next; - } - if (tpdp->xd_next) { - tpdp->xd_next->xd_prev = tpdp->xd_prev; - } - - /* - * If the roving slot pointer happens to match tpdp, - * point it at the previous dirent. - */ - if (dir->xn_dir->xd_prev == tpdp) { - dir->xn_dir->xd_prev = tpdp->xd_prev; - } - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - - /* - * tpdp points to the correct directory entry - */ - namelen = strlen(tpdp->xd_name) + 1; - - xmem_memfree(tpdp, sizeof (struct xdirent) + namelen); - dir->xn_size -= (sizeof (struct xdirent) + namelen); - dir->xn_dirents--; - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - xp->xn_ctime = now; - - ASSERT(xp->xn_nlink > 0); - DECR_COUNT(&xp->xn_nlink, &xp->xn_tlock); - if (op == DR_RMDIR && xp->xn_type == VDIR) { - xdirtrunc(xp); - ASSERT(xp->xn_nlink == 0); - } - return (0); -} - -/* - * xdirinit is used internally to initialize a directory (dir) - * with '.' and '..' entries without checking permissions and locking - */ -void -xdirinit( - struct xmemnode *parent, /* parent of directory to initialize */ - struct xmemnode *dir) /* the new directory */ -{ - struct xdirent *dot, *dotdot; - timestruc_t now; - - ASSERT(RW_WRITE_HELD(&parent->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - dot = xmem_memalloc(sizeof (struct xdirent) + 2, 1); - dotdot = xmem_memalloc(sizeof (struct xdirent) + 3, 1); - - /* - * Initialize the entries - */ - dot->xd_xmemnode = dir; - dot->xd_offset = 0; - dot->xd_name = (char *)dot + sizeof (struct xdirent); - dot->xd_name[0] = '.'; - dot->xd_parent = dir; - xmemfs_hash_in(dot); - - dotdot->xd_xmemnode = parent; - dotdot->xd_offset = 1; - dotdot->xd_name = (char *)dotdot + sizeof (struct xdirent); - dotdot->xd_name[0] = '.'; - dotdot->xd_name[1] = '.'; - dotdot->xd_parent = dir; - xmemfs_hash_in(dotdot); - - /* - * Initialize directory entry list. - */ - dot->xd_next = dotdot; - dot->xd_prev = dotdot; /* dot's xd_prev holds roving slot pointer */ - dotdot->xd_next = NULL; - dotdot->xd_prev = dot; - INCR_COUNT(&parent->xn_nlink, &parent->xn_tlock); - - dir->xn_dir = dot; - dir->xn_size = 2 * sizeof (struct xdirent) + 5; /* dot and dotdot */ - dir->xn_dirents = 2; - dir->xn_nlink = 2; /* one for daddy, and one just for being me */ - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - parent->xn_ctime = now; -} - -/* - * xdirtrunc is called to remove all directory entries under this directory. - * The files themselves are removed elsewhere. - */ -void -xdirtrunc(struct xmemnode *dir) -{ - register struct xdirent *xdp; - size_t namelen; - timestruc_t now; - - ASSERT(RW_WRITE_HELD(&dir->xn_rwlock)); - ASSERT(dir->xn_type == VDIR); - - for (xdp = dir->xn_dir; xdp; xdp = dir->xn_dir) { - ASSERT(xdp->xd_next != xdp); - ASSERT(xdp->xd_prev != xdp); - ASSERT(xdp->xd_xmemnode); - ASSERT(xdp->xd_xmemnode->xn_nlink > 0); - - dir->xn_dir = xdp->xd_next; - namelen = strlen(xdp->xd_name) + 1; - - DECR_COUNT(&xdp->xd_xmemnode->xn_nlink, - &xdp->xd_xmemnode->xn_tlock); - - xmemfs_hash_out(xdp); - - xmem_memfree(xdp, sizeof (struct xdirent) + namelen); - dir->xn_size -= (sizeof (struct xdirent) + namelen); - dir->xn_dirents--; - } - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - - ASSERT(dir->xn_dir == NULL); - ASSERT(dir->xn_size == 0); - ASSERT(dir->xn_dirents == 0); -} - -/* - * Check if the source directory is in the path of the target directory. - * The target directory is locked by the caller. - */ -static int -xdircheckpath( - struct xmemnode *fromxp, - struct xmemnode *toparent, - struct cred *cred) -{ - int error = 0; - struct xmemnode *dir, *dotdot; - struct xdirent *xdp; - - ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock)); - - xdp = xmemfs_hash_lookup("..", toparent, 1, &dotdot); - if (xdp == NULL) - return (ENOENT); - - ASSERT(dotdot); - - if (dotdot == toparent) { - /* root of fs. search trivially satisfied. */ - xmemnode_rele(dotdot); - return (0); - } - for (;;) { - /* - * Return error for cases like "mv c c/d", - * "mv c c/d/e" and so on. - */ - if (dotdot == fromxp) { - xmemnode_rele(dotdot); - error = EINVAL; - break; - } - dir = dotdot; - error = xdirlookup(dir, "..", &dotdot, cred); - if (error) { - xmemnode_rele(dir); - break; - } - /* - * We're okay if we traverse the directory tree up to - * the root directory and don't run into the - * parent directory. - */ - if (dir == dotdot) { - xmemnode_rele(dir); - xmemnode_rele(dotdot); - break; - } - xmemnode_rele(dir); - } - return (error); -} - -static int -xdirrename( - struct xmemnode *fromparent, /* parent directory of source */ - struct xmemnode *fromxp, /* source xmemnode */ - struct xmemnode *toparent, /* parent directory of target */ - char *nm, /* entry we are trying to change */ - struct xmemnode *to, /* target xmemnode */ - struct xdirent *where, /* target xmemnode directory entry */ - struct cred *cred) /* credentials */ -{ - int error = 0; - int doingdirectory; - timestruc_t now; - -#if defined(lint) - nm = nm; -#endif - ASSERT(RW_WRITE_HELD(&toparent->xn_rwlock)); - - rw_enter(&fromxp->xn_rwlock, RW_READER); - rw_enter(&to->xn_rwlock, RW_READER); - - /* - * Check that everything is on the same filesystem. - */ - if (to->xn_vnode->v_vfsp != toparent->xn_vnode->v_vfsp || - to->xn_vnode->v_vfsp != fromxp->xn_vnode->v_vfsp) { - error = EXDEV; - goto out; - } - - /* - * Short circuit rename of something to itself. - */ - if (fromxp == to) { - error = ESAME; /* special KLUDGE error code */ - goto out; - } - - /* - * Must have write permission to rewrite target entry. - */ - if (error = xmem_xaccess(fromparent, VWRITE, cred)) - goto out; - - /* - * If the parent directory is "sticky", then the user must own - * either the parent directory or the destination of the rename, - * or else must have permission to write the destination. - * Otherwise the destination may not be changed (except by the - * privileged users). This implements append-only directories. - */ - if (error = xmem_sticky_remove_access(toparent, to, cred)) - goto out; - - /* - * Ensure source and target are compatible (both directories - * or both not directories). If target is a directory it must - * be empty and have no links to it; in addition it must not - * be a mount point, and both the source and target must be - * writable. - */ - doingdirectory = (fromxp->xn_type == VDIR); - if (to->xn_type == VDIR) { - if (!doingdirectory) { - error = EISDIR; - goto out; - } - /* - * vn_vfswlock will prevent mounts from using the directory - * until we are done. - */ - if (vn_vfswlock(XNTOV(to))) { - error = EBUSY; - goto out; - } - if (vn_mountedvfs(XNTOV(to)) != NULL) { - vn_vfsunlock(XNTOV(to)); - error = EBUSY; - goto out; - } - - mutex_enter(&to->xn_tlock); - if (to->xn_dirents > 2 || to->xn_nlink > 2) { - mutex_exit(&to->xn_tlock); - vn_vfsunlock(XNTOV(to)); - error = EEXIST; /* SIGH should be ENOTEMPTY */ - /* - * Update atime because checking xn_dirents is - * logically equivalent to reading the directory - */ - gethrestime(&to->xn_atime); - goto out; - } - mutex_exit(&to->xn_tlock); - } else if (doingdirectory) { - error = ENOTDIR; - goto out; - } - - where->xd_xmemnode = fromxp; - gethrestime(&now); - toparent->xn_mtime = now; - toparent->xn_ctime = now; - - /* - * Upgrade to write lock on "to" (i.e., the target xmemnode). - */ - rw_exit(&to->xn_rwlock); - rw_enter(&to->xn_rwlock, RW_WRITER); - - /* - * Decrement the link count of the target xmemnode. - */ - DECR_COUNT(&to->xn_nlink, &to->xn_tlock); - to->xn_ctime = now; - - if (doingdirectory) { - /* - * The entry for "to" no longer exists so release the vfslock. - */ - vn_vfsunlock(XNTOV(to)); - - /* - * Decrement the target link count and delete all entires. - */ - xdirtrunc(to); - ASSERT(to->xn_nlink == 0); - - /* - * Renaming a directory with the parent different - * requires that ".." be rewritten. The window is - * still there for ".." to be inconsistent, but this - * is unavoidable, and a lot shorter than when it was - * done in a user process. - */ - if (fromparent != toparent) - xdirfixdotdot(fromxp, fromparent, toparent); - } -out: - rw_exit(&to->xn_rwlock); - rw_exit(&fromxp->xn_rwlock); - return (error); -} - -static void -xdirfixdotdot( - struct xmemnode *fromxp, /* child directory */ - struct xmemnode *fromparent, /* old parent directory */ - struct xmemnode *toparent) /* new parent directory */ -{ - struct xdirent *dotdot; - - ASSERT(RW_LOCK_HELD(&toparent->xn_rwlock)); - - /* - * Increment the link count in the new parent xmemnode - */ - INCR_COUNT(&toparent->xn_nlink, &toparent->xn_tlock); - gethrestime(&toparent->xn_ctime); - - dotdot = xmemfs_hash_lookup("..", fromxp, 0, NULL); - - ASSERT(dotdot->xd_xmemnode == fromparent); - dotdot->xd_xmemnode = toparent; - - /* - * Decrement the link count of the old parent xmemnode. - * If fromparent is NULL, then this is a new directory link; - * it has no parent, so we need not do anything. - */ - if (fromparent != NULL) { - mutex_enter(&fromparent->xn_tlock); - if (fromparent->xn_nlink != 0) { - fromparent->xn_nlink--; - gethrestime(&fromparent->xn_ctime); - } - mutex_exit(&fromparent->xn_tlock); - } -} - -static int -xdiraddentry( - struct xmemnode *dir, /* target directory to make entry in */ - struct xmemnode *xp, /* new xmemnode */ - char *name, - enum de_op op, - struct xmemnode *fromxp) -{ - struct xdirent *xdp, *tpdp; - size_t namelen, alloc_size; - timestruc_t now; - - /* - * Make sure the parent directory wasn't removed from - * underneath the caller. - */ - if (dir->xn_dir == NULL) - return (ENOENT); - - /* - * Check that everything is on the same filesystem. - */ - if (xp->xn_vnode->v_vfsp != dir->xn_vnode->v_vfsp) - return (EXDEV); - - /* - * Allocate and initialize directory entry - */ - namelen = strlen(name) + 1; - alloc_size = namelen + sizeof (struct xdirent); - xdp = xmem_memalloc(alloc_size, 0); - if (xdp == NULL) - return (ENOSPC); - - if ((op == DE_RENAME) && (xp->xn_type == VDIR)) - xdirfixdotdot(xp, fromxp, dir); - - dir->xn_size += alloc_size; - dir->xn_dirents++; - xdp->xd_xmemnode = xp; - xdp->xd_parent = dir; - - /* - * The directory entry and its name were allocated sequentially. - */ - xdp->xd_name = (char *)xdp + sizeof (struct xdirent); - (void) strcpy(xdp->xd_name, name); - - xmemfs_hash_in(xdp); - - /* - * Some utilities expect the size of a directory to remain - * somewhat static. For example, a routine which unlinks - * files between calls to readdir(); the size of the - * directory changes from underneath it and so the real - * directory offset in bytes is invalid. To circumvent - * this problem, we initialize a directory entry with an - * phony offset, and use this offset to determine end of - * file in xmem_readdir. - */ - tpdp = dir->xn_dir->xd_prev; - /* - * Install at first empty "slot" in directory list. - */ - while (tpdp->xd_next != NULL && (tpdp->xd_next->xd_offset - - tpdp->xd_offset) <= 1) { - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - ASSERT(tpdp->xd_next->xd_offset > tpdp->xd_offset); - tpdp = tpdp->xd_next; - } - xdp->xd_offset = tpdp->xd_offset + 1; - - /* - * If we're at the end of the dirent list and the offset (which - * is necessarily the largest offset in this directory) is more - * than twice the number of dirents, that means the directory is - * 50% holes. At this point we reset the slot pointer back to - * the beginning of the directory so we start using the holes. - * The idea is that if there are N dirents, there must also be - * N holes, so we can satisfy the next N creates by walking at - * most 2N entries; thus the average cost of a create is constant. - * Note that we use the first dirent's xd_prev as the roving - * slot pointer; it's ugly, but it saves a word in every dirent. - */ - if (tpdp->xd_next == NULL && tpdp->xd_offset > 2 * dir->xn_dirents) - dir->xn_dir->xd_prev = dir->xn_dir->xd_next; - else - dir->xn_dir->xd_prev = xdp; - - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - - xdp->xd_next = tpdp->xd_next; - if (xdp->xd_next) { - xdp->xd_next->xd_prev = xdp; - } - xdp->xd_prev = tpdp; - tpdp->xd_next = xdp; - - ASSERT(xdp->xd_next != xdp); - ASSERT(xdp->xd_prev != xdp); - ASSERT(tpdp->xd_next != tpdp); - ASSERT(tpdp->xd_prev != tpdp); - - gethrestime(&now); - dir->xn_mtime = now; - dir->xn_ctime = now; - - return (0); -} - -static int -xdirmakexnode( - struct xmemnode *dir, - struct xmount *xm, - struct vattr *va, - enum de_op op, - struct xmemnode **newnode, - struct cred *cred) -{ - struct xmemnode *xp; - enum vtype type; - - ASSERT(va != NULL); - ASSERT(op == DE_CREATE || op == DE_MKDIR); - if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) || - ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) - return (EOVERFLOW); - type = va->va_type; - xp = xmem_memalloc(sizeof (struct xmemnode), 1); - xp->xn_vnode = vn_alloc(KM_SLEEP); - xmemnode_init(xm, xp, va, cred); - if (type == VBLK || type == VCHR) { - xp->xn_vnode->v_rdev = xp->xn_rdev = va->va_rdev; - } else { - xp->xn_vnode->v_rdev = xp->xn_rdev = NODEV; - } - xp->xn_vnode->v_type = type; - xp->xn_uid = crgetuid(cred); - - /* - * To determine the group-id of the created file: - * 1) If the gid is set in the attribute list (non-Sun & pre-4.0 - * clients are not likely to set the gid), then use it if - * the process is privileged, belongs to the target group, - * or the group is the same as the parent directory. - * 2) If the filesystem was not mounted with the Old-BSD-compatible - * GRPID option, and the directory's set-gid bit is clear, - * then use the process's gid. - * 3) Otherwise, set the group-id to the gid of the parent directory. - */ - if ((va->va_mask & AT_GID) && - ((va->va_gid == dir->xn_gid) || groupmember(va->va_gid, cred) || - secpolicy_vnode_create_gid(cred) == 0)) { - xp->xn_gid = va->va_gid; - } else { - if (dir->xn_mode & VSGID) - xp->xn_gid = dir->xn_gid; - else - xp->xn_gid = crgetgid(cred); - } - /* - * If we're creating a directory, and the parent directory has the - * set-GID bit set, set it on the new directory. - * Otherwise, if the user is neither privileged nor a member of the - * file's new group, clear the file's set-GID bit. - */ - if (dir->xn_mode & VSGID && type == VDIR) - xp->xn_mode |= VSGID; - else if ((xp->xn_mode & VSGID) && - secpolicy_vnode_setids_setgids(cred, xp->xn_gid) != 0) - xp->xn_mode &= ~VSGID; - - if (va->va_mask & AT_ATIME) - xp->xn_atime = va->va_atime; - if (va->va_mask & AT_MTIME) - xp->xn_mtime = va->va_mtime; - - if (op == DE_MKDIR) - xdirinit(dir, xp); - - *newnode = xp; - return (0); -} diff --git a/usr/src/uts/intel/fs/xmemfs/xmem_subr.c b/usr/src/uts/intel/fs/xmemfs/xmem_subr.c deleted file mode 100644 index fd8f80e82f..0000000000 --- a/usr/src/uts/intel/fs/xmemfs/xmem_subr.c +++ /dev/null @@ -1,566 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/param.h> -#include <sys/t_lock.h> -#include <sys/systm.h> -#include <sys/sysmacros.h> -#include <sys/debug.h> -#include <sys/time.h> -#include <sys/cmn_err.h> -#include <sys/vnode.h> -#include <sys/vfs.h> -#include <sys/cred.h> -#include <sys/kmem.h> -#include <sys/stat.h> -#include <sys/mode.h> -#include <vm/hat.h> -#include <vm/seg_map.h> -#include <vm/seg_kmem.h> -#include <vm/pvn.h> -#include <vm/page.h> -#include <sys/atomic.h> -#include <sys/policy.h> -#include <sys/fs/xmem.h> - - -extern void *xpgget(struct xmount *); -extern void xpgput(struct xmount *, void *); - -#define MODESHIFT 3 - -size_t xmemfs_maxkmem = 32768; -size_t xmemfs_kmemcnt; - -int -xmem_xaccess(void *vxp, int mode, struct cred *cred) -{ - struct xmemnode *xp = vxp; - int shift = 0; - /* - * Check access based on owner, group and - * public permissions in xmemnode. - */ - if (crgetuid(cred) != xp->xn_uid) { - shift += MODESHIFT; - if (groupmember(xp->xn_gid, cred) == 0) - shift += MODESHIFT; - } - - mode &= ~(xp->xn_mode << shift); - - if (mode == 0) - return (0); - - return (secpolicy_vnode_access(cred, XNTOV(xp), xp->xn_uid, mode)); -} - -/* - * Decide whether it is okay to remove within a sticky directory. - * Two conditions need to be met: write access to the directory - * is needed. In sticky directories, write access is not sufficient; - * you can remove entries from a directory only if you own the directory, - * if you are privileged, if you own the entry or if they entry is - * a plain file and you have write access to that file. - * Function returns 0 if remove access is granted. - */ -int -xmem_sticky_remove_access(struct xmemnode *dir, struct xmemnode *entry, - struct cred *cr) -{ - uid_t uid; - - if ((dir->xn_mode & S_ISVTX) && - (uid = crgetuid(cr)) != dir->xn_uid && - uid != entry->xn_uid && - (entry->xn_type != VREG || - xmem_xaccess(entry, VWRITE, cr) != 0)) - return (secpolicy_vnode_remove(cr)); - return (0); -} - -/* - * Allocate zeroed memory if xmemfs_maxkmem has not been exceeded - * or the 'musthave' flag is set. 'musthave' allocations should - * always be subordinate to normal allocations so that xmemfs_maxkmem - * can't be exceeded by more than a few KB. Example: when creating - * a new directory, the xmemnode is a normal allocation; if that - * succeeds, the dirents for "." and ".." are 'musthave' allocations. - */ -void * -xmem_memalloc(size_t size, int musthave) -{ - void *ptr = NULL; - - if (musthave) { - atomic_add_long(&xmemfs_kmemcnt, size); - ptr = kmem_zalloc(size, KM_SLEEP); - } else if (xmemfs_kmemcnt + size < xmemfs_maxkmem) { - /* - * kmemcnt may have increased since above check so a little - * more than xmemfs_maxkmem may be allocated. - */ - ptr = kmem_zalloc(size, KM_NOSLEEP); - if (ptr) - atomic_add_long(&xmemfs_kmemcnt, size); - } - return (ptr); -} - -void -xmem_memfree(void *cp, size_t size) -{ - extern size_t xmemfs_kmemcnt; - - kmem_free(cp, size); - atomic_add_long(&xmemfs_kmemcnt, -size); -} - -/* add to the number of pages we have created */ - -int -xmem_mem_add(struct xmount *xm, size_t size) -{ - mutex_enter(&xm->xm_contents); - - /* allocate the last available block */ - if ((xm->xm_mem + size) > xm->xm_max) { - mutex_exit(&xm->xm_contents); - return (1); - } - xm->xm_mem += size; - mutex_exit(&xm->xm_contents); - return (0); -} - -/* sub to the number of pages we have created */ - -static void -xmem_mem_sub(struct xmount *xm, size_t size) -{ - mutex_enter(&xm->xm_contents); - xm->xm_mem -= size; - mutex_exit(&xm->xm_contents); -} - -/* - * xmem_acquire_pages: returns an array of size btop(xm_bsize) page pointers - * or xm_bsize bytes. - * - * If large page, the array will contain 1024 entries (4MB) or 512 entries. - * - * If not large page, there is no array as a page_t * is returned. - */ - -static page_t ** -xmem_acquire_pages(struct xmount *xm, struct vnode *vp, offset_t off) -{ - page_t **ppa, *pp, *pplist; - uint_t pindex; - size_t bsize; - struct seg tmpseg; - - bsize = xm->xm_bsize; - - if (xmem_mem_add(xm, 1)) - return (NULL); - - if (xm->xm_flags & XARGS_RESERVEMEM) { - - mutex_enter(&xm->xm_contents); - ppa = xpgget(xm); - mutex_exit(&xm->xm_contents); - - if (xm->xm_ppb == 1) { - /* ppa is a direct page pointer */ - - if (!page_hashin((page_t *)ppa, vp, off, NULL)) { - panic("xmem_acquire_pages: hashin failed" - " %p %llx", (void *)vp, off); - } - pindex = xm->xm_ppb; /* bypass for loop */ - } else { - pindex = 0; - } - - for (; pindex < xm->xm_ppb; pindex++, off += PAGESIZE) { - pp = ppa[pindex]; - if (!page_hashin(pp, vp, off, NULL)) { - panic("xmem_acquire_pages: hashin failed" - " %p %p %llx", (void *)pp, (void *)vp, off); - } - } - return (ppa); - } - bzero(&tmpseg, sizeof (struct seg)); - tmpseg.s_as = &kas; - - if ((freemem - xm->xm_ppb) < xmemfs_minfree || - page_resv(xm->xm_ppb, KM_NOSLEEP) == 0) { - - cmn_err(CE_WARN, "%s: File system full, no memory", - xm->xm_mntpath); - return (NULL); - } - - (void) page_create_wait(xm->xm_ppb, PG_WAIT); - - pplist = page_get_freelist(vp, off, &tmpseg, - (caddr_t)(uintptr_t)off, bsize, 0, NULL); - if (pplist == NULL && xm->xm_ppb == 1) { - pplist = page_get_cachelist(vp, off, &tmpseg, - (caddr_t)(uintptr_t)off, 0, NULL); - } - if (pplist == NULL) { - page_create_putback(xm->xm_ppb); - page_unresv(xm->xm_ppb); - return (NULL); - } - if (PP_ISAGED(pplist) == 0) { - ASSERT(xm->xm_ppb == 1); - page_hashout(pplist, NULL); - } - - if (xm->xm_ppb > 1) - ppa = kmem_alloc(sizeof (*ppa) * xm->xm_ppb, KM_SLEEP); - - for (pindex = 0; pindex < xm->xm_ppb; pindex++, off += PAGESIZE) { - pp = pplist; - page_sub(&pplist, pp); - ASSERT(PAGE_EXCL(pp)); - ASSERT(pp->p_vnode == NULL); - ASSERT(!hat_page_is_mapped(pp)); - PP_CLRFREE(pp); - PP_CLRAGED(pp); - - if (xm->xm_ppb == 1) - ppa = (page_t **)pp; - else - ppa[pindex] = pp; - - if (!page_hashin(pp, vp, off, NULL)) { - panic("xmem_acquire_pages: hashin failed" - " %p %p %llx", (void *)pp, (void *)vp, off); - } - page_downgrade(pp); /* XXX */ - } - return (ppa); -} - -static void -xmem_release_pages(struct xmount *xm, page_t **ppa) -{ - uint_t pindex; - page_t *pp; - - xmem_mem_sub(xm, 1); - - if (xm->xm_flags & XARGS_RESERVEMEM) { - - /* - * if ppb == 1 and to lessen the load on kmem memory in - * having to allocate a million 4 byte pointers for a - * 4 GB file system, ppa is actually a page_t * - */ - - if (xm->xm_ppb == 1) { - page_hashout((page_t *)ppa, NULL); - pindex = xm->xm_ppb; /* bypass for loop */ - } else - pindex = 0; - - for (; pindex < xm->xm_ppb; pindex++) { - pp = ppa[pindex]; - page_hashout(pp, NULL); - } - mutex_enter(&xm->xm_contents); - xpgput(xm, ppa); - mutex_exit(&xm->xm_contents); - - } else { - int flag = B_INVAL; - - if (xm->xm_ppb == 1) { - VN_DISPOSE((page_t *)ppa, flag, 0, kcred); - } else { - - for (pindex = 0; pindex < xm->xm_ppb; pindex++) - VN_DISPOSE(ppa[pindex], flag, 0, kcred); - - kmem_free(ppa, sizeof (*ppa) * xm->xm_ppb); - } - page_unresv(xm->xm_ppb); - } -} - -/* - * Initialize a xmemnode and add it to file list under mount point. - */ -void -xmemnode_init(struct xmount *xm, struct xmemnode *xp, - vattr_t *vap, cred_t *cred) -{ - struct vnode *vp; - timestruc_t now; - - ASSERT(vap != NULL); - ASSERT(cred != NULL); - - rw_init(&xp->xn_rwlock, NULL, RW_DEFAULT, NULL); - mutex_init(&xp->xn_tlock, NULL, MUTEX_DEFAULT, NULL); - xp->xn_mode = MAKEIMODE(vap->va_type, vap->va_mode); - - if (S_ISREG(xp->xn_mode)) - xp->xn_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); - - xp->xn_mask = 0; - xp->xn_type = vap->va_type; - xp->xn_nodeid = (ino64_t)(uint32_t)((uintptr_t)xp >> 3); - xp->xn_nlink = 1; - xp->xn_size = 0; - xp->xn_uid = crgetuid(cred); - xp->xn_gid = crgetgid(cred); - - xp->xn_fsid = xm->xm_dev; - xp->xn_rdev = vap->va_rdev; - xp->xn_blksize = PAGESIZE; - xp->xn_nblocks = 0; - gethrestime(&now); - xp->xn_atime = now; - xp->xn_mtime = now; - xp->xn_ctime = now; - xp->xn_seq = 0; - xp->xn_dir = NULL; - - vp = XNTOV(xp); - vn_reinit(vp); - vn_setops(vp, xmem_vnodeops); - vp->v_vfsp = xm->xm_vfsp; - vp->v_type = vap->va_type; - vp->v_rdev = vap->va_rdev; - vp->v_data = (caddr_t)xp; - - mutex_enter(&xm->xm_contents); - /* - * Increment the pseudo generation number for this xmemnode. - * Since xmemnodes are allocated and freed, there really is no - * particular generation number for a new xmemnode. Just fake it - * by using a counter in each file system. - */ - xp->xn_gen = xm->xm_gen++; - - /* - * Add new xmemnode to end of linked list of xmemnodes for this xmemfs - * Root directory is handled specially in xmem_mount. - */ - if (xm->xm_rootnode != (struct xmemnode *)NULL) { - xp->xn_forw = NULL; - xp->xn_back = xm->xm_rootnode->xn_back; - xp->xn_back->xn_forw = xm->xm_rootnode->xn_back = xp; - } - mutex_exit(&xm->xm_contents); -} - -/* - * - */ -int -xmem_fillpages(struct xmemnode *xp, struct vnode *vp, offset_t off, - offset_t len, int zerofill) -{ - uint_t blockno, endblock; - caddr_t base; - int error = 0; - struct xmount *xm = (struct xmount *)VTOXM(vp); - offset_t poff; - size_t bsize = xm->xm_bsize; - - blockno = off >> xm->xm_bshift; - poff = (offset_t)blockno << xm->xm_bshift; - endblock = howmany(off + len, (offset_t)bsize); - - if (endblock > xp->xn_ppasz) - return (EINVAL); - - /* Create missing pages if any */ - for (; blockno < endblock; ) { - if (!xp->xn_ppa[blockno]) { - xp->xn_ppa[blockno] = xmem_acquire_pages(xm, vp, poff); - if (!xp->xn_ppa[blockno]) - return (ENOSPC); - if (zerofill) { - page_t **ppp; - if (xm->xm_ppb == 1) - ppp = (page_t **)&xp->xn_ppa[blockno]; - else - ppp = xp->xn_ppa[blockno]; - - base = segxmem_getmap(xm->xm_map, vp, poff, - bsize, ppp, S_WRITE); - (void) kzero(base, bsize); - segxmem_release(xm->xm_map, base, bsize); - } - xp->xn_nblocks++; - } - blockno++; - poff += bsize; - } - return (error); -} - -/* - * xmemnode_trunc - set length of xmemnode and deal with resources - */ -int -xmemnode_trunc(struct xmount *xm, struct xmemnode *xp, u_offset_t newsize) -{ - u_offset_t oldsize = xp->xn_size; - timestruc_t now; - int error = 0; - size_t zlen; - ulong_t newblocks, oldblocks; - - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - ASSERT(RW_WRITE_HELD(&xp->xn_contents)); - - if (newsize == oldsize) { - /* Required by POSIX */ - goto stamp_out; - } - - switch (xp->xn_type) { - case VREG: - - oldblocks = howmany(oldsize, xm->xm_bsize); - newblocks = howmany(newsize, xm->xm_bsize); - - XMEMPRINTF(4, ("xmemnode_trunc: xp %p old %lx new %lx\n", - xp, oldblocks, newblocks)); - /* - * xn_ppasz is the size of the ppa array which may not - * be fully populated if pages cannot be allocated. - */ - ASSERT(xp->xn_ppasz >= oldblocks); - - /* Growing the file */ - if (newblocks > oldblocks) { - if (xp->xn_ppasz < newblocks) { - page_t ***ppa; - ppa = kmem_zalloc(newblocks * sizeof (*ppa), KM_SLEEP); - if (xp->xn_ppasz) { - bcopy(xp->xn_ppa, ppa, - newblocks * sizeof (*ppa)); - - kmem_free(xp->xn_ppa, - xp->xn_ppasz * sizeof (*ppa)); - } - xp->xn_ppa = ppa; - xp->xn_ppasz = newblocks; - } - } - - /* Free pages if shrinking file over block boundary. */ - if (newblocks < oldblocks) { - uint_t next; - page_t ***ppa = NULL; - next = newblocks; - if (next) { - ppa = kmem_zalloc(next * sizeof (*ppa), - KM_SLEEP); - bcopy(xp->xn_ppa, ppa, next * sizeof (*ppa)); - } - for (; next < oldblocks; next++) { - if (!xp->xn_ppa[next]) - continue; - xmem_release_pages(xm, xp->xn_ppa[next]); - xp->xn_nblocks--; - } - kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); - xp->xn_ppa = ppa; - xp->xn_ppasz = newblocks; - } - - /* - * Update the file size now to reflect the pages we just - * blew away as we're about to drop the - * contents lock to zero the partial page (which could - * re-enter xmemfs via getpage and try to reacquire the lock) - * Once we drop the lock, faulters can fill in holes in - * the file and if we haven't updated the size they - * may fill in holes that are beyond EOF, which will then - * never get cleared. - */ - xp->xn_size = newsize; - - - if (newsize) { - /* Zero new size of file to page boundary. */ - zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET); - rw_exit(&xp->xn_contents); - pvn_vpzero(XNTOV(xp), (u_offset_t)newsize, zlen); - rw_enter(&xp->xn_contents, RW_WRITER); - } - - break; - - case VLNK: - /* - * Don't do anything here - * xmem_inactive frees the memory - */ - if (newsize != 0) - error = EINVAL; - goto out; - case VDIR: - /* - * Remove all the directory entries under this directory. - */ - if (newsize != 0) { - error = EINVAL; - goto out; - } - xdirtrunc(xp); - ASSERT(xp->xn_nlink == 0); - break; - default: - goto out; - } - -stamp_out: - gethrestime(&now); - xp->xn_mtime = now; - xp->xn_ctime = now; -out: - /* - * xmemnode_trunc() cannot fail when newsize == 0. - */ - ASSERT(error == 0 || newsize != 0); - return (error); -} diff --git a/usr/src/uts/intel/fs/xmemfs/xmem_vfsops.c b/usr/src/uts/intel/fs/xmemfs/xmem_vfsops.c deleted file mode 100644 index 40dd8e9647..0000000000 --- a/usr/src/uts/intel/fs/xmemfs/xmem_vfsops.c +++ /dev/null @@ -1,810 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/sysmacros.h> -#include <sys/kmem.h> -#include <sys/time.h> -#include <sys/pathname.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/stat.h> -#include <sys/uio.h> -#include <sys/stat.h> -#include <sys/errno.h> -#include <sys/cmn_err.h> -#include <sys/cred.h> -#include <sys/statvfs.h> -#include <sys/mount.h> -#include <sys/mntent.h> -#include <sys/debug.h> -#include <sys/systm.h> -#include <sys/vmsystm.h> -#include <sys/bitmap.h> -#include <fs/fs_subr.h> -#include <vm/page.h> -#include <sys/model.h> -#include <sys/map.h> -#include <vm/seg_kmem.h> -#include <sys/cpuvar.h> -#include <sys/policy.h> - -#include <sys/fs/swapnode.h> -#include <sys/fs/xmem.h> - -#ifndef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#endif - -/* - * xmemfs vfs operations. - */ -static int xmemfsinit(int, char *); -static int xmem_mount(struct vfs *, struct vnode *, - struct mounta *, struct cred *); -static int xmem_unmount(struct vfs *, int, struct cred *); -static int xmem_root(struct vfs *, struct vnode **); -static int xmem_statvfs(struct vfs *, struct statvfs64 *); -static int xmem_vget(struct vfs *, struct vnode **, struct fid *); - -/* - * Loadable module wrapper - */ -#include <sys/modctl.h> - -static vfsdef_t vfw = { - VFSDEF_VERSION, - "xmemfs", - xmemfsinit, - 0, - NULL -}; - -/* - * Module linkage information - */ -static struct modlfs modlfs = { - &mod_fsops, "filesystem for xmemfs", &vfw -}; - -static struct modlinkage modlinkage = { - MODREV_1, &modlfs, NULL -}; - -pgcnt_t xmemfs_minfree; - -int -_init() -{ - return (mod_install(&modlinkage)); -} - -int -_fini() -{ - return (mod_remove(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -static int xmemfsfstype; -static major_t xmemfs_major; -static minor_t xmemfs_minor; -static kmutex_t xmemfs_minor_lock; - - -/* - * initialize global xmemfs locks and such - * called when loading xmemfs module - */ -static int -xmemfsinit(int fstype, char *name) -{ - static const fs_operation_def_t xmem_vfsops[] = { - VFSNAME_MOUNT, xmem_mount, - VFSNAME_UNMOUNT, xmem_unmount, - VFSNAME_ROOT, xmem_root, - VFSNAME_STATVFS, xmem_statvfs, - VFSNAME_VGET, xmem_vget, - NULL, NULL - }; - int error; - extern void xmemfs_hash_init(); - - error = vfs_setfsops(fstype, xmem_vfsops, NULL); - if (error != 0) { - cmn_err(CE_WARN, "xmemfsinit: bad vfs ops template"); - return (error); - } - - error = vn_make_ops(name, xmem_vnodeops_template, &xmem_vnodeops); - if (error != 0) { - (void) vfs_freevfsops_by_type(fstype); - cmn_err(CE_WARN, "xmemfsinit: bad vnode ops template"); - return (error); - } - - xmemfs_hash_init(); - xmemfsfstype = fstype; - ASSERT(xmemfsfstype != 0); - - if ((xmemfs_major = getudev()) == (major_t)-1) { - cmn_err(CE_WARN, "xmemfsinit: Can't get unique device number."); - xmemfs_major = 0; - } - mutex_init(&xmemfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); - - return (0); -} - - -/* - * xpg is an array of page_t * if xm_ppb > 1. - * xpg is a page_t * if xm_ppb == 1 - */ -void -xpgput(struct xmount *xm, void *xpg) -{ - ASSERT(xm->xm_xpgcnt < xm->xm_max); - xm->xm_xpgarray[xm->xm_xpgcnt++] = xpg; -} - -void * -xpgget(struct xmount *xm) -{ - if (!xm->xm_xpgcnt) - return (NULL); - - return (xm->xm_xpgarray[--xm->xm_xpgcnt]); -} - -void -xpginit(struct xmount *xm) -{ - xm->xm_xpgcnt = 0; - xm->xm_xpgarray = kmem_zalloc(sizeof (void *) * xm->xm_max, KM_SLEEP); -} - -void -xpgtrunc(struct xmount *xm, size_t newsz) -{ - void *old = xm->xm_xpgarray; - - ASSERT(newsz == xm->xm_xpgcnt); - if (newsz) { - xm->xm_xpgarray = - kmem_alloc(sizeof (void *) * newsz, KM_SLEEP); - bcopy(old, xm->xm_xpgarray, sizeof (void *) * newsz); - } - kmem_free(old, sizeof (void *) * xm->xm_max); -} - -void -xpgdeinit(struct xmount *xm) -{ - xm->xm_xpgcnt = 0; - if (xm->xm_max) - kmem_free(xm->xm_xpgarray, sizeof (void *) * xm->xm_max); - xm->xm_xpgarray = NULL; -} - - -struct xmount *xmountp; /* ### DEBUG */ - -#define XFREE(xm, xp) \ - vn_free(xp->xn_vnode); \ - xmem_memfree(xp, sizeof (struct xmemnode)); \ - rmfreemap(xm->xm_map); \ - xmem_memfree(xm->xm_mntpath, strlen(xm->xm_mntpath) + 1); \ - xpgdeinit(xm); \ - xmem_memfree(xm, sizeof (struct xmount)); - - -static int -xmem_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, - struct cred *cr) -{ - struct xmount *xm; - struct xmemnode *xp; - struct pathname dpn; - char *data = uap->dataptr; - int datalen = uap->datalen; - int error; - struct xmemfs_args xargs; - struct vattr rattr; - int got_attrs, num_pagesizes; - uint_t blocks_left; - size_t frag; - - XMEMPRINTF(1, ("xmem_mount: vfs %p mvp %p uap %p cr %p\n", - (void *)vfsp, (void *)mvp, (void *)uap, (void *)cr)); - - if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) - return (error); - - if (mvp->v_type != VDIR) - return (ENOTDIR); - - /* - * Force non-executable files by setting the "noexec" option - * which will be interpreted by the VFS layer. - */ - vfs_setmntopt(vfsp, MNTOPT_NOEXEC, NULL, 0); - - mutex_enter(&mvp->v_lock); - if ((uap->flags & MS_OVERLAY) == 0 && - (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { - mutex_exit(&mvp->v_lock); - return (EBUSY); - } - mutex_exit(&mvp->v_lock); - - /* - * Get arguments - */ - if (datalen != 0) { - if (datalen != sizeof (xargs)) - return (EINVAL); - else { - if (copyin(data, &xargs, sizeof (xargs))) - return (EFAULT); - } - if (xargs.xa_bsize == 0) - xargs.xa_bsize = PAGESIZE; - } else { - xargs.xa_bsize = PAGESIZE; - xargs.xa_flags = 0; - xargs.xa_fssize = 0; - } - - XMEMPRINTF(1, ("xmem_mount: xa bsize %llx fssize %llx flags %x\n", - xargs.xa_bsize, xargs.xa_fssize, xargs.xa_flags)); - - num_pagesizes = page_num_pagesizes(); - - if (xargs.xa_flags & XARGS_LARGEPAGES) - xargs.xa_bsize = page_get_pagesize(num_pagesizes - 1); - - /* Make sure xa_bsize is a pure power of two */ - if (!IS_P2ALIGNED(xargs.xa_bsize, xargs.xa_bsize - 1)) { - cmn_err(CE_WARN, "xmemfs: invalid blocksize %x", - (int)xargs.xa_bsize); - xargs.xa_bsize = PAGESIZE; - } - - while (--num_pagesizes >= 0) - if (xargs.xa_bsize == page_get_pagesize(num_pagesizes)) - break; - - if (num_pagesizes < 0) { - cmn_err(CE_WARN, - "xmemfs: blocksize %lld not a natural pagesize", - xargs.xa_bsize); - xargs.xa_bsize = PAGESIZE; - } - - if (error = pn_get(uap->dir, UIO_USERSPACE, &dpn)) - return (error); - - xm = xmem_memalloc(sizeof (struct xmount), 1); - - xmountp = xm; - - XMEMPRINTF(4, ("xmem_mount: xm %p\n", (void *)xm)); - - xm->xm_mntpath = xmem_memalloc(dpn.pn_pathlen + 1, 1); - (void) strcpy(xm->xm_mntpath, dpn.pn_path); - pn_free(&dpn); - - xm->xm_vmmapsize = xm->xm_mapsize = - xargs.xa_bsize * SEGXMEM_NUM_SIMULMAPS; - - /* need to allocate more to ensure alignment if largepage */ - - if (xargs.xa_bsize != PAGESIZE) - xm->xm_vmmapsize += xargs.xa_bsize; - - /* Set block size & max memory allowed for the file system */ - xm->xm_bsize = (size_t)xargs.xa_bsize; - xm->xm_bshift = highbit(xargs.xa_bsize) - 1; - - /* - * 5 * lotsfree satisfies XMEMMINFREE for 4 GB of memory and above. - */ - xmemfs_minfree = min(5 * lotsfree, XMEMMINFREE/PAGESIZE); - - if (xargs.xa_fssize) { - - pgcnt_t fspgcnt; - - xargs.xa_fssize = roundup(xargs.xa_fssize, xm->xm_bsize); - - fspgcnt = xargs.xa_fssize >> PAGESHIFT; - - /* sanity check this against freemem */ - if (fspgcnt + xmemfs_minfree > freemem) { - xmem_memfree(xm->xm_mntpath, - strlen(xm->xm_mntpath) + 1); - xmem_memfree(xm, sizeof (struct xmount)); - return (EFBIG); - } - xm->xm_max = xargs.xa_fssize >> xm->xm_bshift; - } else { - /* - * fssize is mandatory - should not be here but if - * fssize == 0 is allowed, grab all of free memory - * minus xmemfs_minfree. - */ - - if (freemem < xmemfs_minfree) - xm->xm_max = 0; - else - xm->xm_max = freemem - xmemfs_minfree; - - xm->xm_max >>= xm->xm_bshift - PAGESHIFT; - } - - xm->xm_ppb = btop(xm->xm_bsize); /* pages per block */ - - - XMEMPRINTF(1, ("xmem_mount: xm_max %lx xm_bsize %lx\n", - xm->xm_max, xm->xm_bsize)); - - /* - * Allocate a map to provide an address for each page in - * (xargs.xa_bsize * 4) and free all of them. - */ - xm->xm_map = rmallocmap_wait(xm->xm_mapsize / PAGESIZE); - - xpginit(xm); - - xp = xmem_memalloc(sizeof (struct xmemnode), 1); - xp->xn_vnode = vn_alloc(KM_SLEEP); - - /* - * do not SLEEP waiting for memory resources after vmem_alloc - */ - - xm->xm_vmmapaddr = xm->xm_mapaddr = - vmem_alloc(heap_arena, xm->xm_vmmapsize, VM_NOSLEEP); - - if (!xm->xm_mapaddr) { - XFREE(xm, xp); - return (ENOMEM); - } - - if ((frag = ((uintptr_t)xm->xm_mapaddr & - ((uintptr_t)xargs.xa_bsize - 1))) != 0) - xm->xm_mapaddr += (xargs.xa_bsize - frag); - - rmfree(xm->xm_map, xm->xm_mapsize, (ulong_t)xm->xm_mapaddr); - - if (xargs.xa_flags & XARGS_RESERVEMEM) { - struct seg tmpseg; - - /* grab all memory now */ - blocks_left = xm->xm_max; - bzero(&tmpseg, sizeof (struct seg)); - tmpseg.s_as = &kas; - - if (page_resv(xm->xm_max * xm->xm_ppb, KM_NOSLEEP) == 0) { - vmem_free(heap_arena, xm->xm_vmmapaddr, - xm->xm_vmmapsize); - XFREE(xm, xp); - return (ENOMEM); - } - - while (blocks_left) { - page_t *pp, *pplist; - page_t **ppa; - int i; - - /* - * optimise for ppb == 1 - let xp_ppa point directly - * to page. - */ - - if (xm->xm_ppb > 1) { - ppa = kmem_alloc(sizeof (page_t *) * xm->xm_ppb, - KM_NOSLEEP); - - if (!ppa) { - xpgtrunc(xm, xm->xm_max - blocks_left); - xm->xm_max -= blocks_left; - page_unresv(blocks_left * xm->xm_ppb); - if (xargs.xa_fssize) - cmn_err(CE_WARN, - "could only reserve %d blocks " - "for xmemfs", (int)xm->xm_max); - break; - } - } - - (void) page_create_wait(xm->xm_ppb, PG_WAIT); - pplist = page_get_freelist(NULL, 0, &tmpseg, NULL, - xm->xm_bsize, 0, NULL); - - if (pplist == NULL && xm->xm_ppb == 1) { - pplist = page_get_cachelist(NULL, 0, &tmpseg, - NULL, 0, NULL); - } - - if (pplist == NULL) { - page_create_putback(xm->xm_ppb); - if (xm->xm_ppb > 1) - kmem_free(ppa, sizeof (page_t *) * - xm->xm_ppb); - xpgtrunc(xm, xm->xm_max - blocks_left); - xm->xm_max -= blocks_left; - page_unresv(blocks_left * xm->xm_ppb); - if (xargs.xa_fssize) - cmn_err(CE_WARN, - "could only reserve %d blocks " - "for xmemfs", (int)xm->xm_max); - break; - } - - if (PP_ISAGED(pplist) == 0) { - ASSERT(xm->xm_ppb == 1); - page_hashout(pplist, NULL); - } - - for (i = 0; i < xm->xm_ppb; i++) { - pp = pplist; - page_sub(&pplist, pp); - ASSERT(PAGE_EXCL(pp)); - ASSERT(pp->p_vnode == NULL); - ASSERT(!hat_page_is_mapped(pp)); - PP_CLRFREE(pp); - PP_CLRAGED(pp); - if (xm->xm_ppb == 1) - ppa = (page_t **)pp; - else - ppa[i] = pp; - } - - xpgput(xm, ppa); - blocks_left--; - } - if (!xm->xm_xpgcnt) { - /* No pages at all */ - page_unresv(xm->xm_max * xm->xm_ppb); - vmem_free(heap_arena, xm->xm_vmmapaddr, - xm->xm_vmmapsize); - XFREE(xm, xp); - return (ENOMEM); - } - xm->xm_flags |= XARGS_RESERVEMEM; - } - xm->xm_bsize = (size_t)xargs.xa_bsize; - - /* - * find an available minor device number for this mount - */ - mutex_enter(&xmemfs_minor_lock); - do { - xmemfs_minor = (xmemfs_minor + 1) & L_MAXMIN32; - xm->xm_dev = makedevice(xmemfs_major, xmemfs_minor); - } while (vfs_devismounted(xm->xm_dev)); - mutex_exit(&xmemfs_minor_lock); - - /* - * Set but don't bother entering the mutex - * (xmount not on mount list yet) - */ - mutex_init(&xm->xm_contents, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&xm->xm_renamelck, NULL, MUTEX_DEFAULT, NULL); - - xm->xm_vfsp = vfsp; - - vfsp->vfs_data = (caddr_t)xm; - vfsp->vfs_fstype = xmemfsfstype; - vfsp->vfs_dev = xm->xm_dev; - vfsp->vfs_bsize = xm->xm_bsize; - vfsp->vfs_flag |= VFS_NOTRUNC; - vfs_make_fsid(&vfsp->vfs_fsid, xm->xm_dev, xmemfsfstype); - - /* - * allocate and initialize root xmemnode structure - */ - bzero(&rattr, sizeof (struct vattr)); - rattr.va_mode = (mode_t)(S_IFDIR | 0777); - rattr.va_type = VDIR; - rattr.va_rdev = 0; - xmemnode_init(xm, xp, &rattr, cr); - - /* - * Get the mode, uid, and gid from the underlying mount point. - */ - rattr.va_mask = AT_MODE|AT_UID|AT_GID; /* Hint to getattr */ - got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr); - - rw_enter(&xp->xn_rwlock, RW_WRITER); - XNTOV(xp)->v_flag |= VROOT; - - /* - * If the getattr succeeded, use its results. Otherwise allow - * the previously set hardwired defaults to prevail. - */ - if (got_attrs == 0) { - xp->xn_mode = rattr.va_mode; - xp->xn_uid = rattr.va_uid; - xp->xn_gid = rattr.va_gid; - } - - /* - * initialize linked list of xmemnodes so that the back pointer of - * the root xmemnode always points to the last one on the list - * and the forward pointer of the last node is null. - */ - xp->xn_back = xp; - xp->xn_forw = NULL; - xp->xn_nlink = 0; - xm->xm_rootnode = xp; - - xdirinit(xp, xp); - - rw_exit(&xp->xn_rwlock); - - return (0); -} - -static int -xmem_unmount(struct vfs *vfsp, int flag, struct cred *cr) -{ - struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - struct xmemnode *xp; - - if (secpolicy_fs_unmount(cr, vfsp) != 0) - return (EPERM); - /* - * forced unmount is not supported by this file system - * and thus, ENOTSUP, is being returned. - */ - if (flag & MS_FORCE) - return (ENOTSUP); - - mutex_enter(&xm->xm_contents); - - /* - * Don't close down the xmemfs if there are open files. - * There should be only one file referenced (the rootnode) - * and only one reference to the vnode for that file. - */ - xp = xm->xm_rootnode; - if (XNTOV(xp)->v_count > 1) { - mutex_exit(&xm->xm_contents); - return (EBUSY); - } - - for (xp = xp->xn_forw; xp; xp = xp->xn_forw) { - if (XNTOV(xp)->v_count > 0) { - mutex_exit(&xm->xm_contents); - return (EBUSY); - } - } - - /* - * We can drop the mutex now because no one can find this mount - */ - mutex_exit(&xm->xm_contents); - - /* - * Free all kmemalloc'd and non-anonalloc'd memory associated with - * this filesystem. To do this, we go through the file list twice, - * once to remove all the directory entries, and then to remove - * all the files. We do this because there is useful code in - * xmemnode_free which assumes that the directory entry has been - * removed before the file. - */ - /* - * Remove all directory entries - */ - for (xp = xm->xm_rootnode; xp; xp = xp->xn_forw) { - rw_enter(&xp->xn_rwlock, RW_WRITER); - if (xp->xn_type == VDIR) - xdirtrunc(xp); - rw_exit(&xp->xn_rwlock); - } - - ASSERT(xm->xm_rootnode); - - /* - * We re-acquire the lock to prevent others who have a HOLD on - * a xmemnode via its pages from blowing it away - * (in xmem_inactive) while we're trying to get to it here. Once - * we have a HOLD on it we know it'll stick around. - */ - mutex_enter(&xm->xm_contents); - /* - * Remove all the files (except the rootnode) backwards. - */ - while ((xp = xm->xm_rootnode->xn_back) != xm->xm_rootnode) { - /* - * Blow the xmemnode away by HOLDing it and RELE'ing it. - * The RELE calls inactive and blows it away because there - * we have the last HOLD. - */ - VN_HOLD(XNTOV(xp)); - mutex_exit(&xm->xm_contents); - VN_RELE(XNTOV(xp)); - mutex_enter(&xm->xm_contents); - /* - * It's still there after the RELE. Someone else like pageout - * has a hold on it so wait a bit and then try again - we know - * they'll give it up soon. - */ - if (xp == xm->xm_rootnode->xn_back) { - mutex_exit(&xm->xm_contents); - delay(hz / 4); - mutex_enter(&xm->xm_contents); - } - } - if (xm->xm_flags & XARGS_RESERVEMEM) { - page_t **ppa; - uint_t pindex; - - while ((ppa = xpgget(xm)) != NULL) { - if (xm->xm_ppb == 1) { - /*LINTED*/ - VN_DISPOSE((page_t *)ppa, B_FREE, 0, kcred); - continue; - } - /* free each page */ - for (pindex = 0; pindex < xm->xm_ppb; pindex++) { - ASSERT(ppa[pindex]->p_szc); - ppa[pindex]->p_szc = 0; - /*LINTED*/ - VN_DISPOSE(ppa[pindex], B_FREE, 0, kcred); - } - kmem_free(ppa, sizeof (*ppa) * xm->xm_ppb); - } - xpgdeinit(xm); - page_unresv(xm->xm_max * xm->xm_ppb); - } - mutex_exit(&xm->xm_contents); - - VN_RELE(XNTOV(xm->xm_rootnode)); - - ASSERT(xm->xm_mntpath); - - xmem_memfree(xm->xm_mntpath, strlen(xm->xm_mntpath) + 1); - - mutex_destroy(&xm->xm_contents); - mutex_destroy(&xm->xm_renamelck); - vmem_free(heap_arena, xm->xm_vmmapaddr, xm->xm_vmmapsize); - rmfreemap(xm->xm_map); - xmem_memfree(xm, sizeof (struct xmount)); - - return (0); -} - -/* - * return root xmemnode for given vnode - */ -static int -xmem_root(struct vfs *vfsp, struct vnode **vpp) -{ - struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - struct xmemnode *xp = xm->xm_rootnode; - struct vnode *vp; - - ASSERT(xp); - - vp = XNTOV(xp); - VN_HOLD(vp); - *vpp = vp; - return (0); -} - -static int -xmem_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) -{ - struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - long blocks; - dev32_t d32; - - sbp->f_bsize = xm->xm_bsize; - sbp->f_frsize = xm->xm_bsize; /* No fragmentation for now ? */ - - /* - * Find the amount of available physical and memory swap - */ - if (xm->xm_flags & XARGS_RESERVEMEM) - blocks = xm->xm_max - xm->xm_mem; - else - blocks = MAX((long)(freemem - lotsfree - xmemfs_minfree), 0); - - sbp->f_bavail = sbp->f_bfree = (fsblkcnt64_t)blocks; - - /* - * Total number of blocks is what's available plus what's been used - */ - sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + xm->xm_mem); - - /* - * return a somewhat arbitrary number of inodes available - */ - sbp->f_favail = sbp->f_ffree = (fsfilcnt64_t)((xm->xm_max/1024)+1); - (void) cmpldev(&d32, vfsp->vfs_dev); - sbp->f_fsid = d32; - (void) strcpy(sbp->f_basetype, vfssw[xmemfsfstype].vsw_name); - (void) strcpy(sbp->f_fstr, xm->xm_mntpath); - sbp->f_flag = vf_to_stf(vfsp->vfs_flag); - sbp->f_namemax = MAXNAMELEN - 1; - return (0); -} - -static int -xmem_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) -{ - register struct xfid *xfid; - register struct xmount *xm = (struct xmount *)VFSTOXM(vfsp); - register struct xmemnode *xp = NULL; - - xfid = (struct xfid *)fidp; - *vpp = NULL; - - mutex_enter(&xm->xm_contents); - for (xp = xm->xm_rootnode; xp; xp = xp->xn_forw) { - mutex_enter(&xp->xn_tlock); - if (xp->xn_nodeid == xfid->xfid_ino) { - /* - * If the gen numbers don't match we know the - * file won't be found since only one xmemnode - * can have this number at a time. - */ - if (xp->xn_gen != xfid->xfid_gen || xp->xn_nlink == 0) { - mutex_exit(&xp->xn_tlock); - mutex_exit(&xm->xm_contents); - return (0); - } - *vpp = (struct vnode *)XNTOV(xp); - - VN_HOLD(*vpp); - - if ((xp->xn_mode & S_ISVTX) && - !(xp->xn_mode & (S_IXUSR | S_IFDIR))) { - mutex_enter(&(*vpp)->v_lock); - (*vpp)->v_flag |= VISSWAP; - mutex_exit(&(*vpp)->v_lock); - } - mutex_exit(&xp->xn_tlock); - mutex_exit(&xm->xm_contents); - return (0); - } - mutex_exit(&xp->xn_tlock); - } - mutex_exit(&xm->xm_contents); - return (0); -} diff --git a/usr/src/uts/intel/fs/xmemfs/xmem_vnops.c b/usr/src/uts/intel/fs/xmemfs/xmem_vnops.c deleted file mode 100644 index de6f99f6cb..0000000000 --- a/usr/src/uts/intel/fs/xmemfs/xmem_vnops.c +++ /dev/null @@ -1,1736 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/t_lock.h> -#include <sys/systm.h> -#include <sys/sysmacros.h> -#include <sys/user.h> -#include <sys/time.h> -#include <sys/vfs.h> -#include <sys/vnode.h> -#include <sys/file.h> -#include <sys/fcntl.h> -#include <sys/flock.h> -#include <sys/kmem.h> -#include <sys/uio.h> -#include <sys/errno.h> -#include <sys/stat.h> -#include <sys/cred.h> -#include <sys/dirent.h> -#include <sys/pathname.h> -#include <sys/vmsystm.h> -#include <sys/map.h> -#include <sys/fs/xmem.h> -#include <sys/mman.h> -#include <vm/hat.h> -#include <vm/seg.h> -#include <vm/as.h> -#include <vm/page.h> -#include <vm/pvn.h> -#include <sys/cmn_err.h> -#include <sys/debug.h> -#include <sys/swap.h> -#include <sys/buf.h> -#include <sys/vm.h> -#include <sys/vtrace.h> -#include <sys/policy.h> -#include <fs/fs_subr.h> - -static int xmem_getapage(struct vnode *, u_offset_t, size_t, uint_t *, - page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); - -#ifndef lint -static int xmem_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, - int, struct cred *); -#endif - - -/* ARGSUSED1 */ -static int -xmem_open(struct vnode **vpp, int flag, struct cred *cred) -{ - /* - * swapon to a xmemfs file is not supported so access - * is denied on open if VISSWAP is set. - */ - if ((*vpp)->v_flag & VISSWAP) - return (EINVAL); - return (0); -} - -/* ARGSUSED1 */ -static int -xmem_close(struct vnode *vp, int flag, int count, offset_t offset, - struct cred *cred) -{ - cleanlocks(vp, ttoproc(curthread)->p_pid, 0); - cleanshares(vp, ttoproc(curthread)->p_pid); - return (0); -} - - -/* - * wrxmem does the real work of write requests for xmemfs. - */ -static int -wrxmem(struct xmount *xm, struct xmemnode *xp, struct uio *uio, - struct cred *cr, struct caller_context *ct) -{ - uint_t blockoffset; /* offset in the block */ - uint_t blkwr; /* offset in blocks into xmem file */ - uint_t blkcnt; - caddr_t base; - ssize_t bytes; /* bytes to uiomove */ - struct vnode *vp; - int error = 0; - size_t bsize = xm->xm_bsize; - rlim64_t limit = uio->uio_llimit; - long oresid = uio->uio_resid; - timestruc_t now; - offset_t offset; - - /* - * xp->xn_size is incremented before the uiomove - * is done on a write. If the move fails (bad user - * address) reset xp->xn_size. - * The better way would be to increment xp->xn_size - * only if the uiomove succeeds. - */ - long xn_size_changed = 0; - offset_t old_xn_size; - - vp = XNTOV(xp); - ASSERT(vp->v_type == VREG); - - XMEMPRINTF(1, ("wrxmem: vp %p resid %lx off %llx\n", - (void *)vp, uio->uio_resid, uio->uio_loffset)); - - ASSERT(RW_WRITE_HELD(&xp->xn_contents)); - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - - if (MANDLOCK(vp, xp->xn_mode)) { - rw_exit(&xp->xn_contents); - /* - * xmem_getattr ends up being called by chklock - */ - error = chklock(vp, FWRITE, - uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); - - rw_enter(&xp->xn_contents, RW_WRITER); - if (error != 0) { - XMEMPRINTF(8, ("wrxmem: vp %p error %x\n", - (void *)vp, error)); - return (error); - } - } - - if ((offset = uio->uio_loffset) < 0) - return (EINVAL); - - if (offset >= limit) { - proc_t *p = ttoproc(curthread); - - mutex_enter(&p->p_lock); - (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, - p, RCA_UNSAFE_SIGINFO); - mutex_exit(&p->p_lock); - return (EFBIG); - } - - if (uio->uio_resid == 0) { - XMEMPRINTF(8, ("wrxmem: vp %p resid %lx\n", - (void *)vp, uio->uio_resid)); - return (0); - } - - /* - * Get the highest blocknumber and allocate page array if needed. - * Note that if xm_bsize != PAGESIZE, each ppa[] is pointer to - * a page array rather than just a page. - */ - blkcnt = howmany((offset + uio->uio_resid), bsize); - blkwr = offset >> xm->xm_bshift; /* write begins here */ - - XMEMPRINTF(1, ("wrxmem: vp %p blkcnt %x blkwr %x xn_ppasz %lx\n", - (void *)vp, blkcnt, blkwr, xp->xn_ppasz)); - - /* file size increase */ - if (xp->xn_ppasz < blkcnt) { - - page_t ***ppa; - int ppasz; - uint_t blksinfile = howmany(xp->xn_size, bsize); - - /* - * check if sufficient blocks available for the given offset. - */ - if (blkcnt - blksinfile > xm->xm_max - xm->xm_mem) - return (ENOSPC); - - /* - * to prevent reallocating every time the file grows by a - * single block, double the size of the array. - */ - if (blkcnt < xp->xn_ppasz * 2) - ppasz = xp->xn_ppasz * 2; - else - ppasz = blkcnt; - - - ppa = kmem_zalloc(ppasz * sizeof (page_t **), KM_SLEEP); - - ASSERT(ppa); - - if (xp->xn_ppasz) { - bcopy(xp->xn_ppa, ppa, blksinfile * sizeof (*ppa)); - kmem_free(xp->xn_ppa, xp->xn_ppasz * sizeof (*ppa)); - } - xp->xn_ppa = ppa; - xp->xn_ppasz = ppasz; - - /* - * fill in the 'hole' if write offset beyond file size. This - * helps in creating large files quickly; an application can - * lseek to a large offset and perform a single write - * operation to create the large file. - */ - - if (blksinfile < blkwr) { - - old_xn_size = xp->xn_size; - xp->xn_size = (offset_t)blkwr * bsize; - - XMEMPRINTF(4, ("wrxmem: fill vp %p blks %x to %x\n", - (void *)vp, blksinfile, blkcnt - 1)); - error = xmem_fillpages(xp, vp, - (offset_t)blksinfile * bsize, - (offset_t)(blkcnt - blksinfile) * bsize, 1); - if (error) { - /* truncate file back to original size */ - (void) xmemnode_trunc(xm, xp, old_xn_size); - return (error); - } - /* - * if error on blkwr, this allows truncation of the - * filled hole. - */ - xp->xn_size = old_xn_size; - } - } - - do { - offset_t pagestart, pageend; - page_t **ppp; - - blockoffset = (uint_t)offset & (bsize - 1); - /* - * A maximum of xm->xm_bsize bytes of data is transferred - * each pass through this loop - */ - bytes = MIN(bsize - blockoffset, uio->uio_resid); - - ASSERT(bytes); - - if (offset + bytes >= limit) { - if (offset >= limit) { - error = EFBIG; - goto out; - } - bytes = limit - offset; - } - - - if (!xp->xn_ppa[blkwr]) { - /* zero fill new pages - simplify partial updates */ - error = xmem_fillpages(xp, vp, offset, bytes, 1); - if (error) - return (error); - } - - /* grow the file to the new length */ - if (offset + bytes > xp->xn_size) { - xn_size_changed = 1; - old_xn_size = xp->xn_size; - xp->xn_size = offset + bytes; - } - -#ifdef LOCKNEST - xmem_getpage(); -#endif - - /* xn_ppa[] is a page_t * if ppb == 1 */ - if (xm->xm_ppb == 1) - ppp = (page_t **)&xp->xn_ppa[blkwr]; - else - ppp = &xp->xn_ppa[blkwr][btop(blockoffset)]; - - pagestart = offset & ~(offset_t)(PAGESIZE - 1); - /* - * subtract 1 in case (offset + bytes) is mod PAGESIZE - * so that pageend is the actual index of last page. - */ - pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); - - base = segxmem_getmap(xm->xm_map, vp, - pagestart, pageend - pagestart + PAGESIZE, - ppp, S_WRITE); - - rw_exit(&xp->xn_contents); - - error = uiomove(base + (offset - pagestart), bytes, - UIO_WRITE, uio); - segxmem_release(xm->xm_map, base, - pageend - pagestart + PAGESIZE); - - /* - * Re-acquire contents lock. - */ - rw_enter(&xp->xn_contents, RW_WRITER); - /* - * If the uiomove failed, fix up xn_size. - */ - if (error) { - if (xn_size_changed) { - /* - * The uiomove failed, and we - * allocated blocks,so get rid - * of them. - */ - (void) xmemnode_trunc(xm, xp, old_xn_size); - } - } else { - if ((xp->xn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) && - (xp->xn_mode & (S_ISUID | S_ISGID)) && - secpolicy_vnode_setid_retain(cr, - (xp->xn_mode & S_ISUID) != 0 && xp->xn_uid == 0) - != 0) { - - /* - * Clear Set-UID & Set-GID bits on - * successful write if not privileged - * and at least one of the execute bits - * is set. If we always clear Set-GID, - * mandatory file and record locking is - * unuseable. - */ - xp->xn_mode &= ~(S_ISUID | S_ISGID); - } - gethrestime(&now); - xp->xn_mtime = now; - xp->xn_ctime = now; - } - offset = uio->uio_loffset; /* uiomove sets uio_loffset */ - blkwr++; - } while (error == 0 && uio->uio_resid > 0 && bytes != 0); - -out: - /* - * If we've already done a partial-write, terminate - * the write but return no error. - */ - if (oresid != uio->uio_resid) - error = 0; - return (error); -} - -/* - * rdxmem does the real work of read requests for xmemfs. - */ -static int -rdxmem( - struct xmount *xm, - struct xmemnode *xp, - struct uio *uio, - struct caller_context *ct) -{ - ulong_t blockoffset; /* offset in xmemfs file (uio_offset) */ - caddr_t base; - ssize_t bytes; /* bytes to uiomove */ - struct vnode *vp; - int error; - uint_t blocknumber; - long oresid = uio->uio_resid; - size_t bsize = xm->xm_bsize; - offset_t offset; - - vp = XNTOV(xp); - - XMEMPRINTF(1, ("rdxmem: vp %p\n", (void *)vp)); - - ASSERT(RW_LOCK_HELD(&xp->xn_contents)); - - if (MANDLOCK(vp, xp->xn_mode)) { - rw_exit(&xp->xn_contents); - /* - * xmem_getattr ends up being called by chklock - */ - error = chklock(vp, FREAD, - uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct); - rw_enter(&xp->xn_contents, RW_READER); - if (error != 0) { - XMEMPRINTF(1, - ("rdxmem: vp %p error %x\n", (void *)vp, error)); - return (error); - } - } - ASSERT(xp->xn_type == VREG); - - if ((offset = uio->uio_loffset) >= MAXOFF_T) { - XMEMPRINTF(1, ("rdxmem: vp %p bad offset %llx\n", - (void *)vp, uio->uio_loffset)); - return (0); - } - if (offset < 0) - return (EINVAL); - - if (uio->uio_resid == 0) { - XMEMPRINTF(1, ("rdxmem: vp %p resid 0\n", (void *)vp)); - return (0); - } - - blocknumber = offset >> xm->xm_bshift; - do { - offset_t diff, pagestart, pageend; - uint_t pageinblock; - - blockoffset = offset & (bsize - 1); - /* - * A maximum of xm->xm_bsize bytes of data is transferred - * each pass through this loop - */ - bytes = MIN(bsize - blockoffset, uio->uio_resid); - - diff = xp->xn_size - offset; - - if (diff <= 0) { - error = 0; - goto out; - } - if (diff < bytes) - bytes = diff; - - if (!xp->xn_ppa[blocknumber]) - if (error = xmem_fillpages(xp, vp, offset, bytes, 1)) { - return (error); - } - /* - * We have to drop the contents lock to prevent the VM - * system from trying to reacquire it in xmem_getpage() - * should the uiomove cause a pagefault. - */ - rw_exit(&xp->xn_contents); - -#ifdef LOCKNEST - xmem_getpage(); -#endif - - /* 2/10 panic in hat_memload_array - len & MMU_OFFSET */ - - pagestart = offset & ~(offset_t)(PAGESIZE - 1); - pageend = (offset + bytes - 1) & ~(offset_t)(PAGESIZE - 1); - if (xm->xm_ppb == 1) - base = segxmem_getmap(xm->xm_map, vp, - pagestart, pageend - pagestart + PAGESIZE, - (page_t **)&xp->xn_ppa[blocknumber], S_READ); - else { - pageinblock = btop(blockoffset); - base = segxmem_getmap(xm->xm_map, vp, - pagestart, pageend - pagestart + PAGESIZE, - &xp->xn_ppa[blocknumber][pageinblock], S_READ); - - } - error = uiomove(base + (blockoffset & (PAGESIZE - 1)), - bytes, UIO_READ, uio); - - segxmem_release(xm->xm_map, base, - pageend - pagestart + PAGESIZE); - /* - * Re-acquire contents lock. - */ - rw_enter(&xp->xn_contents, RW_READER); - - offset = uio->uio_loffset; - blocknumber++; - } while (error == 0 && uio->uio_resid > 0); - -out: - gethrestime(&xp->xn_atime); - - /* - * If we've already done a partial read, terminate - * the read but return no error. - */ - if (oresid != uio->uio_resid) - error = 0; - - return (error); -} - -/* ARGSUSED2 */ -static int -xmem_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred, - struct caller_context *ct) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - int error; - - /* - * We don't currently support reading non-regular files - */ - if (vp->v_type != VREG) - return (EINVAL); - /* - * xmem_rwlock should have already been called from layers above - */ - ASSERT(RW_READ_HELD(&xp->xn_rwlock)); - - rw_enter(&xp->xn_contents, RW_READER); - - error = rdxmem(xm, xp, uiop, ct); - - rw_exit(&xp->xn_contents); - - return (error); -} - -static int -xmem_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, - struct caller_context *ct) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - int error; - - /* - * We don't currently support writing to non-regular files - */ - if (vp->v_type != VREG) - return (EINVAL); /* XXX EISDIR? */ - - /* - * xmem_rwlock should have already been called from layers above - */ - ASSERT(RW_WRITE_HELD(&xp->xn_rwlock)); - - rw_enter(&xp->xn_contents, RW_WRITER); - - if (ioflag & FAPPEND) { - /* - * In append mode start at end of file. - */ - uiop->uio_loffset = xp->xn_size; - } - - error = wrxmem(xm, xp, uiop, cred, ct); - - rw_exit(&xp->xn_contents); - - return (error); -} - -/* ARGSUSED */ -static int -xmem_ioctl(struct vnode *vp, int com, intptr_t data, int flag, - struct cred *cred, int *rvalp) -{ - return (ENOTTY); -} - -/* ARGSUSED2 */ -static int -xmem_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - - mutex_enter(&xp->xn_tlock); - - *vap = xp->xn_attr; - - vap->va_mode = xp->xn_mode & MODEMASK; - vap->va_type = vp->v_type; - vap->va_blksize = xm->xm_bsize; - vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size))); - - mutex_exit(&xp->xn_tlock); - return (0); -} - -/*ARGSUSED*/ -static int -xmem_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred, - caller_context_t *ct) -{ - struct xmount *xm = (struct xmount *)VTOXM(vp); - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - int error; - struct vattr *get; - register long int mask = vap->va_mask; - - /* - * Cannot set these attributes - */ - if (mask & AT_NOSET) - return (EINVAL); - - mutex_enter(&xp->xn_tlock); - - get = &xp->xn_attr; - - error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, - xmem_xaccess, xp); - - if (error != 0) - goto out; - - mask = vap->va_mask; - - /* - * Change file access modes. - */ - if (mask & AT_MODE) { - /* prevent execute permission to be set for regular files */ - if (S_ISREG(get->va_mode)) - vap->va_mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH); - - XMEMPRINTF(1, ("xmem_setattr: va_mode old %x new %x\n", - get->va_mode, vap->va_mode)); - - get->va_mode &= S_IFMT; - get->va_mode |= vap->va_mode & ~S_IFMT; - } - - if (mask & AT_UID) - get->va_uid = vap->va_uid; - if (mask & AT_GID) - get->va_gid = vap->va_gid; - if (mask & AT_ATIME) - get->va_atime = vap->va_atime; - if (mask & AT_MTIME) - get->va_mtime = vap->va_mtime; - if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME)) - gethrestime(&get->va_ctime); - - if (mask & AT_SIZE) { - if (vp->v_type == VDIR) { - error = EISDIR; - goto out; - } - /* Don't support large files. */ - if (vap->va_size > MAXOFF_T) { - error = EFBIG; - goto out; - } - if (error = xmem_xaccess(xp, VWRITE, cred)) - goto out; - mutex_exit(&xp->xn_tlock); - - rw_enter(&xp->xn_rwlock, RW_WRITER); - rw_enter(&xp->xn_contents, RW_WRITER); - error = xmemnode_trunc(xm, xp, vap->va_size); - rw_exit(&xp->xn_contents); - rw_exit(&xp->xn_rwlock); - goto out1; - } -out: - mutex_exit(&xp->xn_tlock); -out1: - return (error); -} - -/* ARGSUSED2 */ -static int -xmem_access(struct vnode *vp, int mode, int flags, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - int error; - - mutex_enter(&xp->xn_tlock); - error = xmem_xaccess(xp, mode, cred); - mutex_exit(&xp->xn_tlock); - return (error); -} - -/* ARGSUSED3 */ -static int -xmem_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, - struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *nxp = NULL; - int error; - - /* - * Null component name is a synonym for directory being searched. - */ - if (*nm == '\0') { - VN_HOLD(dvp); - *vpp = dvp; - return (0); - } - ASSERT(xp); - - error = xdirlookup(xp, nm, &nxp, cred); - - if (error == 0) { - ASSERT(nxp); - *vpp = XNTOV(nxp); - /* - * If vnode is a device return special vnode instead - */ - if (IS_DEVVP(*vpp)) { - struct vnode *newvp; - - newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, - cred); - VN_RELE(*vpp); - *vpp = newvp; - } - } - return (error); -} - -/*ARGSUSED7*/ -static int -xmem_create(struct vnode *dvp, char *nm, struct vattr *vap, - enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, - int flag) -{ - struct xmemnode *parent; - struct xmount *xm; - struct xmemnode *self; - int error; - struct xmemnode *oldxp; - -again: - parent = (struct xmemnode *)VTOXN(dvp); - xm = (struct xmount *)VTOXM(dvp); - self = NULL; - error = 0; - oldxp = NULL; - - if (vap->va_type == VREG && (vap->va_mode & VSVTX)) { - /* Must be privileged to set sticky bit */ - if (secpolicy_vnode_stky_modify(cred) != 0) - vap->va_mode &= ~VSVTX; - } else if (vap->va_type == VNON) { - return (EINVAL); - } - - /* - * Null component name is a synonym for directory being searched. - */ - if (*nm == '\0') { - VN_HOLD(dvp); - oldxp = parent; - } else { - error = xdirlookup(parent, nm, &oldxp, cred); - } - - if (error == 0) { /* name found */ - ASSERT(oldxp); - - rw_enter(&oldxp->xn_rwlock, RW_WRITER); - - /* - * if create/read-only an existing - * directory, allow it - */ - if (exclusive == EXCL) - error = EEXIST; - else if ((oldxp->xn_type == VDIR) && (mode & VWRITE)) - error = EISDIR; - else { - error = xmem_xaccess(oldxp, mode, cred); - } - - if (error) { - rw_exit(&oldxp->xn_rwlock); - xmemnode_rele(oldxp); - return (error); - } - *vpp = XNTOV(oldxp); - if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) && - vap->va_size == 0) { - rw_enter(&oldxp->xn_contents, RW_WRITER); - (void) xmemnode_trunc(xm, oldxp, 0); - rw_exit(&oldxp->xn_contents); - } - rw_exit(&oldxp->xn_rwlock); - if (IS_DEVVP(*vpp)) { - struct vnode *newvp; - - newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, - cred); - VN_RELE(*vpp); - *vpp = newvp; - } - return (0); - } - - if (error != ENOENT) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, nm, DE_CREATE, - (struct xmemnode *)NULL, (struct xmemnode *)NULL, - vap, &self, cred); - rw_exit(&parent->xn_rwlock); - - if (error) { - if (self) - xmemnode_rele(self); - - if (error == EEXIST) { - /* - * This means that the file was created sometime - * after we checked and did not find it and when - * we went to create it. - * Since creat() is supposed to truncate a file - * that already exits go back to the begining - * of the function. This time we will find it - * and go down the xmem_trunc() path - */ - goto again; - } - return (error); - } - - *vpp = XNTOV(self); - - if (IS_DEVVP(*vpp)) { - struct vnode *newvp; - - newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, - cred); - VN_RELE(*vpp); - *vpp = newvp; - } - - return (0); -} - -static int -xmem_remove(struct vnode *dvp, char *nm, struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - int error; - struct xmemnode *xp = NULL; - - error = xdirlookup(parent, nm, &xp, cred); - if (error) - return (error); - - ASSERT(xp); - rw_enter(&parent->xn_rwlock, RW_WRITER); - rw_enter(&xp->xn_rwlock, RW_WRITER); - - if (xp->xn_type != VDIR || - (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0) - error = xdirdelete(parent, xp, nm, DR_REMOVE, cred); - - rw_exit(&xp->xn_rwlock); - rw_exit(&parent->xn_rwlock); - xmemnode_rele(xp); - - return (error); -} - -static int -xmem_link(struct vnode *dvp, struct vnode *srcvp, char *tnm, struct cred *cred) -{ - struct xmemnode *parent; - struct xmemnode *from; - struct xmount *xm = (struct xmount *)VTOXM(dvp); - int error; - struct xmemnode *found = NULL; - struct vnode *realvp; - - if (VOP_REALVP(srcvp, &realvp) == 0) - srcvp = realvp; - - parent = (struct xmemnode *)VTOXN(dvp); - from = (struct xmemnode *)VTOXN(srcvp); - - if ((srcvp->v_type == VDIR && - secpolicy_fs_linkdir(cred, dvp->v_vfsp) != 0) || - (from->xn_uid != crgetuid(cred) && secpolicy_basic_link(cred) != 0)) - return (EPERM); - - error = xdirlookup(parent, tnm, &found, cred); - if (error == 0) { - ASSERT(found); - xmemnode_rele(found); - return (EEXIST); - } - - if (error != ENOENT) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, tnm, DE_LINK, (struct xmemnode *)NULL, - from, NULL, (struct xmemnode **)NULL, cred); - rw_exit(&parent->xn_rwlock); - return (error); -} - -static int -xmem_rename( - struct vnode *odvp, /* source parent vnode */ - char *onm, /* source name */ - struct vnode *ndvp, /* destination parent vnode */ - char *nnm, /* destination name */ - struct cred *cred) -{ - struct xmemnode *fromparent; - struct xmemnode *toparent; - struct xmemnode *fromxp = NULL; /* source xmemnode */ - struct xmount *xm = (struct xmount *)VTOXM(odvp); - int error; - int samedir = 0; /* set if odvp == ndvp */ - struct vnode *realvp; - - if (VOP_REALVP(ndvp, &realvp) == 0) - ndvp = realvp; - - fromparent = (struct xmemnode *)VTOXN(odvp); - toparent = (struct xmemnode *)VTOXN(ndvp); - - mutex_enter(&xm->xm_renamelck); - - /* - * Look up xmemnode of file we're supposed to rename. - */ - error = xdirlookup(fromparent, onm, &fromxp, cred); - if (error) { - mutex_exit(&xm->xm_renamelck); - return (error); - } - - /* - * Make sure we can delete the old (source) entry. This - * requires write permission on the containing directory. If - * that directory is "sticky" it further requires (except for - * for privileged users) that the user own the directory or - * the source entry, or else have permission to write the - * source entry. - */ - if (((error = xmem_xaccess(fromparent, VWRITE, cred)) != 0) || - (error = xmem_sticky_remove_access(fromparent, fromxp, cred)) != 0) - goto done; - - /* - * Check for renaming to or from '.' or '..' or that - * fromxp == fromparent - */ - if ((onm[0] == '.' && - (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || - (nnm[0] == '.' && - (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) || - (fromparent == fromxp)) { - error = EINVAL; - goto done; - } - - samedir = (fromparent == toparent); - /* - * Make sure we can search and rename into the new - * (destination) directory. - */ - if (!samedir) { - error = xmem_xaccess(toparent, VEXEC|VWRITE, cred); - if (error) - goto done; - } - - /* - * Link source to new target - */ - rw_enter(&toparent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, toparent, nnm, DE_RENAME, - fromparent, fromxp, (struct vattr *)NULL, - (struct xmemnode **)NULL, cred); - rw_exit(&toparent->xn_rwlock); - - if (error) { - /* - * ESAME isn't really an error; it indicates that the - * operation should not be done because the source and target - * are the same file, but that no error should be reported. - */ - if (error == ESAME) - error = 0; - goto done; - } - - /* - * Unlink from source. - */ - rw_enter(&fromparent->xn_rwlock, RW_WRITER); - rw_enter(&fromxp->xn_rwlock, RW_WRITER); - - error = xdirdelete(fromparent, fromxp, onm, DR_RENAME, cred); - - /* - * The following handles the case where our source xmemnode was - * removed before we got to it. - * - * XXX We should also cleanup properly in the case where xdirdelete - * fails for some other reason. Currently this case shouldn't happen. - * (see 1184991). - */ - if (error == ENOENT) - error = 0; - - rw_exit(&fromxp->xn_rwlock); - rw_exit(&fromparent->xn_rwlock); -done: - xmemnode_rele(fromxp); - mutex_exit(&xm->xm_renamelck); - - return (error); -} - -static int -xmem_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, - struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *self = NULL; - struct xmount *xm = (struct xmount *)VTOXM(dvp); - int error; - - /* - * Might be dangling directory. Catch it here, - * because a ENOENT return from xdirlookup() is - * an "o.k. return". - */ - if (parent->xn_nlink == 0) - return (ENOENT); - - error = xdirlookup(parent, nm, &self, cred); - if (error == 0) { - ASSERT(self); - xmemnode_rele(self); - return (EEXIST); - } - if (error != ENOENT) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, nm, DE_MKDIR, - (struct xmemnode *)NULL, (struct xmemnode *)NULL, va, - &self, cred); - if (error) { - rw_exit(&parent->xn_rwlock); - if (self) - xmemnode_rele(self); - return (error); - } - rw_exit(&parent->xn_rwlock); - *vpp = XNTOV(self); - return (0); -} - -static int -xmem_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *self = NULL; - struct vnode *vp; - int error = 0; - - /* - * Return error when removing . and .. - */ - if (strcmp(nm, ".") == 0) - return (EINVAL); - if (strcmp(nm, "..") == 0) - return (EEXIST); /* Should be ENOTEMPTY */ - error = xdirlookup(parent, nm, &self, cred); - if (error) - return (error); - - rw_enter(&parent->xn_rwlock, RW_WRITER); - rw_enter(&self->xn_rwlock, RW_WRITER); - - vp = XNTOV(self); - if (vp == dvp || vp == cdir) { - error = EINVAL; - goto done1; - } - if (self->xn_type != VDIR) { - error = ENOTDIR; - goto done1; - } - - mutex_enter(&self->xn_tlock); - if (self->xn_nlink > 2) { - mutex_exit(&self->xn_tlock); - error = EEXIST; - goto done1; - } - mutex_exit(&self->xn_tlock); - - if (vn_vfswlock(vp)) { - error = EBUSY; - goto done1; - } - if (vn_mountedvfs(vp) != NULL) { - error = EBUSY; - goto done; - } - - /* - * Check for an empty directory - * i.e. only includes entries for "." and ".." - */ - if (self->xn_dirents > 2) { - error = EEXIST; /* SIGH should be ENOTEMPTY */ - /* - * Update atime because checking xn_dirents is logically - * equivalent to reading the directory - */ - gethrestime(&self->xn_atime); - goto done; - } - - error = xdirdelete(parent, self, nm, DR_RMDIR, cred); -done: - vn_vfsunlock(vp); -done1: - rw_exit(&self->xn_rwlock); - rw_exit(&parent->xn_rwlock); - xmemnode_rele(self); - - return (error); -} - -/* ARGSUSED2 */ - -static int -xmem_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xdirent *xdp; - int error; - register struct dirent64 *dp; - register ulong_t offset; - register ulong_t total_bytes_wanted; - register long outcount = 0; - register long bufsize; - int reclen; - caddr_t outbuf; - - if (uiop->uio_loffset >= MAXOFF_T) { - if (eofp) - *eofp = 1; - return (0); - } - /* - * assuming system call has already called xmem_rwlock - */ - ASSERT(RW_READ_HELD(&xp->xn_rwlock)); - - if (uiop->uio_iovcnt != 1) - return (EINVAL); - - if (vp->v_type != VDIR) - return (ENOTDIR); - - /* - * There's a window here where someone could have removed - * all the entries in the directory after we put a hold on the - * vnode but before we grabbed the rwlock. Just return unless - * there are still references to the current file in which case panic. - */ - if (xp->xn_dir == NULL) { - if (xp->xn_nlink) - cmn_err(CE_PANIC, "empty directory 0x%p", (void *)xp); - return (0); - } - - /* - * Get space for multiple directory entries - */ - total_bytes_wanted = uiop->uio_iov->iov_len; - bufsize = total_bytes_wanted + sizeof (struct dirent64); - outbuf = kmem_alloc(bufsize, KM_SLEEP); - - dp = (struct dirent64 *)outbuf; - - - offset = 0; - xdp = xp->xn_dir; - while (xdp) { - offset = xdp->xd_offset; - if (offset >= uiop->uio_offset) { - reclen = (int)DIRENT64_RECLEN(strlen(xdp->xd_name)); - if (outcount + reclen > total_bytes_wanted) - break; - ASSERT(xdp->xd_xmemnode != NULL); - - /* use strncpy(9f) to zero out uninitialized bytes */ - - ASSERT(strlen(xdp->xd_name) + 1 <= - DIRENT64_NAMELEN(reclen)); - (void) strncpy(dp->d_name, xdp->xd_name, - DIRENT64_NAMELEN(reclen)); - dp->d_reclen = (ushort_t)reclen; - dp->d_ino = (ino64_t)xdp->xd_xmemnode->xn_nodeid; - dp->d_off = (offset_t)xdp->xd_offset + 1; - dp = (struct dirent64 *) - ((uintptr_t)dp + dp->d_reclen); - outcount += reclen; - ASSERT(outcount <= bufsize); - } - xdp = xdp->xd_next; - } - error = uiomove(outbuf, outcount, UIO_READ, uiop); - if (!error) { - /* If we reached the end of the list our offset */ - /* should now be just past the end. */ - if (!xdp) { - offset += 1; - if (eofp) - *eofp = 1; - } else if (eofp) - *eofp = 0; - uiop->uio_offset = offset; - } - gethrestime(&xp->xn_atime); - kmem_free(outbuf, bufsize); - return (error); -} - -static int -xmem_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, char *tnm, - struct cred *cred) -{ - struct xmemnode *parent = (struct xmemnode *)VTOXN(dvp); - struct xmemnode *self = (struct xmemnode *)NULL; - struct xmount *xm = (struct xmount *)VTOXM(dvp); - char *cp = NULL; - int error; - size_t len; - - error = xdirlookup(parent, lnm, &self, cred); - if (error == 0) { - /* - * The entry already exists - */ - xmemnode_rele(self); - return (EEXIST); /* was 0 */ - } - - if (error != ENOENT) { - if (self != NULL) - xmemnode_rele(self); - return (error); - } - - rw_enter(&parent->xn_rwlock, RW_WRITER); - error = xdirenter(xm, parent, lnm, DE_CREATE, (struct xmemnode *)NULL, - (struct xmemnode *)NULL, tva, &self, cred); - rw_exit(&parent->xn_rwlock); - - if (error) { - if (self) - xmemnode_rele(self); - return (error); - } - len = strlen(tnm) + 1; - cp = xmem_memalloc(len, 0); - if (cp == NULL) { - xmemnode_rele(self); - return (ENOSPC); - } - (void) strcpy(cp, tnm); - - self->xn_symlink = cp; - self->xn_size = len - 1; - xmemnode_rele(self); - return (error); -} - -/* ARGSUSED2 */ -static int -xmem_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - int error = 0; - - if (vp->v_type != VLNK) - return (EINVAL); - - rw_enter(&xp->xn_rwlock, RW_READER); - rw_enter(&xp->xn_contents, RW_READER); - error = uiomove(xp->xn_symlink, xp->xn_size, UIO_READ, uiop); - gethrestime(&xp->xn_atime); - rw_exit(&xp->xn_contents); - rw_exit(&xp->xn_rwlock); - return (error); -} - -/* ARGSUSED */ -static int -xmem_fsync(struct vnode *vp, int syncflag, struct cred *cred) -{ - return (0); -} - -/* ARGSUSED */ -static void -xmem_inactive(struct vnode *vp, struct cred *cred) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VFSTOXM(vp->v_vfsp); - - rw_enter(&xp->xn_rwlock, RW_WRITER); -top: - mutex_enter(&xp->xn_tlock); - mutex_enter(&vp->v_lock); - ASSERT(vp->v_count >= 1); - - /* - * If we don't have the last hold or the link count is non-zero, - * there's little to do -- just drop our hold. - */ - if (vp->v_count > 1 || xp->xn_nlink != 0) { - vp->v_count--; - mutex_exit(&vp->v_lock); - mutex_exit(&xp->xn_tlock); - rw_exit(&xp->xn_rwlock); - return; - } - - /* - * We have the last hold *and* the link count is zero, so this - * xmemnode is dead from the filesystem's viewpoint. However, - * if the xmemnode has any pages associated with it (i.e. if it's - * a normal file with non-zero size), the xmemnode can still be - * discovered by pageout or fsflush via the page vnode pointers. - * In this case we must drop all our locks, truncate the xmemnode, - * and try the whole dance again. - */ - if (xp->xn_size != 0) { - if (xp->xn_type == VREG) { - mutex_exit(&vp->v_lock); - mutex_exit(&xp->xn_tlock); - rw_enter(&xp->xn_contents, RW_WRITER); - (void) xmemnode_trunc(xm, xp, 0); - rw_exit(&xp->xn_contents); - ASSERT(xp->xn_size == 0); - ASSERT(xp->xn_nblocks == 0); - goto top; - } - if (xp->xn_type == VLNK) - xmem_memfree(xp->xn_symlink, xp->xn_size + 1); - } - - mutex_exit(&vp->v_lock); - mutex_exit(&xp->xn_tlock); - mutex_enter(&xm->xm_contents); - if (xp->xn_forw == NULL) - xm->xm_rootnode->xn_back = xp->xn_back; - else - xp->xn_forw->xn_back = xp->xn_back; - xp->xn_back->xn_forw = xp->xn_forw; - mutex_exit(&xm->xm_contents); - rw_exit(&xp->xn_rwlock); - rw_destroy(&xp->xn_rwlock); - mutex_destroy(&xp->xn_tlock); - vn_free(xp->xn_vnode); - xmem_memfree(xp, sizeof (struct xmemnode)); -} - -static int -xmem_fid(struct vnode *vp, struct fid *fidp) -{ - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xfid *xfid; - - if (fidp->fid_len < (sizeof (struct xfid) - sizeof (ushort_t))) { - fidp->fid_len = sizeof (struct xfid) - sizeof (ushort_t); - return (ENOSPC); - } - - xfid = (struct xfid *)fidp; - bzero(xfid, sizeof (struct xfid)); - xfid->xfid_len = (int)sizeof (struct xfid) - sizeof (ushort_t); - - xfid->xfid_ino = xp->xn_nodeid; - xfid->xfid_gen = xp->xn_gen; - - return (0); -} - - -/* - * Return all the pages from [off..off+len] in given file - */ -static int -xmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp, - page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, - enum seg_rw rw, struct cred *cr) -{ - int err = 0; - struct xmemnode *xp = VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - timestruc_t now; - - cmn_err(CE_PANIC, "xmem_getpage"); - rw_enter(&xp->xn_contents, RW_READER); - - if (off + len > xp->xn_size + xm->xm_bsize) { - rw_exit(&xp->xn_contents); - return (EFAULT); - } - rw_exit(&xp->xn_contents); - - if (len <= xm->xm_bsize) - err = xmem_getapage(vp, (u_offset_t)off, len, protp, pl, plsz, - seg, addr, rw, cr); - else - err = pvn_getpages(xmem_getapage, vp, (u_offset_t)off, len, - protp, pl, plsz, seg, addr, rw, cr); - - rw_enter(&xp->xn_contents, RW_WRITER); - gethrestime(&now); - xp->xn_atime = now; - if (rw == S_WRITE) - xp->xn_mtime = now; - rw_exit(&xp->xn_contents); - - return (err); -} - -/* - * Called from pvn_getpages to get a particular page. - */ -/*ARGSUSED*/ -static int -xmem_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp, - page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, - enum seg_rw rw, struct cred *cr) -{ - cmn_err(CE_PANIC, "xmem_getapage"); - return (0); -} - -/* ARGSUSED */ -int -xmem_putpage(struct vnode *vp, offset_t off, size_t len, int flags, - struct cred *cr) -{ - return (0); -} - -#ifndef lint -/* - * Write out a single page. - * For xmemfs this means choose a physical swap slot and write the page - * out using VOP_PAGEIO. For performance, we attempt to kluster; i.e., - * we try to find a bunch of other dirty pages adjacent in the file - * and a bunch of contiguous swap slots, and then write all the pages - * out in a single i/o. - */ -/*ARGSUSED*/ -static int -xmem_putapage(struct vnode *vp, page_t *pp, u_offset_t *offp, - size_t *lenp, int flags, struct cred *cr) -{ - cmn_err(CE_PANIC, "xmem putapage"); - return (1); -} -#endif - - -static int -xmem_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp, - size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, - struct cred *cred) -{ - struct seg *seg; - struct segxmem_crargs xmem_a; - struct xmemnode *xp = (struct xmemnode *)VTOXN(vp); - struct xmount *xm = (struct xmount *)VTOXM(vp); - uint_t blocknumber; - int error; - -#ifdef lint - maxprot = maxprot; -#endif - if (vp->v_flag & VNOMAP) - return (ENOSYS); - - if (off < 0) - return (EINVAL); - - /* offset, length and address has to all be block aligned */ - - if (off & (xm->xm_bsize - 1) || len & (xm->xm_bsize - 1) || - ((ulong_t)*addrp) & (xm->xm_bsize - 1)) { - - return (EINVAL); - } - - if (vp->v_type != VREG) - return (ENODEV); - - if (flags & MAP_PRIVATE) - return (EINVAL); /* XXX need to be handled */ - - /* - * Don't allow mapping to locked file - */ - if (vn_has_mandatory_locks(vp, xp->xn_mode)) { - return (EAGAIN); - } - - if (error = xmem_fillpages(xp, vp, off, len, 1)) { - return (error); - } - - blocknumber = off >> xm->xm_bshift; - - if (flags & MAP_FIXED) { - /* - * User specified address - blow away any previous mappings - */ - AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); - seg = as_findseg(as, *addrp, 0); - - /* - * Fast path. segxmem_remap will fail if this is the wrong - * segment or if the len is beyond end of seg. If it fails, - * we do the regular stuff thru as_* routines. - */ - - if (seg && (segxmem_remap(seg, vp, *addrp, len, - &xp->xn_ppa[blocknumber], prot) == 0)) { - AS_LOCK_EXIT(as, &as->a_lock); - return (0); - } - AS_LOCK_EXIT(as, &as->a_lock); - if (seg) - (void) as_unmap(as, *addrp, len); - - as_rangelock(as); - - error = valid_usr_range(*addrp, len, prot, as, as->a_userlimit); - - if (error != RANGE_OKAY || - as_gap(as, len, addrp, &len, AH_CONTAIN, *addrp)) { - as_rangeunlock(as); - return (EINVAL); - } - - } else { - as_rangelock(as); - map_addr(addrp, len, (offset_t)off, 1, flags); - } - - if (*addrp == NULL) { - as_rangeunlock(as); - return (ENOMEM); - } - - xmem_a.xma_vp = vp; - xmem_a.xma_offset = (u_offset_t)off; - xmem_a.xma_prot = prot; - xmem_a.xma_cred = cred; - xmem_a.xma_ppa = &xp->xn_ppa[blocknumber]; - xmem_a.xma_bshift = xm->xm_bshift; - - error = as_map(as, *addrp, len, segxmem_create, &xmem_a); - - as_rangeunlock(as); - return (error); -} - -/* ARGSUSED */ -static int -xmem_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, - size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, - struct cred *cred) -{ - return (0); -} - -/* ARGSUSED */ -static int -xmem_delmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr, - size_t len, uint_t prot, uint_t maxprot, uint_t flags, - struct cred *cred) -{ - return (0); -} - -static int -xmem_freesp(struct vnode *vp, struct flock64 *lp, int flag) -{ - register int i; - register struct xmemnode *xp = VTOXN(vp); - int error; - - ASSERT(vp->v_type == VREG); - ASSERT(lp->l_start >= 0); - - if (lp->l_len != 0) - return (EINVAL); - - rw_enter(&xp->xn_rwlock, RW_WRITER); - if (xp->xn_size == lp->l_start) { - rw_exit(&xp->xn_rwlock); - return (0); - } - - /* - * Check for any mandatory locks on the range - */ - if (MANDLOCK(vp, xp->xn_mode)) { - long save_start; - - save_start = lp->l_start; - - if (xp->xn_size < lp->l_start) { - /* - * "Truncate up" case: need to make sure there - * is no lock beyond current end-of-file. To - * do so, we need to set l_start to the size - * of the file temporarily. - */ - lp->l_start = xp->xn_size; - } - lp->l_type = F_WRLCK; - lp->l_sysid = 0; - lp->l_pid = ttoproc(curthread)->p_pid; - i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK; - if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 || - lp->l_type != F_UNLCK) { - rw_exit(&xp->xn_rwlock); - return (i ? i : EAGAIN); - } - - lp->l_start = save_start; - } - - rw_enter(&xp->xn_contents, RW_WRITER); - error = xmemnode_trunc((struct xmount *)VFSTOXM(vp->v_vfsp), - xp, lp->l_start); - rw_exit(&xp->xn_contents); - rw_exit(&xp->xn_rwlock); - return (error); -} - -/* ARGSUSED */ -static int -xmem_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, - offset_t offset, struct cred *cred, caller_context_t *ct) -{ - int error; - - if (cmd != F_FREESP) - return (EINVAL); - if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) { - if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T)) - return (EFBIG); - error = xmem_freesp(vp, bfp, flag); - } - return (error); -} - -/* ARGSUSED */ -static int -xmem_seek(struct vnode *vp, offset_t ooff, offset_t *noffp) -{ - return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); -} - -/* ARGSUSED2 */ -static int -xmem_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) -{ - struct xmemnode *xp = VTOXN(vp); - - if (write_lock) { - rw_enter(&xp->xn_rwlock, RW_WRITER); - } else { - rw_enter(&xp->xn_rwlock, RW_READER); - } - return (write_lock); -} - -/* ARGSUSED1 */ -static void -xmem_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) -{ - struct xmemnode *xp = VTOXN(vp); - - rw_exit(&xp->xn_rwlock); -} - -struct vnodeops *xmem_vnodeops; - -const fs_operation_def_t xmem_vnodeops_template[] = { - VOPNAME_OPEN, xmem_open, - VOPNAME_CLOSE, xmem_close, - VOPNAME_READ, xmem_read, - VOPNAME_WRITE, xmem_write, - VOPNAME_IOCTL, xmem_ioctl, - VOPNAME_GETATTR, xmem_getattr, - VOPNAME_SETATTR, xmem_setattr, - VOPNAME_ACCESS, xmem_access, - VOPNAME_LOOKUP, xmem_lookup, - VOPNAME_CREATE, xmem_create, - VOPNAME_REMOVE, xmem_remove, - VOPNAME_LINK, xmem_link, - VOPNAME_RENAME, xmem_rename, - VOPNAME_MKDIR, xmem_mkdir, - VOPNAME_RMDIR, xmem_rmdir, - VOPNAME_READDIR, xmem_readdir, - VOPNAME_SYMLINK, xmem_symlink, - VOPNAME_READLINK, xmem_readlink, - VOPNAME_FSYNC, xmem_fsync, - VOPNAME_INACTIVE, (fs_generic_func_p) xmem_inactive, - VOPNAME_FID, xmem_fid, - VOPNAME_RWLOCK, xmem_rwlock, - VOPNAME_RWUNLOCK, (fs_generic_func_p) xmem_rwunlock, - VOPNAME_SEEK, xmem_seek, - VOPNAME_SPACE, xmem_space, - VOPNAME_GETPAGE, xmem_getpage, - VOPNAME_PUTPAGE, xmem_putpage, - VOPNAME_MAP, (fs_generic_func_p) xmem_map, - VOPNAME_ADDMAP, (fs_generic_func_p) xmem_addmap, - VOPNAME_DELMAP, xmem_delmap, - NULL, NULL -}; diff --git a/usr/src/uts/intel/sys/Makefile b/usr/src/uts/intel/sys/Makefile index 550c2eeb98..491f4d3224 100644 --- a/usr/src/uts/intel/sys/Makefile +++ b/usr/src/uts/intel/sys/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -99,10 +99,6 @@ CLOSEDHDRS = \ SUBHDRS = \ dktp/mscsi.h -FSHDRS = \ - seg_xmem.h \ - xmem.h - ROOTDIR= $(ROOT)/usr/include/sys SCSIDIR= $(ROOTDIR)/scsi SCSIDIRS= $(SCSIDIR) $(SCSIDIR)/conf $(SCSIDIR)/generic \ @@ -115,12 +111,9 @@ ROOTDIRS= $(ROOTDIR) $(ROOTFSDIR) ROOTHDRS= $(HDRS:%=$(ROOTDIR)/%) $(SUBHDRS:%=$(ROOTDIR)/%) $(CLOSED_BUILD)ROOTHDRS += $(CLOSEDHDRS:%=$(ROOTDIR)/%) -ROOTFSHDRS= $(FSHDRS:%=$(ROOTDIR)/fs/%) - CHECKHDRS= \ $(HDRS:%.h=%.check) \ - $(SUBHDRS:%.h=%.check) \ - $(FSHDRS:%.h=fs/%.check) + $(SUBHDRS:%.h=%.check) $(CLOSED_BUILD)CHECKHDRS += $(CLOSEDHDRS:%.h=$(CLOSED)/uts/intel/sys/%.check) @@ -133,9 +126,9 @@ $(ROOTDIR)/%: $(CLOSED)/uts/intel/sys/% .KEEP_STATE: -.PARALLEL: $(CHECKHDRS) $(ROOTHDRS) $(ROOTFSHDRS) +.PARALLEL: $(CHECKHDRS) $(ROOTHDRS) -install_h: $(ROOTDIRS) .WAIT $(ROOTHDRS) $(ROOTFSHDRS) +install_h: $(ROOTDIRS) .WAIT $(ROOTHDRS) $(ROOTDIRS): $(INS.dir) diff --git a/usr/src/uts/intel/sys/fs/seg_xmem.h b/usr/src/uts/intel/sys/fs/seg_xmem.h deleted file mode 100644 index cd172e00c9..0000000000 --- a/usr/src/uts/intel/sys/fs/seg_xmem.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_FS_SEG_XMEM_H -#define _SYS_FS_SEG_XMEM_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/map.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Statistics for segxmem operations. - * - * No explicit locking to protect these stats. - */ -struct segxmemcnt { - kstat_named_t sx_fault; /* number of segxmem_faults */ - kstat_named_t sx_getmapflt; /* number of segxmem_getmaps */ - kstat_named_t sx_release; /* releases with */ - kstat_named_t sx_pagecreate; /* pagecreates */ -}; - - -#if defined(_KERNEL) - -struct segxmem_crargs { - struct vnode *xma_vp; /* vnode maped from */ - u_offset_t xma_offset; /* starting offset for mapping */ - /* offset above could be invalid due to remaps, but ppa will be valid */ - page_t ***xma_ppa; /* page list for this mapping */ - uint_t xma_bshift; /* for converting offset to block # */ - struct cred *xma_cred; /* credentials */ - uchar_t xma_prot; -}; - - -struct sx_prot { - struct sx_prot *spc_next; /* Next such one */ - pgcnt_t spc_pageindex; /* First page with changed prot */ - pgcnt_t spc_numpages; /* & number of such pages */ - uchar_t spc_prot; -}; - -struct segxmem_data { - struct vnode *sxd_vp; /* vnode for this mapping */ - offset_t sxd_offset; /* & initial offset */ - /* - * The above may not be valid after remap, but ppa below will track - * the remaps. - */ - size_t sxd_bsize; /* block size */ - uint_t sxd_bshift; /* for converting offset to block # */ - size_t sxd_softlockcnt; - struct sx_prot *sxd_spc; /* linked list of changed protections */ - uchar_t sxd_prot; -}; - -#define sx_blocks(seg, sxd) howmany((seg)->s_size, 1 << (sxd)->sxd_bshift) - -/* - * Public seg_xmem segment operations. - */ -extern int segxmem_create(struct seg *, struct segxmem_crargs *); -/* - * extern faultcode_t segxmem_fault(struct hat *, struct seg *, caddr_t, size_t, - * enum fault_type, enum seg_rw); - */ -extern caddr_t segxmem_getmap(struct map *, struct vnode *, u_offset_t, - size_t, page_t **, enum seg_rw); -extern void segxmem_release(struct map *, caddr_t, size_t); -extern int segxmem_remap(struct seg *, struct vnode *vp, caddr_t, size_t, - page_t ***, uchar_t); -extern void segxmem_inval(struct seg *, struct vnode *, u_offset_t); - -#endif /* _KERNEL */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_FS_SEG_XMEM_H */ diff --git a/usr/src/uts/intel/sys/fs/xmem.h b/usr/src/uts/intel/sys/fs/xmem.h deleted file mode 100644 index 11e36ee5c2..0000000000 --- a/usr/src/uts/intel/sys/fs/xmem.h +++ /dev/null @@ -1,282 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_FS_XMEM_H -#define _SYS_FS_XMEM_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include <sys/t_lock.h> -#include <vm/seg.h> -#include <vm/seg_vn.h> -#include <sys/fs/seg_xmem.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _KERNEL - -/* - * xmemnode is the file system dependent node for xmemfs. - * - * xn_rwlock protects access of the directory list at xn_dir - * as well as syncronizing read and writes to the xmemnode - * - * xn_contents protects growing, shrinking, reading and writing - * the file along with xn_rwlock (see below). - * - * xn_tlock protects updates to xn_mode and xn_nlink - * - * xm_contents in the xmount filesystem data structure protects - * xn_forw and xn_back which are used to maintain a linked - * list of all xmemfs files associated with that file system - * - * XXX - valid ? The pp array represents the store for xmemfs. - * To grow or shrink the file or fill in holes requires - * manipulation of the pp array. These operations are protected - * by a combination of xn_rwlock and xn_contents. Growing or shrinking - * the array requires the write lock on xn_rwlock and xn_contents. - * Filling in a slot in the array requires the write lock on xn_contents. - * Reading the array requires the read lock on xn_contents. - * - * The ordering of the locking is: - * xn_rwlock -> xn_contents -> page locks on pages in file - * - * xn_tlock doesn't require any xmemnode locks - */ - -struct xmemnode { - struct xmemnode *xn_back; /* linked list of xmemnodes */ - struct xmemnode *xn_forw; /* linked list of xmemnodes */ - union { - struct { - struct xdirent *un_dirlist; /* dirent list */ - uint_t un_dirents; /* number of dirents */ - } un_dirstruct; - char *un_symlink; /* pointer to symlink */ - struct { - page_t ***un_ppa; /* page backing for file */ - size_t un_size; /* size repres. by array */ - } un_ppstruct; - } un_xmemnode; - struct vnode *xn_vnode; /* vnode for this xmemnode */ - int xn_gen; /* pseudo gen number for xfid */ - struct vattr xn_attr; /* attributes */ - krwlock_t xn_contents; /* vm side -serialize mods */ - krwlock_t xn_rwlock; /* rw,trunc size - serialize */ - /* mods and directory updates */ - kmutex_t xn_tlock; /* time, flag, and nlink lock */ -}; - -/* - * each xn_ppa[] entry points to an array of page_t pointers. - */ -#define xn_ppa un_xmemnode.un_ppstruct.un_ppa -#define xn_ppasz un_xmemnode.un_ppstruct.un_size -#define xn_dir un_xmemnode.un_dirstruct.un_dirlist -#define xn_dirents un_xmemnode.un_dirstruct.un_dirents -#define xn_symlink un_xmemnode.un_symlink - -/* - * Attributes - */ -#define xn_mask xn_attr.va_mask -#define xn_type xn_attr.va_type -#define xn_mode xn_attr.va_mode -#define xn_uid xn_attr.va_uid -#define xn_gid xn_attr.va_gid -#define xn_fsid xn_attr.va_fsid -#define xn_nodeid xn_attr.va_nodeid -#define xn_nlink xn_attr.va_nlink -#define xn_size xn_attr.va_size -#define xn_atime xn_attr.va_atime -#define xn_mtime xn_attr.va_mtime -#define xn_ctime xn_attr.va_ctime -#define xn_rdev xn_attr.va_rdev -#define xn_blksize xn_attr.va_blksize -#define xn_nblocks xn_attr.va_nblocks -#define xn_seq xn_attr.va_seq - -/* - * xmemfs directories are made up of a linked list of xdirent structures - * hanging off directory xmemnodes. File names are not fixed length, - * but are null terminated. - */ -struct xdirent { - struct xmemnode *xd_xmemnode; /* xmemnode for this file */ - struct xdirent *xd_next; /* next directory entry */ - struct xdirent *xd_prev; /* prev directory entry */ - uint_t xd_offset; /* "offset" of dir entry */ - uint_t xd_hash; /* a hash of xd_name */ - struct xdirent *xd_link; /* linked via the hash table */ - struct xmemnode *xd_parent; /* parent, dir we are in */ - char *xd_name; /* must be null terminated */ - /* max length is MAXNAMELEN */ -}; - -/* - * xfid overlays the fid structure (for VFS_VGET) - */ -struct xfid { - uint16_t xfid_len; - ino32_t xfid_ino; - int32_t xfid_gen; -}; - -#define ESAME (-1) /* trying to rename linked files (special) */ - -extern struct vnodeops *xmem_vnodeops; -extern const struct fs_operation_def xmem_vnodeops_template[]; - -/* - * xmemfs per-mount data structure. - * - * All fields are protected by xm_contents. - * File renames on a particular file system are protected xm_renamelck. - */ -struct xmount { - struct vfs *xm_vfsp; /* filesystem's vfs struct */ - struct xmemnode *xm_rootnode; /* root xmemnode */ - char *xm_mntpath; /* name of xmemfs mount point */ - uint_t xm_flags; /* Miscellaneous Flags */ - size_t xm_bsize; /* block size for this file system */ - uint_t xm_bshift; /* for converting offset to block # */ - pgcnt_t xm_ppb; /* pages per block */ - struct map *xm_map; /* Map for kernel addresses */ - caddr_t xm_mapaddr; /* Base of above map */ - size_t xm_mapsize; /* size of above map */ - caddr_t xm_vmmapaddr; /* Base of heap for above map */ - size_t xm_vmmapsize; /* size of heap for above map */ - ulong_t xm_max; /* file system max reservation */ - pgcnt_t xm_mem; /* pages of reserved memory */ - dev_t xm_dev; /* unique dev # of mounted `device' */ - uint_t xm_gen; /* pseudo generation number for files */ - kmutex_t xm_contents; /* lock for xmount structure */ - kmutex_t xm_renamelck; /* rename lock for this mount */ - uint_t xm_xpgcnt; /* index and count for xpg_array */ - void **xm_xpgarray; /* array of pointers */ -}; - -#ifndef DEBUG -#define XMEMPRINTF(level, args) -#else -extern int xmemlevel; -/*PRINTFLIKE1*/ -extern void xmemprintf(const char *, ...) - __KPRINTFLIKE(1); -#define XMEMPRINTF(level, args) if (level >= xmemlevel) xmemprintf args -#endif - -#endif /* _KERNEL */ - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -/* - * Make sizeof struct xmemfs_args the same on x86 and amd64. - */ - -struct xmemfs_args { - offset_t xa_fssize; /* file system size in bytes */ - offset_t xa_bsize; /* block size for this file system */ - uint_t xa_flags; /* flags for this mount */ -}; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -/* Flag bits */ -#define XARGS_RESERVEMEM 1 /* pre reserve memory */ -#define XARGS_LARGEPAGES 2 /* Use large pages */ - -#ifdef _KERNEL - -/* - * File system independent to xmemfs conversion macros - */ -#define VFSTOXM(vfsp) ((struct xmount *)(vfsp)->vfs_data) -#define VTOXM(vp) ((struct xmount *)(vp)->v_vfsp->vfs_data) -#define VTOXN(vp) ((struct xmemnode *)(vp)->v_data) -#define XNTOV(xp) ((xp)->xn_vnode) -#define xmemnode_hold(tp) VN_HOLD(XNTOV(tp)) -#define xmemnode_rele(tp) VN_RELE(XNTOV(tp)) - -/* - * enums - */ -enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME }; /* direnter ops */ -enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */ - -/* - * xmemfs_minfree is the amount (in pages) of memory that xmemfs - * leaves free for the rest of the system. - * NB: If xmemfs allocates too much space, other processes will be - * unable to execute. 320 is chosen arbitrarily to be about right for - * an RDBMS environment with all of it's buffers coming from xmemfs. - */ -#define XMEMMINFREE 320 * 1024 * 1024 /* 320 Megabytes */ -/* - * number of simultaneous reads/writes is limited by NUM_SIMULMAPS - * below. We cannot set it much higher as we expect typical block - * size to be 2MB or 4MB and we cannot afford to reserve and keep - * too much kernel virtual memory for ourselves. - */ -#define SEGXMEM_NUM_SIMULMAPS 4 - -extern pgcnt_t xmemfs_minfree; /* memory in pages */ - -extern void xmemnode_init(struct xmount *, struct xmemnode *, - struct vattr *, struct cred *); -extern int xmemnode_trunc(struct xmount *, struct xmemnode *, u_offset_t); -extern int xdirlookup(struct xmemnode *, char *, struct xmemnode **, - struct cred *); -extern int xdirdelete(struct xmemnode *, struct xmemnode *, char *, - enum dr_op, struct cred *); -extern void xdirinit(struct xmemnode *, struct xmemnode *); -extern void xdirtrunc(struct xmemnode *); -extern void *xmem_memalloc(size_t, int); -extern void xmem_memfree(void *, size_t); -extern int xmem_xaccess(void *, int, struct cred *); -extern int xdirenter(struct xmount *, struct xmemnode *, char *, - enum de_op, struct xmemnode *, struct xmemnode *, struct vattr *, - struct xmemnode **, struct cred *); -extern int xmem_fillpages(struct xmemnode *, struct vnode *, offset_t, - offset_t, int); -extern int xmem_sticky_remove_access(struct xmemnode *, struct xmemnode *, - struct cred *); - -#endif /* _KERNEL */ - -#define XMEM_MUSTHAVE 1 - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_FS_XMEM_H */ diff --git a/usr/src/uts/intel/xmemfs/Makefile b/usr/src/uts/intel/xmemfs/Makefile deleted file mode 100644 index 9e1f69f75b..0000000000 --- a/usr/src/uts/intel/xmemfs/Makefile +++ /dev/null @@ -1,94 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# uts/intel/xmemfs/Makefile -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" -# -# This makefile drives the production of the xmemfs file system -# kernel module. -# -# x86 architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. -# -MODULE = xmemfs -OBJECTS = $(XMEMFS_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(XMEMFS_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(USR_FS_DIR)/$(MODULE) - -INC_PATH += -I../../i86pc - -# -# Include common rules. -# -include $(UTSBASE)/intel/Makefile.intel - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Include common targets. -# -include $(UTSBASE)/intel/Makefile.targ |