diff options
author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
---|---|---|
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/stand/lib/fs | |
download | illumos-gate-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz |
OpenSolaris Launch
Diffstat (limited to 'usr/src/stand/lib/fs')
43 files changed, 13845 insertions, 0 deletions
diff --git a/usr/src/stand/lib/fs/Makefile.com b/usr/src/stand/lib/fs/Makefile.com new file mode 100644 index 0000000000..9f4d8e8124 --- /dev/null +++ b/usr/src/stand/lib/fs/Makefile.com @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +DIRREL = ../ + +include ../../Makefile.com + +# +# Right now, the filesystem modules are only clean when -y is used -- some +# needs to go finish cleaning them up, at which point this can be removed. +# +lint lintcheck := LINTFLAGS += -y diff --git a/usr/src/stand/lib/fs/common/cache.c b/usr/src/stand/lib/fs/common/cache.c new file mode 100644 index 0000000000..f1e620df37 --- /dev/null +++ b/usr/src/stand/lib/fs/common/cache.c @@ -0,0 +1,624 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * This is mostly new code. Major revisions were made to allow multiple + * file systems to share a common cache. While this consisted primarily + * of including a "devid_t" pointer in the hash functions, I also re- + * organized everything to eliminate much of the duplicated code that + * had existed previously. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/sysmacros.h> +#include <sys/filep.h> +#include <sys/salib.h> +#include <sys/promif.h> + +#ifndef ICACHE_SIZE +/* + * These should probably be defined in an architecture-specific header + * file. The values below are analogous to those used in earlier versions + * of this module. + */ + +#define ICACHE_SIZE 350 /* Max number of I-node in file cache */ +#define DCACHE_SIZE 1500 /* Max number of cached directories */ +#define BCACHE_SIZE 250 /* Max number of cached disk blocks */ +#endif + +#define Next 0 /* Next pointer in Fwd/Bak link */ +#define Prev 1 /* Previous pointer in Fwd/Back links */ + +#define Frst 0 /* Ptr to first element of a chain */ +#define Last 1 /* Ptr to last element of a chain */ + +#define Hash 2 /* Offset of hash chain ptrs. */ + +typedef struct cache { /* Generic cache element: */ + struct cache *link[4]; /* .. 
Fwd/Bak links for hash chain & LRU */ + struct cache **chn; /* .. Hash chain link */ + int dev; /* .. Device file handle */ + void *data; /* .. Ptr to associated data */ + int size; /* .. Size of cached data */ +} cache_t; + +typedef struct head { /* Generic cache header: */ + cache_t *aged[2]; /* .. LRU list */ + int (*cmp)(cache_t *); /* .. Ptr to comparison function */ + int size; /* .. Size of "cache" objects */ + int maxblks; /* .. Max number of cached elements */ + int count; /* .. Current number of cached elements */ + int hits; /* .. Total cache hits */ + int searches; /* .. Total searches */ + int purges; /* .. Total purges */ +} head_t; + +/* Constructor for cache headers: */ +#define cache_head(h, f, t, n) \ + {{(cache_t *)&h, (cache_t *)&h}, f, sizeof (t), n} + +int read_opt; /* Number of times cache was bypassed */ +static int x_dev; /* Target device ID saved here! */ +static int x_len; /* length of object */ + +#define LOG2(x) \ + (((x) <= 16) ? 4 : /* Yeah, it's ugly. But it works! */ \ + (((x) <= 32) ? 5 : /* .. Binary log should be part of */ \ + (((x) <= 64) ? 6 : /* .. the language! */ \ + (((x) <= 128) ? 7 : 8)))) + +static cache_t * +get_cache(cache_t *cap, head_t *chp) +{ + /* + * Search cache: + * + * The caller pass a pointer to the first "cache" object in the current + * hash chain ["cap"] and a pointer to the corresponding cache header + * ["chp"]. This routine follows the cache chain until it finds an + * entry that matches both the current device [as noted in "x_dev"] + * and the cache-specific comparison ["chp->cmp"]. + * + * Returns the address of the matching cache object or null if there + * is none. + */ + + while (cap) { + /* + * Check all entries on the cache chain. We expect + * chains to be relatively short, so we use a simple + * linear search. + */ + if ((x_dev == cap->dev) && (*chp->cmp)(cap)) { + /* + * Found the entry we're looking for! 
Move it + * to the front of the cache header's LRU list + * before returing its addres to the caller. + */ + cap->link[Next]->link[Prev] = cap->link[Prev]; + cap->link[Prev]->link[Next] = cap->link[Next]; + + cap->link[Prev] = (cache_t *)chp->aged; + cap->link[Next] = chp->aged[Frst]; + chp->aged[Frst]->link[Prev] = cap; + chp->aged[Frst] = cap; + chp->hits += 1; + break; + } + + cap = cap->link[Hash+Next]; + } + + chp->searches += 1; + return (cap); +} + +static cache_t * +reclaim_cache(head_t *chp, int dev) +{ + /* + * Reclaim a cache element: + * + * This routine is used to: [a] free the oldest element from + * the cache headed at "chp" and return the address of the + * corresponding "cache_t" struct (iff dev == -1), or [b] free all + * elements on the cache headed at "chp" that belong to the + * indicated "dev"ice. + */ + cache_t *cap, *cxp; + cache_t *cpp = (cache_t *)chp; + + while ((cap = cpp->link[Prev]) != (cache_t *)chp) { + /* + * We follow the cache's LRU chain from oldest to + * newest member. This ensures that we remove only + * the oldest element when we're called with a + * negative "dev" argument. + */ + if ((dev == -1) || (dev == cap->dev)) { + /* + * This is one of the (perhaps the only) + * elements we're supposed to free. Remove it + * from both the LRU list and its associated + * hash chain. Then free the data bound the + * the cache_t element and, if "dev" is + * not -1, the element itself! + */ + cap->link[Prev]->link[Next] = cap->link[Next]; + cap->link[Next]->link[Prev] = cap->link[Prev]; + + if ((cxp = cap->link[Hash+Prev]) != 0) + cxp->link[Hash+Next] = cap->link[Hash+Next]; + else + *(cap->chn) = cap->link[Hash+Next]; + + if ((cxp = cap->link[Hash+Next]) != 0) + cxp->link[Hash+Prev] = cap->link[Hash+Prev]; + + bkmem_free((caddr_t)cap->data, cap->size); + if (dev == -1) + return (cap); + + bkmem_free((caddr_t)cap, chp->size); + chp->count -= 1; + + } else { + /* + * Skip this element, it's not one of the + * ones we want to free up. 
+ */ + cpp = cap; + } + }; + + return (0); +} + +static cache_t * +set_cache(cache_t **ccp, head_t *chp, int noreclaim) +{ + /* + * Install a cache element: + * + * The caller passes the address of cache descriptor ["chp"] and the + * hash chain into which the new element is to be linked ["ccp"]. This + * routine allocates a new cache_t structure (or, if the maximum number + * of elements has already been allocated, reclaims the oldest element + * from the cache), links it into the indicated hash chain, and returns + * its address to the caller. + */ + cache_t *cap; + + if ((chp->count < chp->maxblks) && + (cap = (cache_t *)bkmem_alloc(chp->size))) { + /* + * We haven't reached the maximum cache size yet. + * Allocate a new "cache_t" struct to be added to the + * cache. + */ + chp->count += 1; + + } else { + if (noreclaim) + return (NULL); + + /* + * Cache is full. Use the "reclaim_cache" routine to + * remove the oldest element from the cache. This + * will become the cache_t struct associated with the + * new element. + */ + cap = reclaim_cache(chp, -1); + chp->purges += 1; + } + + bzero((char *)cap, chp->size); + + cap->chn = ccp; + cap->link[Prev] = (cache_t *)chp; + cap->link[Next] = chp->aged[Frst]; + cap->link[Prev]->link[Next] = cap->link[Next]->link[Prev] = cap; + + if ((cap->link[Hash+Next] = *ccp) != 0) + (*ccp)->link[Hash+Prev] = cap; + return (*ccp = cap); +} + +/* + * The File Cache: + * + * This cache (also known as the inode cache) is used to keep track of all + * files open on a given device. The only special data required to locate + * a cache entry is the file reference number which is file-system dependent + * (for UNIX file systems, it's an inode number). + */ + +typedef struct icache { /* Inode cache element: */ + cache_t ic_hdr; /* .. Standard header */ + int ic_num; /* .. 
I-node number */ +} ic_t; + +#define IC_MAX_HDRS (1 << LOG2(ICACHE_SIZE/6)) +#define IC_HASH(d, i) (((d) + (i)) & (IC_MAX_HDRS - 1)) + +static int x_inode; + +static int /* Cache search predicate: */ +cmp_icache(cache_t *p) +{ + /* Just check the file number ("x_inode") ... */ + return (((ic_t *)p)->ic_num == x_inode); +} + +static head_t ic_head = cache_head(ic_head, cmp_icache, ic_t, ICACHE_SIZE); +static cache_t *ic_hash[IC_MAX_HDRS]; + +void * +get_icache(int dev, int inum) +{ + /* + * Search File Cache: + * + * This routine searches the file cache looking for the entry bound to + * the given "dev"ice and file number ["inum"]. If said entry exists, + * it returns the address of the associated file structure. Otherwise + * it returns null. + */ + cache_t *icp; + + x_dev = dev; + x_inode = inum; + icp = get_cache(ic_hash[IC_HASH(dev, inum)], &ic_head); + + return (icp ? (caddr_t)icp->data : 0); +} + +void +set_icache(int dev, int inum, void *ip, int size) +{ + /* + * Build a File Cache Entry: + * + * This routne installs the "size"-byte file structure at + * "*ip" in the inode cache where it may be retrieved by + * subsequent call to get_icache. + */ + ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)], + &ic_head, 0); + icp->ic_num = inum; + icp->ic_hdr.data = ip; + icp->ic_hdr.dev = dev; + icp->ic_hdr.size = size; +} + +int +set_ricache(int dev, int inum, void *ip, int size) +{ + /* + * Reliably set the icache + * + * This routine is the same as set_icache except that it + * will return 1 if the entry could not be entered into the cache + * without a purge. + */ + ic_t *icp = (ic_t *)set_cache(&ic_hash[IC_HASH(dev, inum)], + &ic_head, 1); + + if (icp == NULL) + return (1); + + icp->ic_num = inum; + icp->ic_hdr.data = ip; + icp->ic_hdr.dev = dev; + icp->ic_hdr.size = size; + + return (0); +} + +/* + * The Directory Cache: + * + * This cache is designed to speed directory searches. 
Each entry cor- + * responds to a directory entry that was used in a pathname resolution. + * The idea is that most files used by the boot wil be contained in a hand- + * full of directories, so we can speed searches if we know ahead of time + * just where these directories are. + */ + +typedef struct dcache { /* Directory cache objects: */ + cache_t dc_hdr; /* .. Standard header */ + int dc_inum; /* .. File number */ + int dc_pnum; /* .. Parent diretory's file number */ +} dc_t; + +#define DC_MAX_HDRS (1 << LOG2(DCACHE_SIZE/6)) +#define DC_HASH(d, n, l) (((d) + (n)[0] + (n)[(l)-1] + (l)) & (DC_MAX_HDRS-1)) + +static char *x_name; +static int x_pnum; + +static int +cmp_dcache(cache_t *p) /* Cache Search predicate: */ +{ + /* Check name, length, and parent's file number */ + return ((x_len == p->size) && (x_pnum == ((dc_t *)p)->dc_pnum) && + (strcmp((char *)p->data, x_name) == 0)); +} + +static head_t dc_head = cache_head(dc_head, cmp_dcache, dc_t, DCACHE_SIZE); +static cache_t *dc_hash[DC_MAX_HDRS]; + +int +get_dcache(int dev, char *name, int pnum) +{ + /* + * Search Directory Cache: + * + * This routine searches the directory cache for an entry + * associated with directory number "pnum" from the given + * file system that de-scribes a file of the given "name". + * If we find such an entry, we return the corresponding file + * number, 0 otherwise. + */ + dc_t *dcp; + + x_dev = dev; + x_len = strlen(name)+1; + x_pnum = pnum; + x_name = name; + dcp = (dc_t *)get_cache(dc_hash[DC_HASH(dev, name, x_len)], &dc_head); + + return (dcp ? dcp->dc_inum : 0); +} + +void +set_dcache(int dev, char *name, int pnum, int inum) +{ + /* + * Build Directory Cache Entry: + * + * This routine creates directory cache entries to be retrieved later + * via "get_dcache". The cache key is composed of three parts: The + * device specifier, the file name ("name"), and the file number of + * the directory containing that name ("pnum"). 
The data portion of + * the entry consists of the file number ("inum"). + */ + + int len = strlen(name)+1; + dc_t *dcp = + (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], &dc_head, 0); + + if (dcp->dc_hdr.data = (void *)bkmem_alloc(len)) { + /* + * Allocate a buffer for the pathname component, and + * make this the "data" portion of the generalize + * "cache_t" struct. Also fill in the cache-specific + * fields (pnum, inum). + */ + dcp->dc_pnum = pnum; + dcp->dc_inum = inum; + dcp->dc_hdr.dev = dev; + dcp->dc_hdr.size = len; + bcopy(name, (char *)dcp->dc_hdr.data, len); + + } else { + /* + * Not enough memory to make a copy of the name! + * There's probably not enough to do much else either! + */ + prom_panic("no memory for directory cache"); + } +} + +int +set_rdcache(int dev, char *name, int pnum, int inum) +{ + /* + * Reliably set the dcache + * + * This routine is the same as set_dcache except that it + * return 1 if the entry could not be entered into + * the cache without a purge. + */ + int len = strlen(name) + 1; + dc_t *dcp = + (dc_t *)set_cache(&dc_hash[DC_HASH(dev, name, len)], + &dc_head, 1); + + if (dcp == NULL) + return (1); + + if ((dcp->dc_hdr.data = (void *)bkmem_alloc(len)) == NULL) { + /* + * Not enough memory to make a copy of the name! + * There's probably not enough to do much else either! + */ + prom_panic("no memory for directory cache"); + /* NOTREACHED */ + } + + /* + * Allocate a buffer for the pathname component, and + * make this the "data" portion of the generalize + * "cache_t" struct. Also fill in the cache-specific + * fields (pnum, inum). + */ + dcp->dc_pnum = pnum; + dcp->dc_inum = inum; + dcp->dc_hdr.dev = dev; + dcp->dc_hdr.size = len; + bcopy(name, (char *)dcp->dc_hdr.data, len); + + return (0); +} + +/* + * Disk Block Cache: + */ + +typedef struct bcache { /* Disk block cache objects: */ + cache_t bc_hdr; /* .. Standard header */ + unsigned long bc_blk; /* .. 
The block number */ +} bc_t; + +#define BC_MAX_HDRS (1 << LOG2(BCACHE_SIZE/6)) +#define BC_HASH(d, b, l) (((d) + (b) + ((l) >> 8)) & (BC_MAX_HDRS-1)) + +static unsigned long x_blkno; + +static int +cmp_bcache(cache_t *p) /* Cache Search predicate: */ +{ + /* Check block number, buffer size */ + return ((x_len == p->size) && (x_blkno == ((bc_t *)p)->bc_blk)); +} + +static head_t bc_head = cache_head(bc_head, cmp_bcache, bc_t, BCACHE_SIZE); +static cache_t *bc_hash[BC_MAX_HDRS]; + +caddr_t +get_bcache(fileid_t *fp) +{ + /* + * Search Disk Block Cache: + * + * This should be getting pretty monotonous by now. Aren't generalized + * subroutines ("objects", if you prefer) great? + */ + cache_t *bcp; + + x_len = fp->fi_count; + x_blkno = fp->fi_blocknum; + x_dev = fp->fi_devp->di_dcookie; + bcp = get_cache(bc_hash[BC_HASH(x_dev, x_blkno, x_len)], &bc_head); + + return (bcp ? (caddr_t)bcp->data : 0); +} + +int +set_bcache(fileid_t *fp) +{ + /* + * Insert Disk Block Cache Entry: + * + * In this case, we actually read the requested block into a + * dynamically allocated buffer before inserting it into the + * cache. If the read fails, we return a non-zero value. + * + * The search keys for disk blocks are the block number and + * buffer size. The data associated with each entry is the + * corresponding data buffer. + */ + bc_t *bcp; + + if (fp->fi_memp = bkmem_alloc(x_len = fp->fi_count)) { + /* + * We were able to succesffully allocate an input + * buffer, now read the data into it. + */ + if (diskread(fp) != 0) { + /* + * I/O error on read. Free the input buffer, + * print an error message, and bail out. 
+ */ + bkmem_free(fp->fi_memp, x_len); + printf("disk read error\n"); + return (-1); + } + + x_blkno = fp->fi_blocknum; + x_dev = fp->fi_devp->di_dcookie; + bcp = (bc_t *) + set_cache(&bc_hash[BC_HASH(x_dev, x_blkno, x_len)], + &bc_head, 0); + bcp->bc_blk = x_blkno; + bcp->bc_hdr.dev = x_dev; + bcp->bc_hdr.size = x_len; + bcp->bc_hdr.data = (void *)fp->fi_memp; + + } else { + /* + * We could be a bit more convervative here by + * calling "set_cache" before we try to allocate a + * buffer (thereby giving us a chance to re-use a + * previously allocated buffer) but the error recovery + * is a bit trickier, and if we're that short on memory + * we'll have trouble elsewhere anyway! + */ + prom_panic("can't read - no memory"); + } + + return (0); +} + +void +release_cache(int dev) +{ + /* + * Reclaim all cache entries: + * + * This routine is called by the file-system's "closeall" method. It + * removes all cache entries associated with that file system from the + * global cache and release any resources bound to said entrires. + */ + + (void) reclaim_cache(&ic_head, dev); + (void) reclaim_cache(&dc_head, dev); + (void) reclaim_cache(&bc_head, dev); +} + +void +print_cache_data() +{ + /* + * Print some cacheing statistics ... + */ + static char *tag[] = { "inode", "directory", "disk block", 0}; + static head_t *hdp[] = { &ic_head, &dc_head, &bc_head, 0}; + + int j; + + for (j = 0; tag[j]; j++) { + /* + * Print statistics maintained in the header + * ("head_t" struct) of each of the above caches. 
+ */ + head_t *hp = hdp[j]; + + if (j) + printf("\n"); + printf("%s cache:\n", tag[j]); + printf(" max size %d\n", hp->maxblks); + printf(" actual size %d\n", hp->count); + printf(" total searches %d\n", hp->searches); + printf(" cache hits %d\n", hp->hits); + printf(" cache purges %d\n", hp->purges); + } + + printf("\nread opts %d\n", read_opt); +} diff --git a/usr/src/stand/lib/fs/common/diskread.c b/usr/src/stand/lib/fs/common/diskread.c new file mode 100644 index 0000000000..722885b9c3 --- /dev/null +++ b/usr/src/stand/lib/fs/common/diskread.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1994-1996, 2002-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/fs/ufs_fsdir.h> +#include <sys/fs/ufs_fs.h> +#include <sys/fs/ufs_inode.h> +#include <sys/sysmacros.h> +#include <sys/promif.h> +#include <sys/filep.h> +#include <sys/salib.h> + +static char prom_dev_type = 0; + +/* + * unix root slice offset for PROMS that do + * not know about fdisk partitions or Solaris + * slices. 
+ * the default is 0 for machines with proms that + * do know how to interpret solaris slices. + */ +unsigned long unix_startblk = 0; + +/* + * The various flavors of PROM make this grotesque. + */ +int +diskread(fileid_t *filep) +{ + int err; + devid_t *devp; + uint_t blocknum; + + /* add in offset of root slice */ + blocknum = filep->fi_blocknum + unix_startblk; + + devp = filep->fi_devp; + + err = prom_seek(devp->di_dcookie, + (unsigned long long)blocknum * (unsigned long long)DEV_BSIZE); + if (err == -1) { + printf("Seek error at block %x\n", blocknum); + return (-1); + } + + if ((err = prom_read(devp->di_dcookie, filep->fi_memp, filep->fi_count, + blocknum, prom_dev_type)) != filep->fi_count) { + printf("Short read. 0x%x chars read\n", err); + return (-1); + } + + return (0); +} diff --git a/usr/src/stand/lib/fs/common/fsswitch.c b/usr/src/stand/lib/fs/common/fsswitch.c new file mode 100644 index 0000000000..1b492dd7ce --- /dev/null +++ b/usr/src/stand/lib/fs/common/fsswitch.c @@ -0,0 +1,291 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1994-1996, 2002-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootsyms.h> +#include <sys/promif.h> +#include <sys/salib.h> + +static struct boot_fs_ops *dfl_fsw = (struct boot_fs_ops *)NULL; +static char *fsmsg = "Fstype has not been selected yet!\n"; +static char *msg_noops = "not fs_ops supplied\n"; + +/* + * return fs_ops pointer for a given file system name + */ +struct boot_fs_ops * +get_fs_ops_pointer(char *fsw_name) +{ + int fsw_idx; + + for (fsw_idx = 0; fsw_idx < boot_nfsw; fsw_idx++) + if (strcmp(boot_fsw[fsw_idx]->fsw_name, fsw_name) == 0) { + return (boot_fsw[fsw_idx]); + } + return ((struct boot_fs_ops *)NULL); +} + +/* + * set default file system type + */ +void +set_default_fs(char *fsw_name) +{ + int fsw_idx; + + for (fsw_idx = 0; fsw_idx < boot_nfsw; fsw_idx++) + if (strcmp(boot_fsw[fsw_idx]->fsw_name, fsw_name) == 0) { + dfl_fsw = boot_fsw[fsw_idx]; + return; + } + printf("Fstype <%s> is not recognized\n", fsw_name); + prom_panic(""); +} + +/* + * clear default file system type + */ +void +clr_default_fs(void) +{ + dfl_fsw = NULL; +} + +struct boot_fs_ops * +get_default_fs(void) +{ + return (dfl_fsw); +} + +void +boot_no_ops_void() +{ + prom_panic(msg_noops); + /*NOTREACHED*/ +} + +int +boot_no_ops() +{ + prom_panic(msg_noops); + /*NOTREACHED*/ + return (0); +} + +int +close(int fd) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_close)(fd)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +mountroot(char *str) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_mountroot)(str)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +unmountroot(void) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_unmountroot)()); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +/*ARGSUSED*/ +int +open(const char *filename, int flags) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return 
((*dfl_fsw->fsw_open)((char *)filename, flags)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +ssize_t +read(int fd, void *buf, size_t size) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_read)(fd, buf, size)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +void +closeall(int flag) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) { + (*dfl_fsw->fsw_closeall)(flag); + return; + } + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +fstat(int fd, struct stat *sb) +{ + struct bootstat buf; + int ret; + + if (dfl_fsw == NULL) + prom_panic(fsmsg); + + ret = (*dfl_fsw->fsw_fstat)(fd, &buf); + if (ret == -1) + return (-1); + + sb->st_dev = buf.st_dev; + sb->st_ino = buf.st_ino; + sb->st_mode = buf.st_mode; + sb->st_nlink = buf.st_nlink; + sb->st_uid = buf.st_uid; + sb->st_gid = buf.st_gid; + sb->st_rdev = buf.st_rdev; + sb->st_size = (off_t)buf.st_size; + sb->st_blksize = buf.st_blksize; + sb->st_blocks = buf.st_blocks; + sb->st_atim.tv_sec = buf.st_atim.tv_sec; + sb->st_atim.tv_nsec = buf.st_atim.tv_nsec; + sb->st_mtim.tv_sec = buf.st_mtim.tv_sec; + sb->st_mtim.tv_nsec = buf.st_mtim.tv_nsec; + sb->st_ctim.tv_sec = buf.st_ctim.tv_sec; + sb->st_ctim.tv_nsec = buf.st_ctim.tv_nsec; + + (void) memcpy(sb->st_fstype, buf.st_fstype, sizeof (sb->st_fstype)); + return (0); +} + +int +stat(const char *filename, struct stat *sb) +{ + int fd, ret = -1; + + if ((fd = open(filename, O_RDONLY)) != -1) { + ret = fstat(fd, sb); + (void) close(fd); + } + + return (ret); +} + +off_t +lseek(int filefd, off_t addr, int whence) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_lseek)(filefd, addr, whence)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +/* + * Kernel Interface + */ +int +kern_open(char *str, int flags) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_open)(str, flags)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +/* + * hi and lo refer to the MS end of the off_t word + * and the LS end of the off_t word for when we 
want + * to support 64-bit offsets. For now, lseek() just + * supports 32 bits. + */ + +/*ARGSUSED*/ +off_t +kern_lseek(int filefd, off_t hi, off_t lo) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_lseek)(filefd, lo, 0)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +ssize_t +kern_read(int fd, caddr_t buf, size_t size) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_read)(fd, buf, size)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +kern_close(int fd) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_close)(fd)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +kern_fstat(int fd, struct bootstat *buf) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_fstat)(fd, buf)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +kern_getdents(int fd, struct dirent *buf, size_t size) +{ + if (dfl_fsw != (struct boot_fs_ops *)NULL) + return ((*dfl_fsw->fsw_getdents)(fd, buf, size)); + prom_panic(fsmsg); + /*NOTREACHED*/ +} + +int +kern_mountroot(char *path) +{ + return (mountroot(path)); +} + +int +kern_unmountroot(void) +{ + return (unmountroot()); +} diff --git a/usr/src/stand/lib/fs/hsfs/Makefile b/usr/src/stand/lib/fs/hsfs/Makefile new file mode 100644 index 0000000000..37ce85a638 --- /dev/null +++ b/usr/src/stand/lib/fs/hsfs/Makefile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = libhsfs.a +OBJECTS = hsfsops.o + +include ../Makefile.com + +include ../../Makefile.targ diff --git a/usr/src/stand/lib/fs/hsfs/hsfs_sig.h b/usr/src/stand/lib/fs/hsfs/hsfs_sig.h new file mode 100644 index 0000000000..2a46386f9c --- /dev/null +++ b/usr/src/stand/lib/fs/hsfs/hsfs_sig.h @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2000 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +static char *hsfs_sig_tab[] = { + SUSP_SP, + SUSP_CE, + SUSP_PD, + SUSP_ST, + SUSP_ER, + RRIP_PX, + RRIP_PN, + RRIP_SL, + RRIP_CL, + RRIP_PL, + RRIP_RE, + RRIP_TF, + RRIP_RR, + RRIP_NM +}; + +static int hsfs_num_sig = sizeof (hsfs_sig_tab) / sizeof (hsfs_sig_tab[0]); + +#define SUSP_SP_IX 0 +#define SUSP_CE_IX 1 +#define SUSP_PD_IX 2 +#define SUSP_ST_IX 3 +#define SUSP_ER_IX 4 + +#define RRIP_PX_IX 5 +#define RRIP_PN_IX 6 +#define RRIP_SL_IX 7 +#define RRIP_CL_IX 8 +#define RRIP_PL_IX 9 +#define RRIP_RE_IX 10 +#define RRIP_RF_IX 11 +#define RRIP_RR_IX 12 +#define RRIP_NM_IX 13 diff --git a/usr/src/stand/lib/fs/hsfs/hsfsops.c b/usr/src/stand/lib/fs/hsfs/hsfsops.c new file mode 100644 index 0000000000..c8a98b4780 --- /dev/null +++ b/usr/src/stand/lib/fs/hsfs/hsfsops.c @@ -0,0 +1,1174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/fs/ufs_fsdir.h> +#include <sys/fs/ufs_fs.h> +#include <sys/fs/ufs_inode.h> +#include <sys/sysmacros.h> +#include <sys/promif.h> +#include <sys/filep.h> +#include <sys/salib.h> +#include <sys/sacache.h> + +#include <sys/fs/hsfs_spec.h> +#include <sys/fs/hsfs_isospec.h> +#include <sys/fs/hsfs_node.h> +#include <sys/fs/hsfs_susp.h> +#include <sys/fs/hsfs_rrip.h> + +#include "hsfs_sig.h" + +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootconf.h> +#include <sys/bootdebug.h> + +#define hdbtodb(n) ((ISO_SECTOR_SIZE / DEV_BSIZE) * (n)) + +#define THE_EPOCH 1970 +#define END_OF_TIME 2099 + +/* May not need this... */ +static uint_t sua_offset = 0; + +/* The root inode on an HSFS filesystem can be anywhere! */ +static uint_t root_ino = 0; /* This is both a flag and a value */ + +static fileid_t *head; + +/* Only got one of these...ergo, only 1 fs open at once */ +static devid_t *devp; + +struct dirinfo { + int loc; + fileid_t *fi; +}; + +struct hs_direct { + struct direct hs_ufs_dir; + struct hs_direntry hs_dir; +}; + +/* + * Function prototypes + */ + +static int boot_hsfs_mountroot(char *str); +static int boot_hsfs_unmountroot(void); +static int boot_hsfs_open(char *filename, int flags); +static int boot_hsfs_close(int fd); +static ssize_t boot_hsfs_read(int fd, caddr_t buf, size_t size); +static off_t boot_hsfs_lseek(int, off_t, int); +static int boot_hsfs_fstat(int fd, struct bootstat *stp); +static void boot_hsfs_closeall(int flag); +static int boot_hsfs_getdents(int fd, struct dirent *dep, unsigned size); + +struct boot_fs_ops boot_hsfs_ops = { + "hsfs", + boot_hsfs_mountroot, + boot_hsfs_unmountroot, + boot_hsfs_open, + boot_hsfs_close, + boot_hsfs_read, + boot_hsfs_lseek, + boot_hsfs_fstat, + boot_hsfs_closeall, + boot_hsfs_getdents +}; + +static ino_t find(fileid_t *, char *); +static ino_t dlook(fileid_t *, char *); +static int 
/*
 * Load the first sector of the directory whose extent starts at logical
 * block "inode" and leave filep positioned at the start of it.
 *
 * For the root directory the first record is also parsed and used to set
 * up filep->fi_inode, because nothing else has done that for the root yet.
 *
 * Returns 0 on success, the set_bcache() error if the sector could not be
 * read, or 1 if the root directory record could not be parsed.
 */
static int
opendir(fileid_t *filep, ino_t inode)
{
	struct hs_direct rootent;
	int err;

	/* Describe the I/O request: one ISO sector at the extent start. */
	filep->fi_offset = 0;
	filep->fi_blocknum = hdbtodb(inode);
	filep->fi_count = ISO_SECTOR_SIZE;

	/* Try the block cache first and fall back to reading the disk. */
	filep->fi_memp = get_bcache(filep);
	if (filep->fi_memp == NULL) {
		err = set_bcache(filep);
		if (err)
			return (err);
	}

	/* Re-establish the position after the cache calls. */
	filep->fi_offset = 0;
	filep->fi_blocknum = hdbtodb(inode);

	if (inode == root_ino) {
		if ((int)(parse_dir(filep, 0, &rootent)) <= 0)
			return (1);
		hs_seti(filep, &rootent, inode);
	}

	return (0);
}
+ while (*q != '/' && *q != '\0') + q++; + c = *q; + *q = '\0'; + + if ((inode = dlook(filep, path)) != 0) { + if (c == '\0') + break; + if (opendir(filep, inode)) { + printf("find(): opendir(%d) failed!\n", inode); + *q = c; + return ((ino_t)-1); + } + *q = c; + path = q; + continue; + } else { + *q = c; + return (0); + } + } + return (inode); +} + +static fileid_t * +find_fp(int fd) +{ + fileid_t *filep = head; + + if (fd >= 0) { + while ((filep = filep->fi_forw) != head) + if (fd == filep->fi_filedes) + return (filep->fi_taken ? filep : 0); + } + + return (0); +} + +static ino_t +dlook(fileid_t *filep, char *path) +{ + int dv = filep->fi_devp->di_dcookie; + register struct hs_direct *hsdep; + register struct direct *udp; + register struct inode *ip; + struct dirinfo dirp; + register int len; + ino_t in; + + ip = filep->fi_inode; + if (path == NULL || *path == '\0') + return (0); + if ((ip->i_smode & IFMT) != IFDIR) { + return (0); + } + if (ip->i_size == 0) { + return (0); + } + len = strlen(path); + /* first look through the directory entry cache */ + if (in = get_dcache(dv, path, ip->i_number)) { + if ((filep->fi_inode = get_icache(dv, in)) != NULL) { + filep->fi_offset = 0; + filep->fi_blocknum = hdbtodb(in); + return (in); + } + } + dirp.loc = 0; + dirp.fi = filep; + for (hsdep = readdir(&dirp); hsdep != NULL; hsdep = readdir(&dirp)) { + udp = &hsdep->hs_ufs_dir; + if (udp->d_namlen == 1 && + udp->d_name[0] == '.' && + udp->d_name[1] == '\0') + continue; + if (udp->d_namlen == 2 && + udp->d_name[0] == '.' && + udp->d_name[1] == '.' 
&& + udp->d_name[2] == '\0') + continue; + if (udp->d_namlen == len && (strcmp(path, udp->d_name) == 0)) { + set_dcache(dv, path, ip->i_number, udp->d_ino); + hs_seti(filep, hsdep, udp->d_ino); + filep->fi_offset = 0; + filep->fi_blocknum = hdbtodb(udp->d_ino); + /* put this entry into the cache */ + return (udp->d_ino); + } + /* Allow "*" to print all names at that level, w/out match */ + if (strcmp(path, "*") == 0) + printf("%s\n", udp->d_name); + } + return (0); +} + +/* + * get next entry in a directory. + */ +static struct hs_direct * +readdir(struct dirinfo *dirp) +{ + static struct hs_direct hsdep; + register struct direct *udp = &hsdep.hs_ufs_dir; + register struct inode *ip; + register fileid_t *filep; + register daddr_t lbn; + register int off; + + filep = dirp->fi; + ip = filep->fi_inode; + for (;;) { + if (dirp->loc >= ip->i_size) { + return (NULL); + } + off = dirp->loc & ((1 << ISO_SECTOR_SHIFT) - 1); + if (off == 0) { + lbn = hdbtodb(dirp->loc >> ISO_SECTOR_SHIFT); + filep->fi_blocknum = lbn + hdbtodb(ip->i_number); + filep->fi_count = ISO_SECTOR_SIZE; + /* check the block cache */ + if ((filep->fi_memp = get_bcache(filep)) == 0) + if (set_bcache(filep)) + return ((struct hs_direct *)-1); + } + dirp->loc += parse_dir(filep, off, &hsdep); + if (udp->d_reclen == 0 && dirp->loc <= ip->i_size) { + dirp->loc = roundup(dirp->loc, ISO_SECTOR_SIZE); + continue; + } + return (&hsdep); + } +} + +/* + * Get the next block of data from the file. If possible, dma right into + * user's buffer + */ +static int +getblock(fileid_t *filep, caddr_t buf, int count, int *rcount) +{ + register struct inode *ip; + register caddr_t p; + register int off, size, diff; + register daddr_t lbn; + static int pos; + static char ind[] = "|/-\\"; /* that's entertainment? 
*/ + static int blks_read; + + ip = filep->fi_inode; + p = filep->fi_memp; + if ((signed)filep->fi_count <= 0) { + + /* find the amt left to be read in the file */ + diff = ip->i_size - filep->fi_offset; + if (diff <= 0) { + printf("Short read\n"); + return (-1); + } + + /* which block (or frag) in the file do we read? */ + lbn = hdbtodb(filep->fi_offset >> ISO_SECTOR_SHIFT); + + /* which physical block on the device do we read? */ + filep->fi_blocknum = lbn + hdbtodb(ip->i_number); + + off = filep->fi_offset & ((1 << ISO_SECTOR_SHIFT) - 1); + + size = sizeof (filep->fi_buf); + if (size > ISO_SECTOR_SIZE) + size = ISO_SECTOR_SIZE; + + filep->fi_count = size; + filep->fi_memp = filep->fi_buf; + + /* + * optimization if we are reading large blocks of data then + * we can go directly to user's buffer + */ + *rcount = 0; + if (off == 0 && count >= size) { + filep->fi_memp = buf; + if (diskread(filep)) { + return (-1); + } + *rcount = size; + filep->fi_count = 0; + read_opt++; + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); + return (0); + } else + if (diskread(filep)) + return (-1); + + /* + * round and round she goes (though not on every block.. + * - OBP's take a fair bit of time to actually print stuff) + */ + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); + + if (filep->fi_offset - off + size >= ip->i_size) + filep->fi_count = diff + off; + filep->fi_count -= off; + p = &filep->fi_memp[off]; + } + filep->fi_memp = p; + return (0); +} + + +/* + * This is the high-level read function. It works like this. + * We assume that our IO device buffers up some amount of + * data ant that we can get a ptr to it. Thus we need + * to actually call the device func about filesize/blocksize times + * and this greatly increases our IO speed. When we already + * have data in the buffer, we just return that data (with bcopy() ). 
+ */ + +static ssize_t +boot_hsfs_read(int fd, caddr_t buf, size_t count) +{ + size_t i, j; + struct inode *ip; + caddr_t n; + fileid_t *filep; + int rcount; + + if (!(filep = find_fp(fd))) { + return (-1); + } + + ip = filep->fi_inode; + + if (filep->fi_offset + count > ip->i_size) + count = ip->i_size - filep->fi_offset; + + /* that was easy */ + if ((i = count) == 0) + return (0); + + n = buf; + while (i > 0) { + /* If we need to reload the buffer, do so */ + if ((j = filep->fi_count) == 0) { + getblock(filep, buf, i, &rcount); + i -= rcount; + buf += rcount; + filep->fi_offset += rcount; + } else { + /* else just bcopy from our buffer */ + j = MIN(i, j); + bcopy(filep->fi_memp, buf, (unsigned)j); + buf += j; + filep->fi_memp += j; + filep->fi_offset += j; + filep->fi_count -= j; + i -= j; + } + } + return (buf - n); +} + +/* + * This routine will open a device as it is known by the + * V2 OBP. + * Interface Defn: + * err = mountroot(string); + * err: 0 on success + * -1 on failure + * string: char string describing the properties of the device. + * We must not dork with any fi[]'s here. Save that for later. + */ + +static int +boot_hsfs_mountroot(char *str) +{ + ihandle_t h; + struct hs_volume *fsp; + char *bufp; + + if ((boothowto & RB_DEBUG) && (boothowto & RB_VERBOSE)) + printf("mountroot()\n"); + + /* + * If already mounted, just return success. 
+ */ + if (root_ino != 0) { + return (0); + } + + h = prom_open(str); + + if (h == 0) { + printf("Cannot open %s\n", str); + return (-1); + } + + devp = (devid_t *)bkmem_alloc(sizeof (devid_t)); + devp->di_taken = 1; + devp->di_dcookie = h; + devp->di_desc = (char *)bkmem_alloc(strlen(str) + 1); + (void) strcpy(devp->di_desc, str); + bzero(devp->un_fs.dummy, sizeof (devp->un_fs.dummy)); + head = (fileid_t *)bkmem_alloc(sizeof (fileid_t)); + head->fi_back = head->fi_forw = head; + head->fi_filedes = 0; + head->fi_taken = 0; + + /* Setup read of the "superblock" */ + bzero(head->fi_buf, sizeof (head->fi_buf)); + head->fi_devp = devp; + head->fi_blocknum = hdbtodb(ISO_VOLDESC_SEC); + head->fi_count = ISO_SECTOR_SIZE; + head->fi_memp = head->fi_buf; + head->fi_offset = 0; + + if (diskread(head)) { + printf("mountroot(): read super block failed!\n"); + boot_hsfs_closeall(1); + return (-1); + } + + bufp = head->fi_memp; + fsp = (struct hs_volume *)devp->un_fs.dummy; + /* Since RRIP is based on ISO9660, that's where we start */ + + if (ISO_DESC_TYPE(bufp) != ISO_VD_PVD || + strncmp((char *)(ISO_std_id(bufp)), (char *)(ISO_ID_STRING), + ISO_ID_STRLEN) != 0 || ISO_STD_VER(bufp) != ISO_ID_VER) { + boot_hsfs_closeall(1); + return (-1); + } + + /* Now we fill in the volume descriptor */ + fsp->vol_size = ISO_VOL_SIZE(bufp); + fsp->lbn_size = ISO_BLK_SIZE(bufp); + fsp->lbn_shift = ISO_SECTOR_SHIFT; + fsp->lbn_secshift = ISO_SECTOR_SHIFT; + fsp->vol_set_size = (ushort_t)ISO_SET_SIZE(bufp); + fsp->vol_set_seq = (ushort_t)ISO_SET_SEQ(bufp); + + /* Make sure we have a valid logical block size */ + if (fsp->lbn_size & ~(1 << fsp->lbn_shift)) { + printf("%d byte logical block size invalid.\n", fsp->lbn_size); + boot_hsfs_closeall(1); + return (-1); + } + + /* Since an HSFS root could be located anywhere on the media! 
*/ + root_ino = IDE_EXT_LBN(ISO_root_dir(bufp)); + + if ((boothowto & RB_DEBUG) && (boothowto & RB_VERBOSE)) { + int i; + + printf("root_ino=%d\n", root_ino); + printf("ID="); + for (i = 0; i < ISO_ID_STRLEN; i++) + printf("%c", *(ISO_std_id(bufp)+i)); + printf(" VS=%d\n", fsp->vol_size); + } + + return (0); +} + +/* + * Unmount the currently mounted root fs. In practice, this means + * closing all open files and releasing resources. All of this + * is done by boot_hsfs_closeall(). + */ + +int +boot_hsfs_unmountroot(void) +{ + if (root_ino == 0) + return (-1); + + boot_hsfs_closeall(1); + + return (0); +} + +/* + * We allocate an fd here for use when talking + * to the file itself. + */ + +/*ARGSUSED*/ +static int +boot_hsfs_open(char *filename, int flags) +{ + fileid_t *filep; + ino_t inode; + static int filedes = 1; + + /* build and link a new file descriptor */ + filep = (fileid_t *)bkmem_alloc(sizeof (fileid_t)); + filep->fi_back = head->fi_back; + filep->fi_forw = head; + head->fi_back->fi_forw = filep; + head->fi_back = filep; + + filep->fi_filedes = filedes++; + filep->fi_taken = 1; + filep->fi_path = (char *)bkmem_alloc(strlen(filename) + 1); + (void) strcpy(filep->fi_path, filename); + filep->fi_devp = devp; /* dev is already "mounted" */ + + filep->fi_inode = 0; + + inode = find(filep, filename); + if (inode == (ino_t)0) { + if ((boothowto & RB_DEBUG) && (boothowto & RB_VERBOSE)) + printf("open(%s) ENOENT\n", filename); + (void) boot_hsfs_close(filep->fi_filedes); + return (-1); + } + + filep->fi_blocknum = hdbtodb(inode); + filep->fi_offset = filep->fi_count = 0; + + if ((boothowto & RB_DEBUG) && (boothowto & RB_VERBOSE)) + printf("open(%s) fd=%d\n", filename, filep->fi_filedes); + return (filep->fi_filedes); +} + +/* + * hsfs_fstat() only supports size, mode and times at present time. 
+ */ + +static int +boot_hsfs_fstat(int fd, struct bootstat *stp) +{ + fileid_t *filep; + struct inode *ip; + + if (!(filep = find_fp(fd))) + return (-1); + + ip = filep->fi_inode; + + stp->st_mode = 0; + stp->st_size = 0; + + if (ip == NULL) + return (0); + + switch (ip->i_smode & IFMT) { + case IFDIR: + stp->st_mode = S_IFDIR; + break; + case IFREG: + stp->st_mode = S_IFREG; + break; + default: + break; + } + stp->st_size = ip->i_size; + + /* file times */ + stp->st_atim.tv_sec = ip->i_atime.tv_sec; + stp->st_atim.tv_nsec = ip->i_atime.tv_usec * 1000; + stp->st_mtim.tv_sec = ip->i_mtime.tv_sec; + stp->st_mtim.tv_nsec = ip->i_mtime.tv_usec * 1000; + stp->st_ctim.tv_sec = ip->i_ctime.tv_sec; + stp->st_ctim.tv_nsec = ip->i_ctime.tv_usec * 1000; + + return (0); +} + +/* + * We don't do any IO here. + * We just play games with the device pointers. + */ + +/*ARGSUSED*/ +static off_t +boot_hsfs_lseek(int fd, off_t addr, int whence) +{ + fileid_t *filep; + + if (!(filep = find_fp(fd))) + return (-1); + + filep->fi_offset = addr; + filep->fi_blocknum = addr / DEV_BSIZE; + filep->fi_count = 0; + + return (0); +} + +static int +boot_hsfs_close(int fd) +{ + fileid_t *filep; + + if ((boothowto & RB_DEBUG) && (boothowto & RB_VERBOSE)) + printf("close(%d)\n", fd); + + if (filep = find_fp(fd)) { + /* Clear the ranks */ + bkmem_free(filep->fi_path, strlen(filep->fi_path)+1); + filep->fi_blocknum = filep->fi_count = filep->fi_offset = 0; + filep->fi_memp = (caddr_t)0; + filep->fi_devp = 0; + filep->fi_taken = 0; + + /* unlink and deallocate node */ + filep->fi_forw->fi_back = filep->fi_back; + filep->fi_back->fi_forw = filep->fi_forw; + bkmem_free((char *)filep, sizeof (fileid_t)); + + return (0); + } else { + /* Big problem */ + printf("\nFile descrip %d not allocated!", fd); + return (-1); + } +} + +/*ARGSUSED*/ +static void +boot_hsfs_closeall(int flag) +{ + fileid_t *filep = head; + extern int verbosemode; + + while ((filep = filep->fi_forw) != head) + if (filep->fi_taken) + 
if (boot_hsfs_close(filep->fi_filedes)) + prom_panic("Filesystem may be inconsistent.\n"); + + release_cache(devp->di_dcookie); + (void) prom_close(devp->di_dcookie); + devp->di_taken = 0; + if (verbosemode) + print_cache_data(); + bkmem_free((char *)devp, sizeof (devid_t)); + bkmem_free((char *)head, sizeof (fileid_t)); + root_ino = 0; +} + +static uint_t +parse_dir(fileid_t *filep, int offset, struct hs_direct *hsdep) +{ + char *bufp = (char *)(filep->fi_memp + offset); + struct direct *udp = &hsdep->hs_ufs_dir; + struct hs_direntry *hdp = &hsdep->hs_dir; + uint_t ce_lbn; + uint_t ce_len; + uint_t nmlen; + uint_t i; + uchar_t c; + int ret_code = 0; + + if ((udp->d_reclen = IDE_DIR_LEN(bufp)) == 0) + return (0); + + hdp->ext_lbn = IDE_EXT_LBN(bufp); + hdp->ext_size = IDE_EXT_SIZE(bufp); + hs_dodates(HS_VOL_TYPE_ISO, hdp, bufp); + hdp->xar_len = IDE_XAR_LEN(bufp); + hdp->intlf_sz = IDE_INTRLV_SIZE(bufp); + hdp->intlf_sk = IDE_INTRLV_SKIP(bufp); + hdp->sym_link = NULL; + + udp->d_ino = hdp->ext_lbn; + + c = IDE_FLAGS(bufp); + if (IDE_REGULAR_FILE(c)) { + hdp->type = VREG; + hdp->mode = IFREG; + hdp->nlink = 1; + } else if (IDE_REGULAR_DIR(c)) { + hdp->type = VDIR; + hdp->mode = IFDIR; + hdp->nlink = 2; + } else { + printf("parse_dir(): file type=0x%x unknown.\n", c); + return ((uint_t)-1); + } + + /* Some initial conditions */ + nmlen = IDE_NAME_LEN(bufp); + c = *IDE_NAME(bufp); + /* Special Case: Current Directory */ + if (nmlen == 1 && c == '\0') { + udp->d_name[0] = '.'; + udp->d_name[1] = '\0'; + udp->d_namlen = 1; + /* Special Case: Parent Directory */ + } else if (nmlen == 1 && c == '\001') { + udp->d_name[0] = '.'; + udp->d_name[1] = '.'; + udp->d_name[2] = '\0'; + udp->d_namlen = 2; + /* Other file name */ + } else { + udp->d_namlen = 0; + for (i = 0; i < nmlen; i++) { + c = *(IDE_name(bufp)+i); + if (c == ';') + break; + else if (c == ' ') + continue; + else + udp->d_name[udp->d_namlen++] = c; + } + udp->d_name[udp->d_namlen] = '\0'; + } + /* System Use 
Fields */ + ce_len = IDE_SUA_LEN(bufp); + ce_lbn = 0; + if ((int)(ce_len) > 0) { + ce_lbn = parse_susp((char *)IDE_sys_use_area(bufp), &ce_len, hsdep); + while (ce_lbn) { + daddr_t save_blocknum = filep->fi_blocknum; + daddr_t save_offset = filep->fi_offset; + caddr_t save_memp = filep->fi_memp; + uint_t save_count = filep->fi_count; + +#ifdef noisy + print_io_req(filep, "parse_dir(): [I]"); +#endif /* noisy */ + + filep->fi_blocknum = hdbtodb(ce_lbn); + filep->fi_offset = 0; + filep->fi_count = ISO_SECTOR_SIZE; + +#ifdef noisy + print_io_req(filep, "parse_dir(): [0]"); +#endif /* noisy */ + + if ((filep->fi_memp = get_bcache(filep)) == 0) + ret_code = set_bcache(filep); + +#ifdef noisy + print_io_req(filep, "parse_dir(): [1]"); +#endif /* noisy */ + + if (ret_code) { + filep->fi_blocknum = save_blocknum; + filep->fi_offset = save_offset; + filep->fi_memp = save_memp; + filep->fi_count = save_count; + printf("parse_dir(): set_bcache() failed (%d)\n", + ret_code); + break; + } + ce_lbn = parse_susp(filep->fi_memp, &ce_len, hsdep); + + filep->fi_blocknum = save_blocknum; + filep->fi_offset = save_offset; + filep->fi_memp = save_memp; + filep->fi_count = save_count; + +#ifdef noisy + print_io_req(filep, "parse_dir(): [2]"); +#endif /* noisy */ + } + } + + return (udp->d_reclen); +} + +static uint_t +parse_susp(char *bufp, uint_t *ce_len, struct hs_direct *hsdep) +{ + struct direct *udp = &hsdep->hs_ufs_dir; + uchar_t *susp; + uint_t cur_off = 0; + uint_t blk_len = *ce_len; + uint_t susp_len = 0; + uint_t ce_lbn = 0; + uint_t i; + + while (cur_off < blk_len) { + susp = (uchar_t *)(bufp + cur_off); + if (susp[0] == '\0' || susp[1] == '\0') + break; + susp_len = SUF_LEN(susp); + if (susp_len == 0) + break; + for (i = 0; i < hsfs_num_sig; i++) { + if (strncmp(hsfs_sig_tab[i], (char *)susp, SUF_SIG_LEN) == 0) { +#ifdef noisy + if ((boothowto & RB_DEBUG) && (boothowto & RB_VERBOSE)) + printf(" SUSP_%c%c %d\n", susp[0], susp[1], susp_len); +#endif /* noisy */ + switch (i) { 
+ case SUSP_SP_IX: + if (CHECK_BYTES_OK(susp)) { + sua_offset = SP_SUA_OFFSET(susp); +#ifdef lint + /* Like the man said, this may not be needed */ + i = (int)sua_offset; +#endif /* lint */ + } + break; + + case SUSP_CE_IX: + ce_lbn = CE_BLK_LOC(susp); + *ce_len = CE_CONT_LEN(susp); +#ifdef noisy + if ((boothowto & RB_DEBUG) && + (boothowto & RB_VERBOSE)) + printf("parse_susp(): " + "CE: ce_lbn = %d ce_len=%d\n", + ce_lbn, *ce_len); +#endif /* noisy */ + break; + + case SUSP_ST_IX: + printf("parse_susp(): ST: returning %d\n", ce_lbn); + return (ce_lbn); + + case RRIP_SL_IX: +#ifdef noisy + if ((boothowto & RB_DEBUG) && + (boothowto & RB_VERBOSE)) + printf("parse_susp(): ******* SL *******\n"); +#endif /* noisy */ + break; + + case RRIP_RR_IX: + break; + + case RRIP_NM_IX: + if (!RRIP_NAME_FLAGS(susp)) { + udp->d_namlen = RRIP_NAME_LEN(susp); + bcopy((char *)RRIP_name(susp), + (char *)udp->d_name, + udp->d_namlen); + udp->d_name[udp->d_namlen] = '\0'; + } + break; + } + cur_off += susp_len; + break; + } + } + if (i > hsfs_num_sig) { + printf("parse_susp(): Bad SUSP\n"); + cur_off = blk_len; + break; + } + } + return (ce_lbn); +} + +static void +hs_seti(fileid_t *filep, struct hs_direct *hsdep, ino_t inode) +{ + register struct inode *ip; + int dv = filep->fi_devp->di_dcookie; + + /* Try the inode cache first */ + if ((filep->fi_inode = get_icache(dv, inode)) != NULL) + return; + + filep->fi_inode = (struct inode *)bkmem_alloc(sizeof (struct inode)); + ip = filep->fi_inode; + bzero((char *)ip, sizeof (struct inode)); + ip->i_size = hsdep->hs_dir.ext_size; + ip->i_smode = hsdep->hs_dir.mode; + ip->i_number = inode; + ip->i_atime.tv_sec = hsdep->hs_dir.adate.tv_sec; + ip->i_atime.tv_usec = hsdep->hs_dir.adate.tv_usec; + ip->i_ctime.tv_sec = hsdep->hs_dir.cdate.tv_sec; + ip->i_ctime.tv_usec = hsdep->hs_dir.cdate.tv_usec; + ip->i_mtime.tv_sec = hsdep->hs_dir.mdate.tv_sec; + ip->i_mtime.tv_usec = hsdep->hs_dir.mdate.tv_usec; + set_icache(dv, inode, ip, sizeof (struct 
inode)); +} + +#ifdef noisy +static void +print_io_req(fileid_t *filep, char *str) +{ + printf("%s o=%d b=%d c=%d m=%x\n", + str, + filep->fi_offset, + filep->fi_blocknum, + filep->fi_count, + (uint_t)filep->fi_memp); +} +#endif /* noisy */ + +static int +boot_hsfs_getdents(int fd, struct dirent *dep, unsigned size) +{ + /* + * Read directory entries from the file open on "fd" into the + * "size"-byte buffer at "dep" until the buffer is exhausted + * or we reach EOF on the directory. Returns the number of + * entries read. + */ + int n; + int cnt = 0; + struct dirinfo dir; + struct hs_direct *hdp; + unsigned long oldoff, oldblok; + +#define SLOP (sizeof (struct dirent) - (int)&((struct dirent *)0)->d_name[1]) + + if (!(dir.fi = find_fp(fd)) || + ((dir.fi->fi_inode->i_smode & IFMT) != IFDIR)) { + /* + * Bogus file descriptor, bail out now! + */ + return (-1); + } + + oldoff = dir.loc = dir.fi->fi_offset; + oldblok = dir.fi->fi_blocknum; + + for (hdp = readdir(&dir); hdp; hdp = readdir(&dir)) { + /* + * Compute name length and break loop if there's not + * enough space in the output buffer for the next + * entry. + * + * NOTE: "SLOP" is the number of bytes inserted into the dirent + * struct's "d_name" field by the compiler to preserve + * alignment. + */ + n = strlen(hdp->hs_ufs_dir.d_name); + n = roundup((sizeof (struct dirent) + ((n > SLOP) ? 
n : 0)), + sizeof (off_t)); + + if (n > size) { + dir.fi->fi_blocknum = oldblok; + dir.fi->fi_offset = oldoff; + break; + } + + oldblok = dir.fi->fi_blocknum; + oldoff = dir.loc; + size -= n; + cnt += 1; + + (void) strcpy(dep->d_name, hdp->hs_ufs_dir.d_name); + dep->d_ino = hdp->hs_ufs_dir.d_ino; + dep->d_off = dir.loc; + dep->d_reclen = (unsigned short)n; + + dep = (struct dirent *)((char *)dep + n); + } + +#undef SLOP + + return (cnt); +} + +static void +hs_dodates(enum hs_vol_type type, struct hs_direntry *hdp, char *bufp) +{ + if (type == HS_VOL_TYPE_HS) { + hs_parse_dirdate(HDE_cdate(bufp), &hdp->cdate); + hs_parse_dirdate(HDE_cdate(bufp), &hdp->adate); + hs_parse_dirdate(HDE_cdate(bufp), &hdp->mdate); + } else if (type == HS_VOL_TYPE_ISO) { + hs_parse_dirdate(IDE_cdate(bufp), &hdp->cdate); + hs_parse_dirdate(IDE_cdate(bufp), &hdp->adate); + hs_parse_dirdate(IDE_cdate(bufp), &hdp->mdate); + } else + prom_panic("hs_dodates: bad volume type"); +} + +/* + * hs_parse_dirdate + * + * Parse the short 'directory-format' date into a Unix timeval. + * This is the date format used in Directory Entries. + * + * If the date is not representable, make something up. + */ +void +hs_parse_dirdate(uchar_t *dp, struct timeval *tvp) +{ + int year, month, day, hour, minute, sec, gmtoff; + + year = HDE_DATE_YEAR(dp); + month = HDE_DATE_MONTH(dp); + day = HDE_DATE_DAY(dp); + hour = HDE_DATE_HOUR(dp); + minute = HDE_DATE_MIN(dp); + sec = HDE_DATE_SEC(dp); + gmtoff = HDE_DATE_GMTOFF(dp); + + tvp->tv_usec = 0; + if (year < THE_EPOCH) { + tvp->tv_sec = 0; + } else { + tvp->tv_sec = hs_date_to_gmtime(year, month, day, gmtoff); + if (tvp->tv_sec != -1) { + tvp->tv_sec += ((hour * 60) + minute) * 60 + sec; + } + } + + return; + +} + +/* + * hs_parse_longdate + * + * Parse the long 'user-oriented' date into a Unix timeval. + * This is the date format used in the Volume Descriptor. + * + * If the date is not representable, make something up. 
+ */ +void +hs_parse_longdate(uchar_t *dp, struct timeval *tvp) +{ + int year, month, day, hour, minute, sec, gmtoff; + + year = HSV_DATE_YEAR(dp); + month = HSV_DATE_MONTH(dp); + day = HSV_DATE_DAY(dp); + hour = HSV_DATE_HOUR(dp); + minute = HSV_DATE_MIN(dp); + sec = HSV_DATE_SEC(dp); + gmtoff = HSV_DATE_GMTOFF(dp); + + tvp->tv_usec = 0; + if (year < THE_EPOCH) { + tvp->tv_sec = 0; + } else { + tvp->tv_sec = hs_date_to_gmtime(year, month, day, gmtoff); + if (tvp->tv_sec != -1) { + tvp->tv_sec += ((hour * 60) + minute) * 60 + sec; + tvp->tv_usec = HSV_DATE_HSEC(dp) * 10000; + } + } + +} + +/* cumulative number of seconds per month, non-leap and leap-year versions */ +static time_t cum_sec[] = { + 0x0, 0x28de80, 0x4dc880, 0x76a700, 0x9e3400, 0xc71280, + 0xee9f80, 0x1177e00, 0x1405c80, 0x167e980, 0x190c800, 0x1b85500 +}; +static time_t cum_sec_leap[] = { + 0x0, 0x28de80, 0x4f1a00, 0x77f880, 0x9f8580, 0xc86400, + 0xeff100, 0x118cf80, 0x141ae00, 0x1693b00, 0x1921980, 0x1b9a680 +}; +#define SEC_PER_DAY 0x15180 +#define SEC_PER_YEAR 0x1e13380 + +/* + * hs_date_to_gmtime + * + * Convert year(1970-2099)/month(1-12)/day(1-31) to seconds-since-1970/1/1. + * + * Returns -1 if the date is out of range. + */ +static time_t +hs_date_to_gmtime(int year, int mon, int day, int gmtoff) +{ + time_t sum; + time_t *cp; + int y; + + if ((year < THE_EPOCH) || (year > END_OF_TIME) || + (mon < 1) || (mon > 12) || + (day < 1) || (day > 31)) + return (-1); + + /* + * Figure seconds until this year and correct for leap years. + * Note: 2000 is a leap year but not 2100. + */ + y = year - THE_EPOCH; + sum = y * SEC_PER_YEAR; + sum += ((y + 1) / 4) * SEC_PER_DAY; + /* + * Point to the correct table for this year and + * add in seconds until this month. + */ + cp = ((y + 2) % 4) ? cum_sec : cum_sec_leap; + sum += cp[mon - 1]; + /* + * Add in seconds until 0:00 of this day. 
+ * (days-per-month validation is not done here) + */ + sum += (day - 1) * SEC_PER_DAY; + sum -= (gmtoff * 15 * 60); + return (sum); +} diff --git a/usr/src/stand/lib/fs/hsfs/llib-lhsfs b/usr/src/stand/lib/fs/hsfs/llib-lhsfs new file mode 100644 index 0000000000..e4b8bfaf8f --- /dev/null +++ b/usr/src/stand/lib/fs/hsfs/llib-lhsfs @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <sys/boothsfs.h> diff --git a/usr/src/stand/lib/fs/nfs/Makefile b/usr/src/stand/lib/fs/nfs/Makefile new file mode 100644 index 0000000000..6ade176337 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/Makefile @@ -0,0 +1,61 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = libnfs.a +LOCOBJS = auth_none.o auth_unix.o bootparams.o getdents.o \ + getdents3.o getdents4.o lookup.o mount.o \ + nfsops.o nfs2ops.o nfs3ops.o nfs4ops.o pathname.o pmap.o rpc.o \ + clnt_budp.o clnt_btcp.o nfs_xdr.o nfs3_xdr.o nfs4_xdr.o xdr_rec.o + +RPC_CMNOBJS = rpc_prot.o +CMNOBJS = bootparam_xdr.o +OBJECTS = $(LOCOBJS) $(RPC_CMNOBJS) $(CMNOBJS) + +include ../Makefile.com + +RPC_CMNDIR = $(TOPDIR)/uts/common/rpc +CMNDIR = $(TOPDIR)/uts/common/fs/nfs +SRCS = $(LOCOBJS:%.o=$(SRCDIR)/%.c) $(RPC_CMNOBJS:%.o=$(RPC_CMNDIR)/%.c) \ + $(CMNOBJS:%.o=$(CMNDIR)/%.c) + +LDLIBS += -linet -lsock -lxdr +CPPFLAGS += $(SOCKCPPFLAGS) $(DHCPCPPFLAGS) -I../../inet +CPPFLAGS += -I$(STANDDIR)/lib/sa -I$(TOPDIR)/head + +# +# This is really wrong, but we have no choice since <rpc/*.h> needs to +# resolve types that are in <sys/stream.h>. Thankfully, we don't use +# anything from libsock.a that relies on the definition of an mblk_t. 
+# +CPPFLAGS += -U_SYS_STREAM_H + +objs/%.o: $(RPC_CMNDIR)/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +include ../../Makefile.targ diff --git a/usr/src/stand/lib/fs/nfs/auth_inet.h b/usr/src/stand/lib/fs/nfs/auth_inet.h new file mode 100644 index 0000000000..c64a695c75 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/auth_inet.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1997-1999 by Sun Microsystems, Inc. + * All rights reserved. 
+ */ + +#ifndef _AUTH_INET_H +#define _AUTH_INET_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +extern struct opaque_auth _null_auth; +extern AUTH *authnone_create(void); +extern AUTH *authunix_create(char *, uid_t, gid_t, int, gid_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _AUTH_INET_H */ diff --git a/usr/src/stand/lib/fs/nfs/auth_none.c b/usr/src/stand/lib/fs/nfs/auth_none.c new file mode 100644 index 0000000000..efd18d3bc8 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/auth_none.c @@ -0,0 +1,131 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from SunOS 4.1 */ +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * modified for use by the boot program. + * + * auth_none.c + * Creates a client authentication handle for passing "null" + * credentials and verifiers to remote systems. 
+ */ + +#include <rpc/types.h> +#include <rpc/xdr.h> +#include <rpc/auth.h> +#include "clnt.h" + +#define MAX_MARSHEL_SIZE 20 + +static struct auth_ops *authnone_ops(); + +static struct authnone_private { + AUTH no_client; + char marshalled_client[MAX_MARSHEL_SIZE]; + uint_t mcnt; +} *authnone_private; + +static struct authnone_private authnone_local; + +AUTH * +authnone_create(void) +{ + struct authnone_private *ap = authnone_private; + XDR xdr_stream; + XDR *xdrs; + + if (ap == 0) { + ap = &authnone_local; + authnone_private = ap; + } + if (!ap->mcnt) { + ap->no_client.ah_cred = ap->no_client.ah_verf = _null_auth; + ap->no_client.ah_ops = authnone_ops(); + xdrs = &xdr_stream; + xdrmem_create(xdrs, ap->marshalled_client, + (uint_t)MAX_MARSHEL_SIZE, XDR_ENCODE); + (void) xdr_opaque_auth(xdrs, &ap->no_client.ah_cred); + (void) xdr_opaque_auth(xdrs, &ap->no_client.ah_verf); + ap->mcnt = XDR_GETPOS(xdrs); + XDR_DESTROY(xdrs); + } + return (&ap->no_client); +} + +/*ARGSUSED*/ +static bool_t +authnone_marshal(AUTH *client, XDR *xdrs, struct cred *cr) +{ + struct authnone_private *ap = authnone_private; + + if (ap == 0) + return (0); + return ((*xdrs->x_ops->x_putbytes)(xdrs, + ap->marshalled_client, ap->mcnt)); +} + +/* ARGSUSED */ +static void +authnone_verf(AUTH *foo) +{ +} + +/* ARGSUSED */ +static bool_t +authnone_validate(AUTH *foo, struct opaque_auth *bar) +{ + return (TRUE); +} + +/* ARGSUSED */ +static bool_t +authnone_refresh(AUTH *foo, struct rpc_msg *bar, cred_t *cr) +{ + return (FALSE); +} + +/* ARGSUSED */ +static void +authnone_destroy(AUTH *foo) +{ +} + +static struct auth_ops * +authnone_ops(void) +{ + static struct auth_ops ops; + + if (ops.ah_nextverf == NULL) { + ops.ah_nextverf = authnone_verf; + ops.ah_marshal = authnone_marshal; + ops.ah_validate = authnone_validate; + ops.ah_refresh = authnone_refresh; + ops.ah_destroy = authnone_destroy; + } + return (&ops); +} diff --git a/usr/src/stand/lib/fs/nfs/auth_unix.c 
b/usr/src/stand/lib/fs/nfs/auth_unix.c new file mode 100644 index 0000000000..1f28656a9a --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/auth_unix.c @@ -0,0 +1,304 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Adapted for use by the boot program. + * + * auth_unix.c, Implements UNIX style authentication parameters. + * + * The system is very weak. The client uses no encryption for its + * credentials and only sends null verifiers. The server sends back + * null verifiers or optionally a verifier that suggests a new short hand + * for the credentials. 
+ */ + +#include <stdlib.h> +#include <sys/sysmacros.h> +#include <rpc/types.h> +#include <rpc/xdr.h> +#include <rpc/auth.h> +#include "clnt.h" +#include <rpc/auth_unix.h> +#include <sys/promif.h> +#include <sys/salib.h> +#include <sys/bootdebug.h> +#include "nfs_inet.h" + +static struct auth_ops *authunix_ops(); +/* + * This struct is pointed to by the ah_private field of an auth_handle. + */ +struct audata { + struct opaque_auth au_origcred; /* original credentials */ + struct opaque_auth au_shcred; /* short hand cred */ + uint_t au_shfaults; /* short hand cache faults */ + char au_marshed[MAX_AUTH_BYTES]; + uint_t au_mpos; /* xdr pos at end of marshed */ +}; +#define AUTH_PRIVATE(auth) ((struct audata *)auth->ah_private) + +static void marshal_new_auth(AUTH *); + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * Create a unix style authenticator. + * Returns an auth handle with the given stuff in it. + */ +AUTH * +authunix_create(char *machname, uid_t uid, gid_t gid, int len, gid_t *aup_gids) +{ + struct authunix_parms aup; + char mymem[MAX_AUTH_BYTES]; + XDR xdrs; + AUTH *auth; + struct audata *au; + + /* + * Allocate and set up auth handle + */ + auth = (AUTH *) bkmem_alloc(sizeof (*auth)); + if (auth == NULL) { + prom_panic("authunix_create: Cannot allocate memory."); + return (NULL); + } + + au = (struct audata *)bkmem_alloc(sizeof (*au)); + if (au == NULL) { + prom_panic("authunix_create: Cannot allocate memory."); + return (NULL); + } + + /* setup authenticator. 
*/ + auth->ah_ops = authunix_ops(); + auth->ah_private = (caddr_t)au; + + /* structure copies */ + auth->ah_verf = au->au_shcred = _null_auth; + + au->au_shfaults = 0; + + /* + * fill in param struct from the given params + */ + aup.aup_time = prom_gettime() / 1000; + aup.aup_machname = machname; + aup.aup_uid = uid; + aup.aup_gid = gid; + aup.aup_len = (uint_t)len; + aup.aup_gids = (gid_t *)aup_gids; + + /* + * Serialize the parameters into origcred + */ + xdrmem_create(&xdrs, mymem, MAX_AUTH_BYTES, XDR_ENCODE); + if (!xdr_authunix_parms(&xdrs, &aup)) { + prom_panic("authunix_create: xdr_authunix_parms failed"); + bkmem_free(auth->ah_private, sizeof (struct audata)); + bkmem_free((caddr_t)auth, sizeof (*auth)); + return ((AUTH *)0); + } + au->au_origcred.oa_length = len = XDR_GETPOS(&xdrs); + au->au_origcred.oa_flavor = (uint_t)AUTH_UNIX; + if ((au->au_origcred.oa_base = bkmem_alloc((uint_t)len)) == NULL) { + prom_panic("authunix_create: memory alloc failed"); + bkmem_free(auth->ah_private, sizeof (struct audata)); + bkmem_free((caddr_t)auth, sizeof (*auth)); + return ((AUTH *)0); + } + (void) bcopy(mymem, au->au_origcred.oa_base, (uint_t)len); + + /* + * set auth handle to reflect new cred. 
+ */ + auth->ah_cred = au->au_origcred; + marshal_new_auth(auth); + return (auth); +} + +/* + * authunix operations + */ + +/* ARGSUSED */ +static void +authunix_nextverf(AUTH *auth) +{ +} + +/* ARGSUSED */ +static bool_t +authunix_marshal(AUTH *auth, XDR *xdrs, cred_t *cr) +{ + struct audata *au = AUTH_PRIVATE(auth); + + return (XDR_PUTBYTES(xdrs, au->au_marshed, au->au_mpos)); +} + +static bool_t +authunix_validate(AUTH *auth, struct opaque_auth *verf) +{ + struct audata *au; + XDR xdrs; + + if (verf->oa_flavor == AUTH_SHORT) { + au = AUTH_PRIVATE(auth); + + + xdrmem_create(&xdrs, verf->oa_base, verf->oa_length, + XDR_DECODE); + + if (xdr_opaque_auth(&xdrs, &au->au_shcred)) { + auth->ah_cred = au->au_shcred; + } else { + xdrs.x_op = XDR_FREE; + (void) xdr_opaque_auth(&xdrs, &au->au_shcred); + au->au_shcred.oa_base = 0; + auth->ah_cred = au->au_origcred; + } + marshal_new_auth(auth); + } + + return (TRUE); +} + +/*ARGSUSED*/ +static bool_t +authunix_refresh(AUTH *auth, struct rpc_msg *msg, cred_t *cr) +{ + struct audata *au = AUTH_PRIVATE(auth); + struct authunix_parms aup; + XDR xdrs; + int stat; + + if (auth->ah_cred.oa_base == au->au_origcred.oa_base) { + /* there is no hope. 
Punt */ + return (FALSE); + } + au->au_shfaults ++; + + /* first deserialize the creds back into a struct authunix_parms */ + aup.aup_machname = (char *)0; + aup.aup_gids = (gid_t *)0; + xdrmem_create(&xdrs, au->au_origcred.oa_base, + au->au_origcred.oa_length, XDR_DECODE); + stat = xdr_authunix_parms(&xdrs, &aup); + if (!stat) + goto done; + + /* update the time and serialize in place */ + aup.aup_time = (prom_gettime() / 1000); + xdrs.x_op = XDR_ENCODE; + XDR_SETPOS(&xdrs, 0); + stat = xdr_authunix_parms(&xdrs, &aup); + if (!stat) + goto done; + auth->ah_cred = au->au_origcred; + marshal_new_auth(auth); +done: + /* free the struct authunix_parms created by deserializing */ + xdrs.x_op = XDR_FREE; + (void) xdr_authunix_parms(&xdrs, &aup); + XDR_DESTROY(&xdrs); + return (stat); +} + +static void +authunix_destroy(AUTH *auth) +{ + struct audata *au = AUTH_PRIVATE(auth); + + if (au->au_shcred.oa_base != NULL) + bkmem_free(au->au_shcred.oa_base, au->au_shcred.oa_length); + bkmem_free(auth->ah_private, sizeof (struct audata)); + if (auth->ah_verf.oa_base != NULL) + bkmem_free(auth->ah_verf.oa_base, auth->ah_verf.oa_length); + bkmem_free((caddr_t)auth, sizeof (*auth)); +} + +/* + * Marshals (pre-serializes) an auth struct. 
+ * sets private data, au_marshed and au_mpos + */ +static void +marshal_new_auth(AUTH *auth) +{ + XDR xdr_stream; + XDR *xdrs = &xdr_stream; + struct audata *au = AUTH_PRIVATE(auth); + + xdrmem_create(xdrs, au->au_marshed, MAX_AUTH_BYTES, XDR_ENCODE); + if ((!xdr_opaque_auth(xdrs, &(auth->ah_cred))) || + (!xdr_opaque_auth(xdrs, &(auth->ah_verf)))) { + dprintf("marshal_new_auth - Fatal marshalling problem"); + } else { + au->au_mpos = XDR_GETPOS(xdrs); + } + XDR_DESTROY(xdrs); +} + + +static struct auth_ops * +authunix_ops(void) +{ + static struct auth_ops ops; + + if (ops.ah_nextverf == 0) { + ops.ah_nextverf = authunix_nextverf; + ops.ah_marshal = authunix_marshal; + ops.ah_validate = authunix_validate; + ops.ah_refresh = authunix_refresh; + ops.ah_destroy = authunix_destroy; + } + return (&ops); +} + +/* + * XDR for unix authentication parameters. + */ +bool_t +xdr_authunix_parms(XDR *xdrs, struct authunix_parms *p) +{ + if (xdr_u_int(xdrs, &(p->aup_time)) && + xdr_string(xdrs, &(p->aup_machname), MAX_MACHINE_NAME) && + xdr_int(xdrs, (int *)&(p->aup_uid)) && + xdr_int(xdrs, (int *)&(p->aup_gid)) && + xdr_array(xdrs, (caddr_t *)&(p->aup_gids), + &(p->aup_len), NGRPS, sizeof (int), xdr_int)) { + return (TRUE); + } + return (FALSE); +} diff --git a/usr/src/stand/lib/fs/nfs/bootparams.c b/usr/src/stand/lib/fs/nfs/bootparams.c new file mode 100644 index 0000000000..c2f7d12767 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/bootparams.c @@ -0,0 +1,366 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This file contains routines responsible for getting the system's + * name and boot params. Most of it comes from the SVR4 diskless boot + * code (dlboot_inet), modified to work in a non socket environment. + */ + +#include <sys/types.h> +#include <rpc/types.h> +#include <sys/errno.h> +#include <rpc/auth.h> +#include <rpc/xdr.h> +#include <rpc/rpc_msg.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <rpc/rpc.h> +#include <sys/utsname.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <sys/promif.h> +#include <rpcsvc/bootparam.h> +#include "pmap.h" +#include "brpc.h" +#include "socket_inet.h" +#include "ipv4.h" +#include <sys/salib.h> +#include <sys/bootdebug.h> + +extern int errno; +static struct bp_whoami_res bp; +static char bp_hostname[SYS_NMLN+1]; +static char bp_domainname[SYS_NMLN+1]; +static struct in_addr responder; /* network order */ + +static const char *noserver = + "No bootparam (%s) server responding; still trying...\n"; + +#define GETFILE_BTIMEO 1 +#define GETFILE_BRETRIES 2 + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * Returns TRUE if it has set the global structure 'bp' to our boot + * parameters, FALSE if some failure occurred. 
+ */ +bool_t +whoami(void) +{ + struct bp_whoami_arg arg; + struct sockaddr_in to, from; + struct in_addr ipaddr; + enum clnt_stat stat; + bool_t retval = TRUE; + int rexmit; /* retransmission interval */ + int resp_wait; /* secs to wait for resp */ + int namelen; + int printed_waiting_msg; + + /* + * Set our destination IP address to the limited broadcast address + * (INADDR_BROADCAST). + */ + to.sin_family = AF_INET; + to.sin_addr.s_addr = htonl(INADDR_BROADCAST); + to.sin_port = htons(0); + + /* + * Set up the arguments expected by bootparamd. + */ + arg.client_address.address_type = IP_ADDR_TYPE; + ipv4_getipaddr(&ipaddr); + ipaddr.s_addr = htonl(ipaddr.s_addr); + bcopy((caddr_t)&ipaddr, + (caddr_t)&arg.client_address.bp_address_u.ip_addr, + sizeof (ipaddr)); + + /* + * Retransmit/wait for up to resp_wait secs. + */ + rexmit = 0; /* start at default retransmission interval. */ + resp_wait = 16; + + bp.client_name = &bp_hostname[0]; + bp.domain_name = &bp_domainname[0]; + + /* + * Do a broadcast call to find a bootparam daemon that + * will tell us our hostname, domainname and any + * router that we have to use to talk to our NFS server. + */ + printed_waiting_msg = 0; + do { + /* + * First try the SunOS portmapper and if no reply is + * received will then try the SVR4 rpcbind. + * Either way, `bootpaddr' will be set to the + * correct address for the bootparamd that responds. + */ + stat = bpmap_rmtcall((rpcprog_t)BOOTPARAMPROG, + (rpcvers_t)BOOTPARAMVERS, (rpcproc_t)BOOTPARAMPROC_WHOAMI, + xdr_bp_whoami_arg, (caddr_t)&arg, + xdr_bp_whoami_res, (caddr_t)&bp, rexmit, resp_wait, + &to, &from, AUTH_NONE); + if (stat == RPC_TIMEDOUT && !printed_waiting_msg) { + dprintf(noserver, "whoami"); + printed_waiting_msg = 1; + } + /* + * Retransmission interval for second and subsequent tries. + * We expect first bpmap_rmtcall to retransmit and backoff to + * at least this value. + */ + rexmit = resp_wait; + resp_wait = 0; /* go to default wait now. 
*/ + } while (stat == RPC_TIMEDOUT); + + if (stat != RPC_SUCCESS) { + dprintf("whoami RPC call failed with rpc status: %d\n", stat); + retval = FALSE; + goto done; + } else { + if (printed_waiting_msg && (boothowto & RB_VERBOSE)) + printf("Bootparam response received\n"); + + /* Cache responder... We'll send our getfile here... */ + responder.s_addr = from.sin_addr.s_addr; + } + + namelen = strlen(bp.client_name); + if (namelen > SYS_NMLN) { + dprintf("whoami: hostname too long"); + retval = FALSE; + goto done; + } + if (namelen > 0) { + if (boothowto & RB_VERBOSE) + printf("hostname: %s\n", bp.client_name); + sethostname(bp.client_name, namelen); + } else { + dprintf("whoami: no host name\n"); + retval = FALSE; + goto done; + } + + namelen = strlen(bp.domain_name); + if (namelen > SYS_NMLN) { + dprintf("whoami: domainname too long"); + retval = FALSE; + goto done; + } + if (namelen > 0) + if (boothowto & RB_VERBOSE) + printf("domainname: %s\n", bp.domain_name); + else + dprintf("whoami: no domain name\n"); + + if (bp.router_address.address_type == IP_ADDR_TYPE) { + bcopy((caddr_t)&bp.router_address.bp_address_u.ip_addr, + (caddr_t)&ipaddr, sizeof (ipaddr)); + if (ntohl(ipaddr.s_addr) != INADDR_ANY) { + dprintf("whoami: Router ip is: %s\n", + inet_ntoa(ipaddr)); + /* ipv4_route expects IP addresses in network order */ + (void) ipv4_route(IPV4_ADD_ROUTE, RT_DEFAULT, NULL, + &ipaddr); + } + } else + dprintf("whoami: unknown gateway addr family %d\n", + bp.router_address.address_type); +done: + return (retval); +} + +/* + * Returns: + * 1) The ascii form of our root servers name in `server_name'. + * 2) Pathname of our root on the server in `server_path'. + * + * NOTE: it's ok for getfile() to do dynamic allocation - it's only + * used locally, then freed. If the server address returned from the + * getfile call is different from our current destination address, + * reset destination IP address to the new value. 
+ */ +bool_t +getfile(char *fileid, char *server_name, struct in_addr *server_ip, + char *server_path) +{ + struct bp_getfile_arg arg; + struct bp_getfile_res res; + enum clnt_stat stat; + struct sockaddr_in to, from; + int rexmit; + int wait; + uint_t max_retries = 0xFFFFFFFF; + int def_rexmit = 0; + int def_wait = 32; + int printed_waiting_msg; + + /* + * For non-root requests, set a smaller timeout + */ + if (strcmp(fileid, "root") != 0) { + /* + * Only send one request per call + */ + def_wait = GETFILE_BTIMEO; + def_rexmit = GETFILE_BTIMEO; + max_retries = GETFILE_BRETRIES; + } + + arg.client_name = bp.client_name; + arg.file_id = fileid; + + res.server_name = (bp_machine_name_t)bkmem_zalloc(SYS_NMLN + 1); + res.server_path = (bp_path_t)bkmem_zalloc(SYS_NMLN + 1); + + if (res.server_name == NULL || res.server_path == NULL) { + dprintf("getfile: rpc_call failed: No memory\n"); + errno = ENOMEM; + if (res.server_name != NULL) + bkmem_free(res.server_name, SYS_NMLN + 1); + if (res.server_path != NULL) + bkmem_free(res.server_path, SYS_NMLN + 1); + return (FALSE); + } + + to.sin_family = AF_INET; + to.sin_addr.s_addr = responder.s_addr; + to.sin_port = htons(0); + + /* + * Our addressing information was filled in by the call to + * whoami(), so now send an rpc message to the + * bootparam daemon requesting our server information. + * + * Wait only 32 secs for rpc_call to succeed. + */ + rexmit = def_rexmit; + wait = def_wait; + + stat = brpc_call((rpcprog_t)BOOTPARAMPROG, (rpcvers_t)BOOTPARAMVERS, + (rpcproc_t)BOOTPARAMPROC_GETFILE, xdr_bp_getfile_arg, (caddr_t)&arg, + xdr_bp_getfile_res, (caddr_t)&res, rexmit, wait, + &to, &from, AUTH_NONE); + + if (stat == RPC_TIMEDOUT) { + /* + * The server that answered the whoami doesn't + * answer our getfile. Broadcast the call to all. Keep + * trying forever. Set up for limited broadcast. 
+ */ + to.sin_addr.s_addr = htonl(INADDR_BROADCAST); + to.sin_port = htons(0); + + rexmit = def_rexmit; /* use default rexmit interval */ + wait = def_wait; + printed_waiting_msg = 0; + do { + /* + * Limit the number of retries + */ + if (max_retries-- == 0) + break; + + stat = bpmap_rmtcall((rpcprog_t)BOOTPARAMPROG, + (rpcvers_t)BOOTPARAMVERS, + (rpcproc_t)BOOTPARAMPROC_GETFILE, + xdr_bp_getfile_arg, (caddr_t)&arg, + xdr_bp_getfile_res, (caddr_t)&res, rexmit, + wait, &to, &from, AUTH_NONE); + + if (stat == RPC_SUCCESS) { + /* + * set our destination addresses to + * those of the server that responded. + * It's probably our server, and we + * can thus save arping for no reason later. + */ + responder.s_addr = from.sin_addr.s_addr; + if (printed_waiting_msg && + (boothowto & RB_VERBOSE)) { + printf( + "Bootparam response received.\n"); + } + break; + } + if (stat == RPC_TIMEDOUT && !printed_waiting_msg) { + dprintf(noserver, "getfile"); + printed_waiting_msg = 1; + } + /* + * Retransmission interval for second and + * subsequent tries. We expect first bpmap_rmtcall + * to retransmit and backoff to at least this + * value. + */ + rexmit = wait; + wait = def_wait; + } while (stat == RPC_TIMEDOUT); + } + + if (stat == RPC_SUCCESS) { + /* got the goods */ + bcopy(res.server_name, server_name, strlen(res.server_name)); + bcopy(res.server_path, server_path, strlen(res.server_path)); + switch (res.server_address.address_type) { + case IP_ADDR_TYPE: + /* + * server_address is where we will get our root + * from. Replace destination entries in address if + * necessary. 
+ */ + bcopy((caddr_t)&res.server_address.bp_address_u.ip_addr, + (caddr_t)server_ip, sizeof (struct in_addr)); + break; + default: + dprintf("getfile: unknown address type %d\n", + res.server_address.address_type); + server_ip->s_addr = htonl(INADDR_ANY); + bkmem_free(res.server_name, SYS_NMLN + 1); + bkmem_free(res.server_path, SYS_NMLN + 1); + return (FALSE); + } + } else { + dprintf("getfile: rpc_call failed.\n"); + bkmem_free(res.server_name, SYS_NMLN + 1); + bkmem_free(res.server_path, SYS_NMLN + 1); + return (FALSE); + } + + bkmem_free(res.server_name, SYS_NMLN + 1); + bkmem_free(res.server_path, SYS_NMLN + 1); + + return (TRUE); +} diff --git a/usr/src/stand/lib/fs/nfs/brpc.h b/usr/src/stand/lib/fs/nfs/brpc.h new file mode 100644 index 0000000000..e32f989abe --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/brpc.h @@ -0,0 +1,52 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _BRPC_H +#define _BRPC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/rpc.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define RPC_ALLOWABLE_ERRORS (10) /* Threshold on receiving bad results */ +#define RPC_REXMIT_MSEC (500) /* default 1/2 second retransmissions */ +#define RPC_RCVWAIT_MSEC (20000) /* default response waittime */ + +extern enum clnt_stat brpc_call(rpcprog_t, rpcvers_t, rpcproc_t, xdrproc_t, + caddr_t, xdrproc_t, caddr_t, int, int, struct sockaddr_in *, + struct sockaddr_in *, uint_t); + +extern void rpc_disperr(struct rpc_err *stat); + +#ifdef __cplusplus +} +#endif + +#endif /* _BRPC_H */ diff --git a/usr/src/stand/lib/fs/nfs/clnt.h b/usr/src/stand/lib/fs/nfs/clnt.h new file mode 100644 index 0000000000..3101cfdaa6 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/clnt.h @@ -0,0 +1,236 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * clnt.h - Client side remote procedure call interface. + * Stripped down sockets based client for boot. 
+ */ + +#ifndef _RPC_CLNT_H +#define _RPC_CLNT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <rpc/clnt_stat.h> +#include <rpc/auth.h> +#include <netinet/in.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Error info. + */ +struct rpc_err { + enum clnt_stat re_status; + union { + int RE_errno; /* realated system error */ + enum auth_stat RE_why; /* why the auth error occurred */ + } ru; +#define re_errno ru.RE_errno +#define re_why ru.RE_why +}; + + +/* + * Client rpc handle. + * Created by individual implementations, see e.g. rpc_udp.c. + * Client is responsible for initializing auth, see e.g. auth_none.c. + */ +typedef struct __client { + AUTH *cl_auth; /* authenticator */ + struct clnt_ops { + /* call remote procedure */ + enum clnt_stat (*cl_call)(struct __client *, rpcproc_t, + xdrproc_t, caddr_t, xdrproc_t, + caddr_t, struct timeval); + /* abort a call */ + void (*cl_abort)(/* various */); + /* get specific error code */ + void (*cl_geterr)(struct __client *, + struct rpc_err *); + /* frees results */ + bool_t (*cl_freeres)(struct __client *, xdrproc_t, + caddr_t); + /* destroy this structure */ + void (*cl_destroy)(struct __client *); + /* the ioctl() of rpc */ + bool_t (*cl_control)(struct __client *, int, char *); + } *cl_ops; + caddr_t cl_private; /* private stuff */ +} CLIENT; + + +/* + * client side rpc interface ops + * + * Parameter types are: + * + */ + +/* + * enum clnt_stat + * CLNT_CALL(rh, proc, xargs, argsp, xres, resp, timeout) + * CLIENT *rh; + * ulong_t proc; + * xdrproc_t xargs; + * caddr_t argsp; + * xdrproc_t xres; + * caddr_t resp; + * struct timeval timeout; + */ +#define CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs) \ + ((*(rh)->cl_ops->cl_call)(rh, proc, xargs, argsp, xres, resp, secs)) + +/* + * void + * CLNT_ABORT(rh); + * CLIENT *rh; + */ +#define CLNT_ABORT(rh) ((*(rh)->cl_ops->cl_abort)(rh)) + +/* + * struct rpc_err + * CLNT_GETERR(rh); + * CLIENT *rh; + */ +#define CLNT_GETERR(rh, errp) 
((*(rh)->cl_ops->cl_geterr)(rh, errp)) + +/* + * bool_t + * CLNT_FREERES(rh, xres, resp); + * CLIENT *rh; + * xdrproc_t xres; + * caddr_t resp; + */ +#define CLNT_FREERES(rh, xres, resp) ((*(rh)->cl_ops->cl_freeres)\ + (rh, xres, resp)) + +/* + * bool_t + * CLNT_CONTROL(cl, request, info) + * CLIENT *cl; + * uint_t request; + * char *info; + */ +#define CLNT_CONTROL(cl, rq, in) ((*(cl)->cl_ops->cl_control)(cl, rq, in)) + +/* + * control operations that apply to both udp and tcp transports + */ +#define CLSET_TIMEOUT 1 /* set timeout (timeval) */ +#define CLGET_TIMEOUT 2 /* get timeout (timeval) */ +#define CLGET_SERVER_ADDR 3 /* get server's address (sockaddr) */ +#define CLGET_FD 6 /* get connections file descriptor */ +#define CLSET_FD_CLOSE 8 /* close fd while clnt_destroy */ +#define CLSET_FD_NCLOSE 9 /* Do not close fd while clnt_destroy */ +/* + * udp only control operations + */ +#define CLSET_RETRY_TIMEOUT 4 /* set retry timeout (timeval) */ +#define CLGET_RETRY_TIMEOUT 5 /* get retry timeout (timeval) */ + +/* + * void + * CLNT_DESTROY(rh); + * CLIENT *rh; + */ +#define CLNT_DESTROY(rh) ((*(rh)->cl_ops->cl_destroy)(rh)) + +/* + * By convention, procedure 0 takes null arguments and returns them + */ + +#define NULLPROC ((ulong_t)0) + +/* + * Below are the client handle creation routines for the various + * implementations of client side rpc. They can return NULL if a + * creation failure occurs. + */ + +/* + * UDP based rpc. + * CLIENT * + * clntbudp_create(raddr, program, version, wait, sockp) + * struct sockaddr_in *raddr; + * ulong_t program; + * ulong_t version; + * struct timeval wait; + * int *sockp; + * + * Same as above, but you specify max packet sizes. 
+ * CLIENT * + * clntbudp_bufcreate(raddr, program, version, wait, sockp, sendsz, recvsz) + * struct sockaddr_in *raddr; + * ulong_t program; + * ulong_t version; + * struct timeval wait; + * int *sockp; + * uint_t sendsz; + * uint_t recvsz; + */ +extern CLIENT *clntbudp_create(struct sockaddr_in *raddr, rpcprog_t program, + rpcvers_t version, struct timeval wait, + int *sockp); +extern CLIENT *clntbudp_bufcreate(struct sockaddr_in *raddr, rpcprog_t program, + rpcvers_t version, struct timeval wait, + int *sockp, uint_t sendsz, uint_t recvsz); + +/* + * TCP based rpc. + * CLIENT * + * clntbtcp_create(raddr, program, version, wait, sockp, sendsz, recvsz) + * struct sockaddr_in *raddr; + * ulong_t program; + * ulong_t version; + * struct timeval wait; + * int *sockp; + * uint_t sendsz; + * uint_t recvsz; + * + */ +extern CLIENT *clntbtcp_create(struct sockaddr_in *raddr, rpcprog_t program, + rpcvers_t version, struct timeval wait, + int *sockp, uint_t sendsz, uint_t recvsz); +/* + * If a creation fails, the following allows the user to figure out why. + */ +struct rpc_createerr { + enum clnt_stat cf_stat; + struct rpc_err cf_error; /* useful when cf_stat == RPC_PMAPFAILURE */ +}; + +extern struct rpc_createerr rpc_createerr; + +#define UDPMSGSIZE 8800 /* rpc imposed limit on udp msg size */ +#define RPCSMALLMSGSIZE 400 /* a more reasonable packet size */ +#define TCPMSGSIZE (32 * 1024) /* reasonably sized RPC/TCP msg */ +#ifdef __cplusplus +} +#endif + +#endif /* !_RPC_CLNT_H */ diff --git a/usr/src/stand/lib/fs/nfs/clnt_btcp.c b/usr/src/stand/lib/fs/nfs/clnt_btcp.c new file mode 100644 index 0000000000..b69808393c --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/clnt_btcp.c @@ -0,0 +1,534 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Boot subsystem client side rpc (TCP) + */ + +#include <sys/salib.h> +#include <sys/errno.h> +#include <rpc/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include "socket_inet.h" +#include "ipv4.h" +#include "clnt.h" +#include <rpc/rpc.h> +#include "brpc.h" +#include "pmap.h" +#include <sys/promif.h> +#include <rpc/xdr.h> +#include <rpc/auth.h> +#include <rpc/auth_sys.h> +#include "auth_inet.h" +#include <rpc/rpc_msg.h> +#include <sys/bootdebug.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +#define MCALL_MSG_SIZE 24 + +extern int errno; + +extern void xdrrec_create(); +extern bool_t xdrrec_endofrecord(); +extern bool_t xdrrec_skiprecord(); + +/* + * If we create another clnt type this should be + * moved to a common file + */ +struct rpc_createerr rpc_createerr; + +static int readtcp(); +static int writetcp(); + +static struct clnt_ops *clntbtcp_ops(); + +/* + * Private data kept per client handle + */ +struct ct_data { + int ct_sock; + bool_t 
ct_closeit; + struct sockaddr_in ct_raddr; + uint_t ct_wait_msec; + struct timeval ct_total; + struct rpc_err ct_error; + XDR ct_xdrs; + char ct_mcall[MCALL_MSG_SIZE]; + uint_t ct_mpos; + uint_t ct_xdrpos; +}; + +/* + * Create a TCP based client handle. + * If *sockp<0, *sockp is set to a newly created TCP socket. + * If raddr->sin_port is 0 a binder on the remote machine + * is consulted for the correct port number. + * NB: It is the clients responsibility to close *sockp. + * NB: The rpch->cl_auth is initialized to null authentication. + * Caller may wish to set this something more useful. + * + * wait is the amount of time used between retransmitting a call if + * no response has been heard; retransmition occurs until the actual + * rpc call times out. + * + * sendsz and recvsz are the maximum allowable packet sizes that can be + * sent and received. + */ +CLIENT * +clntbtcp_create( + struct sockaddr_in *raddr, + rpcprog_t program, + rpcvers_t version, + struct timeval wait, + int *sockp, + uint_t sendsz, + uint_t recvsz) +{ + CLIENT *cl; + struct ct_data *ct; + struct rpc_msg call_msg; +#if 0 /* XXX not yet */ + int min_buf_sz; + int pref_buf_sz = 64 * 1024; /* 64 KB */ + socklen_t optlen; +#endif /* not yet */ + cl = (CLIENT *)bkmem_alloc(sizeof (CLIENT)); + if (cl == NULL) { + errno = ENOMEM; + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + return ((CLIENT *)NULL); + } + + ct = (struct ct_data *)bkmem_alloc(sizeof (*ct)); + if (ct == NULL) { + errno = ENOMEM; + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + goto fooy; + } + + if (raddr->sin_port == 0) { + ushort_t port; + if ((port = bpmap_getport(program, version, + &(rpc_createerr.cf_stat), raddr, NULL)) == 0) { + goto fooy; + } + raddr->sin_port = htons(port); + } + + if (*sockp < 0) { + struct sockaddr_in from; + + *sockp = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (*sockp < 0) { + rpc_createerr.cf_stat = RPC_SYSTEMERROR; 
+ rpc_createerr.cf_error.re_errno = errno; + goto fooy; + } + /* + * Bootparams assumes a local net, so be sure to let lower + * layer protocols know not to route. + */ + if (dontroute) { + (void) setsockopt(*sockp, SOL_SOCKET, SO_DONTROUTE, + (const void *)&dontroute, sizeof (dontroute)); + } + + /* attempt to bind to priv port */ + from.sin_family = AF_INET; + ipv4_getipaddr(&from.sin_addr); + from.sin_addr.s_addr = htonl(from.sin_addr.s_addr); + from.sin_port = get_source_port(TRUE); + + if (bind(*sockp, (struct sockaddr *)&from, sizeof (from)) < 0) { + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + if (*sockp > 0) + close(*sockp); + goto fooy; + } + + if (connect(*sockp, (struct sockaddr *)raddr, + sizeof (struct sockaddr_in)) < 0) { + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + if (*sockp > 0) + close(*sockp); + goto fooy; + } + +#if 0 /* XXX not yet */ + /* + * In the future we may want RPC to use larger transfer sizes + * over TCP. In this case we will want to increase the + * window size. + */ + /* + * Resize the receive window if possible + */ + optlen = sizeof (int); + if (getsockopt(*sockp, SOL_SOCKET, SO_RCVBUF, + (void *)&min_buf_sz, &optlen) != 0) + goto keep_going; + + if (min_buf_sz < pref_buf_sz) + (void) setsockopt(*sockp, SOL_SOCKET, SO_RCVBUF, + (const void *)&pref_buf_sz, sizeof (int)); + +keep_going: +#endif /* not yet */ + ct->ct_closeit = TRUE; + } else + ct->ct_closeit = FALSE; + + /* + * Set up the private data + */ + ct->ct_sock = *sockp; + ct->ct_wait_msec = 0; + ct->ct_total.tv_sec = wait.tv_sec; + ct->ct_total.tv_usec = -1; + ct->ct_raddr = *raddr; + + /* + * Initialize the call message + */ + + /* + * XXX - The xid might need to be randomized more. Imagine if there + * are a rack of blade servers all booting at the same time. They + * may cause havoc on the server with xid replays. 
+ */ + call_msg.rm_xid = (uint_t)prom_gettime() + 1; + call_msg.rm_direction = CALL; + call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; + call_msg.rm_call.cb_prog = program; + call_msg.rm_call.cb_vers = version; + + /* + * pre-serialize the static part of the call msg and stash it away + */ + xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, + XDR_ENCODE); + if (! xdr_callhdr(&(ct->ct_xdrs), &call_msg)) { + if (ct->ct_closeit) + (void) close(*sockp); + goto fooy; + } + ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs)); + XDR_DESTROY(&(ct->ct_xdrs)); + + /* + * XXX - Memory allocations can fail in xdrrec_create, so we need to + * be able to catch those errors. + */ + xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct, readtcp, + writetcp); + + cl->cl_ops = clntbtcp_ops(); + cl->cl_private = (caddr_t)ct; + cl->cl_auth = authnone_create(); + return (cl); + +fooy: + if (ct) + bkmem_free((caddr_t)ct, sizeof (*ct)); + if (cl) + bkmem_free((caddr_t)cl, sizeof (CLIENT)); + return ((CLIENT *)NULL); +} + +static enum clnt_stat +clntbtcp_call( + CLIENT *cl, + rpcproc_t proc, + xdrproc_t xargs, + caddr_t argsp, + xdrproc_t xdr_results, + caddr_t resultsp, + struct timeval utimeout) +{ + struct ct_data *ct; + XDR *xdrs; + struct rpc_msg reply_msg; + uint32_t x_id; + uint32_t *msg_x_id; + bool_t shipnow; + int nrefreshes = 2; /* number of times to refresh cred */ + struct timeval timeout; + + ct = (struct ct_data *)cl->cl_private; + msg_x_id = (uint32_t *)ct->ct_mcall; + + xdrs = &(ct->ct_xdrs); + + ct->ct_total = utimeout; + + /* + * We have to be able to wait for some non-zero period of time, so + * use a default timeout. + */ + if (ct->ct_total.tv_sec == 0) + ct->ct_total.tv_sec = RPC_RCVWAIT_MSEC / 1000; + + ct->ct_wait_msec = ct->ct_total.tv_sec * 1000 + + ct->ct_total.tv_usec / 1000; + + timeout = ct->ct_total; + + shipnow = (xdr_results == (xdrproc_t)0 && timeout.tv_sec == 0 && + timeout.tv_usec == 0) ? 
FALSE : TRUE; + +call_again: + xdrs->x_op = XDR_ENCODE; + ct->ct_error.re_status = RPC_SUCCESS; + x_id = ntohl(++(*msg_x_id)); + if ((! XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) || + (! XDR_PUTINT32(xdrs, (int32_t *)&proc)) || + (! AUTH_MARSHALL(cl->cl_auth, xdrs, NULL)) || + (! (*xargs)(xdrs, argsp))) { + (void) xdrrec_endofrecord(xdrs, TRUE); + ct->ct_error.re_status = RPC_CANTENCODEARGS; + printf("clntbtcp_call: xdr encode args failed\n"); + return (ct->ct_error.re_status); + } + + if (!xdrrec_endofrecord(xdrs, shipnow)) { + printf("clntbtcp_call: rpc cansend error\n"); + ct->ct_error.re_status = RPC_CANTSEND; + return (ct->ct_error.re_status); + } + + if (!shipnow) + return (RPC_SUCCESS); + + if (timeout.tv_sec == 0 && timeout.tv_usec == 0) { + ct->ct_error.re_status = RPC_TIMEDOUT; + return (ct->ct_error.re_status); + } + + xdrs->x_op = XDR_DECODE; + + /* CONSTCOND */ + while (TRUE) { + reply_msg.acpted_rply.ar_verf = _null_auth; + reply_msg.acpted_rply.ar_results.where = NULL; + reply_msg.acpted_rply.ar_results.proc = xdr_void; + if (!xdrrec_skiprecord(xdrs)) { + return (ct->ct_error.re_status); + } + + if (!xdr_replymsg(xdrs, &reply_msg)) { + if (ct->ct_error.re_status == RPC_SUCCESS) + continue; + return (ct->ct_error.re_status); + } + if (reply_msg.rm_xid == x_id) { + break; + } + } + + /* + * process header + */ + _seterr_reply(&reply_msg, &(ct->ct_error)); + if (ct->ct_error.re_status == RPC_SUCCESS) { + if (!AUTH_VALIDATE(cl->cl_auth, + &reply_msg.acpted_rply.ar_verf)) { + ct->ct_error.re_status = RPC_AUTHERROR; + ct->ct_error.re_why = AUTH_INVALIDRESP; + } else if (!(*xdr_results)(xdrs, resultsp)) { + if (ct->ct_error.re_status == RPC_SUCCESS) { + ct->ct_error.re_status = RPC_CANTDECODERES; + } + } + if (reply_msg.acpted_rply.ar_verf.oa_base != NULL) { + xdrs->x_op = XDR_FREE; + (void) xdr_opaque_auth(xdrs, + &(reply_msg.acpted_rply.ar_verf)); + } + } else { + if (nrefreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg, + NULL)) { + goto call_again; 
+ } + } + return (ct->ct_error.re_status); +} + +/* + * Interface between xdr serializer and tcp connection. + * Behaves like the system calls, read & write, but keeps some error state + * around for the rpc level. + */ +static int +readtcp(struct ct_data *ct, + caddr_t buf, + int len) +{ + int inlen = 0; + uint_t start, diff; + struct sockaddr from; + uint_t fromlen = sizeof (from); + + if (len <= 0) + return (0); + + /* + * Do non-blocking reads here until we get some data or timeout + */ + start = prom_gettime(); + while ((inlen = recvfrom(ct->ct_sock, buf, len, 0, &from, + &fromlen)) == 0) { + diff = (uint_t)(prom_gettime() - start); + if (diff > ct->ct_wait_msec) { + errno = ETIMEDOUT; + inlen = -1; + break; + } + } +#ifdef DEBUG + printf("readtcp: inlen = %d\n", inlen); +#endif + switch (inlen) { + case 0: + /* premature eof */ + ct->ct_error.re_errno = ECONNRESET; + ct->ct_error.re_status = RPC_CANTRECV; + inlen = -1; /* it's really an error */ + break; + case -1: + ct->ct_error.re_errno = errno; + ct->ct_error.re_status = RPC_CANTRECV; + break; + } + + return (inlen); +} + +static int +writetcp(ct, buf, len) + struct ct_data *ct; + caddr_t buf; + int len; +{ + register int i, cnt; + + for (cnt = len; cnt > 0; cnt -= i, buf += i) { + if ((i = sendto(ct->ct_sock, (void *)buf, cnt, 0, + (struct sockaddr *)&(ct->ct_raddr), + sizeof (ct->ct_raddr))) == -1) { + ct->ct_error.re_errno = errno; + ct->ct_error.re_status = RPC_CANTSEND; + return (-1); + } + } + return (len); +} + +static void +clntbtcp_geterr( + CLIENT *cl, + struct rpc_err *errp) +{ + struct ct_data *ct = (struct ct_data *)cl->cl_private; + + *errp = ct->ct_error; +} + + +static bool_t +clntbtcp_freeres( + CLIENT *cl, + xdrproc_t xdr_res, + caddr_t res_ptr) +{ + struct ct_data *ct = (struct ct_data *)cl->cl_private; + XDR *xdrs = &(ct->ct_xdrs); + + xdrs->x_op = XDR_FREE; + return ((*xdr_res)(xdrs, res_ptr)); +} + +static void +clntbtcp_abort() + /* CLIENT *h; */ +{ +} + +/* ARGSUSED */ +static 
bool_t +clntbtcp_control( + CLIENT *cl, + int request, + char *info) +{ + /* Not implemented in boot */ + return (FALSE); +} + +static void +clntbtcp_destroy(CLIENT *cl) +{ + struct ct_data *ct = (struct ct_data *)cl->cl_private; + + if (ct->ct_closeit) { + (void) socket_close(ct->ct_sock); + } + XDR_DESTROY(&(ct->ct_xdrs)); + bkmem_free((caddr_t)ct, (sizeof (struct ct_data))); + bkmem_free((caddr_t)cl, sizeof (CLIENT)); +} + +static struct clnt_ops * +clntbtcp_ops() +{ + static struct clnt_ops ops; + + if (ops.cl_call == NULL) { + ops.cl_call = clntbtcp_call; + ops.cl_abort = clntbtcp_abort; + ops.cl_geterr = clntbtcp_geterr; + ops.cl_freeres = clntbtcp_freeres; + ops.cl_destroy = clntbtcp_destroy; + ops.cl_control = clntbtcp_control; + } + return (&ops); +} diff --git a/usr/src/stand/lib/fs/nfs/clnt_budp.c b/usr/src/stand/lib/fs/nfs/clnt_budp.c new file mode 100644 index 0000000000..bf2e895373 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/clnt_budp.c @@ -0,0 +1,468 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Boot subsystem client side rpc + */ + +#include <sys/errno.h> +#include <rpc/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include "socket_inet.h" +#include "ipv4.h" +#include "clnt.h" +#include <rpc/rpc.h> +#include "brpc.h" +#include "pmap.h" +#include <sys/promif.h> +#include <rpc/xdr.h> +#include <rpc/auth.h> +#include <rpc/auth_sys.h> +#include "auth_inet.h" +#include <rpc/rpc_msg.h> +#include <sys/salib.h> +#include <sys/bootdebug.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +extern int errno; + +/* + * If we create another clnt type this should be + * moved to a common file + */ +struct rpc_createerr rpc_createerr; + +static struct clnt_ops *clntbudp_ops(); + +/* + * Private data kept per client handle + */ +struct cu_data { + int cu_sock; + bool_t cu_closeit; + struct sockaddr_in cu_raddr; + int cu_rlen; + struct timeval cu_wait; + struct timeval cu_total; + struct rpc_err cu_error; + XDR cu_outxdrs; + uint_t cu_xdrpos; + uint_t cu_sendsz; + char *cu_outbuf; + uint_t cu_recvsz; + char cu_inbuf[1]; +}; + +/* + * Create a UDP based client handle. + * If *sockp<0, *sockp is set to a newly created UPD socket. + * If raddr->sin_port is 0 a binder on the remote machine + * is consulted for the correct port number. + * NB: It is the clients responsibility to close *sockp. + * NB: The rpch->cl_auth is initialized to null authentication. + * Caller may wish to set this something more useful. + * + * wait is the amount of time used between retransmitting a call if + * no response has been heard; retransmition occurs until the actual + * rpc call times out. 
+ * + * sendsz and recvsz are the maximum allowable packet sizes that can be + * sent and received. + */ +CLIENT * +clntbudp_bufcreate(raddr, program, version, wait, sockp, sendsz, recvsz) + struct sockaddr_in *raddr; + rpcprog_t program; + rpcvers_t version; + struct timeval wait; + int *sockp; + uint_t sendsz; + uint_t recvsz; +{ + CLIENT *cl; + struct cu_data *cu; + struct rpc_msg call_msg; + + cl = (CLIENT *)bkmem_alloc(sizeof (CLIENT)); + if (cl == NULL) { + errno = ENOMEM; + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + return ((CLIENT *)NULL); + } + sendsz = ((sendsz + 3) / 4) * 4; + recvsz = ((recvsz + 3) / 4) * 4; + cu = (struct cu_data *)bkmem_alloc(sizeof (*cu) + sendsz + recvsz); + if (cu == NULL) { + errno = ENOMEM; + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + goto fooy; + } + cu->cu_outbuf = &cu->cu_inbuf[recvsz]; + + if (raddr->sin_port == 0) { + ushort_t port; + if ((port = bpmap_getport(program, version, + &(rpc_createerr.cf_stat), raddr, NULL)) == 0) { + goto fooy; + } + raddr->sin_port = htons(port); + } + cl->cl_ops = clntbudp_ops(); + cl->cl_private = (caddr_t)cu; + cu->cu_raddr = *raddr; + cu->cu_rlen = sizeof (cu->cu_raddr); + cu->cu_wait = wait; + cu->cu_total.tv_sec = -1; + cu->cu_total.tv_usec = -1; + cu->cu_sendsz = sendsz; + cu->cu_recvsz = recvsz; + call_msg.rm_xid = (uint_t)prom_gettime() + 1; + call_msg.rm_direction = CALL; + call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; + call_msg.rm_call.cb_prog = program; + call_msg.rm_call.cb_vers = version; + xdrmem_create(&(cu->cu_outxdrs), cu->cu_outbuf, + sendsz, XDR_ENCODE); + if (! 
xdr_callhdr(&(cu->cu_outxdrs), &call_msg)) { + goto fooy; + } + cu->cu_xdrpos = XDR_GETPOS(&(cu->cu_outxdrs)); + cu->cu_closeit = FALSE; + + if (*sockp < 0) { + struct sockaddr_in from; + + *sockp = socket(PF_INET, SOCK_DGRAM, 0); + if (*sockp < 0) { + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + goto fooy; + } + + if (dontroute) { + (void) setsockopt(*sockp, SOL_SOCKET, SO_DONTROUTE, + (const void *)&dontroute, sizeof (dontroute)); + } + + /* attempt to bind to priv port */ + from.sin_family = AF_INET; + ipv4_getipaddr(&from.sin_addr); + from.sin_addr.s_addr = htonl(from.sin_addr.s_addr); + from.sin_port = get_source_port(TRUE); + + if (bind(*sockp, (struct sockaddr *)&from, sizeof (from)) < 0) { + rpc_createerr.cf_stat = RPC_SYSTEMERROR; + rpc_createerr.cf_error.re_errno = errno; + goto fooy; + } + + cu->cu_closeit = TRUE; + } + + cu->cu_sock = *sockp; + cl->cl_auth = authnone_create(); + return (cl); +fooy: + if (cu) + bkmem_free((caddr_t)cu, sizeof (*cu) + sendsz + recvsz); + if (cl) + bkmem_free((caddr_t)cl, sizeof (CLIENT)); + return ((CLIENT *)NULL); +} + +CLIENT * +clntbudp_create(raddr, program, version, wait, sockp) + struct sockaddr_in *raddr; + rpcprog_t program; + rpcvers_t version; + struct timeval wait; + int *sockp; +{ + + return (clntbudp_bufcreate(raddr, program, version, wait, sockp, + UDPMSGSIZE, UDPMSGSIZE)); +} + +static enum clnt_stat +clntbudp_call(cl, proc, xargs, argsp, xresults, resultsp, utimeout) + CLIENT *cl; /* client handle */ + rpcproc_t proc; /* procedure number */ + xdrproc_t xargs; /* xdr routine for args */ + caddr_t argsp; /* pointer to args */ + xdrproc_t xresults; /* xdr routine for results */ + caddr_t resultsp; /* pointer to results */ + struct timeval utimeout; /* seconds to wait before giving up */ +{ + struct cu_data *cu; + XDR *xdrs; + int outlen; + int inlen; + socklen_t fromlen; + struct sockaddr_in from; + struct rpc_msg reply_msg; + XDR reply_xdrs; + uint_t xdelay; + int 
wait_time; + bool_t ok; + int nrefreshes = 2; /* number of times to refresh cred */ + struct timeval timeout; + int errors; + + cu = (struct cu_data *)cl->cl_private; + if (cu->cu_total.tv_usec == -1) + timeout = utimeout; /* use supplied timeout */ + else + timeout = cu->cu_total; /* use default timeout */ + + /* + * set a media level timeout + */ + xdelay = cu->cu_wait.tv_sec + 1000 + cu->cu_wait.tv_usec / 1000; + (void) setsockopt(cu->cu_sock, SOL_SOCKET, SO_RCVTIMEO, + (void *)&xdelay, sizeof (xdelay)); + + wait_time = (timeout.tv_sec * 1000) + (timeout.tv_usec / 1000); + if (wait_time == 0) + wait_time = RPC_RCVWAIT_MSEC; + wait_time += prom_gettime(); + + errors = 0; + +call_again: + xdrs = &(cu->cu_outxdrs); + xdrs->x_op = XDR_ENCODE; + XDR_SETPOS(xdrs, cu->cu_xdrpos); + /* + * the transaction is the first thing in the out buffer + */ + (*(ushort_t *)(cu->cu_outbuf))++; + if ((! XDR_PUTINT32(xdrs, (int32_t *)&proc)) || + (! AUTH_MARSHALL(cl->cl_auth, xdrs, NULL)) || + (! (*xargs)(xdrs, argsp))) + return (cu->cu_error.re_status = RPC_CANTENCODEARGS); + outlen = (int)XDR_GETPOS(xdrs); + +send_again: + if (sendto(cu->cu_sock, cu->cu_outbuf, outlen, 0, + (struct sockaddr *)&(cu->cu_raddr), cu->cu_rlen) + != outlen) { + cu->cu_error.re_errno = errno; + return (cu->cu_error.re_status = RPC_CANTSEND); + } + + /* + * sub-optimal code appears here because we have + * some clock time to spare while the packets are in flight. + * (We assume that this is actually only executed once.) + */ +recv_again: + reply_msg.acpted_rply.ar_verf = _null_auth; + reply_msg.acpted_rply.ar_results.where = resultsp; + reply_msg.acpted_rply.ar_results.proc = xresults; + + for (;;) { + if (errors >= RPC_ALLOWABLE_ERRORS) + return (cu->cu_error.re_status); + + if (prom_gettime() >= wait_time) { + cu->cu_error.re_errno = ETIMEDOUT; + return (cu->cu_error.re_status = RPC_TIMEDOUT); + } + + /* + * Use MSG_DONTWAIT because we have set + * a media level timeout above. 
+ */ + fromlen = sizeof (struct sockaddr); + + inlen = recvfrom(cu->cu_sock, cu->cu_inbuf, + (int)cu->cu_recvsz, MSG_DONTWAIT, + (struct sockaddr *)&from, &fromlen); + + if (inlen < 0) { + if (errno == EWOULDBLOCK) { + /* + * Media level has timedout + * and no more data in buffers. + */ + goto send_again; + } + + cu->cu_error.re_status = RPC_CANTRECV; + if (errno == ETIMEDOUT) { + errno = ETIMEDOUT; + cu->cu_error.re_status = RPC_TIMEDOUT; + } + + cu->cu_error.re_errno = errno; + return (cu->cu_error.re_status); + } + + if (inlen < sizeof (uint32_t)) + continue; + + /* see if reply transaction id matches sent id */ + if (*((uint32_t *)(cu->cu_inbuf)) != + *((uint32_t *)(cu->cu_outbuf))) { + dprintf("clntbudp_call: xid: 0x%x != 0x%x\n", + *(uint32_t *)(cu->cu_inbuf), + *(uint32_t *)(cu->cu_outbuf)); + continue; + } + /* we now assume we have the proper reply */ + break; + } + + /* + * now decode and validate the response + */ + xdrmem_create(&reply_xdrs, cu->cu_inbuf, (uint_t)inlen, XDR_DECODE); + ok = xdr_replymsg(&reply_xdrs, &reply_msg); + /* XDR_DESTROY(&reply_xdrs); save a few cycles on noop destroy */ + if (!ok) { + cu->cu_error.re_status = RPC_CANTDECODERES; + return (cu->cu_error.re_status); + } + + _seterr_reply(&reply_msg, &(cu->cu_error)); + if (cu->cu_error.re_status == RPC_SUCCESS) { + if (! AUTH_VALIDATE(cl->cl_auth, + &reply_msg.acpted_rply.ar_verf)) { + cu->cu_error.re_status = RPC_AUTHERROR; + cu->cu_error.re_why = AUTH_INVALIDRESP; + errors++; + goto call_again; + } + if (reply_msg.acpted_rply.ar_verf.oa_base != NULL) { + xdrs->x_op = XDR_FREE; + (void) xdr_opaque_auth(xdrs, + &(reply_msg.acpted_rply.ar_verf)); + } + return (cu->cu_error.re_status); + } /* end successful completion */ + + if (cu->cu_error.re_status == RPC_AUTHERROR) { + /* maybe our credentials need to be refreshed ... 
*/ + if (nrefreshes > 0 && + AUTH_REFRESH(cl->cl_auth, NULL, NULL)) { + nrefreshes--; + } + errors++; + goto call_again; + } + + /* Just keep trying till there's no data... */ + errors++; + dprintf("clntbudp_call: from: %s, error: ", + inet_ntoa(from.sin_addr)); + rpc_disperr(&cu->cu_error); + goto recv_again; +} + +static void +clntbudp_geterr(cl, errp) + CLIENT *cl; + struct rpc_err *errp; +{ + struct cu_data *cu = (struct cu_data *)cl->cl_private; + + *errp = cu->cu_error; +} + + +static bool_t +clntbudp_freeres(cl, xdr_res, res_ptr) + CLIENT *cl; + xdrproc_t xdr_res; + caddr_t res_ptr; +{ + struct cu_data *cu = (struct cu_data *)cl->cl_private; + XDR *xdrs = &(cu->cu_outxdrs); + + xdrs->x_op = XDR_FREE; + return ((*xdr_res)(xdrs, res_ptr)); +} + +static void +clntbudp_abort() + /* CLIENT *h; */ +{ +} + +/* ARGSUSED */ +static bool_t +clntbudp_control(cl, request, info) + CLIENT *cl; + int request; + char *info; +{ + /* CLNT_CONTROL is not used in boot */ + return (FALSE); +} + +static void +clntbudp_destroy(cl) + CLIENT *cl; +{ + struct cu_data *cu = (struct cu_data *)cl->cl_private; + + if (cu->cu_closeit) { + (void) socket_close(cu->cu_sock); + } + XDR_DESTROY(&(cu->cu_outxdrs)); + bkmem_free((caddr_t)cu, (sizeof (*cu) + cu->cu_sendsz + cu->cu_recvsz)); + bkmem_free((caddr_t)cl, sizeof (CLIENT)); +} + +static struct clnt_ops * +clntbudp_ops() +{ + static struct clnt_ops ops; + + if (ops.cl_call == NULL) { + ops.cl_call = clntbudp_call; + ops.cl_abort = clntbudp_abort; + ops.cl_geterr = clntbudp_geterr; + ops.cl_freeres = clntbudp_freeres; + ops.cl_destroy = clntbudp_destroy; + ops.cl_control = clntbudp_control; + } + return (&ops); +} diff --git a/usr/src/stand/lib/fs/nfs/getdents.c b/usr/src/stand/lib/fs/nfs/getdents.c new file mode 100644 index 0000000000..fa9d36302c --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/getdents.c @@ -0,0 +1,188 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Stuff relating to directory reading ... + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <rpc/xdr.h> +#include "clnt.h" +#include <rpc/rpc_msg.h> +#include <sys/t_lock.h> +#include "nfs_inet.h" +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs_prot.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/sysmacros.h> +#include "socket_inet.h" +#include <sys/salib.h> +#include <sys/bootdebug.h> + +#define MAXDENTS 16 +#define MINSIZ 20 + +/* + * Boot needs to be cleaned up to use either dirent32 or dirent64, + * in the meantime use dirent_t and always round to 8 bytes + */ +#define BDIRENT_RECLEN(namelen) \ + ((offsetof(dirent_t, d_name[0]) + 1 + (namelen) + 7) & ~ 7) + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * Get directory entries: + * + * Uses the nfs "READDIR" operation to read directory entries + * into a local buffer. These are then translated into file + * system independent "dirent" structs and returned in the + * caller's buffer. Returns the number of entries converted + * (-1 if there's an error). 
+ * + * Although the xdr functions can allocate memory, we have + * a limited heap so we allocate our own space, + * assuming the worst case of 256 byte names. + * This is a space hog in our local buffer, so we want + * the number of buffers to be small. To make sure we don't + * get more names than we can handle, we tell the rpc + * routine that we only have space for MAXDENT names if + * they are all the minimum size. This keeps the return + * packet unfragmented, but may result in lots of reads + * to process a large directory. Since this is standalone + * we don't worry about speed. With MAXDENTs at 16, the + * local buffer is 4k. + */ + +int +nfsgetdents(struct nfs_file *nfp, struct dirent *dep, unsigned size) +{ + entry *ep; + readdirargs rda; + readdirres res; + enum clnt_stat status; + struct { + entry etlist[MAXDENTS]; + char names[MAXDENTS][NFS_MAXNAMLEN+1]; + } rdbuf; + uint32_t offset; + int j, cnt = 0; + struct timeval zero_timeout = {0, 0}; /* default */ + + bzero((caddr_t)&res, sizeof (res)); + bzero((caddr_t)&rda, sizeof (rda)); + bzero((caddr_t)rdbuf.etlist, sizeof (rdbuf.etlist)); + bcopy((caddr_t)&nfp->fh.fh2, (caddr_t)&rda.dir, NFS_FHSIZE); + bcopy((caddr_t)nfp->cookie.cookie2, (caddr_t)rda.cookie, + sizeof (nfscookie)); + + while (!res.readdirres_u.reply.eof) { + /* + * Keep issuing nfs calls until EOF is reached on + * the directory or the user buffer is filled. + */ + + for (j = 0; j < MAXDENTS; j++) { + /* + * Link our buffers together for the benefit of + * XDR. We do this each time we issue the rpc call + * JIC the xdr decode + * routines screw up the linkage! + */ + + rdbuf.etlist[j].name = rdbuf.names[(MAXDENTS-1) - j]; + rdbuf.etlist[j].nextentry = + (j < (MAXDENTS-1)) ? &rdbuf.etlist[j+1] : 0; + } + + res.readdirres_u.reply.entries = rdbuf.etlist; + /* + * Cannot give the whole buffer unless every name is + * 256 bytes! Assume the worst case of all 1 byte names. + * This results in MINSIZ bytes/name in the xdr stream. 
+ */ + rda.count = sizeof (res) + MAXDENTS*MINSIZ; + bzero((caddr_t)rdbuf.names, sizeof (rdbuf.names)); + + status = CLNT_CALL(root_CLIENT, NFSPROC_READDIR, + xdr_readdirargs, (caddr_t)&rda, + xdr_readdirres, (caddr_t)&res, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs_getdents: RPC error\n"); + return (-1); + } + if (res.status != NFS_OK) { + /* + * The most common failure here would be trying to + * issue a getdents call on a non-directory! + */ + + nfs_error(res.status); + return (-1); + } + + for (ep = rdbuf.etlist; ep; ep = ep->nextentry) { + /* + * Step thru all entries returned by NFS, converting + * to the cannonical form and copying out to the + * user's buffer. + */ + + int n; + + /* + * catch the case user called at EOF + */ + if ((n = strlen(ep->name)) == 0) + return (cnt); + + n = BDIRENT_RECLEN(n); + + if (n > size) + return (cnt); + size -= n; + + (void) strcpy(dep->d_name, ep->name); + dep->d_ino = ep->fileid; + bcopy(ep->cookie, &offset, sizeof (nfscookie)); + dep->d_off = offset; + dep->d_reclen = (ushort_t)n; + + dep = (struct dirent *)((char *)dep + n); + bcopy(ep->cookie, rda.cookie, sizeof (nfscookie)); + bcopy(ep->cookie, nfp->cookie.cookie2, + sizeof (nfscookie)); + cnt++; + } + } + + return (cnt); +} diff --git a/usr/src/stand/lib/fs/nfs/getdents3.c b/usr/src/stand/lib/fs/nfs/getdents3.c new file mode 100644 index 0000000000..e3c24e3d84 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/getdents3.c @@ -0,0 +1,187 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Stuff relating to NFSv3 directory reading ... + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <rpc/xdr.h> +#include "clnt.h" +#include <rpc/rpc_msg.h> +#include <sys/t_lock.h> +#include "nfs_inet.h" +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs_prot.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/sysmacros.h> +#include "socket_inet.h" +#include <sys/salib.h> +#include <sys/bootdebug.h> + +#define MAXDENTS 16 +#define MINSIZ 20 + +/* + * Boot needs to be cleaned up to use either dirent32 or dirent64, + * in the meantime use dirent_t and always round to 8 bytes + */ +#define BDIRENT_RECLEN(namelen) \ + ((offsetof(dirent_t, d_name[0]) + 1 + (namelen) + 7) & ~ 7) + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * Get directory entries: + * + * Uses the nfs "READDIR" operation to read directory entries + * into a local buffer. These are then translated into file + * system independent "dirent" structs and returned in the + * caller's buffer. Returns the number of entries converted + * (-1 if there's an error). + * + * Although the xdr functions can allocate memory, we have + * a limited heap so we allocate our own space, + * assuming the worst case of 256 byte names. + * This is a space hog in our local buffer, so we want + * the number of buffers to be small. 
To make sure we don't + * get more names than we can handle, we tell the rpc + * routine that we only have space for MAXDENT names if + * they are all the minimum size. This keeps the return + * packet unfragmented, but may result in lots of reads + * to process a large directory. Since this is standalone + * we don't worry about speed. With MAXDENTs at 16, the + * local buffer is 4k. + */ + +int +nfs3getdents(struct nfs_file *nfp, struct dirent *dep, unsigned size) +{ + int cnt = 0; + entry3 *ep; + READDIR3args rda; + READDIR3res res; + enum clnt_stat status; + struct { + entry3 etlist[MAXDENTS]; + char names[MAXDENTS][NFS_MAXNAMLEN+1]; + } rdbuf; + int j; + struct timeval zero_timeout = {0, 0}; /* default */ + + bzero((caddr_t)&res, sizeof (res)); + bzero((caddr_t)&rda, sizeof (rda)); + bzero((caddr_t)rdbuf.etlist, sizeof (rdbuf.etlist)); + + rda.dir.data.data_len = nfp->fh.fh3.len; + rda.dir.data.data_val = nfp->fh.fh3.data; + rda.cookie = nfp->cookie.cookie3; + + while (!res.READDIR3res_u.resok.reply.eof) { + /* + * Keep issuing nfs calls until EOF is reached on + * the directory or the user buffer is filled. + */ + + for (j = 0; j < MAXDENTS; j++) { + /* + * Link our buffers together for the benefit of + * XDR. We do this each time we issue the rpc call + * JIC the xdr decode + * routines screw up the linkage! + */ + + rdbuf.etlist[j].name = rdbuf.names[(MAXDENTS-1) - j]; + rdbuf.etlist[j].nextentry = + (j < (MAXDENTS-1)) ? &rdbuf.etlist[j+1] : 0; + } + + res.READDIR3res_u.resok.reply.entries = rdbuf.etlist; + /* + * Cannot give the whole buffer unless every name is + * 256 bytes! Assume the worst case of all 1 byte names. + * This results in MINSIZ bytes/name in the xdr stream. 
+ */ + rda.count = sizeof (res) + MAXDENTS*MINSIZ; + bzero((caddr_t)rdbuf.names, sizeof (rdbuf.names)); + + status = CLNT_CALL(root_CLIENT, NFSPROC3_READDIR, + xdr_READDIR3args, (caddr_t)&rda, + xdr_READDIR3res, (caddr_t)&res, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs3_getdents: RPC error\n"); + return (-1); + } + if (res.status != NFS3_OK) { + /* + * The most common failure here would be trying to + * issue a getdents call on a non-directory! + */ + + nfs3_error(res.status); + return (-1); + } + + for (ep = rdbuf.etlist; ep; ep = ep->nextentry) { + /* + * Step thru all entries returned by NFS, converting + * to the cannonical form and copying out to the + * user's buffer. + */ + + int n; + + /* + * catch the case user called at EOF + */ + if ((n = strlen(ep->name)) == 0) + return (cnt); + + n = BDIRENT_RECLEN(n); + + if (n > size) + return (cnt); + size -= n; + + (void) strcpy(dep->d_name, ep->name); + dep->d_ino = ep->fileid; + dep->d_off = (off_t)ep->cookie; + dep->d_reclen = (ushort_t)n; + + dep = (struct dirent *)((char *)dep + n); + rda.cookie = ep->cookie; + nfp->cookie.cookie3 = ep->cookie; + cnt++; + } + } + + return (cnt); +} diff --git a/usr/src/stand/lib/fs/nfs/getdents4.c b/usr/src/stand/lib/fs/nfs/getdents4.c new file mode 100644 index 0000000000..f485dede59 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/getdents4.c @@ -0,0 +1,254 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Stuff relating to NFSv4 directory reading ... + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <rpc/xdr.h> +#include "clnt.h" +#include <rpc/rpc_msg.h> +#include <sys/t_lock.h> +#include "nfs_inet.h" +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs_prot.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/sysmacros.h> +#include "socket_inet.h" +#include <sys/salib.h> +#include <sys/bootdebug.h> + +#define MAXDENTS 16 +#define MINSIZ 20 + +/* + * Boot needs to be cleaned up to use either dirent32 or dirent64, + * in the meantime use dirent_t and always round to 8 bytes + */ +#define BDIRENT_RECLEN(namelen) \ + ((offsetof(dirent_t, d_name[0]) + 1 + (namelen) + 7) & ~ 7) + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * Get directory entries: + * + * Uses the nfs "READDIR" operation to read directory entries + * into a local buffer. These are then translated into file + * system independent "dirent" structs and returned in the + * caller's buffer. Returns the number of entries converted + * (-1 if there's an error). + * + * Although the xdr functions can allocate memory, we have + * a limited heap so we allocate our own space, + * assuming the worst case of 256 byte names. + * This is a space hog in our local buffer, so we want + * the number of buffers to be small. 
To make sure we don't + * get more names than we can handle, we tell the rpc + * routine that we only have space for MAXDENT names if + * they are all the minimum size. This keeps the return + * packet unfragmented, but may result in lots of reads + * to process a large directory. Since this is standalone + * we don't worry about speed. With MAXDENTs at 16, the + * local buffer is 4k. + */ + +int +nfs4getdents(struct nfs_file *nfp, struct dirent *dep, unsigned size) +{ + int cnt = 0; + b_entry4_t *ep; + readdir4arg_t readdir_args; + readdir4res_t readdir_res; + attr4_bitmap1_t bitmap1; + enum clnt_stat status; + struct { + b_entry4_t etlist[MAXDENTS]; + char names[MAXDENTS][NFS_MAXNAMLEN+1]; + } rdbuf; + int j; + struct timeval zero_timeout = {0, 0}; /* default */ + utf8string str; + char tagname[] = "inetboot readdir"; + + bzero((caddr_t)&readdir_res, sizeof (readdir4res_t)); + bzero((caddr_t)&readdir_args, sizeof (readdir4arg_t)); + bzero((caddr_t)rdbuf.etlist, sizeof (rdbuf.etlist)); + + str.utf8string_len = sizeof (tagname) - 1; + str.utf8string_val = tagname; + + if (nfp->fh.fh4.len > 0) + compound_init(&readdir_args.rd_arg, &str, 0, 2, &nfp->fh.fh4); + else + compound_init(&readdir_args.rd_arg, &str, 0, 2, NULL); + + readdir_args.rd_opreaddir = OP_READDIR; + readdir_args.rd_cookie = nfp->cookie.cookie4; + + while (!readdir_res.rd_eof) { + /* + * Keep issuing nfs calls until EOF is reached on + * the directory or the user buffer is filled. + */ + for (j = 0; j < MAXDENTS; j++) { + /* + * Link our buffers together for the benefit of + * XDR. We do this each time we issue the rpc call + * JIC the xdr decode + * routines screw up the linkage! + */ + rdbuf.etlist[j].b_name.utf8string_len = NFS_MAXNAMLEN; + rdbuf.etlist[j].b_name.utf8string_val = + rdbuf.names[(MAXDENTS-1) - j]; + rdbuf.etlist[j].b_nextentry = + (j < (MAXDENTS-1)) ? 
&rdbuf.etlist[j+1] : 0; + } + + readdir_res.rd_entries = rdbuf.etlist; + /* + * Cannot give the whole buffer unless every name is + * 256 bytes! Assume the worst case of all 1 byte names. + * This results in MINSIZ bytes/name in the xdr stream. + */ + readdir_args.rd_dircount = MAXDENTS * MINSIZ; + readdir_args.rd_maxcount = sizeof (readdir4res_t) + + (MAXDENTS * MINSIZ); + bzero((caddr_t)rdbuf.names, sizeof (rdbuf.names)); + + /* + * Set the attr bitmap, so we get the fileid back. + */ + bitmap1.word = 0; + bitmap1.bm_fattr4_fileid = 1; + readdir_args.rd_attr_req.b_bitmap_len = 1; + readdir_args.rd_attr_req.b_bitmap_val[0] = bitmap1.word; + + status = CLNT_CALL(root_CLIENT, NFSPROC4_COMPOUND, + xdr_readdir4_args, (caddr_t)&readdir_args, + xdr_readdir4_res, (caddr_t)&readdir_res, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs4_getdents: RPC error\n"); + return (-1); + } + if (readdir_res.rd_status != NFS4_OK) { + /* + * The most common failure here would be trying to + * issue a getdents call on a non-directory! + */ + + nfs4_error(readdir_res.rd_status); + return (-1); + } + + /* + * If we are reading from the beginning of the + * directory we will need to create the "." and ".." + * since we won't be getting them from the server. To obtain + * the fileid's just issue a couple otw lookups to get the + * info we need. + */ + if (readdir_args.rd_cookie == 0 && + rdbuf.etlist[0].b_cookie > 2) { + int n; + int error; + uint64_t fileid; + struct vattr va; + + /* + * Do a getattr for the '.' + */ + error = nfs4getattr(nfp, &va); + if (error) + return (-1); + + dep->d_name[0] = '.'; + dep->d_name[1] = '\0'; + dep->d_ino = va.va_nodeid; + dep->d_off = 1; + n = BDIRENT_RECLEN(1); + dep->d_reclen = n; + dep = (struct dirent *)((char *)dep + n); + + /* + * Do a lookupp for the '..' 
+ */ + (void) nfs4lookupp(nfp, &error, &fileid); + if (error) + return (-1); + + dep->d_name[0] = '.'; + dep->d_name[1] = '.'; + dep->d_name[2] = '\0'; + dep->d_ino = fileid; + dep->d_off = 2; + n = BDIRENT_RECLEN(2); + dep->d_reclen = n; + dep = (struct dirent *)((char *)dep + n); + } + + for (ep = rdbuf.etlist; ep; ep = ep->b_nextentry) { + /* + * Step thru all entries returned by NFS, converting + * to the cannonical form and copying out to the + * user's buffer. + */ + int n; + int namlen; + + /* + * catch the case user called at EOF + */ + if ((namlen = ep->b_name.utf8string_len) == 0) + return (cnt); + + n = BDIRENT_RECLEN(namlen); + + if (n > size) + return (cnt); + size -= n; + + bcopy(ep->b_name.utf8string_val, dep->d_name, namlen); + dep->d_name[namlen] = '\0'; + dep->d_ino = ep->b_fileid; + dep->d_off = (off_t)ep->b_cookie; + dep->d_reclen = (ushort_t)n; + + dep = (struct dirent *)((char *)dep + n); + readdir_args.rd_cookie = ep->b_cookie; + nfp->cookie.cookie4 = ep->b_cookie; + cnt++; + } + } + + return (cnt); +} diff --git a/usr/src/stand/lib/fs/nfs/inc.flg b/usr/src/stand/lib/fs/nfs/inc.flg new file mode 100644 index 0000000000..e11cff6c31 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/inc.flg @@ -0,0 +1,26 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# ident "%Z%%M% %I% %E% SMI" +# + +find_files "s.*" usr/src/common/net/dhcp diff --git a/usr/src/stand/lib/fs/nfs/llib-lnfs b/usr/src/stand/lib/fs/nfs/llib-lnfs new file mode 100644 index 0000000000..174d8b9000 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/llib-lnfs @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +/* + * The only symbols that other modules want to use are + * boot_nfs_ops, so just prototype them by hand here; eventually a + * header file would be appropriate. 
+ */ +#include <sys/types.h> +#include <sys/vfs.h> +#include <sys/bootvfs.h> + +extern struct boot_fs_ops boot_nfs_ops; diff --git a/usr/src/stand/lib/fs/nfs/lookup.c b/usr/src/stand/lib/fs/nfs/lookup.c new file mode 100644 index 0000000000..443b822d4e --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/lookup.c @@ -0,0 +1,328 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This file contains the file lookup code for NFS. 
+ */ + +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpc/types.h> +#include <rpc/auth.h> +#include <rpc/xdr.h> +#include <rpc/rpc_msg.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <rpcsvc/mount.h> +#include <pathname.h> +#include <sys/errno.h> +#include <sys/promif.h> +#include "nfs_inet.h" +#include "socket_inet.h" +#include <rpcsvc/nfs_prot.h> +#include <rpcsvc/nfs4_prot.h> +#include <sys/types.h> +#include <sys/salib.h> +#include <sys/sacache.h> +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootdebug.h> +#include "mac.h" + +static int root_inum = 1; /* Dummy i-node number for root */ +static int next_inum = 1; /* Next dummy i-node number */ + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * starting at current directory (root for us), lookup the pathname. + * return the file handle of said file. + */ + +static int lookuppn(struct pathname *pnp, struct nfs_file *cfile, + bool_t needroothandle); + +/* + * For NFSv4 we may be calling lookup in the context of evaluating the + * root path. In this case we set needroothandle to TRUE. + */ +int +lookup(char *pathname, struct nfs_file *cur_file, bool_t needroothandle) +{ + struct pathname pnp; + int error; + + static char lkup_path[NFS_MAXPATHLEN]; /* pn_alloc doesn't */ + + pnp.pn_buf = &lkup_path[0]; + bzero(pnp.pn_buf, NFS_MAXPATHLEN); + error = pn_get(pathname, &pnp); + if (error) + return (error); + error = lookuppn(&pnp, cur_file, needroothandle); + return (error); +} + +static int +lookuppn(struct pathname *pnp, struct nfs_file *cfile, bool_t needroothandle) +{ + char component[NFS_MAXNAMLEN+1]; /* buffer for component */ + int nlink = 0; + int error = 0; + int dino, cino; + struct nfs_file *cdp = NULL; + + *cfile = roothandle; /* structure copy - start at the root. */ + dino = root_inum; +begin: + /* + * Each time we begin a new name interpretation (e.g. 
+ * when first called and after each symbolic link is + * substituted), we allow the search to start at the + * root directory if the name starts with a '/', otherwise + * continuing from the current directory. + */ + component[0] = '\0'; + if (pn_peekchar(pnp) == '/') { + if (!needroothandle) + *cfile = roothandle; + dino = root_inum; + pn_skipslash(pnp); + } + +next: + /* + * Make sure we have a directory. + */ + if (!cfile_is_dir(cfile)) { + error = ENOTDIR; + goto bad; + } + /* + * Process the next component of the pathname. + */ + error = pn_stripcomponent(pnp, component); + if (error) + goto bad; + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. "/." or ".". + */ + if (component[0] == '\0') + return (0); + + /* + * Handle "..": two special cases. + * 1. If at root directory (e.g. after chroot) + * then ignore it so can't get out. + * 2. If this vnode is the root of a mounted + * file system, then replace it with the + * vnode which was mounted on so we take the + * .. in the other file system. + */ + if (strcmp(component, "..") == 0) { + if (cfile == &roothandle) + goto skip; + } + + /* + * Perform a lookup in the current directory. + * We create a simple negative lookup cache by storing + * inode -1 to indicate file not found. + */ + cino = get_dcache(mac_get_dev(), component, dino); + if (cino == -1) + return (ENOENT); +#ifdef DEBUG + dprintf("lookup: component %s pathleft %s\n", component, pnp->pn_path); +#endif + if ((cino == 0) || + ((cdp = (struct nfs_file *)get_icache(mac_get_dev(), cino)) == + 0)) { + struct nfs_file *lkp; + + /* + * If an RPC error occurs, error is not changed, + * else it is the NFS error if NULL is returned. 
+ */ + error = -1; + switch (cfile->version) { + case NFS_VERSION: + lkp = nfslookup(cfile, component, &error); + break; + case NFS_V3: + lkp = nfs3lookup(cfile, component, &error); + break; + case NFS_V4: + lkp = nfs4lookup(cfile, component, &error); + break; + default: + printf("lookup: NFS Version %d not supported\n", + cfile->version); + lkp = NULL; + break; + } + + /* + * Check for RPC error + */ + if (error == -1) { + printf("lookup: lookup RPC error\n"); + return (error); + } + + /* + * Check for NFS error + */ + if (lkp == NULL) { + if ((error != NFSERR_NOENT) && + (error != NFS3ERR_NOENT) && + (error != NFS4ERR_NOENT)) { +#ifdef DEBUG + dprintf("lookup: lkp is NULL with error %d\n", error); +#endif + return (error); + } +#ifdef DEBUG + dprintf("lookup: lkp is NULL with error %d\n", error); +#endif + /* + * File not found so set cached inode to -1 + */ + set_dcache(mac_get_dev(), component, dino, -1); + return (error); + } + + if (cdp = (struct nfs_file *) + bkmem_alloc(sizeof (struct nfs_file))) { + /* + * Save this entry in cache for next time ... + */ + if (!cino) + cino = ++next_inum; + *cdp = *lkp; + + set_dcache(mac_get_dev(), component, dino, cino); + set_icache(mac_get_dev(), cino, cdp, + sizeof (struct nfs_file)); + } else { + /* + * Out of memory, clear cache keys so we don't get + * confused later. + */ + cino = 0; + cdp = lkp; + } + } + dino = cino; + + /* + * If we hit a symbolic link and there is more path to be + * translated or this operation does not wish to apply + * to a link, then place the contents of the link at the + * front of the remaining pathname. 
+ */ + if (cfile_is_lnk(cdp)) { + struct pathname linkpath; + static char path_tmp[NFS_MAXPATHLEN]; /* used for symlinks */ + char *pathp; + + linkpath.pn_buf = &path_tmp[0]; + + nlink++; + if (nlink > MAXSYMLINKS) { + error = ELOOP; + goto bad; + } + switch (cdp->version) { + case NFS_VERSION: + error = nfsgetsymlink(cdp, &pathp); + break; + case NFS_V3: + error = nfs3getsymlink(cdp, &pathp); + break; + case NFS_V4: + error = nfs4getsymlink(cdp, &pathp); + break; + default: + printf("getsymlink: NFS Version %d not supported\n", + cdp->version); + error = ENOTSUP; + break; + } + + if (error) + goto bad; + + pn_get(pathp, &linkpath); + + if (pn_pathleft(&linkpath) == 0) + (void) pn_set(&linkpath, "."); + error = pn_combine(pnp, &linkpath); /* linkpath before pn */ + if (error) + goto bad; + goto begin; + } + + if (needroothandle) { + roothandle = *cdp; + needroothandle = FALSE; + } + *cfile = *cdp; + +skip: + /* + * Skip to next component of the pathname. + * If no more components, return last directory (if wanted) and + * last component (if wanted). + */ + if (pn_pathleft(pnp) == 0) { + (void) pn_set(pnp, component); + return (0); + } + /* + * skip over slashes from end of last component + */ + pn_skipslash(pnp); + goto next; +bad: + /* + * Error. + */ + return (error); +} diff --git a/usr/src/stand/lib/fs/nfs/mount.c b/usr/src/stand/lib/fs/nfs/mount.c new file mode 100644 index 0000000000..89d5c2f71a --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/mount.c @@ -0,0 +1,803 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/utsname.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <rpc/types.h> +#include <rpc/auth.h> +#include <sys/t_lock.h> +#include <netdb.h> +#include "clnt.h" +#include <rpc/xdr.h> +#include <rpc/rpc_msg.h> +#include <rpc/rpc.h> +#include "brpc.h" +#include "auth_inet.h" +#include "pmap.h" +#include <rpcsvc/nfs_prot.h> +#include <rpcsvc/nfs4_prot.h> +#include "nfs_inet.h" +#include <rpcsvc/bootparam.h> +#include <dhcp_impl.h> +#include <rpcsvc/mount.h> +#include <sys/promif.h> +#include <sys/salib.h> +#include "socket_inet.h" +#include "ipv4.h" +#include "mac.h" +#include <sys/bootdebug.h> +#include <errno.h> +#include "dhcpv4.h" +#include <sys/mntent.h> + +struct nfs_file roothandle; /* root file handle */ +static char root_hostname[SYS_NMLN]; /* server hostname */ +static char my_hostname[MAXHOSTNAMELEN]; +static char root_pathbuf[NFS_MAXPATHLEN]; /* the root's path */ +static char root_boot_file[NFS_MAXPATHLEN]; /* optional boot file */ +static struct sockaddr_in root_to; /* server sock ip */ + /* in network order */ +CLIENT *root_CLIENT = NULL; /* CLIENT handle */ +int dontroute = FALSE; /* In case rarp/bootparams was selected */ +char rootopts[MAX_PATH_LEN]; +static gid_t fake_gids = 1; /* fake gids list for auth_unix */ + +extern void set_default_filename(char *); /* boot.c */ + +/* + * xdr routines used by mount. 
+ */ + +bool_t +xdr_fhstatus(XDR *xdrs, struct fhstatus *fhsp) +{ + if (!xdr_int(xdrs, (int *)&fhsp->fhs_status)) + return (FALSE); + if (fhsp->fhs_status == 0) { + return (xdr_fhandle(xdrs, fhsp->fhstatus_u.fhs_fhandle)); + } + return (TRUE); +} + +bool_t +xdr_fhandle(XDR *xdrs, fhandle fhp) +{ + return (xdr_opaque(xdrs, (char *)fhp, NFS_FHSIZE)); +} + +bool_t +xdr_path(XDR *xdrs, char **pathp) +{ + return (xdr_string(xdrs, pathp, MNTPATHLEN)); +} + +bool_t +xdr_fhandle3(XDR *xdrs, fhandle3 *objp) +{ + return (xdr_bytes(xdrs, (char **)&objp->fhandle3_val, + (uint_t *)&objp->fhandle3_len, FHSIZE3)); +} + +bool_t +xdr_mountstat3(XDR *xdrs, mountstat3 *objp) +{ + return (xdr_enum(xdrs, (enum_t *)objp)); +} + +bool_t +xdr_mountres3_ok(XDR *xdrs, mountres3_ok *objp) +{ + if (!xdr_fhandle3(xdrs, &objp->fhandle)) + return (FALSE); + return (xdr_array(xdrs, (char **)&objp->auth_flavors.auth_flavors_val, + (uint_t *)&objp->auth_flavors.auth_flavors_len, ~0, + sizeof (int), (xdrproc_t)xdr_int)); +} + +bool_t +xdr_mountres3(XDR *xdrs, mountres3 *objp) +{ + if (!xdr_mountstat3(xdrs, &objp->fhs_status)) + return (FALSE); + if (objp->fhs_status == MNT_OK) + return (xdr_mountres3_ok(xdrs, &objp->mountres3_u.mountinfo)); + return (TRUE); +} + +static int +nfsmountroot(char *path, struct nfs_file *filep) +{ + int rexmit; + int resp_wait; + enum clnt_stat status; + struct fhstatus root_tmp; /* to pass to rpc/xdr */ + + /* + * Wait up to 16 secs for first response, retransmitting expon. + */ + rexmit = 0; /* default retransmission interval */ + resp_wait = 16; + + do { + status = brpc_call((rpcprog_t)MOUNTPROG, (rpcvers_t)MOUNTVERS, + (rpcproc_t)MOUNTPROC_MNT, xdr_path, (caddr_t)&path, + xdr_fhstatus, (caddr_t)&(root_tmp), rexmit, resp_wait, + &root_to, NULL, AUTH_UNIX); + if (status == RPC_TIMEDOUT) { + dprintf("boot: %s:%s mount server not responding.\n", + root_hostname, path); + } + rexmit = resp_wait; + resp_wait = 0; /* use default wait time. 
*/ + } while (status == RPC_TIMEDOUT); + + if ((status != RPC_SUCCESS) || (root_tmp.fhs_status != 0)) { + nfs_error(root_tmp.fhs_status); + root_to.sin_port = 0; + return (-1); + } + + /* + * Since the mount succeeded, we'll mark the filep's + * status as NFS_OK, and its type as NFDIR. If these + * points aren't the case, then we wouldn't be here. + */ + bcopy(&root_tmp.fhstatus_u.fhs_fhandle, &filep->fh.fh2, FHSIZE); + filep->ftype.type2 = NFDIR; + filep->version = NFS_VERSION; + nfs_readsize = nfs_readsize < NFS_MAXDATA ? nfs_readsize : NFS_MAXDATA; + /* + * Set a reasonable lower limit on readsize + */ + nfs_readsize = (nfs_readsize != 0 && nfs_readsize < 512) ? + 512 : nfs_readsize; + return (0); +} + +int +setup_root_vars(void) +{ + size_t buflen; + uint16_t readsize; + + /* + * Root server name. Required. + */ + buflen = sizeof (root_hostname); + if (dhcp_getinfo(DSYM_VENDOR, VS_NFSMNT_ROOTSRVR_NAME, 0, + root_hostname, &buflen)) { + root_hostname[buflen] = '\0'; + } else { + dprintf("BOUND: Missing Root Server Name Option\n"); + errno = EINVAL; + return (-1); + } + + /* + * Root server IP. Required. + */ + buflen = sizeof (root_to.sin_addr); + if (!dhcp_getinfo(DSYM_VENDOR, VS_NFSMNT_ROOTSRVR_IP, 0, + &root_to.sin_addr, &buflen)) { + dprintf("BOUND: Missing Root Server IP Option\n"); + errno = EINVAL; + return (-1); + } + + /* + * Root path Required. + */ + buflen = sizeof (root_pathbuf); + if (dhcp_getinfo(DSYM_VENDOR, VS_NFSMNT_ROOTPATH, 0, + root_pathbuf, &buflen)) { + root_pathbuf[buflen] = '\0'; + } else { + dprintf("BOUND: Missing Root Path Option\n"); + errno = EINVAL; + return (-1); + } + + /* + * Optional Bootfile path. 
+ */ + buflen = sizeof (root_boot_file); + if (dhcp_getinfo(DSYM_VENDOR, VS_NFSMNT_BOOTFILE, 0, + root_boot_file, &buflen)) { + root_boot_file[buflen] = '\0'; + dprintf("BOUND: Optional Boot File is: %s\n", root_boot_file); + } + + /* if we got a boot file name, use it as the default */ + if (root_boot_file[0] != '\0') + set_default_filename(root_boot_file); + + /* + * Set the NFS read size. The mount code will adjust it to + * the maximum size. + */ + buflen = sizeof (readsize); + if (dhcp_getinfo(DSYM_VENDOR, VS_BOOT_NFS_READSIZE, 0, + &readsize, &buflen)) { + nfs_readsize = ntohs(readsize); + if (boothowto & RB_VERBOSE) { + printf("Boot NFS read size: %d\n", nfs_readsize); + } + } + + /* + * Optional rootopts. + */ + buflen = sizeof (rootopts); + if (dhcp_getinfo(DSYM_VENDOR, VS_NFSMNT_ROOTOPTS, 0, + rootopts, &buflen)) { + rootopts[buflen] = '\0'; + dprintf("BOUND: Optional Rootopts is: %s\n", rootopts); + } + + return (0); +} + +static void +mnt3_error(enum mountstat3 status) +{ + if (!(boothowto & RB_DEBUG)) + return; + + switch (status) { + case MNT_OK: + printf("Mount: No error.\n"); + break; + case MNT3ERR_PERM: + printf("Mount: Not owner.\n"); + break; + case MNT3ERR_NOENT: + printf("Mount: No such file or directory.\n"); + break; + case MNT3ERR_IO: + printf("Mount: I/O error.\n"); + break; + case MNT3ERR_ACCES: + printf("Mount: Permission denied.\n"); + break; + case MNT3ERR_NOTDIR: + printf("Mount: Not a directory.\n"); + break; + case MNT3ERR_INVAL: + printf("Mount: Invalid argument.\n"); + break; + case MNT3ERR_NAMETOOLONG: + printf("Mount: File name too long.\n"); + break; + case MNT3ERR_NOTSUPP: + printf("Mount: Operation not supported.\n"); + break; + case MNT3ERR_SERVERFAULT: + printf("Mount: Server fault.\n"); + break; + default: + printf("Mount: unknown error.\n"); + break; + } +} + +static int +nfs3mountroot(char *path, struct nfs_file *filep) +{ + int rexmit; + int resp_wait; + struct mountres3 res3; + enum clnt_stat status; + + /* + * Wait up 
to 16 secs for first response, retransmitting expon. + */ + rexmit = 0; /* default retransmission interval */ + resp_wait = 16; + + /* + * Try to mount using V3 + */ + do { + bzero(&res3, sizeof (struct mountres3)); + + status = brpc_call((rpcprog_t)MOUNTPROG, (rpcvers_t)MOUNTVERS3, + (rpcproc_t)MOUNTPROC_MNT, xdr_path, (caddr_t)&path, + xdr_mountres3, (caddr_t)&res3, rexmit, resp_wait, + &root_to, NULL, AUTH_UNIX); + + if (status != RPC_TIMEDOUT) + break; + + dprintf("boot: %s:%s mount server not responding.\n", + root_hostname, path); + + rexmit = resp_wait; + resp_wait = 0; /* use default wait time. */ + + xdr_free(xdr_mountres3, (caddr_t)&res3); + } while (status == RPC_TIMEDOUT); + + if ((status != RPC_SUCCESS) || (res3.fhs_status != MNT_OK)) { + mnt3_error(res3.fhs_status); + root_to.sin_port = 0; + return (-1); + } + + /* + * Since the mount succeeded, we'll mark the filep's + * status as NFS_OK, and its type as NF3DIR. If these + * points aren't the case, then we wouldn't be here. + */ + filep->fh.fh3.len = res3.mountres3_u.mountinfo.fhandle.fhandle3_len; + bcopy(res3.mountres3_u.mountinfo.fhandle.fhandle3_val, + filep->fh.fh3.data, + filep->fh.fh3.len); + filep->ftype.type3 = NF3DIR; + filep->version = NFS_V3; + /* + * Hardwire in a known reasonable upper limit of 32K + */ + nfs_readsize = nfs_readsize < 32 * 1024 ? nfs_readsize : 32 * 1024; + /* + * Set a reasonable lower limit on readsize + */ + nfs_readsize = (nfs_readsize != 0 && nfs_readsize < 512) ? + 512 : nfs_readsize; + xdr_free(xdr_mountres3, (caddr_t)&res3); + return (0); +} + +/* + * Setup v4 client for inetboot + */ +static int +nfs4init(char *path, uint16_t nfs_port) +{ + struct timeval wait; + int fd = -1; + int error = 0; + enum clnt_stat rpc_stat; + struct nfs_file rootpath; + + wait.tv_sec = RPC_RCVWAIT_MSEC / 1000; + wait.tv_usec = 0; + + /* + * If we haven't explicitly set the port number, set to the standard + * 2049 and don't cause a rpcbind request. 
+ */ + if (nfs_port == 0) + nfs_port = 2049; + + root_to.sin_port = htons(nfs_port); + + /* + * Support TCP only + */ + root_CLIENT = clntbtcp_create(&root_to, NFS_PROGRAM, + NFS_V4, wait, &fd, + NFS4BUF_SIZE, NFS4BUF_SIZE); + + if (root_CLIENT == NULL) { + root_to.sin_port = 0; + return (-1); + } + + root_CLIENT->cl_auth = + authunix_create(my_hostname, 0, 1, 1, &fake_gids); + + /* + * Send NULL proc the server first to see if V4 exists + */ + rpc_stat = CLNT_CALL(root_CLIENT, NFSPROC4_NULL, xdr_void, NULL, + xdr_void, NULL, wait); + + if (rpc_stat != RPC_SUCCESS) { + dprintf("boot: NULL proc failed NFSv4 service not available\n"); + AUTH_DESTROY(root_CLIENT->cl_auth); + CLNT_DESTROY(root_CLIENT); + root_to.sin_port = 0; + return (-1); + } + + /* + * Do a lookup to get to the root_path. This is nice since it can + * handle multicomponent lookups. + */ + roothandle.version = NFS_V4; + roothandle.ftype.type4 = NF4DIR; + roothandle.fh.fh4.len = 0; /* Force a PUTROOTFH */ + roothandle.offset = (uint_t)0; /* it's a directory! */ + error = lookup(path, &rootpath, TRUE); + + if (error) { + printf("boot: lookup %s failed\n", path); + return (-1); + } + roothandle = rootpath; /* structure copy */ + + /* + * Hardwire in a known reasonable upper limit of 32K + */ + nfs_readsize = nfs_readsize < 32 * 1024 ? nfs_readsize : 32 * 1024; + /* + * Set a reasonable lower limit on readsize + */ + nfs_readsize = (nfs_readsize != 0 && nfs_readsize < 512) ? + 512 : nfs_readsize; + + return (0); +} + +static int +atoi(const char *p) +{ + int n; + int c, neg = 0; + + if (!isdigit(c = *p)) { + while (c == ' ' || c == '\t' || c == '\n') + c = *++p; + switch (c) { + case '-': + neg++; + /* FALLTHROUGH */ + case '+': + c = *++p; + } + if (!isdigit(c)) + return (0); + } + for (n = '0' - c; isdigit(c = *++p); ) { + n *= 10; /* two steps to avoid unnecessary overflow */ + n += '0' - c; /* accum neg to avoid surprises at MAX */ + } + return (neg ? 
n : -n); +} + +/* + * Parse suboptions from a string. + * Same as getsubopt(3C). + */ +static int +getsubopt(char **optionsp, char * const *tokens, char **valuep) +{ + char *s = *optionsp, *p; + int i; + size_t optlen; + + *valuep = NULL; + if (*s == '\0') + return (-1); + p = strchr(s, ','); /* find next option */ + if (p == NULL) { + p = s + strlen(s); + } else { + *p++ = '\0'; /* mark end and point to next */ + } + *optionsp = p; /* point to next option */ + p = strchr(s, '='); /* find value */ + if (p == NULL) { + optlen = strlen(s); + *valuep = NULL; + } else { + optlen = p - s; + *valuep = ++p; + } + for (i = 0; tokens[i] != NULL; i++) { + if ((optlen == strlen(tokens[i])) && + (strncmp(s, tokens[i], optlen) == 0)) + return (i); + } + /* no match, point value at option and return error */ + *valuep = s; + return (-1); +} + +/* + * The only interesting NFS mount options for initiating the kernel + * all others are ignored. + */ +static char *optlist[] = { +#define OPT_RSIZE 0 + MNTOPT_RSIZE, +#define OPT_TIMEO 1 + MNTOPT_TIMEO, +#define OPT_VERS 2 + MNTOPT_VERS, +#define OPT_PROTO 3 + MNTOPT_PROTO, +#define OPT_PORT 4 + MNTOPT_PORT, + NULL +}; + +/* + * This routine will open a device as it is known by the V2 OBP. It + * then goes thru the stuff necessary to initialize the network device, + * get our network parameters, (using DHCP or rarp/bootparams), and + * finally actually go and get the root filehandle. Sound like fun? + * Suuurrrree. Take a look. + * + * Returns 0 if things worked. -1 if we crashed and burned. 
+ */ +int +boot_nfs_mountroot(char *str) +{ + int status; + enum clnt_stat rpc_stat; + char *root_path = &root_pathbuf[0]; /* to make XDR happy */ + struct timeval wait; + int fd; + int bufsize; + char *opts, *val; + int nfs_version = 0; + int istcp = 1; + int nfs_port = 0; /* Cause pmap to get port */ + struct sockaddr_in tmp_addr; /* throw away */ + + if (root_CLIENT != NULL) { + AUTH_DESTROY(root_CLIENT->cl_auth); + CLNT_DESTROY(root_CLIENT); + root_CLIENT = NULL; + } + + root_to.sin_family = AF_INET; + root_to.sin_addr.s_addr = htonl(INADDR_ANY); + root_to.sin_port = htons(0); + + mac_init(str); + + (void) ipv4_setpromiscuous(TRUE); + + if (get_netconfig_strategy() == NCT_BOOTP_DHCP) { + if (boothowto & RB_VERBOSE) + printf("Using BOOTP/DHCP...\n"); + if (dhcp() != 0 || setup_root_vars() != 0) { + (void) ipv4_setpromiscuous(FALSE); + if (boothowto & RB_VERBOSE) + printf("BOOTP/DHCP configuration failed!\n"); + return (-1); + } + + /* now that we have an IP address, turn off promiscuous mode */ + (void) ipv4_setpromiscuous(FALSE); + } else { + /* Use RARP/BOOTPARAMS. RARP will try forever... */ + if (boothowto & RB_VERBOSE) + printf("Using RARP/BOOTPARAMS...\n"); + mac_call_rarp(); + + /* + * Since there is no way to determine our netmask, and therefore + * figure out if the router we got is useful, we assume all + * services are local. Use DHCP if this bothers you. + */ + dontroute = TRUE; + + /* now that we have an IP address, turn off promiscuous mode */ + (void) ipv4_setpromiscuous(FALSE); + + /* get our hostname */ + if (whoami() == FALSE) + return (-1); + + /* get our bootparams. */ + if (getfile("root", root_hostname, &root_to.sin_addr, + root_pathbuf) == FALSE) + return (-1); + + /* get our rootopts. 
*/ + (void) getfile("rootopts", root_hostname, &tmp_addr.sin_addr, + rootopts); + } + + /* mount root */ + if (boothowto & RB_VERBOSE) { + printf("root server: %s (%s)\n", root_hostname, + inet_ntoa(root_to.sin_addr)); + printf("root directory: %s\n", root_pathbuf); + } + + /* + * Assumes we've configured the stack and thus know our + * IP address/hostname, either by using DHCP or rarp/bootparams. + */ + gethostname(my_hostname, sizeof (my_hostname)); + + wait.tv_sec = RPC_RCVWAIT_MSEC / 1000; + wait.tv_usec = 0; + + /* + * Parse out the interesting root options, if an invalid + * or unknown option is provided, silently ignore it and + * use the defaults. + */ + opts = rootopts; + while (*opts) { + int ival; + switch (getsubopt(&opts, optlist, &val)) { + case OPT_RSIZE: + if (val == NULL || !isdigit(*val)) + break; + nfs_readsize = atoi(val); + break; + case OPT_TIMEO: + if (val == NULL || !isdigit(*val)) + break; + ival = atoi(val); + wait.tv_sec = ival / 10; + wait.tv_usec = (ival % 10) * 100000; + break; + case OPT_VERS: + if (val == NULL || !isdigit(*val)) + break; + nfs_version = atoi(val); + break; + case OPT_PROTO: + if (val == NULL || isdigit(*val)) + break; + if ((strncmp(val, "udp", 3) == 0)) + istcp = 0; + else + istcp = 1; /* must be tcp */ + break; + case OPT_PORT: + if (val == NULL || !isdigit(*val)) + break; + nfs_port = atoi(val); + + /* + * Currently nfs_dlinet.c doesn't support setting + * the root NFS port. Delete this when it does. + */ + nfs_port = 0; + break; + default: + /* + * Unknown options are silently ignored + */ + break; + } + } + + /* + * If version is set, then try that version first. + */ + switch (nfs_version) { + case NFS_VERSION: + if (nfsmountroot(root_path, &roothandle) == 0) + goto domount; + break; + case NFS_V3: + if (nfs3mountroot(root_path, &roothandle) == 0) + goto domount; + break; + case NFS_V4: + /* + * With v4 we skip the mount and go straight to + * setting the root filehandle. 
Because of this we + * do things slightly differently and obtain our + * client handle first. + */ + if (istcp && nfs4init(root_path, nfs_port) == 0) { + /* + * If v4 init succeeded then we are done. Just return. + */ + return (0); + } + } + + /* + * If there was no chosen version or the chosen version failed + * try all versions in order, this may still fail to boot + * at the kernel level if the options are not right, but be + * generous at this early stage. + */ + if (istcp && nfs4init(root_path, nfs_port) == 0) { + /* + * If v4 init succeeded then we are done. Just return. + */ + return (0); + } + + if (nfs3mountroot(root_path, &roothandle) == 0) + goto domount; + + if ((status = nfsmountroot(root_path, &roothandle)) != 0) + return (status); + +domount: + /* + * Only v2 and v3 go on from here. + */ + roothandle.offset = (uint_t)0; /* it's a directory! */ + root_to.sin_port = htons(nfs_port); /* NFS is next after mount */ + + /* + * Create the CLIENT handle for NFS operations + */ + if (roothandle.version == NFS_VERSION) + bufsize = NFSBUF_SIZE; + else + bufsize = NFS3BUF_SIZE; + + /* + * First try TCP then UDP (unless UDP asked for explicitly), if mountd + * alows this version but neither transport is available we are stuck. 
+ */ + if (istcp) { + fd = -1; + root_CLIENT = clntbtcp_create(&root_to, NFS_PROGRAM, + roothandle.version, wait, &fd, bufsize, bufsize); + if (root_CLIENT != NULL) { + root_CLIENT->cl_auth = + authunix_create(my_hostname, 0, 1, 1, &fake_gids); + /* + * Send NULL proc, check if the server really exists + */ + rpc_stat = CLNT_CALL(root_CLIENT, 0, + xdr_void, NULL, xdr_void, NULL, wait); + + if (rpc_stat == RPC_SUCCESS) + return (0); + + AUTH_DESTROY(root_CLIENT->cl_auth); + CLNT_DESTROY(root_CLIENT); + root_CLIENT = NULL; + } + /* Fall through to UDP case */ + } + + fd = -1; + root_CLIENT = clntbudp_bufcreate(&root_to, NFS_PROGRAM, + roothandle.version, wait, &fd, bufsize, bufsize); + if (root_CLIENT == NULL) + return (-1); + + root_CLIENT->cl_auth = + authunix_create(my_hostname, 0, 1, 1, &fake_gids); + /* + * Send NULL proc, check if the server really exists + */ + rpc_stat = CLNT_CALL(root_CLIENT, 0, + xdr_void, NULL, xdr_void, NULL, wait); + + if (rpc_stat == RPC_SUCCESS) + return (0); + + AUTH_DESTROY(root_CLIENT->cl_auth); + CLNT_DESTROY(root_CLIENT); + root_CLIENT = NULL; + return (-1); +} diff --git a/usr/src/stand/lib/fs/nfs/nfs2ops.c b/usr/src/stand/lib/fs/nfs/nfs2ops.c new file mode 100644 index 0000000000..78aefb6404 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs2ops.c @@ -0,0 +1,377 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Simple nfs ops - open, close, read, and lseek. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <sys/fcntl.h> +#include <sys/vfs.h> +#include <errno.h> +#include <sys/promif.h> +#include <rpc/xdr.h> +#include "nfs_inet.h" +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootdebug.h> +#include <sys/salib.h> +#include <sys/sacache.h> +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs_prot.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +static struct timeval zero_timeout = {0, 0}; /* default */ + +/* + * NFS Version 2 specific functions + */ + +ssize_t +nfsread(struct nfs_file *filep, char *buf, size_t size) +{ + readargs read_args; + readres read_res; + enum clnt_stat read_stat; + uint_t readcnt = 0; /* # bytes read by nfs */ + uint_t count = 0; /* # bytes transferred to buf */ + int done = FALSE; /* last block has come in */ + int framing_errs = 0; /* stack errors */ + char *buf_offset; /* current buffer offset */ + struct timeval timeout; +#ifndef i386 + static uint_t pos; /* progress indicator counter */ + static char ind[] = "|/-\\"; /* progress indicator */ + static int blks_read; +#endif + + read_args.file = filep->fh.fh2; /* structure copy */ + read_args.offset = filep->offset; + buf_offset = buf; + + /* Optimize for reads of less than one block size */ + + if (nfs_readsize == 0) + nfs_readsize = READ_SIZE; + + if (size < nfs_readsize) + read_args.count = size; + else + read_args.count = nfs_readsize; + + do { + /* use the user's buffer to stuff the data into. 
*/ + read_res.readres_u.reply.data.data_val = buf_offset; + + /* + * Handle the case where the file does not end + * on a block boundary. + */ + if ((count + read_args.count) > size) + read_args.count = size - count; + + timeout.tv_sec = NFS_REXMIT_MIN; /* Total wait for call */ + timeout.tv_usec = 0; + do { + read_stat = CLNT_CALL(root_CLIENT, NFSPROC_READ, + xdr_readargs, (caddr_t)&read_args, + xdr_readres, (caddr_t)&read_res, timeout); + + if (read_stat == RPC_TIMEDOUT) { + dprintf("NFS read(%d) timed out. Retrying...\n", + read_args.count); + /* + * If the remote is there and trying to respond, + * but our stack is having trouble reassembling + * the reply, reduce the read size in an + * attempt to compensate. Reset the + * transmission and reply wait timers. + */ + if (errno == ETIMEDOUT) + framing_errs++; + + if (framing_errs > NFS_MAX_FERRS && + read_args.count > NFS_READ_DECR) { + read_args.count -= NFS_READ_DECR; + nfs_readsize -= NFS_READ_DECR; + dprintf("NFS Read size now %d.\n", + nfs_readsize); + timeout.tv_sec = NFS_REXMIT_MIN; + framing_errs = 0; + } else { + if (timeout.tv_sec < NFS_REXMIT_MAX) + timeout.tv_sec++; + else + timeout.tv_sec = 0; + /* default RPC */ + } + } + } while (read_stat == RPC_TIMEDOUT); + + if (read_stat != RPC_SUCCESS) + return (-1); + + readcnt = read_res.readres_u.reply.data.data_len; + /* + * Handle the case where the file is simply empty, and + * nothing could be read. + */ + if (readcnt == 0) + break; /* eof */ + + /* + * Handle the case where the file is smaller than + * the size of the read request, thus the request + * couldn't be completely filled. 
+ */ + if (readcnt < read_args.count) { +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("nfsread(): partial read %d" + " instead of %d\n", + readcnt, read_args.count); +#endif + done = TRUE; /* update the counts and exit */ + } + + /* update various offsets */ + count += readcnt; + filep->offset += readcnt; + buf_offset += readcnt; + read_args.offset += readcnt; +#ifndef i386 + /* + * round and round she goes (though not on every block.. + * - OBP's take a fair bit of time to actually print stuff) + */ + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); +#endif + } while (count < size && !done); + + return (count); +} + +static vtype_t nf_to_vt[] = { + VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK +}; + +int +nfsgetattr(struct nfs_file *nfp, struct vattr *vap) +{ + enum clnt_stat getattr_stat; + attrstat getattr_res; + fattr *na; + struct timeval timeout = {0, 0}; /* default */ + + getattr_stat = CLNT_CALL(root_CLIENT, NFSPROC_GETATTR, + xdr_nfs_fh, (caddr_t)&(nfp->fh.fh2), + xdr_attrstat, (caddr_t)&getattr_res, timeout); + + if (getattr_stat != RPC_SUCCESS) { + dprintf("nfs_getattr: RPC error %d\n", getattr_stat); + return (-1); + } + if (getattr_res.status != NFS_OK) { + nfs_error(getattr_res.status); + return (getattr_res.status); + } + + /* adapted from nattr_to_vattr() in nfs_client.c */ + + na = &getattr_res.attrstat_u.attributes; + if (vap->va_mask & AT_TYPE) { + if (na->type < NFNON || na->type > NFSOCK) + vap->va_type = VBAD; + else + vap->va_type = nf_to_vt[na->type]; + } + if (vap->va_mask & AT_MODE) + vap->va_mode = na->mode; + if (vap->va_mask & AT_SIZE) + vap->va_size = na->size; + if (vap->va_mask & AT_NODEID) + vap->va_nodeid = na->fileid; + if (vap->va_mask & AT_ATIME) { + vap->va_atime.tv_sec = na->atime.seconds; + vap->va_atime.tv_nsec = na->atime.useconds * 1000; + } + if (vap->va_mask & AT_CTIME) { + vap->va_ctime.tv_sec = na->ctime.seconds; + vap->va_ctime.tv_nsec = na->ctime.useconds * 1000; + } + if 
(vap->va_mask & AT_MTIME) { + vap->va_mtime.tv_sec = na->mtime.seconds; + vap->va_mtime.tv_nsec = na->mtime.useconds * 1000; + } + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("nfs_getattr(): done.\n"); +#endif + return (getattr_res.status); +} + +/* + * Display nfs error messages. + */ +/*ARGSUSED*/ +void +nfs_error(enum nfsstat status) +{ + if (!(boothowto & RB_DEBUG)) + return; + + switch (status) { + case NFSERR_PERM: + printf("NFS: Not owner.\n"); + break; + case NFSERR_NOENT: +#ifdef NFS_OPS_DEBUG + printf("NFS: No such file or directory.\n"); +#endif /* NFS_OPS_DEBUG */ + break; + case NFSERR_IO: + printf("NFS: IO ERROR occurred on NFS server.\n"); + break; + case NFSERR_NXIO: + printf("NFS: No such device or address.\n"); + break; + case NFSERR_ACCES: + printf("NFS: Permission denied.\n"); + break; + case NFSERR_EXIST: + printf("NFS: File exists.\n"); + break; + case NFSERR_NODEV: + printf("NFS: No such device.\n"); + break; + case NFSERR_NOTDIR: + printf("NFS: Not a directory.\n"); + break; + case NFSERR_ISDIR: + printf("NFS: Is a directory.\n"); + break; + case NFSERR_FBIG: + printf("NFS: File too large.\n"); + break; + case NFSERR_NOSPC: + printf("NFS: No space left on device.\n"); + break; + case NFSERR_ROFS: + printf("NFS: Read-only filesystem.\n"); + break; + case NFSERR_NAMETOOLONG: + printf("NFS: File name too long.\n"); + break; + case NFSERR_NOTEMPTY: + printf("NFS: Directory not empty.\n"); + break; + case NFSERR_DQUOT: + printf("NFS: Disk quota exceeded.\n"); + break; + case NFSERR_STALE: + printf("NFS: Stale file handle.\n"); + break; + case NFSERR_WFLUSH: + printf("NFS: server's write cache has been flushed.\n"); + break; + default: + printf("NFS: unknown error.\n"); + break; + } +} + +struct nfs_file * +nfslookup(struct nfs_file *dir, char *name, int *nstat) +{ + static struct nfs_file cd; + diropargs dirop; + diropres res_lookup; + enum clnt_stat status; + + *nstat = (int)NFS_OK; + + bcopy(&dir->fh.fh2, &dirop.dir, 
NFS_FHSIZE); + dirop.name = name; + + status = CLNT_CALL(root_CLIENT, NFSPROC_LOOKUP, xdr_diropargs, + (caddr_t)&dirop, xdr_diropres, (caddr_t)&res_lookup, + zero_timeout); + if (status != RPC_SUCCESS) { + dprintf("lookup: RPC error.\n"); + return (NULL); + } + if (res_lookup.status != NFS_OK) { + nfs_error(res_lookup.status); + *nstat = (int)res_lookup.status; + return (NULL); + } + + bzero((caddr_t)&cd, sizeof (struct nfs_file)); + cd.version = NFS_VERSION; + cd.ftype.type2 = res_lookup.diropres_u.diropres.attributes.type; + bcopy(&res_lookup.diropres_u.diropres.file, &cd.fh.fh2, NFS_FHSIZE); + return (&cd); +} + +/* + * Gets symbolic link into pathname. + */ +int +nfsgetsymlink(struct nfs_file *cfile, char **path) +{ + enum clnt_stat status; + struct readlinkres linkres; + static char symlink_path[NFS_MAXPATHLEN]; + + /* + * linkres needs a zeroed buffer to place path data into: + */ + bzero(symlink_path, NFS_MAXPATHLEN); + linkres.readlinkres_u.data = &symlink_path[0]; + + status = CLNT_CALL(root_CLIENT, NFSPROC_READLINK, + xdr_nfs_fh, (caddr_t)&cfile->fh.fh2, + xdr_readlinkres, (caddr_t)&linkres, zero_timeout); + if (status != RPC_SUCCESS) { + dprintf("nfsgetsymlink: RPC call failed.\n"); + return (-1); + } + if (linkres.status != NFS_OK) { + nfs_error(linkres.status); + return (linkres.status); + } + + *path = linkres.readlinkres_u.data; + + return (NFS_OK); +} diff --git a/usr/src/stand/lib/fs/nfs/nfs3_xdr.c b/usr/src/stand/lib/fs/nfs/nfs3_xdr.c new file mode 100644 index 0000000000..f9e3a4f137 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs3_xdr.c @@ -0,0 +1,284 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/xdr.h> +#include <rpc/rpc.h> +#include <rpcsvc/nfs_prot.h> + +/* + * Xdr routines for NFS ops. + */ + +static bool_t +xdr_b_nfs_fh3(XDR *xdrs, nfs_fh3 *objp) +{ + return (xdr_bytes(xdrs, (char **)&objp->data.data_val, + (uint_t *)&objp->data.data_len, NFS3_FHSIZE)); +} + +static bool_t +xdr_b_fattr3(XDR *xdrs, fattr3 *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->type)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->mode)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->nlink)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->uid)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->gid)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->size)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->used)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->rdev.specdata1)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->rdev.specdata2)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->fsid)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->fileid)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->atime.seconds)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->atime.nseconds)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->mtime.seconds)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->mtime.nseconds)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->ctime.seconds)) + return (FALSE); + 
return (xdr_u_int(xdrs, &objp->ctime.nseconds)); +} + +static bool_t +xdr_b_post_op_attr(XDR *xdrs, post_op_attr *objp) +{ + if (!xdr_bool(xdrs, &objp->attributes_follow)) + return (FALSE); + switch (objp->attributes_follow) { + case TRUE: + return (xdr_b_fattr3(xdrs, &objp->post_op_attr_u.attributes)); + case FALSE: + return (TRUE); + default: + return (FALSE); + } +} + +static bool_t +xdr_b_diropargs3(XDR *xdrs, diropargs3 *objp) +{ + if (!xdr_b_nfs_fh3(xdrs, &objp->dir)) + return (FALSE); + return (xdr_string(xdrs, &objp->name, ~0)); +} + +bool_t +xdr_GETATTR3args(XDR *xdrs, GETATTR3args *objp) +{ + return (xdr_b_nfs_fh3(xdrs, &objp->object)); +} + +static bool_t +xdr_b_GETATTR3resok(XDR *xdrs, GETATTR3resok *objp) +{ + return (xdr_b_fattr3(xdrs, &objp->obj_attributes)); +} + +bool_t +xdr_GETATTR3res(XDR *xdrs, GETATTR3res *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->status)) + return (FALSE); + if (objp->status == NFS3_OK) + return (xdr_b_GETATTR3resok(xdrs, &objp->GETATTR3res_u.resok)); + return (TRUE); +} + +bool_t +xdr_LOOKUP3args(XDR *xdrs, LOOKUP3args *objp) +{ + return (xdr_b_diropargs3(xdrs, &objp->what)); +} + +static bool_t +xdr_b_LOOKUP3resok(XDR *xdrs, LOOKUP3resok *objp) +{ + if (!xdr_b_nfs_fh3(xdrs, &objp->object)) + return (FALSE); + if (!xdr_b_post_op_attr(xdrs, &objp->obj_attributes)) + return (FALSE); + return (xdr_b_post_op_attr(xdrs, &objp->dir_attributes)); +} + +static bool_t +xdr_b_LOOKUP3resfail(XDR *xdrs, LOOKUP3resfail *objp) +{ + return (xdr_b_post_op_attr(xdrs, &objp->dir_attributes)); +} + +bool_t +xdr_LOOKUP3res(XDR *xdrs, LOOKUP3res *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->status)) + return (FALSE); + + if (objp->status == NFS3_OK) + return (xdr_b_LOOKUP3resok(xdrs, &objp->LOOKUP3res_u.resok)); + + return (xdr_b_LOOKUP3resfail(xdrs, &objp->LOOKUP3res_u.resfail)); +} + +bool_t +xdr_READLINK3args(XDR *xdrs, READLINK3args *objp) +{ + return (xdr_b_nfs_fh3(xdrs, &objp->symlink)); +} + +static bool_t 
+xdr_b_READLINK3resok(XDR *xdrs, READLINK3resok *objp) +{ + if (!xdr_b_post_op_attr(xdrs, &objp->symlink_attributes)) + return (FALSE); + return (xdr_string(xdrs, &objp->data, ~0)); +} + +static bool_t +xdr_b_READLINK3resfail(XDR *xdrs, READLINK3resfail *objp) +{ + return (xdr_b_post_op_attr(xdrs, &objp->symlink_attributes)); +} + +bool_t +xdr_READLINK3res(XDR *xdrs, READLINK3res *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->status)) + return (FALSE); + if (objp->status == NFS3_OK) + return (xdr_b_READLINK3resok(xdrs, + &objp->READLINK3res_u.resok)); + return (xdr_b_READLINK3resfail(xdrs, &objp->READLINK3res_u.resfail)); +} + +bool_t +xdr_READ3args(XDR *xdrs, READ3args *objp) +{ + if (!xdr_b_nfs_fh3(xdrs, &objp->file)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->offset)) + return (FALSE); + return (xdr_u_int(xdrs, &objp->count)); +} + +static bool_t +xdr_b_READ3resok(XDR *xdrs, READ3resok *objp) +{ + if (!xdr_b_post_op_attr(xdrs, &objp->file_attributes)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->count)) + return (FALSE); + if (!xdr_bool(xdrs, &objp->eof)) + return (FALSE); + return (xdr_bytes(xdrs, (char **)&objp->data.data_val, + (uint_t *)&objp->data.data_len, ~0)); +} + +static bool_t +xdr_b_READ3resfail(XDR *xdrs, READ3resfail *objp) +{ + return (xdr_b_post_op_attr(xdrs, &objp->file_attributes)); +} + +bool_t +xdr_READ3res(XDR *xdrs, READ3res *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->status)) + return (FALSE); + if (objp->status == NFS3_OK) + return (xdr_b_READ3resok(xdrs, &objp->READ3res_u.resok)); + return (xdr_b_READ3resfail(xdrs, &objp->READ3res_u.resfail)); +} + +bool_t +xdr_READDIR3args(XDR *xdrs, READDIR3args *objp) +{ + if (!xdr_b_nfs_fh3(xdrs, &objp->dir)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->cookie)) + return (FALSE); + if (!xdr_opaque(xdrs, objp->cookieverf, NFS3_COOKIEVERFSIZE)) + return (FALSE); + return (xdr_u_int(xdrs, &objp->count)); +} + +static bool_t +xdr_b_entry3(XDR *xdrs, entry3 *objp) +{ + if 
(!xdr_u_longlong_t(xdrs, &objp->fileid)) + return (FALSE); + if (!xdr_string(xdrs, &objp->name, ~0)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, &objp->cookie)) + return (FALSE); + return (xdr_pointer(xdrs, (char **)&objp->nextentry, + sizeof (entry3), (xdrproc_t)xdr_b_entry3)); +} + +static bool_t +xdr_b_READDIR3resok(XDR *xdrs, READDIR3resok *objp) +{ + if (!xdr_b_post_op_attr(xdrs, &objp->dir_attributes)) + return (FALSE); + if (!xdr_opaque(xdrs, objp->cookieverf, NFS3_COOKIEVERFSIZE)) + return (FALSE); + if (!xdr_pointer(xdrs, (char **)&objp->reply.entries, + sizeof (entry3), (xdrproc_t)xdr_b_entry3)) + return (FALSE); + return (xdr_bool(xdrs, &objp->reply.eof)); +} + +static bool_t +xdr_b_READDIR3resfail(XDR *xdrs, READDIR3resfail *objp) +{ + return (xdr_b_post_op_attr(xdrs, &objp->dir_attributes)); +} + +bool_t +xdr_READDIR3res(XDR *xdrs, READDIR3res *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->status)) + return (FALSE); + if (objp->status == NFS3_OK) + return (xdr_b_READDIR3resok(xdrs, &objp->READDIR3res_u.resok)); + return (xdr_b_READDIR3resfail(xdrs, &objp->READDIR3res_u.resfail)); +} diff --git a/usr/src/stand/lib/fs/nfs/nfs3ops.c b/usr/src/stand/lib/fs/nfs/nfs3ops.c new file mode 100644 index 0000000000..dd8ad52904 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs3ops.c @@ -0,0 +1,446 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Simple nfs V3 ops + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <sys/fcntl.h> +#include <sys/vfs.h> +#include <errno.h> +#include <sys/promif.h> +#include <rpc/xdr.h> +#include "nfs_inet.h" +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootdebug.h> +#include <sys/salib.h> +#include <sys/sacache.h> +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs_prot.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * NFS Version 3 specific functions + */ + +ssize_t +nfs3read(struct nfs_file *filep, char *buf, size_t size) +{ + READ3args read_args; + READ3res read_res; + enum clnt_stat read_stat; + uint_t readcnt = 0; /* # bytes read by nfs */ + uint_t count = 0; /* # bytes transferred to buf */ + int done = FALSE; /* last block has come in */ + int framing_errs = 0; /* stack errors */ + char *buf_offset; /* current buffer offset */ + struct timeval timeout; +#ifndef i386 + static uint_t pos; /* progress indicator counter */ + static char ind[] = "|/-\\"; /* progress indicator */ + static int blks_read; +#endif + + read_args.file.data.data_len = filep->fh.fh3.len; + read_args.file.data.data_val = filep->fh.fh3.data; + read_args.offset = filep->offset; + + bzero(&read_res, sizeof (read_res)); + + buf_offset = buf; + + /* Optimize for reads of less than one block size */ + + if (nfs_readsize == 0) + nfs_readsize = READ3_SIZE; + + if (size < nfs_readsize) + read_args.count = size; + else + read_args.count = nfs_readsize; + + do { + /* use the user's buffer to stuff the data into. 
*/ + read_res.READ3res_u.resok.data.data_val = buf_offset; + + /* + * Handle the case where the file does not end + * on a block boundary. + */ + if ((count + read_args.count) > size) + read_args.count = size - count; + + timeout.tv_sec = NFS_REXMIT_MIN; /* Total wait for call */ + timeout.tv_usec = 0; + do { + read_stat = CLNT_CALL(root_CLIENT, NFSPROC3_READ, + xdr_READ3args, (caddr_t)&read_args, + xdr_READ3res, (caddr_t)&read_res, timeout); + + if (read_stat == RPC_TIMEDOUT) { + dprintf("NFS read(%d) timed out. Retrying...\n", + read_args.count); + /* + * If the remote is there and trying to respond, + * but our stack is having trouble reassembling + * the reply, reduce the read size in an + * attempt to compensate. Reset the + * transmission and reply wait timers. + */ + if (errno == ETIMEDOUT) + framing_errs++; + + if (framing_errs > NFS_MAX_FERRS && + read_args.count > NFS_READ_DECR) { + read_args.count /= 2; + nfs_readsize /= 2; + dprintf("NFS Read size now %d.\n", + nfs_readsize); + timeout.tv_sec = NFS_REXMIT_MIN; + framing_errs = 0; + } else { + if (timeout.tv_sec < NFS_REXMIT_MAX) + timeout.tv_sec++; + else + timeout.tv_sec = 0; + /* default RPC */ + } + } + } while (read_stat == RPC_TIMEDOUT); + + if (read_stat != RPC_SUCCESS) + return (-1); + + if (read_res.status != NFS3_OK) + return (-1); + + readcnt = read_res.READ3res_u.resok.data.data_len; + /* + * If we are at EOF, update counts and exit + */ + if (read_res.READ3res_u.resok.eof == TRUE) + done = TRUE; + + /* + * Handle the case where the file is smaller than + * the size of the read request, thus the request + * couldn't be completely filled. 
+ */ + if (readcnt < read_args.count) { +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("nfs3read(): partial read %d" + " instead of %d\n", + readcnt, read_args.count); +#endif + done = TRUE; /* update the counts and exit */ + } + + /* update various offsets */ + count += readcnt; + filep->offset += readcnt; + buf_offset += readcnt; + read_args.offset += readcnt; +#ifndef i386 + /* + * round and round she goes (though not on every block.. + * - OBP's take a fair bit of time to actually print stuff) + */ + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); +#endif + } while (count < size && !done); + + return (count); +} + +int +nfs3getattr(struct nfs_file *nfp, struct vattr *vap) +{ + enum clnt_stat getattr_stat; + GETATTR3args getattr_args; + GETATTR3res getattr_res; + fattr3 *na; + struct timeval timeout = {0, 0}; /* default */ + vtype_t nf3_to_vt[] = + { VBAD, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; + + + bzero(&getattr_args, sizeof (getattr_args)); + getattr_args.object.data.data_len = nfp->fh.fh3.len; + getattr_args.object.data.data_val = nfp->fh.fh3.data; + + bzero(&getattr_res, sizeof (getattr_res)); + + getattr_stat = CLNT_CALL(root_CLIENT, NFSPROC3_GETATTR, + xdr_GETATTR3args, (caddr_t)&getattr_args, + xdr_GETATTR3res, (caddr_t)&getattr_res, timeout); + + if (getattr_stat != RPC_SUCCESS) { + dprintf("nfs_getattr: RPC error %d\n", getattr_stat); + return (-1); + } + if (getattr_res.status != NFS3_OK) { + nfs3_error(getattr_res.status); + return (getattr_res.status); + } + + na = &getattr_res.GETATTR3res_u.resok.obj_attributes; + if (vap->va_mask & AT_TYPE) { + if (na->type < NF3REG || na->type > NF3FIFO) + vap->va_type = VBAD; + else + vap->va_type = nf3_to_vt[na->type]; + } + if (vap->va_mask & AT_MODE) + vap->va_mode = (mode_t)na->mode; + if (vap->va_mask & AT_SIZE) + vap->va_size = (u_offset_t)na->size; + if (vap->va_mask & AT_NODEID) + vap->va_nodeid = (u_longlong_t)na->fileid; + if (vap->va_mask & AT_ATIME) { + 
vap->va_atime.tv_sec = na->atime.seconds; + vap->va_atime.tv_nsec = na->atime.nseconds; + } + if (vap->va_mask & AT_CTIME) { + vap->va_ctime.tv_sec = na->ctime.seconds; + vap->va_ctime.tv_nsec = na->ctime.nseconds; + } + if (vap->va_mask & AT_MTIME) { + vap->va_mtime.tv_sec = na->mtime.seconds; + vap->va_mtime.tv_nsec = na->mtime.nseconds; + } + + return (NFS3_OK); +} + +/* + * Display nfs error messages. + */ +/*ARGSUSED*/ +void +nfs3_error(enum nfsstat3 status) +{ + if (!(boothowto & RB_DEBUG)) + return; + + switch (status) { + case NFS3_OK: + printf("NFS: No error.\n"); + break; + case NFS3ERR_PERM: + printf("NFS: Not owner.\n"); + break; + case NFS3ERR_NOENT: +#ifdef NFS_OPS_DEBUG + printf("NFS: No such file or directory.\n"); +#endif /* NFS_OPS_DEBUG */ + break; + case NFS3ERR_IO: + printf("NFS: IO ERROR occurred on NFS server.\n"); + break; + case NFS3ERR_NXIO: + printf("NFS: No such device or address.\n"); + break; + case NFS3ERR_ACCES: + printf("NFS: Permission denied.\n"); + break; + case NFS3ERR_EXIST: + printf("NFS: File exists.\n"); + break; + case NFS3ERR_XDEV: + printf("NFS: Cross device hard link.\n"); + break; + case NFS3ERR_NODEV: + printf("NFS: No such device.\n"); + break; + case NFS3ERR_NOTDIR: + printf("NFS: Not a directory.\n"); + break; + case NFS3ERR_ISDIR: + printf("NFS: Is a directory.\n"); + break; + case NFS3ERR_INVAL: + printf("NFS: Invalid argument.\n"); + break; + case NFS3ERR_FBIG: + printf("NFS: File too large.\n"); + break; + case NFS3ERR_NOSPC: + printf("NFS: No space left on device.\n"); + break; + case NFS3ERR_ROFS: + printf("NFS: Read-only filesystem.\n"); + break; + case NFS3ERR_MLINK: + printf("NFS: Too many hard links.\n"); + break; + case NFS3ERR_NAMETOOLONG: + printf("NFS: File name too long.\n"); + break; + case NFS3ERR_NOTEMPTY: + printf("NFS: Directory not empty.\n"); + break; + case NFS3ERR_DQUOT: + printf("NFS: Disk quota exceeded.\n"); + break; + case NFS3ERR_STALE: + printf("NFS: Stale file handle.\n"); + break; + 
case NFS3ERR_REMOTE: + printf("NFS: Remote file in path.\n"); + break; + case NFS3ERR_BADHANDLE: + printf("NFS: Illegal NFS file handle.\n"); + break; + case NFS3ERR_NOT_SYNC: + printf("NFS: Synchronization mismatch.\n"); + break; + case NFS3ERR_BAD_COOKIE: + printf("NFS: Stale Cookie.\n"); + break; + case NFS3ERR_NOTSUPP: + printf("NFS: Operation is not supported.\n"); + break; + case NFS3ERR_TOOSMALL: + printf("NFS: Buffer too small.\n"); + break; + case NFS3ERR_SERVERFAULT: + printf("NFS: Server fault.\n"); + break; + case NFS3ERR_BADTYPE: + printf("NFS: Unsupported object type.\n"); + break; + case NFS3ERR_JUKEBOX: + printf("NFS: Resource temporarily unavailable.\n"); + break; + default: + printf("NFS: unknown error.\n"); + break; + } +} + +struct nfs_file * +nfs3lookup(struct nfs_file *dir, char *name, int *nstat) +{ + struct timeval zero_timeout = {0, 0}; /* default */ + static struct nfs_file cd; + LOOKUP3args dirop; + LOOKUP3res res_lookup; + enum clnt_stat status; + + *nstat = (int)NFS3_OK; + + bzero((caddr_t)&dirop, sizeof (LOOKUP3args)); + bzero((caddr_t)&res_lookup, sizeof (LOOKUP3res)); + + dirop.what.dir.data.data_len = dir->fh.fh3.len; + dirop.what.dir.data.data_val = dir->fh.fh3.data; + dirop.what.name = name; + + status = CLNT_CALL(root_CLIENT, NFSPROC3_LOOKUP, xdr_LOOKUP3args, + (caddr_t)&dirop, xdr_LOOKUP3res, (caddr_t)&res_lookup, + zero_timeout); + if (status != RPC_SUCCESS) { + dprintf("lookup: RPC error.\n"); + return (NULL); + } + if (res_lookup.status != NFS3_OK) { + nfs3_error(res_lookup.status); + *nstat = (int)res_lookup.status; + (void) CLNT_FREERES(root_CLIENT, + xdr_LOOKUP3res, (caddr_t)&res_lookup); + return (NULL); + } + + bzero((caddr_t)&cd, sizeof (struct nfs_file)); + cd.version = NFS_V3; + /* + * Server must supply post_op_attr's + */ + if (res_lookup.LOOKUP3res_u.resok.obj_attributes.attributes_follow == + FALSE) { + printf("nfs3lookup: server fails to return post_op_attr\n"); + (void) CLNT_FREERES(root_CLIENT, + 
xdr_LOOKUP3res, (caddr_t)&res_lookup); + return (NULL); + } + + cd.ftype.type3 = +res_lookup.LOOKUP3res_u.resok.obj_attributes.post_op_attr_u.attributes.type; + cd.fh.fh3.len = res_lookup.LOOKUP3res_u.resok.object.data.data_len; + bcopy(res_lookup.LOOKUP3res_u.resok.object.data.data_val, + cd.fh.fh3.data, cd.fh.fh3.len); + (void) CLNT_FREERES(root_CLIENT, xdr_LOOKUP3res, (caddr_t)&res_lookup); + return (&cd); +} + +/* + * Gets symbolic link into pathname. + */ +int +nfs3getsymlink(struct nfs_file *cfile, char **path) +{ + struct timeval zero_timeout = {0, 0}; /* default */ + enum clnt_stat status; + struct READLINK3res linkres; + struct READLINK3args linkargs; + static char symlink_path[NFS_MAXPATHLEN]; + + bzero(&linkargs, sizeof (linkargs)); + linkargs.symlink.data.data_len = cfile->fh.fh3.len; + linkargs.symlink.data.data_val = cfile->fh.fh3.data; + + /* + * linkres needs a zeroed buffer to place path data into: + */ + bzero(&linkres, sizeof (linkres)); + bzero(symlink_path, NFS_MAXPATHLEN); + linkres.READLINK3res_u.resok.data = symlink_path; + + status = CLNT_CALL(root_CLIENT, NFSPROC3_READLINK, + xdr_READLINK3args, (caddr_t)&linkargs, + xdr_READLINK3res, (caddr_t)&linkres, zero_timeout); + if (status != RPC_SUCCESS) { + dprintf("nfs3getsymlink: RPC call failed.\n"); + return (-1); + } + if (linkres.status != NFS3_OK) { + nfs3_error(linkres.status); + return (linkres.status); + } + + *path = symlink_path; + + return (NFS3_OK); +} diff --git a/usr/src/stand/lib/fs/nfs/nfs4_xdr.c b/usr/src/stand/lib/fs/nfs/nfs4_xdr.c new file mode 100644 index 0000000000..e33f2aa4c2 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs4_xdr.c @@ -0,0 +1,437 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/salib.h> +#include <rpc/types.h> +#include <rpc/xdr.h> +#include <rpc/rpc.h> +#include <rpcsvc/nfs4_prot.h> +#include "nfs_inet.h" + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * XDR routines for NFSv4 ops. + */ +static bool_t +xdr_b_utf8string(XDR *xdrs, utf8string *objp) +{ + return (xdr_bytes(xdrs, (char **)&objp->utf8string_val, + (uint_t *)&objp->utf8string_len, NFS4_MAX_UTF8STRING)); +} + +static bool_t +xdr_nfs_bfh4(XDR *xdrs, struct nfs_bfh4 *objp) +{ + char *data = (char *)&objp->data; + return (xdr_bytes(xdrs, (char **)&data, (uint_t *)&objp->len, + NFS4_FHSIZE)); +} + +static bool_t +xdr_b_putfh4_args(XDR *xdrs, putfh4arg_t *objp) +{ + if (!xdr_u_int(xdrs, (uint_t *)&objp->pf_opnum)) + return (FALSE); + return (xdr_nfs_bfh4(xdrs, (struct nfs_bfh4 *)&objp->pf_filehandle)); +} + +/* + * Common xdr routines for compound. Let the specific op routines handle + * op specific portions of the compound. 
+ */ +static bool_t +xdr_b_compound_args(XDR *xdrs, b_compound_t *objp) +{ + if (!xdr_b_utf8string(xdrs, &objp->ca_tag)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->ca_minorversion)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->ca_argarray_len)) + return (FALSE); + if (objp->ca_isputrootfh) + return (xdr_u_int(xdrs, &objp->ca_opputfh.pf_opnum)); + return (xdr_b_putfh4_args(xdrs, &objp->ca_opputfh)); +} + +static bool_t +xdr_b_compound_res(XDR *xdrs, b_compound_t *objp) +{ + if (!xdr_enum(xdrs, (enum_t *)&objp->cr_status)) + return (FALSE); + if (!xdr_b_utf8string(xdrs, &objp->cr_tag)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->cr_resarray_len)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->cr_opputfh)) + return (FALSE); + return (xdr_enum(xdrs, (enum_t *)&objp->cr_putfh_status)); +} + +static bool_t +xdr_b_bitmap4(XDR *xdrs, b_bitmap4_t *objp) +{ + char *arp = (char *)&objp->b_bitmap_val; + return (xdr_array(xdrs, (char **)&arp, + (uint_t *)&objp->b_bitmap_len, ~0, + sizeof (uint_t), (xdrproc_t)xdr_u_int)); +} + +static bool_t +xdr_b_stateid4(XDR *xdrs, stateid4 *objp) +{ + if (!xdr_u_int(xdrs, (uint_t *)&objp->seqid)) + return (FALSE); + return (xdr_opaque(xdrs, objp->other, 12)); +} + +bool_t +xdr_getattr4_args(XDR *xdrs, getattr4arg_t *objp) +{ + if (!xdr_b_compound_args(xdrs, (b_compound_t *)&objp->ga_arg)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->ga_opgetattr)) + return (FALSE); + return (xdr_b_bitmap4(xdrs, (b_bitmap4_t *)&objp->ga_attr_req)); +} + +static bool_t +xdr_b_getattr_res_common(XDR *xdrs, getattrres_cmn_t *objp) +{ + if (!xdr_u_int(xdrs, (uint_t *)&objp->gc_opgetattr)) + return (FALSE); + if (!xdr_enum(xdrs, (enum_t *)&objp->gc_attr_status)) + return (FALSE); + + /* + * If the getattr suceeded, proceed and begin to decode the attributes. 
+ */ + if (objp->gc_attr_status == NFS4_OK) { + char attrvals[sizeof (b_fattr4_t)]; + char *ap = attrvals; + + if (!xdr_b_bitmap4(xdrs, (b_bitmap4_t *)&objp->gc_retattr)) + return (FALSE); + + bzero(&attrvals, sizeof (attrvals)); + if (!xdr_bytes(xdrs, (char **)&ap, + (uint_t *)&objp->gc_attrlist_len, + sizeof (b_fattr4_t))) + return (FALSE); +#ifdef DEBUG + printf("xdr_b_getattr_res_common: attrlist_len = %d\n", + objp->gc_attrlist_len); +#endif + /* + * Go through the bitmap and see if the server + * sent us anything. + */ + if (objp->gc_attrlist_len > 0) { + XDR mxdrs; + b_fattr4_t *fattrp = &objp->gc_attrs; + attr4_bitmap1_t bitmap1; + attr4_bitmap2_t bitmap2; +#ifdef DEBUG + int i; + + printf("dumping contents of attr buffer\n"); + for (i = 0; i < objp->gc_attrlist_len; i++) { + printf("[%d] = 0x%x\n", i, ap[i]); + } +#endif + bitmap1.word = objp->gc_retattr.b_bitmap_val[0]; + bitmap2.word = objp->gc_retattr.b_bitmap_val[1]; + +#ifdef DEBUG + printf("xdr_b_getattr_res_common: bitmap1 = %d " + " bitmap2 = %d\n", + bitmap1.word, bitmap2.word); +#endif + xdrmem_create(&mxdrs, ap, objp->gc_attrlist_len, + XDR_DECODE); + + /* + * Start with the first bitmap + */ + if (bitmap1.word > 0) { + if (bitmap1.bm_supported_attrs) { + if (!xdr_b_bitmap4(&mxdrs, + (b_bitmap4_t *)&fattrp->b_supported_attrs)) + return (FALSE); + } + + if (bitmap1.bm_fattr4_type) { + if (!xdr_enum(&mxdrs, + (enum_t *)&fattrp->b_fattr4_type)) { + return (FALSE); + } + } + if (bitmap1.bm_fattr4_size) { + if (!xdr_u_longlong_t(&mxdrs, + (u_longlong_t *)&fattrp->b_fattr4_size)) + return (FALSE); + } + + if (bitmap1.bm_fattr4_fsid) { + if (!xdr_u_longlong_t(&mxdrs, + (u_longlong_t *)&fattrp->b_fattr4_fsid.major)) + return (FALSE); + + if (!xdr_u_longlong_t(&mxdrs, + (u_longlong_t *)&fattrp->b_fattr4_fsid.minor)) + return (FALSE); + } + if (bitmap1.bm_fattr4_filehandle) { + if (!xdr_nfs_bfh4(&mxdrs, + (struct nfs_bfh4 *)&fattrp->b_fattr4_filehandle)) + return (FALSE); + } + if 
(bitmap1.bm_fattr4_fileid) { + if (!xdr_u_longlong_t(&mxdrs, + (u_longlong_t *)&fattrp->b_fattr4_fileid)) + return (FALSE); + } + } + + /* + * Now the second bitmap + */ + if (bitmap2.word > 0) { + if (bitmap2.bm_fattr4_mode) { + if (!xdr_u_int(&mxdrs, + (uint_t *)&objp->gc_attrs.b_fattr4_mode)) + return (FALSE); + } + + if (bitmap2.bm_fattr4_time_access) { + if (!xdr_longlong_t(&mxdrs, + (longlong_t *)&objp->gc_attrs.b_fattr4_time_access.seconds)) + return (FALSE); + if (!xdr_u_int(&mxdrs, + (uint_t *)&objp->gc_attrs.b_fattr4_time_access.nseconds)) + return (FALSE); + } + + if (bitmap2.bm_fattr4_time_metadata) { + if (!xdr_longlong_t(&mxdrs, + (longlong_t *)&objp->gc_attrs.b_fattr4_time_metadata.seconds)) + return (FALSE); + if (!xdr_u_int(&mxdrs, + (uint_t *)&objp->gc_attrs.b_fattr4_time_metadata.nseconds)) + return (FALSE); + } + + if (bitmap2.bm_fattr4_time_modify) { + if (!xdr_longlong_t(&mxdrs, + (longlong_t *)&objp->gc_attrs.b_fattr4_time_modify.seconds)) + return (FALSE); + if (!xdr_u_int(&mxdrs, + (uint_t *)&objp->gc_attrs.b_fattr4_time_modify.nseconds)) + return (FALSE); + } + } + } + } + return (TRUE); +} + +bool_t +xdr_getattr4_res(XDR *xdrs, getattr4res_t *objp) +{ + if (!xdr_b_compound_res(xdrs, (b_compound_t *)&objp->gr_res)) + return (FALSE); + return (xdr_b_getattr_res_common(xdrs, + (getattrres_cmn_t *)&objp->gr_cmn)); +} + +bool_t +xdr_lookup4_args(XDR *xdrs, lookup4arg_t *objp) +{ + if (!xdr_b_compound_args(xdrs, (b_compound_t *)&objp->la_arg)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->la_oplookup)) + return (FALSE); + if (!xdr_b_utf8string(xdrs, (utf8string *)&objp->la_pathname)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->la_opgetattr)) + return (FALSE); + return (xdr_b_bitmap4(xdrs, (b_bitmap4_t *)&objp->la_attr_req)); +} + +bool_t +xdr_lookup4_res(XDR *xdrs, lookup4res_t *objp) +{ + if (!xdr_b_compound_res(xdrs, (b_compound_t *)&objp->lr_res)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t 
*)&objp->lr_oplookup)) + return (FALSE); + if (!xdr_enum(xdrs, (enum_t *)&objp->lr_lookup_status)) + return (FALSE); + if (objp->lr_lookup_status == NFS4_OK) { + return (xdr_b_getattr_res_common(xdrs, + (getattrres_cmn_t *)&objp->lr_gcmn)); + } + return (TRUE); +} + +bool_t +xdr_lookupp4_args(XDR *xdrs, lookupp4arg_t *objp) +{ + if (!xdr_b_compound_args(xdrs, (b_compound_t *)&objp->la_arg)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->la_oplookupp)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->la_opgetattr)) + return (FALSE); + return (xdr_b_bitmap4(xdrs, (b_bitmap4_t *)&objp->la_attr_req)); +} + +bool_t +xdr_read4_args(XDR *xdrs, read4arg_t *objp) +{ + if (!xdr_b_compound_args(xdrs, (b_compound_t *)&objp->r_arg)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->r_opread)) + return (FALSE); + if (!xdr_b_stateid4(xdrs, (stateid4 *)&objp->r_stateid)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, (u_longlong_t *)&objp->r_offset)) + return (FALSE); + return (xdr_u_int(xdrs, (uint_t *)&objp->r_count)); +} + +bool_t +xdr_read4_res(XDR *xdrs, read4res_t *objp) +{ + if (!xdr_b_compound_res(xdrs, (b_compound_t *)&objp->r_res)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->r_opread)) + return (FALSE); + if (!xdr_enum(xdrs, (enum_t *)&objp->r_status)) + return (FALSE); + if (objp->r_status == NFS4_OK) { + if (!xdr_bool(xdrs, (bool_t *)&objp->r_eof)) + return (FALSE); + return (xdr_bytes(xdrs, (char **)&objp->r_data_val, + (uint_t *)&objp->r_data_len, ~0)); + } + return (TRUE); +} + +bool_t +xdr_readdir4_args(XDR *xdrs, readdir4arg_t *objp) +{ + if (!xdr_b_compound_args(xdrs, (b_compound_t *)&objp->rd_arg)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->rd_opreaddir)) + return (FALSE); + if (!xdr_u_longlong_t(xdrs, (u_longlong_t *)&objp->rd_cookie)) + return (FALSE); + if (!xdr_opaque(xdrs, objp->rd_cookieverf, NFS4_VERIFIER_SIZE)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->rd_dircount)) + return 
(FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->rd_maxcount)) + return (FALSE); + return (xdr_b_bitmap4(xdrs, (b_bitmap4_t *)&objp->rd_attr_req)); +} + +static bool_t +xdr_b_entry4(XDR *xdrs, b_entry4_t *objp) +{ + uint_t attrlen; + char attrvals[sizeof (b_fattr4_t)]; + char *ap = attrvals; + XDR mxdrs; + + if (!xdr_u_longlong_t(xdrs, (u_longlong_t *)&objp->b_cookie)) + return (FALSE); + if (!xdr_b_utf8string(xdrs, &objp->b_name)) + return (FALSE); + + bzero(&attrvals, sizeof (attrvals)); + if (!xdr_bytes(xdrs, (char **)&ap, (uint_t *)&attrlen, + sizeof (b_fattr4_t))) + return (FALSE); + + /* + * We are *only* interested in the fileid, so just extract that. + */ + if (attrlen < sizeof (uint64_t)) + return (FALSE); + + xdrmem_create(&mxdrs, ap, attrlen, XDR_DECODE); + + if (!xdr_u_longlong_t(&mxdrs, (u_longlong_t *)&objp->b_fileid)) + return (FALSE); + return (xdr_pointer(xdrs, (char **)&objp->b_nextentry, + sizeof (b_entry4_t), (xdrproc_t)xdr_b_entry4)); +} + +bool_t +xdr_readdir4_res(XDR *xdrs, readdir4res_t *objp) +{ + if (!xdr_b_compound_res(xdrs, (b_compound_t *)&objp->rd_res)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->rd_opreaddir)) + return (FALSE); + if (!xdr_enum(xdrs, (enum_t *)&objp->rd_status)) + return (FALSE); + if (objp->rd_status == NFS4_OK) { + if (!xdr_opaque(xdrs, objp->rd_cookieverf, NFS4_VERIFIER_SIZE)) + return (FALSE); + if (!xdr_pointer(xdrs, (char **)&objp->rd_entries, + sizeof (b_entry4_t), (xdrproc_t)xdr_b_entry4)) + return (FALSE); + return (xdr_bool(xdrs, &objp->rd_eof)); + } + return (TRUE); +} + +bool_t +xdr_readlink4_args(XDR *xdrs, readlink4arg_t *objp) +{ + if (!xdr_b_compound_args(xdrs, (b_compound_t *)&objp->rl_arg)) + return (FALSE); + return (xdr_u_int(xdrs, (uint_t *)&objp->rl_opreadlink)); +} + +bool_t +xdr_readlink4_res(XDR *xdrs, readlink4res_t *objp) +{ + if (!xdr_b_compound_res(xdrs, (b_compound_t *)&objp->rl_res)) + return (FALSE); + if (!xdr_u_int(xdrs, (uint_t *)&objp->rl_opreadlink)) + return (FALSE); 
+ if (!xdr_enum(xdrs, (enum_t *)&objp->rl_status)) + return (FALSE); + if (objp->rl_status == NFS4_OK) + return (xdr_b_utf8string(xdrs, (utf8string *)&objp->rl_link)); + return (TRUE); +} diff --git a/usr/src/stand/lib/fs/nfs/nfs4ops.c b/usr/src/stand/lib/fs/nfs/nfs4ops.c new file mode 100644 index 0000000000..21f856b714 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs4ops.c @@ -0,0 +1,692 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ * + * Simple nfs V4 ops + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <sys/fcntl.h> +#include <sys/vfs.h> +#include <errno.h> +#include <sys/promif.h> +#include <rpc/xdr.h> +#include "nfs_inet.h" +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootdebug.h> +#include <sys/salib.h> +#include <sys/sacache.h> +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs4_prot.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +static struct timeval zero_timeout = {0, 0}; /* default */ + +/* + * NFS Version 4 specific functions + */ + +ssize_t +nfs4read(struct nfs_file *filep, char *buf, size_t size) +{ + enum clnt_stat status; + read4arg_t readargs; + read4res_t readres; + char *buf_offset; + uint_t count = 0; + uint_t readcnt = 0; + bool_t done = FALSE; + struct timeval timeout; + int framing_errs = 0; +#ifndef i386 + static uint_t pos; + static char ind[] = "|/-\\"; + static int blks_read; +#endif + utf8string str; + char tagname[] = "inetboot read"; + + bzero(&readres, sizeof (readres)); + + str.utf8string_len = sizeof (tagname) - 1; + str.utf8string_val = tagname; + + /* + * read + */ + buf_offset = buf; + + if (nfs_readsize == 0) + nfs_readsize = READ4_SIZE; + + if (size < nfs_readsize) + readargs.r_count = size; + else + readargs.r_count = nfs_readsize; + + if (filep->fh.fh4.len > 0) + compound_init(&readargs.r_arg, &str, 0, 2, &filep->fh.fh4); + else + compound_init(&readargs.r_arg, &str, 0, 2, NULL); + + readargs.r_opread = OP_READ; + /* + * zero out the stateid field + */ + bzero(&readargs.r_stateid, sizeof (readargs.r_stateid)); + readargs.r_offset = filep->offset; + + do { + readres.r_data_val = buf_offset; + + if ((count + readargs.r_count) > size) + readargs.r_count = size - count; + + timeout.tv_sec = NFS_REXMIT_MIN; + timeout.tv_usec = 0; + + do { + status = CLNT_CALL(root_CLIENT, NFSPROC4_COMPOUND, + xdr_read4_args, 
(caddr_t)&readargs, + xdr_read4_res, (caddr_t)&readres, + timeout); + + if (status == RPC_TIMEDOUT) { + dprintf("NFS read(%d) timed out. Retrying...\n", readargs.r_count); + if (errno == ETIMEDOUT) + framing_errs++; + + if (framing_errs > NFS_MAX_FERRS && + readargs.r_count > NFS_READ_DECR) { + readargs.r_count /= 2; + nfs_readsize /= 2; + dprintf("NFS read size now %d.\n", + nfs_readsize); + timeout.tv_sec = NFS_REXMIT_MIN; + framing_errs = 0; + } else { + if (timeout.tv_sec < NFS_REXMIT_MAX) + timeout.tv_sec++; + else + timeout.tv_sec = 0; + } + } + } while (status == RPC_TIMEDOUT); + + if (status != RPC_SUCCESS) + return (-1); + + if (readres.r_status != NFS4_OK) { + nfs4_error(readres.r_status); + return (-1); + } + + readcnt = readres.r_data_len; + + if (readres.r_eof == TRUE) + done = TRUE; + + if (readcnt < readargs.r_count) { +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("nfs4read: partial read %d instead of %d\n", readcnt, + readargs.count); +#endif + done = TRUE; + } + + count += readcnt; + filep->offset += readcnt; + buf_offset += readcnt; + readargs.r_offset += readcnt; +#ifndef i386 + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); +#endif + } while (count < size && !done); + + return (count); +} + + +static vtype_t nf4_to_vt[] = { + VBAD, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO +}; + +int +nfs4getattr(struct nfs_file *nfp, struct vattr *vap) +{ + enum clnt_stat status; + attr4_bitmap1_t bitmap1; + attr4_bitmap2_t bitmap2; + getattr4arg_t getattrargs; + getattr4res_t getattrres; + b_fattr4_t *bfattr4; + utf8string str; + char tagname[] = "inetboot getattr"; + + bzero(&getattrres, sizeof (getattrres)); + /* + * Putfh + */ + str.utf8string_len = sizeof (tagname) - 1; + str.utf8string_val = tagname; + + if (nfp->fh.fh4.len > 0) + compound_init(&getattrargs.ga_arg, &str, 0, 2, &nfp->fh.fh4); + else + compound_init(&getattrargs.ga_arg, &str, 0, 2, NULL); + + /* + * getattr + */ + getattrargs.ga_opgetattr = 
OP_GETATTR; + /* + * Set up the attribute bitmap. We pretty much need everything + * except for the filehandle and supported attrs. + */ + bitmap1.word = 0; + bitmap1.bm_fattr4_type = 1; + bitmap1.bm_fattr4_size = 1; + bitmap1.bm_fattr4_fileid = 1; + bitmap2.word = 0; + bitmap2.bm_fattr4_mode = 1; + bitmap2.bm_fattr4_time_access = 1; + bitmap2.bm_fattr4_time_metadata = 1; + bitmap2.bm_fattr4_time_modify = 1; + + getattrargs.ga_attr_req.b_bitmap_len = NFS4_MAX_BITWORDS; + getattrargs.ga_attr_req.b_bitmap_val[0] = bitmap1.word; + getattrargs.ga_attr_req.b_bitmap_val[1] = bitmap2.word; + + status = CLNT_CALL(root_CLIENT, NFSPROC4_COMPOUND, xdr_getattr4_args, + (caddr_t)&getattrargs, xdr_getattr4_res, + (caddr_t)&getattrres, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs4getattr: RPC error %d\n", status); + return (-1); + } + + if (getattrres.gr_attr_status != NFS4_OK) { + nfs4_error(getattrres.gr_attr_status); + return (getattrres.gr_attr_status); + } + + bfattr4 = &getattrres.gr_attrs; + if (vap->va_mask & AT_TYPE) { + if (bfattr4->b_fattr4_type < NF4REG || + bfattr4->b_fattr4_type > NF4FIFO) + vap->va_type = VBAD; + else + vap->va_type = nf4_to_vt[bfattr4->b_fattr4_type]; + } + if (vap->va_mask & AT_MODE) + vap->va_mode = (mode_t)bfattr4->b_fattr4_mode; + if (vap->va_mask & AT_SIZE) + vap->va_size = (u_offset_t)bfattr4->b_fattr4_size; + if (vap->va_mask & AT_NODEID) + vap->va_nodeid = (uint64_t)bfattr4->b_fattr4_fileid; + /* + * XXX - may need to do something more here. 
+ */ + if (vap->va_mask & AT_ATIME) { + vap->va_atime.tv_sec = bfattr4->b_fattr4_time_access.seconds; + vap->va_atime.tv_nsec = bfattr4->b_fattr4_time_access.nseconds; + } + if (vap->va_mask & AT_CTIME) { + vap->va_ctime.tv_sec = bfattr4->b_fattr4_time_metadata.seconds; + vap->va_ctime.tv_nsec = + bfattr4->b_fattr4_time_metadata.nseconds; + } + if (vap->va_mask & AT_MTIME) { + vap->va_mtime.tv_sec = bfattr4->b_fattr4_time_modify.seconds; + vap->va_mtime.tv_nsec = bfattr4->b_fattr4_time_modify.nseconds; + } + + return (NFS4_OK); +} + +/* + * Display nfs error messages. + */ +/*ARGSUSED*/ +void +nfs4_error(enum nfsstat4 status) +{ + if (!(boothowto & RB_DEBUG)) + return; + + switch (status) { + case NFS4_OK: + printf("NFS: No error.\n"); + break; + case NFS4ERR_PERM: + printf("NFS: Not owner.\n"); + break; + case NFS4ERR_NOENT: +#ifdef NFS_OPS_DEBUG + printf("NFS: No such file or directory.\n"); +#endif /* NFS_OPS_DEBUG */ + break; + case NFS4ERR_IO: + printf("NFS: IO ERROR occurred on NFS server.\n"); + break; + case NFS4ERR_NXIO: + printf("NFS: No such device or address.\n"); + break; + case NFS4ERR_ACCESS: + printf("NFS: Permission denied.\n"); + break; + case NFS4ERR_EXIST: + printf("NFS: File exists.\n"); + break; + case NFS4ERR_XDEV: + printf("NFS: Cross device hard link.\n"); + break; + case NFS4ERR_NOTDIR: + printf("NFS: Not a directory.\n"); + break; + case NFS4ERR_ISDIR: + printf("NFS: Is a directory.\n"); + break; + case NFS4ERR_INVAL: + printf("NFS: Invalid argument.\n"); + break; + case NFS4ERR_FBIG: + printf("NFS: File too large.\n"); + break; + case NFS4ERR_NOSPC: + printf("NFS: No space left on device.\n"); + break; + case NFS4ERR_ROFS: + printf("NFS: Read-only filesystem.\n"); + break; + case NFS4ERR_MLINK: + printf("NFS: Too many hard links.\n"); + break; + case NFS4ERR_NAMETOOLONG: + printf("NFS: File name too long.\n"); + break; + case NFS4ERR_NOTEMPTY: + printf("NFS: Directory not empty.\n"); + break; + case NFS4ERR_DQUOT: + printf("NFS: Disk 
quota exceeded.\n"); + break; + case NFS4ERR_STALE: + printf("NFS: Stale file handle.\n"); + break; + case NFS4ERR_BADHANDLE: + printf("NFS: Illegal NFS file handle.\n"); + break; + case NFS4ERR_BAD_COOKIE: + printf("NFS: Stale Cookie.\n"); + break; + case NFS4ERR_NOTSUPP: + printf("NFS: Operation is not supported.\n"); + break; + case NFS4ERR_TOOSMALL: + printf("NFS: Buffer too small.\n"); + break; + case NFS4ERR_SERVERFAULT: + printf("NFS: Server fault.\n"); + break; + case NFS4ERR_BADTYPE: + printf("NFS: Unsupported object type.\n"); + break; + case NFS4ERR_BAD_STATEID: + printf("NFS: Bad stateid\n"); + break; + case NFS4ERR_BAD_SEQID: + printf("NFS: Bad seqid\n"); + break; + default: + printf("NFS: unknown error.\n"); + break; + } +} + +/* + * lookup one component. for multicomponent lookup use a driver like lookup(). + */ +struct nfs_file * +nfs4lookup(struct nfs_file *dir, char *name, int *nstat) +{ + static struct nfs_file cd; + attr4_bitmap1_t bitmap1; + lookup4arg_t lookupargs; + lookup4res_t lookupres; + enum clnt_stat status; + utf8string str; + char tagname[] = "inetboot lookup"; + + /* + * NFSv4 uses a special LOOKUPP op + * for looking up the parent directory. + */ + if (strcmp(name, "..") == 0) + return (nfs4lookupp(dir, nstat, NULL)); + + *nstat = (int)NFS4_OK; + + bzero(&lookupres, sizeof (lookupres)); + + /* + * Check if we have a filehandle and initialize the compound + * with putfh or putrootfh appropriately. 
+ */ + str.utf8string_len = sizeof (tagname) - 1; + str.utf8string_val = tagname; + + if (dir->fh.fh4.len > 0) + compound_init(&lookupargs.la_arg, &str, 0, 3, &dir->fh.fh4); + else + compound_init(&lookupargs.la_arg, &str, 0, 3, NULL); + + /* + * lookup + */ + lookupargs.la_oplookup = OP_LOOKUP; + /* + * convert the pathname from char * to utf8string + */ + lookupargs.la_pathname.utf8string_len = strlen(name); + lookupargs.la_pathname.utf8string_val = + bkmem_alloc(lookupargs.la_pathname.utf8string_len); + if (lookupargs.la_pathname.utf8string_val == NULL) { + dprintf("nfs4lookup: bkmem_alloc failed\n"); + return (NULL); + } + bcopy(name, lookupargs.la_pathname.utf8string_val, + lookupargs.la_pathname.utf8string_len); + + /* + * Setup the attr bitmap. All we need is the type and filehandle info + */ + lookupargs.la_opgetattr = OP_GETATTR; + bitmap1.word = 0; + bitmap1.bm_fattr4_type = 1; + bitmap1.bm_fattr4_filehandle = 1; + lookupargs.la_attr_req.b_bitmap_len = 1; + lookupargs.la_attr_req.b_bitmap_val[0] = bitmap1.word; + lookupargs.la_attr_req.b_bitmap_val[1] = 0; + + status = CLNT_CALL(root_CLIENT, NFSPROC4_COMPOUND, xdr_lookup4_args, + (caddr_t)&lookupargs, xdr_lookup4_res, + (caddr_t)&lookupres, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs4lookup: RPC error. 
status %d\n", status); + return (NULL); + } + + if (lookupres.lr_lookup_status != NFS4_OK) { +#ifdef DEBUG + dprintf("nfs4lookup: lookup status = %d\n", + lookupres.lr_lookup_status); +#endif + nfs4_error(lookupres.lr_lookup_status); + *nstat = (int)lookupres.lr_lookup_status; + if (lookupargs.la_pathname.utf8string_val != NULL) + bkmem_free(lookupargs.la_pathname.utf8string_val, + lookupargs.la_pathname.utf8string_len); + return (NULL); + } + + if (lookupres.lr_attr_status != NFS4_OK) { +#ifdef DEBUG + dprintf("nfs4lookup: getattr status = %d\n", + lookupres.lr_attr_status); +#endif + nfs4_error(lookupres.lr_attr_status); + *nstat = (int)lookupres.lr_attr_status; + if (lookupargs.la_pathname.utf8string_val != NULL) + bkmem_free(lookupargs.la_pathname.utf8string_val, + lookupargs.la_pathname.utf8string_len); + return (NULL); + } + + /* + * We have all the information we need to update the file pointer + */ + bzero((caddr_t)&cd, sizeof (struct nfs_file)); + cd.version = NFS_V4; + cd.ftype.type4 = lookupres.lr_attrs.b_fattr4_type; + cd.fh.fh4.len = lookupres.lr_attrs.b_fattr4_filehandle.len; + bcopy(lookupres.lr_attrs.b_fattr4_filehandle.data, cd.fh.fh4.data, + cd.fh.fh4.len); + + /* + * Free the arg string + */ + if (lookupargs.la_pathname.utf8string_val != NULL) + bkmem_free(lookupargs.la_pathname.utf8string_val, + lookupargs.la_pathname.utf8string_len); + + return (&cd); +} + +/* + * lookup parent directory. + */ +struct nfs_file * +nfs4lookupp(struct nfs_file *dir, int *nstat, uint64_t *fileid) +{ + static struct nfs_file cd; + attr4_bitmap1_t bitmap1; + lookupp4arg_t lookuppargs; + lookup4res_t lookupres; + enum clnt_stat status; + utf8string str; + char tagname[] = "inetboot lookupp"; + + *nstat = (int)NFS4_OK; + + bzero(&lookupres, sizeof (lookupres)); + + /* + * Check if we have a filehandle and initialize the compound + * with putfh or putrootfh appropriately. 
+ */ + str.utf8string_len = sizeof (tagname) - 1; + str.utf8string_val = tagname; + + if (dir->fh.fh4.len > 0) + compound_init(&lookuppargs.la_arg, &str, 0, 3, &dir->fh.fh4); + else + compound_init(&lookuppargs.la_arg, &str, 0, 3, NULL); + + /* + * lookupp + */ + lookuppargs.la_oplookupp = OP_LOOKUPP; + /* + * Setup the attr bitmap. Normally, all we need is the type and + * filehandle info, but getdents might require the fileid of the + * parent. + */ + lookuppargs.la_opgetattr = OP_GETATTR; + bitmap1.word = 0; + bitmap1.bm_fattr4_type = 1; + bitmap1.bm_fattr4_filehandle = 1; + if (fileid != NULL) + bitmap1.bm_fattr4_fileid = 1; + lookuppargs.la_attr_req.b_bitmap_len = 1; + lookuppargs.la_attr_req.b_bitmap_val[0] = bitmap1.word; + lookuppargs.la_attr_req.b_bitmap_val[1] = 0; + + status = CLNT_CALL(root_CLIENT, NFSPROC4_COMPOUND, xdr_lookupp4_args, + (caddr_t)&lookuppargs, xdr_lookup4_res, + (caddr_t)&lookupres, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs4lookupp: RPC error. 
status %d\n", status); + return (NULL); + } + + if (lookupres.lr_lookup_status != NFS4_OK) { +#ifdef DEBUG + dprintf("nfs4lookupp: lookupp status = %d\n", + lookupres.lr_lookup_status); +#endif + nfs4_error(lookupres.lr_lookup_status); + *nstat = (int)lookupres.lr_lookup_status; + return (NULL); + } + + if (lookupres.lr_attr_status != NFS4_OK) { +#ifdef DEBUG + dprintf("nfs4lookupp: getattr status = %d\n", + lookupres.lr_attr_status); +#endif + nfs4_error(lookupres.lr_attr_status); + *nstat = (int)lookupres.lr_attr_status; + return (NULL); + } + + /* + * We have all the information we need to update the file pointer + */ + bzero((caddr_t)&cd, sizeof (struct nfs_file)); + cd.version = NFS_V4; + cd.ftype.type4 = lookupres.lr_attrs.b_fattr4_type; + cd.fh.fh4.len = lookupres.lr_attrs.b_fattr4_filehandle.len; + bcopy(lookupres.lr_attrs.b_fattr4_filehandle.data, cd.fh.fh4.data, + cd.fh.fh4.len); + + /* + * Fill in the fileid if the user passed in one + */ + if (fileid != NULL) + *fileid = lookupres.lr_attrs.b_fattr4_fileid; + + return (&cd); +} + +/* + * Gets symbolic link into pathname. 
+ */ +int +nfs4getsymlink(struct nfs_file *cfile, char **path) +{ + enum clnt_stat status; + readlink4arg_t readlinkargs; + readlink4res_t readlinkres; + static char symlink_path[NFS_MAXPATHLEN]; + int spathlen; + utf8string str; + char tagname[] = "inetboot getsymlink"; + int error = NFS4_OK; + + bzero(&readlinkres, sizeof (readlinkres)); + + /* + * readlink + */ + str.utf8string_len = sizeof (tagname) - 1; + str.utf8string_val = tagname; + + if (cfile->fh.fh4.len > 0) + compound_init(&readlinkargs.rl_arg, &str, 0, 2, + &cfile->fh.fh4); + else + compound_init(&readlinkargs.rl_arg, &str, 0, 2, NULL); + + readlinkargs.rl_opreadlink = OP_READLINK; + status = CLNT_CALL(root_CLIENT, NFSPROC4_COMPOUND, xdr_readlink4_args, + (caddr_t)&readlinkargs, xdr_readlink4_res, + (caddr_t)&readlinkres, zero_timeout); + + if (status != RPC_SUCCESS) { + dprintf("nfs4getsymlink: RPC readlink error %d\n", status); + error = -1; + goto out; + } + + if (readlinkres.rl_status != NFS4_OK) { + nfs4_error(readlinkres.rl_status); + error = readlinkres.rl_status; + goto out; + } + + /* + * Convert the utf8string to a normal character string + */ + spathlen = readlinkres.rl_link.utf8string_len; + if (spathlen <= 0 || readlinkres.rl_link.utf8string_val == NULL) { + *path = NULL; + error = readlinkres.rl_status; + goto out; + } + + bcopy(readlinkres.rl_link.utf8string_val, symlink_path, spathlen); + symlink_path[spathlen] = '\0'; + *path = symlink_path; + +out: + /* + * Free the results + */ + if (readlinkres.rl_link.utf8string_val != NULL) + bkmem_free(readlinkres.rl_link.utf8string_val, spathlen); + + return (error); +} + +/* + * Should just forget about the tag, but will leave in support for the time + * being. 
+ */
/*
 * compound_init: fill in the request half of a b_compound_t.
 *
 *	cp	- compound structure to initialise
 *	str	- optional tag; NULL or zero length means "no tag"
 *	mvers	- value stored as the minor version
 *	arglen	- value stored as the argument array length
 *	pfh	- filehandle for PUTFH, or NULL to request PUTROOTFH
 *
 * The tag bytes are not copied; the compound only records the caller's
 * pointer and length.
 */
void
compound_init(b_compound_t *cp, utf8string *str, uint_t mvers, uint_t arglen,
		struct nfs_bfh4 *pfh)
{
	int	have_tag = (str != NULL && str->utf8string_len != 0);

	cp->ca_tag.utf8string_len = have_tag ? str->utf8string_len : 0;
	cp->ca_tag.utf8string_val = have_tag ? str->utf8string_val : NULL;

	cp->ca_minorversion = mvers;
	cp->ca_argarray_len = arglen;

	/* With a filehandle the compound starts with PUTFH on that handle. */
	if (pfh != NULL) {
		cp->ca_isputrootfh = FALSE;
		cp->ca_opputfh.pf_opnum = OP_PUTFH;
		cp->ca_opputfh.pf_filehandle.len = pfh->len;
		bcopy(pfh->data, cp->ca_opputfh.pf_filehandle.data, pfh->len);
		return;
	}

	/* No filehandle supplied: start from the root with PUTROOTFH. */
	cp->ca_isputrootfh = TRUE;
	cp->ca_opputfh.pf_opnum = OP_PUTROOTFH;
}
diff --git a/usr/src/stand/lib/fs/nfs/nfs_inet.h b/usr/src/stand/lib/fs/nfs/nfs_inet.h new file mode 100644 index 0000000000..0abb0d3514 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs_inet.h @@ -0,0 +1,462 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * nfs_inet.h contains definitions specific to inetboot's nfs implementation. 
+ */ + +#ifndef _NFS_INET_H +#define _NFS_INET_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <netinet/in.h> +#include <sys/socket.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <sys/saio.h> +#include <rpcsvc/nfs_prot.h> +#include <rpcsvc/nfs4_prot.h> +#include "clnt.h" +#include <sys/vfs.h> +#include <sys/dirent.h> + +#define NFSBUF_SIZE (READ_SIZE+1024) +#define READ_SIZE (8192) /* NFS readsize */ +#define NFS_READ_DECR (1024) /* NFS readsize decrement */ +#define NFS3BUF_SIZE (READ3_SIZE+1024) +#define READ3_SIZE (32 * 1024) /* NFS3 readsize */ +#define NFS4BUF_SIZE (READ4_SIZE+1024) +#define READ4_SIZE (32 * 1024) /* NFS4 readsize */ +#define NFS4_MAX_UTF8STRING (8 * 1024) +#define NFS4_MAX_BITWORDS (2) +#define NFS_MAX_FERRS (3) /* MAX frame errors before decr read size */ +#define NFS_REXMIT_MIN (3) /* NFS retry min in secs */ +#define NFS_REXMIT_MAX (15) /* NFS retry max in secs */ + +extern int nfs_readsize; +extern struct nfs_file roothandle; +extern CLIENT *root_CLIENT; + +/* + * Boot specific V4 fh with maximum allowed data statically allocated + */ +struct nfs_bfh4 { + uint_t len; + char data[NFS4_FHSIZE]; +}; + +/* + * Boot specific V3 fh with maximum allowed data statically allocated + */ +struct nfs_bfh3 { + uint_t len; + char data[NFS3_FHSIZE]; +}; + +union _nfs_fh { + nfs_fh fh2; + struct nfs_bfh3 fh3; + struct nfs_bfh4 fh4; +}; + +union _nfs_cookie { + nfscookie cookie2; + cookie3 cookie3; + nfs_cookie4 cookie4; +}; + +union _nfs_ftype { + ftype type2; + ftype3 type3; + nfs_ftype4 type4; +}; + +/* + * NFS: This structure represents the current open file. 
+ */ +struct nfs_file { + int version; + ulong_t offset; + union _nfs_ftype ftype; + union _nfs_fh fh; + union _nfs_cookie cookie; +}; + +struct nfs_fid { + ushort_t nf_len; + ushort_t nf_pad; + struct nfs_fh fh; +}; + +#define cfile_is_dir(cf) (((cf)->version == NFS_VERSION) ? \ + ((cf)->ftype.type2 == NFDIR) : \ + (((cf)->version == NFS_V3) ? \ + ((cf)->ftype.type3 == NF3DIR) : \ + (((cf)->version == NFS_V4) ? \ + ((cf)->ftype.type4 == NF4DIR) : 0))) + +#define cfile_is_lnk(cf) (((cf)->version == NFS_VERSION) ? \ + ((cf)->ftype.type2 == NFLNK) : \ + (((cf)->version == NFS_V3) ? \ + ((cf)->ftype.type3 == NF3LNK) : \ + (((cf)->version == NFS_V4) ? \ + ((cf)->ftype.type4 == NF4LNK) : 0))) + +/* + * Predefine an attribute bitmap that inetboot will most likely be + * interested in. + */ +typedef union attr4_bitmap1_u { + struct { + unsigned int +#ifdef _BIT_FIELDS_HTOL + b_pad4: 11, + b_fattr4_fileid: 1, + b_fattr4_filehandle: 1, + b_pad3: 10, + b_fattr4_fsid: 1, + b_pad2: 3, + b_fattr4_size: 1, + b_pad1: 2, + b_fattr4_type: 1, + b_supported_attrs: 1; +#endif +#ifdef _BIT_FIELDS_LTOH + b_supported_attrs: 1, + b_fattr4_type: 1, + b_pad1: 2, + b_fattr4_size: 1, + b_pad2: 3, + b_fattr4_fsid: 1, + b_pad3: 10, + b_fattr4_filehandle: 1, + b_fattr4_fileid: 1, + b_pad4: 11; +#endif + } bitmap_s; + uint_t word; +} attr4_bitmap1_t; + +#define bm_supported_attrs bitmap_s.b_supported_attrs +#define bm_fattr4_type bitmap_s.b_fattr4_type +#define bm_fattr4_size bitmap_s.b_fattr4_size +#define bm_fattr4_fsid bitmap_s.b_fattr4_fsid +#define bm_fattr4_fileid bitmap_s.b_fattr4_fileid +#define bm_fattr4_filehandle bitmap_s.b_fattr4_filehandle + +typedef union attr4_bitmap2_u { + struct { + unsigned int +#ifdef _BIT_FIELDS_HTOL + b_pad4: 10, + b_fattr4_time_modify: 1, + b_fattr4_time_metadata: 1, + b_pad3: 4, + b_fattr4_time_access: 1, + b_pad2: 13, + b_fattr4_mode: 1, + b_pad1: 1; +#endif +#ifdef _BIT_FIELDS_LTOH + b_pad1: 1, + b_fattr4_mode: 1, + b_pad2: 13, + b_fattr4_time_access: 1, 
+ b_pad3: 4, + b_fattr4_time_metadata: 1, + b_fattr4_time_modify: 1, + b_pad4: 10; +#endif + } bitmap_s; + uint_t word; +} attr4_bitmap2_t; + +#define bm_fattr4_mode bitmap_s.b_fattr4_mode +#define bm_fattr4_time_access bitmap_s.b_fattr4_time_access +#define bm_fattr4_time_metadata bitmap_s.b_fattr4_time_metadata +#define bm_fattr4_time_modify bitmap_s.b_fattr4_time_modify + +typedef struct b_bitmap4 { + uint_t b_bitmap_len; + uint_t b_bitmap_val[NFS4_MAX_BITWORDS]; +} b_bitmap4_t; + +/* + * Define a usable set of v4 atttributes for inetboot. + */ +typedef struct b_fattr4_s { + b_bitmap4_t b_supported_attrs; + nfs_ftype4 b_fattr4_type; + uint64_t b_fattr4_size; + fsid4 b_fattr4_fsid; + struct nfs_bfh4 b_fattr4_filehandle; + uint64_t b_fattr4_fileid; + mode4 b_fattr4_mode; + nfstime4 b_fattr4_time_access; + nfstime4 b_fattr4_time_metadata; + nfstime4 b_fattr4_time_modify; +} b_fattr4_t; + +/* + * common to putfh and putfhroot. + */ +typedef struct putfh4arg_s { + uint_t pf_opnum; /* can either be putfh or putrootfh */ + struct nfs_bfh4 pf_filehandle; /* only used by putfh */ +} putfh4arg_t; + +/* + * Use this struct to construct our OTW compound procedures. Layout makes for + * easy XDR'ing. Include putfh. 
+ */ +typedef union compound_u { + struct { + utf8string tag; + uint_t minorversion; /* 0 */ + uint_t argarray_len; /* 1 + n for putfh */ + bool_t isputrootfh; /* flag */ + putfh4arg_t opputfh; /* putfh args */ + } compound_ua_s; + struct { + nfsstat4 status; /* status of last op */ + utf8string tag; + uint_t resarray_len; /* 1 + n for putfh */ + uint_t opputfh; /* putfh opnum */ + nfsstat4 putfh_status; /* putfh status */ + } compound_ur_s; +} b_compound_t; + +/* + * Define some macros for easy access into the compound structrue + */ +#define ca_tag compound_ua_s.tag +#define ca_minorversion compound_ua_s.minorversion +#define ca_argarray_len compound_ua_s.argarray_len +#define ca_isputrootfh compound_ua_s.isputrootfh +#define ca_opputfh compound_ua_s.opputfh + +#define cr_status compound_ur_s.status +#define cr_tag compound_ur_s.tag +#define cr_resarray_len compound_ur_s.resarray_len +#define cr_opputfh compound_ur_s.opputfh +#define cr_putfh_status compound_ur_s.putfh_status +/* + * Define simple compound structs that include op specific data + */ +typedef struct getattrres_cmn { + uint_t gc_opgetattr; /* getattr opnum */ + nfsstat4 gc_attr_status; /* getattr result */ + b_bitmap4_t gc_retattr; /* getattr result */ + uint_t gc_attrlist_len; /* getattr result */ + b_fattr4_t gc_attrs; /* getattr result */ +} getattrres_cmn_t; + +/* + * getattr: putfh/getattr + */ +typedef struct getattr4arg_s { + b_compound_t ga_arg; /* compound + putfh */ + uint_t ga_opgetattr; /* getattr opnum */ + b_bitmap4_t ga_attr_req; /* getattr arg */ +} getattr4arg_t; + +typedef struct getattr4res_s { + b_compound_t gr_res; /* compound + putfh */ + getattrres_cmn_t gr_cmn; +} getattr4res_t; + +#define gr_opgetattr gr_cmn.gc_opgetattr +#define gr_attr_status gr_cmn.gc_attr_status +#define gr_retattr gr_cmn.gc_retattr +#define gr_attrs gr_cmn.gc_attrs + +/* + * lookup: putfh/lookup/getattr + */ +typedef struct lookup4arg_s { + b_compound_t la_arg; /* compound + putfh */ + uint_t 
la_oplookup; /* lookup opnum */ + component4 la_pathname; /* lookup arg */ + uint_t la_opgetattr; /* getattr opnum */ + b_bitmap4_t la_attr_req; /* getattr arg */ +} lookup4arg_t; + +typedef struct lookup4res_s { + b_compound_t lr_res; /* compound + putfh */ + uint_t lr_oplookup; /* lookup opnum */ + nfsstat4 lr_lookup_status; /* lookup result */ + getattrres_cmn_t lr_gcmn; /* getattr result */ +} lookup4res_t; + +#define lr_opgetattr lr_gcmn.gc_opgetattr +#define lr_attr_status lr_gcmn.gc_attr_status +#define lr_retattr lr_gcmn.gc_retattr +#define lr_attrs lr_gcmn.gc_attrs + +/* + * lookupp: putfh/lookupp/getattr + * + * For results: use the lookup4res_t + */ +typedef struct lookupp4arg_s { + b_compound_t la_arg; /* compound + putfh */ + uint_t la_oplookupp; /* lookupp opnum */ + uint_t la_opgetattr; /* lookupp arg */ + b_bitmap4_t la_attr_req; /* lookupp arg */ +} lookupp4arg_t; + +/* + * read: putfh/read + */ +typedef struct read4arg_s { + b_compound_t r_arg; /* compound + putfh */ + uint_t r_opread; /* read opnum */ + stateid4 r_stateid; /* read arg */ + offset4 r_offset; /* read arg */ + count4 r_count; /* read arg */ +} read4arg_t; + +typedef struct read4res_s { + b_compound_t r_res; /* compound + putfh */ + uint_t r_opread; /* read opnum */ + nfsstat4 r_status; /* read result */ + bool_t r_eof; /* read result */ + uint_t r_data_len; /* read result */ + char *r_data_val; /* read result */ +} read4res_t; + +typedef struct b_entry4_s { + nfs_cookie4 b_cookie; + utf8string b_name; + uint64_t b_fileid; + struct b_entry4_s *b_nextentry; +} b_entry4_t; + +/* + * readdir: putfh/readdir/getattr + */ +typedef struct readdir4arg_s { + b_compound_t rd_arg; /* compoud + putfh */ + uint_t rd_opreaddir; /* readdir opnum */ + nfs_cookie4 rd_cookie; /* readdir arg */ + verifier4 rd_cookieverf; /* readdir arg */ + count4 rd_dircount; /* readdir arg */ + count4 rd_maxcount; /* readdir arg */ + b_bitmap4_t rd_attr_req; /* readdir arg */ +} readdir4arg_t; + +typedef struct 
readdir4res_s { + b_compound_t rd_res; /* compound + putfh */ + uint_t rd_opreaddir; /* readdir opnum */ + nfsstat4 rd_status; /* readdir result */ + verifier4 rd_cookieverf; /* readdir result */ + b_entry4_t *rd_entries; /* readdir result */ + bool_t rd_eof; /* readdir result */ +} readdir4res_t; + +/* + * readlink: putfh/readlink + */ +typedef struct readlink4arg_s { + b_compound_t rl_arg; /* compound + putfh */ + uint_t rl_opreadlink; /* readlink opnum */ +} readlink4arg_t; + +typedef struct readlink4res_s { + b_compound_t rl_res; /* compound + putfh */ + uint_t rl_opreadlink; /* readlink opnum */ + nfsstat4 rl_status; /* readlink result */ + utf8string rl_link; /* readlink result */ +} readlink4res_t; + +/* + * Generic NFS functions + */ +extern int boot_nfs_mountroot(char *); +extern int boot_nfs_unmountroot(void); +extern int lookup(char *pathname, struct nfs_file *, bool_t); +extern bool_t whoami(void); +extern bool_t getfile(char *, char *, struct in_addr *, char *); + +/* + * NFS Version 2 specific functions + */ +extern void nfs_error(enum nfsstat); +extern ssize_t nfsread(struct nfs_file *, char *, size_t); +extern int nfsgetattr(struct nfs_file *, struct vattr *); +extern int nfsgetdents(struct nfs_file *, struct dirent *, unsigned); +extern struct nfs_file *nfslookup(struct nfs_file *, char *, int *); +extern int nfsgetsymlink(struct nfs_file *cfile, char **path); + +/* + * NFS Version 3 specific functions + */ +extern void nfs3_error(enum nfsstat3); +extern ssize_t nfs3read(struct nfs_file *, char *, size_t); +extern int nfs3getattr(struct nfs_file *, struct vattr *); +extern int nfs3getdents(struct nfs_file *, struct dirent *, unsigned); +extern struct nfs_file *nfs3lookup(struct nfs_file *, char *, int *); +extern int nfs3getsymlink(struct nfs_file *, char **); + +/* + * NFS Version 4 specific functions + */ +extern void nfs4_error(enum nfsstat4); +extern ssize_t nfs4read(struct nfs_file *, char *, size_t); +extern int nfs4getattr(struct nfs_file *, 
struct vattr *); +extern int nfs4_getdents(struct nfs_file *, struct dirent *, unsigned); +extern struct nfs_file *nfs4lookup(struct nfs_file *, char *, int *); +extern struct nfs_file *nfs4lookupp(struct nfs_file *, int *, uint64_t *); +extern int nfs4getsymlink(struct nfs_file *, char **); +extern void compound_init(b_compound_t *, utf8string *, uint_t, uint_t, + struct nfs_bfh4 *); + +/* + * NFSv4 xdr ops + */ +extern bool_t xdr_getattr4_args(XDR *, getattr4arg_t *); +extern bool_t xdr_getattr4_res(XDR *, getattr4res_t *); +extern bool_t xdr_lookup4_args(XDR *, lookup4arg_t *); +extern bool_t xdr_lookup4_res(XDR *, lookup4res_t *); +extern bool_t xdr_lookupp4_args(XDR *, lookupp4arg_t *); +extern bool_t xdr_read4_args(XDR *, read4arg_t *); +extern bool_t xdr_read4_res(XDR *, read4res_t *); +extern bool_t xdr_readdir4_args(XDR *, readdir4arg_t *); +extern bool_t xdr_readdir4_res(XDR *, readdir4res_t *); +extern bool_t xdr_readlink4_args(XDR *, readlink4arg_t *); +extern bool_t xdr_readlink4_res(XDR *, readlink4res_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _NFS_INET_H */ diff --git a/usr/src/stand/lib/fs/nfs/nfs_xdr.c b/usr/src/stand/lib/fs/nfs/nfs_xdr.c new file mode 100644 index 0000000000..2146bf315c --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfs_xdr.c @@ -0,0 +1,266 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/xdr.h> +#include <rpc/rpc.h> +#include <rpcsvc/nfs_prot.h> + +/* + * XDR routines for NFS ops. + */ +static bool_t +xdr_b_nfsstat(XDR *xdrs, nfsstat *objp) +{ + return (xdr_enum(xdrs, (enum_t *)objp)); +} + +static bool_t +xdr_b_ftype(XDR *xdrs, ftype *objp) +{ + return (xdr_enum(xdrs, (enum_t *)objp)); +} + +bool_t +xdr_nfs_fh(XDR *xdrs, nfs_fh *objp) +{ + return (xdr_opaque(xdrs, objp->data, NFS_FHSIZE)); +} + +static bool_t +xdr_b_nfstime(XDR *xdrs, nfstime *objp) +{ + if (!xdr_u_int(xdrs, &objp->seconds)) { + return (FALSE); + } + return (xdr_u_int(xdrs, &objp->useconds)); +} + +static bool_t +xdr_b_fattr(XDR *xdrs, fattr *objp) +{ + if (!xdr_b_ftype(xdrs, &objp->type)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->mode)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->nlink)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->uid)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->gid)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->size)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->blocksize)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->rdev)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->blocks)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->fsid)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->fileid)) { + return (FALSE); + } + if (!xdr_b_nfstime(xdrs, &objp->atime)) { + return (FALSE); + } + if (!xdr_b_nfstime(xdrs, &objp->mtime)) { + return (FALSE); + } + return (xdr_b_nfstime(xdrs, &objp->ctime)); +} + +static bool_t +xdr_b_filename(XDR *xdrs, filename *objp) +{ + 
return (xdr_string(xdrs, objp, NFS_MAXNAMLEN)); +} + +static bool_t +xdr_b_nfspath(XDR *xdrs, nfspath *objp) +{ + return (xdr_string(xdrs, objp, NFS_MAXPATHLEN)); +} + +bool_t +xdr_attrstat(XDR *xdrs, attrstat *objp) +{ + if (!xdr_b_nfsstat(xdrs, &objp->status)) { + return (FALSE); + } + if (objp->status == NFS_OK) { + return (xdr_b_fattr(xdrs, &objp->attrstat_u.attributes)); + } + return (TRUE); +} + +bool_t +xdr_diropargs(XDR *xdrs, diropargs *objp) +{ + if (!xdr_nfs_fh(xdrs, &objp->dir)) { + return (FALSE); + } + return (xdr_b_filename(xdrs, &objp->name)); +} + +static bool_t +xdr_b_diropokres(XDR *xdrs, diropokres *objp) +{ + if (!xdr_nfs_fh(xdrs, &objp->file)) { + return (FALSE); + } + return (xdr_b_fattr(xdrs, &objp->attributes)); +} + +bool_t +xdr_diropres(XDR *xdrs, diropres *objp) +{ + if (!xdr_b_nfsstat(xdrs, &objp->status)) { + return (FALSE); + } + if (objp->status == NFS_OK) { + return (xdr_b_diropokres(xdrs, &objp->diropres_u.diropres)); + } + return (TRUE); +} + +bool_t +xdr_readlinkres(XDR *xdrs, readlinkres *objp) +{ + if (!xdr_b_nfsstat(xdrs, &objp->status)) { + return (FALSE); + } + if (objp->status == NFS_OK) { + return (xdr_b_nfspath(xdrs, &objp->readlinkres_u.data)); + } + return (TRUE); +} + +bool_t +xdr_readargs(XDR *xdrs, readargs *objp) +{ + if (!xdr_nfs_fh(xdrs, &objp->file)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->offset)) { + return (FALSE); + } + if (!xdr_u_int(xdrs, &objp->count)) { + return (FALSE); + } + return (xdr_u_int(xdrs, &objp->totalcount)); +} + +static bool_t +xdr_b_readokres(XDR *xdrs, readokres *objp) +{ + if (!xdr_b_fattr(xdrs, &objp->attributes)) { + return (FALSE); + } + return (xdr_bytes(xdrs, (char **)&objp->data.data_val, + (uint_t *)&objp->data.data_len, NFS_MAXDATA)); +} + +bool_t +xdr_readres(XDR *xdrs, readres *objp) +{ + if (!xdr_b_nfsstat(xdrs, &objp->status)) { + return (FALSE); + } + if (objp->status == NFS_OK) { + return (xdr_b_readokres(xdrs, &objp->readres_u.reply)); + } + return (TRUE); +} 
+ +static bool_t +xdr_b_nfscookie(XDR *xdrs, nfscookie objp) +{ + return (xdr_opaque(xdrs, objp, NFS_COOKIESIZE)); +} + +bool_t +xdr_readdirargs(XDR *xdrs, readdirargs *objp) +{ + if (!xdr_nfs_fh(xdrs, &objp->dir)) { + return (FALSE); + } + if (!xdr_b_nfscookie(xdrs, objp->cookie)) { + return (FALSE); + } + return (xdr_u_int(xdrs, &objp->count)); +} + +static bool_t +xdr_b_entry(XDR *xdrs, entry *objp) +{ + if (!xdr_u_int(xdrs, &objp->fileid)) { + return (FALSE); + } + if (!xdr_b_filename(xdrs, &objp->name)) { + return (FALSE); + } + if (!xdr_b_nfscookie(xdrs, objp->cookie)) { + return (FALSE); + } + return (xdr_pointer(xdrs, (char **)&objp->nextentry, sizeof (entry), + (xdrproc_t)xdr_b_entry)); +} + +static bool_t +xdr_b_dirlist(XDR *xdrs, dirlist *objp) +{ + if (!xdr_pointer(xdrs, (char **)&objp->entries, sizeof (entry), + (xdrproc_t)xdr_b_entry)) { + return (FALSE); + } + return (xdr_bool(xdrs, &objp->eof)); +} + +bool_t +xdr_readdirres(XDR *xdrs, readdirres *objp) +{ + if (!xdr_b_nfsstat(xdrs, &objp->status)) { + return (FALSE); + } + if (objp->status == NFS_OK) { + return (xdr_b_dirlist(xdrs, &objp->readdirres_u.reply)); + } + return (TRUE); +} diff --git a/usr/src/stand/lib/fs/nfs/nfsops.c b/usr/src/stand/lib/fs/nfs/nfsops.c new file mode 100644 index 0000000000..2adf856fc0 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/nfsops.c @@ -0,0 +1,467 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Simple nfs ops - open, close, read, and lseek. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <rpc/types.h> +#include <rpc/auth.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <sys/fcntl.h> +#include <sys/vfs.h> +#include <errno.h> +#include <sys/promif.h> +#include <rpc/xdr.h> +#include "nfs_inet.h" +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootdebug.h> +#include <sys/salib.h> +#include <sys/sacache.h> +#include <rpc/rpc.h> +#include "brpc.h" +#include <rpcsvc/nfs_prot.h> +#include "socket_inet.h" +#include "mac.h" +#include <sys/mode.h> + +ushort_t vttoif_tab[] = { + 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, + S_IFDOOR, 0, S_IFSOCK, 0 +}; + +static int file_desc = 1; +static struct nfs_files { + struct nfs_file file; + int desc; + struct nfs_files *next; +} nfs_files[1] = { + {0, 0, 0}, +}; + +#define dprintf if (boothowto & RB_DEBUG) printf + +static int boot_nfs_open(char *filename, int flags); +static int boot_nfs_close(int fd); +static ssize_t boot_nfs_read(int fd, caddr_t buf, size_t size); +static off_t boot_nfs_lseek(int, off_t, int); +static int boot_nfs_fstat(int fd, struct bootstat *stp); +static void boot_nfs_closeall(int flag); +static int boot_nfs_getdents(int fd, struct dirent *dep, unsigned size); + +struct boot_fs_ops boot_nfs_ops = { + "nfs", + boot_nfs_mountroot, + boot_nfs_unmountroot, + boot_nfs_open, + boot_nfs_close, + boot_nfs_read, + boot_nfs_lseek, + boot_nfs_fstat, + boot_nfs_closeall, + boot_nfs_getdents +}; + +/* + * bootops.c calls a closeall() function to close all open files. 
Since + * we only permit one open file at a time (not counting the device), this + * is simple to implement. + */ + +/*ARGSUSED*/ +static void +boot_nfs_closeall(int flag) +{ + struct nfs_files *filep; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_closeall(%x)\n", flag); +#endif + + /* delete any dynamically allocated entries */ + while ((filep = nfs_files->next) != NULL) { + nfs_files->next = filep->next; + bkmem_free((caddr_t)filep, sizeof (struct nfs_files)); + } + + /* clear the first, static file */ + bzero((caddr_t)nfs_files, sizeof (struct nfs_files)); + + /* Close device */ + release_cache(mac_get_dev()); + + mac_fini(); +} + +/* + * Get a file pointer given a file descriptor. Return 0 on error + */ +static struct nfs_files * +get_filep(int fd) +{ + struct nfs_files *filep; + + for (filep = nfs_files; filep; filep = filep->next) { + if (fd == filep->desc) + return (filep); + } + return (NULL); +} + +/* + * Unmount the root fs -- not supported for this fstype. + */ + +int +boot_nfs_unmountroot(void) +{ + return (-1); +} + +/* + * open a file for reading. Note: writing is NOT supported. + */ + +static int +boot_nfs_open(char *path, int flags) +{ + struct nfs_files *filep, *newfilep; + int got_filep; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_open(%s, %x)\n", path, flags); +#endif + + /* file can only be opened readonly. 
*/ + if (flags & ~O_RDONLY) { + dprintf("boot_nfs_open: files can only be opened O_RDONLY.\n"); + return (-1); + } + + if (path == NULL || *path == '\0') { + dprintf("boot_nfs_open: NULL or EMPTY pathname argument.\n"); + return (-1); + } + + /* Try and find a vacant file pointer */ + filep = nfs_files; + got_filep = FALSE; + do { + if (filep->desc == 0) { + filep->desc = file_desc++; + got_filep = TRUE; + break; /* We've got a file pointer */ + } + /* Get next entry if not at end of list */ + if (filep->next) + filep = filep->next; + } while (filep->next); + + /* If a a vacant file pointer cannot be found, make one */ + if (!got_filep) { + if ((newfilep = (struct nfs_files *) + bkmem_zalloc(sizeof (struct nfs_files))) == 0) { + dprintf("open: Cannot allocate file pointer\n"); + return (-1); + } + filep->next = newfilep; + filep = newfilep; + filep->desc = file_desc++; + } + + if (lookup(path, &filep->file, FALSE) != 0) { +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_open(): Cannot open '%s'.\n", path); +#endif + /* zero file pointer */ + bzero((caddr_t)filep, sizeof (struct nfs_file)); + filep->desc = 0; + return (-1); + } + bzero(&filep->file.cookie, sizeof (filep->file.cookie)); + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_open(): '%s' successful, fd = 0x%x\n", + path, filep->desc); +#endif + return (filep->desc); +} + +/* + * close a previously opened file. + */ +static int +boot_nfs_close(int fd) +{ + struct nfs_files *filep; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_close(%d)\n", fd); +#endif + if ((filep = get_filep(fd)) == 0) + return (0); + + /* + * zero file pointer + */ + bzero((caddr_t)&filep->file, sizeof (struct nfs_file)); + + /* + * "close" the fd. + */ + filep->desc = 0; + + return (0); +} + +/* + * read from a file. 
+ */ +static ssize_t +boot_nfs_read(int fd, char *buf, size_t size) +{ + struct nfs_files *filep; + int count = 0; + + if (fd == 0) { + dprintf("boot_nfs_read: Bad file number.\n"); + return (-1); + } + if (buf == NULL) { + dprintf("boot_nfs_read: Bad address.\n"); + return (-1); + } + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_read(%d, %x, 0x%x)\n", fd, buf, size); +#endif + + /* initialize for read */ + if ((filep = get_filep(fd)) == 0) + return (-1); + + switch (filep->file.version) { + case NFS_VERSION: + count = nfsread(&filep->file, buf, size); + break; + case NFS_V3: + count = nfs3read(&filep->file, buf, size); + break; + case NFS_V4: + count = nfs4read(&filep->file, buf, size); + break; + default: + printf("boot_nfs_read: NFS Version %d not supported\n", + filep->file.version); + count = -1; + break; + } + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_read(): 0x%x bytes.\n", count); +#endif + return (count); +} + +/* + * lseek - move read file pointer. + */ + +static off_t +boot_nfs_lseek(int fd, off_t offset, int whence) +{ + struct nfs_files *filep; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_lseek(%d, 0x%x, %d)\n", fd, offset, whence); +#endif + + if (fd == 0) { + dprintf("boot_nfs_lseek: Bad file number.\n"); + return (-1); + } + + if ((filep = get_filep(fd)) == 0) + return (-1); + + switch (whence) { + + case SEEK_SET: + /* + * file ptr is set to offset from beginning of file + */ + filep->file.offset = offset; + break; + case SEEK_CUR: + /* + * file ptr is set to offset from current position + */ + filep->file.offset += offset; + break; + case SEEK_END: + /* + * file ptr is set to current size of file plus offset. + * But since we only support reading, this is illegal. 
+ */ + default: + /* + * invalid offset origin + */ + dprintf("boot_nfs_lseek: invalid whence value.\n"); + return (-1); + } + +#ifdef notyet + return (filep->file.offset); +#else + /* + * BROKE - lseek should return the offset seeked to on a + * successful seek, not zero - This must be fixed in the + * kernel before It can be fixed here. + */ + return (0); +#endif /* notyet */ +} + +/* + * This version of fstat supports mode, size, inode #, and times only. + * It can be enhanced if more is required, + */ + +static int +boot_nfs_fstat(int fd, struct bootstat *stp) +{ + struct vattr va; + struct nfs_files *filep; + int status; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) { + printf("boot_nfs_fstat(%d, 0x%x)\n", fd, stp); + } +#endif + if (fd == 0) { + dprintf("boot_nfs_fstat(): Bad file number 0.\n"); + return (-1); + } + + if ((filep = get_filep(fd)) == 0) + return (-1); + + bzero((char *)&va, sizeof (va)); + va.va_mask = AT_TYPE | AT_SIZE | AT_MODE | AT_NODEID | \ + AT_ATIME | AT_CTIME | AT_MTIME; + + switch (filep->file.version) { + case NFS_VERSION: + status = nfsgetattr(&filep->file, &va); + break; + case NFS_V3: + status = nfs3getattr(&filep->file, &va); + break; + case NFS_V4: + status = nfs4getattr(&filep->file, &va); + break; + default: + printf("boot_nfs_fstat: NFS Version %d not supported\n", + filep->file.version); + status = -1; + break; + } + + if (status != 0) + return (-1); + + if (va.va_size > (u_offset_t)MAXOFF_T) { + dprintf("boot_nfs_fstat(): File too large.\n"); + return (-1); + } + stp->st_size = (off_t)va.va_size; + stp->st_mode = VTTOIF(va.va_type) | va.va_mode; + stp->st_atim.tv_sec = va.va_atime.tv_sec; + stp->st_atim.tv_nsec = va.va_atime.tv_nsec; + stp->st_ctim.tv_sec = va.va_ctime.tv_sec; + stp->st_ctim.tv_nsec = va.va_ctime.tv_nsec; + stp->st_mtim.tv_sec = va.va_mtime.tv_sec; + stp->st_mtim.tv_nsec = va.va_mtime.tv_nsec; + stp->st_ino = (ino_t)va.va_nodeid; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == 
DBFLAGS) + printf("boot_nfs_fstat(): done.\n"); +#endif + return (0); +} + +static int +boot_nfs_getdents(int fd, struct dirent *dep, unsigned size) +{ + struct nfs_files *filep; + int status; + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) { + printf("boot_nfs_getdents(%d, 0x%x, 0x%x)\n", fd, dep, size); + } +#endif + + if (fd == 0) { + dprintf("boot_nfs_getdents(): Bad file number 0.\n"); + return (-1); + } + + if ((filep = get_filep(fd)) == 0) + return (-1); + + switch (filep->file.version) { + case NFS_VERSION: + status = nfsgetdents(&filep->file, dep, size); + break; + case NFS_V3: + status = nfs3getdents(&filep->file, dep, size); + break; + default: + printf("boot_nfs_getdents: NFS Version %d not supported\n", + filep->file.version); + status = -1; + } + +#ifdef NFS_OPS_DEBUG + if ((boothowto & DBFLAGS) == DBFLAGS) + printf("boot_nfs_getdents(): done.\n"); +#endif + return (status); +} diff --git a/usr/src/stand/lib/fs/nfs/pathname.c b/usr/src/stand/lib/fs/nfs/pathname.c new file mode 100644 index 0000000000..e00a5bb942 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/pathname.c @@ -0,0 +1,177 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <pathname.h> +#include <sys/promif.h> +#include <sys/salib.h> +#include <sys/bootdebug.h> + +/* + * Pathname utilities. + * + * In translating file names we copy each argument file + * name into a pathname structure where we operate on it. + * Each pathname structure can hold MAXPATHLEN characters + * including a terminating null, and operations here support + * fetching strings from user space, getting the next character from + * a pathname, combining two pathnames (used in symbolic + * link processing), and peeling off the first component + * of a pathname. + */ + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * Setup contents of pathname structure. Warn about missing allocations. + * Structure itself is typically automatic + * variable in calling routine for convenience. + * + * NOTE: if buf is NULL, failure occurs. 
+ */ +int +pn_alloc(struct pathname *pnp) +{ + if (pnp->pn_buf == NULL) + return (-1); + pnp->pn_path = (char *)pnp->pn_buf; + pnp->pn_pathlen = 0; + return (0); +} + +/* + * Pull a pathname from user user or kernel space + */ +int +pn_get(char *str, struct pathname *pnp) +{ + if (pn_alloc(pnp) != 0) + return (-1); + bcopy(str, pnp->pn_path, strlen(str)); + pnp->pn_pathlen = strlen(str); /* don't count null byte */ + return (0); +} + +/* + * Set pathname to argument string. + */ +int +pn_set(struct pathname *pnp, char *path) +{ + pnp->pn_path = pnp->pn_buf; + pnp->pn_pathlen = strlen(pnp->pn_path); /* don't count null byte */ + bcopy(pnp->pn_path, path, pnp->pn_pathlen); + return (0); +} + +/* + * Combine two argument pathnames by putting + * second argument before first in first's buffer, + * and freeing second argument. + * This isn't very general: it is designed specifically + * for symbolic link processing. + */ +int +pn_combine(struct pathname *pnp, struct pathname *sympnp) +{ + + if (pnp->pn_pathlen + sympnp->pn_pathlen >= MAXPATHLEN) + return (ENAMETOOLONG); + bcopy(pnp->pn_path, pnp->pn_buf + sympnp->pn_pathlen, + (uint_t)pnp->pn_pathlen); + bcopy(sympnp->pn_path, pnp->pn_buf, (uint_t)sympnp->pn_pathlen); + pnp->pn_pathlen += sympnp->pn_pathlen; + pnp->pn_buf[pnp->pn_pathlen] = '\0'; + pnp->pn_path = pnp->pn_buf; + return (0); +} + +/* + * Get next component off a pathname and leave in + * buffer comoponent which should have room for + * NFS_MAXNAMLEN (1024) bytes and a null terminator character. + * If PEEK is set in flags, just peek at the component, + * i.e., don't strip it out of pnp. 
+ */ +int +pn_getcomponent(struct pathname *pnp, char *component, int flags) +{ + char *cp; + int l; + int n; + + cp = pnp->pn_path; + l = pnp->pn_pathlen; + n = 1024; + while ((l > 0) && (*cp != '/')) { + if (--n < 0) + return (ENAMETOOLONG); + *component++ = *cp++; + --l; + } + if (!(flags & PN_PEEK)) { + pnp->pn_path = cp; + pnp->pn_pathlen = l; + } + *component = 0; + return (0); +} + +/* + * skip over consecutive slashes in the pathname + */ +void +pn_skipslash(struct pathname *pnp) +{ + while ((pnp->pn_pathlen != 0) && (*pnp->pn_path == '/')) { + pnp->pn_path++; + pnp->pn_pathlen--; + } +} + +/* + * free pathname resources. This is a nop - the user of these + * routines is responsible for allocating and freeing their memory. + */ +/*ARGSUSED*/ +void +pn_free(struct pathname *pnp) +{ + /* nop */ + dprintf("pn_free(): you shouldn't be calling pn_free()!\n"); +} diff --git a/usr/src/stand/lib/fs/nfs/pathname.h b/usr/src/stand/lib/fs/nfs/pathname.h new file mode 100644 index 0000000000..35692cc748 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/pathname.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _PATHNAME_H +#define _PATHNAME_H + +#pragma ident "%Z%%M% %I% %E% SMI" /* from SunOS4.1 2.12 */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Pathname structure. + * System calls which operate on path names gather the + * pathname from system call into this structure and reduce + * it by peeling off translated components. If a symbolic + * link is encountered the new pathname to be translated + * is also assembled in this structure. + */ + +struct pathname { + char *pn_buf; /* underlying storage */ + char *pn_path; /* remaining pathname */ + uint_t pn_pathlen; /* remaining length */ +}; + +#define PN_STRIP 0x00 /* Strip next component off pn */ +#define PN_PEEK 0x01 /* Only peek at next pn component */ +#define pn_peekcomponent(PNP, COMP) pn_getcomponent(PNP, COMP, PN_PEEK) +#define pn_stripcomponent(PNP, COMP) pn_getcomponent(PNP, COMP, PN_STRIP) + +#define pn_peekchar(PNP) (((PNP)->pn_pathlen != 0) ? \ + *((PNP)->pn_path) : (char)0) +#define pn_pathleft(PNP) ((PNP)->pn_pathlen) +#define pn_getpath(PNP) ((PNP)->pn_path) +#define pn_copy(PNP1, PNP2) (pn_set(PNP2, pn_getpath(PNP1))) + +extern int pn_alloc(); /* allocat buffer for pathname */ +extern int pn_get(); /* allocate buf and copy path into it */ +#ifdef notneeded +extern int pn_getchar(); /* get next pathname char */ +#endif +extern int pn_set(); /* set pathname to string */ +extern int pn_combine(); /* combine to pathnames (for symlink) */ +extern int pn_getcomponent(); /* get next component of pathname */ +extern void pn_skipslash(); /* skip over slashes */ +extern void pn_free(); /* free pathname buffer */ +extern int pn_append(); /* Append string to pathname */ +extern int pn_getlast(); /* Get last component of pathname */ + +#ifdef __cplusplus +} +#endif + +#endif /* _PATHNAME_H */ diff --git a/usr/src/stand/lib/fs/nfs/pmap.c b/usr/src/stand/lib/fs/nfs/pmap.c new file mode 100644 index 0000000000..1fffd0ff2a --- /dev/null 
+++ b/usr/src/stand/lib/fs/nfs/pmap.c @@ -0,0 +1,532 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This file contains the routines that maintain a linked list of known + * program to udp port mappings. There are three static members initialized + * by default, one for the portmapper itself (of course), one for rpcbind, + * and one for nfs. If a program number is not in the list, then routines + * in this file contact the portmapper on the server, and dynamically add + * new members to this list. + * + * This file also contains bpmap_rmtcall() - which lets one get the port + * number AND run the rpc call in one step. Only the server that successfully + * completes the rpc call will return a result. + * + * NOTE: Because we will end up caching the port entries we need + * before the kernel begins running, we can use dynamic allocation here. + * boot_memfree() calls bpmap_memfree() to free up any dynamically + * allocated entries when the boot program has finished its job. 
+ */ + +#include <sys/types.h> +#include <rpc/types.h> +#include <sys/errno.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/if_ether.h> +#include <rpc/xdr.h> +#include <rpc/auth.h> +#include <sys/t_lock.h> +#include "clnt.h" +#include <rpc/pmap_prot.h> +#include <rpc/pmap_rmt.h> +#include <rpc/rpc.h> +#include "brpc.h" +#include "pmap.h" +#include "nfs_inet.h" +#include <rpcsvc/nfs_prot.h> +#include <rpc/rpcb_prot.h> +#include <sys/salib.h> +#include "socket_inet.h" +#include <sys/promif.h> +#include <sys/bootdebug.h> + +/* portmap structure */ +#define PMAP_STATIC (3) /* last statically allocated list entry */ +struct pmaplist pre_init[PMAP_STATIC + 1] = { + { {PMAPPROG, PMAPVERS, IPPROTO_UDP, PMAPPORT}, &pre_init[1] }, + /* SVR4 rpcbind listens to old portmapper port */ + { {RPCBPROG, RPCBVERS, IPPROTO_UDP, PMAPPORT}, &pre_init[2] }, + { {NFS_PROGRAM, NFS_VERSION, IPPROTO_UDP, NFS_PORT}, &pre_init[3] }, + { {NFS_PROGRAM, NFS_V3, IPPROTO_UDP, NFS_PORT}, NULL } +}; + +struct pmaplist *map_head = &pre_init[0]; +struct pmaplist *map_tail = &pre_init[PMAP_STATIC]; + +#define dprintf if (boothowto & RB_DEBUG) printf + +/* + * bpmap_addport: adds a new entry on to the end of the pmap cache. + * Items are kept in host order. + */ +static void +bpmap_addport(rpcprog_t prog, rpcvers_t vers, rpcport_t port) +{ + struct pmaplist *newp; + + /* allocate new pmaplist */ + newp = (struct pmaplist *)bkmem_alloc(sizeof (struct pmaplist)); + + if (newp == NULL) + return; /* not fatal here, we'll just throw out the entry */ + + newp->pml_map.pm_prog = prog; + newp->pml_map.pm_vers = vers; + newp->pml_map.pm_prot = (rpcprot_t)IPPROTO_UDP; + newp->pml_map.pm_port = port; + + map_tail->pml_next = newp; + newp->pml_next = NULL; + map_tail = newp; +} + +/* + * bpmap_delport: deletes an existing entry from the list. Caution - don't + * call this function to delete statically allocated entries. 
/*
 * Modified strtol(3): parse an unsigned decimal number at the start of
 * 'str'.
 *
 * Returns the parsed value (0 if 'str' does not start with a digit).
 * '*ptr' is set to the character following the single delimiter that
 * ended the number, so repeated calls walk a "a.b.c" style string.  If the
 * number was ended by the terminating null, '*ptr' is left pointing AT the
 * null rather than past it, so further calls keep returning 0 instead of
 * reading beyond the end of the string.
 */
static int
strtoi(char *str, char **ptr)
{
	int val = 0;

	while (*str >= '0' && *str <= '9') {
		val = val * 10 + (*str - '0');
		str++;
	}

	/* Swallow exactly one delimiter, but never the end of the string. */
	if (*str != '\0')
		str++;

	*ptr = str;
	return (val);
}

/*
 * (from dlboot_inet.c) (kernel)
 * Convert a port number from a sockaddr_in expressed
 * in universal address format.
 *
 * Returns the port as (high byte << 8) + low byte.  Components missing
 * from a short address are taken as 0.
 */
static int
uaddr2port(char *addr)
{
	int p1, p2;
	char *next;

	/*
	 * A struct sockaddr_in expressed in universal address
	 * format looks like:
	 *
	 *	"IP.IP.IP.IP.PORT[top byte].PORT[bot. byte]"
	 *
	 * Where each component expresses, as characters,
	 * the corresponding part of the IP address
	 * and port number.
	 * Thus 127.0.0.1, port 2345 looks like:
	 *
	 *	49 50 55 46 48 46 48 46 49 46 57 46 52 49
	 *	1  2  7  .  0  .  0  .  1  .  9  .  4  1
	 *
	 *	2345 = 929base16 = 9.32+9 = 9.41
	 */
	/* Skip the four address components; only the port is wanted. */
	(void) strtoi(addr, &next);
	(void) strtoi(next, &next);
	(void) strtoi(next, &next);
	(void) strtoi(next, &next);
	p1 = strtoi(next, &next);
	p2 = strtoi(next, &next);

	return ((p1 << 8) + p2);
}
+ */ + +bool_t +xdr_pmap(XDR *xdrs, struct pmap *regs) +{ + if (xdr_rpcprog(xdrs, ®s->pm_prog) && + xdr_rpcvers(xdrs, ®s->pm_vers) && + xdr_rpcprot(xdrs, ®s->pm_prot)) + return (xdr_rpcprot(xdrs, ®s->pm_port)); + return (FALSE); +} + +bool_t +xdr_rpcb(XDR *xdrs, RPCB *objp) +{ + if (!xdr_rpcprog(xdrs, &objp->r_prog)) + return (FALSE); + if (!xdr_rpcvers(xdrs, &objp->r_vers)) + return (FALSE); + if (!xdr_string(xdrs, &objp->r_netid, ~0)) + return (FALSE); + if (!xdr_string(xdrs, &objp->r_addr, ~0)) + return (FALSE); + if (!xdr_string(xdrs, &objp->r_owner, ~0)) + return (FALSE); + return (TRUE); +} + +/* + * XDR remote call arguments + * written for XDR_ENCODE direction only + */ +bool_t +xdr_rmtcall_args(XDR *xdrs, struct rmtcallargs *cap) +{ + uint_t lenposition, argposition, position; + + if (xdr_rpcprog(xdrs, &(cap->prog)) && + xdr_rpcvers(xdrs, &(cap->vers)) && + xdr_rpcproc(xdrs, &(cap->proc))) { + lenposition = XDR_GETPOS(xdrs); + if (!xdr_u_int(xdrs, &(cap->arglen))) + return (FALSE); + argposition = XDR_GETPOS(xdrs); + if (!(*(cap->xdr_args))(xdrs, cap->args_ptr)) + return (FALSE); + position = XDR_GETPOS(xdrs); + cap->arglen = position - argposition; + XDR_SETPOS(xdrs, lenposition); + if (!xdr_u_int(xdrs, &(cap->arglen))) + return (FALSE); + XDR_SETPOS(xdrs, position); + return (TRUE); + } + return (FALSE); +} + +/* + * XDR remote call results + * written for XDR_DECODE direction only + */ +bool_t +xdr_rmtcallres(XDR *xdrs, struct rmtcallres *crp) +{ + caddr_t port_ptr; + + port_ptr = (caddr_t)crp->port_ptr; + if (xdr_reference(xdrs, &port_ptr, sizeof (uint_t), xdr_u_int) && + xdr_u_int(xdrs, &crp->resultslen)) { + crp->port_ptr = (rpcport_t *)port_ptr; + return ((*(crp->xdr_results))(xdrs, crp->results_ptr)); + } + return (FALSE); +} + +/* + * XDR remote call arguments + * written for XDR_ENCODE direction only + */ +bool_t +xdr_rpcb_rmtcallargs(XDR *xdrs, struct rpcb_rmtcallargs *objp) +{ + uint_t lenposition, argposition, position; + + if 
(!xdr_rpcprog(xdrs, &objp->prog)) + return (FALSE); + if (!xdr_rpcvers(xdrs, &objp->vers)) + return (FALSE); + if (!xdr_rpcproc(xdrs, &objp->proc)) + return (FALSE); + /* + * All the jugglery for just getting the size of the arguments + */ + lenposition = XDR_GETPOS(xdrs); + if (!xdr_u_int(xdrs, &(objp->arglen))) + return (FALSE); + argposition = XDR_GETPOS(xdrs); + if (!(*(objp->xdr_args))(xdrs, objp->args_ptr)) + return (FALSE); + position = XDR_GETPOS(xdrs); + objp->arglen = position - argposition; + XDR_SETPOS(xdrs, lenposition); + if (!xdr_u_int(xdrs, &(objp->arglen))) + return (FALSE); + XDR_SETPOS(xdrs, position); + return (TRUE); +} + +/* + * XDR remote call results + * written for XDR_DECODE direction only + */ +bool_t +xdr_rpcb_rmtcallres(XDR *xdrs, struct rpcb_rmtcallres *objp) +{ + if (!xdr_string(xdrs, &objp->addr_ptr, ~0)) + return (FALSE); + if (!xdr_u_int(xdrs, &objp->resultslen)) + return (FALSE); + return ((*(objp->xdr_results))(xdrs, objp->results_ptr)); +} + +/* + * bpmap_rmtcall: does PMAPPROC_CALLIT broadcasts w/ rpc_call requests. + * Lets one do a PMAPGETPORT/RPC PROC call in one easy step. sockaddr_in args + * are taken as network order. + * + * Code adapted from bpmap_rmtcall() in dlboot_inet.c (kernel) + */ +/*ARGSUSED*/ +enum clnt_stat +bpmap_rmtcall( + rpcprog_t prog, /* rpc program number to call. */ + rpcvers_t vers, /* rpc program version */ + rpcproc_t proc, /* rpc procedure to call */ + xdrproc_t in_xdr, /* routine to serialize arguments */ + caddr_t args, /* arg vector for remote call */ + xdrproc_t out_xdr, /* routine to deserialize results */ + caddr_t ret, /* addr of buf to place results in */ + int rexmit, /* retransmission interval (secs) */ + int wait, /* how long (secs) to wait for a resp */ + struct sockaddr_in *to, /* destination */ + struct sockaddr_in *from, /* filled in w/ responder's port/addr */ + uint_t auth) /* type of authentication wanted. 
*/ +{ + enum clnt_stat status; /* rpc_call status */ + rpcport_t port = 0; /* returned port # */ + struct rmtcallargs pmap_a; /* args for pmap call */ + struct rmtcallres pmap_r; /* results from pmap call */ + struct rpcb_rmtcallargs rpcb_a; /* args for rpcb call */ + struct rpcb_rmtcallres rpcb_r; /* results from rpcb call */ + char ua[UA_SIZE]; /* universal addr buffer */ + + /* initialize pmap */ + pmap_a.prog = prog; + pmap_a.vers = vers; + pmap_a.proc = proc; + pmap_a.args_ptr = args; + pmap_a.xdr_args = in_xdr; + pmap_r.port_ptr = &port; + pmap_r.results_ptr = ret; + pmap_r.xdr_results = out_xdr; + + status = brpc_call((rpcprog_t)PMAPPROG, (rpcvers_t)PMAPVERS, + (rpcproc_t)PMAPPROC_CALLIT, xdr_rmtcall_args, (caddr_t)&pmap_a, + xdr_rmtcallres, (caddr_t)&pmap_r, rexmit, wait, to, from, + AUTH_NONE); + if (status != RPC_PROGUNAVAIL) { + if (status == RPC_SUCCESS) { + /* delete old port mapping, if it exists */ + bpmap_delport(prog, vers); + + /* save the new port mapping */ + bpmap_addport(prog, vers, port); + } + return (status); + } + + /* + * PMAP is unavailable. Maybe there's a SVR4 machine, with rpcbind. + */ + bzero(ua, sizeof (ua)); + + /* initialize rpcb */ + rpcb_a.prog = prog; + rpcb_a.vers = vers; + rpcb_a.proc = proc; + rpcb_a.args_ptr = args; + rpcb_a.xdr_args = in_xdr; + rpcb_r.addr_ptr = ua; + rpcb_r.results_ptr = ret; + rpcb_r.xdr_results = out_xdr; + + status = brpc_call((rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS, + (rpcproc_t)RPCBPROC_CALLIT, xdr_rpcb_rmtcallargs, (caddr_t)&rpcb_a, + xdr_rpcb_rmtcallres, (caddr_t)&rpcb_r, rexmit, wait, to, from, + AUTH_NONE); + if (status == RPC_SUCCESS) { + /* delete old port mapping, if it exists */ + bpmap_delport(prog, vers); + + /* save the new port mapping */ + port = ntohs(uaddr2port(ua)); + bpmap_addport(prog, vers, port); + } + return (status); +} + +/* + * bpmap_getport: Queries current list of cached pmap_list entries, + * returns the port number of the entry found. 
If the port number + * is not cached, then getport makes a rpc call first to the portmapper, + * and then to rpcbind (SVR4) if the portmapper does not respond. The + * returned port is then added to the cache, and the port number is + * returned. If both portmapper and rpc bind fail to give us the necessary + * port, we return 0 to signal we hit an error, and set rpc_stat to + * the appropriate RPC error code. Only IPPROTO_UDP protocol is supported. + * + * Port and sockaddr_in arguments taken in network order. rpcport_t is returned + * in host order. + */ +rpcport_t +bpmap_getport(rpcprog_t prog, rpcvers_t vers, enum clnt_stat *rpc_stat, + struct sockaddr_in *to, struct sockaddr_in *from) +{ + struct pmaplist *walk; + struct pmap pmap_send; /* portmap */ + in_port_t pmap_port; + rpcport_t dport; + +#ifdef DEBUG + printf("bpmap_getport: called with: prog: %d, vers: %d\n", prog, vers); +#endif /* DEBUG */ + for (walk = map_head; walk != 0; walk = walk->pml_next) { + if ((walk->pml_map.pm_prog == prog) && + (walk->pml_map.pm_vers == vers) && + (walk->pml_map.pm_prot == (rpcprot_t)IPPROTO_UDP)) { +#ifdef DEBUG + printf("bpmap_getport: Found in cache. returning: %d\n", + walk->pml_map.pm_port); +#endif /* DEBUG */ + return (walk->pml_map.pm_port); + } + } + + /* + * Not in the cache. First try the portmapper (SunOS server?) and + * if that fails, try rpcbind (SVR4 server). + */ + pmap_send.pm_prog = prog; + pmap_send.pm_vers = vers; + pmap_send.pm_prot = (rpcprot_t)IPPROTO_UDP; + pmap_send.pm_port = 0; /* what we're after */ + + *rpc_stat = brpc_call(PMAPPROG, PMAPVERS, PMAPPROC_GETPORT, + xdr_pmap, (caddr_t)&pmap_send, xdr_u_short, + (caddr_t)&pmap_port, 0, 0, to, from, AUTH_NONE); + + if (*rpc_stat == RPC_PROGUNAVAIL) { + /* + * The portmapper isn't available. Try rpcbind. + * Maybe the server is a SVR4 server. 
+ */ + char *ua; /* universal address */ + char ua_buf[UA_SIZE]; /* and its buffer */ + RPCB rpcb_send; + + rpcb_send.r_prog = prog; + rpcb_send.r_vers = vers; + rpcb_send.r_netid = NULL; + rpcb_send.r_addr = NULL; + rpcb_send.r_owner = NULL; + + bzero(ua_buf, UA_SIZE); + ua = ua_buf; + + /* + * Again, default # of retries. xdr_wrapstring() + * wants a char **. + */ + *rpc_stat = brpc_call(RPCBPROG, RPCBVERS, RPCBPROC_GETADDR, + xdr_rpcb, (caddr_t)&rpcb_send, xdr_wrapstring, + (char *)&ua, 0, 0, to, from, AUTH_NONE); + + if (*rpc_stat == RPC_SUCCESS) { + if (ua[0] != '\0') + dport = ntohs(uaddr2port(ua)); + else + return (0); /* Address unknown */ + } + } else { + /* + * Why are rpcport_t's uint32_t? port numbers are uint16_t + * for ipv4 AND ipv6.... XXXX + */ + dport = (rpcport_t)pmap_port; + } + + if (*rpc_stat != RPC_SUCCESS) { + dprintf("pmap_getport: Failed getting port.\n"); + return (0); /* we failed. */ + } + +#ifdef DEBUG + printf("bpmap_getport: prog: %d, vers: %d; returning port: %d.\n", + prog, vers, dport); +#endif /* DEBUG */ + + bpmap_addport(prog, vers, dport); + + return (dport); +} + +/* + * bpmap_memfree: frees up any dynamically allocated entries. + */ +void +bpmap_memfree(void) +{ + struct pmaplist *current, *tmp; + + if (map_tail == &pre_init[PMAP_STATIC]) + return; /* no dynamic entries */ + + /* free from head of the list to the tail. */ + current = pre_init[PMAP_STATIC].pml_next; + while (current != NULL) { + tmp = current->pml_next; + bkmem_free((caddr_t)current, sizeof (struct pmaplist)); + current = tmp; + } +} diff --git a/usr/src/stand/lib/fs/nfs/pmap.h b/usr/src/stand/lib/fs/nfs/pmap.h new file mode 100644 index 0000000000..2932d99f88 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/pmap.h @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). 
You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _PMAP_H +#define _PMAP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define UA_SIZE (128) /* max space needed for an universal addr */ + +extern enum clnt_stat bpmap_rmtcall(rpcprog_t, rpcvers_t, rpcproc_t, xdrproc_t, + caddr_t, xdrproc_t, caddr_t, int, int, struct sockaddr_in *, + struct sockaddr_in *, uint_t); +extern rpcport_t bpmap_getport(rpcprog_t, rpcvers_t, enum clnt_stat *, + struct sockaddr_in *, struct sockaddr_in *); +extern void bpmap_memfree(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _PMAP_H */ diff --git a/usr/src/stand/lib/fs/nfs/rpc.c b/usr/src/stand/lib/fs/nfs/rpc.c new file mode 100644 index 0000000000..55b76fe154 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/rpc.c @@ -0,0 +1,482 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * This file contains a simple implementation of RPC. Standard XDR is + * used. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/sysmacros.h> +#include <rpc/types.h> +#include <errno.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include "socket_inet.h" +#include "ipv4.h" +#include <rpc/xdr.h> +#include <rpc/auth.h> +#include <rpc/auth_sys.h> +#include <rpc/rpc_msg.h> +#include <sys/t_lock.h> +#include <netdb.h> +#include "clnt.h" +#include <rpc/rpc.h> +#include "brpc.h" +#include "auth_inet.h" +#include "pmap.h" +#include <sys/promif.h> +#include "nfs_inet.h" +#include <rpcsvc/nfs_prot.h> +#include <rpc/auth_unix.h> +#include <sys/salib.h> +#include "mac.h" +#include <sys/bootdebug.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +static struct in_addr cached_destination; + +void +rpc_disperr(struct rpc_err *stat) +{ + if (boothowto & RB_DEBUG) { + switch (stat->re_status) { + case RPC_CANTENCODEARGS: + printf("RPC: Can't encode arguments.\n"); + break; + case RPC_CANTDECODERES: + printf("RPC: Can't decode result.\n"); + break; + case RPC_CANTSEND: + printf("RPC: Unable to send (%s).\n", + strerror(errno)); + break; + case RPC_CANTRECV: + printf("RPC: Unable to receive (%s).\n", + strerror(errno)); + break; + case RPC_TIMEDOUT: + printf("RPC: Timed out.\n"); + break; + case RPC_VERSMISMATCH: + printf("RPC: Incompatible versions of RPC.\n"); + break; 
+ case RPC_AUTHERROR: + printf("RPC: Authentication error:\n"); + switch (stat->re_why) { + case AUTH_BADCRED: + printf("remote: bogus credentials " + "(seal broken).\n"); + break; + case AUTH_REJECTEDCRED: + printf("remote: client should begin new " + "session.\n"); + break; + case AUTH_BADVERF: + printf("remote: bogus verifier " + "(seal broken).\n"); + break; + case AUTH_REJECTEDVERF: + printf("remote: verifier expired or was " + "replayed.\n"); + break; + case AUTH_TOOWEAK: + printf("remote: rejected due to security " + "reasons.\n"); + break; + case AUTH_INVALIDRESP: + printf("local: bogus response verifier.\n"); + break; + case AUTH_FAILED: + /* FALLTHRU */ + default: + printf("local: unknown error.\n"); + break; + } + break; + case RPC_PROGUNAVAIL: + printf("RPC: Program unavailable.\n"); + break; + case RPC_PROGVERSMISMATCH: + printf("RPC: Program/version mismatch.\n"); + break; + case RPC_PROCUNAVAIL: + printf("RPC: Procedure unavailable.\n"); + break; + case RPC_CANTDECODEARGS: + printf("RPC: Server can't decode arguments.\n"); + break; + case RPC_SYSTEMERROR: + printf("RPC: Remote system error.\n"); + break; + case RPC_UNKNOWNHOST: + printf("RPC: Unknown host.\n"); + break; + case RPC_UNKNOWNPROTO: + printf("RPC: Unknown protocol.\n"); + break; + case RPC_PMAPFAILURE: + printf("RPC: Port mapper failure.\n"); + break; + case RPC_PROGNOTREGISTERED: + printf("RPC: Program not registered.\n"); + break; + case RPC_FAILED: + printf("RPC: Failed (unspecified error).\n"); + break; + default: + printf("RPC: (unknown error code).\n"); + break; + } + } +} + +/* + * rpc_hdr: sets the fields in the rpc msg header. + * + * Returns: TRUE on success, FALSE if failure. 
+ */ +/*ARGSUSED*/ +static bool_t +rpc_hdr(XDR *xdrs, uint_t xid, rpcprog_t prog, rpcvers_t vers, rpcproc_t proc) +{ + struct rpc_msg call_msg; + + /* setup header */ + call_msg.rm_xid = xid; + call_msg.rm_direction = CALL; + call_msg.rm_call.cb_rpcvers = (rpcvers_t)RPC_MSG_VERSION; + call_msg.rm_call.cb_prog = prog; + call_msg.rm_call.cb_vers = vers; + + /* xdr the header. */ + if (xdr_callhdr(xdrs, &call_msg) == FALSE) + return (FALSE); + else + return (TRUE); +} + +/* + * our version of brpc_call(). We cache in portnumber in to->sin_port for + * your convenience. to and from addresses are taken and received in network + * order. + */ +enum clnt_stat +brpc_call( + rpcprog_t prog, /* rpc program number to call. */ + rpcvers_t vers, /* rpc program version */ + rpcproc_t proc, /* rpc procedure to call */ + xdrproc_t in_xdr, /* routine to serialize arguments */ + caddr_t args, /* arg vector for remote call */ + xdrproc_t out_xdr, /* routine to deserialize results */ + caddr_t ret, /* addr of buf to place results in */ + int rexmit, /* retransmission interval (secs) */ + int wait_time, /* how long (secs) to wait (resp) */ + struct sockaddr_in *to, /* destination */ + struct sockaddr_in *from_who, /* responder's port/address */ + uint_t auth) /* type of auth wanted. */ +{ + int s; + char hostname[MAXHOSTNAMELEN]; + struct sockaddr_in from; /* us. */ + socklen_t from_len; + XDR xmit_xdrs, rcv_xdrs; /* xdr memory */ + AUTH *xmit_auth; /* our chosen auth cookie */ + gid_t fake_gids = 1; /* fake gids list for auth_unix */ + caddr_t trm_msg, rcv_msg; /* outgoing/incoming rpc mesgs */ + struct rpc_msg reply; /* our reply msg header */ + int trm_len, rcv_len; + struct rpc_err rpc_error; /* to store RPC errors in on rcv. 
*/ + static uint_t xid; /* current xid */ + uint_t xmit_len; /* How much of the buffer we used */ + int nrefreshes = 2; /* # of times to refresh cred */ + int flags = 0; /* send flags */ + uint_t xdelay; + int errors, preserve_errno; + uint32_t timeout; + socklen_t optlen; + + xmit_auth = NULL; + + trm_len = mac_get_mtu(); + trm_msg = bkmem_alloc(trm_len); + rcv_msg = bkmem_alloc(NFSBUF_SIZE); + + if (trm_msg == NULL || rcv_msg == NULL) { + errno = ENOMEM; + rpc_error.re_status = RPC_CANTSEND; + goto gt_error; + } + + if ((s = socket(PF_INET, SOCK_DGRAM, 0)) < 0) { + rpc_error.re_status = RPC_CANTSEND; + goto gt_error; + } + + if (dontroute) { + (void) setsockopt(s, SOL_SOCKET, SO_DONTROUTE, + (const void *)&dontroute, sizeof (dontroute)); + } + + if (to->sin_addr.s_addr == cached_destination.s_addr) { + optlen = sizeof (timeout); + (void) getsockopt(s, SOL_SOCKET, SO_RCVTIMEO, (void *)&timeout, + &optlen); + } else { + cached_destination.s_addr = htonl(INADDR_ANY); + } + + /* Bind our endpoint. */ + from.sin_family = AF_INET; + ipv4_getipaddr(&from.sin_addr); + from.sin_addr.s_addr = htonl(from.sin_addr.s_addr); + from.sin_port = get_source_port(B_TRUE); + + if (bind(s, (struct sockaddr *)&from, sizeof (from)) < 0) { + rpc_error.re_status = RPC_CANTSEND; + goto gt_error; + } + + bzero((caddr_t)&rpc_error, sizeof (struct rpc_err)); + + /* initialize reply's rpc_msg struct, so we can decode later. */ + reply.acpted_rply.ar_verf = _null_auth; /* struct copy */ + reply.acpted_rply.ar_results.where = ret; + reply.acpted_rply.ar_results.proc = out_xdr; + + if (ntohs(to->sin_port) == 0) { + /* snag the udp port we need. */ + if ((to->sin_port = (in_port_t)bpmap_getport(prog, vers, + &(rpc_error.re_status), to, NULL)) == 0) + goto gt_error; + to->sin_port = htons(to->sin_port); + } + + /* generate xid - increment */ + if (xid == 0) + xid = (uint_t)(prom_gettime() / 1000) + 1; + else + xid++; + + /* set up outgoing pkt as xdr modified. 
*/ + xdrmem_create(&xmit_xdrs, trm_msg, trm_len, XDR_ENCODE); + + /* setup rpc header */ + if (rpc_hdr(&xmit_xdrs, xid, prog, vers, proc) != TRUE) { + dprintf("brpc_call: cannot setup rpc header.\n"); + rpc_error.re_status = RPC_FAILED; + goto gt_error; + } + + /* setup authentication */ + switch (auth) { + case AUTH_NONE: + xmit_auth = authnone_create(); + break; + case AUTH_UNIX: + /* + * Assumes we've configured the stack and thus know our + * IP address/hostname, either by using DHCP or rarp/bootparams. + */ + gethostname(hostname, sizeof (hostname)); + xmit_auth = authunix_create(hostname, 0, 1, 1, &fake_gids); + break; + default: + dprintf("brpc_call: Unsupported authentication type: %d\n", + auth); + rpc_error.re_status = RPC_AUTHERROR; + goto gt_error; + /*NOTREACHED*/ + } + + /* + * rpc_hdr puts everything in the xmit buffer for the header + * EXCEPT the proc. Put it, and our authentication info into + * it now, serializing as we go. We will be at the place where + * we left off. + */ + xmit_xdrs.x_op = XDR_ENCODE; + if ((XDR_PUTINT32(&xmit_xdrs, (int32_t *)&proc) == FALSE) || + (AUTH_MARSHALL(xmit_auth, &xmit_xdrs, NULL) == FALSE) || + ((*in_xdr)(&xmit_xdrs, args) == FALSE)) { + rpc_error.re_status = RPC_CANTENCODEARGS; + goto gt_error; + } else + xmit_len = (int)XDR_GETPOS(&xmit_xdrs); /* for sendto */ + + /* + * Right now the outgoing packet should be all serialized and + * ready to go... Set up timers. + */ + + xdelay = (rexmit == 0) ? RPC_REXMIT_MSEC : (rexmit * 1000); + (void) setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, (void *)&xdelay, + sizeof (xdelay)); + wait_time = (wait_time == 0) ? RPC_RCVWAIT_MSEC : (wait_time * 1000); + + wait_time += prom_gettime(); + + /* + * send out the request. The first item in the receive buffer will + * be the xid. Check if it is correct. 
+ */ + errors = 0; + rpc_error.re_status = RPC_TIMEDOUT; + do { + if (sendto(s, trm_msg, xmit_len, flags, (struct sockaddr *)to, + sizeof (struct sockaddr_in)) < 0) { + rpc_error.re_status = RPC_CANTSEND; + goto gt_error; + } + + from_len = sizeof (struct sockaddr_in); + while ((rcv_len = recvfrom(s, rcv_msg, NFSBUF_SIZE, + MSG_DONTWAIT, (struct sockaddr *)from_who, + &from_len)) > 0 || errors < RPC_ALLOWABLE_ERRORS) { + if (rcv_len < 0) { + if (errno == EWOULDBLOCK || + errno == ETIMEDOUT) { + break; /* timeout */ + } + rpc_error.re_status = RPC_CANTRECV; + goto gt_error; + } + if (ntohl(*((uint32_t *)(rcv_msg))) != xid) { + dprintf("brpc_call: xid: 0x%x != 0x%x\n", + *(uint32_t *)(rcv_msg), xid); + continue; + } + /* + * Let's deserialize the data into our 'ret' buffer. + */ + xdrmem_create(&rcv_xdrs, rcv_msg, rcv_len, XDR_DECODE); + if (xdr_replymsg(&rcv_xdrs, &reply) == FALSE) { + rpc_error.re_status = RPC_CANTDECODERES; + goto gt_error; + } + _seterr_reply(&reply, &rpc_error); + switch (rpc_error.re_status) { + case RPC_SUCCESS: + /* + * XXX - validate for unix and none + * always return true. + */ + if (AUTH_VALIDATE(xmit_auth, + &reply.acpted_rply.ar_verf) == FALSE) { + rpc_error.re_status = RPC_AUTHERROR; + rpc_error.re_why = AUTH_INVALIDRESP; + errors++; + } + if (reply.acpted_rply.ar_verf.oa_base != + 0) { + xmit_xdrs.x_op = XDR_FREE; + (void) xdr_opaque_auth( + &xmit_xdrs, + &reply.acpted_rply.ar_verf); + } + break; + + case RPC_AUTHERROR: + /* + * Let's see if our credentials need + * refreshing + */ + if (nrefreshes > 0 && AUTH_REFRESH(xmit_auth, + NULL, NULL)) { + nrefreshes--; + } + errors++; + break; + + case RPC_PROCUNAVAIL: + /* + * Might be a silly portmapper implementation + * erroneously responding to our rpc broadcast + * indirect portmapper call. For this + * particular case, we don't increment the + * error counter because we want to keep + * sifting for successful replies... 
+ */ + if (to->sin_addr.s_addr != + ntohl(INADDR_BROADCAST)) + errors++; + break; + + case RPC_PROGVERSMISMATCH: + /* + * Successfully talked to server, but they + * don't speak our lingo. + */ + goto gt_error; + + default: + /* Just keep trying till there's no data... */ + errors++; + break; + } + + if (rpc_error.re_status != RPC_SUCCESS) { + dprintf("brpc_call: from: %s, error: ", + inet_ntoa(from_who->sin_addr)); + rpc_disperr(&rpc_error); + } else + break; + } + + /* + * If we're having trouble reassembling datagrams, let the + * application know ASAP so that it can take the appropriate + * actions. + */ + + } while (rpc_error.re_status != RPC_SUCCESS && errno != ETIMEDOUT && + prom_gettime() < wait_time); + +gt_error: + if (xmit_auth != NULL) + AUTH_DESTROY(xmit_auth); + + if (trm_msg != NULL) + bkmem_free(trm_msg, trm_len); + if (rcv_msg != NULL) + bkmem_free(rcv_msg, NFSBUF_SIZE); + + if (rpc_error.re_status != RPC_SUCCESS) + rpc_disperr(&rpc_error); + + /* + * socket calls reset errno. Since we want to hold onto the errno + * value if it is ETIMEDOUT to communicate to our caller that this + * RPC_TIMEDOUT situation is due to a stack problem (we're getting + * a reply, but the stack simply can't assemble it.), we need to + * preserve errno's value over the socket_close(). + */ + preserve_errno = (errno == ETIMEDOUT) ? errno : 0; + (void) socket_close(s); + errno = preserve_errno; + + return (rpc_error.re_status); +} diff --git a/usr/src/stand/lib/fs/nfs/xdr_rec.c b/usr/src/stand/lib/fs/nfs/xdr_rec.c new file mode 100644 index 0000000000..64f1b8d6a2 --- /dev/null +++ b/usr/src/stand/lib/fs/nfs/xdr_rec.c @@ -0,0 +1,596 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * xdr_rec.c, Implements TCP/IP based XDR streams with a "record marking" + * layer above tcp (for rpc's use). + * + * These routines interface XDRSTREAMS to a tcp/ip connection. + * There is a record marking layer between the xdr stream + * and the tcp transport level. A record is composed of one or more + * record fragments. A record fragment is a thirty-two bit header followed + * by n bytes of data, where n is contained in the header. The header + * is represented as a htonl(u_long). The high order bit encodes + * whether or not the fragment is the last fragment of the record + * (1 => fragment is last, 0 => more fragments to follow). + * The other 31 bits encode the byte length of the fragment. 
+ */ + +#include <rpc/types.h> +#include <rpc/xdr.h> +#include <netinet/in.h> +#include <sys/promif.h> +#include <sys/salib.h> +#include <sys/bootdebug.h> + +#define dprintf if (boothowto & RB_DEBUG) printf + +extern long lseek(); + +static bool_t xdrrec_getint32(); +static bool_t xdrrec_putint32(); +static bool_t xdrrec_getbytes(); +static bool_t xdrrec_putbytes(); +static uint_t xdrrec_getpos(); +static bool_t xdrrec_setpos(); +static int32_t *xdrrec_inline(); +static void xdrrec_destroy(); + +static struct xdr_ops *xdrrec_ops(); +static bool_t flush_out(); +static bool_t fill_input_buf(); +static bool_t get_input_bytes(); +static bool_t set_input_fragment(); +static bool_t skip_input_bytes(); +static uint_t fix_buf_size(); + +/* + * A record is composed of one or more record fragments. + * A record fragment is a four-byte header followed by zero to + * 2**32-1 bytes. The header is treated as a long unsigned and is + * encode/decoded to the network via htonl/ntohl. The low order 31 bits + * are a byte count of the fragment. The highest order bit is a boolean: + * 1 => this fragment is the last fragment of the record, + * 0 => this fragment is followed by more fragment(s). + * + * The fragment/record machinery is not general; it is constructed to + * meet the needs of xdr and rpc based on tcp. 
+ */ +#define LAST_FRAG 0x80000000 + +typedef struct rec_strm { + caddr_t tcp_handle; /* opaque handle passed to readit/writeit */ + caddr_t the_buffer; /* one allocation backing both buffers */ + /* + * out-going bits + */ + int (*writeit)(); + caddr_t out_base; /* output buffer (points to frag header) */ + caddr_t out_finger; /* next output position */ + caddr_t out_boundry; /* data cannot go beyond this address */ + uint32_t *frag_header; /* beginning of current fragment */ + bool_t frag_sent; /* true if buffer sent in middle of record */ + /* + * in-coming bits + */ + int (*readit)(); + uint32_t in_size; /* fixed size of the input buffer */ + caddr_t in_base; /* start of the input buffer */ + caddr_t in_finger; /* location of next byte to be had */ + caddr_t in_boundry; /* can read up to this location */ + int fbtbc; /* fragment bytes to be consumed */ + bool_t last_frag; /* current fragment ends its record */ + uint_t sendsize; /* adjusted sizes, needed again by */ + uint_t recvsize; /* xdrrec_destroy to free the_buffer */ +} RECSTREAM; + + +/* + * Create an xdr handle for xdrrec + * xdrrec_create fills in xdrs. Sendsize and recvsize are + * send and recv buffer sizes (values below 100, including 0, select + * the default of 4000 bytes). + * tcp_handle is an opaque handle that is passed as the first parameter to + * the procedures readit and writeit. Readit and writeit are read and + * write respectively. They are like the system + * calls except that they take an opaque handle rather than an fd. + * + * If either allocation fails, a debug message is printed, whatever was + * already allocated is released, and xdrs is left untouched. + */ +void +xdrrec_create(XDR *xdrs, uint_t sendsize, uint_t recvsize, caddr_t tcp_handle, + int (*readit)(), int (*writeit)()) +{ + RECSTREAM *rstrm = (RECSTREAM *)mem_alloc(sizeof (RECSTREAM)); + if (rstrm == NULL) { + dprintf("xdrrec_create: out of memory\n"); + /* + * This is bad. 
Should rework xdrrec_create to + * return a handle, and in this case return NULL + */ + return; + } + /* + * adjust sizes and allocate buffer quad byte aligned + */ + rstrm->sendsize = sendsize = fix_buf_size(sendsize); + rstrm->recvsize = recvsize = fix_buf_size(recvsize); + rstrm->the_buffer = mem_alloc(sendsize + recvsize + BYTES_PER_XDR_UNIT); + if (rstrm->the_buffer == NULL) { + dprintf("xdrrec_create: out of memory\n"); + /* release the descriptor so that it is not leaked */ + mem_free((caddr_t)rstrm, sizeof (RECSTREAM)); + return; + } + /* + * advance out_base to the next BYTES_PER_XDR_UNIT boundary + */ + for (rstrm->out_base = rstrm->the_buffer; + (uint_t)rstrm->out_base % BYTES_PER_XDR_UNIT != 0; + rstrm->out_base++); + rstrm->in_base = rstrm->out_base + sendsize; + /* + * now the rest ... + */ + xdrs->x_ops = xdrrec_ops(); + xdrs->x_private = (caddr_t)rstrm; + rstrm->tcp_handle = tcp_handle; + rstrm->readit = readit; + rstrm->writeit = writeit; + rstrm->out_finger = rstrm->out_boundry = rstrm->out_base; + rstrm->frag_header = (uint32_t *)rstrm->out_base; + /* leave room for the header of the first fragment */ + rstrm->out_finger += sizeof (uint_t); + rstrm->out_boundry += sendsize; + rstrm->frag_sent = FALSE; + rstrm->in_size = recvsize; + rstrm->in_boundry = rstrm->in_base; + /* finger == boundary: the input buffer starts out empty */ + rstrm->in_finger = (rstrm->in_boundry += recvsize); + rstrm->fbtbc = 0; + rstrm->last_frag = TRUE; + +} + + +/* + * The routines defined below are the xdr ops which will go into the + * xdr handle filled in by xdrrec_create. 
+ */ +/* + * Decode one 32-bit integer. The fast path reads it directly from the + * input buffer when both the current fragment and the buffer hold at + * least four more bytes; otherwise it falls back to xdrrec_getbytes(). + */ +static bool_t +xdrrec_getint32(XDR *xdrs, int32_t *ip) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + int32_t *bufip = (int32_t *)(rstrm->in_finger); + int32_t myint; + + /* first try the inline, fast case */ + if ((rstrm->fbtbc >= sizeof (int32_t)) && + (((int)rstrm->in_boundry - (int)bufip) >= sizeof (int32_t))) { + *ip = (int32_t)ntohl((uint32_t)(*bufip)); + rstrm->fbtbc -= sizeof (int32_t); + rstrm->in_finger += sizeof (int32_t); + } else { + if (!xdrrec_getbytes(xdrs, (caddr_t)&myint, sizeof (int32_t))) + return (FALSE); + *ip = (int32_t)ntohl((uint32_t)myint); + } + return (TRUE); +} +/* + * Encode one 32-bit integer in network byte order. If it does not fit in + * the output buffer, the buffer is first flushed as a non-final fragment. + */ +static bool_t +xdrrec_putint32(XDR *xdrs, int32_t *ip) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + int32_t *dest_ip = ((int32_t *)(rstrm->out_finger)); + + if ((rstrm->out_finger += sizeof (int32_t)) > rstrm->out_boundry) { + /* + * this case should almost never happen so the code is + * inefficient + */ + rstrm->out_finger -= sizeof (int32_t); + rstrm->frag_sent = TRUE; + if (! flush_out(rstrm, FALSE)) + return (FALSE); + dest_ip = ((int32_t *)(rstrm->out_finger)); + rstrm->out_finger += sizeof (int32_t); + } + *dest_ip = (int32_t)htonl((uint32_t)(*ip)); + return (TRUE); +} + +/* + * We need to be a little smarter here because we don't want to induce any + * pathological behavior in inetboot's networking stack. The algorithm we + * pursue is to try to consume the entire fragment exactly instead of + * blindly requesting the max to fill the input buffer. 
+ */ +static bool_t /* must manage buffers, fragments, and records */ +xdrrec_getbytes(XDR *xdrs, caddr_t addr, int32_t len) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + int current; + int frag_len; + + while (len > 0) { + current = frag_len = rstrm->fbtbc; + if (current == 0) { + if (rstrm->last_frag) + return (FALSE); + if (!set_input_fragment(rstrm)) + return (FALSE); + continue; + } + + current = (len < current) ? 
len : current; + if (!get_input_bytes(rstrm, addr, frag_len, current)) + return (FALSE); + addr += current; + rstrm->fbtbc -= current; + len -= current; + } + return (TRUE); +} +/* + * Copy len bytes into the output buffer, flushing it as a non-final + * fragment each time it fills up. + */ +static bool_t +xdrrec_putbytes(XDR *xdrs, caddr_t addr, int32_t len) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + int current; + + while (len > 0) { + current = (uint_t)rstrm->out_boundry - + (uint_t)rstrm->out_finger; + current = (len < current) ? len : current; + bcopy(addr, rstrm->out_finger, current); + rstrm->out_finger += current; + addr += current; + len -= current; + if (rstrm->out_finger == rstrm->out_boundry) { + rstrm->frag_sent = TRUE; + if (! flush_out(rstrm, FALSE)) + return (FALSE); + } + } + return (TRUE); +} +/* + * Report the stream position: the lseek() offset of tcp_handle adjusted + * by the data still buffered. Returns (uint_t)-1 if lseek() fails or + * x_op is neither XDR_ENCODE nor XDR_DECODE. + */ +static uint_t +xdrrec_getpos(XDR *xdrs) +{ + RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private; + int32_t pos; + + pos = lseek((int)rstrm->tcp_handle, (int32_t)0, 1); + if (pos != -1) + switch (xdrs->x_op) { + + case XDR_ENCODE: + pos += rstrm->out_finger - rstrm->out_base; + break; + + case XDR_DECODE: + pos -= rstrm->in_boundry - rstrm->in_finger; + break; + + default: + pos = (uint_t)-1; + break; + } + return ((uint_t)pos); +} +/* + * Reposition the finger by the difference between the current position + * and pos. Returns FALSE if the current position is unknown or the new + * position fails the buffer bounds checks. 
+ */ +static bool_t +xdrrec_setpos(XDR *xdrs, uint_t pos) +{ + RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private; + uint_t currpos = xdrrec_getpos(xdrs); + int delta = currpos - pos; + caddr_t newpos; + + if ((int)currpos != -1) + switch (xdrs->x_op) { + + case XDR_ENCODE: + newpos = rstrm->out_finger - delta; + if ((newpos > (caddr_t)(rstrm->frag_header)) && + (newpos < rstrm->out_boundry)) { + rstrm->out_finger = newpos; + return (TRUE); + } + break; + + case XDR_DECODE: + newpos = rstrm->in_finger - delta; + if ((delta < (int)(rstrm->fbtbc)) && + (newpos <= rstrm->in_boundry) && + (newpos >= rstrm->in_base)) { + rstrm->in_finger = newpos; + rstrm->fbtbc -= delta; + return (TRUE); + } + break; + } + return (FALSE); +} +/* + * Reserve len contiguous bytes in the current buffer for direct access by + * the caller; returns NULL when they are not available. + */ +static int32_t * +xdrrec_inline(XDR *xdrs, int len) +{ + RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private; + 
int32_t *buf = NULL; + + switch (xdrs->x_op) { + + case XDR_ENCODE: + if ((rstrm->out_finger + len) <= rstrm->out_boundry) { + buf = (int32_t *)rstrm->out_finger; + rstrm->out_finger += len; + } + break; + + case XDR_DECODE: + if ((len <= rstrm->fbtbc) && + ((rstrm->in_finger + len) <= rstrm->in_boundry)) { + buf = (int32_t *)rstrm->in_finger; + rstrm->fbtbc -= len; + rstrm->in_finger += len; + } + break; + } + return (buf); +} +/* + * Release the buffer and the RECSTREAM allocated by xdrrec_create(). + */ +static void +xdrrec_destroy(XDR *xdrs) +{ + RECSTREAM *rstrm = (RECSTREAM *)xdrs->x_private; + + mem_free(rstrm->the_buffer, + rstrm->sendsize + rstrm->recvsize + BYTES_PER_XDR_UNIT); + mem_free((caddr_t)rstrm, sizeof (RECSTREAM)); +} + + +/* + * Exported routines to manage xdr records + */ + +/* + * Before reading (deserializing) from the stream, one should always call + * this procedure to guarantee proper record alignment. + */ +bool_t +xdrrec_skiprecord(XDR *xdrs) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + + while (rstrm->fbtbc > 0 || (! rstrm->last_frag)) { + if (! skip_input_bytes(rstrm, rstrm->fbtbc)) + return (FALSE); + rstrm->fbtbc = 0; + if ((! rstrm->last_frag) && (! set_input_fragment(rstrm))) + return (FALSE); + } + rstrm->last_frag = FALSE; + return (TRUE); +} + +#ifdef notneeded +/* + * Look ahead function. + * Returns TRUE iff there is no more input in the buffer + * after consuming the rest of the current record. + */ +bool_t +xdrrec_eof(XDR *xdrs) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + + while (rstrm->fbtbc > 0 || (! rstrm->last_frag)) { + if (! skip_input_bytes(rstrm, rstrm->fbtbc)) + return (TRUE); + rstrm->fbtbc = 0; + if ((! rstrm->last_frag) && (! set_input_fragment(rstrm))) + return (TRUE); + } + if (rstrm->in_finger == rstrm->in_boundry) + return (TRUE); + return (FALSE); +} +#endif /* notneeded */ + +/* + * The client must tell the package when an end-of-record has occurred. 
+ * The second paraemters tells whether the record should be flushed to the + * (output) tcp stream. 
(This let's the package support batched or + * pipelined procedure calls.) TRUE => immmediate flush to tcp connection. + */ +bool_t +xdrrec_endofrecord(XDR *xdrs, bool_t sendnow) +{ + RECSTREAM *rstrm = (RECSTREAM *)(xdrs->x_private); + uint32_t len; /* fragment length */ + + if (sendnow || rstrm->frag_sent || + ((uint32_t)rstrm->out_finger + sizeof (uint32_t) >= + (uint32_t)rstrm->out_boundry)) { + rstrm->frag_sent = FALSE; + return (flush_out(rstrm, TRUE)); + } + len = (uint32_t)(rstrm->out_finger) - (uint32_t)(rstrm->frag_header) - + sizeof (uint32_t); + *(rstrm->frag_header) = htonl((uint32_t)len | LAST_FRAG); + rstrm->frag_header = (uint32_t *)rstrm->out_finger; + rstrm->out_finger += sizeof (uint32_t); + return (TRUE); +} + + +/* + * Internal useful routines + * + * flush_out: fill in the header of the current fragment (flagged as the + * last one when eor is TRUE), hand the whole output buffer to writeit, + * and reset the output pointers to the start of the buffer. + */ +static bool_t +flush_out(RECSTREAM *rstrm, bool_t eor) +{ + uint32_t eormask = (eor == TRUE) ? LAST_FRAG : 0; + uint32_t len = (uint32_t)(rstrm->out_finger) - + (uint32_t)(rstrm->frag_header) - sizeof (uint32_t); + + *(rstrm->frag_header) = htonl(len | eormask); + len = (uint32_t)(rstrm->out_finger) - (uint32_t)(rstrm->out_base); + if ((*(rstrm->writeit))(rstrm->tcp_handle, rstrm->out_base, (int)len) + != (int)len) + return (FALSE); + + rstrm->frag_header = (uint32_t *)rstrm->out_base; + rstrm->out_finger = (caddr_t)rstrm->out_base + sizeof (uint32_t); + return (TRUE); +} + +static bool_t /* knows nothing about records! Only about input buffers */ +fill_input_buf(RECSTREAM *rstrm, int frag_len) +{ + caddr_t where; + uint_t i; + int len; + + where = rstrm->in_base; + i = (uint_t)rstrm->in_boundry % BYTES_PER_XDR_UNIT; + where += i; + len = (frag_len < (rstrm->in_size - i)) ? 
frag_len : + rstrm->in_size - i; +#ifdef DEBUG + printf("fill_input_buf: len = %d\n", len); +#endif + if ((len = (*(rstrm->readit))(rstrm->tcp_handle, where, len)) == -1) + return (FALSE); + rstrm->in_finger = where; + where += len; + rstrm->in_boundry = where; + return (TRUE); +} +/* + * Copy len bytes from the input buffer to addr, refilling the buffer + * through fill_input_buf() whenever it runs empty. + */ +static bool_t +get_input_bytes(RECSTREAM *rstrm, caddr_t addr, int frag_len, int len) +{ + int current; + + while (len > 0) { + current = (int)rstrm->in_boundry - (int)rstrm->in_finger; +#ifdef DEBUG + printf("get_input_bytes: len = %d, frag_len = %d, current %d\n", + len, frag_len, current); +#endif + /* + * set_input_fragment doesn't know how large the fragment is, we + * need to get the header so just grab a header's size worth + */ + if (frag_len == 0) + frag_len = len; + + if (current == 0) { + if (! fill_input_buf(rstrm, frag_len)) + return (FALSE); + continue; + } + + current = (len < current) ? len : current; + bcopy(rstrm->in_finger, addr, current); + rstrm->in_finger += current; + addr += current; + len -= current; + } + return (TRUE); +} + +static bool_t /* next four bytes of the input stream are treated as a header */ +set_input_fragment(RECSTREAM *rstrm) +{ + uint32_t header; + + if (! get_input_bytes(rstrm, (caddr_t)&header, 0, sizeof (header))) + return (FALSE); + header = (uint32_t)ntohl(header); + rstrm->last_frag = ((header & LAST_FRAG) == 0) ? FALSE : TRUE; + rstrm->fbtbc = header & (~LAST_FRAG); +#ifdef DEBUG + printf("set_input_fragment: frag_len = %d, last frag = %s\n", + rstrm->fbtbc, rstrm->last_frag ? "TRUE" : "FALSE"); +#endif + return (TRUE); +} + +static bool_t /* consumes input bytes; knows nothing about records! 
*/ +skip_input_bytes(RECSTREAM *rstrm, int32_t cnt) +{ + int current; +#ifdef DEBUG + printf("skip_input_fragment: cnt = %d\n", cnt); +#endif + while (cnt > 0) { + current = (int)rstrm->in_boundry - (int)rstrm->in_finger; + if (current == 0) { + if (! fill_input_buf(rstrm, cnt)) + return (FALSE); + continue; + } + current = (cnt < current) ? 
cnt : current; + rstrm->in_finger += current; + cnt -= current; + } + return (TRUE); +} +/* + * Sizes below 100 are replaced by 4000; the result is rounded up by RNDUP. + */ +static uint_t +fix_buf_size(uint_t s) +{ + + if (s < 100) + s = 4000; + return (RNDUP(s)); +} +/* + * Return the ops vector shared by all xdrrec streams, filling it in on + * the first call. + */ +static struct xdr_ops * +xdrrec_ops() +{ + static struct xdr_ops ops; + + if (ops.x_getint32 == NULL) { + ops.x_getint32 = xdrrec_getint32; + ops.x_putint32 = xdrrec_putint32; + ops.x_getbytes = xdrrec_getbytes; + ops.x_putbytes = xdrrec_putbytes; + ops.x_getpostn = xdrrec_getpos; + ops.x_setpostn = xdrrec_setpos; + ops.x_inline = xdrrec_inline; + ops.x_destroy = xdrrec_destroy; + } + + return (&ops); +} diff --git a/usr/src/stand/lib/fs/req.flg b/usr/src/stand/lib/fs/req.flg new file mode 100644 index 0000000000..23af479db3 --- /dev/null +++ b/usr/src/stand/lib/fs/req.flg @@ -0,0 +1,29 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" + +echo_file usr/src/stand/lib/fs/Makefile.com diff --git a/usr/src/stand/lib/fs/ufs/Makefile b/usr/src/stand/lib/fs/ufs/Makefile new file mode 100644 index 0000000000..d873a77b62 --- /dev/null +++ b/usr/src/stand/lib/fs/ufs/Makefile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = libufs.a +OBJECTS = ufsops.o lufsboot.o + +include ../Makefile.com + +include ../../Makefile.targ diff --git a/usr/src/stand/lib/fs/ufs/llib-lufs b/usr/src/stand/lib/fs/ufs/llib-lufs new file mode 100644 index 0000000000..2a5be182e9 --- /dev/null +++ b/usr/src/stand/lib/fs/ufs/llib-lufs @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + +#include <sys/bootufs.h> diff --git a/usr/src/stand/lib/fs/ufs/lufsboot.c b/usr/src/stand/lib/fs/ufs/lufsboot.c new file mode 100644 index 0000000000..548e1bcb7e --- /dev/null +++ b/usr/src/stand/lib/fs/ufs/lufsboot.c @@ -0,0 +1,1099 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/fs/ufs_fsdir.h> +#include <sys/fs/ufs_fs.h> +#include <sys/fs/ufs_inode.h> +#include <sys/fs/ufs_log.h> +#include <sys/sysmacros.h> +#include <sys/promif.h> +#include <sys/machparam.h> + +#include <sys/stat.h> +#include <sys/bootdebug.h> +#include <sys/salib.h> +#include <sys/saio.h> +#include <sys/filep.h> + + +/* + * Big theory statement on how ufsboot makes use of the log + * in case the filesystem wasn't shut down cleanly. + * + * The structure of the ufs on-disk log looks like this: + * + * +-----------------+ + * | SUPERBLOCK | + * | ... | + * | fs_logbno +--> +-----------------------+ + * | ... | | EXTENT BLOCK | + * +-----------------+ | ... | + * | nextents | + * +----------------------+ extents[0].pbno | + * | | { extents[1].pbno } +------------+ + * | | ... +--> ... | + * | +-----------------------+ | + * v | + * +-----------------------------+ \ | + * | ON-DISK LOG HEADER | | | + * | ... | | | + * | od_head_lof +--+ | | + * | ... | | | | + * +-----------------------------+ <|---|- od_bol_lof | + * | sector (may contain deltas) | | | (logical offset) | + * | +-------------------------+ | | | + * | | trailer (some ident#) | | > extents[0].nbno | + * +---+-------------------------+ | | blocks ("sectors") | + * . . | | | + * . . | | | + * +-----------------------------+<-+ | | + * | delta1 delta2 delta3 | | | + * | d +-------------------------+ | | + * | e | ident#: od_head_ident | | | + * +---+-------------------------+ / | + * | + * +-----------------------------+ <---------------------------+ + * | lta4 delta5 delta6 de | + * | l +-------------------------+ + * | t | ident#: od_head_ident+1 | + * +---+-------------------------+ + * . . 
+ * +-----------------------------+ + * | sector (may contain deltas) | + * | +------------------+ + * | | trailer (ident#) | + * +----------+------------------+ <-- od_eol_lof (logical offset) + * + * The ufs on-disk log has the following properties: + * + * 1. The log is made up from at least one extent. "fs_logbno" in + * the superblock points to where this is found. + * 2. Extents describe the logical layout. + * - Logical offset 0 is the on-disk log header. It's also + * at the beginning of the first physical block. + * - If there's more than one extent, the equation holds: + * extent[i+1].lbno == extent[i].lbno + extent[i].nbno + * i.e. logical offsets form a contiguous sequence. Yet on disk, + * two logically-adjacent offsets may be located in two + * physically disjoint extents, so logical offsets need to be + * translated into physical disk block addresses for access. + * - Various fields in the on-disk log header structure refer + * to such logical log offsets. + * 3. The actual logical logspace begins after the log header, at + * the logical offset indicated by "od_bol_lof". Every 512 Bytes + * (a "sector" in terms of ufs logging) is a sector trailer which + * contains a sequence number, the sector ident. + * 4. Deltas are packed tight in the remaining space, i.e. a delta + * may be part of more than one sector. Reads from the logspace + * must be split at sector boundaries, since the trailer is never + * part of a delta. Delta sizes vary. + * 5. The field "od_head_lof" points to the start of the dirty part + * of the log, i.e. to the first delta header. Likewise, "od_head_ident" + * is the sequence number where the valid part of the log starts; if + * the sector pointed to by "od_head_lof" has a sector ident different + * from "od_head_ident", the log is empty. + * 6. The valid part of the log extends for as many sectors as their ident + * numbers form a contiguous sequence. 
When reaching the logical end of + * the log, "od_eol_lof", logical offsets wrap around to "od_bol_lof", + * i.e. the log forms a circular buffer. + * + * For the strategy of accessing the log, item 4. is the + * most important one - its consequence is that the log can only be + * read in one direction - forward, starting at the head. + * + * The task of identifying whether a given metadata block is + * actually in the log therefore requires reading the entire + * log. Doing so is memory-efficient but kills speed if re-done + * at every metadata read (64MB log size vs. 512 byte metadata + * block size: 128K times as much I/O, possibly only to find out + * that this block was not in the log ...). + * + * First thought to speed this up is to let ufsboot roll the log. + * But this is not possible because: + * - ufsboot currently does not implement any write functionality, + * the boot-time ufs implementation is read-only. + * - firmware write interfaces may or may not be available, in any + * case, they're rarely used and untested for such a purpose. + * - that would duplicate a lot of code, since at the moment only + * kernel ufs logging implements log rolling. + * - the boot environment cannot be considered high-performance; + * rolling the log there would be slow. + * - boot device and root device could well be different, creating + * inconsistencies e.g. with a mirrored root if the log is rolled. + * + * Therefore, caching the log structural information (boot-relevant + * deltas and their logical log offset) is required for fast access + * to the data in the log. This code builds a logmap for that purpose. + * + * As a simple optimization, if we find the log is empty, we will not + * use it - log reader support for ufsboot has no noticeable overhead + * for clean logs, or for root filesystems that aren't logging. 
+ */ + +#define LB_HASHSHIFT 13 +#define LB_HASHSIZE (1 << LB_HASHSHIFT) +#define LB_HASHFUNC(mof) (((mof) >> LB_HASHSHIFT) & (LB_HASHSIZE - 1)) + +#define LOGBUF_MAXSIZE (8*1024*1024) +#define LOGBUF_MINSIZE (256*1024) + +#define LOG_IS_EMPTY 0 +#define LOG_IS_OK 1 +#define LOG_IS_ERRORED 2 + +/* + * We build a hashed logmap of those while scanning the log. + * sizeof(lb_me_t) is 40 on 64bit, 32 on 32bit; the max sized + * resalloc'ed buffer can accommodate around ~500k of those; + * this is approximately the maximum amount of deltas we'll + * see if a 64MB ufs log is completely filled. We'll make no + * attempt to free and reallocate the resalloc'ed buffer if + * we overflow, as conservative sizing should make that an + * impossibility. A future enhancement may allocate memory + * here as needed - once the boot time memory allocator + * supports that. + */ +typedef struct lb_mapentry { + struct lb_mapentry *l_next; /* hash chaining */ + struct lb_mapentry *l_prev; /* hash chaining */ + int64_t l_mof; /* disk addr this delta is against */ + int16_t l_nb; /* size of delta */ + int16_t l_flags; + int32_t l_lof; /* log offset for delta header */ + int32_t l_tid; /* transaction this delta is part of */ + delta_t l_typ; /* see <sys/fs/ufs_trans.h> */ +} lb_me_t; + +#define LB_ISCANCELLED 1 +/* + * inslist: link l in just before the head *lh of the circular list, i.e. + * at its tail; an empty list gets l as its only element and head. + */ +#define inslist(lh, l) if ((*(lh))) { \ + (*(lh))->l_prev->l_next = (l); \ + (l)->l_next = (*(lh)); \ + (l)->l_prev = (*(lh))->l_prev; \ + (*(lh))->l_prev = (l); \ + } else { \ + (l)->l_next = (l); \ + (l)->l_prev = (l); \ + (*(lh)) = l; \ + } +/* + * remlist: unlink l from the circular list headed by *lh, moving the head + * on if l was the head and clearing it if l was the only element. 
+ */ +#define remlist(lh, l) \ + if ((l)->l_next == (l)) { \ + if (*(lh) != (l) || (l)->l_prev != (l)) \ + dprintf("Logmap hash inconsistency.\n"); \ + *(lh) = (lb_me_t *)NULL; \ + } else { \ + if (*(lh) == (l)) \ + *(lh) = (l)->l_next; \ + (l)->l_prev->l_next = (l)->l_next; \ + (l)->l_next->l_prev = (l)->l_prev; \ + } + +#define lufs_alloc_me() \ + (lb_me_t *)lufs_alloc_from_logbuf(sizeof (lb_me_t)) + +extern int boothowto; +static int 
ufs_is_lufs = 0; +static fileid_t *logfp = (fileid_t *)NULL; +static extent_block_t *eb = (extent_block_t *)NULL; +static ml_odunit_t odi; + +#ifndef i386 +static char logbuffer_min[LOGBUF_MINSIZE]; +#endif +static caddr_t logbuffer = (caddr_t)NULL; +static caddr_t elogbuffer = (caddr_t)NULL; +static caddr_t logbuf_curptr; +static lb_me_t **loghash = (lb_me_t **)NULL; +static lb_me_t *lfreelist; + +static uint32_t curtid; + + +int lufs_support = 1; + +void lufs_boot_init(fileid_t *); +void lufs_closeall(void); +void lufs_merge_deltas(fileid_t *); + +static int lufs_logscan(void); + +extern int diskread(fileid_t *filep); +extern caddr_t resalloc(enum RESOURCES, size_t, caddr_t, int); + +#if defined(i386) +#define LOGBUF_BASEADDR ((caddr_t)(KERNEL_TEXT - LOGBUF_MAXSIZE)) +#elif defined(__sparcv9) +#define LOGBUF_BASEADDR ((caddr_t)(SYSBASE - LOGBUF_MAXSIZE)) +#endif + +static int +lufs_alloc_logbuf(void) +{ + /* + * Allocate memory for caching the log. Since the logbuffer can + * potentially exceed the boot scratch memory limit, we use resalloc + * directly, passing the allocation to the low-level boot-time + * backend allocator. The chosen VA range is the top end of + * the kernel's segmap segment, so we're not interfering + * with the kernel because segmap is created at a time when + * the 2nd-stage boot has already been unloaded and this VA + * range was given back. + * + * On sparc platforms, the kernel cannot recover the memory + * obtained from resalloc because the page structs are allocated + * before the call to BOP_QUIESCE. To avoid leaking this + * memory, the logbuffer is allocated from a small bss array + * that should hold the logmap except in the most extreme cases. + * If the bss array is too small, the logbuffer is extended + * from resalloc 1 page at a time. 
+ */ + +#ifdef i386 + logbuffer = resalloc(RES_CHILDVIRT, LOGBUF_MAXSIZE, + LOGBUF_BASEADDR, 0UL); + elogbuffer = logbuffer+LOGBUF_MAXSIZE; +#else + logbuffer = logbuffer_min; + elogbuffer = logbuffer+LOGBUF_MINSIZE; +#endif + logbuf_curptr = logbuffer; + lfreelist = (lb_me_t *)NULL; + + if (logbuffer == (caddr_t)NULL) + return (0); + + dprintf("Buffer for boot loader logging support: 0x%p, size 0x%x\n", + logbuffer, elogbuffer-logbuffer); + + return (1); +} + +static void +lufs_free_logbuf() +{ + /* + * Solaris/x86 has no prom_free() routine at this time. + * Reclaiming the VA range below KERNEL_TEXT on Solaris/x86 + * is done by the kernel startup itself, in hat_unload_prom() + * after the bootloader has been quiesced. + * + * Solaris on sparc has a prom_free() routine that will update + * the memlist properties to reflect the freeing of the + * logbuffer. However, the sparc kernel cannot recover + * the memory freed after the call to BOP_QUIESCE as the + * page struct have already been allocated. We call + * prom_free anyway so that the kernel can reclaim this + * memory in the future. + */ +#ifndef i386 + if (logbuffer == LOGBUF_BASEADDR) + prom_free(logbuffer, elogbuffer-logbuffer); +#endif + logbuffer = (caddr_t)NULL; +} + +static caddr_t +lufs_alloc_from_logbuf(size_t sz) +{ + caddr_t tmpaddr; + lb_me_t *l; + + /* + * Satisfy lb_me_t allocations from the freelist + * first if possible. + */ + if ((sz == sizeof (lb_me_t)) && lfreelist) { + l = lfreelist; + lfreelist = lfreelist->l_next; + return ((caddr_t)l); + } + if (elogbuffer < logbuf_curptr + sz) { +#ifdef i386 + return ((caddr_t)NULL); +#else + caddr_t np; + size_t nsz; + + /* + * Out of space in current chunk - try to add another. 
+ */ + if (logbuffer == logbuffer_min) { + np = LOGBUF_BASEADDR; + } else { + np = elogbuffer; + } + nsz = roundup(sz, PAGESIZE); + if (np + nsz > LOGBUF_BASEADDR + LOGBUF_MAXSIZE) { + return ((caddr_t)NULL); + } + + np = resalloc(RES_CHILDVIRT, nsz, np, 0UL); + if (np == (caddr_t)NULL) { + return ((caddr_t)NULL); + } + if (logbuffer == logbuffer_min) + logbuffer = LOGBUF_BASEADDR; + logbuf_curptr = np; + elogbuffer = logbuf_curptr + nsz; +#endif + } + + tmpaddr = logbuf_curptr; + logbuf_curptr += sz; + bzero(tmpaddr, sz); + return (tmpaddr); +} + +static int32_t +lufs_read_log(int32_t addr, caddr_t va, int nb) +{ + int i, fastpath = 0; + daddr_t pblk, lblk; + sect_trailer_t *st; + uint32_t ident; + + /* + * Fast path for skipping the read if no target buffer + * is specified. Don't do this for the initial scan. + */ + if (ufs_is_lufs && (va == (caddr_t)NULL)) + fastpath = 1; + + while (nb) { + /* log wraparound check */ + if (addr == odi.od_eol_lof) + addr = odi.od_bol_lof; + if (fastpath) + goto read_done; + + /* + * Translate logically-contiguous log offsets into physical + * block numbers. For a log consisting of a single extent: + * pbno = btodb(addr) - extents[0].lbno; + * Otherwise, search for the extent which contains addr. + */ + pblk = 0; + lblk = btodb(addr); + for (i = 0; i < eb->nextents; i++) { + if (lblk >= eb->extents[i].lbno && + lblk < eb->extents[i].lbno + + eb->extents[i].nbno) { + pblk = lblk - eb->extents[i].lbno + + eb->extents[i].pbno; + break; + } + } + + if (pblk == 0) { + /* + * block #0 can never be in a log extent since this + * block always contains the primary superblock copy. + */ + dprintf("No log extent found for log offset 0x%llx.\n", + addr); + return (0); + } + + /* + * Check whether the block we want is cached from the last + * read. If not, read it in now. 
+ */ + if (logfp->fi_blocknum != pblk) { + logfp->fi_blocknum = pblk; + logfp->fi_memp = logfp->fi_buf; + logfp->fi_count = DEV_BSIZE; + logfp->fi_offset = 0; + if (diskread(logfp)) { + dprintf("I/O error reading the ufs log" \ + " at block 0x%x.\n", + logfp->fi_blocknum); + return (0); + } + /* + * Log structure verification. The block which we just + * read has an ident number that must match its offset + * in blocks from the head of the log. Since the log + * can wrap around, we have to check for that to get the + * ident right. Out-of-sequence idents can happen after + * power failures, panics during a partial transaction, + * media errors, ... - in any case, they mark the end of + * the valid part of the log. + */ + st = (sect_trailer_t *)(logfp->fi_memp + + LDL_USABLE_BSIZE); + /* od_head_ident is where the sequence starts */ + ident = odi.od_head_ident; + if (lblk >= lbtodb(odi.od_head_lof)) { + /* no wraparound */ + ident += (lblk - lbtodb(odi.od_head_lof)); + } else { + /* log wrapped around the end */ + ident += (lbtodb(odi.od_eol_lof) - + lbtodb(odi.od_head_lof)); + ident += (lblk - lbtodb(odi.od_bol_lof)); + } + + if (ident != st->st_ident) + return (0); + } +read_done: + /* + * Copy the delta contents to the destination buffer if + * one was specified. Otherwise, just skip the contents. + */ + i = MIN(NB_LEFT_IN_SECTOR(addr), nb); + if (va != NULL) { + bcopy(logfp->fi_buf + (addr - ldbtob(lbtodb(addr))), + va, i); + va += i; + } + nb -= i; + addr += i; + /* + * Skip sector trailer if necessary. + */ + if (NB_LEFT_IN_SECTOR(addr) == 0) + addr += sizeof (sect_trailer_t); + } + return (addr); +} + +void +lufs_boot_init(fileid_t *filep) +{ + struct fs *sb = (struct fs *)filep->fi_memp; + int err = 0; + + /* + * boot_ufs_mountroot() should have called us with a + * filep pointing to the superblock. Verify that this + * is so first. + * Then check whether this filesystem has a dirty log. + * Also return if lufs support was disabled on request. 
+ */ + if (!lufs_support || + sb != (struct fs *)&filep->fi_devp->un_fs.di_fs || + sb->fs_clean != FSLOG || sb->fs_logbno == NULL) { + return; + } + + if (boothowto & RB_VERBOSE) + printf("The boot filesystem is logging.\n"); + + /* + * The filesystem is logging, there is a log area + * allocated for it. Check the log state and determine + * whether it'll be possible to use this log. + */ + + /* + * Allocate a private fileid_t for use when reading + * from the log. + */ + eb = (extent_block_t *)bkmem_zalloc(sb->fs_bsize); + logfp = (fileid_t *)bkmem_zalloc(sizeof (fileid_t)); + logfp->fi_memp = logfp->fi_buf; + logfp->fi_devp = filep->fi_devp; + + /* + * Read the extent block and verify that what we + * find there are actually lufs extents. + * Make it simple: the extent block including all + * extents cannot be larger than a filesystem block. + * So read a whole filesystem block, to make sure + * we have read all extents in the same operation. + */ + logfp->fi_blocknum = sb->fs_logbno; + logfp->fi_count = sb->fs_bsize; + logfp->fi_memp = (caddr_t)eb; + logfp->fi_offset = 0; + if (diskread(logfp) || eb->type != LUFS_EXTENTS) { + dprintf("Failed to read log extent block.\n"); + err = LOG_IS_ERRORED; + goto out; + } + + /* + * Read the on disk log header. If that fails, + * try the backup copy on the adjacent block. + */ + logfp->fi_blocknum = eb->extents[0].pbno; + logfp->fi_count = sizeof (ml_odunit_t); + logfp->fi_memp = (caddr_t)&odi; + logfp->fi_offset = 0; + if (diskread(logfp)) { + logfp->fi_blocknum = eb->extents[0].pbno + 1; + logfp->fi_count = sizeof (ml_odunit_t); + logfp->fi_memp = (caddr_t)&odi; + logfp->fi_offset = 0; + if (diskread(logfp)) { + dprintf("Failed to read on-disk log header.\n"); + err = LOG_IS_ERRORED; + goto out; + } + } + + /* + * Verify that we understand this log, and + * that the log isn't bad or empty. 
+ */ + if (odi.od_version != LUFS_VERSION_LATEST) { + dprintf("On-disk log format v%d != supported format v%d.\n", + odi.od_version, LUFS_VERSION_LATEST); + err = LOG_IS_ERRORED; + } else if (odi.od_badlog) { + dprintf("On-disk log is marked bad.\n"); + err = LOG_IS_ERRORED; + } else if (odi.od_chksum != odi.od_head_ident + odi.od_tail_ident) { + dprintf("On-disk log checksum %d != ident sum %d.\n", + odi.od_chksum, odi.od_head_ident + odi.od_tail_ident); + err = LOG_IS_ERRORED; + } else { + /* + * All consistency checks ok. Scan the log, build the + * log hash. If this succeeds we'll be using the log + * when reading from this filesystem. + */ + err = lufs_logscan(); + } +out: + ufs_is_lufs = 1; + switch (err) { + case LOG_IS_EMPTY: + if (boothowto & RB_VERBOSE) + printf("The ufs log is empty and will not be used.\n"); + lufs_closeall(); + break; + case LOG_IS_OK: + if (boothowto & RB_VERBOSE) + printf("Using the ufs log.\n"); + break; + case LOG_IS_ERRORED: + if (boothowto & RB_VERBOSE) + printf("Couldn't build log hash. Can't use ufs log.\n"); + lufs_closeall(); + break; + default: + dprintf("Invalid error %d while scanning the ufs log.\n", err); + break; + } +} + +static int +lufs_logscan_read(int32_t *addr, struct delta *d) +{ + *addr = lufs_read_log(*addr, (caddr_t)d, sizeof (struct delta)); + + if (*addr == 0 || + d->d_typ < DT_NONE || d->d_typ > DT_MAX || + d->d_nb >= odi.od_logsize) + return (0); + + return (1); +} + +static int +lufs_logscan_skip(int32_t *addr, struct delta *d) +{ + switch (d->d_typ) { + case DT_COMMIT: + /* + * A DT_COMMIT delta has no size as such, but will + * always "fill up" the sector that contains it. + * The next delta header is found at the beginning + * of the next 512-Bytes sector, adjust "addr" to + * reflect that. + */ + *addr += ((*addr & (DEV_BSIZE - 1))) ? 
+ NB_LEFT_IN_SECTOR(*addr) + + sizeof (sect_trailer_t) : 0; + return (1); + case DT_CANCEL: + case DT_ABZERO: + /* + * These types of deltas occupy no space in the log + */ + return (1); + default: + /* + * Skip over the delta contents. + */ + *addr = lufs_read_log(*addr, NULL, d->d_nb); + } + + return (*addr != NULL); +} + +static void +lufs_logscan_freecancel(void) +{ + lb_me_t **lh, *l, *lnext; + int i; + + /* + * Walk the entire log hash and put cancelled entries + * onto the freelist. Corner cases: + * a) empty hash chain (*lh == NULL) + * b) only one entry in chain, and that is cancelled. + * If for every cancelled delta another one would've + * been added, this situation couldn't occur, but a + * DT_CANCEL delta can lead to this as it is never + * added. + */ + for (i = 0; i < LB_HASHSIZE; i++) { + lh = &loghash[i]; + l = *lh; + do { + if (*lh == (lb_me_t *)NULL) + break; + lnext = l->l_next; + if (l->l_flags & LB_ISCANCELLED) { + remlist(lh, l); + bzero((caddr_t)l, sizeof (lb_me_t)); + l->l_next = lfreelist; + lfreelist = l; + /* + * Just removed the hash head. In order not + * to terminate the while loop, respin chain + * walk for this hash chain. + */ + if (lnext == *lh) { + i--; + break; + } + } + l = lnext; + } while (l != *lh); + } +} + +static int +lufs_logscan_addmap(int32_t *addr, struct delta *d) +{ + lb_me_t **lh, *l; + + switch (d->d_typ) { + case DT_COMMIT: + /* + * Handling DT_COMMIT deltas is special. We need to: + * 1. increase the transaction ID + * 2. remove cancelled entries. + */ + lufs_logscan_freecancel(); + curtid++; + break; + case DT_INODE: + /* + * Deltas against parts of on-disk inodes are + * assumed to be timestamps. Ignore those. 
		 */
		if (d->d_nb != sizeof (struct dinode))
			break;
		/* FALLTHROUGH */
	case DT_CANCEL:
	case DT_ABZERO:
	case DT_AB:
	case DT_DIR:
	case DT_FBI:
		/*
		 * These types of deltas contain and/or modify structural
		 * information that is needed for booting the system:
		 * - where to find a file (DT_DIR, DT_FBI)
		 * - the file itself (DT_INODE)
		 * - data blocks associated with a file (DT_AB, DT_ABZERO)
		 *
		 * Building the hash chains becomes complicated because there
		 * may exist an older (== previously added) entry that overlaps
		 * with the one we want to add.
		 * Four cases must be distinguished:
		 * 1. The new delta is an exact match for an existing one,
		 *    or is a superset of an existing one, and both
		 *    belong to the same transaction.
		 *    The new delta completely supersedes the old one, so
		 *    remove that and reuse the structure for the new.
		 *    Then add the new delta to the head of the hashchain.
		 * 2. The new delta is an exact match for an existing one,
		 *    or is a superset of an existing one, but the two
		 *    belong to different transactions (i.e. the old one is
		 *    committed).
		 *    The existing one is marked to be cancelled when the
		 *    next DT_COMMIT record is found, and the hash chain
		 *    walk is continued as there may be more existing entries
		 *    found which overlap the new delta (happens if that is
		 *    a superset of those in the log).
		 *    Once no more overlaps are found, goto 4.
		 * 3. An existing entry completely covers the new one.
		 *    The new delta is then added directly before this
		 *    existing one.
		 * 4. No (more) overlaps with existing entries are found.
		 *    Unless this is a DT_CANCEL delta, whose only purpose
		 *    is already handled by marking overlapping entries for
		 *    cancellation, add the new delta at the hash chain head.
		 *
		 * This strategy makes sure that the hash chains are properly
		 * ordered. lufs_merge_deltas() walks the hash chain backward,
		 * which then ensures that delta merging is done in the same
		 * order as those deltas occur in the log - remember, the
		 * log can only be read in one direction.
		 *
		 */
		lh = &loghash[LB_HASHFUNC(d->d_mof)];
		l = *lh;
		do {
			if (l == (lb_me_t *)NULL)
				break;
			/*
			 * This covers the first two cases above.
			 * If this is a perfect match from the same transaction,
			 * and it isn't already cancelled, we simply replace it
			 * with its newer incarnation.
			 * Otherwise, mark it for cancellation. Handling of
			 * DT_COMMIT is going to remove it, then.
			 */
			if (WITHIN(l->l_mof, l->l_nb, d->d_mof, d->d_nb)) {
				if (!(l->l_flags & LB_ISCANCELLED)) {
					if (l->l_tid == curtid &&
					    d->d_typ != DT_CANCEL) {
						remlist(lh, l);
						l->l_mof = d->d_mof;
						l->l_lof = *addr;
						l->l_nb = d->d_nb;
						l->l_typ = d->d_typ;
						l->l_flags = 0;
						l->l_tid = curtid;
						inslist(lh, l);
						return (1);
					} else {
						/*
						 * 2nd case - cancel only.
						 */
						l->l_flags |= LB_ISCANCELLED;
					}
				}
			} else if (WITHIN(d->d_mof, d->d_nb,
			    l->l_mof, l->l_nb)) {
				/*
				 * This is the third case above.
				 * With deltas DT_ABZERO/DT_AB and DT_FBI/DT_DIR
				 * this may happen - an existing previous delta
				 * is larger than the current one we're planning
				 * to add - DT_ABZERO deltas are supersets of
				 * DT_AB deltas, and likewise DT_FBI/DT_DIR.
				 * In order to do merging correctly, such deltas
				 * put up a barrier for new ones that overlap,
				 * and we have to add the new delta immediately
				 * before (!) the existing one.
				 */
				lb_me_t *newl;
				newl = lufs_alloc_me();
				if (newl == (lb_me_t *)NULL) {
					/*
					 * No memory. Throw away everything
					 * and try booting without logging
					 * support.
					 */
					curtid = 0;
					return (0);
				}
				newl->l_mof = d->d_mof;
				newl->l_lof = *addr;	/* "payload" address */
				newl->l_nb = d->d_nb;
				newl->l_typ = d->d_typ;
				newl->l_tid = curtid;
				/* splice newl in directly in front of l */
				newl->l_prev = l->l_prev;
				newl->l_next = l;
				l->l_prev->l_next = newl;
				l->l_prev = newl;
				if (*lh == l)
					*lh = newl;
				return (1);
			}
			l = l->l_next;
		} while (l != *lh);

		/*
		 * This is case 4., add a new delta at the head of the chain.
		 *
		 * If the new delta is a DT_CANCEL entry, we handled it by
		 * marking everything it covered for cancellation. We can
		 * get by without actually adding the delta itself to the
		 * hash, as it'd need to be removed by the commit code anyway.
		 */
		if (d->d_typ == DT_CANCEL)
			break;

		l = lufs_alloc_me();
		if (l == (lb_me_t *)NULL) {
			/*
			 * No memory. Throw away everything
			 * and try booting without logging
			 * support.
			 */
			curtid = 0;
			return (0);
		}
		l->l_mof = d->d_mof;
		l->l_lof = *addr;	/* this is the "payload" address */
		l->l_nb = d->d_nb;
		l->l_typ = d->d_typ;
		l->l_tid = curtid;
		inslist(lh, l);
		break;
	default:
		break;
	}
	return (1);
}

/*
 * Prepare for scanning the log: move the recorded tail so that the
 * whole log gets scanned, reset the transaction counter, and allocate
 * the log buffer and the (zeroed) log hash array from it.
 * Returns 1 on success, 0 if either allocation fails.
 */
static int
lufs_logscan_prescan(void)
{
	/*
	 * Simulate a full log by setting the tail to be one sector
	 * behind the head. This will make the logscan read all
	 * of the log until an out-of-sequence sector ident is
	 * found.
	 */
	odi.od_tail_lof = dbtob(btodb(odi.od_head_lof)) - DEV_BSIZE;
	if (odi.od_tail_lof < odi.od_bol_lof)
		odi.od_tail_lof = odi.od_eol_lof - DEV_BSIZE;
	if (odi.od_tail_lof >= odi.od_eol_lof)
		odi.od_tail_lof = odi.od_bol_lof;

	/*
	 * While sector trailers maintain TID values, od_head_tid
	 * is not being updated by the kernel ufs logging support
	 * at this time. We therefore count transactions ourselves
	 * starting at zero - as does the kernel ufs logscan code.
	 */
	curtid = 0;

	if (!lufs_alloc_logbuf()) {
		dprintf("Failed to allocate log buffer.\n");
		return (0);
	}

	loghash = (lb_me_t **)lufs_alloc_from_logbuf(
	    LB_HASHSIZE * sizeof (lb_me_t *));
	if (loghash == (lb_me_t **)NULL) {
		dprintf("Can't allocate loghash[] array.");
		return (0);
	}
	return (1);
}

/*
 * This function must remove all uncommitted entries (l->l_tid == curtid)
 * from the log hash. Doing this, we implicitly delete pending cancellations
 * as well.
 * It uses the same hash walk algorithm as lufs_logscan_freecancel(). Only
 * the check for entries that need to be removed is different.
 */
static void
lufs_logscan_postscan(void)
{
	lb_me_t	**lh, *l, *lnext;
	int	i;

	for (i = 0; i < LB_HASHSIZE; i++) {
		lh = &loghash[i];
		l = *lh;
		do {
			if (l == (lb_me_t *)NULL)
				break;
			lnext = l->l_next;
			if (l->l_tid == curtid) {
				remlist(lh, l);
				bzero((caddr_t)l, sizeof (lb_me_t));
				l->l_next = lfreelist;
				lfreelist = l;
				if (*lh == (lb_me_t *)NULL)
					break;
				/*
				 * Just removed the hash head. In order not
				 * to terminate the while loop, respin chain
				 * walk for this hash chain.
				 */
				if (lnext == *lh) {
					i--;
					break;
				}
			} else {
				/* committed entry: drop a pending cancel */
				l->l_flags &= ~(LB_ISCANCELLED);
			}
			l = lnext;
		} while (l != *lh);
	}
}

/*
 * This function builds the log hash. It performs the same sequence
 * of actions at logscan as the kernel ufs logging support:
 * - Prepare the log for scanning by simulating a full log.
 * - As long as sectors read from the log have contiguous idents, do:
 *	read the delta header
 *	add the delta to the logmap
 *	skip over the contents to the start of the next delta header
 * - After terminating the scan, remove uncommitted entries.
 *
 * This function cannot fail except if mapping the logbuffer area
 * during lufs_logscan_prescan() fails.
If there is a structural + * integrity problem and the on-disk log cannot be read, we'll + * treat this as the same situation as an uncommitted transaction + * at the end of the log (or, corner case of that, an empty log + * with no committed transactions in it at all). + * + */ +static int +lufs_logscan(void) +{ + int32_t addr; + struct delta d; + + if (!lufs_logscan_prescan()) + return (LOG_IS_ERRORED); + + addr = odi.od_head_lof; + + /* + * Note that addr == od_tail_lof means a completely filled + * log. This almost never happens, so the common exit path + * from this loop is via one of the 'break's. + */ + while (addr != odi.od_tail_lof) { + if (!lufs_logscan_read(&addr, &d)) + break; + if (!lufs_logscan_addmap(&addr, &d)) + return (LOG_IS_ERRORED); + if (!lufs_logscan_skip(&addr, &d)) + break; + } + + lufs_logscan_postscan(); + /* + * Check whether the log contains data, and if so whether + * it contains committed data. + */ + if (addr == odi.od_head_lof || curtid == 0) { + return (LOG_IS_EMPTY); + } + return (LOG_IS_OK); +} + +/* + * A metadata block was read from disk. Check whether the logmap + * has a delta against this byte range, and if so read it in, since + * the data in the log is more recent than what was read from other + * places on the disk. + */ +void +lufs_merge_deltas(fileid_t *fp) +{ + int nb; + int64_t bof; + lb_me_t **lh, *l; + int32_t skip; + + /* + * No logmap: Empty log. Nothing to do here. + */ + if (!ufs_is_lufs || logbuffer == (caddr_t)NULL) + return; + + bof = ldbtob(fp->fi_blocknum); + nb = fp->fi_count; + + /* + * Search the log hash. + * Merge deltas if an overlap is found. + */ + + lh = &loghash[LB_HASHFUNC(bof)]; + + if (*lh == (lb_me_t *)NULL) + return; + + l = *lh; + + do { + l = l->l_prev; + if (OVERLAP(l->l_mof, l->l_nb, bof, nb)) { + /* + * Found a delta in the log hash which overlaps + * with the current metadata block. Read the + * actual delta payload from the on-disk log + * directly into the file buffer. 
+ */ + if (l->l_typ != DT_ABZERO) { + /* + * We have to actually read this part of the + * log as it could contain a sector trailer, or + * wrap around the end of the log. + * If it did, the second offset generation would + * be incorrect if we'd started at l->l_lof. + */ + if (!(skip = lufs_read_log(l->l_lof, NULL, + MAX(bof - l->l_mof, 0)))) + dprintf("scan/merge error, pre-skip\n"); + if (!(skip = lufs_read_log(skip, + fp->fi_memp + MAX(l->l_mof - bof, 0), + MIN(l->l_mof + l->l_nb, bof + nb) - + MAX(l->l_mof, bof)))) + dprintf("scan/merge error, merge\n"); + } else { + /* + * DT_ABZERO requires no disk access, just + * clear the byte range which overlaps with + * the delta. + */ + bzero(fp->fi_memp + MAX(l->l_mof - bof, 0), + MIN(l->l_mof + l->l_nb, bof + nb) - + MAX(l->l_mof, bof)); + } + } + } while (l->l_prev != (*lh)->l_prev); + + printf("*\b"); +} + +void +lufs_closeall(void) +{ + if (ufs_is_lufs) { + bkmem_free((char *)eb, logfp->fi_devp->un_fs.di_fs.fs_bsize); + bkmem_free((char *)logfp, sizeof (fileid_t)); + eb = (extent_block_t *)NULL; + bzero((caddr_t)&odi, sizeof (ml_odunit_t)); + logfp = (fileid_t *)NULL; + lufs_free_logbuf(); + ufs_is_lufs = 0; + } +} diff --git a/usr/src/stand/lib/fs/ufs/ufsops.c b/usr/src/stand/lib/fs/ufs/ufsops.c new file mode 100644 index 0000000000..a8f322bce0 --- /dev/null +++ b/usr/src/stand/lib/fs/ufs/ufsops.c @@ -0,0 +1,950 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/fs/ufs_fsdir.h> +#include <sys/fs/ufs_fs.h> +#include <sys/fs/ufs_inode.h> +#include <sys/sysmacros.h> +#include <sys/promif.h> + +#include <sys/stat.h> +#include <sys/bootvfs.h> +#include <sys/bootdebug.h> +#include <sys/salib.h> +#include <sys/sacache.h> + + +int print_cache_stats = 0; + +/* + * This fd is used when talking to the device file itself. + */ +static fileid_t *head; +/* + * hooks into ufs logging support + */ +extern void lufs_boot_init(fileid_t *); +extern void lufs_closeall(void); +extern void lufs_merge_deltas(fileid_t *); + +/* Only got one of these...ergo, only 1 fs open at once */ +/* static */ +devid_t *ufs_devp; + +struct dirinfo { + int loc; + fileid_t *fi; +}; + +/* + * Function prototypes + */ +static int boot_ufs_mountroot(char *str); +static int boot_ufs_unmountroot(void); +static int boot_ufs_open(char *filename, int flags); +static int boot_ufs_close(int fd); +static ssize_t boot_ufs_read(int fd, caddr_t buf, size_t size); +static off_t boot_ufs_lseek(int, off_t, int); +static int boot_ufs_fstat(int fd, struct bootstat *stp); +static void boot_ufs_closeall(int flag); +static int boot_ufs_getdents(int fd, struct dirent *dep, unsigned size); + +struct boot_fs_ops boot_ufs_ops = { + "ufs", + boot_ufs_mountroot, + boot_ufs_unmountroot, + boot_ufs_open, + boot_ufs_close, + boot_ufs_read, + boot_ufs_lseek, + boot_ufs_fstat, + boot_ufs_closeall, + 
	boot_ufs_getdents
};

static	ino_t	find(fileid_t *filep, char *path);
static	ino_t	dlook(fileid_t *filep, char *path);
static	daddr32_t	sbmap(fileid_t *filep, daddr32_t bn);
static	struct direct	*readdir(struct dirinfo *dstuff);

/* These are the pools of buffers, etc. */
#define	NBUFS	(NIADDR+1)
/* Compilers like to play with alignment, so force the issue here */
static union {
	char		*blk[NBUFS];
	daddr32_t	*dummy;
} b;
daddr32_t	blknos[NBUFS];

/*
 * There is only 1 open (mounted) device at any given time.
 * So we can keep a single, global devp file descriptor to
 * use to index into the di[] array. This is not true for the
 * fi[] array. We can have more than one file open at once,
 * so there is no global fd for the fi[].
 * The user program must save the fd passed back from open()
 * and use it to do subsequent read()'s.
 */

/*
 * Make filep->fi_inode refer to the in-core copy of inode number
 * "inode", taking it from the inode cache if possible and otherwise
 * reading the inode's disk block (through the block cache).
 * Returns 0 on success, or the non-zero result of set_bcache() if
 * reading the block fails.
 */
static int
openi(fileid_t *filep, ino_t inode)
{
	int retval;
	struct dinode *dp;
	devid_t *devp = filep->fi_devp;

	/* Try the inode cache first */
	if ((filep->fi_inode = get_icache(devp->di_dcookie, inode)) != NULL)
		return (0);
	/* Nope, not there so lets read it off the disk. */
	filep->fi_offset = 0;
	filep->fi_blocknum = fsbtodb(&devp->un_fs.di_fs,
	    itod(&devp->un_fs.di_fs, inode));

	/* never more than 1 disk block */
	filep->fi_count = devp->un_fs.di_fs.fs_bsize;
	filep->fi_memp = filep->fi_buf;

	/* Maybe the block is in the disk block cache */
	if ((filep->fi_memp = get_bcache(filep)) == NULL) {
		/* Not in the block cache so read it from disk */
		if (retval = set_bcache(filep))
			return (retval);
		/* freshly read metadata: apply any newer deltas from the log */
		lufs_merge_deltas(filep);
	}

	dp = (struct dinode *)filep->fi_memp;
	filep->fi_inode = (struct inode *)
	    bkmem_alloc(sizeof (struct inode));
	bzero((char *)filep->fi_inode, sizeof (struct inode));
	filep->fi_inode->i_ic =
	    dp[itoo(&devp->un_fs.di_fs, inode)].di_un.di_icom;
	filep->fi_inode->i_number = inode;
	/* a non-zero return from set_ricache() gets the inode marked FI_NOCACHE */
	if (set_ricache(devp->di_dcookie, inode, (void *)filep->fi_inode,
	    sizeof (struct inode)))
		filep->fi_inode->i_flag = FI_NOCACHE;
	return (0);
}

/*
 * Look up the open file with descriptor "fd" on the list anchored at
 * "head". Returns its fileid_t if that is marked taken, otherwise 0.
 */
static fileid_t *
find_fp(int fd)
{
	fileid_t *filep = head;

	if (fd >= 0) {
		while ((filep = filep->fi_forw) != head)
			if (fd == filep->fi_filedes)
				return (filep->fi_taken ?
filep : 0); + } + + return (0); +} + +static ino_t +find(fileid_t *filep, char *path) +{ + char *q; + char c; + ino_t inode; + char lpath[MAXPATHLEN]; + char *lpathp = lpath; + int len, r; + devid_t *devp; + + if (path == NULL || *path == '\0') { + printf("null path\n"); + return ((ino_t)0); + } + + bzero(lpath, sizeof (lpath)); + bcopy(path, lpath, strlen(path)); + devp = filep->fi_devp; + while (*lpathp) { + /* if at the beginning of pathname get root inode */ + r = (lpathp == lpath); + if (r && openi(filep, (ino_t)UFSROOTINO)) + return ((ino_t)0); + while (*lpathp == '/') + lpathp++; /* skip leading slashes */ + q = lpathp; + while (*q != '/' && *q != '\0') + q++; /* find end of component */ + c = *q; + *q = '\0'; /* terminate component */ + + /* Bail out early if opening root */ + if (r && (*lpathp == '\0')) + return ((ino_t)UFSROOTINO); + if ((inode = dlook(filep, lpathp)) != 0) { + if (openi(filep, inode)) + return ((ino_t)0); + if ((filep->fi_inode->i_smode & IFMT) == IFLNK) { + filep->fi_blocknum = + fsbtodb(&devp->un_fs.di_fs, + filep->fi_inode->i_db[0]); + filep->fi_count = DEV_BSIZE; + /* check the block cache */ + if ((filep->fi_memp = get_bcache(filep)) + == NULL) { + if (set_bcache(filep)) + return ((ino_t)0); + lufs_merge_deltas(filep); + } + len = strlen(filep->fi_memp); + if (filep->fi_memp[0] == '/') + /* absolute link */ + lpathp = lpath; + /* copy rest of unprocessed path up */ + bcopy(q, lpathp + len, strlen(q + 1) + 2); + /* point to unprocessed path */ + *(lpathp + len) = c; + /* prepend link in before unprocessed path */ + bcopy(filep->fi_memp, lpathp, len); + lpathp = lpath; + continue; + } else + *q = c; + if (c == '\0') + break; + lpathp = q; + continue; + } else { + return ((ino_t)0); + } + } + return (inode); +} + +static daddr32_t +sbmap(fileid_t *filep, daddr32_t bn) +{ + struct inode *inodep; + int i, j, sh; + daddr32_t nb, *bap; + daddr32_t *db; + devid_t *devp; + + devp = filep->fi_devp; + inodep = filep->fi_inode; + db = 
inodep->i_db; + + /* + * blocks 0..NDADDR are direct blocks + */ + if (bn < NDADDR) { + nb = db[bn]; + return (nb); + } + + /* + * addresses NIADDR have single and double indirect blocks. + * the first step is to determine how many levels of indirection. + */ + sh = 1; + bn -= NDADDR; + for (j = NIADDR; j > 0; j--) { + sh *= NINDIR(&devp->un_fs.di_fs); + if (bn < sh) + break; + bn -= sh; + } + if (j == 0) { + return ((daddr32_t)0); + } + + /* + * fetch the first indirect block address from the inode + */ + nb = inodep->i_ib[NIADDR - j]; + if (nb == 0) { + return ((daddr32_t)0); + } + + /* + * fetch through the indirect blocks + */ + for (; j <= NIADDR; j++) { + if (blknos[j] != nb) { + filep->fi_blocknum = fsbtodb(&devp->un_fs.di_fs, nb); + filep->fi_count = devp->un_fs.di_fs.fs_bsize; + /* First look through the disk block cache */ + if ((filep->fi_memp = get_bcache(filep)) == NULL) { + if (set_bcache(filep)) /* Gotta do I/O */ + return (0); + lufs_merge_deltas(filep); + } + b.blk[j] = filep->fi_memp; + blknos[j] = nb; + } + bap = (daddr32_t *)b.blk[j]; + sh /= NINDIR(&devp->un_fs.di_fs); + i = (bn / sh) % NINDIR(&devp->un_fs.di_fs); + nb = bap[i]; + if (nb == 0) { + return ((daddr32_t)0); + } + } + return (nb); +} + +static ino_t +dlook(fileid_t *filep, char *path) +{ + devid_t *devp = filep->fi_devp; + struct direct *dp; + struct inode *ip; + struct dirinfo dirp; + int len; + ino_t in; +#ifdef DEBUG + static int warned = 0; +#endif + + ip = filep->fi_inode; + if (path == NULL || *path == '\0') + return (0); + if ((ip->i_smode & IFMT) != IFDIR) + return (0); + if (ip->i_size == 0) + return (0); + len = strlen(path); + + /* + * First look through the directory entry cache + */ + if ((in = get_dcache(devp->di_dcookie, path, ip->i_number)) != 0) + return (in); + + /* + * If the entire directory is cached, return failure + */ + if (ip->i_flag & FI_CACHED) + return (0); + + /* + * Otherwise, read the entire directory into the cache + */ + in = 0; + dirp.loc = 0; + 
dirp.fi = filep; + if (!(ip->i_flag & FI_NOCACHE)) + ip->i_flag |= FI_CACHED; + for (dp = readdir(&dirp); dp != NULL; dp = readdir(&dirp)) { + if (dp->d_ino == 0) + continue; + if (dp->d_namlen == len && strcmp(path, dp->d_name) == 0) + in = dp->d_ino; + + /* + * Allow "*" to print all names at that level, w/out match + */ + if (strcmp(path, "*") == 0) + printf("%s\n", dp->d_name); + + if (ip->i_flag & FI_NOCACHE) + continue; + + /* + * Put this entry into the cache. If the entry has been + * partially cached, check before inserting. This should be + * rare if sized correctly + */ + if ((ip->i_flag & FI_PARTIAL_CACHE) && + (get_dcache(devp->di_dcookie, dp->d_name, dp->d_ino) != 0)) + continue; + + if (set_rdcache(devp->di_dcookie, dp->d_name, ip->i_number, + dp->d_ino)) { + ip->i_flag &= ~FI_CACHED; + ip->i_flag |= FI_PARTIAL_CACHE; +#ifdef DEBUG + if (!warned) { + printf("ufsboot: directory cache too small\n"); + warned++; + } +#endif + } + } + return (in); +} + +/* + * get next entry in a directory. + */ +struct direct * +readdir(struct dirinfo *dstuff) +{ + struct direct *dp; + fileid_t *filep; + daddr32_t lbn, d; + int off; + devid_t *devp; + + filep = dstuff->fi; + devp = filep->fi_devp; + for (;;) { + if (dstuff->loc >= filep->fi_inode->i_size) { + return (NULL); + } + off = blkoff(&devp->un_fs.di_fs, dstuff->loc); + if (off == 0) { + lbn = lblkno(&devp->un_fs.di_fs, dstuff->loc); + d = sbmap(filep, lbn); + + if (d == 0) + return (NULL); + + filep->fi_blocknum = fsbtodb(&devp->un_fs.di_fs, d); + filep->fi_count = + blksize(&devp->un_fs.di_fs, filep->fi_inode, lbn); + /* check the block cache */ + if ((filep->fi_memp = get_bcache(filep)) == NULL) { + if (set_bcache(filep)) + return (NULL); + lufs_merge_deltas(filep); + } + } + dp = (struct direct *)(filep->fi_memp + off); + dstuff->loc += dp->d_reclen; + if (dp->d_ino == 0) + continue; + return (dp); + } +} + +/* + * Get the next block of data from the file. 
If possible, dma right into + * user's buffer + */ +static int +getblock(fileid_t *filep, caddr_t buf, int count, int *rcount) +{ + struct fs *fs; + caddr_t p; + int off, size, diff; + daddr32_t lbn; + devid_t *devp; +#ifndef i386 + static int pos; + static char ind[] = "|/-\\"; /* that's entertainment? */ + static int blks_read; +#endif + devp = filep->fi_devp; + p = filep->fi_memp; + if ((signed)filep->fi_count <= 0) { + + /* find the amt left to be read in the file */ + diff = filep->fi_inode->i_size - filep->fi_offset; + if (diff <= 0) { + printf("Short read\n"); + return (-1); + } + + fs = &devp->un_fs.di_fs; + /* which block (or frag) in the file do we read? */ + lbn = lblkno(fs, filep->fi_offset); + + /* which physical block on the device do we read? */ + filep->fi_blocknum = fsbtodb(fs, sbmap(filep, lbn)); + + off = blkoff(fs, filep->fi_offset); + + /* either blksize or fragsize */ + size = blksize(fs, filep->fi_inode, lbn); + filep->fi_count = size; + filep->fi_memp = filep->fi_buf; + + /* + * optimization if we are reading large blocks of data then + * we can go directly to user's buffer + */ + *rcount = 0; + if (off == 0 && count >= size) { + filep->fi_memp = buf; + if (diskread(filep)) { + return (-1); + } + *rcount = size; + filep->fi_count = 0; + read_opt++; +#ifndef i386 + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); +#endif + return (0); + } else + if (diskread(filep)) + return (-1); + + /* + * round and round she goes (though not on every block.. + * - OBP's take a fair bit of time to actually print stuff) + * On x86, the screen oriented bootconf program doesn't + * want this noise... + */ +#ifndef i386 + if ((blks_read++ & 0x3) == 0) + printf("%c\b", ind[pos++ & 3]); +#endif + + if (filep->fi_offset - off + size >= filep->fi_inode->i_size) + filep->fi_count = diff + off; + filep->fi_count -= off; + p = &filep->fi_memp[off]; + } + filep->fi_memp = p; + return (0); +} + + +/* + * This is the high-level read function. 
It works like this. + * We assume that our IO device buffers up some amount of + * data and that we can get a ptr to it. Thus we need + * to actually call the device func about filesize/blocksize times + * and this greatly increases our IO speed. When we already + * have data in the buffer, we just return that data (with bcopy() ). + */ + +static ssize_t +boot_ufs_read(int fd, caddr_t buf, size_t count) +{ + size_t i, j; + caddr_t n; + int rcount; + fileid_t *filep; + + if (!(filep = find_fp(fd))) { + return (-1); + } + + if (filep->fi_offset + count > filep->fi_inode->i_size) + count = filep->fi_inode->i_size - filep->fi_offset; + + /* that was easy */ + if ((i = count) == 0) + return (0); + + n = buf; + while (i > 0) { + /* If we need to reload the buffer, do so */ + if ((j = filep->fi_count) == 0) { + getblock(filep, buf, i, &rcount); + i -= rcount; + buf += rcount; + filep->fi_offset += rcount; + } else { + /* else just bcopy from our buffer */ + j = MIN(i, j); + bcopy(filep->fi_memp, buf, (unsigned)j); + buf += j; + filep->fi_memp += j; + filep->fi_offset += j; + filep->fi_count -= j; + i -= j; + } + } + return (buf - n); +} + +/* + * This routine will open a device as it is known by the V2 OBP. + * Interface Defn: + * err = boot_ufs_mountroot(string); + * err = 0 on success + * err = -1 on failure + * string: char string describing the properties of the device. + * We must not dork with any fi[]'s here. Save that for later. + */ + +static int +boot_ufs_mountroot(char *str) +{ + int h; + + /* + * Open the device and setup the read of the ufs superblock + * only the first time mountroot is called. Subsequent calls + * to mountroot succeed immediatly + */ + if (ufs_devp == NULL) { + + /* + * Encode the knowledge that we normally boot from the 'a' + * slice of the leaf device on the OBP path; we also permit + * a 'nolabel' device, i.e. the entire device. Since v2path + * points to 'str' as well, changing str should have the + * desired result. 
+ */ + if (strchr(str, ':') == NULL) { + (void) strcat(str, ":a"); + } + h = prom_open(str); + if (h == 0) { + printf("Cannot open %s\n", str); + return (-1); + } + + ufs_devp = (devid_t *)bkmem_alloc(sizeof (devid_t)); + ufs_devp->di_taken = 1; + ufs_devp->di_dcookie = h; + ufs_devp->di_desc = (char *)bkmem_alloc(strlen(str) + 1); + (void) strcpy(ufs_devp->di_desc, str); + bzero(ufs_devp->un_fs.dummy, SBSIZE); + head = (fileid_t *)bkmem_alloc(sizeof (fileid_t)); + head->fi_back = head->fi_forw = head; + head->fi_filedes = 0; + head->fi_taken = 0; + + /* Setup read of the superblock */ + head->fi_devp = ufs_devp; + head->fi_blocknum = SBLOCK; + head->fi_count = (uint_t)SBSIZE; + head->fi_memp = (caddr_t)&(ufs_devp->un_fs.di_fs); + head->fi_offset = 0; + + if (diskread(head) || + ufs_devp->un_fs.di_fs.fs_magic != FS_MAGIC) { + boot_ufs_closeall(1); + return (-1); + } + lufs_boot_init(head); + } + return (0); +} + +/* + * Unmount the currently mounted root fs. In practice, this means + * closing all open files and releasing resources. All of this + * is done by boot_ufs_closeall(). + */ + +int +boot_ufs_unmountroot(void) +{ + if (ufs_devp == NULL) + return (-1); + + boot_ufs_closeall(1); + + return (0); +} + +/* + * We allocate an fd here for use when talking + * to the file itself. 
+ */ + +/*ARGSUSED*/ +static int +boot_ufs_open(char *filename, int flags) +{ + fileid_t *filep; + ino_t inode; + static int filedes = 1; + + /* build and link a new file descriptor */ + filep = (fileid_t *)bkmem_alloc(sizeof (fileid_t)); + filep->fi_back = head->fi_back; + filep->fi_forw = head; + head->fi_back->fi_forw = filep; + head->fi_back = filep; + filep->fi_filedes = filedes++; + filep->fi_taken = 1; + filep->fi_path = (char *)bkmem_alloc(strlen(filename) + 1); + (void) strcpy(filep->fi_path, filename); + filep->fi_devp = ufs_devp; /* dev is already "mounted" */ + filep->fi_inode = NULL; + bzero(filep->fi_buf, MAXBSIZE); + + inode = find(filep, filename); + if (inode == (ino_t)0) { + boot_ufs_close(filep->fi_filedes); + return (-1); + } + if (openi(filep, inode)) { + boot_ufs_close(filep->fi_filedes); + return (-1); + } + + filep->fi_offset = filep->fi_count = 0; + + return (filep->fi_filedes); +} + +/* + * We don't do any IO here. + * We just play games with the device pointers. + */ + +static off_t +boot_ufs_lseek(int fd, off_t addr, int whence) +{ + fileid_t *filep; + + /* Make sure user knows what file he is talking to */ + if (!(filep = find_fp(fd))) + return (-1); + + switch (whence) { + case SEEK_CUR: + filep->fi_offset += addr; + break; + case SEEK_SET: + filep->fi_offset = addr; + break; + default: + case SEEK_END: + printf("ufs_lseek(): invalid whence value %d\n", whence); + break; + } + + filep->fi_blocknum = addr / DEV_BSIZE; + filep->fi_count = 0; + + return (0); +} + +/* + * ufs_fstat() only supports size, mode, and times at present time. 
+ */ + +static int +boot_ufs_fstat(int fd, struct bootstat *stp) +{ + fileid_t *filep; + struct inode *ip; + + if (!(filep = find_fp(fd))) + return (-1); + + ip = filep->fi_inode; + + stp->st_mode = 0; + stp->st_size = 0; + + if (ip == NULL) + return (0); + + switch (ip->i_smode & IFMT) { + case IFDIR: + stp->st_mode = S_IFDIR; + break; + case IFLNK: + stp->st_mode = S_IFLNK; + break; + case IFREG: + stp->st_mode = S_IFREG; + break; + default: + break; + } + stp->st_size = ip->i_size; + stp->st_atim.tv_sec = ip->i_atime.tv_sec; + stp->st_atim.tv_nsec = ip->i_atime.tv_usec * 1000; + stp->st_mtim.tv_sec = ip->i_mtime.tv_sec; + stp->st_mtim.tv_nsec = ip->i_mtime.tv_usec * 1000; + stp->st_ctim.tv_sec = ip->i_ctime.tv_sec; + stp->st_ctim.tv_nsec = ip->i_ctime.tv_usec * 1000; + + return (0); +} + +static int +boot_ufs_close(int fd) +{ + fileid_t *filep; + + /* Make sure user knows what file he is talking to */ + if (!(filep = find_fp(fd))) + return (-1); + + if (filep->fi_taken && (filep != head)) { + /* Clear the ranks */ + bkmem_free(filep->fi_path, strlen(filep->fi_path)+1); + filep->fi_blocknum = filep->fi_count = filep->fi_offset = 0; + filep->fi_memp = (caddr_t)0; + filep->fi_devp = 0; + filep->fi_taken = 0; + + /* unlink and deallocate node */ + filep->fi_forw->fi_back = filep->fi_back; + filep->fi_back->fi_forw = filep->fi_forw; + bkmem_free((char *)filep, sizeof (fileid_t)); + + return (0); + } else { + /* Big problem */ + printf("\nFile descrip %d not allocated!", fd); + return (-1); + } +} + +/*ARGSUSED*/ +static void +boot_ufs_closeall(int flag) +{ + fileid_t *filep = head; + + while ((filep = filep->fi_forw) != head) + if (filep->fi_taken) + if (boot_ufs_close(filep->fi_filedes)) + prom_panic("Filesystem may be inconsistent.\n"); + + release_cache(ufs_devp->di_dcookie); + (void) prom_close(ufs_devp->di_dcookie); + ufs_devp->di_taken = 0; + if (verbosemode & print_cache_stats) + print_cache_data(); + lufs_closeall(); + bkmem_free((char *)ufs_devp, sizeof 
(devid_t)); + bkmem_free((char *)head, sizeof (fileid_t)); + ufs_devp = (devid_t *)NULL; + head = (fileid_t *)NULL; +} + +static int +boot_ufs_getdents(int fd, struct dirent *dep, unsigned size) +{ + /* + * Read directory entries from the file open on "fd" into the + * "size"-byte buffer at "dep" until the buffer is exhausted + * or we reach EOF on the directory. Returns the number of + * entries read. + */ + int n; + fileid_t *fp; + unsigned long oldoff, oldblok; + +#define SLOP (sizeof (struct dirent) - (int)&((struct dirent *)0)->d_name[1]) + + if (fp = find_fp(fd)) { + /* + * File is open, check type to make sure it's a directory. + */ + + while ((fp->fi_inode->i_smode & IFMT) == IFLNK) { + /* + * If file is a symbolic link, we'll follow + * it JIC it points to a directory! + */ + fileid_t fx; + char pn[MAXPATHLEN]; + fp->fi_count = DEV_BSIZE; + fp->fi_blocknum = fsbtodb(&fp->fi_devp->un_fs.di_fs, + fp->fi_inode->i_db[0]); + + /* + * Return failure if: + * (a) we get an I/O error reading the path name. + * (b) the path name points to a non-existant file, + * (c) we get an I/O error reading the target inode. + */ + if ((fp->fi_memp = get_bcache(fp)) == NULL) { + if (set_bcache(fp)) + return (-1); + lufs_merge_deltas(fp); + } + if (!(n = find(&fx, strcpy(pn, fp->fi_memp))) || + openi(fp = &fx, n)) { + return (-1); + } + } + + if ((fp->fi_inode->i_smode & IFMT) == IFDIR) { + /* + * If target file is a directory, go ahead + * and read it. This consists of making + * repeated calls to readdir() until we reach + * end-of-file or run out of buffer space. + */ + int cnt = 0; + struct direct *dp; + struct dirinfo dir; + + dir.fi = fp; + oldblok = fp->fi_blocknum; + dir.loc = oldoff = fp->fi_offset; + + for (dp = readdir(&dir); dp; dp = readdir(&dir)) { + /* + * Read all directory entries in the file ... + */ + + if (dp->d_ino) { + /* + * Next entry is valid. 
+ * Compute name length and + * break loop if there's not + * enough space in the output + * buffer for the next entry. + * + * NOTE: "SLOP" is the number + * of bytes inserted into the + * dirent struct's "d_name" + * field by the compiler to + * preserve alignment. + */ + dep->d_ino = dp->d_ino; + n = strlen(dp->d_name); + n = roundup((sizeof (struct dirent) + + ((n > SLOP) ? n : 0)), + sizeof (off_t)); + + if (n > size) + break; /* user buffer is full */ + + oldblok = fp->fi_blocknum; + oldoff = dir.loc; + size -= n; + cnt += 1; + + (void) strcpy(dep->d_name, dp->d_name); + dep->d_off = dir.loc; + dep->d_reclen = (ushort_t)n; + + dep = (struct dirent *) + ((char *)dep + n); + } + } + /* + * Remember where we left off for next time + */ + fp->fi_blocknum = oldblok; + fp->fi_offset = oldoff; + + return (cnt); + } + } + +#undef SLOP + + return (-1); +} |