summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/fs/devfs/devfs_vfsops.c
blob: 2d834eb693032e5855352d0d93ebafed1fe42dd5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This is the device filesystem.
 *
 * It is a combination of a namer to drive autoconfiguration,
 * plus the access methods for the device drivers of the system.
 *
 * The prototype is fairly dependent on specfs for the latter part
 * of its implementation, though a final version would integrate the two.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/time.h>
#include <sys/pathname.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <fs/fs_subr.h>
#include <sys/fs/dv_node.h>
#include <sys/fs/snode.h>
#include <sys/sunndi.h>
#include <sys/policy.h>
#include <sys/sunmdi.h>

/*
 * devfs vfs operations.
 */
static int devfs_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int devfs_unmount(struct vfs *, int, struct cred *);
static int devfs_root(struct vfs *, struct vnode **);
static int devfs_statvfs(struct vfs *, struct statvfs64 *);
static int devfs_mountroot(struct vfs *, enum whymountroot);

static int devfsinit(int, char *);

static vfsdef_t devfs_vfssw = {
	VFSDEF_VERSION,
	"devfs",	/* type name string */
	devfsinit,	/* init routine */
	0,		/* flags */
	NULL		/* mount options table prototype */
};

static kmutex_t devfs_lock;	/* protects global data */
static int devfstype;		/* fstype */
static dev_t devfsdev;		/* the fictious 'device' we live on */
static struct devfs_data *devfs_mntinfo;	/* linked list of instances */

/*
 * Module linkage information
 */
static struct modlfs modlfs = {
	&mod_fsops, "devices filesystem", &devfs_vfssw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};

int
_init(void)
{
	int e;

	mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
	dv_node_cache_init();
	if ((e = mod_install(&modlinkage)) != 0) {
		dv_node_cache_fini();
		mutex_destroy(&devfs_lock);
		return (e);
	}
	dcmn_err(("devfs loaded\n"));
	return (0);
}

int
_fini(void)
{
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*ARGSUSED1*/
static int
devfsinit(int fstype, char *name)
{
	static const fs_operation_def_t devfs_vfsops_template[] = {
		VFSNAME_MOUNT,		{ .vfs_mount = devfs_mount },
		VFSNAME_UNMOUNT,	{ .vfs_unmount = devfs_unmount },
		VFSNAME_ROOT,		{ .vfs_root = devfs_root },
		VFSNAME_STATVFS,	{ .vfs_statvfs = devfs_statvfs },
		VFSNAME_SYNC,		{ .vfs_sync = fs_sync },
		VFSNAME_MOUNTROOT,	{ .vfs_mountroot = devfs_mountroot },
		NULL,			NULL
	};
	int error;
	int dev;
	extern major_t getudev(void);	/* gack - what a function */

	devfstype = fstype;
	/*
	 * Associate VFS ops vector with this fstype
	 */
	error = vfs_setfsops(fstype, devfs_vfsops_template, NULL);
	if (error != 0) {
		cmn_err(CE_WARN, "devfsinit: bad vfs ops template");
		return (error);
	}

	error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops);
	if (error != 0) {
		(void) vfs_freevfsops_by_type(fstype);
		cmn_err(CE_WARN, "devfsinit: bad vnode ops template");
		return (error);
	}

	/*
	 * Invent a dev_t (sigh).
	 */
	if ((dev = getudev()) == DDI_MAJOR_T_NONE) {
		cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
		dev = 0;
	}
	devfsdev = makedevice(dev, 0);

	return (0);
}

/*
 * The name of the mount point and the name of the attribute
 * filesystem are passed down from userland for now.
 */
static int
devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
    struct cred *cr)
{
	struct devfs_data *devfs_data;
	struct vnode *avp;
	struct dv_node *dv;
	struct vattr va;

	dcmn_err(("devfs_mount\n"));

	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
		return (EPERM);

	/*
	 * check that the mount point is sane
	 */
	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	ASSERT(uap->flags & MS_SYSSPACE);
	/*
	 * Devfs can only be mounted from kernel during boot.
	 * avp is the existing /devices, the same as the mount point.
	 */
	avp = mvp;

	/*
	 * Create and initialize the vfs-private data.
	 * This includes a hand-crafted root vnode (we build
	 * this here mostly so that traverse() doesn't sleep
	 * in VFS_ROOT()).
	 */
	mutex_enter(&devfs_lock);
	ASSERT(devfs_mntinfo == NULL);
	dv = dv_mkroot(vfsp, devfsdev);
	dv->dv_attrvp = avp;		/* attribute root vp */

	ASSERT(dv == dv->dv_dotdot);

	devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
	devfs_data->devfs_vfsp = vfsp;
	devfs_data->devfs_root = dv;

	vfsp->vfs_data = (caddr_t)devfs_data;
	vfsp->vfs_fstype = devfstype;
	vfsp->vfs_dev = devfsdev;
	vfsp->vfs_bsize = DEV_BSIZE;
	vfsp->vfs_mtime = ddi_get_time();
	vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);

	/* We're there. */
	devfs_mntinfo = devfs_data;
	mutex_exit(&devfs_lock);

	va.va_mask = AT_ATIME|AT_MTIME;
	gethrestime(&va.va_atime);
	gethrestime(&va.va_mtime);
	(void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL);
	return (0);
}


/*
 * We never unmount devfs in a real production system.
 */
/*ARGSUSED*/
static int
devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
{
	return (EBUSY);
}

/*
 * return root vnode for given vfs
 */
static int
devfs_root(struct vfs *vfsp, struct vnode **vpp)
{
	dcmn_err(("devfs_root\n"));
	*vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
	VN_HOLD(*vpp);
	return (0);
}

/*
 * return 'generic superblock' information to userland.
 *
 * not much that we can usefully admit to here
 */
static int
devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
{
	extern kmem_cache_t *dv_node_cache;

	dev32_t d32;

	dcmn_err(("devfs_statvfs\n"));
	bzero(sbp, sizeof (*sbp));
	sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
	/*
	 * We could compute the number of devfsnodes here .. but since
	 * it's dynamic anyway, it's not clear how useful this is.
	 */
	sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");

	/* no illusions that free/avail files is relevant to devfs */
	sbp->f_ffree = 0;
	sbp->f_favail = 0;

	/* no illusions that blocks are relevant to devfs */
	sbp->f_bfree = 0;
	sbp->f_bavail = 0;
	sbp->f_blocks = 0;

	(void) cmpldev(&d32, vfsp->vfs_dev);
	sbp->f_fsid = d32;
	(void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
	sbp->f_namemax = MAXNAMELEN - 1;
	(void) strcpy(sbp->f_fstr, "devices");

	return (0);
}

/*
 * devfs always mount after root is mounted, so this should never
 * be invoked.
 */
/*ARGSUSED*/
static int
devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
{
	dcmn_err(("devfs_mountroot\n"));

	return (EINVAL);
}

struct dv_node *
devfs_dip_to_dvnode(dev_info_t *dip)
{
	char *dirpath;
	struct vnode *dirvp;

	ASSERT(dip != NULL);

	/* no-op if devfs not mounted yet */
	if (devfs_mntinfo == NULL)
		return (NULL);

	/*
	 * The lookupname below only looks up cached dv_nodes
	 * because devfs_clean_key is set in thread specific data.
	 */
	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, dirpath);
	if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
		dcmn_err(("directory %s not found\n", dirpath));
		kmem_free(dirpath, MAXPATHLEN);
		return (NULL);
	}

	kmem_free(dirpath, MAXPATHLEN);
	return (VTODV(dirvp));
}

/*
 * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
 * and not a vHCI we also need to clean any vHCI branches because they
 * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
 * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
 * the vHCI is successful - which requires a clean vHCI branch to removed
 * the devi_refs associated with devfs vnodes.
 */
static int
devfs_clean_vhci(dev_info_t *dip, void *args)
{
	struct dv_node	*dvp;
	uint_t		flags = (uint_t)(uintptr_t)args;

	(void) tsd_set(devfs_clean_key, (void *)1);
	dvp = devfs_dip_to_dvnode(dip);
	if (dvp) {
		(void) dv_cleandir(dvp, NULL, flags);
		VN_RELE(DVTOV(dvp));
	}
	(void) tsd_set(devfs_clean_key, NULL);
	return (DDI_WALK_CONTINUE);
}

/*
 * devfs_clean()
 *
 * Destroy unreferenced dv_node's and detach devices.
 *
 * devfs_clean will try its best to clean up unused nodes. It is
 * no longer valid to assume that just because devfs_clean fails,
 * the device is not removable. This is because device contracts
 * can result in userland processes releasing a device during the
 * device offline process in the kernel. Thus it is no longer
 * correct to fail an offline just because devfs_clean finds
 * referenced dv_nodes. To enforce this, devfs_clean() always
 * returns success i.e. 0.
 *
 * devfs_clean() may return before removing all possible nodes if
 * we cannot acquire locks in areas of the code where potential for
 * deadlock exists (see comments in dv_find() and dv_cleandir() for
 * examples of this).
 *
 * devfs caches unreferenced dv_node to speed by the performance
 * of ls, find, etc. devfs_clean() is invoked to cleanup cached
 * dv_nodes to reclaim memory as well as to facilitate device
 * removal (dv_node reference devinfo nodes, which prevents driver
 * detach).
 *
 * If a shell parks in a /devices directory, the dv_node will be
 * held, preventing the corresponding device to be detached.
 * This would be a denial of service against DR. To prevent this,
 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
 * The dv_cleandir() implementation does the right thing to ensure
 * successful DR.
 */
int
devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
{
	struct dv_node		*dvp;

	dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
	    (void *)dip, flags));

	/* avoid recursion back into the device tree */
	(void) tsd_set(devfs_clean_key, (void *)1);
	dvp = devfs_dip_to_dvnode(dip);
	if (dvp == NULL) {
		(void) tsd_set(devfs_clean_key, NULL);
		return (0);
	}

	(void) dv_cleandir(dvp, devnm, flags);
	(void) tsd_set(devfs_clean_key, NULL);
	VN_RELE(DVTOV(dvp));

	/*
	 * If we are doing a DV_CLEAN_FORCE, and we did not start at the
	 * root, and we did not start at a vHCI node then clean vHCI
	 * branches too.  Failure to clean vHCI branch does not cause EBUSY.
	 *
	 * Also, to accommodate nexus callers that clean 'self' to DR 'child'
	 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
	 * above fails - this prevents a busy DR 'child' sibling from causing
	 * the DR of 'child' to fail because a vHCI branch was not cleaned.
	 */
	if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) &&
	    (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) {
		/*
		 * NOTE: for backport the following is recommended
		 * 	(void) devfs_clean_vhci(scsi_vhci_dip,
		 *	    (void *)(uintptr_t)flags);
		 */
		mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);
	}

	return (0);
}

/*
 * lookup a devfs relative pathname, returning held vnodes for the final
 * component and the containing directory (if requested).
 *
 * NOTE: We can't use lookupname because this would use the current
 *	processes credentials (CRED) in the call lookuppnvp instead
 *	of kcred.  It also does not give you the flexibility so
 * 	specify the directory to start the resolution in (devicesdir).
 */
int
devfs_lookupname(
	char	*pathname,		/* user pathname */
	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
	vnode_t **compvpp)		/* ret for ptr to component vnode */
{
	struct pathname	pn;
	int		error;

	ASSERT(devicesdir);		/* devfs must be initialized */
	ASSERT(pathname);		/* must have some path */

	if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
		return (error);

	/* make the path relative to /devices. */
	pn_skipslash(&pn);
	if (pn_pathleft(&pn) == 0) {
		/* all we had was "\0" or "/" (which skipslash skiped) */
		if (dirvpp)
			*dirvpp = NULL;
		if (compvpp) {
			VN_HOLD(devicesdir);
			*compvpp = devicesdir;
		}
	} else {
		/*
		 * Use devfs lookup to resolve pathname to the vnode for
		 * the device via relative lookup in devfs. Extra holds for
		 * using devicesdir as directory we are searching and for
		 * being our root without being == rootdir.
		 */
		VN_HOLD(devicesdir);
		VN_HOLD(devicesdir);
		error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
		    devicesdir, devicesdir, kcred);
	}
	pn_free(&pn);

	return (error);
}

/*
 * Given a devfs path (without the /devices prefix), walk
 * the dv_node sub-tree rooted at the path.
 */
int
devfs_walk(
	char		*path,
	void		(*callback)(struct dv_node *, void *),
	void		*arg)
{
	char *dirpath, *devnm;
	struct vnode	*dirvp;

	ASSERT(path && callback);

	if (*path != '/' || devfs_mntinfo == NULL)
		return (ENXIO);

	dcmn_err(("devfs_walk: path = %s", path));

	dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	(void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);

	devnm = strrchr(dirpath, '/');

	ASSERT(devnm);

	*devnm++ = '\0';

	if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
		dcmn_err(("directory %s not found\n", dirpath));
		kmem_free(dirpath, MAXPATHLEN);
		return (ENXIO);
	}

	/*
	 * if path == "/", visit the root dv_node
	 */
	if (*devnm == '\0') {
		callback(VTODV(dirvp), arg);
		devnm = NULL;
	}

	dv_walk(VTODV(dirvp), devnm, callback, arg);

	VN_RELE(dirvp);

	kmem_free(dirpath, MAXPATHLEN);

	return (0);
}

int
devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
{
	struct vnode *rvp;
	struct dv_node *dvp;
	int rval = -1;

	/* fail if devfs not mounted yet */
	if (devfs_mntinfo == NULL)
		return (rval);

	if (VOP_REALVP(vp, &rvp, NULL) == 0 && vn_matchops(rvp, dv_vnodeops)) {
		dvp = VTODV(rvp);
		rw_enter(&dvp->dv_contents, RW_READER);
		if (dvp->dv_priv) {
			dphold(dvp->dv_priv);
			*dpp = dvp->dv_priv;
			rval = 0;
		}
		rw_exit(&dvp->dv_contents);
	}
	return (rval);
}