summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/os/share.c
blob: 3acc10bf7db3dbca3819ce184cfe6524cfeb1777 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/share.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/nbmlock.h>

/*
 * Debug knob.  In DEBUG builds add_share() dumps the vnode's whole share
 * list via print_shares() after each successful insertion when this is
 * non-zero.
 */
int share_debug = 0;

#ifdef DEBUG
/* Dump helpers; only compiled into DEBUG builds. */
static void print_shares(struct vnode *);
static void print_share(struct shrlock *);
#endif

/* File-local helpers defined further down in this file. */
static int isreadonly(struct vnode *);
static void do_cleanshares(struct vnode *, pid_t, int32_t);


/*
 * Add the share reservation shr to vp.
 *
 * The request is validated, compared against every reservation already on
 * vp->v_shrlocks and, if nothing conflicts, a private copy of it (including
 * a copy of the owner bytes) is pushed onto the head of that list.  The
 * caller's shr is not retained.
 *
 * New entries are always inserted at the head of the list, so the entry
 * whose next pointer is NULL is the oldest ("first") share on the vnode.
 * The F_COMPAT rules below key off that entry.
 *
 * Returns:
 *	0	the reservation was added
 *	EINVAL	s_access is zero and GETSYSID(s_sysid) is zero, or
 *		s_access/s_deny contain bits outside the accepted masks
 *	EAGAIN	the request conflicts with an existing reservation
 *
 * vp->v_lock is taken for the conflict scan and stays held through the
 * KM_SLEEP allocations and the list insertion.
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal, however some older clients
	 * generate it anyway.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY|F_RMDNY)) {
		return (EINVAL);
	}

	/*
	 * Conflict scan.  Within the loop, "break" means the request has
	 * been accepted (fall through to the insertion below), "continue"
	 * means this entry does not decide the outcome, and an early
	 * return of EAGAIN means the request is refused.
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches previous request
		 * do special handling.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {

			/*
			 * If the existing request is F_COMPAT and
			 * is the first share then allow any F_COMPAT
			 * from the same process.  Trick:  If the existing
			 * F_COMPAT is write access then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
			    (shrl->shr->s_access & F_WRACC)))
				break;
		}

		/*
		 * If a first share has been done in compatibility mode
		 * handle the special cases.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {

			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * The request is not compat.  Fail if it
				 * wants write access, wants to deny read,
				 * or the first share already has write
				 * access.
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					return (EAGAIN);
				}
				/*
				 * If read only file allow, this may allow
				 * a deny write but that is meaningless on
				 * a read only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * This is a compat request and read access
			 * and the first was also read access
			 * we always allow it, otherwise we reject because
			 * we have handled the only valid write case above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first)
		 * we don't know enough.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat we check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access or
			 * if anyone is denying read or
			 * if anyone has write access we fail
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * If the first was opened with only read access
			 * and is a read only file we allow.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp)) {
					break;
				}
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * We still can't determine our fate so continue
			 */
			continue;
		}

		/*
		 * Simple bitwise test, if we are trying to access what
		 * someone else is denying or we are trying to deny
		 * what someone else is accessing we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}
	}

	/*
	 * No conflict: build a private copy of the request.  v_lock is
	 * still held here.
	 */
	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);

	/* Push onto the head; older shares stay toward the tail. */
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;
#ifdef DEBUG
	if (share_debug)
		print_shares(vp);
#endif

	mutex_exit(&vp->v_lock);

	return (0);
}

/*
 * Decide whether list entry `element' is covered by the delete request
 * `shr', judging only by sysid and pid (the owner bytes are not examined
 * here).  Which fields are compared depends on how the request is
 * populated, where nlmid is GETNLMID(s_sysid) and sysid is
 * GETSYSID(s_sysid) of the request:
 *
 *	nlmid	sysid	pid
 *	=====	=====	===
 *	!=0	!=0	=0	in cluster; NLM lock
 *	!=0	=0	=0	in cluster; special case for NLM lock
 *	!=0	=0	!=0	in cluster; PXFS local lock
 *	!=0	!=0	!=0	cannot happen
 *	=0	!=0	=0	not in cluster; NLM lock
 *	=0	=0	!=0	not in cluster; local lock
 *	=0	=0	=0	cannot happen
 *	=0	!=0	!=0	cannot happen
 *
 * Returns non-zero when the entry matches and zero otherwise.  The
 * in-cluster "cannot happen" row never matches.
 */
static int
is_match_for_del(struct shrlock *shr, struct shrlock *element)
{
	int req_nlmid = GETNLMID(shr->s_sysid);
	int elem_nlmid = GETNLMID(element->s_sysid);

	if (req_nlmid == 0) {
		/*
		 * Not in a cluster: a zero sysid selects by pid, any
		 * other sysid selects by sysid.
		 */
		if (shr->s_sysid == 0)
			return (shr->s_pid == element->s_pid);
		return (shr->s_sysid == element->s_sysid);
	}

	/* Everything below is the in-cluster case. */

	if (GETSYSID(shr->s_sysid) != 0) {
		/* A non-zero sysid together with a pid is not valid. */
		if (shr->s_pid != 0)
			return (0);
		/*
		 * Lock obtained through nlm server.  Just need to
		 * compare whole sysids.  pid will always = 0.
		 */
		return (shr->s_sysid == element->s_sysid);
	}

	if (shr->s_pid == 0) {
		/*
		 * This is a special case.  The NLM server wishes to
		 * delete all share locks obtained through its nlmid.
		 */
		return (req_nlmid == elem_nlmid);
	}

	/*
	 * Lock obtained locally through PXFS.  Match nlmids
	 * and pids.
	 */
	return (req_nlmid == elem_nlmid && shr->s_pid == element->s_pid);
}

/*
 * Remove share reservations from vp.  Returns 0 if at least one entry was
 * removed and EINVAL if nothing matched.  When any removed entry was an
 * NBMAND (F_MANDDNY) reservation, threads sleeping on vp->v_cv are woken
 * so that, e.g., blocked lock requests can retry.
 *
 * An entry is removed when either
 *   - its owner has the same length and bytes as the request's owner, or
 *   - the request has a zero-length owner and is_match_for_del() accepts
 *     the entry on the basis of sysid/pid.
 *
 * NOTE(review): with a zero-length request owner the first test also
 * accepts every entry whose own owner length is zero, before sysid/pid
 * are consulted.  Confirm that this is intended.
 */

int
del_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist **linkp;
	struct shrlocklist *victim;
	int removed = 0;
	int wake_waiters = 0;

	mutex_enter(&vp->v_lock);

	/*
	 * Walk the list through a pointer to the current link so that an
	 * entry can be unlinked without tracking its predecessor.
	 */
	linkp = &vp->v_shrlocks;
	while ((victim = *linkp) != NULL) {
		int same_owner;

		same_owner =
		    (shr->s_own_len == victim->shr->s_own_len &&
		    bcmp(shr->s_owner, victim->shr->s_owner,
		    shr->s_own_len) == 0);

		if (!same_owner &&
		    !(shr->s_own_len == 0 &&
		    is_match_for_del(shr, victim->shr))) {
			/* Not ours; step to the next link. */
			linkp = &victim->next;
			continue;
		}

		/* Unlink first; linkp now refers to the successor. */
		*linkp = victim->next;

		if (victim->shr->s_deny & F_MANDDNY)
			wake_waiters = 1;

		/* XXX deref sysid */
		kmem_free(victim->shr->s_owner, victim->shr->s_own_len);
		kmem_free(victim->shr, sizeof (struct shrlock));
		kmem_free(victim, sizeof (struct shrlocklist));
		removed = 1;
	}

	if (wake_waiters)
		cv_broadcast(&vp->v_cv);

	mutex_exit(&vp->v_lock);

	if (!removed)
		return (EINVAL);
	return (0);
}

/*
 * Clean up all local share reservations that the given process has with
 * the given file.
 *
 * This is a thin wrapper: it hands pid to do_cleanshares() with a sysid
 * of 0, which selects removal by pid.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	do_cleanshares(vp, pid, 0);
}

/*
 * Drop every remote share reservation on vp that was made by the given
 * sysid.  A sysid of 0 is ignored: do_cleanshares() would treat it as a
 * request to remove by pid instead.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid != 0)
		do_cleanshares(vp, 0, sysid);
}

/*
 * Clean up share reservations on the given vnode made by either the
 * given pid or the given sysid.  If sysid is 0, all shares made by pid
 * are removed; otherwise all shares made by sysid are removed.
 */
static void
do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
{
	struct shrlock wanted;

	/* Peek at the list without the lock; nothing to do if empty. */
	if (vp->v_shrlocks != NULL) {
		/*
		 * A zero-length owner makes del_share() select entries
		 * by sysid/pid rather than by owner bytes.
		 */
		wanted.s_owner = NULL;
		wanted.s_own_len = 0;
		wanted.s_sysid = sysid;
		wanted.s_pid = pid;
		wanted.s_deny = 0;
		wanted.s_access = 0;

		/* "Nothing matched" is not an error for a cleanup pass. */
		(void) del_share(vp, &wanted);
	}
}

/*
 * Decide whether a share held under sysid2 counts as a hit for a query
 * about sysid1.  Returns non-zero on a hit and zero otherwise.
 *
 *   - Not in a cluster (GETNLMID(sysid1) == 0): a zero sysid1 matches any
 *     share with a non-zero sysid; a non-zero sysid1 matches only an
 *     identical sysid2.
 *   - In a cluster: if the sysid part of sysid1 is non-zero the whole
 *     sysids must be equal; otherwise only the nlmid parts are compared.
 */
static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	if (GETNLMID(sysid1) == 0) {
		/* not in a cluster */
		if (sysid1 == 0)
			return (sysid2 != 0);
		return (sysid1 == sysid2);
	}

	/* in a cluster */
	if (GETSYSID(sysid1) == 0) {
		/*
		 * This is a special case.  The NLM server identified
		 * by the nlmid of sysid1 wishes to find out if it has
		 * obtained any share locks on the vnode.
		 */
		return (GETNLMID(sysid1) == GETNLMID(sysid2));
	}

	/*
	 * Lock obtained through nlm server.  Just need to
	 * compare whole sysids.
	 */
	return (sysid1 == sysid2);
}


/*
 * Report whether the given vnode carries any share reservation with a
 * remote sysid.  Returns zero if not, non-zero if so.  When sysid is
 * non-zero the question is narrowed to whether that sysid holds a share;
 * the matching rule itself lives in is_match_for_has_remote().
 *
 * The answer may already be stale by the time the caller sees it, since
 * v_lock is dropped before returning.  The caller is responsible for
 * providing its own synchronization to make the result meaningful.
 */
int
shr_has_remote_shares(vnode_t *vp, int32_t sysid)
{
	struct shrlocklist *entry;
	int found = 0;

	mutex_enter(&vp->v_lock);
	for (entry = vp->v_shrlocks; entry != NULL; entry = entry->next) {
		if (is_match_for_has_remote(sysid, entry->shr->s_sysid)) {
			found = 1;
			break;
		}
	}
	mutex_exit(&vp->v_lock);

	return (found);
}

/*
 * Return 1 if vp should be treated as a read-only file for share
 * purposes, 0 otherwise.  Character devices, block devices and FIFOs
 * never qualify; for every other vnode type the answer comes from
 * vn_is_readonly().
 */
static int
isreadonly(struct vnode *vp)
{
	switch (vp->v_type) {
	case VCHR:
	case VBLK:
	case VFIFO:
		return (0);
	default:
		return (vn_is_readonly(vp) != 0);
	}
}

#ifdef DEBUG
/*
 * Debug helper: print every share reservation on vp, or "<NULL>" when
 * the list is empty.  Takes no lock itself; the caller visible in this
 * file, add_share(), invokes it with vp->v_lock held.
 */
static void
print_shares(struct vnode *vp)
{
	struct shrlocklist *entry = vp->v_shrlocks;

	if (entry == NULL) {
		printf("<NULL>\n");
		return;
	}

	for (; entry != NULL; entry = entry->next)
		print_share(entry->shr);
}

/*
 * Debug helper: print one share reservation.
 *
 * Output is the numeric access value followed by R/W flags (or N when
 * neither read nor write access is set), the deny flags as C (compat),
 * R, W or N (no deny), then sysid, pid, and the owner as its length
 * followed by a hex dump of the owner bytes.  A NULL shr prints "<NULL>".
 */
static void
print_share(struct shrlock *shr)
{
	int i;

	if (shr == NULL) {
		printf("<NULL>\n");
		return;
	}

	printf("    access(%d):	", shr->s_access);
	if (shr->s_access & F_RDACC)
		printf("R");
	if (shr->s_access & F_WRACC)
		printf("W");
	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
		printf("N");
	printf("\n");
	printf("    deny:	");
	if (shr->s_deny & F_COMPAT)
		printf("C");
	if (shr->s_deny & F_RDDNY)
		printf("R");
	if (shr->s_deny & F_WRDNY)
		printf("W");
	if (shr->s_deny == F_NODNY)
		printf("N");
	printf("\n");
	printf("    sysid:	%d\n", shr->s_sysid);
	printf("    pid:	%d\n", shr->s_pid);
	printf("    owner:	[%d]", shr->s_own_len);
	printf("'");
	/*
	 * Go through unsigned char before widening.  If the owner buffer
	 * has a signed char element type (assumed; the declaration is not
	 * visible here), a direct cast to unsigned would sign-extend bytes
	 * >= 0x80 and print them as ffffffXX instead of two hex digits.
	 */
	for (i = 0; i < shr->s_own_len; i++)
		printf("%02x", (unsigned)(unsigned char)shr->s_owner[i]);
	printf("'\n");
}
#endif

/*
 * Return non-zero if the given I/O request conflicts with a registered
 * share reservation.
 *
 * A process is identified by the tuple (sysid, pid). When the caller
 * context is passed to nbl_share_conflict, the sysid and pid in the
 * caller context are used. Otherwise the sysid is zero, and the pid is
 * taken from the current process.
 *
 * Conflict Algorithm:
 *   1. An op request of NBL_READ will fail if a different
 *      process has a mandatory share reservation with deny read.
 *
 *   2. An op request of NBL_WRITE will fail if a different
 *      process has a mandatory share reservation with deny write.
 *
 *   3. An op request of NBL_READWRITE will fail if a different
 *      process has a mandatory share reservation with deny read
 *      or deny write.
 *
 *   4. An op request of NBL_REMOVE will fail if there is
 *      a mandatory share reservation with an access of read,
 *      write, or remove. (Anything other than meta data access).
 *
 *   5. An op request of NBL_RENAME will fail if there is
 *      a mandatory share reservation with:
 *        a) access write or access remove
 *      or
 *        b) access read and deny remove
 *
 *   Otherwise there is no conflict and the op request succeeds.
 *
 * This behavior is required for interoperability between
 * the nfs server, cifs server, and local access.
 * This behavior can result in non-posix semantics.
 *
 * When mandatory share reservations are enabled, a process
 * should call nbl_share_conflict to determine if the
 * desired operation would conflict with an existing share
 * reservation.
 *
 * The call to nbl_share_conflict may be skipped if the
 * process has an existing share reservation and the operation
 * is being performed in the context of that existing share
 * reservation.
 */
int
nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
{
	struct shrlocklist *entry;
	struct shrlock *res;
	int conflict = 0;
	int other_owner;
	pid_t pid;
	int sysid;

	ASSERT(nbl_in_crit(vp));

	/*
	 * Work out who is asking: the caller context if one was supplied,
	 * otherwise the current process with a sysid of zero.
	 */
	if (ct != NULL) {
		pid = ct->cc_pid;
		sysid = ct->cc_sysid;
	} else {
		pid = curproc->p_pid;
		sysid = 0;
	}

	mutex_enter(&vp->v_lock);

	/* Stop at the first reservation that conflicts. */
	for (entry = vp->v_shrlocks; entry != NULL && !conflict;
	    entry = entry->next) {
		res = entry->shr;

		/* Only mandatory reservations can block an operation. */
		if ((res->s_deny & F_MANDDNY) == 0)
			continue;

		/*
		 * The read/write style operations ignore reservations
		 * held by the requester itself; NBL_REMOVE and NBL_RENAME
		 * do not make that distinction.
		 */
		other_owner = (res->s_sysid != sysid || res->s_pid != pid);

		switch (op) {
		case NBL_READ:
			if ((res->s_deny & F_RDDNY) && other_owner)
				conflict = 1;
			break;
		case NBL_WRITE:
			if ((res->s_deny & F_WRDNY) && other_owner)
				conflict = 1;
			break;
		case NBL_READWRITE:
			if ((res->s_deny & F_RWDNY) && other_owner)
				conflict = 1;
			break;
		case NBL_REMOVE:
			if (res->s_access & (F_RWACC|F_RMACC))
				conflict = 1;
			break;
		case NBL_RENAME:
			if ((res->s_access & (F_WRACC|F_RMACC)) ||
			    ((res->s_access & F_RDACC) &&
			    (res->s_deny & F_RMDNY)))
				conflict = 1;
			break;
#ifdef DEBUG
		default:
			cmn_err(CE_PANIC,
			    "nbl_share_conflict: bogus op (%d)",
			    op);
			break;
#endif
		}
	}

	mutex_exit(&vp->v_lock);
	return (conflict);
}

/*
 * Report whether the local process pid holds an NBMAND (F_MANDDNY) share
 * reservation on vp.  Only entries with a sysid of zero are considered.
 * Returns 1 if such a reservation exists and 0 otherwise.
 */
int
proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
{
	struct shrlocklist *entry;
	int has_share = 0;

	mutex_enter(&vp->v_lock);
	for (entry = vp->v_shrlocks; entry != NULL; entry = entry->next) {
		struct shrlock *res = entry->shr;

		/* Skip anything not owned by this local process. */
		if (res->s_sysid != 0 || res->s_pid != pid)
			continue;

		if (res->s_deny & F_MANDDNY) {
			has_share = 1;
			break;
		}
	}
	mutex_exit(&vp->v_lock);

	return (has_share);
}