1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#ifndef _NFS_RNODE4_H
#define _NFS_RNODE4_H
#ifdef __cplusplus
extern "C" {
#endif
#include <nfs/rnode.h> /* for symlink_cache, nfs_rwlock_t, etc. */
#include <nfs/nfs4.h>
#include <nfs/nfs4_clnt.h>
#include <sys/thread.h>
#include <sys/sysmacros.h> /* for offsetof */
typedef enum nfs4_stub_type {
NFS4_STUB_NONE,
NFS4_STUB_MIRRORMOUNT,
NFS4_STUB_REFERRAL
} nfs4_stub_type_t;
typedef enum nfs4_access_type {
NFS4_ACCESS_UNKNOWN,
NFS4_ACCESS_ALLOWED,
NFS4_ACCESS_DENIED
} nfs4_access_type_t;
/*
* Access cache
*/
typedef struct acache4_hash {
struct acache4 *next;
struct acache4 *prev;
krwlock_t lock;
} acache4_hash_t;
typedef struct acache4 {
struct acache4 *next; /* next and prev must be first */
struct acache4 *prev;
uint32_t known;
uint32_t allowed;
struct rnode4 *rnode;
cred_t *cred;
struct acache4 *list;
struct acache4_hash *hashq;
} acache4_t;
/*
* Note on the different buffer sizes in rddir4_cache:
* There seems to be some discrepancy between the intended and actual
* use of entlen and buflen, which does not correspond to the comment below.
* entlen - nfsv2/3 used as both alloc'd size of entries buffer and
* as the actual size of the entries (XXX is this correct?).
* nfsv4 will use it only as the alloc'd size.
* buflen - used for calculations of readahead.
* actlen - added for nfsv4 to serve as the size of the useful
* portion of the entries buffer. That is because in
* nfsv4, the otw entries are converted to system entries,
* and may not be the same size - thus buffer may not be full.
*/
typedef struct rddir4_cache {
lloff_t _cookie; /* cookie used to find this cache entry */
lloff_t _ncookie; /* cookie used to find the next cache entry */
char *entries; /* buffer containing dirent entries */
int eof; /* EOF reached after this request */
int entlen; /* size of dirent entries in buf */
int buflen; /* size of the buffer used to store entries */
int actlen; /* size of the actual entries (nfsv4 only) */
int flags; /* control flags, see below */
kcondvar_t cv; /* cv for blocking */
int error; /* error from RPC operation */
void *data; /* private data */
} rddir4_cache;
#define nfs4_cookie _cookie._f
#define nfs4_ncookie _ncookie._f
/*
* Shadow vnode, v4 only.
*
* A file's shadow vnode list is protected by its hash bucket lock,
* r_hashq->r_lock.
*
* sv_r_vnode is protected by the appropriate vnode locks.
*
* sv_dfh, sv_name, sv_dfileid, and sv_dfileid_valid are protected
* by rp->r_svlock.
*/
typedef struct insq_link {
void *forw;
void *back;
} insq_link_t;
typedef struct svnode {
insq_link_t sv_link; /* must be first for insque */
vnode_t *sv_r_vnode; /* vnode for this shadow */
nfs4_fname_t *sv_name; /* component name */
nfs4_sharedfh_t *sv_dfh; /* directory file handle */
} svnode_t;
#define sv_forw sv_link.forw
#define sv_back sv_link.back
extern svnode_t *vtosv(vnode_t *);
#define VTOSV(vp) vtosv(vp)
#define SVTOV(svp) (((svp)->sv_r_vnode))
#define IS_SHADOW(vp, rp) ((vp) != (rp)->r_vnode)
/*
* The format of the hash bucket used to lookup rnodes from a file handle.
*/
typedef struct r4hashq {
struct rnode4 *r_hashf;
struct rnode4 *r_hashb;
krwlock_t r_lock;
} r4hashq_t;
/*
* Remote file information structure.
*
* The rnode is the "inode" for remote files. It contains all the
* information necessary to handle remote file on the client side.
*
* Note on file sizes: we keep two file sizes in the rnode: the size
* according to the client (r_size) and the size according to the server
* (r_attr.va_size). They can differ because we modify r_size during a
* write system call (nfs_rdwr), before the write request goes over the
* wire (before the file is actually modified on the server). If an OTW
* request occurs before the cached data is written to the server the file
* size returned from the server (r_attr.va_size) may not match r_size.
* r_size is the one we use, in general. r_attr.va_size is only used to
* determine whether or not our cached data is valid.
*
* Each rnode has 5 locks associated with it (not including the rnode
* hash table and free list locks):
*
* r_rwlock: Serializes nfs_write and nfs_setattr requests
* and allows nfs_read requests to proceed in parallel.
* Serializes reads/updates to directories.
*
* r_lkserlock: Serializes lock requests with map, write, and
* readahead operations.
*
* r_statelock: Protects all fields in the rnode except for
* those listed below. This lock is intented
* to be held for relatively short periods of
* time (not accross entire putpage operations,
* for example).
*
* r_statev4_lock: Protects the created_v4 flag, the lock_owners list,
* and all the delegation fields except r_deleg_list.
*
* r_os_lock: Protects r_open_streams.
*
*
* The following members are protected by the mutex rp4freelist_lock:
* r_freef
* r_freeb
*
* The following members are protected by the hash bucket rwlock:
* r_hashf
* r_hashb
*
* r_fh is read-only except when an rnode is created (or recycled from the
* free list).
*
* The following members are protected by nfs4_server_t::s_lock:
* r_deleg_list
*
* Note: r_modaddr is only accessed when the r_statelock mutex is held.
* Its value is also controlled via r_rwlock. It is assumed that
* there will be only 1 writer active at a time, so it safe to
* set r_modaddr and release r_statelock as long as the r_rwlock
* writer lock is held.
*
* r_inmap informs nfs4_read()/write() that there is a call to nfs4_map()
* in progress. nfs4_read()/write() check r_inmap to decide whether
* to perform directio on the file or not. r_inmap is atomically
* incremented in nfs4_map() before the address space routines are
* called and atomically decremented just before nfs4_map() exits.
* r_inmap is not protected by any lock.
*
* r_mapcnt tells that the rnode has mapped pages. r_inmap can be 0
* while the rnode has mapped pages.
*
* 64-bit offsets: the code formerly assumed that atomic reads of
* r_size were safe and reliable; on 32-bit architectures, this is
* not true since an intervening bus cycle from another processor
* could update half of the size field. The r_statelock must now
* be held whenever any kind of access of r_size is made.
*
* Lock ordering:
* r_rwlock > r_lkserlock > r_os_lock > r_statelock > r_statev4_lock
* vnode_t::v_lock > r_os_lock
*/
struct exportinfo; /* defined in nfs/export.h */
struct servinfo4; /* defined in nfs/nfs4_clnt.h */
struct failinfo; /* defined in nfs/nfs_clnt.h */
struct mntinfo4; /* defined in nfs/nfs4_clnt.h */
typedef struct rnode4 {
/* the hash fields must be first to match the rhashq_t */
struct rnode4 *r_hashf; /* hash queue forward pointer */
struct rnode4 *r_hashb; /* hash queue back pointer */
struct rnode4 *r_freef; /* free list forward pointer */
struct rnode4 *r_freeb; /* free list back pointer */
r4hashq_t *r_hashq; /* pointer to the hash bucket */
svnode_t r_svnode; /* "master" shadow vnode for file */
kmutex_t r_svlock; /* serializes access to svnode list */
nfs_rwlock_t r_rwlock; /* serializes write/setattr requests */
nfs_rwlock_t r_lkserlock; /* serialize lock with other ops */
kmutex_t r_statelock; /* protects (most of) rnode contents */
nfs4_sharedfh_t *r_fh; /* file handle */
struct servinfo4
*r_server; /* current server */
u_offset_t r_nextr; /* next byte read offset (read-ahead) */
uint_t r_flags; /* flags, see below */
short r_error; /* async write error */
cred_t *r_unlcred; /* unlinked credentials */
char *r_unlname; /* unlinked file name */
vnode_t *r_unldvp; /* parent dir of unlinked file */
vnode_t *r_xattr_dir; /* cached xattr dir vnode */
len_t r_size; /* client's view of file size */
vattr_t r_attr; /* cached vnode attributes */
hrtime_t r_time_attr_saved; /* time attributes were cached */
hrtime_t r_time_attr_inval; /* time attributes become invalid */
hrtime_t r_time_cache_inval; /* time caches become invalid */
time_t r_delay_wait; /* future time for DELAY handling */
int r_delay_interval; /* Number of Secs of last DELAY */
time_t r_last_recov; /* time of last recovery operation */
nfs4_recov_t r_recov_act; /* action from last recovery op */
long r_mapcnt; /* count of mmapped pages */
uint_t r_count; /* # of refs not reflect in v_count */
uint_t r_awcount; /* # of outstanding async write */
uint_t r_gcount; /* getattrs waiting to flush pages */
kcondvar_t r_cv; /* condvar for blocked threads */
int (*r_putapage) /* address of putapage routine */
(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *);
void *r_dir; /* cache of readdir responses */
rddir4_cache *r_direof; /* pointer to the EOF entry */
symlink_cache r_symlink; /* cached readlink response */
verifier4 r_writeverf; /* file data write verifier */
u_offset_t r_modaddr; /* address for page in writerp */
commit_t r_commit; /* commit information */
u_offset_t r_truncaddr; /* base for truncate operation */
vsecattr_t *r_secattr; /* cached security attributes (acls) */
verifier4 r_cookieverf4; /* version 4 readdir cookie verifier */
nfs4_pathconf_info_t r_pathconf; /* cached pathconf info */
acache4_t *r_acache; /* list of access cache entries */
list_t r_open_streams; /* open streams list */
kmutex_t r_os_lock; /* protects r_open_streams */
nfs4_lock_owner_t
r_lo_head; /* lock owners list head */
int created_v4; /* 1 if file has been created in v4 */
kmutex_t r_statev4_lock; /* protects created_v4, state4ptr */
list_node_t r_deleg_link; /* linkage into list of */
/* delegated rnodes for this server */
open_delegation_type4
r_deleg_type; /* type of delegation granted */
stateid4 r_deleg_stateid;
/* delegation state id */
nfs_space_limit4
r_deleg_limit; /* file limits returned from */
/* server on delegated open */
nfsace4 r_deleg_perms; /* file permissions returned from */
/* server on delegated open */
fattr4_change r_deleg_change; /* current deleg change attr */
fattr4_change r_deleg_change_grant;
/* change @ write deleg grant */
cred_t *r_deleg_cred; /* credential in force when the */
/* delegation was granted */
open_delegation_type4
r_deleg_needs_recovery;
/* delegation needs recovery */
/* This contains the delegation type */
/* for use with CLAIM_PREVIOUS. */
/* OPEN_DELEGATE_NONE means recovery */
/* is not needed. */
unsigned r_deleg_needs_recall:1;
/* delegation has been recalled by */
/* the server during open with */
/* CLAIM_PREVIOUS */
unsigned r_deleg_return_pending:1;
/* delegreturn is pending, don't use */
/* the delegation stateid, set in */
/* nfs4_dlistadd */
unsigned r_deleg_return_inprog:1;
/* delegreturn is in progress, may */
/* only be set by nfs4delegreturn. */
nfs_rwlock_t r_deleg_recall_lock;
/* lock for synchronizing delegreturn */
/* with in other operations, acquired */
/* in read mode by nfs4_start_fop, */
/* acquired in write mode in */
/* nfs4delegreturn */
fattr4_change r_change; /* GETATTR4 change attr; client */
/* should always request change */
/* when c/mtime requested to keep */
/* change and c/mtime in sync */
fattr4_fileid r_mntd_fid; /* mounted on fileid attr */
kthread_t *r_serial; /* attrcache validation thread */
kthread_t *r_pgflush; /* thread flushing page cache */
list_t r_indelmap; /* list of delmap callers */
fattr4_fsid r_srv_fsid; /* fsid of srv fs containing object */
/* when rnode created; compare with */
/* sv_fsid (servinfo4_t) to see why */
/* stub type was set */
nfs4_stub_type_t r_stub_type;
/* e.g. mirror-mount or referral */
uint_t r_inmap; /* to serialize read/write and mmap */
} rnode4_t;
#define r_vnode r_svnode.sv_r_vnode
/*
* Flags
*/
#define R4READDIRWATTR 0x1 /* Use READDIR with attributes */
#define R4DIRTY 0x2 /* dirty pages from write operation */
#define R4STALE 0x4 /* stale, don't even attempt to write */
#define R4MODINPROGRESS 0x8 /* page modification happening */
#define R4TRUNCATE 0x10 /* truncating, don't commit */
#define R4HAVEVERF 0x20 /* have a write verifier to compare against */
#define R4COMMIT 0x40 /* commit in progress */
#define R4COMMITWAIT 0x80 /* someone is waiting to do a commit */
#define R4HASHED 0x100 /* rnode is in hash queues */
#define R4OUTOFSPACE 0x200 /* an out of space error has happened */
#define R4LODANGLERS 0x400 /* rnode has dangling lock_owners to cleanup */
#define R4WRITEMODIFIED 0x800 /* file data has been modified by write */
#define R4DIRECTIO 0x1000 /* bypass the buffer cache */
#define R4RECOVERR 0x2000 /* couldn't recover */
#define R4RECEXPFH 0x4000 /* recovering expired filehandle */
#define R4RECOVERRP 0x8000 /* R4RECOVERR pending, but not set (yet) */
#define R4ISXATTR 0x20000 /* rnode is a named attribute */
#define R4DELMAPLIST 0x40000 /* delmap callers tracked for as callback */
#define R4PGFLUSH 0x80000 /* page flush thread active */
#define R4INCACHEPURGE 0x100000 /* purging caches due to file size change */
#define R4LOOKUP 0x200000 /* a lookup has been done in the directory */
/*
* Convert between vnode and rnode
*/
#define RTOV4(rp) ((rp)->r_vnode)
#define VTOR4(vp) ((rnode4_t *)((vp)->v_data))
#define RP_ISSTUB(rp) (((rp)->r_stub_type != NFS4_STUB_NONE))
#define RP_ISSTUB_MIRRORMOUNT(rp) ((rp)->r_stub_type == NFS4_STUB_MIRRORMOUNT)
#define RP_ISSTUB_REFERRAL(rp) ((rp)->r_stub_type == NFS4_STUB_REFERRAL)
/*
* Open file instances.
*/
typedef struct nfs4_opinst {
struct nfs4_opinst *re_next; /* next in list */
vnode_t *re_vp; /* held reference */
uint32_t re_numosp; /* number of valid open streams */
nfs4_open_stream_t **re_osp; /* held reference */
} nfs4_opinst_t;
#ifdef _KERNEL
extern long nrnode;
/* Used for r_delay_interval */
#define NFS4_INITIAL_DELAY_INTERVAL 1
#define NFS4_MAX_DELAY_INTERVAL 20
/* Used for check_rtable4 */
#define NFSV4_RTABLE4_OK 0
#define NFSV4_RTABLE4_NOT_FREE_LIST 1
#define NFSV4_RTABLE4_DIRTY_PAGES 2
#define NFSV4_RTABLE4_POS_R_COUNT 3
extern rnode4_t *r4find(r4hashq_t *, nfs4_sharedfh_t *, struct vfs *);
extern rnode4_t *r4find_unlocked(nfs4_sharedfh_t *, struct vfs *);
extern void r4flush(struct vfs *, cred_t *);
extern void destroy_rtable4(struct vfs *, cred_t *);
extern int check_rtable4(struct vfs *);
extern void rp4_addfree(rnode4_t *, cred_t *);
extern void rp4_addhash(rnode4_t *);
extern void rp4_rmhash(rnode4_t *);
extern void rp4_rmhash_locked(rnode4_t *);
extern int rtable4hash(nfs4_sharedfh_t *);
extern vnode_t *makenfs4node(nfs4_sharedfh_t *, nfs4_ga_res_t *, struct vfs *,
hrtime_t, cred_t *, vnode_t *, nfs4_fname_t *);
extern vnode_t *makenfs4node_by_fh(nfs4_sharedfh_t *, nfs4_sharedfh_t *,
nfs4_fname_t **, nfs4_ga_res_t *, mntinfo4_t *, cred_t *, hrtime_t);
extern nfs4_opinst_t *r4mkopenlist(struct mntinfo4 *);
extern void r4releopenlist(nfs4_opinst_t *);
extern int r4find_by_fsid(mntinfo4_t *, fattr4_fsid *);
/* Access cache calls */
extern nfs4_access_type_t nfs4_access_check(rnode4_t *, uint32_t, cred_t *);
extern void nfs4_access_cache(rnode4_t *rp, uint32_t, uint32_t, cred_t *);
extern int nfs4_access_purge_rp(rnode4_t *);
extern int nfs4_free_data_reclaim(rnode4_t *);
extern void nfs4_rnode_invalidate(struct vfs *);
extern time_t r2lease_time(rnode4_t *);
extern int nfs4_directio(vnode_t *, int, cred_t *);
/* shadow vnode functions */
extern void sv_activate(vnode_t **, vnode_t *, nfs4_fname_t **, int);
extern vnode_t *sv_find(vnode_t *, vnode_t *, nfs4_fname_t **);
extern void sv_update_path(vnode_t *, char *, char *);
extern void sv_inactive(vnode_t *);
extern void sv_exchange(vnode_t **);
extern void sv_uninit(svnode_t *);
extern void nfs4_clear_open_streams(rnode4_t *);
/*
* Mark cached attributes as timed out
*
* The caller must not be holding the rnode r_statelock mutex.
*/
#define PURGE_ATTRCACHE4_LOCKED(rp) \
rp->r_time_attr_inval = gethrtime(); \
rp->r_time_attr_saved = rp->r_time_attr_inval; \
rp->r_pathconf.pc4_xattr_valid = 0; \
rp->r_pathconf.pc4_cache_valid = 0;
#define PURGE_ATTRCACHE4(vp) { \
rnode4_t *rp = VTOR4(vp); \
mutex_enter(&rp->r_statelock); \
PURGE_ATTRCACHE4_LOCKED(rp); \
mutex_exit(&rp->r_statelock); \
}
extern void nfs4_async_readdir(vnode_t *, rddir4_cache *,
cred_t *, int (*)(vnode_t *, rddir4_cache *, cred_t *));
extern char *rnode4info(rnode4_t *rp);
extern int writerp4(rnode4_t *, caddr_t, int, struct uio *, int);
extern void nfs4_set_nonvattrs(rnode4_t *, struct nfs4attr_to_vattr *);
extern void nfs4delegabandon(rnode4_t *);
extern stateid4 nfs4_get_w_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *,
nfs_opnum4, nfs4_stateid_types_t *);
extern stateid4 nfs4_get_stateid(cred_t *, rnode4_t *, pid_t, mntinfo4_t *,
nfs_opnum4, nfs4_stateid_types_t *, bool_t);
extern nfsstat4 nfs4_find_or_create_lock_owner(pid_t, rnode4_t *, cred_t *,
nfs4_open_owner_t **, nfs4_open_stream_t **,
nfs4_lock_owner_t **);
extern cred_t *nfs4_get_otw_cred_by_osp(rnode4_t *, cred_t *,
nfs4_open_stream_t **, bool_t *, bool_t *);
/*
* Defines for the flag argument of nfs4delegreturn
*/
#define NFS4_DR_FORCE 0x1 /* discard even if start_op fails */
#define NFS4_DR_PUSH 0x2 /* push modified data back to the server */
#define NFS4_DR_DISCARD 0x4 /* discard the delegation w/o delegreturn */
#define NFS4_DR_DID_OP 0x8 /* calling function did nfs4_start_op */
#define NFS4_DR_RECALL 0x10 /* delegreturn done in response to CB_RECALL */
#define NFS4_DR_REOPEN 0x20 /* perform file reopens, if applicable */
extern int nfs4delegreturn(rnode4_t *, int);
extern void nfs4_delegreturn_all(nfs4_server_t *);
extern void nfs4delegreturn_cleanup(rnode4_t *, nfs4_server_t *);
extern void nfs4_delegation_accept(rnode4_t *, open_claim_type4, OPEN4res *,
nfs4_ga_res_t *, cred_t *);
extern void nfs4_dlistclean(void);
extern void nfs4_deleg_discard(mntinfo4_t *, nfs4_server_t *);
extern void rddir4_cache_create(rnode4_t *);
extern void rddir4_cache_purge(rnode4_t *);
extern void rddir4_cache_destroy(rnode4_t *);
extern rddir4_cache *rddir4_cache_lookup(rnode4_t *, offset_t, int);
extern void rddir4_cache_rele(rnode4_t *, rddir4_cache *);
extern void r4_stub_mirrormount(rnode4_t *);
extern void r4_stub_referral(rnode4_t *);
extern void r4_stub_none(rnode4_t *);
#ifdef DEBUG
extern char *rddir4_cache_buf_alloc(size_t, int);
extern void rddir4_cache_buf_free(void *, size_t);
#endif
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _NFS_RNODE4_H */
|