summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys/fs/ufs_inode.h
blob: bfdd32fa4a27822d18a19b0b3b8fd1212b5a87ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#ifndef	_SYS_FS_UFS_INODE_H
#define	_SYS_FS_UFS_INODE_H

#include <sys/isa_defs.h>
#include <sys/fbuf.h>
#include <sys/fdbuffer.h>
#include <sys/fcntl.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cred.h>
#include <sys/time.h>
#include <sys/types32.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_lockfs.h>
#include <sys/fs/ufs_trans.h>
#include <sys/kstat.h>
#include <sys/fs/ufs_acl.h>
#include <sys/fs/ufs_panic.h>
#include <sys/dnlc.h>

#ifdef _KERNEL
#include <sys/vfs_opreg.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * The I node is the focus of all local file activity in UNIX.
 * There is a unique inode allocated for each active file,
 * each current directory, each mounted-on file, each mapping,
 * and the root.  An inode is `named' by its dev/inumber pair.
 * Data in icommon is read in from permanent inode on volume.
 *
 * Each inode has the following locks associated with it:
 *	i_rwlock:	Serializes ufs_write and ufs_setattr request
 *			and allows ufs_read requests to proceed in parallel.
 *			Serializes reads/updates to directories.
 *	vfs_dqrwlock:	Manages quota sub-system quiescence.  See below.
 *	i_contents:	Protects almost all of the fields in the inode
 *			except for those listed below. When held
 *			in writer mode also protects those fields
 *			listed under i_tlock.
 *	i_tlock:	When i_tlock is held with the i_contents reader
 *			lock the i_atime, i_mtime, i_ctime,
 *			i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag
 *			i_seq, i_writer & i_mapcnt fields are protected.
 *			For more i_flag locking info see below.
 *	ih_lock:	Protects inode hash chain buckets
 *	ifree_lock:	Protects inode freelist
 *
 * Lock ordering:
 *	i_rwlock > i_contents > i_tlock
 *	i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock
 *	i_contents > i_tlock
 *	vfs_dqrwlock > i_contents(writer) > i_tlock
 *	ih_lock > i_contents > i_tlock
 *
 * Making major changes to quota sub-system state, while the file
 * system is mounted required the addition of another lock.  The
 * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs
 * structure.  This lock is used to manage quota sub-system quiescence
 * for a particular file system. Major changes to quota sub-system
 * state (disabling quotas, enabling quotas, and setting new quota
 * limits) all require the file system to be quiescent and grabbing
 * vfs_dqrwlock as writer accomplishes this.  On the other hand,
 * grabbing vfs_dqrwlock as reader makes the quota sub-system
 * non-quiescent and lets the quota sub-system know that now is not a
 * good time to change major quota sub-system state.  Typically
 * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for
 * writing.  However, there are cases where vfs_dqrwlock is grabbed for
 * reading without a corresponding i_contents write grab because there
 * is no relevant inode.  There are also cases where i_contents is
 * grabbed for writing when a vfs_dqrwlock read grab is not needed
 * because the inode changes do not affect quotas.
 *
 * Unfortunately, performance considerations have required that we be more
 * intelligent about using i_tlock when updating i_flag.  Ideally, we would
 * have simply separated out several of the bits in i_flag into their own
 * ints to avoid problems.  But, instead, we have implemented the following
 * rules:
 *
 *	o You can update any i_flag field while holding the writer-contents,
 *	  or by holding the reader-contents AND holding i_tlock.
 *	  You can only call ITIMES_NOLOCK while holding the writer-contents,
 *	  or by holding the reader-contents AND holding i_tlock.
 *
 *	o For a directory, holding the reader-rw_lock is sufficient for setting
 *	  IACC.
 *
 *	o Races with IREF are avoided by holding the reader contents lock
 *	  and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage.
 *	  And by holding the writer-contents in ufs_iinactive.
 *
 *	o The callers are no longer required to handle the calls to ITIMES
 *	  and ITIMES_NOLOCK.  The functions that set the i_flag bits are
 *	  responsible for managing those calls.  The exceptions are the
 *	  bmap routines.
 *
 * SVR4 Extended Fundamental Type (EFT) support:
 * 	The inode structure has been enhanced to support
 *	32-bit user-id, 32-bit group-id, and 32-bit device number.
 *	Standard SVR4 ufs also supports 32-bit mode field.  For the reason
 *	of backward compatibility with the previous ufs disk format,
 *	32-bit mode field is not supported.
 *
 *	The current inode structure is 100% backward compatible with
 *	the previous inode structure if no user-id or group-id exceeds
 *	USHRT_MAX, and no major or minor number of a device number
 *	stored in an inode exceeds 255.
 *
 * Rules for managing i_seq:
 *	o i_seq is locked under the same rules as i_flag
 *	o The i_ctime or i_mtime MUST never change without increasing
 *	  the value of i_seq.
 *	o You may increase the value of i_seq without the timestamps
 *	  changing, this may decrease the caller's performance but will
 *	  be functionally correct.
 *	o The common case is when IUPD or ICHG is set, increase i_seq
 *	  and immediately call ITIMES* or ufs_iupdat to create a new timestamp.
 *	o A less common case is the setting of IUPD or ICHG and while still
 *	  holding the correct lock defer the timestamp and i_seq update
 *	  until later, but it must still be done before the lock is released.
 *	  bmap_write is an example of this, where the caller does the update.
 *	o If multiple changes are being made with the timestamps being
 *	  updated only at the end, a single increase of i_seq is allowed.
 *	o If changes are made with IUPD or ICHG being set, but
 *	  the controlling lock is being dropped before the timestamp is
 *	  updated, there is a risk that another thread will also change
 *	  the file, update i_flag, and push just one timestamp update.
 *	  There is also the risk that another thread calls ITIMES or
 *	  ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq,
 *	  this will cause ufs_imark to change the timestamps without changing
 *	  i_seq. If the controlling lock is dropped, ISEQ must be set to
 *	  force i_seq to be increased on next ufs_imark, but i_seq MUST still
 *	  be increased by the original setting thread before its deferred
 *	  call to ITIMES to ensure it is increased the correct number of times.
 */

/*
 * Flag values for the 16-bit id fields.  The expansions are fully
 * parenthesized so they behave as a single operand in any expression
 * (e.g. as the operand of sizeof), and GID_LONG is cast to the gid type
 * rather than the uid type.
 * NOTE(review): presumably compared against ic_suid / ic_sgid - confirm
 * against the callers.
 */
#define	UID_LONG  ((o_uid_t)65535)
				/* flag value to indicate uid is 32-bit long */
#define	GID_LONG  ((o_gid_t)65535)
				/* flag value to indicate gid is 32-bit long */

/*
 * Block address counts in the inode, and the number of bytes of the
 * address arrays available for a fast symbolic link.  FSL_SIZE is
 * parenthesized as a whole so that it can be used safely as an operand
 * (for example as a divisor or after a unary operator).
 */
#define	NDADDR	12		/* direct addresses in inode */
#define	NIADDR	3		/* indirect addresses in inode */
#define	FSL_SIZE ((NDADDR + NIADDR - 1) * sizeof (daddr32_t))
				/* max fast symbolic name length is 56 */

/*
 * Member-access shorthands.  Each expands to a chain that begins with a
 * struct inode member (i_ufsvfs, i_vnode), so they are only meaningful
 * directly after `->' or `.' applied to an inode, e.g. ip->i_fs.
 */
#define	i_fs	i_ufsvfs->vfs_bufp->b_un.b_fs
#define	i_vfs	i_vnode->v_vfsp

/*
 * Inode fields shared by the in-core inode (i_ic) and by struct dinode
 * (di_icom).  The number at the start of each member comment is that
 * member's byte offset within the structure.  Member order and sizes are
 * part of the layout read from the volume and must not be changed.
 */
struct 	icommon {
	o_mode_t ic_smode;	/*  0: mode and type of file */
	short	ic_nlink;	/*  2: number of links to file */
	o_uid_t	ic_suid;	/*  4: owner's user id */
	o_gid_t	ic_sgid;	/*  6: owner's group id */
	u_offset_t ic_lsize;	/*  8: number of bytes in file */
#ifdef _KERNEL
	struct timeval32 ic_atime;	/* 16: time last accessed */
	struct timeval32 ic_mtime;	/* 24: time last modified */
	struct timeval32 ic_ctime;	/* 32: last time inode changed */
#else
	/*
	 * Non-kernel view of the same three 8-byte timestamp slots:
	 * a 32-bit time followed by a spare 32-bit word.
	 */
	time32_t ic_atime;	/* 16: time last accessed */
	int32_t	ic_atspare;
	time32_t ic_mtime;	/* 24: time last modified */
	int32_t	ic_mtspare;
	time32_t ic_ctime;	/* 32: last time inode changed */
	int32_t	ic_ctspare;
#endif
	daddr32_t	ic_db[NDADDR];	/* 40: disk block addresses */
	daddr32_t	ic_ib[NIADDR];	/* 88: indirect blocks */
	int32_t	ic_flags;	/* 100: cflags */
	int32_t	ic_blocks;	/* 104: 512 byte blocks actually held */
	int32_t	ic_gen;		/* 108: generation number */
	int32_t	ic_shadow;	/* 112: shadow inode */
	uid_t	ic_uid;		/* 116: long EFT version of uid */
	gid_t	ic_gid;		/* 120: long EFT version of gid */
	uint32_t ic_oeftflag;	/* 124: extended attr directory ino, 0 = none */
};

/*
 * Large directories can be cached. Directory caching can take the following
 * states:
 */
/*
 * Directory caching state, stored in the inode's i_cachedir member.
 * Only the first enumerator has an explicit value; the rest follow it
 * sequentially, so the three "disabled" states are <= 0 and CD_ENABLED
 * is the only positive value.
 */
typedef enum {
	CD_DISABLED_NOMEM = -2,	/* presumably: disabled, no memory */
	CD_DISABLED_TOOBIG,	/* -1; presumably: disabled, dir too big */
	CD_DISABLED,		/* 0 */
	CD_ENABLED		/* 1 */
} cachedir_t;

/*
 * Large Files: Note we use the inline functions load_double, store_double
 * to load and store the long long values of i_size. Therefore the
 * address of i_size must be eight byte aligned. Kmem_alloc of incore
 * inode structure makes sure that the structure is 8-byte aligned.
 * XX64 - reorder this structure?
 */
/*
 * In-core inode.  The leading members (i_chain, i_freef, i_freeb) are
 * mirrored by iqhead_t, which is declared to match this layout, so they
 * must stay first and in this order.  Which lock protects which member
 * is described in the block comment near the top of this file.
 */
typedef struct inode {
	struct	inode *i_chain[2];	/* must be first */
	struct inode *i_freef;	/* free list forward - must be before i_ic */
	struct inode *i_freeb;	/* free list back - must be before i_ic */
	struct 	icommon	i_ic;	/* Must be here */
	struct	vnode *i_vnode;	/* vnode associated with this inode */
	struct	vnode *i_devvp;	/* vnode for block I/O */
	dev_t	i_dev;		/* device where inode resides */
	ino_t	i_number;	/* i number, 1-to-1 with device address */
	off_t	i_diroff;	/* offset in dir, where we found last entry */
				/* just a hint - no locking needed */
	struct ufsvfs *i_ufsvfs; /* incore fs associated with inode */
	struct	dquot *i_dquot;	/* quota structure controlling this file */
	krwlock_t i_rwlock;	/* serializes write/setattr requests */
	krwlock_t i_contents;	/* protects (most of) inode contents */
	kmutex_t i_tlock;	/* protects time fields, i_flag */
	offset_t i_nextr;	/*					*/
				/* next byte read offset (read-ahead)	*/
				/*   No lock required			*/
				/*					*/
	uint_t	i_flag;		/* inode flags (IUPD, IACC, ... below) */
	uint_t	i_seq;		/* modification sequence number */
	cachedir_t i_cachedir;	/* Cache this directory on next lookup */
				/* - no locking needed  */
	long	i_mapcnt;	/* mappings to file pages */
	int	*i_map;		/* block list for the corresponding file */
	dev_t	i_rdev;		/* INCORE rdev from i_ordev by ufs_iget */
	size_t	i_delaylen;	/* delayed writes, units=bytes */
	offset_t i_delayoff;	/* where we started delaying */
	offset_t i_nextrio;	/* where to start the next clust */
	long	i_writes;	/* number of outstanding bytes in write q */
	kcondvar_t i_wrcv;	/* sleep/wakeup for write throttle */
	offset_t i_doff;	/* dinode byte offset in file system */
	si_t *i_ufs_acl;	/* pointer to acl entry */
	dcanchor_t i_danchor;	/* directory cache anchor */
	kthread_t *i_writer;	/* thread which is in window in wrip() */
} inode_t;

/*
 * A struct icommon overlaid with a 128-byte array, which makes
 * sizeof (struct dinode) at least 128 bytes regardless of the size of
 * struct icommon.  Members are reached through the di_* macros below.
 */
struct dinode {
	union {
		struct	icommon di_icom;
		char	di_size[128];
	} di_un;
};

/*
 * Shorthands for reaching icommon members (and the hash chain links)
 * through a struct inode, e.g. ip->i_size.  Note that i_mode and i_smode
 * both name the short mode field, while i_uid/i_gid name the long (EFT)
 * ids and i_suid/i_sgid the short ones.
 */
#define	i_mode		i_ic.ic_smode
#define	i_nlink		i_ic.ic_nlink
#define	i_uid		i_ic.ic_uid
#define	i_gid		i_ic.ic_gid
#define	i_smode		i_ic.ic_smode
#define	i_suid		i_ic.ic_suid
#define	i_sgid		i_ic.ic_sgid

#define	i_size		i_ic.ic_lsize
#define	i_db		i_ic.ic_db
#define	i_ib		i_ic.ic_ib

#define	i_atime		i_ic.ic_atime
#define	i_mtime		i_ic.ic_mtime
#define	i_ctime		i_ic.ic_ctime

#define	i_shadow	i_ic.ic_shadow
#define	i_oeftflag	i_ic.ic_oeftflag
#define	i_blocks	i_ic.ic_blocks
#define	i_cflags	i_ic.ic_flags
#ifdef _LITTLE_ENDIAN
/*
 * Originally done on x86, but carried on to all other little-endian
 * architectures, which provides for file system compatibility.
 */
#define	i_ordev		i_ic.ic_db[1]	/* USL SVR4 compatibility */
#else
#define	i_ordev		i_ic.ic_db[0]	/* was i_oldrdev */
#endif
#define	i_gen		i_ic.ic_gen
#define	i_forw		i_chain[0]
#define	i_back		i_chain[1]

/* EFT transition aids - obsolete */
#define	oEFT_MAGIC	0x90909090
#define	di_oeftflag	di_ic.ic_oeftflag

/*
 * Shorthands for reaching icommon members through a struct dinode,
 * parallel to the i_* macros for struct inode.  di_ic itself is a macro
 * (di_un.di_icom); the macros that mention it, including di_oeftflag
 * above, are only expanded at their point of use, so definition order
 * does not matter.
 */
#define	di_ic		di_un.di_icom
#define	di_mode		di_ic.ic_smode
#define	di_nlink	di_ic.ic_nlink
#define	di_uid		di_ic.ic_uid
#define	di_gid		di_ic.ic_gid
#define	di_smode	di_ic.ic_smode
#define	di_suid		di_ic.ic_suid
#define	di_sgid		di_ic.ic_sgid

#define	di_size		di_ic.ic_lsize
#define	di_db		di_ic.ic_db
#define	di_ib		di_ic.ic_ib

#define	di_atime	di_ic.ic_atime
#define	di_mtime	di_ic.ic_mtime
#define	di_ctime	di_ic.ic_ctime
#define	di_cflags	di_ic.ic_flags

/* Same endian-dependent choice of ic_db slot as i_ordev. */
#ifdef _LITTLE_ENDIAN
#define	di_ordev	di_ic.ic_db[1]
#else
#define	di_ordev	di_ic.ic_db[0]
#endif
#define	di_shadow	di_ic.ic_shadow
#define	di_blocks	di_ic.ic_blocks
#define	di_gen		di_ic.ic_gen

/*
 * flags - bits for the in-core i_flag member of struct inode.  The rules
 * for updating i_flag are given in the locking comment near the top of
 * this file.  The last two values need more than 16 bits.
 */
#define	IUPD		0x0001		/* file has been modified */
#define	IACC		0x0002		/* inode access time to be updated */
#define	IMOD		0x0004		/* inode has been modified */
#define	ICHG		0x0008		/* inode has been changed */
#define	INOACC		0x0010		/* no access time update in getpage */
#define	IMODTIME	0x0020		/* mod time already set */
#define	IREF		0x0040		/* inode is being referenced */
#define	ISYNC		0x0080		/* do all allocation synchronously */
#define	IFASTSYMLNK	0x0100		/* fast symbolic link */
#define	IMODACC		0x0200		/* only access time changed; */
					/*   filesystem won't become active */
#define	IATTCHG		0x0400		/* only size/blocks have changed */
#define	IBDWRITE	0x0800		/* the inode has been scheduled for */
					/* write operation asynchronously */
#define	ISTALE		0x1000		/* inode couldn't be read from disk */
#define	IDEL		0x2000		/* inode is being deleted */
#define	IDIRECTIO	0x4000		/* attempt directio */
#define	ISEQ		0x8000		/* deferred i_seq increase */
#define	IJUNKIQ		0x10000		/* on junk idle queue */
#define	IQUIET		0x20000		/* No file system full messages */

/* cflags - bits for ic_flags in struct icommon (i_cflags / di_cflags) */
#define	IXATTR		0x0001		/* extended attribute */
#define	IFALLOCATE	0x0002		/* fallocate'd file */
#define	ICOMPRESS	0x0004		/* compressed for dcfs - see */
					/*   ufs_ioctl() _FIO_COMPRESSED */

/*
 * modes - octal bit values for the inode mode field.  IFMT masks the
 * file-type bits; the IF* values below it are the possible types.
 */
#define	IFMT		0170000		/* type of file */
#define	IFIFO		0010000		/* named pipe (fifo) */
#define	IFCHR		0020000		/* character special */
#define	IFDIR		0040000		/* directory */
#define	IFBLK		0060000		/* block special */
#define	IFREG		0100000		/* regular */
#define	IFLNK		0120000		/* symbolic link */
#define	IFSHAD		0130000		/* shadow inode */
#define	IFSOCK		0140000		/* socket */
#define	IFATTRDIR	0160000		/* Attribute directory */

#define	ISUID		04000		/* set user id on execution */
#define	ISGID		02000		/* set group id on execution */
#define	ISVTX		01000		/* save swapped text even after use */
#define	IREAD		0400		/* read, write, execute permissions */
#define	IWRITE		0200
#define	IEXEC		0100

/*
 * specify how the inode info is written in ufs_syncip()
 * (plain values, not a bit mask: I_ASYNC is 0)
 */
#define	I_SYNC		1		/* wait for the inode written to disk */
#define	I_DSYNC		2		/* wait for the inode written to disk */
					/* only if IATTCHG is set */
#define	I_ASYNC		0		/* don't wait for the inode written */

/*
 * flags passed to ufs_itrunc(), indirtrunc(), and free()
 * (single-bit values that can be combined)
 */
#define	I_FREE	0x00000001		/* inode is being freed */
#define	I_DIR	0x00000002		/* inode is a directory */
#define	I_IBLK	0x00000004		/* indirect block */
#define	I_CHEAP	0x00000008		/* cheap free */
#define	I_SHAD	0x00000010		/* inode is a shadow inode */
#define	I_QUOTA	0x00000020		/* quota file */
#define	I_NOCANCEL	0x40		/* Don't cancel these fragments */
#define	I_ACCT	0x00000080		/* Update ufsvfs' unreclaimed_blocks */

/*
 * If ufs_dircheckforname() fails to find an entry with the given name,
 * this "slot" structure holds state for ufs_direnter_*() as to where
 * there is space to put an entry with that name.
 * If ufs_dircheckforname() finds an entry with the given name, this structure
 * holds state for ufs_dirrename() and ufs_dirremove() as to where the
 * entry is. "status" indicates what ufs_dircheckforname() found:
 *      NONE            name not found, large enough free slot not found,
 *      FOUND           name not found, large enough free slot found
 *      EXIST           name found
 * If ufs_dircheckforname() fails due to an error, this structure is not
 * filled in.
 *
 * After ufs_dircheckforname() succeeds the values are:
 *      status  offset          size            fbp, ep
 *      ------  ------          ----            -------
 *      NONE    end of dir      needed          not valid
 *      FOUND   start of entry  of ent          both valid if fbp != NULL
 *      EXIST   start of entry  of prev ent     valid
 *
 * "endoff" is set to 0 if an entry with the given name is found, or if no
 * free slot could be found or made; this means that the directory should not
 * be truncated.  If the entry was found, the search terminates so
 * ufs_dircheckforname() didn't find out where the last valid entry in the
 * directory was, so it doesn't know where to cut the directory off; if no free
 * slot could be found or made, the directory has to be extended to make room
 * for the new entry, so there's nothing to cut off.
 * Otherwise, "endoff" is set to the larger of the offset of the last
 * non-empty entry in the directory and the offset at which the new entry will
 * be placed.  This is used by ufs_diraddentry(); if a new
 * entry is to be added to the directory, any complete directory blocks at the
 * end of the directory that contain no non-empty entries are lopped off the
 * end, thus shrinking the directory dynamically.
 */
/* Result of a directory name search; see the table in the comment above. */
typedef enum {NONE, FOUND, EXIST} slotstat_t;

/*
 * Directory slot state; what each member holds for each status value is
 * given in the comment above.
 */
struct ufs_slot {
	struct	direct *ep;	/* pointer to slot */
	struct	fbuf *fbp;	/* dir buf where slot is */
	off_t	offset;		/* offset of area with free space */
	off_t	endoff;		/* last useful location found in search */
	slotstat_t status;	/* status of slot */
	int	size;		/* size of area at offset */
	int	cached;		/* cached directory */
};

/*
 * Statistics on inodes
 * Not protected by locks
 */
/* Inode cache counters, one named kstat per member. */
struct instats {
	kstat_named_t in_size;		/* current cache size */
	kstat_named_t in_maxsize;	/* maximum cache size */
	kstat_named_t in_hits;		/* cache hits */
	kstat_named_t in_misses;	/* cache misses */
	kstat_named_t in_malloc;	/* kmem_alloc'ed */
	kstat_named_t in_mfree;		/* kmem_free'd */
	kstat_named_t in_maxreached;	/* Largest size reached by cache */
	kstat_named_t in_frfront;	/* # put at front of freelist */
	kstat_named_t in_frback;	/* # put at back of freelist */
	kstat_named_t in_qfree;		/* q's to delete thread */
	kstat_named_t in_scan;		/* # inodes scanned */
	kstat_named_t in_tidles;	/* # inodes idled by idle thread */
	kstat_named_t in_lidles;	/* # inodes idled by ufs_lookup */
	kstat_named_t in_vidles;	/* # inodes idled by ufs_vget */
	kstat_named_t in_kcalloc;	/* # inodes kmem_cache_alloced */
	kstat_named_t in_kcfree;	/* # inodes kmem_cache_freed */
	kstat_named_t in_poc;		/* # push-on-close's */
};

#ifdef _KERNEL

/*
 * Extended attributes
 */

/* NOTE(review): presumably the name of the attribute directory - confirm */
#define	XATTR_DIR_NAME	"/@/"
extern int	ufs_ninode;		/* high-water mark for inode cache */

extern struct vnodeops *ufs_vnodeops;	/* vnode operations for ufs */
/* Declared here only; the table's contents are defined elsewhere. */
extern const struct fs_operation_def ufs_vnodeops_template[];

/*
 * Convert between inode pointers and vnode pointers
 *
 * VTOI casts the vnode's v_data to an inode pointer; ITOV returns the
 * inode's i_vnode.  Neither checks its argument.
 */
#define	VTOI(VP)	((struct inode *)(VP)->v_data)
#define	ITOV(IP)	((struct vnode *)(IP)->i_vnode)

/*
 * convert to fs
 *
 * i_fs is itself a macro, so this follows
 * (IP)->i_ufsvfs->vfs_bufp->b_un.b_fs.
 */
#define	ITOF(IP)	((struct fs *)(IP)->i_fs)

/*
 * Convert between vnode types and inode formats
 */
extern enum vtype	iftovt_tab[];

/*
 * The reverse table is compiled out: nothing visible in this header
 * defines `notneeded'.
 */
#ifdef notneeded

/* Look at sys/mode.h and os/vnode.c */

extern int		vttoif_tab[];

#endif

/*
 * Mark an inode with the current (unique) timestamp.
 * (Note that UFS's concept of time only keeps 32 bits of seconds
 * in the on-disk format).
 *
 * NOTE(review): iuniqtime is declared without `extern', so every
 * translation unit that includes this header with _KERNEL defined gets a
 * tentative definition of it.  That only links when the toolchain merges
 * common symbols.  Confirm where the single real definition should live
 * before turning this into an extern declaration.
 */
struct timeval32 iuniqtime;
extern kmutex_t ufs_iuniqtime_lock;

/*
 * ITIMES_NOLOCK calls ufs_itimes_nolock() directly; per the locking
 * rules at the top of this file the caller must hold i_contents as
 * writer, or i_contents as reader together with i_tlock.
 */
#define	ITIMES_NOLOCK(ip) ufs_itimes_nolock(ip)

/*
 * ITIMES takes and releases i_tlock around ITIMES_NOLOCK.  It expands to
 * a braced block, not an expression, so it can only be used where a
 * statement is allowed, and `if (x) ITIMES(ip); else ...' will not parse.
 * The ip argument is evaluated three times.
 */
#define	ITIMES(ip) { \
	mutex_enter(&(ip)->i_tlock); \
	ITIMES_NOLOCK(ip); \
	mutex_exit(&(ip)->i_tlock); \
}

/*
 * The following interfaces are used to do atomic loads and stores
 * of an inode's i_size, which is a long long data type.
 *
 * For LP64, we just do a load or a store - atomicity and alignment
 * are 8-byte guaranteed.  For x86 there are no such instructions,
 * so we grab i_contents as reader to get the size; we already hold
 * it as writer when we're setting the size.
 */

#ifdef _LP64

/* Plain assignment; the expansions are not wrapped in parentheses. */
#define	UFS_GET_ISIZE(resultp, ip)	*(resultp) = (ip)->i_size
#define	UFS_SET_ISIZE(value, ip)	(ip)->i_size = (value)

#else	/* _LP64 */

/*
 * These forms expand to braced blocks, so both macros can only be used
 * as statements.  UFS_GET_ISIZE takes i_contents as reader itself;
 * UFS_SET_ISIZE only asserts that the caller already holds it as writer.
 * The ip argument is evaluated more than once.
 */
#define	UFS_GET_ISIZE(resultp, ip)				\
	{							\
		rw_enter(&(ip)->i_contents, RW_READER);		\
		*(resultp) = (ip)->i_size;			\
		rw_exit(&(ip)->i_contents);			\
	}
#define	UFS_SET_ISIZE(value, ip)				\
	{							\
		ASSERT(RW_WRITE_HELD(&(ip)->i_contents));	\
		(ip)->i_size = (value);				\
	}

#endif	/* _LP64 */

/*
 * Allocate the specified block in the inode
 * and make sure any in-core pages are initialized.
 *
 * Expands to a bmap_write() call with BI_NORMAL and a NULL fifth
 * argument; every macro argument is parenthesized in the expansion.
 */
#define	BMAPALLOC(ip, off, size, cr) \
	bmap_write((ip), (u_offset_t)(off), (size), BI_NORMAL, NULL, (cr))

#define	ESAME	(-1)		/* trying to rename linked files (special) */

/*
 * The cast expression is parenthesized as a whole so that UFS_HOLE is a
 * single operand wherever it is used (e.g. as the operand of sizeof).
 */
#define	UFS_HOLE	((daddr32_t)-1)	/* value used when no block allocated */

/*
 * enums
 */

/* direnter ops - the enum de_op argument of the ufs_direnter_*() routines */
enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME, DE_SYMLINK, DE_ATTRDIR};

/* dirremove ops - the enum dr_op argument of ufs_dirremove() */
enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME };

/*
 * block initialization type for bmap_write
 *
 * BI_NORMAL - allocate and zero fill pages in memory
 * BI_ALLOC_ONLY - only allocate the block, do not zero out pages in mem
 * BI_FALLOCATE - allocate only, do not zero out pages, and store as negative
 *                block number in inode block list
 *
 * BMAPALLOC() always passes BI_NORMAL.
 */
enum bi_type { BI_NORMAL, BI_ALLOC_ONLY, BI_FALLOCATE };

/*
 * This overlays the fid structure (see vfs.h)
 *
 * LP64 note: we use int32_t instead of ino_t since UFS does not use
 * inode numbers larger than 32-bits and ufid's are passed to NFS
 * which expects them to not grow in size beyond 10 bytes (12 including
 * the length).
 */
/*
 * Two 16-bit members followed by two 32-bit members: 12 bytes in all,
 * 10 when the length member is not counted.
 */
struct ufid {
	ushort_t ufid_len;
	ushort_t ufid_flags;
	int32_t	ufid_ino;
	int32_t	ufid_gen;
};

/*
 * each ufs thread (see ufs_thread.c) is managed by this struct
 */
/*
 * Queue descriptor for one ufs thread.  The head pointer is a union so
 * that the same structure can head a list of inodes or a list of
 * ufs_failure_t entries; use the uq_head/uq_ihead/uq_ufhead accessors.
 */
struct ufs_q {
	union uq_head {
		void		*_uq_generic;	/* first entry on q */
		struct inode	*_uq_i;
		ufs_failure_t	*_uq_uf;
	} _uq_head;
	int		uq_ne;		/* # of entries/failures found */
	int		uq_lowat;	/* thread runs when ne == lowat */
	int		uq_hiwat;	/* synchronous idle if ne >= hiwat */
	ushort_t	uq_flags;	/* flags (see below) */
	kcondvar_t	uq_cv;		/* for sleep/wakeup */
	kthread_id_t	uq_threadp;	/* thread managing this q */
	kmutex_t	uq_mutex;	/* protects this struct */
};

/* Typed views of the queue head; all three name the same storage. */
#define	uq_head		_uq_head._uq_generic
#define	uq_ihead	_uq_head._uq_i
#define	uq_ufhead	_uq_head._uq_uf

/*
 * uq_flags
 */
#define	UQ_EXIT		(0x0001)	/* q server exits at its convenience */
#define	UQ_WAIT		(0x0002)	/* thread is waiting on q server */
#define	UQ_SUSPEND	(0x0004)	/* request for suspension */
#define	UQ_SUSPENDED	(0x0008)	/* thread has suspended itself */

/*
 * When logging is enabled, statvfs must account for blocks and files that
 * may be on the delete queue.  Protected by ufsvfsp->vfs_delete.uq_mutex
 */
/* Embedded in struct ufsvfs as vfs_delete_info. */
struct ufs_delq_info {
	u_offset_t	delq_unreclaimed_blocks;
	ulong_t		delq_unreclaimed_files;
};


/*
 * global idle queues
 * The queues are sized dynamically in proportion to ufs_ninode
 * which, unless overridden, scales with the amount of memory.
 * The idle queue is halved whenever it hits the low water mark
 * (1/4 of ufs_ninode), but can burst to sizes much larger. The number
 * of hash queues is currently maintained to give on average IQHASHQLEN
 * entries when the idle queue is at the low water mark.
 * Note, we do not need to search along the hash queues, but use them
 * in order to batch together geographically local inodes to allow
 * their updates (via the log or buffer cache) to require fewer disk seeks.
 * This gives an incredible performance boost for logging and a boost for
 * non logging file systems.
 */
/*
 * Idle queue head.  It repeats the leading members of inode_t with the
 * same names and order, so the two layouts must be kept in step.
 */
typedef struct {
	inode_t *i_chain[2];	/* must match inode_t, but unused */
	inode_t *i_freef;	/* must match inode_t, idle list forward */
	inode_t *i_freeb;	/* must match inode_t, idle list back  */
} iqhead_t;

extern struct ufs_q ufs_idle_q;		/* used by global ufs idle thread */
extern iqhead_t *ufs_junk_iq;		/* junk idle queues */
extern iqhead_t *ufs_useful_iq;		/* useful idle queues */
extern int ufs_njunk_iq;		/* number of entries in junk iq */
extern int ufs_nuseful_iq;		/* number of entries in useful iq */
extern int ufs_niqhash;			/* number of iq hash qs - power of 2 */
extern int ufs_iqhashmask;		/* iq hash mask = ufs_niqhash - 1 */

/*
 * Idle queue hashing.  IQHASH picks a queue index from the inode number
 * shifted right by INOCGSHIFT; IQNEXT steps an index to the next queue,
 * wrapping through ufs_iqhashmask.  Both expansions are fully
 * parenthesized so that they can be used as operands of any operator
 * (comparison, arithmetic, array subscript).
 */
#define	IQHASHQLEN 32			/* see comments above */
#define	INOCGSHIFT 7			/* 128 inodes per cylinder group */
#define	IQHASH(ip) (((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask)
#define	IQNEXT(i) (((i) + 1) & ufs_iqhashmask) /* next idle queue */

extern struct ufs_q	ufs_hlock;	/* used by global ufs hlock thread */

/*
 * vfs_lfflags flags
 * (cast to ushort_t, the type of the vfs_lfflags member)
 */
#define	UFS_LARGEFILES	((ushort_t)0x1)	/* set if mount allows largefiles */

/*
 * vfs_dfritime flags
 */
#define	UFS_DFRATIME	0x1		/* deferred access time */

/*
 * UFS VFS private data.
 *
 * UFS file system instances may be linked on several lists.
 *
 * -	The vfs_next field chains together every extant ufs instance; this
 *	list is rooted at ufs_instances and should be used in preference to
 *	the overall vfs list (which is properly the province of the generic
 *	file system code, not of file system implementations).  This same list
 *	link is used during forcible unmounts to chain together instances that
 *	can't yet be completely dismantled.
 *
 * -	The vfs_wnext field is used within ufs_update to form a work list of
 *	UFS instances to be synced out.
 */
typedef struct ufsvfs {
	struct vfs	*vfs_vfs;	/* back link			*/
	struct ufsvfs	*vfs_next;	/* instance list link		*/
	struct ufsvfs	*vfs_wnext;	/* work list link		*/
	struct vnode	*vfs_root;	/* root vnode			*/
	struct buf	*vfs_bufp;	/* buffer containing superblock */
	struct vnode	*vfs_devvp;	/* block device vnode		*/
	ushort_t	vfs_lfflags;	/* Large files (set by mount)   */
	ushort_t	vfs_qflags;	/* QUOTA: filesystem flags	*/
	struct inode	*vfs_qinod;	/* QUOTA: pointer to quota file */
	uint_t		vfs_btimelimit;	/* QUOTA: block time limit	*/
	uint_t		vfs_ftimelimit;	/* QUOTA: file time limit	*/
	krwlock_t	vfs_dqrwlock;	/* QUOTA: protects quota fields */
	/*
	 * some fs local threads
	 */
	struct ufs_q	vfs_delete;	/* delayed inode delete */
	struct ufs_q	vfs_reclaim;	/* reclaim open, deleted files */

	/*
	 * This is copied from the super block at mount time.
	 */
	int		vfs_nrpos;	/* # rotational positions */
	/*
	 * This lock protects cg's and super block pointed at by
	 * vfs_bufp->b_fs.  Locks contents of fs and cg's and contents
	 * of vfs_dio.
	 */
	kmutex_t	vfs_lock;
	struct ulockfs	vfs_ulockfs;	/* ufs lockfs support */
	uint_t		vfs_dio;	/* delayed io (_FIODIO) */
	uint_t		vfs_nointr;	/* disallow lockfs interrupts */
	uint_t		vfs_nosetsec;	/* disallow ufs_setsecattr */
	uint_t		vfs_syncdir;	/* synchronous local directory ops */
	uint_t		vfs_dontblock;	/* don't block on forced umount */

	/*
	 * trans (logging ufs) stuff
	 */
	uint_t		vfs_domatamap;	/* set if matamap enabled */
	ulong_t		vfs_maxacl;	/* transaction stuff - max acl size */
	ulong_t		vfs_dirsize;	/* logspace for directory creation */
	ulong_t		vfs_avgbfree;	/* average free blks in cg (blkpref) */
	/*
	 * Some useful constants
	 */
	int	vfs_nindirshift;	/* calc. from fs_nindir */
	int	vfs_nindiroffset;	/* calc. from fs_ninidr */
	int	vfs_ioclustsz;		/* bytes in read/write cluster */
	int	vfs_iotransz;		/* max device i/o transfer size  */

	vfs_ufsfx_t	vfs_fsfx;	/* lock/fix-on-panic support */
	/*
	 * More useful constants
	 */
	int	vfs_minfrags;		/* calc. from fs_minfree */
	/*
	 * Force DirectIO on all files
	 */
	uint_t	vfs_forcedirectio;
	/*
	 * Deferred inode time related fields
	 */
	clock_t		vfs_iotstamp;	/* last I/O timestamp */
	uint_t		vfs_dfritime;	/* deferred inode time flags */
	/*
	 * Some more useful info
	 */
	dev_t		vfs_dev;	/* device mounted from */
	struct ml_unit	*vfs_log;	/* pointer to embedded log struct */
	uint_t		vfs_noatime;    /* disable inode atime updates */
	/*
	 * snapshot stuff
	 */
	void		*vfs_snapshot;	/* snapshot handle */
	/*
	 *  Controls logging "file system full" messages to messages file
	 */
	clock_t		vfs_lastwhinetime;

	int 		vfs_nolog_si;	/* not logging summary info */
	int		vfs_validfs;	/* indicates mounted fs */

	/*
	 * Additional information about vfs_delete above
	 */
	struct ufs_delq_info vfs_delete_info; /* what's on the delete queue */
} ufsvfs_t;

/*
 * Shorthand member name: an access written as ufsvfsp->vfs_fs expands to
 * ufsvfsp->vfs_bufp->b_un.b_fs.  Because this is an object-like macro, the
 * identifier vfs_fs is replaced wherever it appears after this point.
 */
#define	vfs_fs	vfs_bufp->b_un.b_fs

/*
 * Values for vfs_validfs, the int member of ufsvfs_t that is described
 * there as "indicates mounted fs".
 * NOTE(review): UT_HLOCKING presumably marks a file system that is in the
 * process of being hard locked -- confirm against the lockfs code.
 */
#define	UT_UNMOUNTED	0
#define	UT_MOUNTED	1
#define	UT_HLOCKING	2

/*
 * Reduce an inode number to a value in the range [0, inohsz - 1].
 *
 * inohsz is guaranteed to be a power of 2, so masking with (inohsz - 1)
 * keeps only the low-order bits.  The argument is parenthesized so that
 * the (int) cast applies to the whole argument expression rather than
 * only to its first operand, e.g. INOHASH(x >> 32).
 */
#define	INOHASH(ino)	(((int)(ino)) & (inohsz - 1))

/*
 * Evaluates to non-zero when all of the following hold for block number
 * bn of inode ip:
 *   - bn is negative,
 *   - bn is an exact multiple of ip->i_fs->fs_frag,
 *   - IFALLOCATE is set in ip->i_cflags, and
 *   - bn is not UFS_HOLE.
 *
 * Both arguments are expanded more than once, so they must be free of
 * side effects.  Every use of ip and bn is parenthesized so that
 * expression arguments (e.g. a pointer sum) bind as intended.
 * NOTE(review): presumably a negative block number is how a block
 * reserved by fallocate is recorded -- confirm against the bmap code.
 */
#define	ISFALLOCBLK(ip, bn)	\
	(((bn) < 0) && ((bn) % (ip)->i_fs->fs_frag == 0) && \
	(((ip)->i_cflags & IFALLOCATE) && ((bn) != UFS_HOLE)))

/*
 * Two overlaid views of the same pair of pointers: ih_head[] types them
 * as pointers to union ihead, ih_chain[] as pointers to struct inode.
 * NOTE(review): presumably the two slots are the forward and backward
 * links of an inode hash chain, with the head standing in for an inode
 * at the ends of the list -- confirm against the inode hashing code.
 */
union ihead {
	union	ihead	*ih_head[2];
	struct	inode	*ih_chain[2];
};

/*
 * Globals declared here and defined elsewhere.  inohsz is the value used
 * by INOHASH() and is guaranteed to be a power of 2.
 * NOTE(review): ihead, ih_lock and ih_ne are presumably parallel arrays
 * indexed by INOHASH() (chain head, per-chain lock, per-chain entry
 * count) -- confirm against the code that allocates them.
 */
extern	union	ihead	*ihead;
extern  kmutex_t	*ih_lock;
extern  int	*ih_ne;
extern	int	inohsz;

/* NOTE(review): units and meaning of ufs_iowait are not visible here. */
extern	clock_t	ufs_iowait;

#endif /* _KERNEL */

/*
 * ufs function prototypes
 */
#if defined(_KERNEL) && !defined(_BOOT)

/*
 * In-core inode routines.  Only the declarations are visible here; the
 * definitions live outside this header.
 * NOTE(review): the struct inode ** argument of ufs_iget() and
 * ufs_iget_alloced() is presumably where the resulting inode is handed
 * back, and the int results are presumably errno values (0 on success)
 * -- confirm against the implementations.
 */
extern	void	ufs_iinit(void);
extern	int	ufs_iget(struct vfs *, ino_t, struct inode **, cred_t *);
extern	int	ufs_iget_alloced(struct vfs *, ino_t, struct inode **,
    cred_t *);
extern	void	ufs_reset_vnode(vnode_t *);
extern	void	ufs_iinactive(struct inode *);
extern	void	ufs_iupdat(struct inode *, int);
extern	int	ufs_rmidle(struct inode *);
extern	int	ufs_itrunc(struct inode *, u_offset_t, int, cred_t *);
extern	int	ufs_iaccess(struct inode *, int, cred_t *, int);
/* rdip() and wrip() share one signature: inode, uio, int, credentials. */
extern  int	rdip(struct inode *, struct uio *, int, struct cred *);
extern  int	wrip(struct inode *, struct uio *, int, struct cred *);

extern void	ufs_imark(struct inode *);
extern void	ufs_itimes_nolock(struct inode *);

/*
 * Directory routines.  The enum de_op and enum dr_op argument types are
 * declared elsewhere.
 * NOTE(review): the char * arguments are presumably entry names and the
 * struct inode ** arguments presumably return an inode to the caller --
 * confirm against the directory code.
 */
extern	int	ufs_diraccess(struct inode *, int, struct cred *);
extern	int	ufs_dirlook(struct inode *, char *, struct inode **,
    cred_t *, int, int);
extern	int	ufs_direnter_cm(struct inode *, char *, enum de_op,
    struct vattr *, struct inode **, cred_t *, int);
extern	int	ufs_direnter_lr(struct inode *, char *, enum de_op,
    struct inode *, struct inode *, cred_t *);
extern	int	ufs_dircheckpath(ino_t, struct inode *, struct inode *,
    struct cred *);
extern	int	ufs_dirmakeinode(struct inode *, struct inode **,
    struct vattr *, enum de_op, cred_t *);
extern	int	ufs_dirremove(struct inode *, char *, struct inode *,
    vnode_t *, enum dr_op, cred_t *);
extern  int	ufs_dircheckforname(struct inode *, char *, int,
    struct ufs_slot *, struct inode **, struct cred *, int);
extern	int	ufs_xattrdirempty(struct inode *, ino_t, cred_t *);
extern	int	blkatoff(struct inode *, off_t, char **, struct fbuf **);

/* NOTE(review): presumably writes back the super block -- confirm. */
extern	void	sbupdate(struct vfs *);

/*
 * Inode and block allocation routines.
 *
 * free() and alloc() below are kernel routines with their own signatures
 * (note the struct inode * first argument); they are only declared when
 * _KERNEL is defined and _BOOT is not, and they are not the C library
 * functions of the same or similar name.
 * NOTE(review): dirpref(), blkpref() and contigpref() presumably return a
 * preferred inode/block number used as an allocation hint -- confirm.
 */
extern	int	ufs_ialloc(struct inode *, ino_t, mode_t, struct inode **,
    cred_t *);
extern	void	ufs_ifree(struct inode *, ino_t, mode_t);
extern	void	free(struct inode *, daddr_t, off_t, int);
extern	int	alloc(struct inode *, daddr_t, int, daddr_t *, cred_t *);
extern	int	realloccg(struct inode *, daddr_t, daddr_t, int, int,
    daddr_t *, cred_t *);
extern	int	ufs_allocsp(struct vnode *, struct flock64 *, cred_t *);
extern	int	ufs_freesp(struct vnode *, struct flock64 *, int, cred_t *);
extern	ino_t	dirpref(inode_t *);
extern	daddr_t	blkpref(struct inode *, daddr_t, int, daddr32_t *);
extern	daddr_t	contigpref(ufsvfs_t *, size_t, size_t);

/*
 * Inode read/write helper taking the direction (enum uio_rw), an inode,
 * a buffer address and length, an offset and an address-space selector
 * (enum uio_seg).
 * NOTE(review): the int * argument presumably receives the residual
 * count -- confirm against the implementation.
 */
extern	int	ufs_rdwri(enum uio_rw, int, struct inode *, caddr_t, ssize_t,
	offset_t, enum uio_seg, int *, cred_t *);

/*
 * Block map routines.
 * NOTE(review): the daddr_t * and int * arguments of bmap_read() and
 * bmap_write() are presumably outputs (disk address, contiguous length)
 * -- confirm against the bmap code.
 */
extern	int	bmap_read(struct inode *, u_offset_t, daddr_t *, int *);
extern	int	bmap_write(struct inode *, u_offset_t, int, enum bi_type,
    daddr_t *, struct cred *);
extern	int	bmap_has_holes(struct inode *);
extern	int	bmap_find(struct inode *, boolean_t, u_offset_t *);
extern	int	bmap_set_bn(struct vnode *, u_offset_t, daddr32_t);

/*
 * NOTE(review): presumably add/remove a ufsvfs on a global list of UFS
 * instances -- confirm.
 */
extern	void	ufs_vfs_add(struct ufsvfs *);
extern	void	ufs_vfs_remove(struct ufsvfs *);

/*
 * Super block, summary info, sync and bitmap helpers.
 *
 * isblock(), setblock(), clrblock() and isclrblock() share the argument
 * list (struct fs *, uchar_t *, daddr_t).
 * NOTE(review): the uchar_t * is presumably a block bitmap and the
 * daddr_t a block index within it -- confirm.
 */
extern	void	ufs_sbwrite(struct ufsvfs *);
extern	void	ufs_update(int);
extern	int	ufs_getsummaryinfo(dev_t, struct ufsvfs *, struct fs *);
extern	int	ufs_putsummaryinfo(dev_t, struct ufsvfs *, struct fs *);
extern	int	ufs_syncip(struct inode *, int, int, top_t);
extern	int	ufs_sync_indir(struct inode *);
extern	int	ufs_indirblk_sync(struct inode *, offset_t);
extern	int	ufs_badblock(struct inode *, daddr_t);
extern	int	ufs_indir_badblock(struct inode *, daddr32_t *);
extern	void	ufs_notclean(struct ufsvfs *);
extern	void	ufs_checkclean(struct vfs *);
extern	int	isblock(struct fs *, uchar_t *, daddr_t);
extern	void	setblock(struct fs *, uchar_t *, daddr_t);
extern	void	clrblock(struct fs *, uchar_t *, daddr_t);
extern	int	isclrblock(struct fs *, uchar_t *, daddr_t);
extern	void	fragacct(struct fs *, int, int32_t *, int);
extern	int	skpc(char, uint_t, char *);
extern	int	ufs_fbwrite(struct fbuf *, struct inode *);
extern	int	ufs_fbiwrite(struct fbuf *, struct inode *, daddr_t, long);
extern	int	ufs_putapage(struct vnode *, struct page *, u_offset_t *,
				size_t *, int, struct cred *);
/* Allocate / release an inode_t; the pair is declared together here. */
extern inode_t	*ufs_alloc_inode(ufsvfs_t *, ino_t);
extern void	ufs_free_inode(inode_t *);

/*
 * special stuff
 *
 * ufs_scan_inodes() takes a callback of type
 * int (*)(struct inode *, void *) followed by a void * argument;
 * ufs_sync_inode() has exactly that callback signature.
 * NOTE(review): presumably the void * is handed unchanged to the callback
 * for each inode visited -- confirm against ufs_scan_inodes().
 */
extern	void	ufs_setreclaim(struct inode *);
extern	int	ufs_scan_inodes(int, int (*)(struct inode *, void *), void *,
				struct ufsvfs *);
extern	int	ufs_sync_inode(struct inode *, void *);
extern	int	ufs_sticky_remove_access(struct inode *, struct inode *,
    struct cred *);
/*
 * quota
 *
 * chkiq() is the only prototype in this section of the header that names
 * parameters: errp (char **) and lenp (size_t *).
 * NOTE(review): presumably errp/lenp return an error message buffer and
 * its length to the caller -- confirm against the quota code.
 */
extern	int	chkiq(struct ufsvfs *, int, struct inode *, uid_t, int,
			struct cred *, char **errp, size_t *lenp);

/*
 * ufs thread stuff
 *
 * ufs_thread_start() takes its thread function as void (*)(), i.e. a
 * function pointer declared without a parameter list, so the compiler
 * does not check the argument types of the function passed in.  The
 * candidate thread functions declared below do not share one signature
 * (struct vfs *, void *, and void argument lists all appear).
 */
extern	void	ufs_thread_delete(struct vfs *);
extern	void	ufs_delete_drain(struct vfs *, int, int);
extern	void	ufs_delete(struct ufsvfs *, struct inode *, int);
extern	void	ufs_inode_cache_reclaim(void *);
extern	void	ufs_idle_drain(struct vfs *);
extern	void	ufs_idle_some(int);
extern	void	ufs_thread_idle(void);
extern	void	ufs_thread_reclaim(struct vfs *);
extern	void	ufs_thread_init(struct ufs_q *, int);
extern	void	ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *);
extern	void	ufs_thread_exit(struct ufs_q *);
extern	void	ufs_thread_suspend(struct ufs_q *);
extern	void	ufs_thread_continue(struct ufs_q *);
extern	void	ufs_thread_hlock(void *);
extern	void	ufs_delete_init(struct ufsvfs *, int);
extern	void	ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *);
extern	void	ufs_delete_drain_wait(struct ufsvfs *, int);

/*
 * ufs lockfs stuff
 *
 * struct seg is forward-declared so that ufs_lockfs_begin_getpage() can
 * take a struct seg * without this header needing the full definition.
 * NOTE(review): the ufs_lockfs_begin*() routines presumably hand back
 * the struct ulockfs * through their struct ulockfs ** argument, to be
 * passed later to ufs_lockfs_end() -- confirm against the lockfs code.
 */
struct seg;
extern int ufs_reconcile_fs(struct vfs *, struct ufsvfs *, int);
extern int ufs_quiesce(struct ulockfs *);
extern int ufs_flush(struct vfs *);
extern int ufs_fiolfs(struct vnode *, struct lockfs *, int);
extern int ufs__fiolfs(struct vnode *, struct lockfs *, int, int);
extern int ufs_fiolfss(struct vnode *, struct lockfs *);
extern int ufs_fioffs(struct vnode *, char *, struct cred *);
extern int ufs_check_lockfs(struct ufsvfs *, struct ulockfs *, ulong_t);
extern int ufs_lockfs_begin(struct ufsvfs *, struct ulockfs **, ulong_t);
extern int ufs_lockfs_trybegin(struct ufsvfs *, struct ulockfs **, ulong_t);
extern int ufs_lockfs_begin_getpage(struct ufsvfs *, struct ulockfs **,
		struct seg *, int, uint_t *);
extern void ufs_lockfs_end(struct ulockfs *);
/*
 * ufs acl stuff
 *
 * ufs_acl_get() and ufs_acl_set() share one argument list
 * (inode, vsecattr_t *, int, credentials).
 * NOTE(review): the "si" in the ufs_si_* names presumably refers to the
 * si_t type taken by ufs_si_free() -- confirm against the ACL code.
 */
extern int ufs_si_inherit(struct inode *, struct inode *, o_mode_t, cred_t *);
extern void si_cache_init(void);
extern int ufs_si_load(struct inode *, cred_t *);
extern void ufs_si_del(struct inode *);
extern int ufs_acl_access(struct inode *, int, cred_t *);
extern void ufs_si_cache_flush(dev_t);
extern int ufs_si_free(si_t *, struct vfs *, cred_t *);
extern int ufs_acl_setattr(struct inode *, struct vattr *, cred_t *);
extern int ufs_acl_get(struct inode *, vsecattr_t *, int, cred_t *);
extern int ufs_acl_set(struct inode *, vsecattr_t *, int, cred_t *);
/*
 * ufs directio stuff
 */
/*
 * Takes no arguments.  Declared with (void) so that this is a real
 * prototype and calls with arguments are rejected at compile time,
 * matching the other no-argument routines in this header.
 */
extern void ufs_directio_init(void);
/*
 * Direct I/O write and read entry points, followed by the two
 * DIRECTIO_* status values.
 * NOTE(review): DIRECTIO_FAILURE / DIRECTIO_SUCCESS are presumably what
 * these routines store through their trailing int * argument to say
 * whether direct I/O was actually performed -- confirm against the
 * directio code.
 */
extern int ufs_directio_write(struct inode *, uio_t *, int, int, cred_t *,
    int *);
extern int ufs_directio_read(struct inode *, uio_t *, cred_t *, int *);
#define	DIRECTIO_FAILURE	(0)
#define	DIRECTIO_SUCCESS	(1)

/*
 * ufs extensions for PXFS
 *
 * Both routines operate on a vnode at a given offset using an
 * fdbuffer_t.  ufs_rdwr_data() takes the length by value, whereas
 * ufs_alloc_data() takes a pointer to it.
 * NOTE(review): ufs_alloc_data() presumably updates *len with the amount
 * actually allocated -- confirm against the implementation.
 */

int ufs_rdwr_data(vnode_t *vp, u_offset_t offset, size_t len, fdbuffer_t *fdb,
    int flags, cred_t *cr);
int ufs_alloc_data(vnode_t *vp, u_offset_t offset, size_t *len, fdbuffer_t *fdb,
    int flags, cred_t *cr);

/*
 * prototypes to support the forced unmount
 *
 * ufs_freeze() returns nothing; ufs_thaw() returns an int.
 * NOTE(review): the int is presumably 0 or an errno value -- confirm
 * against the lockfs code.
 */

void ufs_freeze(struct ulockfs *, struct lockfs *);
int ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *);

/*
 * extended attributes
 *
 * NOTE(review): the inode_t ** argument of ufs_xattrmkdir() and
 * ufs_xattr_getattrdir() presumably returns the attribute directory
 * inode to the caller -- confirm against the xattr code.
 */

int ufs_xattrmkdir(inode_t *, inode_t **, int, struct cred *);
int ufs_xattr_getattrdir(vnode_t *, inode_t **, int, struct cred *);
void ufs_unhook_shadow(inode_t *, inode_t *);

#endif	/* defined(_KERNEL) && !defined(_BOOT) */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_FS_UFS_INODE_H */