summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys/lvm/md_mddb.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/sys/lvm/md_mddb.h')
-rw-r--r--usr/src/uts/common/sys/lvm/md_mddb.h913
1 files changed, 913 insertions, 0 deletions
diff --git a/usr/src/uts/common/sys/lvm/md_mddb.h b/usr/src/uts/common/sys/lvm/md_mddb.h
new file mode 100644
index 0000000000..45f023b26d
--- /dev/null
+++ b/usr/src/uts/common/sys/lvm/md_mddb.h
@@ -0,0 +1,913 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_MD_MDDB_H
+#define _SYS_MD_MDDB_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/buf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if 0 /* DRP FOR DEBUGGING */
+#define MDDB_FAKE
+#endif
+
+/* Private flags */
+#define MD_PRV_GOTIT 0x0001 /* Been snarfed */
+#define MD_PRV_DELETE 0x0002 /* Record pending to be deleted */
+#define MD_PRV_COMMIT 0x0004 /* Record pending to be committed */
+#define MD_PRV_CLEANUP 0x0008 /* Record pending to be cleaned up */
+#define MD_PRV_CONVD 0x0010 /* Record has been converted (32->64) */
+#define MD_PRV_PENDDEL (MD_PRV_GOTIT | MD_PRV_DELETE) /* snarfed, delete pending */
+#define MD_PRV_PENDCOM (MD_PRV_GOTIT | MD_PRV_COMMIT) /* snarfed, commit pending */
+#define MD_PRV_PENDCLEAN (MD_PRV_GOTIT | MD_PRV_CLEANUP) /* snarfed, cleanup pending */
+
+
+#define MDDB_E_INVALID (-1) /* an invalid argument was passed */
+#define MDDB_E_EXISTS (-2) /* doing an operation a 2nd time which can */
+ /* only be done once */
+#define MDDB_E_MASTER (-3) /* problem occurred accessing master block */
+ /* returned from NEW_DEV */
+#define MDDB_E_TOOSMALL (-4) /* device is not large enough */
+#define MDDB_E_NORECORD (-5) /* record does not exist */
+ /*
+ * returned from: mddb_getnextrec
+ * mddb_getrecsize
+ * mddb_commitrec
+ * mddb_commitrecs
+ * mddb_deleterec
+ */
+#define MDDB_E_NOSPACE (-6) /* no space to create record */
+#define MDDB_E_NOTNOW (-7) /* do not presently have enough resources */
+ /* to perform requested operation */
+#define MDDB_E_NODB (-8) /* no database exists */
+#define MDDB_E_NOTOWNER (-9) /* have not been told to grab this set */
+#define MDDB_E_STALE (-10) /* database is stale */
+#define MDDB_E_TOOFEW (-11) /* not enough replicas available */
+#define MDDB_E_TAGDATA (-12) /* tagged data detected */
+#define MDDB_E_ACCOK (-13) /* 50/50 mode */
+#define MDDB_E_NTAGDATA (-14) /* tagop try, no tag data */
+#define MDDB_E_ACCNOTOK (-15) /* accop try, no accept possible */
+#define MDDB_E_NOLOCBLK (-16) /* No valid locators found */
+#define MDDB_E_NOLOCNMS (-17) /* No valid locator name information */
+#define MDDB_E_NODIRBLK (-18) /* No directory blocks found */
+#define MDDB_E_NOTAGREC (-19) /* No tag record blocks found */
+#define MDDB_E_NOTAG (-20) /* No matching tag record found */
+#define MDDB_E_NODEVID (-21) /* No device id found */
+
+#define MDDB_MINBLKS 16 /* enough for a few metadevices */
+#define MDDB_MAXBLKS 8192 /* size of free bit map (must be / 8) */
+#define MDDB_MN_MINBLKS 32768 /* Multinode metadb minimum size */
+ /* 16MB */
+#define MDDB_MN_MAXBLKS 524288 /* size of free bit map (must be / 8) */
+ /* 256MB */
+
+#define MDDB_C_STALE 0x0001
+#define MDDB_C_TOOFEW 0x0002
+#define MDDB_C_NOTOWNER 0x0004
+#define MDDB_C_SET_MN_STALE 0x0008 /* Set MN set to stale */
+#define MDDB_C_IMPORT 0x0010
+
+/*
+ * Defines used to set/reset new master flag in set structure.
+ * Used during reconfig cycle to determine quickly if there is
+ * new master for the set.
+ */
+#define MDDB_NM_SET 0x0001
+#define MDDB_NM_RESET 0x0002
+#define MDDB_NM_GET 0x0004
+
+/* Definitions of flag in Locator Block Device ID data area - mddb_did_info */
+#define MDDB_DID_EXISTS 0x0001 /* Device ID exists */
+#define MDDB_DID_VALID 0x0002 /* Device ID valid on current system */
+#define MDDB_DID_UPDATED 0x0004 /* locator/sidelocator info updated */
+
+/* Definitions of flag in Locator Block - mddb_lb */
+#define MDDB_DEVID_STYLE 0x0001 /* Locator Block in Device ID format */
+#define MDDB_MNSET 0x0002 /* MDDB is for a multi-node set */
+
+
+#define MDDB_MAX_PATCH 25 /* number of locations that */
+ /* can be patched in etc/system */
+
+/*
+ * Set struct used by all parts of the driver, to store anchor pointers.
+ */
+typedef struct md_set {
+ uint_t s_status; /* set status */
+ void **s_ui; /* set unit incore anchor */
+ void **s_un; /* set unit anchor */
+ void *s_hsp; /* set Hot Spare Pool anchor */
+ void *s_hs; /* set Hot Spare anchor */
+ void *s_db; /* set MDDB anchor */
+ kmutex_t s_dbmx; /* set MDDB mutex */
+ void *s_nm; /* set namespace anchor */
+ mddb_recid_t s_nmid; /* set namespace anchor record */
+ void *s_did_nm; /* set device id namespace anchor */
+ mddb_recid_t s_did_nmid; /* set device id namespace anchor rec */
+ void *s_dtp; /* set data tag rec */
+ int s_am_i_master; /* incore master flag for this node */
+ md_mn_nodeid_t s_nodeid; /* nodeid of this node - for MN sets */
+ uint_t s_rcnt; /* incore resync count for set */
+} md_set_t;
+
+
+#define MDDB_MAGIC_MB 0x6d646d62 /* magic number for master blocks */
+#define MDDB_MAGIC_DB 0x6d646462 /* magic number for directory blocks */
+#define MDDB_MAGIC_RB 0x6d647262 /* magic number for record blocks */
+#define MDDB_MAGIC_LB 0x6d646c62 /* magic number for locator blocks */
+#define MDDB_MAGIC_LN 0x6d646c6e /* magic number for locator names */
+#define MDDB_MAGIC_DT 0x6d646474 /* magic number for data tag */
+#define MDDB_MAGIC_DI 0x6d646469 /* magic number for device ID block */
+#define MDDB_MAGIC_DU 0x6d646475 /* magic num for dummy mb */
+#define MDDB_MAGIC_DE 0x6d646465 /* magic num for mb devid */
+
+#define MDDB_GLOBAL_XOR 1234567890
+
+#define MDDB_REV_MAJOR (uint_t)0xff00
+#define MDDB_REV_MINOR (uint_t)0x00ff
+
+/*
+ * MDDB_REV_MNMB:
+ * If a MN diskset, master block revision is set to MDDB_REV_MNMB.
+ * Even though the master block structure is no different
+ * for a MN set, setting the revision field to a different
+ * number keeps any pre-MN_diskset code from accessing
+ * this diskset. It also allows for an early determination
+ * of a MN diskset when reading in from disk so that the
+ * proper size locator block and locator names structure
+ * can be read in thus saving time on diskset startup.
+ * Since no change in master block structure, the MDDB_REV_MINOR
+ * portion of the revision was incremented.
+ *
+ * MDDB_REV_MNLB:
+ * If a MN diskset, the locator block structure is a different size in
+ * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
+ * with any nodeid (sideno) allowed.
+ * The revision is set to MDDB_REV_MNLB which is a change of the
+ * MDDB_REV_MAJOR portion of the revision.
+ *
+ * MDDB_REV_MNLN:
+ * If a MN diskset, the locator names is a different size in
+ * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
+ * with any nodeid (sideno) allowed.
+ * The revision is set to MDDB_REV_MNLN which is a change of the
+ * MDDB_REV_MAJOR portion of the revision.
+ */
+
+#define MDDB_REV_MB (uint_t)0x0201
+#define MDDB_REV_MNMB (uint_t)0x0202
+#define MDDB_REV_DB (uint_t)0x0201
+#define MDDB_REV_LB (uint_t)0x0500
+#define MDDB_REV_MNLB (uint_t)0x0600
+#define MDDB_REV_LN (uint_t)0x0100
+#define MDDB_REV_MNLN (uint_t)0x0300
+#define MDDB_REV_RB (uint_t)0x0200
+#define MDDB_REV_RB64 (uint_t)0x0201
+#define MDDB_REV_DT (uint_t)0x0100
+#define MDDB_REV_DI (uint_t)0x0100
+
+#define MDDB_BSIZE (uint_t)DEV_BSIZE
+#define MDDB_PREFIXCNT 10
+#define MDDB_DRVNMCNT 10
+
+typedef int mddb_block_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+typedef struct md_mnname_suffix {
+ md_name_suffix mn_ln_suffix; /* name suffix for one side */
+ uint_t mn_ln_sideno; /* side number stored with the suffix */
+} md_mnname_suffix_t;
+
+typedef struct mddb_ln {
+ int ln_magic; /* presumably MDDB_MAGIC_LN - confirm */
+ uint_t ln_revision; /* presumably MDDB_REV_LN - confirm */
+ uint_t ln_checksum;
+ struct timeval32 ln_timestamp;
+ md_name_prefix ln_prefixes[MDDB_PREFIXCNT];
+ /* Don't change array sizes without changing RNDUP_BLKCNT */
+ md_name_suffix ln_suffixes[MD_MAXSIDES][MDDB_NLB]; /* first index: side number */
+} mddb_ln_t;
+
+/*
+ * Locator name structure for MN diskset. Same as for traditional
+ * and local diskset except that more sides are supported and the
+ * side number can be any number since the side number is stored
+ * in the ln_mnsuffixes structure instead of being used as an index
+ * into that array. This means that the whole array may need to be
+ * searched in order to find the correct information given a side number.
+ */
+typedef struct mddb_mnln {
+ int ln_magic; /* presumably MDDB_MAGIC_LN - confirm */
+ uint_t ln_revision; /* presumably MDDB_REV_MNLN - confirm */
+ uint_t ln_checksum;
+ struct timeval32 ln_timestamp;
+ md_name_prefix ln_prefixes[MDDB_PREFIXCNT];
+ /* Don't change array sizes without changing MDDB_MNLNCNT */
+ md_mnname_suffix_t ln_mnsuffixes[MD_MNMAXSIDES][MDDB_NLB]; /* search by mn_ln_sideno */
+} mddb_mnln_t;
+
+#define RNDUP_BLKCNT(sz, delta) (((sz) - \
+ ((delta) * \
+ ((MD_MAXSIDES - 1) * MDDB_NLB)) + \
+ MDDB_BSIZE - 1) / MDDB_BSIZE) /* round up to whole MDDB_BSIZE blocks */
+#define MDDB_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), 0) /* all MD_MAXSIDES sides */
+#define MDDB_LOCAL_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), \
+ sizeof (md_name_suffix)) /* one side's suffixes only */
+
+#define MDDB_MNLNCNT ((sizeof (mddb_mnln_t) + (MDDB_BSIZE - 1)) \
+ / MDDB_BSIZE) /* mddb_mnln_t in blocks, rounded up */
+
+typedef struct mddb_dt {
+ uint_t dt_mag; /* presumably MDDB_MAGIC_DT - confirm */
+ uint_t dt_rev; /* presumably MDDB_REV_DT - confirm */
+ uint_t dt_cks; /* checksum, going by the name - confirm */
+ mddb_dtag_t dt_dtag; /* the data tag itself */
+} mddb_dt_t;
+
+#define MDDB_DT_BYTES (roundup(sizeof (mddb_dt_t), MDDB_BSIZE)) /* whole blocks, in bytes */
+#define MDDB_DT_BLOCKS (btodb(MDDB_DT_BYTES)) /* same size, in disk blocks */
+
+typedef union identifier { /* type of lb_ident and of mddb_set s_ident */
+ char serial[MDDB_SN_LEN];
+ struct timeval32 createtime;
+} identifier_t;
+
+typedef struct mddb_locator { /* element type of lb_locators[MDDB_NLB] */
+ dev32_t l_dev; /* presumably the replica's device - confirm */
+ daddr32_t l_blkno; /* presumably the replica's start block - confirm */
+ int l_flags;
+} mddb_locator_t;
+
+typedef struct mddb_sidelocator { /* element type of lb_sidelocators[][] */
+ uchar_t l_drvnm_index; /* presumably an index into lb_drvnm[] - confirm */
+ minor_t l_mnum; /* minor number */
+} mddb_sidelocator_t;
+
+typedef struct mddb_mnsidelocator { /* element type of lb_mnsidelocators[][] */
+ uchar_t mnl_drvnm_index; /* presumably an index into lb_drvnm[] - confirm */
+ minor_t mnl_mnum; /* minor number */
+ uint_t mnl_sideno; /* side number stored with the entry */
+} mddb_mnsidelocator_t;
+
+typedef struct mddb_drvnm { /* element type of lb_drvnm[MDDB_DRVNMCNT] */
+ uchar_t dn_len; /* presumably the length of dn_data - confirm */
+ char dn_data[MD_MAXDRVNM];
+} mddb_drvnm_t;
+
+/*
+ * Locator Block Device ID Information
+ * Several device id's may share one disk block in an effort to
+ * conserve used replica space.
+ */
+typedef struct mddb_did_info {
+ uint_t info_flags; /* MDDB Device ID flags */
+ uint_t info_firstblk; /* Device ID Start Block */
+ uint_t info_blkcnt; /* Device ID Block Count */
+ uint_t info_offset; /* Device ID offset w/i Block */
+ uint_t info_length; /* Device ID Length */
+ uint_t info_checksum; /* Device ID Checksum */
+ char info_minor_name[32]; /* Minor name of lb dev */
+} mddb_did_info_t;
+
+typedef struct mddb_did_blk {
+ int blk_magic; /* used for verification */
+ uint_t blk_revision; /* used for verification */
+ int blk_checksum; /* used for verification */
+ uint_t blk_commitcnt; /* matches LB's commitcnt */
+ mddb_did_info_t blk_info[MDDB_NLB];
+} mddb_did_blk_t;
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+#define MDDB_DID_BYTES (roundup(sizeof (mddb_did_blk_t), MDDB_BSIZE))
+#define MDDB_DID_BLOCKS (btodb(MDDB_DID_BYTES))
+
+/*
+ * Device ID Disk Blocks.
+ * Incore linked list of disk blocks containing device IDs.
+ * The list is built when reading in the mddb_did_blk structure and
+ * when reading in the actual disk blocks containing device ids.
+ * This list is used to easily write out all disk blocks containing
+ * device ids.
+ */
+typedef struct mddb_did_db {
+ uint_t db_firstblk; /* Disk Block's logical addr */
+ uint_t db_blkcnt; /* Contig Disk Block Count */
+ caddr_t db_ptr; /* Ptr to incore Block(s) */
+ struct mddb_did_db *db_next; /* Ptr to next in list */
+} mddb_did_db_t;
+
+/*
+ * Device ID Free List.
+ * Incore linked list of free space in disk blocks containing device IDs.
+ * Used to manage placement of device IDs in disk blocks.
+ * All disk blocks on free list are also in linked list of disk block
+ * containing device IDs (mddb_did_db_t).
+ */
+typedef struct mddb_did_free {
+ uint_t free_blk; /* Disk Block's logical addr */
+ uint_t free_offset; /* offset of free space */
+ uint_t free_length; /* length of free space */
+ struct mddb_did_free *free_next; /* Ptr to next in list */
+} mddb_did_free_t;
+
+/*
+ * Device ID Incore Area
+ * Contains pointer to Device ID Disk Block list and
+ * Device ID Free List.
+ * Also contains incore array of pointers to device IDs. Pointers
+ * point into the device ID Disk Block list and are used as a
+ * shortcut to find incore device IDs.
+ */
+typedef struct mddb_did_ic {
+ mddb_did_blk_t *did_ic_blkp; /* incore mddb_did_blk */
+ mddb_did_db_t *did_ic_dbp; /* Device ID Disk Block list */
+ mddb_did_free_t *did_ic_freep; /* Device ID Free List */
+ ddi_devid_t did_ic_devid[MDDB_NLB]; /* Ptr to device IDs */
+} mddb_did_ic_t;
+
+/*
+ * Locator Block (LB):
+ * - Are fixed size, but the size is different
+ * for local/shared set db replicas.
+ * - All LB's start at logical block 0.
+ * - After a replica quorum is found, there
+ * is only one incore copy of the LB.
+ * - LB's are only written when replicas are added, deleted, or errored.
+ * - LB's provide information about other replica's and their state.
+ */
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+typedef struct mddb_lb {
+ int lb_magic; /* used for verification */
+ uint_t lb_revision; /* used for verification */
+ int lb_checksum; /* used for verification */
+ uint_t lb_commitcnt; /* IMPORTANT */
+ struct timeval32 lb_timestamp; /* informative only */
+ int lb_loccnt; /* used for verification */
+ identifier_t lb_ident; /* used for verification */
+ uint_t lb_flags; /* flags describing LB (MDDB_DEVID_STYLE, MDDB_MNSET) */
+ uint_t lb_spare[8]; /* Spare/Pad */
+ mddb_block_t lb_didfirstblk; /* Devid Array Start Block */
+ mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */
+ mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */
+ mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */
+ struct timeval32 lb_inittime; /* creation of database */
+ set_t lb_setno; /* used for verification */
+ mddb_block_t lb_blkcnt; /* used for verification */
+ mddb_block_t lb_lnfirstblk; /* presumably Locator Names Start Block */
+ mddb_block_t lb_lnblkcnt; /* presumably Locator Names Number Blocks */
+ mddb_block_t lb_dbfirstblk; /* presumably Directory Block Start Block */
+ mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT];
+ mddb_locator_t lb_locators[MDDB_NLB];
+ /* Don't change array sizes without changing RNDUP_BLKCNT */
+ mddb_sidelocator_t lb_sidelocators[MD_MAXSIDES][MDDB_NLB]; /* first index: side number */
+} mddb_lb_t;
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+/*
+ * Locator block structure for MN diskset. Same as for traditional
+ * and local diskset except that more sides are supported and the
+ * side number can be any number since the side number is stored
+ * in the lb_mnsidelocators structure instead of being used as an index
+ * into that array. This means that the whole array may need to be
+ * searched in order to find the correct information given a side number.
+ */
+typedef struct mddb_mnlb {
+ int lb_magic; /* used for verification */
+ uint_t lb_revision; /* used for verification */
+ int lb_checksum; /* used for verification */
+ uint_t lb_commitcnt; /* IMPORTANT */
+ struct timeval32 lb_timestamp; /* informative only */
+ int lb_loccnt; /* used for verification */
+ identifier_t lb_ident; /* used for verification */
+ uint_t lb_flags; /* flags describing LB (MDDB_DEVID_STYLE, MDDB_MNSET) */
+ uint_t lb_spare[8]; /* Spare/Pad */
+ mddb_block_t lb_didfirstblk; /* Devid Array Start Block */
+ mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */
+ mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */
+ mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */
+ struct timeval32 lb_inittime; /* creation of database */
+ set_t lb_setno; /* used for verification */
+ mddb_block_t lb_blkcnt; /* used for verification */
+ mddb_block_t lb_lnfirstblk; /* presumably Locator Names Start Block */
+ mddb_block_t lb_lnblkcnt; /* presumably Locator Names Number Blocks */
+ mddb_block_t lb_dbfirstblk; /* presumably Directory Block Start Block */
+ mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT];
+ mddb_locator_t lb_locators[MDDB_NLB];
+ /* Don't change array sizes without changing MDDB_MNLBCNT */
+ mddb_mnsidelocator_t lb_mnsidelocators[MD_MNMAXSIDES][MDDB_NLB]; /* search by mnl_sideno */
+} mddb_mnlb_t;
+
+
+#define MDDB_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), 0) /* all MD_MAXSIDES sides */
+#define MDDB_LOCAL_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), \
+ sizeof (mddb_sidelocator_t)) /* one side's locators only */
+
+#define MDDB_MNLBCNT ((sizeof (mddb_mnlb_t) + (MDDB_BSIZE - 1)) \
+ / MDDB_BSIZE) /* mddb_mnlb_t in blocks, rounded up */
+
+typedef struct mddb_map { /* type of mb_blkmap (logical->physical blk map) */
+ daddr32_t m_consecutive; /* presumably a count of consecutive blocks - confirm */
+ daddr32_t m_firstblk; /* presumably the first physical block - confirm */
+} mddb_map_t;
+
+/*
+ * Master block(s) (MB)
+ * - Are written by userland; Never by the driver!
+ * - Each replica has its own master blocks,
+ * the master block(s) are not shared.
+ * - MB's are not in the logical block address space of the database.
+ * - MB's are a fixed size record (MDDB_BSIZE)
+ * - MB's provide the logical to physical block translation,
+ * for their replica.
+ */
+typedef struct mddb_mb {
+ int mb_magic; /* used for verification */
+ uint_t mb_revision; /* used for verification */
+ uint_t mb_checksum; /* used for verification */
+#ifdef _LP64
+ uint32_t mb_next; /* incore to next mb (32-bit slot on LP64) */
+#else
+ struct mddb_mb *mb_next; /* incore to next mb */
+#endif /* _LP64 */
+ daddr32_t mb_nextblk; /* block # for next mb */
+ md_timeval32_t mb_timestamp; /* timestamp */
+ daddr32_t mb_blkcnt; /* size of blkmap */
+ daddr32_t mb_blkno; /* physical loc. for this MB */
+ set_t mb_setno; /* used for verification */
+ struct timeval32 mb_setcreatetime; /* set creation timestamp */
+ int spares[7]; /* spare words, going by the name */
+ mddb_map_t mb_blkmap; /* logical->physical blk map */
+ int mb_devid_magic; /* verify devid in mb */
+ short mb_devid_len; /* len of following devid */
+ char mb_devid[1]; /* devid byte array, mb_devid_len long */
+} mddb_mb_t;
+
+/*
+ * In-core version of mddb_mb. It is known that the mddb_mb is 512 bytes on
+ * disk, really, and so this structure is 512 + sizeof(struct mddb_mb_ic *)
+ */
+#define MDDB_IC_BSIZE (MDDB_BSIZE + sizeof (struct mddb_mb_ic *)) /* block + mbi_next */
+typedef struct mddb_mb_ic {
+ struct mddb_mb_ic *mbi_next; /* next incore master block */
+ struct mddb_mb mbi_mddb_mb; /* the master block proper */
+} mddb_mb_ic_t;
+
+
+/*
+ * There can be no address in a record block. The checksum must
+ * stay the same wherever the record is in memory. Many
+ * things depend on this. Also, the timestamp is the time the
+ * record was committed, not the time it was written to a particular
+ * device.
+ *
+ * Old definition of mddb_rb, for 32-bit apps and libraries
+ */
+typedef struct mddb_rb {
+ uint_t rb_magic; /* presumably MDDB_MAGIC_RB - confirm */
+ uint_t rb_revision; /* presumably MDDB_REV_RB or MDDB_REV_RB64 - confirm */
+ uint_t rb_checksum;
+ uint_t rb_checksum_fiddle;
+ uint_t rb_private; /* cf. mddb_getrecprivate/mddb_setrecprivate */
+ void *rb_userdata; /* native pointer; uint32_t in mddb_rb32 */
+ uint_t rb_commitcnt;
+ uint_t rb_spare[1];
+ struct timeval32 rb_timestamp; /* time the record was committed */
+ int rb_data[1];
+} mddb_rb_t;
+
+/* This is, and always will be, the on-disk version of mddb_rb */
+typedef struct mddb_rb32 {
+ uint_t rb_magic; /* presumably MDDB_MAGIC_RB - confirm */
+ uint_t rb_revision; /* presumably MDDB_REV_RB or MDDB_REV_RB64 - confirm */
+ uint_t rb_checksum;
+ uint_t rb_checksum_fiddle;
+ uint_t rb_private; /* cf. mddb_getrecprivate/mddb_setrecprivate */
+ uint32_t rb_userdata; /* fixed 32-bit slot; void * in mddb_rb */
+ uint_t rb_commitcnt;
+ uint_t rb_spare[1];
+ struct timeval32 rb_timestamp; /* time the record was committed */
+ int rb_data[1];
+} mddb_rb32_t;
+
+/*
+ * directory entries
+ */
+typedef struct mddb_optinfo {
+ int o_li;
+ int o_flags;
+} mddb_optinfo_t;
+
+/* Old definition of mddb_de, for 32-bit apps and libraries */
+typedef struct mddb_de {
+ struct mddb_de *de_next;
+ mddb_rb_t *de_rb;
+ mddb_recid_t de_recid;
+ mddb_type_t de_type1;
+ uint_t de_type2;
+ uint_t de_reqsize;
+ uint_t de_recsize;
+ mddb_block_t de_blkcount;
+ uint_t de_flags;
+ mddb_optinfo_t de_optinfo[2];
+ mddb_block_t de_blks[1];
+} mddb_de_t;
+
+/*
+ * In core version of mddb_de, includes pointer for mddb_rb32_t user data
+ * mddb_rb32_t is used incore
+ */
+typedef struct mddb_de_ic {
+ void *de_rb_userdata; /* NULL after de32tode */
+ void *de_rb_userdata_ic; /* not touched by de32tode */
+ uint_t de_owner_nodeid; /* MD_MN_INVALID_NID after de32tode */
+ struct mddb_de_ic *de_next;
+ mddb_rb32_t *de_rb; /* mddb_rb32_t is used incore */
+ mddb_recid_t de_recid;
+ mddb_type_t de_type1;
+ uint_t de_type2;
+ size_t de_reqsize;
+ size_t de_icreqsize; /* not touched by de32tode */
+ size_t de_recsize;
+ uint_t de_blkcount; /* number of entries used in de_blks[] */
+ uint_t de_flags;
+ mddb_optinfo_t de_optinfo[2];
+ mddb_block_t de_blks[1]; /* de_blkcount entries */
+} mddb_de_ic_t;
+
+typedef struct mddb_db {
+ uint_t db_magic; /* presumably MDDB_MAGIC_DB - confirm */
+ uint_t db_revision; /* presumably MDDB_REV_DB - confirm */
+ uint_t db_checksum;
+ mddb_block_t db_blknum;
+ struct mddb_db *db_next; /* 32-bit db32_next in mddb_db32 */
+ mddb_block_t db_nextblk;
+ struct timeval32 db_timestamp;
+ uint_t db_recsum;
+#ifdef _KERNEL
+ mddb_de_ic_t *db_firstentry; /* incore entries in the kernel */
+#else
+ mddb_de_t *db_firstentry; /* old mddb_de layout outside the kernel */
+#endif
+} mddb_db_t;
+
+/*
+ * This is, and always will be, the on-disk version of mddb_de
+ * When mddb_de32 is read in it is converted into mddb_de_ic
+ */
+typedef struct mddb_de32 {
+ uint32_t de32_next; /* 32-bit slot for de_next */
+ uint32_t de32_rb; /* 32-bit slot for de_rb */
+ mddb_recid_t de32_recid;
+ mddb_type_t de32_type1;
+ uint_t de32_type2;
+ uint_t de32_reqsize;
+ uint_t de32_recsize;
+ mddb_block_t de32_blkcount; /* number of entries used in de32_blks[] */
+ uint_t de32_flags;
+ mddb_optinfo_t de32_optinfo[2];
+ mddb_block_t de32_blks[1]; /* de32_blkcount entries */
+} mddb_de32_t;
+
+/*
+ * This is, and always will be, the on-disk version of mddb_db
+ * When mddb_db32 is read in it is converted into mddb_db
+ * To minimize impact on mddb format mddb_db fields remain intact
+ */
+typedef struct mddb_db32 {
+ uint_t db32_magic;
+ uint_t db32_revision;
+ uint_t db32_checksum;
+ mddb_block_t db32_blknum;
+ uint32_t db32_next; /* 32-bit slot for db_next */
+ mddb_block_t db32_nextblk;
+ struct timeval32 db32_timestamp;
+ uint_t db32_recsum;
+ uint32_t db32_firstentry; /* 32-bit slot for db_firstentry */
+} mddb_db32_t;
+
+#define de32tode(from, to) \
+ { /* unpack on-disk mddb_de32 (from) into incore mddb_de_ic (to) */ \
+ int de_blk_idx; /* unlikely to capture a name used in from/to */ \
+ (to)->de_rb_userdata = NULL; \
+ (to)->de_owner_nodeid = MD_MN_INVALID_NID; \
+ (to)->de_next = (struct mddb_de_ic *)(uintptr_t)(from)->de32_next; \
+ (to)->de_rb = (mddb_rb32_t *)(uintptr_t)(from)->de32_rb; \
+ (to)->de_recid = (from)->de32_recid; \
+ (to)->de_type1 = (from)->de32_type1; \
+ (to)->de_type2 = (from)->de32_type2; \
+ (to)->de_reqsize = (from)->de32_reqsize; \
+ (to)->de_recsize = (from)->de32_recsize; \
+ (to)->de_blkcount = (from)->de32_blkcount; \
+ (to)->de_flags = (from)->de32_flags; \
+ (to)->de_optinfo[0] = (from)->de32_optinfo[0]; \
+ (to)->de_optinfo[1] = (from)->de32_optinfo[1]; \
+ for (de_blk_idx = 0; de_blk_idx < (from)->de32_blkcount; de_blk_idx++) \
+ (to)->de_blks[de_blk_idx] = (from)->de32_blks[de_blk_idx]; \
+ } /* both arguments are pointers and are evaluated more than once */
+
+#define detode32(from, to) \
+ { /* pack incore mddb_de_ic (from) into on-disk mddb_de32 (to) */ \
+ uint_t de_blk_idx; /* unsigned, like de_blkcount */ \
+ (to)->de32_next = (uint32_t)(uintptr_t)(from)->de_next; \
+ (to)->de32_rb = (uint32_t)(uintptr_t)(from)->de_rb; \
+ (to)->de32_recid = (from)->de_recid; \
+ (to)->de32_type1 = (from)->de_type1; \
+ (to)->de32_type2 = (from)->de_type2; \
+ (to)->de32_reqsize = (from)->de_reqsize; \
+ (to)->de32_recsize = (from)->de_recsize; \
+ (to)->de32_blkcount = (from)->de_blkcount; \
+ (to)->de32_flags = (from)->de_flags; \
+ (to)->de32_optinfo[0] = (from)->de_optinfo[0]; \
+ (to)->de32_optinfo[1] = (from)->de_optinfo[1]; \
+ for (de_blk_idx = 0; de_blk_idx < (from)->de_blkcount; de_blk_idx++) \
+ (to)->de32_blks[de_blk_idx] = (from)->de_blks[de_blk_idx]; \
+ } /* both arguments are pointers and are evaluated more than once */
+
+#define db32todb(from, to) { /* unpack on-disk mddb_db32 into mddb_db */ \
+ (to)->db_magic = (from)->db32_magic; \
+ (to)->db_revision = (from)->db32_revision; \
+ (to)->db_checksum = (from)->db32_checksum; \
+ (to)->db_blknum = (from)->db32_blknum; \
+ (to)->db_next = (struct mddb_db *)(uintptr_t)(from)->db32_next; \
+ (to)->db_nextblk = (from)->db32_nextblk; \
+ (to)->db_timestamp = (from)->db32_timestamp; \
+ (to)->db_recsum = (from)->db32_recsum; \
+ (to)->db_firstentry = (mddb_de_ic_t *)(uintptr_t)(from)->db32_firstentry; }
+
+#define dbtodb32(from, to) { /* pack mddb_db into on-disk mddb_db32 */ \
+ (to)->db32_magic = (from)->db_magic; \
+ (to)->db32_revision = (from)->db_revision; \
+ (to)->db32_checksum = (from)->db_checksum; \
+ (to)->db32_blknum = (from)->db_blknum; \
+ (to)->db32_next = (uint32_t)(uintptr_t)(from)->db_next; \
+ (to)->db32_nextblk = (from)->db_nextblk; \
+ (to)->db32_timestamp = (from)->db_timestamp; \
+ (to)->db32_recsum = (from)->db_recsum; \
+ (to)->db32_firstentry = (uint32_t)(uintptr_t)(from)->db_firstentry; }
+
+/*
+ * information about a replica of the data base
+ */
+typedef struct mddb_ri {
+ struct mddb_ri *ri_next; /* next replica (list head: mddb_set s_rip) */
+ uint_t ri_flags;
+ uint_t ri_commitcnt;
+ int ri_transplant;
+ md_dev64_t ri_dev;
+ daddr32_t ri_blkno;
+ char ri_driver[16];
+ mddb_mb_ic_t *ri_mbip; /* incore master block(s) */
+ mddb_lb_t *ri_lbp; /* locator block */
+ mddb_dt_t *ri_dtp; /* data tag block */
+ mddb_did_ic_t *ri_did_icp; /* Device ID incore area */
+ ddi_devid_t ri_devid;
+ ddi_devid_t ri_old_devid;
+ char ri_minor_name[MDDB_MINOR_NAME_MAX];
+ char ri_devname[MAXPATHLEN];
+} mddb_ri_t;
+
+typedef struct mddb_bf {
+ struct mddb_bf *bf_next; /* next buffer (cf. mddb_set s_freebufhead) */
+ mddb_locator_t *bf_locator;
+ buf_t bf_buf;
+} mddb_bf_t;
+
+/*
+ * Information for sets of databases (which include replicas)
+ */
+#define MDDB_BITSRECID 31
+#define MDDB_SETSHIFT (MDDB_BITSRECID - MD_BITSSET) /* set number sits above the id bits */
+#define MDDB_SETMASK (MD_SETMASK << MDDB_SETSHIFT)
+#define MDDB_RECIDMASK ((1 << MDDB_SETSHIFT) - 1) /* low MDDB_SETSHIFT bits */
+
+#define DBSET(id) (((id) & MDDB_SETMASK) >> MDDB_SETSHIFT) /* set number of a record id */
+#define DBID(id) ((id) & MDDB_RECIDMASK) /* record id without the set number */
+#define MAKERECID(s, i) ((((s) << MDDB_SETSHIFT) & MDDB_SETMASK) | \
+ ((i) & MDDB_RECIDMASK)) /* combine set number and id */
+
+#define MDDB_PARSE_LOCBLK 0x00000001
+#define MDDB_PARSE_LOCNM 0x00000002
+#define MDDB_PARSE_OPTRECS 0x00000004
+#define MDDB_PARSE_MASK 0x0000000F
+
+
+#define MDDB_BLOCK_PARSE 0x00000001 /* Block sending parse msgs */
+#define MDDB_UNBLOCK_PARSE 0x00000002 /* Unblock sending parse msgs */
+
+/*
+ * We need to keep s_ident and s_inittime 32 bit. They are used in mddb_lb
+ */
+typedef struct mddb_set {
+ uint_t s_setno; /* set number */
+ uint_t s_sideno; /* side number */
+ identifier_t s_ident; /* set identifier */
+ char *s_setname; /* set name */
+ mddb_mb_ic_t **s_mbiarray; /* master blocks array */
+ mddb_db_t *s_dbp; /* directory block */
+ mddb_lb_t *s_lbp; /* locator block */
+ /* May be cast to mddb_mnlb_t */
+ /* if accessing sidenames in */
+ /* MN diskset */
+ mddb_ln_t *s_lnp; /* locator names block */
+ /* May be cast to mddb_mnln_t */
+ /* if accessing sidenames in */
+ /* MN diskset */
+ mddb_dtag_lst_t *s_dtlp; /* List of data tags found */
+ mddb_did_ic_t *s_did_icp; /* Device ID incore area */
+ mddb_ri_t *s_rip; /* replicas incore list */
+ int s_freeblkcnt; /* visible for test code */
+ int s_totalblkcnt; /* visible for test code */
+ int s_mn_parseflags; /* mddb parse flags for MNset */
+ int s_mn_parseflags_sending; /* parse flgs sent to slaves */
+ uchar_t *s_freebitmap; /* free blocks bitmap */
+ uint_t s_freebitmapsize; /* size of bitmap */
+ struct timeval32 s_inittime; /* timestamp set created */
+ mddb_recid_t s_zombie; /* zombie record - createrec */
+ int s_staledeletes; /* number of stale deleterec */
+ int s_optcmtcnt; /* Following are opt. record */
+ int s_opthavelck; /* bookkeeping records ... */
+ int s_optwantlck;
+ kcondvar_t s_optwantlck_cv;
+ int s_optwaiterr;
+ int s_opthungerr;
+ kcondvar_t s_opthungerr_cv;
+ int s_opthavequeuinglck;
+ int s_optwantqueuinglck;
+ kcondvar_t s_optqueuing_cv;
+ ulong_t s_bufmisses;
+ mddb_bf_t *s_freebufhead;
+ int s_bufwakeup;
+ kcondvar_t s_buf_cv;
+ size_t s_databuffer_size;
+ void *s_databuffer;
+ int s_singlelockgotten;
+ int s_singlelockwanted;
+ kcondvar_t s_single_thread_cv;
+ md_hi_arr_t s_med;
+} mddb_set_t;
+
+#ifndef MDDB_FAKE
+#ifdef _KERNEL
+/* md_mddb.c */
+extern uint_t mddb_lb_did_convert(mddb_set_t *,
+ uint_t, uint_t *);
+extern void mddb_locatorblock2splitname(mddb_ln_t *,
+ int, side_t, md_splitname *);
+extern int mddb_configure(mddb_cfgcmd_t,
+ struct mddb_config *);
+extern mddb_recid_t mddb_getnextrec(mddb_recid_t,
+ mddb_type_t, uint_t); /* can yield MDDB_E_NORECORD */
+extern int mddb_getoptloc(mddb_optloc_t *);
+extern void *mddb_getrecaddr(mddb_recid_t);
+extern void *mddb_getrecaddr_resize(mddb_recid_t, size_t,
+ off_t);
+extern int mddb_getrecprivate(mddb_recid_t);
+extern void mddb_setrecprivate(mddb_recid_t, uint_t);
+extern mddb_de_ic_t *mddb_getrecdep(mddb_recid_t);
+extern mddb_type_t mddb_getrectype1(mddb_recid_t);
+extern int mddb_getrectype2(mddb_recid_t);
+extern int mddb_getrecsize(mddb_recid_t); /* can yield MDDB_E_NORECORD */
+extern int mddb_commitrec(mddb_recid_t); /* can yield MDDB_E_NORECORD */
+extern int mddb_commitrecs(mddb_recid_t *); /* can yield MDDB_E_NORECORD */
+extern int mddb_deleterec(mddb_recid_t); /* can yield MDDB_E_NORECORD */
+extern mddb_recstatus_t mddb_getrecstatus(mddb_recid_t);
+extern mddb_recid_t mddb_createrec(size_t usersize,
+ mddb_type_t type, uint_t type2,
+ md_create_rec_option_t option, set_t setno);
+extern void mddb_init(void);
+extern void mddb_unload(void);
+extern void mddb_unload_set(set_t setno);
+extern mddb_recid_t mddb_makerecid(set_t setno, mddb_recid_t id);
+extern set_t mddb_getsetnum(mddb_recid_t id);
+extern char *mddb_getsetname(set_t setno);
+extern side_t mddb_getsidenum(set_t setno);
+extern int mddb_ownset(set_t setno);
+extern int getmed_ioctl(mddb_med_parm_t *medpp, int mode);
+extern int setmed_ioctl(mddb_med_parm_t *medpp, int mode);
+extern int updmed_ioctl(mddb_med_upd_parm_t *medpp,
+ int mode);
+extern int take_set(mddb_config_t *cp, int mode);
+extern int release_set(mddb_config_t *cp, int mode);
+extern int gettag_ioctl(mddb_dtag_get_parm_t *dtgpp,
+ int mode);
+extern int usetag_ioctl(mddb_dtag_use_parm_t *dtupp,
+ int mode);
+extern int accept_ioctl(mddb_accept_parm_t *medpp,
+ int mode);
+extern int md_update_locator_namespace(set_t setno,
+ side_t side, char *dname, char *pname,
+ md_dev64_t devt);
+extern int mddb_validate_lb(set_t setno, int *rmaxsz);
+extern int mddb_getinvlb_devid(set_t setno, int count,
+ int size, char **ctdptr);
+extern int md_update_minor(set_t, side_t, mdkey_t);
+#ifdef DEBUG
+extern void mddb_check(void); /* DEBUG builds only */
+#endif /* DEBUG */
+#endif /* _KERNEL */
+
+#else /* MDDB_FAKE: stub out the mddb interfaces for debugging */
+
+caddr_t mddb_fakeit; /* buffer from the most recent fake mddb_createrec */
+
+#define mddb_lb_did_convert(a, b, c) (0) /* name of the real prototype */
+#define md_lb_did_convert(a, b, c) (0) /* old misspelling, kept for compat */
+#define mddb_configure(a, b) (0)
+#define mddb_getnextrec(a, b, c) ((mddb_recid_t)0)
+#define mddb_getrecaddr(a) (mddb_fakeit)
+#define mddb_getrecprivate(a) (0)
+#define mddb_setrecprivate(a, b) (0)
+#define mddb_getrectype1(a) (0)
+#define mddb_getrectype2(a) (0)
+#define mddb_getrecsize(a) (0)
+#define mddb_commitrec(a) (0)
+#define mddb_commitrecs(a) (0)
+#define mddb_deleterec(a) (0)
+#define mddb_getrecstatus(a) (MDDB_OK)
+#define mddb_createrec(s, a, b) (0xffff & (int)(mddb_fakeit = \
+ (caddr_t)kmem_zalloc(s, KM_SLEEP))) /* NOTE(review): 3 args; real one takes 5 */
+#define mddb_unload() (0)
+#endif /* MDDB_FAKE */
+
+#define MDDB_NOSLEEP 1
+#define MDDB_SLEEPOK 0
+
+#define MDDB_NOOLDOK 0x1
+#define MDDB_MUSTEXIST 0x2
+#define MDDB_NOINIT 0x4
+#define MDDB_MULTINODE 0x8
+#define MDDB_MN_STALE 0x10 /* MN set is stale */
+
+/* Flags passed to selectreplicas - not a bit mask */
+#define MDDB_SCANALL 1
+#define MDDB_RETRYSCAN 0
+#define MDDB_SCANALLSYNC 2 /* During reconfig, sync up incore */
+ /* and ondisk mddb by writing incore */
+ /* values to disk. Don't write */
+ /* change log records. */
+
+/* Flags passed to writestart and writecopy */
+#define MDDB_WRITECOPY_ALL 1 /* Write all incore mddb to disk */
+#define MDDB_WRITECOPY_SYNC 2 /* Write incore mddb to disk except */
+ /* - change log records */
+ /* - optimized resync records */
+
+
+#define MDDB_PROBE 1
+#define MDDB_NOPROBE 0
+
+
+/*
+ * MN diskset definitions used to determine if a slave can write
+ * directly to the mddb. ONLY_MASTER only allows the master node
+ * to write to the mddb. ANY_NODE allows any node to write
+ * to the mddb.
+ */
+#define MDDB_WR_ONLY_MASTER 0
+#define MDDB_WR_ANY_NODE 1
+
+#define MDDB_L_LOCKED 0x0001 /* this record is locked */
+#define MDDB_L_WANTED 0x0002
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MD_MDDB_H */