summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2018-04-03 12:10:10 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2018-04-03 12:10:10 +0000
commit976f1c5d53cffc14d4b30bdb72a2e2fa6257b746 (patch)
tree173c45ab22cfe106048abedafd829c37ff7db391
parent89f28c29187081c8db00a633d949df9b5f2a403d (diff)
parentdaefc4e3f903d9fdcba459a512dc09df2d416e91 (diff)
downloadillumos-joyent-976f1c5d53cffc14d4b30bdb72a2e2fa6257b746.tar.gz
[illumos-gate merge]
commit daefc4e3f903d9fdcba459a512dc09df2d416e91 9349 dnet: this statement may fall through commit f22cbd2db87ae3945ed6a9166f8b9d61b65c6ab9 9348 mii: duplicate 'const' declaration specifier commit 9f7e0c70988ceb8fe0dc3b631ab169a6af80ecc5 9347 c2audit: comparison between pointer and zero character constant commit cf95dd1699b88778bfda4b3d97e13237466400c4 9343 ib: comparison between pointer and zero character constant commit ae7a42b1202d66a3f5538a09de0a28329264fd15 9181 ipf: this use of "defined" may not be portable commit fff695d46af8deb863236b0569e68ea5a9bb6756 9308 mdnsd: this statement may fall through commit c15ff06a5cef5880e0afc02c7a87c6b22455f848 9307 rdist: this statement may fall through commit f6b0cb17c5dc44ae872c22ebbbd2663c98723311 9306 pppd: this use of "defined" may not be portable commit 5b85b345b049f7f7544f27a5b5e2b1bfa91da91e 9305 pppdump: this statement may fall through commit 7f24fc96855e7e749ca4e15ffdce8ea80fc94dbf 9131 regexp.h: this statement may fall through commit acca861ce6b8e9720b720b04d3880f44a3e10281 9301 nvme: Package description out of date commit 6f9a31a96454e5a48ab600f85d7e8b556b897e1a 9263 rdc: comparison between pointer and zero character constant commit 73ff8cc657529b1dc6066efa02507cc59523bf46 9240 nxge: this statement may fall through commit ea60f77b8bdb7a88d6143d8bd30ddebab992abee 9142 libxcurses: comparison between pointer and zero character constant commit 17f11284b49b98353b5119463254074fd9bc0a28 9238 ZFS Spacemap Encoding V2 commit 1c10ae76c0cb31326c320e7cef1d3f24a1f47125 9286 want refreservation=auto
-rw-r--r--usr/src/cmd/cmd-inet/usr.bin/pppd/pppd.h15
-rw-r--r--usr/src/cmd/cmd-inet/usr.bin/pppdump/zlib.c14
-rw-r--r--usr/src/cmd/cmd-inet/usr.bin/rdist/gram.y2
-rw-r--r--usr/src/cmd/cmd-inet/usr.bin/rdist/server.c3
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c4
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSDigest.c7
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/mdnsd/uDNS.c3
-rw-r--r--usr/src/cmd/ipf/tools/ip_fil.c9
-rw-r--r--usr/src/cmd/ipf/tools/ipmon.c10
-rw-r--r--usr/src/cmd/mdb/common/modules/zfs/zfs.c142
-rw-r--r--usr/src/cmd/zdb/zdb.c125
-rw-r--r--usr/src/cmd/ztest/ztest.c7
-rw-r--r--usr/src/common/zfs/zfeature_common.c6
-rw-r--r--usr/src/common/zfs/zfeature_common.h1
-rw-r--r--usr/src/head/regexp.h2
-rw-r--r--usr/src/lib/libxcurses/src/libc/xcurses/slk.c55
-rw-r--r--usr/src/lib/libzfs/common/libzfs_dataset.c80
-rw-r--r--usr/src/lib/libzfs/common/libzfs_util.c33
-rw-r--r--usr/src/lib/libzpool/common/llib-lzpool1
-rw-r--r--usr/src/man/man1m/zfs.1m44
-rw-r--r--usr/src/man/man5/zpool-features.529
-rw-r--r--usr/src/pkg/manifests/driver-storage-nvme.mf2
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf9
-rw-r--r--usr/src/test/zfs-tests/runfiles/delphix.run5
-rw-r--r--usr/src/test/zfs-tests/runfiles/omnios.run5
-rw-r--r--usr/src/test/zfs-tests/runfiles/openindiana.run5
-rw-r--r--usr/src/test/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh6
-rw-r--r--usr/src/test/zfs-tests/tests/functional/reservation/reservation_019_pos.sh65
-rw-r--r--usr/src/test/zfs-tests/tests/functional/reservation/reservation_020_pos.sh66
-rw-r--r--usr/src/test/zfs-tests/tests/functional/reservation/reservation_021_neg.sh74
-rw-r--r--usr/src/test/zfs-tests/tests/functional/reservation/reservation_022_pos.sh84
-rw-r--r--usr/src/uts/common/avs/ns/rdc/rdc_diskq.c6
-rw-r--r--usr/src/uts/common/c2/audit_start.c2
-rw-r--r--usr/src/uts/common/fs/zfs/metaslab.c58
-rw-r--r--usr/src/uts/common/fs/zfs/range_tree.c2
-rw-r--r--usr/src/uts/common/fs/zfs/spa_checkpoint.c50
-rw-r--r--usr/src/uts/common/fs/zfs/space_map.c796
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h12
-rw-r--r--usr/src/uts/common/fs/zfs/sys/space_map.h114
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c2
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_indirect.c2
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_indirect_mapping.c9
-rw-r--r--usr/src/uts/common/inet/ipf/ip_log.c9
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_compat.h13
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_fil.h9
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_nat.h9
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ip_proxy.h11
-rw-r--r--usr/src/uts/common/inet/ipf/opts.h9
-rw-r--r--usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c4
-rw-r--r--usr/src/uts/common/io/mii/mii.c4
-rw-r--r--usr/src/uts/common/io/nxge/npi/npi_mac.c4
-rw-r--r--usr/src/uts/common/io/nxge/npi/npi_vir.c3
-rw-r--r--usr/src/uts/common/io/nxge/nxge_zcp.c3
-rw-r--r--usr/src/uts/intel/io/dnet/dnet.c1
54 files changed, 1553 insertions, 492 deletions
diff --git a/usr/src/cmd/cmd-inet/usr.bin/pppd/pppd.h b/usr/src/cmd/cmd-inet/usr.bin/pppd/pppd.h
index 8c14c1f6ad..6804a8c72d 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/pppd/pppd.h
+++ b/usr/src/cmd/cmd-inet/usr.bin/pppd/pppd.h
@@ -82,15 +82,22 @@ extern "C" {
* If this evaluates non-zero, then sifup() must be called before
* sifaddr().
*/
-#define SIFUPFIRST \
- (defined(SVR4) && (defined(SNI) || defined(__USLC__)))
+#if (defined(SVR4) && (defined(SNI) || defined(__USLC__)))
+#define SIFUPFIRST (1)
+#else
+#define SIFUPFIRST (0)
+#endif
/*
* If this evaluates non-zero, then sif6up() must be called before
* sif6addr().
*/
-#define SIF6UPFIRST \
- (defined(__linux__) || (defined(SVR4) && (defined(SNI) || defined(__USLC__))))
+#if (defined(__linux__) || \
+ (defined(SVR4) && (defined(SNI) || defined(__USLC__))))
+#define SIF6UPFIRST (1)
+#else
+#define SIF6UPFIRST (0)
+#endif
/*
* Option descriptor structure.
diff --git a/usr/src/cmd/cmd-inet/usr.bin/pppdump/zlib.c b/usr/src/cmd/cmd-inet/usr.bin/pppdump/zlib.c
index c3c68f65f8..eb369dad18 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/pppdump/zlib.c
+++ b/usr/src/cmd/cmd-inet/usr.bin/pppdump/zlib.c
@@ -2884,6 +2884,7 @@ int f;
break;
}
z->state->mode = FLAG;
+ /* FALLTHROUGH */
case FLAG:
NEEDBYTE
if ((b = NEXTBYTE) & 0x20)
@@ -2902,6 +2903,7 @@ int f;
}
Trace((stderr, "inflate: zlib header ok\n"));
z->state->mode = BLOCKS;
+ /* FALLTHROUGH */
case BLOCKS:
r = inflate_blocks(z->state->blocks, z, r);
if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0)
@@ -2922,18 +2924,22 @@ int f;
break;
}
z->state->mode = CHECK4;
+ /* FALLTHROUGH */
case CHECK4:
NEEDBYTE
z->state->sub.check.need = (uLong)NEXTBYTE << 24;
z->state->mode = CHECK3;
+ /* FALLTHROUGH */
case CHECK3:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE << 16;
z->state->mode = CHECK2;
+ /* FALLTHROUGH */
case CHECK2:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE << 8;
z->state->mode = CHECK1;
+ /* FALLTHROUGH */
case CHECK1:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE;
@@ -2947,6 +2953,7 @@ int f;
}
Trace((stderr, "inflate: zlib check ok\n"));
z->state->mode = DONE;
+ /* FALLTHROUGH */
case DONE:
return Z_STREAM_END;
case BAD:
@@ -3474,6 +3481,7 @@ int r;
s->sub.decode.td = td;
}
s->mode = CODES;
+ /* FALLTHROUGH */
case CODES:
UPDATE
if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
@@ -3499,11 +3507,13 @@ int r;
p--; /* can always return one */
}
s->mode = DRY;
+ /* FALLTHROUGH */
case DRY:
FLUSH
if (s->read != s->write)
LEAVE
s->mode = DONEB;
+ /* FALLTHROUGH */
case DONEB:
r = Z_STREAM_END;
LEAVE
@@ -4180,6 +4190,7 @@ int r;
c->sub.code.need = c->lbits;
c->sub.code.tree = c->ltree;
c->mode = LEN;
+ /* FALLTHROUGH */
case LEN: /* i: get length/literal/eob next */
j = c->sub.code.need;
NEEDBITS(j)
@@ -4227,6 +4238,7 @@ int r;
c->sub.code.tree = c->dtree;
Tracevv((stderr, "inflate: length %u\n", c->len));
c->mode = DIST;
+ /* FALLTHROUGH */
case DIST: /* i: get distance next */
j = c->sub.code.need;
NEEDBITS(j)
@@ -4257,6 +4269,7 @@ int r;
DUMPBITS(j)
Tracevv((stderr, "inflate: distance %u\n", c->sub.copy.dist));
c->mode = COPY;
+ /* FALLTHROUGH */
case COPY: /* o: copying bytes in window, waiting for space */
#ifndef __TURBOC__ /* Turbo C bug for following expression */
f = (uInt)(q - s->window) < c->sub.copy.dist ?
@@ -4287,6 +4300,7 @@ int r;
if (s->read != s->write)
LEAVE
c->mode = END;
+ /* FALLTHROUGH */
case END:
r = Z_STREAM_END;
LEAVE
diff --git a/usr/src/cmd/cmd-inet/usr.bin/rdist/gram.y b/usr/src/cmd/cmd-inet/usr.bin/rdist/gram.y
index 552c9d0813..160e9fd802 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/rdist/gram.y
+++ b/usr/src/cmd/cmd-inet/usr.bin/rdist/gram.y
@@ -15,7 +15,6 @@
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
#include "defs.h"
@@ -205,6 +204,7 @@ again:
;
if (c == EOF)
return(0);
+ /* FALLTHROUGH */
case '\n':
yylineno++;
case ' ':
diff --git a/usr/src/cmd/cmd-inet/usr.bin/rdist/server.c b/usr/src/cmd/cmd-inet/usr.bin/rdist/server.c
index 88fac286eb..dd319ed40a 100644
--- a/usr/src/cmd/cmd-inet/usr.bin/rdist/server.c
+++ b/usr/src/cmd/cmd-inet/usr.bin/rdist/server.c
@@ -218,6 +218,7 @@ server()
default:
error("server: unknown command '%s'\n", cp);
+ continue;
case '\0':
continue;
}
@@ -1619,7 +1620,7 @@ more:
default:
s--;
- /* fall into... */
+ /* FALLTHROUGH */
case '\1':
case '\2':
nerrs++;
diff --git a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c
index 607293e838..e75f734e22 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSCommon.c
@@ -1536,8 +1536,8 @@ mDNSexport mDNSu32 RDataHashValue(const ResourceRecord *const rr)
sum = DomainNameHashValue((domainname *)rdb->data);
ptr += dlen;
len -= dlen;
- /* FALLTHROUGH */
}
+ /* FALLTHROUGH */
default:
{
@@ -4185,6 +4185,7 @@ decimal: if (!F.havePrecision)
case 'p': F.havePrecision = F.lSize = 1;
F.precision = sizeof(void*) * 2; // 8 characters on 32-bit; 16 characters on 64-bit
+ /* FALLTHROUGH */
case 'X': digits = "0123456789ABCDEF";
goto hexadecimal;
case 'x': digits = "0123456789abcdef";
@@ -4278,6 +4279,7 @@ hexadecimal: if (F.lSize) n = va_arg(arg, unsigned long);
default: s = mDNS_VACB;
i = mDNS_snprintf(mDNS_VACB, sizeof(mDNS_VACB), "<<UNKNOWN FORMAT CONVERSION CODE %%%c>>", c);
+ /* FALLTHROUGH */
case '%': *sbuffer++ = (char)c;
if (++nwritten >= buflen) goto exit;
diff --git a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSDigest.c b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSDigest.c
index e1aa82df11..6520ac6f6e 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSDigest.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/DNSDigest.c
@@ -610,16 +610,21 @@ void md5_block_data_order (MD5_CTX *c, const void *p,int num);
#define HOST_p_c2l(c,l,n) { \
switch (n) { \
case 0: l =((unsigned long)(*((c)++))); \
+ /* FALLTHROUGH */ \
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
+ /* FALLTHROUGH */ \
case 2: l|=((unsigned long)(*((c)++)))<<16; \
+ /* FALLTHROUGH */ \
case 3: l|=((unsigned long)(*((c)++)))<<24; \
} }
#define HOST_p_c2l_p(c,l,sc,len) { \
switch (sc) { \
case 0: l =((unsigned long)(*((c)++))); \
if (--len == 0) break; \
+ /* FALLTHROUGH */ \
case 1: l|=((unsigned long)(*((c)++)))<< 8; \
if (--len == 0) break; \
+ /* FALLTHROUGH */ \
case 2: l|=((unsigned long)(*((c)++)))<<16; \
} }
/* NOTE the pointer is not incremented at the end of this */
@@ -627,7 +632,9 @@ void md5_block_data_order (MD5_CTX *c, const void *p,int num);
l=0; (c)+=n; \
switch (n) { \
case 3: l =((unsigned long)(*(--(c))))<<16; \
+ /* FALLTHROUGH */ \
case 2: l|=((unsigned long)(*(--(c))))<< 8; \
+ /* FALLTHROUGH */ \
case 1: l|=((unsigned long)(*(--(c)))); \
} }
#define _HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
diff --git a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/uDNS.c b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/uDNS.c
index 6ce0158b56..56aced0180 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/mdnsd/uDNS.c
+++ b/usr/src/cmd/cmd-inet/usr.lib/mdnsd/uDNS.c
@@ -2310,7 +2310,8 @@ mDNSlocal void UpdateOneSRVRecord(mDNS *m, AuthRecord *rr)
case regState_NATError:
if (!NATChanged) return;
- // if nat changed, register if we have a target (below)
+ // if nat changed, register if we have a target (below)
+ /* FALLTHROUGH */
case regState_NoTarget:
if (!newtarget->c[0])
diff --git a/usr/src/cmd/ipf/tools/ip_fil.c b/usr/src/cmd/ipf/tools/ip_fil.c
index 0736074059..7219d1de05 100644
--- a/usr/src/cmd/ipf/tools/ip_fil.c
+++ b/usr/src/cmd/ipf/tools/ip_fil.c
@@ -7,8 +7,13 @@
* Use is subject to license terms.
*/
-#ifndef SOLARIS
-#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#include <sys/param.h>
diff --git a/usr/src/cmd/ipf/tools/ipmon.c b/usr/src/cmd/ipf/tools/ipmon.c
index 398730f08e..b6904f4cda 100644
--- a/usr/src/cmd/ipf/tools/ipmon.c
+++ b/usr/src/cmd/ipf/tools/ipmon.c
@@ -9,9 +9,13 @@
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
-
-#ifndef SOLARIS
-#define SOLARIS (defined(__SVR4) || defined(__svr4__)) && defined(sun)
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#include <sys/types.h>
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 06ce396ef8..5d3af7ff3c 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -178,55 +178,110 @@ mdb_nicenum(uint64_t num, char *buf)
}
}
-static int verbose;
-
+/*
+ * <addr>::sm_entries <buffer length in bytes>
+ *
+ * Treat the buffer specified by the given address as a buffer that contains
+ * space map entries. Iterate over the specified number of entries and print
+ * them in both encoded and decoded form.
+ */
+/* ARGSUSED */
static int
-freelist_walk_init(mdb_walk_state_t *wsp)
+sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
- if (wsp->walk_addr == NULL) {
- mdb_warn("must supply starting address\n");
- return (WALK_ERR);
+ uint64_t bufsz = 0;
+ boolean_t preview = B_FALSE;
+
+ if (!(flags & DCMD_ADDRSPEC))
+ return (DCMD_USAGE);
+
+ if (argc < 1) {
+ preview = B_TRUE;
+ bufsz = 2;
+ } else if (argc != 1) {
+ return (DCMD_USAGE);
+ } else {
+ switch (argv[0].a_type) {
+ case MDB_TYPE_STRING:
+ bufsz = mdb_strtoull(argv[0].a_un.a_str);
+ break;
+ case MDB_TYPE_IMMEDIATE:
+ bufsz = argv[0].a_un.a_val;
+ break;
+ default:
+ return (DCMD_USAGE);
+ }
}
- wsp->walk_data = 0; /* Index into the freelist */
- return (WALK_NEXT);
-}
+ char *actions[] = { "ALLOC", "FREE", "INVALID" };
+ for (uintptr_t bufend = addr + bufsz; addr < bufend;
+ addr += sizeof (uint64_t)) {
+ uint64_t nwords;
+ uint64_t start_addr = addr;
-static int
-freelist_walk_step(mdb_walk_state_t *wsp)
-{
- uint64_t entry;
- uintptr_t number = (uintptr_t)wsp->walk_data;
- char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
- "INVALID", "INVALID", "INVALID", "INVALID" };
- int mapshift = SPA_MINBLOCKSHIFT;
+ uint64_t word = 0;
+ if (mdb_vread(&word, sizeof (word), addr) == -1) {
+ mdb_warn("failed to read space map entry %p", addr);
+ return (DCMD_ERR);
+ }
- if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
- mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
- return (WALK_DONE);
- }
- wsp->walk_addr += sizeof (entry);
- wsp->walk_data = (void *)(number + 1);
+ if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) {
+ (void) mdb_printf("\t [%6llu] %s: txg %llu, "
+ "pass %llu\n",
+ (u_longlong_t)(addr),
+ actions[SM_DEBUG_ACTION_DECODE(word)],
+ (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
+ (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ continue;
+ }
- if (SM_DEBUG_DECODE(entry)) {
- mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n",
- number,
- ddata[SM_DEBUG_ACTION_DECODE(entry)],
- SM_DEBUG_TXG_DECODE(entry),
- SM_DEBUG_SYNCPASS_DECODE(entry));
- } else {
- mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c "
- "size=%06llx", number,
- SM_OFFSET_DECODE(entry) << mapshift,
- (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
- mapshift,
- SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
- SM_RUN_DECODE(entry) << mapshift);
- if (verbose)
- mdb_printf(" (raw=%012llx)\n", entry);
- mdb_printf("\n");
+ char entry_type;
+ uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID;
+
+ if (SM_PREFIX_DECODE(word) != SM2_PREFIX) {
+ entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
+ 'A' : 'F';
+ raw_offset = SM_OFFSET_DECODE(word);
+ raw_run = SM_RUN_DECODE(word);
+ nwords = 1;
+ } else {
+ ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX);
+
+ raw_run = SM2_RUN_DECODE(word);
+ vdev_id = SM2_VDEV_DECODE(word);
+
+ /* it is a two-word entry so we read another word */
+ addr += sizeof (uint64_t);
+ if (addr >= bufend) {
+ mdb_warn("buffer ends in the middle of a two "
+ "word entry\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (mdb_vread(&word, sizeof (word), addr) == -1) {
+ mdb_warn("failed to read space map entry %p",
+ addr);
+ return (DCMD_ERR);
+ }
+
+ entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ?
+ 'A' : 'F';
+ raw_offset = SM2_OFFSET_DECODE(word);
+ nwords = 2;
+ }
+
+ (void) mdb_printf("\t [%6llx] %c range:"
+ " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n",
+ (u_longlong_t)start_addr,
+ entry_type, (u_longlong_t)raw_offset,
+ (u_longlong_t)(raw_offset + raw_run),
+ (u_longlong_t)raw_run,
+ (u_longlong_t)vdev_id, (u_longlong_t)nwords);
+
+ if (preview)
+ break;
}
- return (WALK_NEXT);
+ return (DCMD_OK);
}
static int
@@ -3974,6 +4029,9 @@ static const mdb_dcmd_t dcmds[] = {
"\t-M display metaslab group statistic\n"
"\t-h display histogram (requires -m or -M)\n",
"given a spa_t, print vdev summary", spa_vdevs },
+ { "sm_entries", "<buffer length in bytes>",
+ "print out space map entries from a buffer decoded",
+ sm_entries},
{ "vdev", ":[-remMh]\n"
"\t-r display recursively\n"
"\t-e display statistics\n"
@@ -4024,8 +4082,6 @@ static const mdb_dcmd_t dcmds[] = {
};
static const mdb_walker_t walkers[] = {
- { "zms_freelist", "walk ZFS metaslab freelist",
- freelist_walk_init, freelist_walk_step, NULL },
{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
txg_list_walk_init, txg_list_walk_step, NULL },
{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index aafb0aa29f..6ce4263db8 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -774,7 +774,6 @@ verify_spacemap_refcounts(spa_t *spa)
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
- uint64_t alloc, offset, entry;
char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
"INVALID", "INVALID", "INVALID", "INVALID" };
@@ -791,41 +790,73 @@ dump_spacemap(objset_t *os, space_map_t *sm)
/*
* Print out the freelist entries in both encoded and decoded form.
*/
- alloc = 0;
- for (offset = 0; offset < space_map_length(sm);
- offset += sizeof (entry)) {
- uint8_t mapshift = sm->sm_shift;
+ uint8_t mapshift = sm->sm_shift;
+ int64_t alloc = 0;
+ uint64_t word;
+ for (uint64_t offset = 0; offset < space_map_length(sm);
+ offset += sizeof (word)) {
VERIFY0(dmu_read(os, space_map_object(sm), offset,
- sizeof (entry), &entry, DMU_READ_PREFETCH));
- if (SM_DEBUG_DECODE(entry)) {
+ sizeof (word), &word, DMU_READ_PREFETCH));
+ if (sm_entry_is_debug(word)) {
(void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
- (u_longlong_t)(offset / sizeof (entry)),
- ddata[SM_DEBUG_ACTION_DECODE(entry)],
- (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
- (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
+ (u_longlong_t)(offset / sizeof (word)),
+ ddata[SM_DEBUG_ACTION_DECODE(word)],
+ (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
+ (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ continue;
+ }
+
+ uint8_t words;
+ char entry_type;
+ uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
+
+ if (sm_entry_is_single_word(word)) {
+ entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
+ 'A' : 'F';
+ entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
+ sm->sm_start;
+ entry_run = SM_RUN_DECODE(word) << mapshift;
+ words = 1;
} else {
- (void) printf("\t [%6llu] %c range:"
- " %010llx-%010llx size: %06llx\n",
- (u_longlong_t)(offset / sizeof (entry)),
- SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
- (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
- mapshift) + sm->sm_start),
- (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
- mapshift) + sm->sm_start +
- (SM_RUN_DECODE(entry) << mapshift)),
- (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
- if (SM_TYPE_DECODE(entry) == SM_ALLOC)
- alloc += SM_RUN_DECODE(entry) << mapshift;
- else
- alloc -= SM_RUN_DECODE(entry) << mapshift;
+ /* it is a two-word entry so we read another word */
+ ASSERT(sm_entry_is_double_word(word));
+
+ uint64_t extra_word;
+ offset += sizeof (extra_word);
+ VERIFY0(dmu_read(os, space_map_object(sm), offset,
+ sizeof (extra_word), &extra_word,
+ DMU_READ_PREFETCH));
+
+ ASSERT3U(offset, <=, space_map_length(sm));
+
+ entry_run = SM2_RUN_DECODE(word) << mapshift;
+ entry_vdev = SM2_VDEV_DECODE(word);
+ entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
+ 'A' : 'F';
+ entry_off = (SM2_OFFSET_DECODE(extra_word) <<
+ mapshift) + sm->sm_start;
+ words = 2;
}
+
+ (void) printf("\t [%6llu] %c range:"
+ " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
+ (u_longlong_t)(offset / sizeof (word)),
+ entry_type, (u_longlong_t)entry_off,
+ (u_longlong_t)(entry_off + entry_run),
+ (u_longlong_t)entry_run,
+ (u_longlong_t)entry_vdev, words);
+
+ if (entry_type == 'A')
+ alloc += entry_run;
+ else
+ alloc -= entry_run;
}
- if (alloc != space_map_allocated(sm)) {
- (void) printf("space_map_object alloc (%llu) INCONSISTENT "
- "with space map summary (%llu)\n",
- (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
+ if ((uint64_t)alloc != space_map_allocated(sm)) {
+ (void) printf("space_map_object alloc (%lld) INCONSISTENT "
+ "with space map summary (%lld)\n",
+ (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
}
}
@@ -1153,7 +1184,7 @@ dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
dump_dde(ddt, &dde, walk);
- ASSERT(error == ENOENT);
+ ASSERT3U(error, ==, ENOENT);
(void) printf("\n");
}
@@ -3070,15 +3101,14 @@ typedef struct checkpoint_sm_exclude_entry_arg {
} checkpoint_sm_exclude_entry_arg_t;
static int
-checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
- void *arg)
+checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
{
checkpoint_sm_exclude_entry_arg_t *cseea = arg;
vdev_t *vd = cseea->cseea_vd;
- metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- uint64_t end = offset + size;
+ metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+ uint64_t end = sme->sme_offset + sme->sme_run;
- ASSERT(type == SM_FREE);
+ ASSERT(sme->sme_type == SM_FREE);
/*
* Since the vdev_checkpoint_sm exists in the vdev level
@@ -3096,7 +3126,7 @@ checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
* metaslab boundaries. So if needed we could add code
* that handles metaslab-crossing segments in the future.
*/
- VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(sme->sme_offset, >=, ms->ms_start);
VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
/*
@@ -3104,10 +3134,10 @@ checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
* also verify that the entry is there to begin with.
*/
mutex_enter(&ms->ms_lock);
- range_tree_remove(ms->ms_allocatable, offset, size);
+ range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
mutex_exit(&ms->ms_lock);
- cseea->cseea_checkpoint_size += size;
+ cseea->cseea_checkpoint_size += sme->sme_run;
return (0);
}
@@ -4082,15 +4112,14 @@ typedef struct verify_checkpoint_sm_entry_cb_arg {
#define ENTRIES_PER_PROGRESS_UPDATE 10000
static int
-verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
- void *arg)
+verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
{
verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
vdev_t *vd = vcsec->vcsec_vd;
- metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- uint64_t end = offset + size;
+ metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+ uint64_t end = sme->sme_offset + sme->sme_run;
- ASSERT(type == SM_FREE);
+ ASSERT(sme->sme_type == SM_FREE);
if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
(void) fprintf(stderr,
@@ -4104,7 +4133,7 @@ verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
/*
* See comment in checkpoint_sm_exclude_entry_cb()
*/
- VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(sme->sme_offset, >=, ms->ms_start);
VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
/*
@@ -4113,7 +4142,7 @@ verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
* their respective ms_allocateable trees should not contain them.
*/
mutex_enter(&ms->ms_lock);
- range_tree_verify(ms->ms_allocatable, offset, size);
+ range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
mutex_exit(&ms->ms_lock);
return (0);
@@ -4359,7 +4388,7 @@ verify_checkpoint(spa_t *spa)
DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
- if (error == ENOENT) {
+ if (error == ENOENT && !dump_opt['L']) {
/*
* If the feature is active but the uberblock is missing
* then we must be in the middle of discarding the
@@ -4382,7 +4411,7 @@ verify_checkpoint(spa_t *spa)
error = 3;
}
- if (error == 0)
+ if (error == 0 && !dump_opt['L'])
verify_checkpoint_blocks(spa);
return (error);
@@ -4488,7 +4517,7 @@ dump_zpool(spa_t *spa)
if (dump_opt['h'])
dump_history(spa);
- if (rc == 0 && !dump_opt['L'])
+ if (rc == 0)
rc = verify_checkpoint(spa);
if (rc != 0) {
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 1522c75485..ff45ab193e 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -193,6 +193,7 @@ extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
+extern boolean_t zfs_force_some_double_word_sm_entries;
static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
@@ -6394,6 +6395,12 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
zfs_deadman_synctime_ms = 300000;
+ /*
+ * As two-word space map entries may not come up often (especially
+ * if pool and vdev sizes are small) we want to force at least some
+ * of them so the feature get tested.
+ */
+ zfs_force_some_double_word_sm_entries = B_TRUE;
ztest_fd_rand = open("/dev/urandom", O_RDONLY);
ASSERT3S(ztest_fd_rand, >=, 0);
diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c
index 74ed42b97d..1d56bb6e3f 100644
--- a/usr/src/common/zfs/zfeature_common.c
+++ b/usr/src/common/zfs/zfeature_common.c
@@ -245,6 +245,12 @@ zpool_feature_init(void)
"Pool state can be checkpointed, allowing rewind later.",
ZFEATURE_FLAG_READONLY_COMPAT, NULL);
+ zfeature_register(SPA_FEATURE_SPACEMAP_V2,
+ "com.delphix:spacemap_v2", "spacemap_v2",
+ "Space maps representing large segments are more efficient.",
+ ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
+ NULL);
+
static const spa_feature_t large_blocks_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
diff --git a/usr/src/common/zfs/zfeature_common.h b/usr/src/common/zfs/zfeature_common.h
index 2b68c39c7b..a852f07018 100644
--- a/usr/src/common/zfs/zfeature_common.h
+++ b/usr/src/common/zfs/zfeature_common.h
@@ -59,6 +59,7 @@ typedef enum spa_feature {
SPA_FEATURE_DEVICE_REMOVAL,
SPA_FEATURE_OBSOLETE_COUNTS,
SPA_FEATURE_POOL_CHECKPOINT,
+ SPA_FEATURE_SPACEMAP_V2,
SPA_FEATURES
} spa_feature_t;
diff --git a/usr/src/head/regexp.h b/usr/src/head/regexp.h
index 11a1105bc5..8dfa8ae528 100644
--- a/usr/src/head/regexp.h
+++ b/usr/src/head/regexp.h
@@ -289,7 +289,9 @@ compile(char *instring, char *ep, const char *endbuf, int seof)
*ep++ = (char)c;
continue;
}
+ /* FALLTHROUGH */
}
+ /* FALLTHROUGH */
/* Drop through to default to use \ to turn off special chars */
defchar:
diff --git a/usr/src/lib/libxcurses/src/libc/xcurses/slk.c b/usr/src/lib/libxcurses/src/libc/xcurses/slk.c
index a23188b064..c3b9cdaafc 100644
--- a/usr/src/lib/libxcurses/src/libc/xcurses/slk.c
+++ b/usr/src/lib/libxcurses/src/libc/xcurses/slk.c
@@ -24,8 +24,6 @@
* All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* slk.c
*
@@ -35,12 +33,6 @@
*
*/
-#if M_RCSID
-#ifndef lint
-static char rcsID[] = "$Header: /rd/src/libc/xcurses/rcs/slk.c 1.1 1995/07/19 16:38:06 ant Exp $";
-#endif
-#endif
-
#include <private.h>
/*
@@ -72,7 +64,7 @@ slk_attron(const chtype at)
__m_trace("slk_attron(%lx)", at);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wattron(__m_screen->_slk._w, at);
return __m_return_code("slk_attron", code);
@@ -87,7 +79,7 @@ slk_attroff(const chtype at)
__m_trace("slk_attroff(%lx)", at);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wattroff(__m_screen->_slk._w, at);
return __m_return_code("slk_attroff", code);
@@ -102,7 +94,7 @@ slk_attrset(const chtype at)
__m_trace("slk_attrset(%lx)", at);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wattrset(__m_screen->_slk._w, at);
return __m_return_code("slk_attrset", code);
@@ -117,7 +109,7 @@ slk_attr_off(const attr_t at, void *opts)
__m_trace("slk_attr_off(%x, %p)", at, opts);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wattr_off(__m_screen->_slk._w, at, opts);
return __m_return_code("slk_attr_off", code);
@@ -132,7 +124,7 @@ slk_attr_on(const attr_t at, void *opts)
__m_trace("slk_attr_on(%x, %p)", at, opts);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wattr_on(__m_screen->_slk._w, at, opts);
return __m_return_code("slk_attr_on", code);
@@ -147,7 +139,7 @@ slk_attr_set(const attr_t at, short co, void *opts)
__m_trace("slk_attr_set(%x, %d, %p)", at, co, opts);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wattr_set(__m_screen->_slk._w, at, co, opts);
return __m_return_code("slk_attr_set", code);
@@ -162,8 +154,8 @@ slk_color(short co)
__m_trace("slk_color(%d)", co);
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
- code = wcolor_set(__m_screen->_slk._w, co, (void *) 0);
+ if (__m_screen->_slk._w != NULL)
+ code = wcolor_set(__m_screen->_slk._w, co, NULL);
return __m_return_code("slk_color", code);
}
@@ -177,7 +169,7 @@ slk_touch()
__m_trace("slk_touch(void)");
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wtouchln(__m_screen->_slk._w, 0, 1, 1);
return __m_return_code("slk_touch", code);
@@ -192,10 +184,10 @@ slk_clear()
__m_trace("slk_clear(void)");
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0) {
+ if (__m_screen->_slk._w != NULL) {
if (werase(__m_screen->_slk._w) == OK)
code = wrefresh(__m_screen->_slk._w);
- } else if (label_off != (char *) 0) {
+ } else if (label_off != NULL) {
(void) tputs(label_off, 1, __m_outc);
(void) fflush(__m_screen->_of);
code = OK;
@@ -213,9 +205,9 @@ slk_restore()
__m_trace("slk_clear(void)");
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0) {
+ if (__m_screen->_slk._w != NULL) {
for (i = 0; i < 8; ++i) {
- if (__m_screen->_slk._labels[i] != (char *) 0) {
+ if (__m_screen->_slk._labels[i] != NULL) {
(void) slk_set(
i, __m_screen->_slk._labels[i],
__m_screen->_slk._justify[i]
@@ -224,7 +216,7 @@ slk_restore()
}
code = slk_refresh();
- } else if (label_on != (char *) 0) {
+ } else if (label_on != NULL) {
(void) tputs(label_on, 1, __m_outc);
(void) fflush(__m_screen->_of);
code = OK;
@@ -242,7 +234,7 @@ slk_noutrefresh()
__m_trace("slk_noutrefresh(void)");
#endif
- if (__m_screen->_slk._w != (WINDOW *) 0)
+ if (__m_screen->_slk._w != NULL)
code = wnoutrefresh(__m_screen->_slk._w);
return __m_return_code("slk_noutrefresh", code);
@@ -314,11 +306,11 @@ slk_wset(int index, const wchar_t *label, int justify)
if (index < 1 || 8 < index || justify < 0 || 2 < justify)
goto error1;
- if (label == (wchar_t *) 0)
+ if (label == NULL)
label = M_MB_L("");
/* Copy the characters that fill the first 8 columns of the label. */
- for (wp = wcs, width = 0; label != '\0'; label += i, wp += cc._n) {
+ for (wp = wcs, width = 0; *label != '\0'; label += i, wp += cc._n) {
if ((i = __m_wcs_cc(label, A_NORMAL, 0, &cc)) < 0)
goto error1;
@@ -335,12 +327,12 @@ slk_wset(int index, const wchar_t *label, int justify)
/* Remember the new label. */
__m_screen->_slk._justify[index] = (short) justify;
- if (__m_screen->_slk._labels[index] != (char *) 0)
+ if (__m_screen->_slk._labels[index] != NULL)
free(__m_screen->_slk._labels[index]);
- if ((__m_screen->_slk._labels[index] = m_strdup(mbs)) == (char *) 0)
+ if ((__m_screen->_slk._labels[index] = m_strdup(mbs)) == NULL)
goto error1;
- if (__m_screen->_slk._w != (WINDOW *) 0) {
+ if (__m_screen->_slk._w != NULL) {
/* Write the justified label into the slk window. */
i = format[__m_slk_format][index];
(void) __m_cc_erase(__m_screen->_slk._w, 0, i, 0, i + 7);
@@ -357,19 +349,19 @@ slk_wset(int index, const wchar_t *label, int justify)
}
(void) mvwaddstr(__m_screen->_slk._w, 0, i, mbs);
- } else if (plab_norm != (char *) 0) {
+ } else if (plab_norm != NULL) {
(void) tputs(
tparm(
plab_norm, (long) index, (long) mbs,
0L, 0L, 0L, 0L, 0L, 0L, 0L
), 1, __m_outc
);
- } else if (pkey_plab != (char *) 0) {
+ } else if (pkey_plab != NULL) {
/* Lookup multibyte sequence for the function key. */
for (i = KEY_F(index), k = __m_keyindex; (*k)[1] != i; ++k)
;
- if (cur_term->_str[**k] != (char *) 0) {
+ if (cur_term->_str[**k] != NULL) {
(void) tputs(
tparm(
pkey_plab, (long) index,
@@ -384,4 +376,3 @@ slk_wset(int index, const wchar_t *label, int justify)
error1:
return __m_return_code("slk_wset", code);
}
-
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index 3a6d56add4..79df1aa994 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
@@ -1391,7 +1391,6 @@ badlabel:
switch (prop) {
case ZFS_PROP_RESERVATION:
- case ZFS_PROP_REFRESERVATION:
if (intval > volsize) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' is greater than current "
@@ -1402,6 +1401,17 @@ badlabel:
}
break;
+ case ZFS_PROP_REFRESERVATION:
+ if (intval > volsize && intval != UINT64_MAX) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "'%s' is greater than current "
+ "volume size"), propname);
+ (void) zfs_error(hdl, EZFS_BADPROP,
+ errbuf);
+ goto error;
+ }
+ break;
+
case ZFS_PROP_VOLSIZE:
if (intval % blocksize != 0) {
zfs_nicenum(blocksize, buf,
@@ -1503,6 +1513,61 @@ zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl)
return (1);
}
+/*
+ * Helper for 'zfs {set|clone} refreservation=auto'. Must be called after
+ * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinel value.
+ * Return codes must match zfs_add_synthetic_resv().
+ */
+static int
+zfs_fix_auto_resv(zfs_handle_t *zhp, nvlist_t *nvl)
+{
+ uint64_t volsize;
+ uint64_t resvsize;
+ zfs_prop_t prop;
+ nvlist_t *props;
+
+ if (!ZFS_IS_VOLUME(zhp)) {
+ return (0);
+ }
+
+ if (zfs_which_resv_prop(zhp, &prop) != 0) {
+ return (-1);
+ }
+
+ if (prop != ZFS_PROP_REFRESERVATION) {
+ return (0);
+ }
+
+ if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(prop), &resvsize) != 0) {
+ /* No value being set, so it can't be "auto" */
+ return (0);
+ }
+ if (resvsize != UINT64_MAX) {
+ /* Being set to a value other than "auto" */
+ return (0);
+ }
+
+ props = fnvlist_alloc();
+
+ fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
+ zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE));
+
+ if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
+ &volsize) != 0) {
+ volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE);
+ }
+
+ resvsize = zvol_volsize_to_reservation(volsize, props);
+ fnvlist_free(props);
+
+ (void) nvlist_remove_all(nvl, zfs_prop_to_name(prop));
+ if (nvlist_add_uint64(nvl, zfs_prop_to_name(prop), resvsize) != 0) {
+ (void) no_memory(zhp->zfs_hdl);
+ return (-1);
+ }
+ return (1);
+}
+
void
zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
char *errbuf)
@@ -1668,6 +1733,12 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
goto error;
}
}
+
+ if (added_resv != 1 &&
+ (added_resv = zfs_fix_auto_resv(zhp, nvl)) == -1) {
+ goto error;
+ }
+
/*
* Check how many properties we're setting and allocate an array to
* store changelist pointers for postfix().
@@ -3686,6 +3757,7 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
if (props) {
zfs_type_t type;
+
if (ZFS_IS_VOLUME(zhp)) {
type = ZFS_TYPE_VOLUME;
} else {
@@ -3694,6 +3766,10 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
if ((props = zfs_valid_proplist(hdl, type, props, zoned,
zhp, zhp->zpool_hdl, errbuf)) == NULL)
return (-1);
+ if (zfs_fix_auto_resv(zhp, props) == -1) {
+ nvlist_free(props);
+ return (-1);
+ }
}
ret = lzc_clone(target, zhp->zfs_name, props);
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index ffaa9f984a..61f3127662 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018 Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright (c) 2017 Datto Inc.
@@ -1224,6 +1224,7 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
const char *propname;
char *value;
boolean_t isnone = B_FALSE;
+ boolean_t isauto = B_FALSE;
if (type == ZFS_TYPE_POOL) {
proptype = zpool_prop_get_type(prop);
@@ -1259,8 +1260,9 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
(void) nvpair_value_string(elem, &value);
if (strcmp(value, "none") == 0) {
isnone = B_TRUE;
- } else if (zfs_nicestrtonum(hdl, value, ivalp)
- != 0) {
+ } else if (strcmp(value, "auto") == 0) {
+ isauto = B_TRUE;
+ } else if (zfs_nicestrtonum(hdl, value, ivalp) != 0) {
goto error;
}
} else if (datatype == DATA_TYPE_UINT64) {
@@ -1290,6 +1292,31 @@ zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop,
prop == ZFS_PROP_SNAPSHOT_LIMIT)) {
*ivalp = UINT64_MAX;
}
+
+ /*
+ * Special handling for setting 'refreservation' to 'auto'. Use
+ * UINT64_MAX to tell the caller to use zfs_fix_auto_resv().
+ * 'auto' is only allowed on volumes.
+ */
+ if (isauto) {
+ switch (prop) {
+ case ZFS_PROP_REFRESERVATION:
+ if ((type & ZFS_TYPE_VOLUME) == 0) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "'%s=auto' only allowed on "
+ "volumes"), nvpair_name(elem));
+ goto error;
+ }
+ *ivalp = UINT64_MAX;
+ break;
+ default:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "'auto' is invalid value for '%s'"),
+ nvpair_name(elem));
+ goto error;
+ }
+ }
+
break;
case PROP_TYPE_INDEX:
diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool
index 871facace3..7b58c21513 100644
--- a/usr/src/lib/libzpool/common/llib-lzpool
+++ b/usr/src/lib/libzpool/common/llib-lzpool
@@ -71,3 +71,4 @@ extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
+extern boolean_t zfs_force_some_double_word_sm_entries;
diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m
index 70ae935ed9..8d913a9855 100644
--- a/usr/src/man/man1m/zfs.1m
+++ b/usr/src/man/man1m/zfs.1m
@@ -27,6 +27,7 @@
.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
.\" Copyright (c) 2014 Integros [integros.com]
.\" Copyright 2017 Nexenta Systems, Inc.
+.\" Copyright 2018 Joyent, Inc.
.\"
.Dd December 6, 2017
.Dt ZFS 1M
@@ -1345,7 +1346,7 @@ Limits the amount of space a dataset can consume.
This property enforces a hard limit on the amount of space used.
This hard limit does not include space used by descendents, including file
systems and snapshots.
-.It Sy refreservation Ns = Ns Em size Ns | Ns Sy none
+.It Sy refreservation Ns = Ns Em size Ns | Ns Sy none Ns | Ns Sy auto
The minimum amount of space guaranteed to a dataset, not including its
descendents.
When the amount of space used is below this value, the dataset is treated as if
@@ -1363,6 +1364,22 @@ this reservation to accommodate the current number of
.Qq referenced
bytes in the dataset.
.Pp
+If
+.Sy refreservation
+is set to
+.Sy auto ,
+a volume is thick provisioned
+.Po or
+.Qq not sparse
+.Pc .
+.Sy refreservation Ns = Ns Sy auto
+is only supported on volumes.
+See
+.Sy volsize
+in the
+.Sx Native Properties
+section for more information about sparse volumes.
+.Pp
This property can also be referred to by its shortened column name,
.Sy refreserv .
.It Sy reservation Ns = Ns Em size Ns | Ns Sy none
@@ -1577,22 +1594,39 @@ Extreme care should be used when adjusting the volume size.
Though not recommended, a
.Qq sparse volume
.Po also known as
-.Qq thin provisioning
+.Qq thin provisioned
.Pc
can be created by specifying the
.Fl s
option to the
.Nm zfs Cm create Fl V
-command, or by changing the reservation after the volume has been created.
+command, or by changing the value of the
+.Sy refreservation
+property
+.Po or
+.Sy reservation
+property on pool version 8 or earlier
+.Pc
+after the volume has been created.
A
.Qq sparse volume
-is a volume where the reservation is less then the volume size.
+is a volume where the value of
+.Sy refreservation
+is less than the size of the volume plus the space required to store its
+metadata.
Consequently, writes to a sparse volume can fail with
.Er ENOSPC
when the pool is low on space.
For a sparse volume, changes to
.Sy volsize
-are not reflected in the reservation.
+are not reflected in the
+.Sy refreservation .
+A volume that is not sparse is said to be
+.Qq thick provisioned .
+A sparse volume can become thick provisioned by setting
+.Sy refreservation
+to
+.Sy auto .
.It Sy vscan Ns = Ns Sy on Ns | Ns Sy off
Controls whether regular files should be scanned for viruses when a file is
opened and closed.
diff --git a/usr/src/man/man5/zpool-features.5 b/usr/src/man/man5/zpool-features.5
index 931fd8e69c..c97bb19e65 100644
--- a/usr/src/man/man5/zpool-features.5
+++ b/usr/src/man/man5/zpool-features.5
@@ -423,7 +423,6 @@ This feature becomes \fBactive\fR as soon as it is enabled and will
never return to being \fBenabled\fR.
.RE
-
.sp
.ne 2
.na
@@ -488,6 +487,34 @@ This feature becomes \fBactive\fR when the "zpool checkpoint" command
is used to checkpoint the pool.
The feature will only return back to being \fBenabled\fR when the pool
is rewound or the checkpoint has been discarded.
+
+.RE
+.sp
+.ne 2
+.na
+\fB\fBspacemap_v2\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID com.delphix:spacemap_v2
+READ\-ONLY COMPATIBLE yes
+DEPENDENCIES none
+.TE
+
+This feature enables the use of the new space map encoding which
+consists of two words (instead of one) whenever it is advantageous.
+The new encoding allows space maps to represent large regions of
+space more efficiently on-disk while also increasing their maximum
+addressable offset.
+
+This feature becomes \fBactive\fR once it is \fBenabled\fR, and never
+returns back to being \fBenabled\fR.
+
+.RE
+.sp
+.ne 2
+.na
\fB\fBlarge_blocks\fR\fR
.ad
.RS 4n
diff --git a/usr/src/pkg/manifests/driver-storage-nvme.mf b/usr/src/pkg/manifests/driver-storage-nvme.mf
index 98f9b8cc81..4c8d39cacb 100644
--- a/usr/src/pkg/manifests/driver-storage-nvme.mf
+++ b/usr/src/pkg/manifests/driver-storage-nvme.mf
@@ -25,7 +25,7 @@
<include global_zone_only_component>
set name=pkg.fmri value=pkg:/driver/storage/nvme@$(PKGVERS)
set name=pkg.description \
- value="Driver for Intel NVMe 1.1b compliant storage devices"
+ value="Driver for NVM Express compliant storage devices"
set name=pkg.summary value="NVMe driver"
set name=info.classification \
value=org.opensolaris.category.2008:System/Hardware
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 87ed0ee18b..faf818c66f 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -13,6 +13,7 @@
# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
# Copyright 2015, 2016 Nexenta Systems, Inc. All rights reserved.
# Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
+# Copyright 2018 Joyent, Inc.
#
set name=pkg.fmri value=pkg:/system/test/zfstest@$(PKGVERS)
@@ -2397,6 +2398,14 @@ file path=opt/zfs-tests/tests/functional/reservation/reservation_017_pos \
mode=0555
file path=opt/zfs-tests/tests/functional/reservation/reservation_018_pos \
mode=0555
+file path=opt/zfs-tests/tests/functional/reservation/reservation_019_pos \
+ mode=0555
+file path=opt/zfs-tests/tests/functional/reservation/reservation_020_pos \
+ mode=0555
+file path=opt/zfs-tests/tests/functional/reservation/reservation_021_neg \
+ mode=0555
+file path=opt/zfs-tests/tests/functional/reservation/reservation_022_pos \
+ mode=0555
file path=opt/zfs-tests/tests/functional/reservation/setup mode=0555
file path=opt/zfs-tests/tests/functional/rootpool/cleanup mode=0555
file path=opt/zfs-tests/tests/functional/rootpool/rootpool_002_neg mode=0555
diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run
index ef84f99f68..ff77d8f1f2 100644
--- a/usr/src/test/zfs-tests/runfiles/delphix.run
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run
@@ -12,6 +12,7 @@
#
# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
# Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
+# Copyright 2018 Joyent, Inc.
#
[DEFAULT]
@@ -515,7 +516,9 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos',
'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos',
'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos',
- 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos']
+ 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos',
+ 'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg',
+ 'reservation_022_pos']
[/opt/zfs-tests/tests/functional/rootpool]
tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run
index 030f4ffa54..ebf446f61a 100644
--- a/usr/src/test/zfs-tests/runfiles/omnios.run
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run
@@ -12,6 +12,7 @@
#
# Copyright (c) 2013, 2017 by Delphix. All rights reserved.
# Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
+# Copyright 2018 Joyent, Inc.
#
[DEFAULT]
@@ -484,7 +485,9 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos',
'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos',
'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos',
- 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos']
+ 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos',
+ 'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg',
+ 'reservation_022_pos']
[/opt/zfs-tests/tests/functional/rootpool]
tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run
index 40f5008319..2d8af0bf69 100644
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run
@@ -12,6 +12,7 @@
#
# Copyright (c) 2012, 2017 by Delphix. All rights reserved.
# Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
+# Copyright 2018 Joyent, Inc.
#
[DEFAULT]
@@ -484,7 +485,9 @@ tests = ['reservation_001_pos', 'reservation_002_pos', 'reservation_003_pos',
'reservation_007_pos', 'reservation_008_pos', 'reservation_009_pos',
'reservation_010_pos', 'reservation_011_pos', 'reservation_012_pos',
'reservation_013_pos', 'reservation_014_pos', 'reservation_015_pos',
- 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos']
+ 'reservation_016_pos', 'reservation_017_pos', 'reservation_018_pos',
+ 'reservation_019_pos', 'reservation_020_pos', 'reservation_021_neg',
+ 'reservation_022_pos']
[/opt/zfs-tests/tests/functional/rootpool]
tests = ['rootpool_002_neg', 'rootpool_003_neg', 'rootpool_007_pos']
diff --git a/usr/src/test/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh b/usr/src/test/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
index c819d664f8..e62124c475 100644
--- a/usr/src/test/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
+++ b/usr/src/test/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
@@ -19,7 +19,7 @@
#
# DESCRIPTION:
-# Discard checkpoint on a stressed pool. Ensure that we can
+# Discard checkpoint on a stressed pool. Ensure that we can
# export and import the pool while discarding but not run any
# operations that have to do with the checkpoint or change the
# pool's config.
@@ -63,6 +63,10 @@ log_onexit test_cleanup
# the current setup the checkpoint space maps should
# have tens of thousands of entries.
#
+# Note: If two-word entries are used in the space
+# map, we should have even more time to
+# verify this.
+#
mdb_ctf_set_int zfs_spa_discard_memory_limit 0t128
log_must zpool checkpoint $NESTEDPOOL
diff --git a/usr/src/test/zfs-tests/tests/functional/reservation/reservation_019_pos.sh b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_019_pos.sh
new file mode 100644
index 0000000000..5c63d63a82
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_019_pos.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/bash -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/reservation/reservation.shlib
+
+#
+# DESCRIPTION:
+#
+# A thin provisioned volume can become thick provisioned with 'zfs set
+# refreservation=auto'.
+#
+# STRATEGY:
+# 1) Create a sparse volume.
+# 2) Use zfs set refreservation=auto to make it thick provisioned.
+# 3) Verify that refreservation is now the size predicted by
+# volsize_to_reservation().
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ if datasetexists $TESTPOOL/$TESTVOL; then
+ log_must zfs destroy -f $TESTPOOL/$TESTVOL
+ fi
+}
+
+log_onexit cleanup
+
+log_assert "A thin provisioned volume can become thick provisioned with" \
+ "'zfs set refreservation=auto'."
+
+space_avail=$(get_prop available $TESTPOOL)
+(( vol_size = (space_avail / 2) & ~(1024 * 1024 - 1) ))
+
+vol=$TESTPOOL/$TESTVOL
+
+# Create sparse vol and verify
+log_must zfs create -V $vol_size -s $vol
+resv=$(get_prop refreservation $vol)
+log_must test $resv -eq 0
+
+# Set refreservation
+log_must zfs set refreservation=auto $vol
+
+# Verify
+resv=$(get_prop refreservation $vol)
+expected=$(volsize_to_reservation $vol $vol_size)
+log_must test $resv -eq $expected
+
+log_pass "Setting refreservation=auto set refreservation to expected value"
diff --git a/usr/src/test/zfs-tests/tests/functional/reservation/reservation_020_pos.sh b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_020_pos.sh
new file mode 100644
index 0000000000..554f496b07
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_020_pos.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/bash -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/reservation/reservation.shlib
+
+#
+# DESCRIPTION:
+#
+# Cloning a thick provisioned volume results in a sparse volume
+#
+# STRATEGY:
+# 1) Create a thick provisioned volume.
+# 2) Snapshot and clone it.
+# 3) Verify that the clone is sparse.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ if datasetexists $TESTPOOL/$TESTVOL; then
+ # Destroy first vol and descendants in one go.
+ log_must zfs destroy -Rf $TESTPOOL/$TESTVOL
+ fi
+}
+
+log_onexit cleanup
+
+log_assert "Cloning a thick provisioned volume results in a sparse volume"
+
+space_avail=$(get_prop available $TESTPOOL)
+(( vol_size = (space_avail / 4) & ~(1024 * 1024 - 1) ))
+
+vol=$TESTPOOL/$TESTVOL
+snap=$vol@clone
+vol2=$TESTPOOL/$TESTVOL2
+
+# Create sparse vol and verify
+log_must zfs create -V $vol_size $vol
+resv=$(get_prop refreservation $vol)
+expected=$(volsize_to_reservation $vol $vol_size)
+log_must test $resv -eq $expected
+
+# Clone it
+log_must zfs snapshot $snap
+log_must zfs clone $snap $vol2
+
+# Verify
+resv=$(get_prop refreservation $vol2)
+log_must test $resv -eq 0
+
+log_pass "Cloning a thick provisioned volume results in a sparse volume"
diff --git a/usr/src/test/zfs-tests/tests/functional/reservation/reservation_021_neg.sh b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_021_neg.sh
new file mode 100644
index 0000000000..249cb6e2ae
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_021_neg.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/bash -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/reservation/reservation.shlib
+
+#
+# DESCRIPTION:
+#
+# The use of refreservation=auto on a filesystem does not change the
+# refreservation and results in an error.
+#
+# STRATEGY:
+# 1) Create a filesystem
+# 2) Verify that zfs set refreservation=auto fails without changing
+# refreservation from none.
+# 3) Set refreservation to a valid value.
+# 4) Verify that zfs set refreservation=auto fails without changing
+# refreservation from the previous value.
+#
+
+verify_runnable "both"
+
+fs=$TESTPOOL/$TESTFS/$(basename $0).$$
+
+function cleanup
+{
+ if datasetexists "$fs"; then
+ log_must zfs destroy -f "$fs"
+ fi
+}
+
+log_onexit cleanup
+
+log_assert "refreservation=auto on a filesystem generates an error without" \
+ "changing refreservation"
+
+space_avail=$(get_prop available $TESTPOOL)
+(( fs_size = space_avail / 4 ))
+
+# Create a filesystem with no refreservation
+log_must zfs create $fs
+resv=$(get_prop refreservation $fs)
+log_must test $resv -eq 0
+
+# Verify that refreservation=auto fails without altering refreservation
+log_mustnot zfs set refreservation=auto $fs
+resv=$(get_prop refreservation $fs)
+log_must test $resv -eq 0
+
+# Set refreservation and verify
+log_must zfs set refreservation=$fs_size $fs
+resv=$(get_prop refreservation $fs)
+log_must test $resv -eq $fs_size
+
+# Verify that refreservation=auto fails without altering refreservation
+log_mustnot zfs set refreservation=auto $fs
+resv=$(get_prop refreservation $fs)
+log_must test $resv -eq $fs_size
+
+log_pass "refreservation=auto does not work on filesystems, as expected"
diff --git a/usr/src/test/zfs-tests/tests/functional/reservation/reservation_022_pos.sh b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_022_pos.sh
new file mode 100644
index 0000000000..7909d0e794
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/reservation/reservation_022_pos.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/bash -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/reservation/reservation.shlib
+
+#
+# DESCRIPTION:
+#
+# Cloning a volume with -o refreservation=auto creates a thick provisioned
+# volume
+#
+# STRATEGY:
+# 1) Create a sparse volume.
+# 2) Snapshot and clone it, using clone -o refreservation=auto.
+# 3) Verify that the clone has refreservation that matches the size predicted by
+# volsize_to_reservation().
+# 4) Snapshot this second volume and clone it, using clone -o
+# refreservation=auto.
+# 5) Verify that the second clone has refreservation that matches the size
+# predicted by volsize_to_reservation().
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+ if datasetexists $TESTPOOL/$TESTVOL; then
+ # Destroy first vol and descendants in one go.
+ log_must zfs destroy -Rf $TESTPOOL/$TESTVOL
+ fi
+}
+
+log_onexit cleanup
+
+log_assert "Cloning a volume with -o refreservation=auto creates a thick" \
+ "provisioned volume"
+
+space_avail=$(get_prop available $TESTPOOL)
+(( vol_size = (space_avail / 4) & ~(1024 * 1024 - 1) ))
+
+vol=$TESTPOOL/$TESTVOL
+vol2=$TESTPOOL/$TESTVOL2
+vol3=$TESTPOOL/$TESTVOL2-again
+
+# Create sparse vol and verify
+log_must zfs create -s -V $vol_size $vol
+resv=$(get_prop refreservation $vol)
+log_must test $resv -eq 0
+
+# Clone it
+snap=$vol@clone
+log_must zfs snapshot $snap
+log_must zfs clone -o refreservation=auto $snap $vol2
+
+# Verify it is thick provisioned
+resv=$(get_prop refreservation $vol2)
+expected=$(volsize_to_reservation $vol2 $vol_size)
+log_must test $resv -eq $expected
+
+# Clone the thick provisioned volume
+snap=$vol2@clone
+log_must zfs snapshot $snap
+log_must zfs clone -o refreservation=auto $snap $vol3
+
+# Verify the newest clone is also thick provisioned
+resv=$(get_prop refreservation $vol3)
+expected=$(volsize_to_reservation $vol3 $vol_size)
+log_must test $resv -eq $expected
+
+log_pass "Cloning a volume with -o refreservation=auto creates a thick provisioned volume"
diff --git a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c b/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c
index 8b292989b9..b01866c9cc 100644
--- a/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c
+++ b/usr/src/uts/common/avs/ns/rdc/rdc_diskq.c
@@ -192,7 +192,7 @@ rdc_open_diskq(rdc_k_info_t *krdc)
mutex_enter(&grp->diskqmutex);
mutexheld++;
- if (&urdc->disk_queue[0] == '\0') {
+ if (urdc->disk_queue[0] == '\0') {
goto fail;
}
@@ -606,7 +606,7 @@ rdc_read_diskq_header(rdc_k_info_t *krdc)
(void) snprintf(buf, NSC_MAXPATH, "%s:%s", urdc->secondary.intf,
&urdc->secondary.intf[0]);
cmn_err(CE_WARN, "!Disk Queue Header read failed for %s",
- &urdc->group_name[0] == '\0' ? buf:
+ urdc->group_name[0] == '\0' ? buf:
&urdc->group_name[0]);
return (-1);
}
@@ -626,7 +626,7 @@ rdc_read_diskq_header(rdc_k_info_t *krdc)
(void) snprintf(buf, NSC_MAXPATH, "%s:%s", urdc->secondary.intf,
&urdc->secondary.file[0]);
cmn_err(CE_WARN, "!Disk Queue Header read failed(%d) for %s",
- rc, &urdc->group_name[0] == '\0' ? buf :
+ rc, urdc->group_name[0] == '\0' ? buf :
&urdc->group_name[0]);
return (-1);
}
diff --git a/usr/src/uts/common/c2/audit_start.c b/usr/src/uts/common/c2/audit_start.c
index f82bd21e58..1f31d7dec9 100644
--- a/usr/src/uts/common/c2/audit_start.c
+++ b/usr/src/uts/common/c2/audit_start.c
@@ -175,7 +175,7 @@ audit_start(
/* get basic event for system call */
tad->tad_event = audit_s2e[scid].au_event;
- if (audit_s2e[scid].au_init != (au_event_t)AUE_NULL) {
+ if (audit_s2e[scid].au_init != (au_event_t (*)(au_event_t))NULL) {
/* get specific event */
tad->tad_event = (*audit_s2e[scid].au_init)(tad->tad_event);
}
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 88d81c9540..82ca2d6cbf 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -2096,17 +2096,6 @@ metaslab_group_preload(metaslab_group_t *mg)
*
* 3. The on-disk size of the space map should actually decrease.
*
- * Checking the first condition is tricky since we don't want to walk
- * the entire AVL tree calculating the estimated on-disk size. Instead we
- * use the size-ordered range tree in the metaslab and calculate the
- * size required to write out the largest segment in our free tree. If the
- * size required to represent that segment on disk is larger than the space
- * map object then we avoid condensing this map.
- *
- * To determine the second criterion we use a best-case estimate and assume
- * each segment can be represented on-disk as a single 64-bit entry. We refer
- * to this best-case estimate as the space map's minimal form.
- *
* Unfortunately, we cannot compute the on-disk size of the space map in this
* context because we cannot accurately compute the effects of compression, etc.
* Instead, we apply the heuristic described in the block comment for
@@ -2117,9 +2106,6 @@ static boolean_t
metaslab_should_condense(metaslab_t *msp)
{
space_map_t *sm = msp->ms_sm;
- range_seg_t *rs;
- uint64_t size, entries, segsz, object_size, optimal_size, record_size;
- dmu_object_info_t doi;
vdev_t *vd = msp->ms_group->mg_vd;
uint64_t vdev_blocksize = 1 << vd->vdev_ashift;
uint64_t current_txg = spa_syncing_txg(vd->vdev_spa);
@@ -2148,34 +2134,22 @@ metaslab_should_condense(metaslab_t *msp)
msp->ms_condense_checked_txg = current_txg;
/*
- * Use the ms_allocatable_by_size range tree, which is ordered by
- * size, to obtain the largest segment in the free tree. We always
- * condense metaslabs that are empty and metaslabs for which a
- * condense request has been made.
+ * We always condense metaslabs that are empty and metaslabs for
+ * which a condense request has been made.
*/
- rs = avl_last(&msp->ms_allocatable_by_size);
- if (rs == NULL || msp->ms_condense_wanted)
+ if (avl_is_empty(&msp->ms_allocatable_by_size) ||
+ msp->ms_condense_wanted)
return (B_TRUE);
- /*
- * Calculate the number of 64-bit entries this segment would
- * require when written to disk. If this single segment would be
- * larger on-disk than the entire current on-disk structure, then
- * clearly condensing will increase the on-disk structure size.
- */
- size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
- entries = size / (MIN(size, SM_RUN_MAX));
- segsz = entries * sizeof (uint64_t);
-
- optimal_size =
- sizeof (uint64_t) * avl_numnodes(&msp->ms_allocatable->rt_root);
- object_size = space_map_length(msp->ms_sm);
+ uint64_t object_size = space_map_length(msp->ms_sm);
+ uint64_t optimal_size = space_map_estimate_optimal_size(sm,
+ msp->ms_allocatable, SM_NO_VDEVID);
+ dmu_object_info_t doi;
dmu_object_info_from_db(sm->sm_dbuf, &doi);
- record_size = MAX(doi.doi_data_block_size, vdev_blocksize);
+ uint64_t record_size = MAX(doi.doi_data_block_size, vdev_blocksize);
- return (segsz <= object_size &&
- object_size >= (optimal_size * zfs_condense_pct / 100) &&
+ return (object_size >= (optimal_size * zfs_condense_pct / 100) &&
object_size > zfs_metaslab_condense_block_threshold * record_size);
}
@@ -2250,11 +2224,11 @@ metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
* optimal, this is typically close to optimal, and much cheaper to
* compute.
*/
- space_map_write(sm, condense_tree, SM_ALLOC, tx);
+ space_map_write(sm, condense_tree, SM_ALLOC, SM_NO_VDEVID, tx);
range_tree_vacate(condense_tree, NULL, NULL);
range_tree_destroy(condense_tree);
- space_map_write(sm, msp->ms_allocatable, SM_FREE, tx);
+ space_map_write(sm, msp->ms_allocatable, SM_FREE, SM_NO_VDEVID, tx);
mutex_enter(&msp->ms_lock);
msp->ms_condensing = B_FALSE;
}
@@ -2366,8 +2340,10 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
metaslab_condense(msp, txg, tx);
} else {
mutex_exit(&msp->ms_lock);
- space_map_write(msp->ms_sm, alloctree, SM_ALLOC, tx);
- space_map_write(msp->ms_sm, msp->ms_freeing, SM_FREE, tx);
+ space_map_write(msp->ms_sm, alloctree, SM_ALLOC,
+ SM_NO_VDEVID, tx);
+ space_map_write(msp->ms_sm, msp->ms_freeing, SM_FREE,
+ SM_NO_VDEVID, tx);
mutex_enter(&msp->ms_lock);
}
@@ -2382,7 +2358,7 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
*/
mutex_exit(&msp->ms_lock);
space_map_write(vd->vdev_checkpoint_sm,
- msp->ms_checkpointing, SM_FREE, tx);
+ msp->ms_checkpointing, SM_FREE, SM_NO_VDEVID, tx);
mutex_enter(&msp->ms_lock);
space_map_update(vd->vdev_checkpoint_sm);
diff --git a/usr/src/uts/common/fs/zfs/range_tree.c b/usr/src/uts/common/fs/zfs/range_tree.c
index f0bcaf5280..7c6ce90e18 100644
--- a/usr/src/uts/common/fs/zfs/range_tree.c
+++ b/usr/src/uts/common/fs/zfs/range_tree.c
@@ -179,7 +179,7 @@ range_tree_add(void *arg, uint64_t start, uint64_t size)
}
/* Make sure we don't overlap with either of our neighbors */
- VERIFY(rs == NULL);
+ VERIFY3P(rs, ==, NULL);
rs_before = avl_nearest(&rt->rt_root, where, AVL_BEFORE);
rs_after = avl_nearest(&rt->rt_root, where, AVL_AFTER);
diff --git a/usr/src/uts/common/fs/zfs/spa_checkpoint.c b/usr/src/uts/common/fs/zfs/spa_checkpoint.c
index a4af48d8c5..db0d2caa61 100644
--- a/usr/src/uts/common/fs/zfs/spa_checkpoint.c
+++ b/usr/src/uts/common/fs/zfs/spa_checkpoint.c
@@ -203,13 +203,12 @@ typedef struct spa_checkpoint_discard_sync_callback_arg {
} spa_checkpoint_discard_sync_callback_arg_t;
static int
-spa_checkpoint_discard_sync_callback(maptype_t type, uint64_t offset,
- uint64_t size, void *arg)
+spa_checkpoint_discard_sync_callback(space_map_entry_t *sme, void *arg)
{
spa_checkpoint_discard_sync_callback_arg_t *sdc = arg;
vdev_t *vd = sdc->sdc_vd;
- metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- uint64_t end = offset + size;
+ metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+ uint64_t end = sme->sme_offset + sme->sme_run;
if (sdc->sdc_entry_limit == 0)
return (EINTR);
@@ -224,8 +223,8 @@ spa_checkpoint_discard_sync_callback(maptype_t type, uint64_t offset,
* metaslab boundaries. So if needed we could add code
* that handles metaslab-crossing segments in the future.
*/
- VERIFY3U(type, ==, SM_FREE);
- VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(sme->sme_type, ==, SM_FREE);
+ VERIFY3U(sme->sme_offset, >=, ms->ms_start);
VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
/*
@@ -237,14 +236,15 @@ spa_checkpoint_discard_sync_callback(maptype_t type, uint64_t offset,
mutex_enter(&ms->ms_lock);
if (range_tree_is_empty(ms->ms_freeing))
vdev_dirty(vd, VDD_METASLAB, ms, sdc->sdc_txg);
- range_tree_add(ms->ms_freeing, offset, size);
+ range_tree_add(ms->ms_freeing, sme->sme_offset, sme->sme_run);
mutex_exit(&ms->ms_lock);
- ASSERT3U(vd->vdev_spa->spa_checkpoint_info.sci_dspace, >=, size);
- ASSERT3U(vd->vdev_stat.vs_checkpoint_space, >=, size);
+ ASSERT3U(vd->vdev_spa->spa_checkpoint_info.sci_dspace, >=,
+ sme->sme_run);
+ ASSERT3U(vd->vdev_stat.vs_checkpoint_space, >=, sme->sme_run);
- vd->vdev_spa->spa_checkpoint_info.sci_dspace -= size;
- vd->vdev_stat.vs_checkpoint_space -= size;
+ vd->vdev_spa->spa_checkpoint_info.sci_dspace -= sme->sme_run;
+ vd->vdev_stat.vs_checkpoint_space -= sme->sme_run;
sdc->sdc_entry_limit--;
return (0);
@@ -289,12 +289,13 @@ spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
* Thus, we set the maximum entries that the space map callback
* will be applied to be half the entries that could fit in the
* imposed memory limit.
+ *
+ * Note that since this is a conservative estimate we also
+ * assume the worst case scenario in our computation where each
+ * entry is two-word.
*/
uint64_t max_entry_limit =
- (zfs_spa_discard_memory_limit / sizeof (uint64_t)) >> 1;
-
- uint64_t entries_in_sm =
- space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t);
+ (zfs_spa_discard_memory_limit / (2 * sizeof (uint64_t))) >> 1;
/*
* Iterate from the end of the space map towards the beginning,
@@ -318,14 +319,15 @@ spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
spa_checkpoint_discard_sync_callback_arg_t sdc;
sdc.sdc_vd = vd;
sdc.sdc_txg = tx->tx_txg;
- sdc.sdc_entry_limit = MIN(entries_in_sm, max_entry_limit);
+ sdc.sdc_entry_limit = max_entry_limit;
- uint64_t entries_before = entries_in_sm;
+ uint64_t words_before =
+ space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t);
error = space_map_incremental_destroy(vd->vdev_checkpoint_sm,
spa_checkpoint_discard_sync_callback, &sdc, tx);
- uint64_t entries_after =
+ uint64_t words_after =
space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t);
#ifdef DEBUG
@@ -333,9 +335,9 @@ spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
#endif
zfs_dbgmsg("discarding checkpoint: txg %llu, vdev id %d, "
- "deleted %llu entries - %llu entries are left",
- tx->tx_txg, vd->vdev_id, (entries_before - entries_after),
- entries_after);
+ "deleted %llu words - %llu words are left",
+ tx->tx_txg, vd->vdev_id, (words_before - words_after),
+ words_after);
if (error != EINTR) {
if (error != 0) {
@@ -344,15 +346,15 @@ spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
"space map of vdev %llu\n",
error, vd->vdev_id);
}
- ASSERT0(entries_after);
+ ASSERT0(words_after);
ASSERT0(vd->vdev_checkpoint_sm->sm_alloc);
- ASSERT0(vd->vdev_checkpoint_sm->sm_length);
+ ASSERT0(space_map_length(vd->vdev_checkpoint_sm));
space_map_free(vd->vdev_checkpoint_sm, tx);
space_map_close(vd->vdev_checkpoint_sm);
vd->vdev_checkpoint_sm = NULL;
- VERIFY0(zap_remove(vd->vdev_spa->spa_meta_objset,
+ VERIFY0(zap_remove(spa_meta_objset(vd->vdev_spa),
vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, tx));
}
}
diff --git a/usr/src/uts/common/fs/zfs/space_map.c b/usr/src/uts/common/fs/zfs/space_map.c
index 989daea941..b42d449c77 100644
--- a/usr/src/uts/common/fs/zfs/space_map.c
+++ b/usr/src/uts/common/fs/zfs/space_map.c
@@ -41,68 +41,194 @@
* Note on space map block size:
*
* The data for a given space map can be kept on blocks of any size.
- * Larger blocks entail fewer i/o operations, but they also cause the
- * DMU to keep more data in-core, and also to waste more i/o bandwidth
+ * Larger blocks entail fewer I/O operations, but they also cause the
+ * DMU to keep more data in-core, and also to waste more I/O bandwidth
* when only a few blocks have changed since the last transaction group.
*/
/*
+ * Enabled whenever we want to stress test the use of double-word
+ * space map entries.
+ */
+boolean_t zfs_force_some_double_word_sm_entries = B_FALSE;
+
+boolean_t
+sm_entry_is_debug(uint64_t e)
+{
+ return (SM_PREFIX_DECODE(e) == SM_DEBUG_PREFIX);
+}
+
+boolean_t
+sm_entry_is_single_word(uint64_t e)
+{
+ uint8_t prefix = SM_PREFIX_DECODE(e);
+ return (prefix != SM_DEBUG_PREFIX && prefix != SM2_PREFIX);
+}
+
+boolean_t
+sm_entry_is_double_word(uint64_t e)
+{
+ return (SM_PREFIX_DECODE(e) == SM2_PREFIX);
+}
+
+/*
* Iterate through the space map, invoking the callback on each (non-debug)
* space map entry.
*/
int
space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
{
- uint64_t *entry, *entry_map, *entry_map_end;
- uint64_t bufsize, size, offset, end;
+ uint64_t sm_len = space_map_length(sm);
+ ASSERT3U(sm->sm_blksz, !=, 0);
+
+ dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, sm_len,
+ ZIO_PRIORITY_SYNC_READ);
+
+ uint64_t blksz = sm->sm_blksz;
int error = 0;
+ for (uint64_t block_base = 0; block_base < sm_len && error == 0;
+ block_base += blksz) {
+ dmu_buf_t *db;
+ error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
+ block_base, FTAG, &db, DMU_READ_PREFETCH);
+ if (error != 0)
+ return (error);
- end = space_map_length(sm);
+ uint64_t *block_start = db->db_data;
+ uint64_t block_length = MIN(sm_len - block_base, blksz);
+ uint64_t *block_end = block_start +
+ (block_length / sizeof (uint64_t));
- bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
- entry_map = zio_buf_alloc(bufsize);
+ VERIFY0(P2PHASE(block_length, sizeof (uint64_t)));
+ VERIFY3U(block_length, !=, 0);
+ ASSERT3U(blksz, ==, db->db_size);
- if (end > bufsize) {
- dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
- end - bufsize, ZIO_PRIORITY_SYNC_READ);
- }
+ for (uint64_t *block_cursor = block_start;
+ block_cursor < block_end && error == 0; block_cursor++) {
+ uint64_t e = *block_cursor;
- for (offset = 0; offset < end && error == 0; offset += bufsize) {
- size = MIN(end - offset, bufsize);
- VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
- VERIFY(size != 0);
- ASSERT3U(sm->sm_blksz, !=, 0);
+ if (sm_entry_is_debug(e)) /* Skip debug entries */
+ continue;
- dprintf("object=%llu offset=%llx size=%llx\n",
- space_map_object(sm), offset, size);
+ uint64_t raw_offset, raw_run, vdev_id;
+ maptype_t type;
+ if (sm_entry_is_single_word(e)) {
+ type = SM_TYPE_DECODE(e);
+ vdev_id = SM_NO_VDEVID;
+ raw_offset = SM_OFFSET_DECODE(e);
+ raw_run = SM_RUN_DECODE(e);
+ } else {
+ /* it is a two-word entry */
+ ASSERT(sm_entry_is_double_word(e));
+ raw_run = SM2_RUN_DECODE(e);
+ vdev_id = SM2_VDEV_DECODE(e);
+
+ /* move on to the second word */
+ block_cursor++;
+ e = *block_cursor;
+ VERIFY3P(block_cursor, <=, block_end);
+
+ type = SM2_TYPE_DECODE(e);
+ raw_offset = SM2_OFFSET_DECODE(e);
+ }
- error = dmu_read(sm->sm_os, space_map_object(sm), offset, size,
- entry_map, DMU_READ_PREFETCH);
- if (error != 0)
- break;
+ uint64_t entry_offset = (raw_offset << sm->sm_shift) +
+ sm->sm_start;
+ uint64_t entry_run = raw_run << sm->sm_shift;
- entry_map_end = entry_map + (size / sizeof (uint64_t));
- for (entry = entry_map; entry < entry_map_end && error == 0;
- entry++) {
- uint64_t e = *entry;
- uint64_t offset, size;
+ VERIFY0(P2PHASE(entry_offset, 1ULL << sm->sm_shift));
+ VERIFY0(P2PHASE(entry_run, 1ULL << sm->sm_shift));
+ ASSERT3U(entry_offset, >=, sm->sm_start);
+ ASSERT3U(entry_offset, <, sm->sm_start + sm->sm_size);
+ ASSERT3U(entry_run, <=, sm->sm_size);
+ ASSERT3U(entry_offset + entry_run, <=,
+ sm->sm_start + sm->sm_size);
- if (SM_DEBUG_DECODE(e)) /* Skip debug entries */
- continue;
+ space_map_entry_t sme = {
+ .sme_type = type,
+ .sme_vdev = vdev_id,
+ .sme_offset = entry_offset,
+ .sme_run = entry_run
+ };
+ error = callback(&sme, arg);
+ }
+ dmu_buf_rele(db, FTAG);
+ }
+ return (error);
+}
- offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
- sm->sm_start;
- size = SM_RUN_DECODE(e) << sm->sm_shift;
+/*
+ * Reads the entries from the last block of the space map into
+ * buf in reverse order. Populates nwords with number of words
+ * in the last block.
+ *
+ * Refer to block comment within space_map_incremental_destroy()
+ * to understand why this function is needed.
+ */
+static int
+space_map_reversed_last_block_entries(space_map_t *sm, uint64_t *buf,
+ uint64_t bufsz, uint64_t *nwords)
+{
+ int error = 0;
+ dmu_buf_t *db;
- VERIFY0(P2PHASE(offset, 1ULL << sm->sm_shift));
- VERIFY0(P2PHASE(size, 1ULL << sm->sm_shift));
- VERIFY3U(offset, >=, sm->sm_start);
- VERIFY3U(offset + size, <=, sm->sm_start + sm->sm_size);
- error = callback(SM_TYPE_DECODE(e), offset, size, arg);
+ /*
+ * Find the offset of the last word in the space map and use
+ * that to read the last block of the space map with
+ * dmu_buf_hold().
+ */
+ uint64_t last_word_offset =
+ sm->sm_phys->smp_objsize - sizeof (uint64_t);
+ error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
+ FTAG, &db, DMU_READ_NO_PREFETCH);
+ if (error != 0)
+ return (error);
+
+ ASSERT3U(sm->sm_object, ==, db->db_object);
+ ASSERT3U(sm->sm_blksz, ==, db->db_size);
+ ASSERT3U(bufsz, >=, db->db_size);
+ ASSERT(nwords != NULL);
+
+ uint64_t *words = db->db_data;
+ *nwords =
+ (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);
+
+ ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));
+
+ uint64_t n = *nwords;
+ uint64_t j = n - 1;
+ for (uint64_t i = 0; i < n; i++) {
+ uint64_t entry = words[i];
+ if (sm_entry_is_double_word(entry)) {
+ /*
+ * Since we are populating the buffer backwards
+ * we have to be extra careful and add the two
+ * words of the double-word entry in the right
+ * order.
+ */
+ ASSERT3U(j, >, 0);
+ buf[j - 1] = entry;
+
+ i++;
+ ASSERT3U(i, <, n);
+ entry = words[i];
+ buf[j] = entry;
+ j -= 2;
+ } else {
+ ASSERT(sm_entry_is_debug(entry) ||
+ sm_entry_is_single_word(entry));
+ buf[j] = entry;
+ j--;
}
}
- zio_buf_free(entry_map, bufsize);
+ /*
+ * Assert that we wrote backwards all the
+ * way to the beginning of the buffer.
+ */
+ ASSERT3S(j, ==, -1);
+
+ dmu_buf_rele(db, FTAG);
return (error);
}
@@ -116,124 +242,122 @@ int
space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
dmu_tx_t *tx)
{
- uint64_t bufsize, len;
- uint64_t *entry_map;
- int error = 0;
-
- len = space_map_length(sm);
- bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
- entry_map = zio_buf_alloc(bufsize);
+ uint64_t bufsz = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
+ uint64_t *buf = zio_buf_alloc(bufsz);
dmu_buf_will_dirty(sm->sm_dbuf, tx);
/*
- * Since we can't move the starting offset of the space map
- * (e.g there are reference on-disk pointing to it), we destroy
- * its entries incrementally starting from the end.
+ * Ideally we would want to iterate from the beginning of the
+ * space map to the end in incremental steps. The issue with this
+ * approach is that we don't have any field on-disk that points
+ * us where to start between each step. We could try zeroing out
+ * entries that we've destroyed, but this doesn't work either as
+ * an entry that is 0 is a valid one (ALLOC for range [0x0:0x200]).
+ *
+ * As a result, we destroy its entries incrementally starting from
+ * the end after applying the callback to each of them.
*
- * The logic that follows is basically the same as the one used
- * in space_map_iterate() but it traverses the space map
- * backwards:
+ * The problem with this approach is that we cannot literally
+ * iterate through the words in the space map backwards as we
+ * can't distinguish two-word space map entries from their second
+ * word. Thus we do the following:
*
- * 1] We figure out the size of the buffer that we want to use
- * to read the on-disk space map entries.
- * 2] We figure out the offset at the end of the space map where
- * we will start reading entries into our buffer.
- * 3] We read the on-disk entries into the buffer.
- * 4] We iterate over the entries from end to beginning calling
- * the callback function on each one. As we move from entry
- * to entry we decrease the size of the space map, deleting
- * effectively each entry.
- * 5] If there are no more entries in the space map or the
- * callback returns a value other than 0, we stop iterating
- * over the space map. If there are entries remaining and
- * the callback returned zero we go back to step [1].
+ * 1] We get all the entries from the last block of the space map
+ * and put them into a buffer in reverse order. This way the
+ * last entry comes first in the buffer, the second to last is
+ * second, etc.
+ * 2] We iterate through the entries in the buffer and we apply
+ * the callback to each one. As we move from entry to entry we
+ * we decrease the size of the space map, deleting effectively
+ * each entry.
+ * 3] If there are no more entries in the space map or the callback
+ * returns a value other than 0, we stop iterating over the
+ * space map. If there are entries remaining and the callback
+ * returned 0, we go back to step [1].
*/
- uint64_t offset = 0, size = 0;
- while (len > 0 && error == 0) {
- size = MIN(bufsize, len);
-
- VERIFY(P2PHASE(size, sizeof (uint64_t)) == 0);
- VERIFY3U(size, >, 0);
- ASSERT3U(sm->sm_blksz, !=, 0);
-
- offset = len - size;
-
- IMPLY(bufsize > len, offset == 0);
- IMPLY(bufsize == len, offset == 0);
- IMPLY(bufsize < len, offset > 0);
-
-
- EQUIV(size == len, offset == 0);
- IMPLY(size < len, bufsize < len);
-
- dprintf("object=%llu offset=%llx size=%llx\n",
- space_map_object(sm), offset, size);
-
- error = dmu_read(sm->sm_os, space_map_object(sm),
- offset, size, entry_map, DMU_READ_PREFETCH);
+ int error = 0;
+ while (space_map_length(sm) > 0 && error == 0) {
+ uint64_t nwords = 0;
+ error = space_map_reversed_last_block_entries(sm, buf, bufsz,
+ &nwords);
if (error != 0)
break;
- uint64_t num_entries = size / sizeof (uint64_t);
-
- ASSERT3U(num_entries, >, 0);
+ ASSERT3U(nwords, <=, bufsz / sizeof (uint64_t));
- while (num_entries > 0) {
- uint64_t e, entry_offset, entry_size;
- maptype_t type;
+ for (uint64_t i = 0; i < nwords; i++) {
+ uint64_t e = buf[i];
- e = entry_map[num_entries - 1];
-
- ASSERT3U(num_entries, >, 0);
- ASSERT0(error);
-
- if (SM_DEBUG_DECODE(e)) {
+ if (sm_entry_is_debug(e)) {
sm->sm_phys->smp_objsize -= sizeof (uint64_t);
space_map_update(sm);
- len -= sizeof (uint64_t);
- num_entries--;
continue;
}
- type = SM_TYPE_DECODE(e);
- entry_offset = (SM_OFFSET_DECODE(e) << sm->sm_shift) +
- sm->sm_start;
- entry_size = SM_RUN_DECODE(e) << sm->sm_shift;
+ int words = 1;
+ uint64_t raw_offset, raw_run, vdev_id;
+ maptype_t type;
+ if (sm_entry_is_single_word(e)) {
+ type = SM_TYPE_DECODE(e);
+ vdev_id = SM_NO_VDEVID;
+ raw_offset = SM_OFFSET_DECODE(e);
+ raw_run = SM_RUN_DECODE(e);
+ } else {
+ ASSERT(sm_entry_is_double_word(e));
+ words = 2;
+
+ raw_run = SM2_RUN_DECODE(e);
+ vdev_id = SM2_VDEV_DECODE(e);
+
+ /* move to the second word */
+ i++;
+ e = buf[i];
+
+ ASSERT3P(i, <=, nwords);
+
+ type = SM2_TYPE_DECODE(e);
+ raw_offset = SM2_OFFSET_DECODE(e);
+ }
+
+ uint64_t entry_offset =
+ (raw_offset << sm->sm_shift) + sm->sm_start;
+ uint64_t entry_run = raw_run << sm->sm_shift;
VERIFY0(P2PHASE(entry_offset, 1ULL << sm->sm_shift));
- VERIFY0(P2PHASE(entry_size, 1ULL << sm->sm_shift));
+ VERIFY0(P2PHASE(entry_run, 1ULL << sm->sm_shift));
VERIFY3U(entry_offset, >=, sm->sm_start);
- VERIFY3U(entry_offset + entry_size, <=,
+ VERIFY3U(entry_offset, <, sm->sm_start + sm->sm_size);
+ VERIFY3U(entry_run, <=, sm->sm_size);
+ VERIFY3U(entry_offset + entry_run, <=,
sm->sm_start + sm->sm_size);
- error = callback(type, entry_offset, entry_size, arg);
+ space_map_entry_t sme = {
+ .sme_type = type,
+ .sme_vdev = vdev_id,
+ .sme_offset = entry_offset,
+ .sme_run = entry_run
+ };
+ error = callback(&sme, arg);
if (error != 0)
break;
if (type == SM_ALLOC)
- sm->sm_phys->smp_alloc -= entry_size;
+ sm->sm_phys->smp_alloc -= entry_run;
else
- sm->sm_phys->smp_alloc += entry_size;
-
- sm->sm_phys->smp_objsize -= sizeof (uint64_t);
+ sm->sm_phys->smp_alloc += entry_run;
+ sm->sm_phys->smp_objsize -= words * sizeof (uint64_t);
space_map_update(sm);
- len -= sizeof (uint64_t);
- num_entries--;
}
- IMPLY(error == 0, num_entries == 0);
- EQUIV(offset == 0 && error == 0, len == 0 && num_entries == 0);
}
- if (len == 0) {
+ if (space_map_length(sm) == 0) {
ASSERT0(error);
- ASSERT0(offset);
- ASSERT0(sm->sm_length);
ASSERT0(sm->sm_phys->smp_objsize);
ASSERT0(sm->sm_alloc);
}
- zio_buf_free(entry_map, bufsize);
+ zio_buf_free(buf, bufsz);
return (error);
}
@@ -244,16 +368,15 @@ typedef struct space_map_load_arg {
} space_map_load_arg_t;
static int
-space_map_load_callback(maptype_t type, uint64_t offset, uint64_t size,
- void *arg)
+space_map_load_callback(space_map_entry_t *sme, void *arg)
{
space_map_load_arg_t *smla = arg;
- if (type == smla->smla_type) {
- VERIFY3U(range_tree_space(smla->smla_rt) + size, <=,
+ if (sme->sme_type == smla->smla_type) {
+ VERIFY3U(range_tree_space(smla->smla_rt) + sme->sme_run, <=,
smla->smla_sm->sm_size);
- range_tree_add(smla->smla_rt, offset, size);
+ range_tree_add(smla->smla_rt, sme->sme_offset, sme->sme_run);
} else {
- range_tree_remove(smla->smla_rt, offset, size);
+ range_tree_remove(smla->smla_rt, sme->sme_offset, sme->sme_run);
}
return (0);
@@ -365,43 +488,239 @@ space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
}
}
-uint64_t
-space_map_entries(space_map_t *sm, range_tree_t *rt)
+static void
+space_map_write_intro_debug(space_map_t *sm, maptype_t maptype, dmu_tx_t *tx)
{
- avl_tree_t *t = &rt->rt_root;
- range_seg_t *rs;
- uint64_t size, entries;
+ dmu_buf_will_dirty(sm->sm_dbuf, tx);
+
+ uint64_t dentry = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
+ SM_DEBUG_ACTION_ENCODE(maptype) |
+ SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
+ SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
+
+ dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_objsize,
+ sizeof (dentry), &dentry, tx);
+
+ sm->sm_phys->smp_objsize += sizeof (dentry);
+}
+
+/*
+ * Writes one or more entries given a segment.
+ *
+ * Note: The function may release the dbuf from the pointer initially
+ * passed to it, and return a different dbuf. Also, the space map's
+ * dbuf must be dirty for the changes in sm_phys to take effect.
+ */
+static void
+space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
+ uint64_t vdev_id, uint8_t words, dmu_buf_t **dbp, void *tag, dmu_tx_t *tx)
+{
+ ASSERT3U(words, !=, 0);
+ ASSERT3U(words, <=, 2);
+
+ /* ensure the vdev_id can be represented by the space map */
+ ASSERT3U(vdev_id, <=, SM_NO_VDEVID);
+
+ /*
+ * if this is a single word entry, ensure that no vdev was
+ * specified.
+ */
+ IMPLY(words == 1, vdev_id == SM_NO_VDEVID);
+
+ dmu_buf_t *db = *dbp;
+ ASSERT3U(db->db_size, ==, sm->sm_blksz);
+
+ uint64_t *block_base = db->db_data;
+ uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
+ uint64_t *block_cursor = block_base +
+ (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);
+
+ ASSERT3P(block_cursor, <=, block_end);
+
+ uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
+ uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
+ uint64_t run_max = (words == 2) ? SM2_RUN_MAX : SM_RUN_MAX;
+
+ ASSERT3U(rs->rs_start, >=, sm->sm_start);
+ ASSERT3U(rs->rs_start, <, sm->sm_start + sm->sm_size);
+ ASSERT3U(rs->rs_end - rs->rs_start, <=, sm->sm_size);
+ ASSERT3U(rs->rs_end, <=, sm->sm_start + sm->sm_size);
+
+ while (size != 0) {
+ ASSERT3P(block_cursor, <=, block_end);
+
+ /*
+ * If we are at the end of this block, flush it and start
+ * writing again from the beginning.
+ */
+ if (block_cursor == block_end) {
+ dmu_buf_rele(db, tag);
+ uint64_t next_word_offset = sm->sm_phys->smp_objsize;
+ VERIFY0(dmu_buf_hold(sm->sm_os,
+ space_map_object(sm), next_word_offset,
+ tag, &db, DMU_READ_PREFETCH));
+ dmu_buf_will_dirty(db, tx);
+
+ /* update caller's dbuf */
+ *dbp = db;
+
+ ASSERT3U(db->db_size, ==, sm->sm_blksz);
+
+ block_base = db->db_data;
+ block_cursor = block_base;
+ block_end = block_base +
+ (db->db_size / sizeof (uint64_t));
+ }
+
+ /*
+ * If we are writing a two-word entry and we only have one
+ * word left on this block, just pad it with an empty debug
+ * entry and write the two-word entry in the next block.
+ */
+ uint64_t *next_entry = block_cursor + 1;
+ if (next_entry == block_end && words > 1) {
+ ASSERT3U(words, ==, 2);
+ *block_cursor = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
+ SM_DEBUG_ACTION_ENCODE(0) |
+ SM_DEBUG_SYNCPASS_ENCODE(0) |
+ SM_DEBUG_TXG_ENCODE(0);
+ block_cursor++;
+ sm->sm_phys->smp_objsize += sizeof (uint64_t);
+ ASSERT3P(block_cursor, ==, block_end);
+ continue;
+ }
+
+ uint64_t run_len = MIN(size, run_max);
+ switch (words) {
+ case 1:
+ *block_cursor = SM_OFFSET_ENCODE(start) |
+ SM_TYPE_ENCODE(maptype) |
+ SM_RUN_ENCODE(run_len);
+ block_cursor++;
+ break;
+ case 2:
+ /* write the first word of the entry */
+ *block_cursor = SM_PREFIX_ENCODE(SM2_PREFIX) |
+ SM2_RUN_ENCODE(run_len) |
+ SM2_VDEV_ENCODE(vdev_id);
+ block_cursor++;
+
+ /* move on to the second word of the entry */
+ ASSERT3P(block_cursor, <, block_end);
+ *block_cursor = SM2_TYPE_ENCODE(maptype) |
+ SM2_OFFSET_ENCODE(start);
+ block_cursor++;
+ break;
+ default:
+ panic("%d-word space map entries are not supported",
+ words);
+ break;
+ }
+ sm->sm_phys->smp_objsize += words * sizeof (uint64_t);
+
+ start += run_len;
+ size -= run_len;
+ }
+ ASSERT0(size);
+
+}
+
+/*
+ * Note: The space map's dbuf must be dirty for the changes in sm_phys to
+ * take effect.
+ */
+static void
+space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
+ uint64_t vdev_id, dmu_tx_t *tx)
+{
+ spa_t *spa = tx->tx_pool->dp_spa;
+ dmu_buf_t *db;
+
+ space_map_write_intro_debug(sm, maptype, tx);
+
+#ifdef DEBUG
/*
- * All space_maps always have a debug entry so account for it here.
+ * We do this right after we write the intro debug entry
+ * because the estimate does not take it into account.
*/
- entries = 1;
+ uint64_t initial_objsize = sm->sm_phys->smp_objsize;
+ uint64_t estimated_growth =
+ space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
+ uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
+#endif
/*
- * Traverse the range tree and calculate the number of space map
- * entries that would be required to write out the range tree.
+ * Find the offset right after the last word in the space map
+ * and use that to get a hold of the last block, so we can
+ * start appending to it.
*/
- for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
- size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
- entries += howmany(size, SM_RUN_MAX);
+ uint64_t next_word_offset = sm->sm_phys->smp_objsize;
+ VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
+ next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
+ ASSERT3U(db->db_size, ==, sm->sm_blksz);
+
+ dmu_buf_will_dirty(db, tx);
+
+ avl_tree_t *t = &rt->rt_root;
+ for (range_seg_t *rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
+ uint64_t offset = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
+ uint64_t length = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
+ uint8_t words = 1;
+
+ /*
+ * We only write two-word entries when both of the following
+ * are true:
+ *
+ * [1] The feature is enabled.
+ * [2] The offset or run is too big for a single-word entry,
+ * or the vdev_id is set (meaning not equal to
+ * SM_NO_VDEVID).
+ *
+ * Note that for purposes of testing we've added the case that
+ * we write two-word entries occasionally when the feature is
+ * enabled and zfs_force_some_double_word_sm_entries has been
+ * set.
+ */
+ if (spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_V2) &&
+ (offset >= (1ULL << SM_OFFSET_BITS) ||
+ length > SM_RUN_MAX ||
+ vdev_id != SM_NO_VDEVID ||
+ (zfs_force_some_double_word_sm_entries &&
+ spa_get_random(100) == 0)))
+ words = 2;
+
+ space_map_write_seg(sm, rs, maptype, vdev_id, words,
+ &db, FTAG, tx);
}
- return (entries);
+
+ dmu_buf_rele(db, FTAG);
+
+#ifdef DEBUG
+ /*
+ * We expect our estimation to be based on the worst case
+ * scenario [see comment in space_map_estimate_optimal_size()].
+ * Therefore we expect the actual objsize to be equal or less
+ * than whatever we estimated it to be.
+ */
+ ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_objsize);
+#endif
}
+/*
+ * Note: This function manipulates the state of the given space map but
+ * does not hold any locks implicitly. Thus the caller is responsible
+ * for synchronizing writes to the space map.
+ */
void
space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
- dmu_tx_t *tx)
+ uint64_t vdev_id, dmu_tx_t *tx)
{
objset_t *os = sm->sm_os;
- spa_t *spa = dmu_objset_spa(os);
- avl_tree_t *t = &rt->rt_root;
- range_seg_t *rs;
- uint64_t size, total, rt_space, nodes;
- uint64_t *entry, *entry_map, *entry_map_end;
- uint64_t expected_entries, actual_entries = 1;
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
VERIFY3U(space_map_object(sm), !=, 0);
+
dmu_buf_will_dirty(sm->sm_dbuf, tx);
/*
@@ -421,58 +740,10 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
else
sm->sm_phys->smp_alloc -= range_tree_space(rt);
- expected_entries = space_map_entries(sm, rt);
-
- entry_map = zio_buf_alloc(sm->sm_blksz);
- entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
- entry = entry_map;
-
- *entry++ = SM_DEBUG_ENCODE(1) |
- SM_DEBUG_ACTION_ENCODE(maptype) |
- SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
- SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
-
- total = 0;
- nodes = avl_numnodes(&rt->rt_root);
- rt_space = range_tree_space(rt);
- for (rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
- uint64_t start;
-
- size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
- start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
+ uint64_t nodes = avl_numnodes(&rt->rt_root);
+ uint64_t rt_space = range_tree_space(rt);
- total += size << sm->sm_shift;
-
- while (size != 0) {
- uint64_t run_len;
-
- run_len = MIN(size, SM_RUN_MAX);
-
- if (entry == entry_map_end) {
- dmu_write(os, space_map_object(sm),
- sm->sm_phys->smp_objsize, sm->sm_blksz,
- entry_map, tx);
- sm->sm_phys->smp_objsize += sm->sm_blksz;
- entry = entry_map;
- }
-
- *entry++ = SM_OFFSET_ENCODE(start) |
- SM_TYPE_ENCODE(maptype) |
- SM_RUN_ENCODE(run_len);
-
- start += run_len;
- size -= run_len;
- actual_entries++;
- }
- }
-
- if (entry != entry_map) {
- size = (entry - entry_map) * sizeof (uint64_t);
- dmu_write(os, space_map_object(sm), sm->sm_phys->smp_objsize,
- size, entry_map, tx);
- sm->sm_phys->smp_objsize += size;
- }
- ASSERT3U(expected_entries, ==, actual_entries);
+ space_map_write_impl(sm, rt, maptype, vdev_id, tx);
/*
* Ensure that the space_map's accounting wasn't changed
@@ -480,9 +751,6 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
*/
VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
VERIFY3U(range_tree_space(rt), ==, rt_space);
- VERIFY3U(range_tree_space(rt), ==, total);
-
- zio_buf_free(entry_map, sm->sm_blksz);
}
static int
@@ -524,7 +792,6 @@ space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
space_map_close(sm);
return (error);
}
-
*smp = sm;
return (0);
@@ -656,6 +923,133 @@ space_map_free(space_map_t *sm, dmu_tx_t *tx)
sm->sm_object = 0;
}
+/*
+ * Given a range tree, this makes a worst-case estimate of how much
+ * space the tree's segments would take if they were written to
+ * the given space map.
+ */
+uint64_t
+space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt,
+ uint64_t vdev_id)
+{
+ spa_t *spa = dmu_objset_spa(sm->sm_os);
+ uint64_t shift = sm->sm_shift;
+ uint64_t *histogram = rt->rt_histogram;
+ uint64_t entries_for_seg = 0;
+
+ /*
+ * In order to get a quick estimate of the optimal size that this
+ * range tree would have on-disk as a space map, we iterate through
+ * its histogram buckets instead of iterating through its nodes.
+ *
+ * Note that this is a highest-bound/worst-case estimate for the
+ * following reasons:
+ *
+ * 1] We assume that we always add a debug padding for each block
+ * we write and we also assume that we start at the last word
+ * of a block attempting to write a two-word entry.
+ * 2] Rounding up errors due to the way segments are distributed
+ * in the buckets of the range tree's histogram.
+ * 3] The activation of zfs_force_some_double_word_sm_entries
+ * (tunable) when testing.
+ *
+ * = Math and Rounding Errors =
+ *
+ * rt_histogram[i] bucket of a range tree represents the number
+ * of entries in [2^i, (2^(i+1))-1] of that range_tree. Given
+ * that, we want to divide the buckets into groups: Buckets that
+ * can be represented using a single-word entry, ones that can
+ * be represented with a double-word entry, and ones that can
+ * only be represented with multiple two-word entries.
+ *
+ * [Note that if the new encoding feature is not enabled there
+ * are only two groups: single-word entry buckets and multiple
+ * single-word entry buckets. The information below assumes
+ * two-word entries enabled, but it can easily be applied when
+ * the feature is not enabled]
+ *
+ * To find the highest bucket that can be represented with a
+ * single-word entry we look at the maximum run that such entry
+ * can have, which is 2^(SM_RUN_BITS + sm_shift) [remember that
+ * the run of a space map entry is shifted by sm_shift, thus we
+ * add it to the exponent]. This way, excluding the value of the
+ * maximum run that can be represented by a single-word entry,
+ * all runs that are smaller exist in buckets 0 to
+ * SM_RUN_BITS + shift - 1.
+ *
+ * To find the highest bucket that can be represented with a
+ * double-word entry, we follow the same approach. Finally, any
+ * buckets higher than that are represented with multiple two-word
+ * entries. To be more specific, if the highest bucket whose
+ * segments can be represented with a single two-word entry is X,
+ * then bucket X+1 will need 2 two-word entries for each of its
+ * segments, X+2 will need 4, X+3 will need 8, ...etc.
+ *
+ * With all of the above we make our estimation based on bucket
+ * groups. There is a rounding error though. As we mentioned in
+ * the example with the one-word entry, the maximum run that can
+ * be represented in a one-word entry 2^(SM_RUN_BITS + shift) is
+ * not part of bucket SM_RUN_BITS + shift - 1. Thus, segments of
+ * that length fall into the next bucket (and bucket group) where
+ * we start counting two-word entries and this is one more reason
+ * why the estimated size may end up being bigger than the actual
+ * size written.
+ */
+ uint64_t size = 0;
+ uint64_t idx = 0;
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2) ||
+ (vdev_id == SM_NO_VDEVID && sm->sm_size < SM_OFFSET_MAX)) {
+
+ /*
+ * If we are trying to force some double word entries just
+ * assume the worst-case of every single word entry being
+ * written as a double word entry.
+ */
+ uint64_t entry_size =
+ (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2) &&
+ zfs_force_some_double_word_sm_entries) ?
+ (2 * sizeof (uint64_t)) : sizeof (uint64_t);
+
+ uint64_t single_entry_max_bucket = SM_RUN_BITS + shift - 1;
+ for (; idx <= single_entry_max_bucket; idx++)
+ size += histogram[idx] * entry_size;
+
+ if (!spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2)) {
+ for (; idx < RANGE_TREE_HISTOGRAM_SIZE; idx++) {
+ ASSERT3U(idx, >=, single_entry_max_bucket);
+ entries_for_seg =
+ 1ULL << (idx - single_entry_max_bucket);
+ size += histogram[idx] *
+ entries_for_seg * entry_size;
+ }
+ return (size);
+ }
+ }
+
+ ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2));
+
+ uint64_t double_entry_max_bucket = SM2_RUN_BITS + shift - 1;
+ for (; idx <= double_entry_max_bucket; idx++)
+ size += histogram[idx] * 2 * sizeof (uint64_t);
+
+ for (; idx < RANGE_TREE_HISTOGRAM_SIZE; idx++) {
+ ASSERT3U(idx, >=, double_entry_max_bucket);
+ entries_for_seg = 1ULL << (idx - double_entry_max_bucket);
+ size += histogram[idx] *
+ entries_for_seg * 2 * sizeof (uint64_t);
+ }
+
+ /*
+ * Assume the worst case where we start with the padding at the end
+ * of the current block and we add an extra padding entry at the end
+ * of all subsequent blocks.
+ */
+ size += ((size / sm->sm_blksz) + 1) * sizeof (uint64_t);
+
+ return (size);
+}
+
uint64_t
space_map_object(space_map_t *sm)
{
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 936d805389..b2d85079d3 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -138,6 +138,7 @@ _NOTE(CONSTCOND) } while (0)
#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
#define SPA_COMPRESSBITS 7
+#define SPA_VDEVBITS 24
/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
@@ -168,15 +169,15 @@ typedef struct zio_cksum_salt {
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 0 | vdev1 | GRID | ASIZE |
+ * 0 | pad | vdev1 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 1 |G| offset1 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 2 | vdev2 | GRID | ASIZE |
+ * 2 | pad | vdev2 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 3 |G| offset2 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
- * 4 | vdev3 | GRID | ASIZE |
+ * 4 | pad | vdev3 | GRID | ASIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@@ -355,8 +356,9 @@ typedef struct blkptr {
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
-#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32)
-#define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x)
+#define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, SPA_VDEVBITS)
+#define DVA_SET_VDEV(dva, x) \
+ BF64_SET((dva)->dva_word[0], 32, SPA_VDEVBITS, x)
#define DVA_GET_OFFSET(dva) \
BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0)
diff --git a/usr/src/uts/common/fs/zfs/sys/space_map.h b/usr/src/uts/common/fs/zfs/sys/space_map.h
index 98b87269cb..d3d852978a 100644
--- a/usr/src/uts/common/fs/zfs/sys/space_map.h
+++ b/usr/src/uts/common/fs/zfs/sys/space_map.h
@@ -93,50 +93,100 @@ typedef struct space_map {
/*
* debug entry
*
- * 1 3 10 50
- * ,---+--------+------------+---------------------------------.
- * | 1 | action | syncpass | txg (lower bits) |
- * `---+--------+------------+---------------------------------'
- * 63 62 60 59 50 49 0
+ * 2 2 10 50
+ * +-----+-----+------------+----------------------------------+
+ * | 1 0 | act | syncpass | txg (lower bits) |
+ * +-----+-----+------------+----------------------------------+
+ * 63 62 61 60 59 50 49 0
*
*
- * non-debug entry
+ * one-word entry
*
* 1 47 1 15
- * ,-----------------------------------------------------------.
+ * +-----------------------------------------------------------+
* | 0 | offset (sm_shift units) | type | run |
- * `-----------------------------------------------------------'
- * 63 62 17 16 15 0
+ * +-----------------------------------------------------------+
+ * 63 62 16 15 14 0
+ *
+ *
+ * two-word entry
+ *
+ * 2 2 36 24
+ * +-----+-----+---------------------------+-------------------+
+ * | 1 1 | pad | run | vdev |
+ * +-----+-----+---------------------------+-------------------+
+ * 63 62 61 60 59 24 23 0
+ *
+ * 1 63
+ * +------+----------------------------------------------------+
+ * | type | offset |
+ * +------+----------------------------------------------------+
+ * 63 62 0
+ *
+ * Note that a two-word entry will not straddle a block boundary.
+ * If necessary, the last word of a block will be padded with a
+ * debug entry (with act = syncpass = txg = 0).
*/
-/* All this stuff takes and returns bytes */
-#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, 15) + 1)
-#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, 15)
-#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
-#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
-#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, 47)
-#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, 47)
-#define SM_DEBUG_DECODE(x) BF64_DECODE(x, 63, 1)
-#define SM_DEBUG_ENCODE(x) BF64_ENCODE(x, 63, 1)
+typedef enum {
+ SM_ALLOC,
+ SM_FREE
+} maptype_t;
+
+typedef struct space_map_entry {
+ maptype_t sme_type;
+ uint32_t sme_vdev; /* max is 2^24-1; SM_NO_VDEVID if not present */
+ uint64_t sme_offset; /* max is 2^63-1; units of sm_shift */
+ uint64_t sme_run; /* max is 2^36; units of sm_shift */
+} space_map_entry_t;
+
+#define SM_NO_VDEVID (1 << SPA_VDEVBITS)
-#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 3)
-#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 3)
+/* one-word entry constants */
+#define SM_DEBUG_PREFIX 2
+#define SM_OFFSET_BITS 47
+#define SM_RUN_BITS 15
+/* two-word entry constants */
+#define SM2_PREFIX 3
+#define SM2_OFFSET_BITS 63
+#define SM2_RUN_BITS 36
+
+#define SM_PREFIX_DECODE(x) BF64_DECODE(x, 62, 2)
+#define SM_PREFIX_ENCODE(x) BF64_ENCODE(x, 62, 2)
+
+#define SM_DEBUG_ACTION_DECODE(x) BF64_DECODE(x, 60, 2)
+#define SM_DEBUG_ACTION_ENCODE(x) BF64_ENCODE(x, 60, 2)
#define SM_DEBUG_SYNCPASS_DECODE(x) BF64_DECODE(x, 50, 10)
#define SM_DEBUG_SYNCPASS_ENCODE(x) BF64_ENCODE(x, 50, 10)
-
#define SM_DEBUG_TXG_DECODE(x) BF64_DECODE(x, 0, 50)
#define SM_DEBUG_TXG_ENCODE(x) BF64_ENCODE(x, 0, 50)
-#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
-
-typedef enum {
- SM_ALLOC,
- SM_FREE
-} maptype_t;
-
-typedef int (*sm_cb_t)(maptype_t type, uint64_t offset, uint64_t size,
- void *arg);
+#define SM_OFFSET_DECODE(x) BF64_DECODE(x, 16, SM_OFFSET_BITS)
+#define SM_OFFSET_ENCODE(x) BF64_ENCODE(x, 16, SM_OFFSET_BITS)
+#define SM_TYPE_DECODE(x) BF64_DECODE(x, 15, 1)
+#define SM_TYPE_ENCODE(x) BF64_ENCODE(x, 15, 1)
+#define SM_RUN_DECODE(x) (BF64_DECODE(x, 0, SM_RUN_BITS) + 1)
+#define SM_RUN_ENCODE(x) BF64_ENCODE((x) - 1, 0, SM_RUN_BITS)
+#define SM_RUN_MAX SM_RUN_DECODE(~0ULL)
+#define SM_OFFSET_MAX SM_OFFSET_DECODE(~0ULL)
+
+#define SM2_RUN_DECODE(x) (BF64_DECODE(x, SPA_VDEVBITS, SM2_RUN_BITS) + 1)
+#define SM2_RUN_ENCODE(x) BF64_ENCODE((x) - 1, SPA_VDEVBITS, SM2_RUN_BITS)
+#define SM2_VDEV_DECODE(x) BF64_DECODE(x, 0, SPA_VDEVBITS)
+#define SM2_VDEV_ENCODE(x) BF64_ENCODE(x, 0, SPA_VDEVBITS)
+#define SM2_TYPE_DECODE(x) BF64_DECODE(x, SM2_OFFSET_BITS, 1)
+#define SM2_TYPE_ENCODE(x) BF64_ENCODE(x, SM2_OFFSET_BITS, 1)
+#define SM2_OFFSET_DECODE(x) BF64_DECODE(x, 0, SM2_OFFSET_BITS)
+#define SM2_OFFSET_ENCODE(x) BF64_ENCODE(x, 0, SM2_OFFSET_BITS)
+#define SM2_RUN_MAX SM2_RUN_DECODE(~0ULL)
+#define SM2_OFFSET_MAX SM2_OFFSET_DECODE(~0ULL)
+
+boolean_t sm_entry_is_debug(uint64_t e);
+boolean_t sm_entry_is_single_word(uint64_t e);
+boolean_t sm_entry_is_double_word(uint64_t e);
+
+typedef int (*sm_cb_t)(space_map_entry_t *sme, void *arg);
int space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype);
int space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg);
@@ -154,7 +204,9 @@ uint64_t space_map_allocated(space_map_t *sm);
uint64_t space_map_length(space_map_t *sm);
void space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
- dmu_tx_t *tx);
+ uint64_t vdev_id, dmu_tx_t *tx);
+uint64_t space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt,
+ uint64_t vdev_id);
void space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx);
uint64_t space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx);
void space_map_free(space_map_t *sm, dmu_tx_t *tx);
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index e23e4a01c1..ae7f4b501c 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -2459,7 +2459,7 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
mutex_exit(&vd->vdev_dtl_lock);
space_map_truncate(vd->vdev_dtl_sm, vdev_dtl_sm_blksz, tx);
- space_map_write(vd->vdev_dtl_sm, rtsync, SM_ALLOC, tx);
+ space_map_write(vd->vdev_dtl_sm, rtsync, SM_ALLOC, SM_NO_VDEVID, tx);
range_tree_vacate(rtsync, NULL, NULL);
range_tree_destroy(rtsync);
diff --git a/usr/src/uts/common/fs/zfs/vdev_indirect.c b/usr/src/uts/common/fs/zfs/vdev_indirect.c
index 988e216871..304453aa94 100644
--- a/usr/src/uts/common/fs/zfs/vdev_indirect.c
+++ b/usr/src/uts/common/fs/zfs/vdev_indirect.c
@@ -728,7 +728,7 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx)
space_map_object(vd->vdev_obsolete_sm));
space_map_write(vd->vdev_obsolete_sm,
- vd->vdev_obsolete_segments, SM_ALLOC, tx);
+ vd->vdev_obsolete_segments, SM_ALLOC, SM_NO_VDEVID, tx);
space_map_update(vd->vdev_obsolete_sm);
range_tree_vacate(vd->vdev_obsolete_segments, NULL, NULL);
}
diff --git a/usr/src/uts/common/fs/zfs/vdev_indirect_mapping.c b/usr/src/uts/common/fs/zfs/vdev_indirect_mapping.c
index ea80fbc473..02999aae72 100644
--- a/usr/src/uts/common/fs/zfs/vdev_indirect_mapping.c
+++ b/usr/src/uts/common/fs/zfs/vdev_indirect_mapping.c
@@ -14,7 +14,7 @@
*/
/*
- * Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2015, 2017 by Delphix. All rights reserved.
*/
#include <sys/dmu_tx.h>
@@ -536,14 +536,13 @@ typedef struct load_obsolete_space_map_arg {
} load_obsolete_space_map_arg_t;
static int
-load_obsolete_sm_callback(maptype_t type, uint64_t offset, uint64_t size,
- void *arg)
+load_obsolete_sm_callback(space_map_entry_t *sme, void *arg)
{
load_obsolete_space_map_arg_t *losma = arg;
- ASSERT3S(type, ==, SM_ALLOC);
+ ASSERT3S(sme->sme_type, ==, SM_ALLOC);
vdev_indirect_mapping_increment_obsolete_count(losma->losma_vim,
- offset, size, losma->losma_counts);
+ sme->sme_offset, sme->sme_run, losma->losma_counts);
return (0);
}
diff --git a/usr/src/uts/common/inet/ipf/ip_log.c b/usr/src/uts/common/inet/ipf/ip_log.c
index 193d9024b4..584ee42d9a 100644
--- a/usr/src/uts/common/inet/ipf/ip_log.c
+++ b/usr/src/uts/common/inet/ipf/ip_log.c
@@ -31,8 +31,13 @@
# include <osreldate.h>
# endif
#endif
-#ifndef SOLARIS
-# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#include <sys/errno.h>
#include <sys/types.h>
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h
index 2522c4031e..f624ffd953 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h
@@ -28,14 +28,19 @@
#if defined(_KERNEL) || defined(KERNEL) || defined(__KERNEL__)
# undef KERNEL
# undef _KERNEL
-# undef __KERNEL__
+# undef __KERNEL__
# define KERNEL
# define _KERNEL
-# define __KERNEL__
+# define __KERNEL__
#endif
-#ifndef SOLARIS
-#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#if SOLARIS2 >= 8
# ifndef USE_INET6
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
index 2df1a0b115..90fc701ae1 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_fil.h
@@ -17,8 +17,13 @@
#include "netinet/ip_compat.h"
#include <sys/zone.h>
-#ifndef SOLARIS
-# define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#ifndef __P
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_nat.h b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h
index 000ee90dce..d118cdf354 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_nat.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_nat.h
@@ -13,8 +13,13 @@
#ifndef __IP_NAT_H__
#define __IP_NAT_H__
-#ifndef SOLARIS
-#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
diff --git a/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
index 7c490673e0..95b7eb56d2 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_proxy.h
@@ -9,13 +9,16 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifndef __IP_PROXY_H__
#define __IP_PROXY_H__
-#ifndef SOLARIS
-#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51)
diff --git a/usr/src/uts/common/inet/ipf/opts.h b/usr/src/uts/common/inet/ipf/opts.h
index a3cec03aba..3579f9f8d0 100644
--- a/usr/src/uts/common/inet/ipf/opts.h
+++ b/usr/src/uts/common/inet/ipf/opts.h
@@ -12,8 +12,13 @@
#ifndef __OPTS_H__
#define __OPTS_H__
-#ifndef SOLARIS
-#define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#ifdef SOLARIS
+#undef SOLARIS
+#endif
+#if (defined(sun) && (defined(__svr4__) || defined(__SVR4)))
+#define SOLARIS (1)
+#else
+#define SOLARIS (0)
#endif
#define OPT_REMOVE 0x000001
#define OPT_DEBUG 0x000002
diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c
index c6b919de67..e78c3735ad 100644
--- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c
+++ b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c
@@ -862,7 +862,7 @@ ibnex_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
break;
}
- if ((apid_n = ibnex_get_apid(dcp)) == '\0') {
+ if ((apid_n = ibnex_get_apid(dcp)) == NULL) {
IBTF_DPRINTF_L2("ibnex",
"%s: ibnex_get_apid failed", msg);
rv = EIO;
@@ -970,7 +970,7 @@ ibnex_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
break;
}
- if ((apid_n = ibnex_get_apid(dcp)) == '\0') {
+ if ((apid_n = ibnex_get_apid(dcp)) == NULL) {
IBTF_DPRINTF_L2("ibnex",
"%s: ibnex_get_apid failed", msg);
rv = EIO;
diff --git a/usr/src/uts/common/io/mii/mii.c b/usr/src/uts/common/io/mii/mii.c
index bfff2a52e8..b024899783 100644
--- a/usr/src/uts/common/io/mii/mii.c
+++ b/usr/src/uts/common/io/mii/mii.c
@@ -61,7 +61,7 @@ enum {
MII_ELOOP,
};
-static const char *mii_errors[] = {
+static const char * const mii_errors[] = {
"",
"Failure resetting PHY.",
"Failure starting PHY.",
@@ -71,7 +71,7 @@ static const char *mii_errors[] = {
};
/* Indexed by XCVR_ type */
-static const const char *mii_xcvr_types[] = {
+static const char * const mii_xcvr_types[] = {
"Undefined",
"Unknown",
"10 Mbps",
diff --git a/usr/src/uts/common/io/nxge/npi/npi_mac.c b/usr/src/uts/common/io/nxge/npi/npi_mac.c
index d196bc80a3..8d808577f9 100644
--- a/usr/src/uts/common/io/nxge/npi/npi_mac.c
+++ b/usr/src/uts/common/io/nxge/npi/npi_mac.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <npi_mac.h>
#define MIF_DELAY 500
@@ -1292,6 +1290,7 @@ npi_mac_port_attr(npi_handle_t handle, io_op_t op, uint8_t portn,
" npi_mac_port_attr" "MAC_PORT_IPG:"
" Invalid Input: portn <%d>",
portn));
+ /* FALLTHROUGH */
default:
return (NPI_FAILURE | NPI_MAC_PORT_INVALID(portn));
}
@@ -1390,6 +1389,7 @@ npi_mac_port_attr(npi_handle_t handle, io_op_t op, uint8_t portn,
" MAC_PORT_IPG:"
" Invalid Input: portn <%d>",
portn));
+ /* FALLTHROUGH */
default:
return (NPI_FAILURE | NPI_MAC_PORT_INVALID(portn));
}
diff --git a/usr/src/uts/common/io/nxge/npi/npi_vir.c b/usr/src/uts/common/io/nxge/npi/npi_vir.c
index 017f64d108..6c20963309 100644
--- a/usr/src/uts/common/io/nxge/npi/npi_vir.c
+++ b/usr/src/uts/common/io/nxge/npi/npi_vir.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <npi_vir.h>
/* One register only */
@@ -1052,6 +1050,7 @@ npi_ldsv_ld_get(npi_handle_t handle, uint8_t ldg, uint8_t ld,
case VECTOR2:
offset = LDSV2_REG + LDSV_OFFSET(ldg);
+ break;
default:
NPI_ERROR_MSG((handle.function, NPI_ERR_CTL, "npi_ldsv_get"
diff --git a/usr/src/uts/common/io/nxge/nxge_zcp.c b/usr/src/uts/common/io/nxge/nxge_zcp.c
index 7fd9a3ad3c..a56efd7ea8 100644
--- a/usr/src/uts/common/io/nxge/nxge_zcp.c
+++ b/usr/src/uts/common/io/nxge/nxge_zcp.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <nxge_impl.h>
#include <nxge_zcp.h>
#include <nxge_ipp.h>
@@ -358,6 +356,7 @@ nxge_zcp_inject_err(p_nxge_t nxgep, uint32_t err_id)
break;
}
}
+ /* FALLTHROUGH */
default:
if (err_id == NXGE_FM_EREPORT_ZCP_RRFIFO_OVERRUN)
diff --git a/usr/src/uts/intel/io/dnet/dnet.c b/usr/src/uts/intel/io/dnet/dnet.c
index a6badb9b4b..e1592ad902 100644
--- a/usr/src/uts/intel/io/dnet/dnet.c
+++ b/usr/src/uts/intel/io/dnet/dnet.c
@@ -3099,6 +3099,7 @@ check_srom_valid(uchar_t *vi)
crc ^= 7;
}
}
+ /* FALLTHROUGH */
default:
return (0);