diff options
author | Dan McDonald <danmcd@joyent.com> | 2021-02-05 00:21:19 -0500 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2021-02-05 00:21:21 -0500 |
commit | 4d320273bce348b033c83d1742ce873c351c16d4 (patch) | |
tree | 3334438c3d25e918c9b2761ece90ef6b820ea62e /usr/src | |
parent | fc716b10604146af1eb12bfb4f10a04ab2946fd4 (diff) | |
parent | 3c2328bf3bf6527c6b28445336d32183a277b1e1 (diff) | |
download | illumos-joyent-4d320273bce348b033c83d1742ce873c351c16d4.tar.gz |
[illumos-gate merge]
commit 3c2328bf3bf6527c6b28445336d32183a277b1e1
13471 zoneadmd should infer zone.max-processes
13472 Man pages missing the max-processes resource control
commit 6d177d7a4a2c4b126a6f5ecc7a226e1dcac383ed
13390 clean up libdlpi warning gags
commit f915e6d33fea11e547f382c9f44614aa99739ba5
13467 Update tzdata to 2021a
commit 602e131c272200d35797cbad3554783e0c0fdb04
13493 usba: multiply-defined symbols
commit f6ef42236c2f60c5f16c454c9b574a4dc35e1cab
13092 ZFS I/O pipeline should use the pageout_reserve pool
commit 2d9166aee5315081107056f3d663e4adee4c1c2a
13097 improve VM tunables for modern systems
13107 pageout should use hrtime instead of lbolt
commit 3770e1fa2763b08d66814eeb974a28a87198fee1
13490 4133 missed Makefile.asthdr
commit 475b46fedca96b852acd24d70191d29fb444a008
13486 cmd/ast should use fewer FRC dependencies
commit 6960cd891105f9a002a0327e31a6182f9c6de88e
13379 bhyve upstream sync 2020 Dec
Conflicts:
git add usr/src/uts/common/os/vm_pageout.c
git add usr/src/cmd/zoneadmd/vplat.c
git add usr/src/man/man5/resource_controls.5
vm_pageout.c conflict resolution by Andy Fiddaman <omnios@citrus-it.co.uk>
Diffstat (limited to 'usr/src')
70 files changed, 4672 insertions, 2036 deletions
diff --git a/usr/src/cmd/ast/ksh/builtins/Makefile b/usr/src/cmd/ast/ksh/builtins/Makefile index 8ae2eb1b67..db529aef57 100644 --- a/usr/src/cmd/ast/ksh/builtins/Makefile +++ b/usr/src/cmd/ast/ksh/builtins/Makefile @@ -108,12 +108,9 @@ $(XPG4SH:%=$(ROOTXPG4BIN)/%): $(ALIASPROG:%=$(ROOTBIN)/%): $(ROOTPROG) $(RM) $@; $(LN) $(ROOTPROG) $@ -.KEEP_STATE: - # Set common AST build flags (e.g., needed to support the math stuff). include ../../Makefile.ast - ASTSRC= $(C_AST)/src/cmd/ksh93 OBJECTS= \ diff --git a/usr/src/cmd/ast/libast/Makefile.com b/usr/src/cmd/ast/libast/Makefile.com index 862c2f8e67..0a29efde91 100644 --- a/usr/src/cmd/ast/libast/Makefile.com +++ b/usr/src/cmd/ast/libast/Makefile.com @@ -96,10 +96,12 @@ CERRWARN += -_gcc=-Wno-address SMATCH= off +.KEEP_STATE: + all: install_h mkpicdirs .WAIT $(LIBS) mkpicdirs: - mkdir -p $(LOBJDIRS:%=pics/%) + @mkdir -p $(LOBJDIRS:%=pics/%) include $(SRC)/lib/Makefile.targ @@ -110,7 +112,16 @@ pics/%.o: $(ASTSRC)/%.c ###################################################################### # Header file generation -ast/prototyped.h: FRC +ast/%:= FILEMODE= 0644 + +# The HEADERGEN headers are generated from the corresponding FEATURE/ file +# with any ast_ prefix removed. +$(HEADERGEN:%=ast/%): $(FEATURES:%=FEATURE/%) + src=`echo $(@F:%.h=%) | sed 's/^ast_//'`; \ + $(AST_PROTO) FEATURE/$$src > $@ + $(POST_PROCESS_AST) $@ + +ast/prototyped.h: $(AST_TOOLS)/proto $(MKDIR) -p $(@D) $(AST_TOOLS)/proto -f /dev/null > $@ @@ -122,24 +133,25 @@ ast/ast_common.h: ast/prototyped.h ast/lc.h: lc.h $(AST_PROTO) lc.h > ast/lc.h -# The HEADERGEN headers are generated from the corresponding FEATURE/ file -# with any ast_ prefix removed. 
-$(HEADERGEN:%=ast/%): FRC - src=`echo $(@F:%.h=%) | sed 's/^ast_//'`; \ - $(AST_PROTO) FEATURE/$$src > $@ +ast/%.h: $(ASTSRC)/include/%.h + $(INS.file) $(POST_PROCESS_AST) $@ -$(HEADERSRC:%=ast/%): FRC - src=$(@F) ;\ - [[ $$src = ast_namval.h ]] && src=namval.h ;\ - for d in include $(LOBJDIRS) std; do \ - if [[ -f $(ASTSRC)/$$d/$$src ]]; then \ - echo "$$d/$$src -> $@"; \ - cp $(ASTSRC)/$$d/$$src $@; \ - $(POST_PROCESS_AST) $@; \ - break; \ - fi; \ - done +ast/%.h: $(ASTSRC)/comp/%.h + $(INS.file) + $(POST_PROCESS_AST) $@ + +ast/%.h: $(ASTSRC)/cdt/%.h + $(INS.file) + $(POST_PROCESS_AST) $@ + +ast/%.h: $(ASTSRC)/std/%.h + $(INS.file) + $(POST_PROCESS_AST) $@ + +ast/ast_namval.h: $(ASTSRC)/include/namval.h + $(CP) $(ASTSRC)/include/namval.h $@ + $(POST_PROCESS_AST) $@ CLOBBERFILES += ast_common.h t.c CLOBBERFILES += ast/* diff --git a/usr/src/cmd/ast/libcmd/Makefile.com b/usr/src/cmd/ast/libcmd/Makefile.com index 1b75255ba0..484fa737c7 100644 --- a/usr/src/cmd/ast/libcmd/Makefile.com +++ b/usr/src/cmd/ast/libcmd/Makefile.com @@ -77,6 +77,8 @@ CERRWARN += -_gcc=-Wno-implicit-function-declaration # not linted SMATCH=off +.KEEP_STATE: + all: install_h .WAIT $(LIBS) include $(SRC)/lib/Makefile.targ @@ -88,12 +90,12 @@ pics/%.o: $(ASTSRC)/%.c ###################################################################### # Header file generation -$(HEADERSRC:%=ast/%): FRC +$(HEADERSRC:%=ast/%): $(HEADERSRC:%=$(ASTSRC)/%) $(MKDIR) -p $(@D) $(CP) $(ASTSRC)/$(@F) $@ # This rule is derived from $(CONTRIB)/ast/src/lib/libcmd/Makefile -ast/cmdext.h: +ast/cmdext.h: $(OBJECTS:%.o=$(ASTSRC)/%.c) $(MKDIR) -p $(@D) echo '#include <shcmd.h>' > $@.tmp $(SED) \ diff --git a/usr/src/cmd/ast/libdll/Makefile.com b/usr/src/cmd/ast/libdll/Makefile.com index b6164335cc..94cc76115f 100644 --- a/usr/src/cmd/ast/libdll/Makefile.com +++ b/usr/src/cmd/ast/libdll/Makefile.com @@ -72,6 +72,8 @@ CERRWARN += $(CNOWARN_UNINIT) # needs work SMOFF += all_func_returns,strcpy_overflow +.KEEP_STATE: + all: 
install_h .WAIT $(LIBS) include $(SRC)/lib/Makefile.targ @@ -83,7 +85,7 @@ pics/%.o: $(ASTSRC)/%.c ###################################################################### # Header file generation -$(HEADERGEN:%=ast/%): FRC +$(HEADERGEN:%=ast/%): $(FEATURES:%=FEATURE/%) $(MKDIR) -p $(@D) src=`echo $(@F:%.h=%) | sed 's/^ast_//'`; \ [[ $$src = dlldefs ]] && src=dll; \ diff --git a/usr/src/cmd/ast/libpp/Makefile.com b/usr/src/cmd/ast/libpp/Makefile.com index 6140425eec..dc9f564d1a 100644 --- a/usr/src/cmd/ast/libpp/Makefile.com +++ b/usr/src/cmd/ast/libpp/Makefile.com @@ -72,6 +72,8 @@ CERRWARN += -_gcc=-Wno-unused-value # "pplex() parse error: turning off implications after 60 seconds" SMATCH = off +.KEEP_STATE: + all: $(LIBS) include $(SRC)/lib/Makefile.targ diff --git a/usr/src/cmd/ast/libshell/Makefile.com b/usr/src/cmd/ast/libshell/Makefile.com index be07424b73..5608eb9e3a 100644 --- a/usr/src/cmd/ast/libshell/Makefile.com +++ b/usr/src/cmd/ast/libshell/Makefile.com @@ -70,10 +70,12 @@ CERRWARN += -_gcc=-Wno-char-subscripts # smatch gets out of memory on common/sh/macro.c SMATCH= off +.KEEP_STATE: + all: install_h mkpicdirs .WAIT $(LIBS) mkpicdirs: - mkdir -p $(LOBJDIRS:%=pics/%) + @mkdir -p $(LOBJDIRS:%=pics/%) include $(SRC)/lib/Makefile.targ @@ -84,7 +86,7 @@ pics/%.o: $(ASTSRC)/%.c ###################################################################### # Header file generation -$(HEADERSRC:%=ast/%): FRC +$(HEADERSRC:%=ast/%): $(HEADERSRC:%=$(ASTSRC)/include/%) $(MKDIR) -p $(@D) $(CP) $(ASTSRC)/include/$(@F) $@ diff --git a/usr/src/cmd/ast/libsum/Makefile.com b/usr/src/cmd/ast/libsum/Makefile.com index 0195c0d862..d7446cba8c 100644 --- a/usr/src/cmd/ast/libsum/Makefile.com +++ b/usr/src/cmd/ast/libsum/Makefile.com @@ -65,6 +65,8 @@ CERRWARN += -_gcc=-Wno-parentheses SMOFF += all_func_returns +.KEEP_STATE: + # This codepath is performance-critical sparc_COPTFLAG = -xO5 -_cc=-xprefetch=auto,explicit sparcv9_COPTFLAG = $(sparc_COPTFLAG) @@ -82,7 +84,7 @@ pics/%.o: 
$(ASTSRC)/%.c ###################################################################### # Header file generation -$(HEADERSRC:%=ast/%): FRC +$(HEADERSRC:%=ast/%): $(HEADERSRC:%=$(ASTSRC)/%) $(MKDIR) -p $(@D) $(CP) $(ASTSRC)/$(@F) $@ @@ -94,5 +96,3 @@ _feature: FRC $(MAKE) -f Makefile.iffe generate include ../../Makefile.astmsg - -FRC: diff --git a/usr/src/cmd/ast/tools/Makefile b/usr/src/cmd/ast/tools/Makefile index 69408eeaea..fafe494dc8 100644 --- a/usr/src/cmd/ast/tools/Makefile +++ b/usr/src/cmd/ast/tools/Makefile @@ -13,6 +13,8 @@ # Copyright 2021 OmniOS Community Edition (OmniOSce) Association. # +SHELL= /usr/bin/ksh93 + CTOOLS= mamake ratz release proto lcgen SHTOOLS= iffe mamprobe mprobe package gentab @@ -32,7 +34,7 @@ _msg _feature: clean clobber: $(RM) $(OBJS) $(TOOLS) -package: FRC +package: $(RM) $@ # The string returned by 'package' must match the value used by # AT&T upstream, which is "i386" or "sun4", regardless of whether @@ -85,6 +87,5 @@ CERRWARN += -_gcc=-Wno-unused-value CERRWARN += $(CNOWARN_UNINIT) SMATCH= off +.KEEP_STATE: .PARALLEL: $(TOOLS) - -FRC: diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile index 47f978a137..1ad7823c95 100644 --- a/usr/src/cmd/bhyve/Makefile +++ b/usr/src/cmd/bhyve/Makefile @@ -61,6 +61,7 @@ SRCS = acpi.c \ pci_virtio_rnd.c \ pci_virtio_viona.c \ pci_xhci.c \ + pctestdev.c \ pm.c \ post.c \ ps2kbd.c \ @@ -97,7 +98,7 @@ SRCS = acpi.c \ #pci_hda.c \ # The bhyve generic net-backend stuff has been ignored by us at the moment -# because SmartOS users prefer to use viona for its superior network perf. +# because illumos users prefer to use viona for its superior network perf. 
#net_backends.c \ diff --git a/usr/src/cmd/bhyve/README.sync b/usr/src/cmd/bhyve/README.sync index af90209ac3..2384413853 100644 --- a/usr/src/cmd/bhyve/README.sync +++ b/usr/src/cmd/bhyve/README.sync @@ -1,15 +1,42 @@ +Git commit hashes in this file refer to the official FreeBSD distributed +public Git repository at https://git.freebsd.org/src.git + The bhyve kernel module and its associated userland consumers have been updated -to the latest upstream FreeBSD sources as documented in +to the latest upstream FreeBSD sources as of: + +commit 2bb4be0f86501ec0565dba3d37ce0f7d7fc9c464 +Author: grehan <grehan@FreeBSD.org> +Date: Fri Dec 18 00:38:48 2020 +0000 + + Fix issues with various VNC clients. + + PR: 250795 + Submitted by: Marko Kiiskila <marko@apache.org> + Reviewed by: jhb (bhyve) + MFC after: 3 weeks + Relnotes: yes + Differential Revision: https://reviews.freebsd.org/D27605 + +Divergence Notes: - usr/src/uts/i86pc/io/vmm/README.sync +A previous sync skipped commit 0ff7076bdbc6dae5ea44c0acdb567e1cede199d1 which +introduced a generic backend functionality to network devices. Without that in +place, subsequent updates reflect the absence of that subsystem. Integrating +net backends has not been a priority, given the common use of viona on illumos. -The userland components in this directory have further been updated with the -following cherry-picked updates which will need taking into account during the -next sync. +The draft Save/Restore functionality, added in FreeBSD commit +483d953a86a2507355f8287c5107dc827a0ff516, has not been synced into illumos bhyve +yet. It is not built by default in FreeBSD, so we're not interested in taking +it until it successfully endures more in-depth testing. 
- commit 44a544d41c504fbe37f836eb503e4ae721daada9 - Author: grehan <grehan@FreeBSD.org> - Date: Fri Dec 18 00:38:48 2020 +0000 +The VirtFS filesystem sharing feature, added in FreeBSD commit +100353cfbf882e23c911300ebd0cb458bd3ee975, has not been synced into illumos bhyve +yet. It depends on the userland lib9p which needs a fair amount of work to +build and run on illumos. The integration of this feature is being tracked in +https://www.illumos.org/issues/13380 - Fix issues with various VNC clients. +The stub usr/src/compat/bhyve/stdatomic.h file only includes enough glue +to satisfy the use of <stdatomic.h> in usr/src/cmd/bhyve/rfb.c, and in +particular assumes that atomic variables are sized as an int. If other bhyve +pieces start using stdatomic.h, this will need enhancing. diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 5029134ede..0cbb2c78eb 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -203,6 +203,7 @@ static int gdb_port = 0; static int guest_vmexit_on_hlt, guest_vmexit_on_pause; static int virtio_msix = 1; static int x2apic_mode = 0; /* default is xAPIC */ +static int destroy_on_poweroff = 0; static int strictio; static int strictmsr = 1; @@ -248,7 +249,11 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-abehuwxACHPSWY]\n" +#ifdef __FreeBSD__ + "Usage: %s [-abehuwxACDHPSWY]\n" +#else + "Usage: %s [-abdehuwxACDHPSWY]\n" +#endif " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" " %*s [-g <gdb port>] [-l <lpc>]\n" #ifdef __FreeBSD__ @@ -263,6 +268,7 @@ usage(int code) #ifndef __FreeBSD__ " -d: suspend cpu at boot\n" #endif + " -D: destroy on power-off\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" " -h: help\n" @@ -984,6 +990,8 @@ vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) case VM_SUSPEND_RESET: exit(0); case VM_SUSPEND_POWEROFF: + if (destroy_on_poweroff) + vm_destroy(ctx); exit(1); case VM_SUSPEND_HALT: exit(2); @@ -1294,9 +1302,9 @@ 
main(int argc, char *argv[]) memflags = 0; #ifdef __FreeBSD__ - optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:B:U:"; + optstr = "abehuwxACDHIPSWYp:g:G:c:s:m:l:B:U:"; #else - optstr = "abdehuwxACHIPSWYg:G:c:s:m:l:B:U:"; + optstr = "abdehuwxACDHIPSWYg:G:c:s:m:l:B:U:"; #endif while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { @@ -1309,6 +1317,9 @@ main(int argc, char *argv[]) case 'b': bvmcons = 1; break; + case 'D': + destroy_on_poweroff = 1; + break; case 'B': if (smbios_parse(optarg) != 0) { errx(EX_USAGE, "invalid SMBIOS " diff --git a/usr/src/cmd/bhyve/pci_ahci.c b/usr/src/cmd/bhyve/pci_ahci.c index 57934f9c84..0d4951a61e 100644 --- a/usr/src/cmd/bhyve/pci_ahci.c +++ b/usr/src/cmd/bhyve/pci_ahci.c @@ -136,9 +136,9 @@ struct ahci_ioreq { struct ahci_port { struct blockif_ctxt *bctx; struct pci_ahci_softc *pr_sc; + struct ata_params ata_ident; uint8_t *cmd_lst; uint8_t *rfis; - char ident[AHCI_PORT_IDENT]; int port; int atapi; int reset; @@ -983,7 +983,50 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); } else { - uint16_t buf[256]; + ahci_write_fis_piosetup(p); + write_prdt(p, slot, cfis, (void*)&p->ata_ident, sizeof(struct ata_params)); + ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); + } +} + +static void +ata_identify_init(struct ahci_port* p, int atapi) +{ + struct ata_params* ata_ident = &p->ata_ident; + + if (atapi) { + ata_ident->config = ATA_PROTO_ATAPI | ATA_ATAPI_TYPE_CDROM | + ATA_ATAPI_REMOVABLE | ATA_DRQ_FAST; + ata_ident->capabilities1 = ATA_SUPPORT_LBA | + ATA_SUPPORT_DMA; + ata_ident->capabilities2 = (1 << 14 | 1); + ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88; + ata_ident->obsolete62 = 0x3f; + ata_ident->mwdmamodes = 7; + if (p->xfermode & ATA_WDMA0) + ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8)); + ata_ident->apiomodes = 3; + ata_ident->mwdmamin = 0x0078; + ata_ident->mwdmarec = 0x0078; + ata_ident->pioblind = 
0x0078; + ata_ident->pioiordy = 0x0078; + ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3); + ata_ident->satacapabilities2 = ((p->ssts & ATA_SS_SPD_MASK) >> 3); + ata_ident->satasupport = ATA_SUPPORT_NCQ_STREAM; + ata_ident->version_major = 0x3f0; + ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | + ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); + ata_ident->support.command2 = (1 << 14); + ata_ident->support.extension = (1 << 14); + ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | + ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); + ata_ident->enabled.extension = (1 << 14); + ata_ident->udmamodes = 0x7f; + if (p->xfermode & ATA_UDMA0) + ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8)); + ata_ident->transport_major = 0x1020; + ata_ident->integrity = 0x00a5; + } else { uint64_t sectors; int sectsz, psectsz, psectoff, candelete, ro; uint16_t cyl; @@ -995,87 +1038,84 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) sectors = blockif_size(p->bctx) / sectsz; blockif_chs(p->bctx, &cyl, &heads, &sech); blockif_psectsz(p->bctx, &psectsz, &psectoff); - memset(buf, 0, sizeof(buf)); - buf[0] = 0x0040; - buf[1] = cyl; - buf[3] = heads; - buf[6] = sech; - ata_string((uint8_t *)(buf+10), p->ident, 20); - ata_string((uint8_t *)(buf+23), "001", 8); - ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40); - buf[47] = (0x8000 | 128); - buf[48] = 0; - buf[49] = (1 << 8 | 1 << 9 | 1 << 11); - buf[50] = (1 << 14); - buf[53] = (1 << 1 | 1 << 2); + ata_ident->config = ATA_DRQ_FAST; + ata_ident->cylinders = cyl; + ata_ident->heads = heads; + ata_ident->sectors = sech; + + ata_ident->sectors_intr = (0x8000 | 128); + ata_ident->tcg = 0; + + ata_ident->capabilities1 = ATA_SUPPORT_DMA | + ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY; + ata_ident->capabilities2 = (1 << 14); + ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88; if (p->mult_sectors) - buf[59] = (0x100 | p->mult_sectors); + ata_ident->multi = 
(ATA_MULTI_VALID | p->mult_sectors); if (sectors <= 0x0fffffff) { - buf[60] = sectors; - buf[61] = (sectors >> 16); + ata_ident->lba_size_1 = sectors; + ata_ident->lba_size_2 = (sectors >> 16); } else { - buf[60] = 0xffff; - buf[61] = 0x0fff; + ata_ident->lba_size_1 = 0xffff; + ata_ident->lba_size_2 = 0x0fff; } - buf[63] = 0x7; + ata_ident->mwdmamodes = 0x7; if (p->xfermode & ATA_WDMA0) - buf[63] |= (1 << ((p->xfermode & 7) + 8)); - buf[64] = 0x3; - buf[65] = 120; - buf[66] = 120; - buf[67] = 120; - buf[68] = 120; - buf[69] = 0; - buf[75] = 31; - buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 | - ATA_SUPPORT_NCQ); - buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED | - (p->ssts & ATA_SS_SPD_MASK) >> 3); - buf[80] = 0x3f0; - buf[81] = 0x28; - buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE| - ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); - buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | - ATA_SUPPORT_FLUSHCACHE48 | 1 << 14); - buf[84] = (1 << 14); - buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE| - ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); - buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | - ATA_SUPPORT_FLUSHCACHE48 | 1 << 15); - buf[87] = (1 << 14); - buf[88] = 0x7f; + ata_ident->mwdmamodes |= (1 << ((p->xfermode & 7) + 8)); + ata_ident->apiomodes = 0x3; + ata_ident->mwdmamin = 0x0078; + ata_ident->mwdmarec = 0x0078; + ata_ident->pioblind = 0x0078; + ata_ident->pioiordy = 0x0078; + ata_ident->support3 = 0; + ata_ident->queue = 31; + ata_ident->satacapabilities = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 | + ATA_SUPPORT_NCQ); + ata_ident->satacapabilities2 = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED | + (p->ssts & ATA_SS_SPD_MASK) >> 3); + ata_ident->version_major = 0x3f0; + ata_ident->version_minor = 0x28; + ata_ident->support.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE | + ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); + ata_ident->support.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | + 
ATA_SUPPORT_FLUSHCACHE48 | 1 << 14); + ata_ident->support.extension = (1 << 14); + ata_ident->enabled.command1 = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE | + ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); + ata_ident->enabled.command2 = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | + ATA_SUPPORT_FLUSHCACHE48 | 1 << 15); + ata_ident->enabled.extension = (1 << 14); + ata_ident->udmamodes = 0x7f; if (p->xfermode & ATA_UDMA0) - buf[88] |= (1 << ((p->xfermode & 7) + 8)); - buf[100] = sectors; - buf[101] = (sectors >> 16); - buf[102] = (sectors >> 32); - buf[103] = (sectors >> 48); + ata_ident->udmamodes |= (1 << ((p->xfermode & 7) + 8)); + ata_ident->lba_size48_1 = sectors; + ata_ident->lba_size48_2 = (sectors >> 16); + ata_ident->lba_size48_3 = (sectors >> 32); + ata_ident->lba_size48_4 = (sectors >> 48); + if (candelete && !ro) { - buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT; - buf[105] = 1; - buf[169] = ATA_SUPPORT_DSM_TRIM; + ata_ident->support3 |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT; + ata_ident->max_dsm_blocks = 1; + ata_ident->support_dsm = ATA_SUPPORT_DSM_TRIM; } - buf[106] = 0x4000; - buf[209] = 0x4000; + ata_ident->pss = ATA_PSS_VALID_VALUE; + ata_ident->lsalign = 0x4000; if (psectsz > sectsz) { - buf[106] |= 0x2000; - buf[106] |= ffsl(psectsz / sectsz) - 1; - buf[209] |= (psectoff / sectsz); + ata_ident->pss |= ATA_PSS_MULTLS; + ata_ident->pss |= ffsl(psectsz / sectsz) - 1; + ata_ident->lsalign |= (psectoff / sectsz); } if (sectsz > 512) { - buf[106] |= 0x1000; - buf[117] = sectsz / 2; - buf[118] = ((sectsz / 2) >> 16); + ata_ident->pss |= ATA_PSS_LSSABOVE512; + ata_ident->lss_1 = sectsz / 2; + ata_ident->lss_2 = ((sectsz / 2) >> 16); } - buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); - buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); - buf[222] = 0x1020; - buf[255] = 0x00a5; - ahci_checksum((uint8_t *)buf, sizeof(buf)); - ahci_write_fis_piosetup(p); - write_prdt(p, slot, cfis, (void *)buf, sizeof(buf)); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | 
ATA_S_READY); + ata_ident->support2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); + ata_ident->enabled2 = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); + ata_ident->transport_major = 0x1020; + ata_ident->integrity = 0x00a5; } + ahci_checksum((uint8_t*)ata_ident, sizeof(struct ata_params)); } static void @@ -1085,44 +1125,8 @@ handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis) ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); } else { - uint16_t buf[256]; - - memset(buf, 0, sizeof(buf)); - buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5); - ata_string((uint8_t *)(buf+10), p->ident, 20); - ata_string((uint8_t *)(buf+23), "001", 8); - ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40); - buf[49] = (1 << 9 | 1 << 8); - buf[50] = (1 << 14 | 1); - buf[53] = (1 << 2 | 1 << 1); - buf[62] = 0x3f; - buf[63] = 7; - if (p->xfermode & ATA_WDMA0) - buf[63] |= (1 << ((p->xfermode & 7) + 8)); - buf[64] = 3; - buf[65] = 120; - buf[66] = 120; - buf[67] = 120; - buf[68] = 120; - buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3); - buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3); - buf[78] = (1 << 5); - buf[80] = 0x3f0; - buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | - ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); - buf[83] = (1 << 14); - buf[84] = (1 << 14); - buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | - ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); - buf[87] = (1 << 14); - buf[88] = 0x7f; - if (p->xfermode & ATA_UDMA0) - buf[88] |= (1 << ((p->xfermode & 7) + 8)); - buf[222] = 0x1020; - buf[255] = 0x00a5; - ahci_checksum((uint8_t *)buf, sizeof(buf)); ahci_write_fis_piosetup(p); - write_prdt(p, slot, cfis, (void *)buf, sizeof(buf)); + write_prdt(p, slot, cfis, (void *)&p->ata_ident, sizeof(struct ata_params)); ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); } } @@ -2314,6 +2318,10 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) MD5_CTX mdctx; u_char digest[16]; char *next, *next2; + char 
*bopt, *uopt, *xopts, *config; + FILE* fp; + size_t block_len; + int comma, optpos; ret = 0; @@ -2330,6 +2338,9 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) slots = 32; for (p = 0; p < MAX_PORTS && opts != NULL; p++, opts = next) { + struct ata_params *ata_ident = &sc->port[p].ata_ident; + memset(ata_ident, 0, sizeof(struct ata_params)); + /* Identify and cut off type of present port. */ if (strncmp(opts, "hd:", 3) == 0) { atapi = 0; @@ -2352,13 +2363,82 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) if (opts[0] == 0) continue; + uopt = strdup(opts); + bopt = NULL; + fp = open_memstream(&bopt, &block_len); + comma = 0; + optpos = 0; + + for (xopts = strtok(uopt, ","); + xopts != NULL; + xopts = strtok(NULL, ",")) { + + /* First option assume as block filename. */ + if (optpos == 0) { + /* + * Create an identifier for the backing file. + * Use parts of the md5 sum of the filename + */ + char ident[AHCI_PORT_IDENT]; + MD5Init(&mdctx); + MD5Update(&mdctx, opts, strlen(opts)); + MD5Final(digest, &mdctx); + snprintf(ident, AHCI_PORT_IDENT, + "BHYVE-%02X%02X-%02X%02X-%02X%02X", + digest[0], digest[1], digest[2], digest[3], digest[4], + digest[5]); + ata_string((uint8_t*)&ata_ident->serial, ident, 20); + ata_string((uint8_t*)&ata_ident->revision, "001", 8); + if (atapi) { + ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DVD ROM", 40); + } + else { + ata_string((uint8_t*)&ata_ident->model, "BHYVE SATA DISK", 40); + } + } + + if ((config = strchr(xopts, '=')) != NULL) { + *config++ = '\0'; + if (!strcmp("nmrr", xopts)) { + ata_ident->media_rotation_rate = atoi(config); + } + else if (!strcmp("ser", xopts)) { + ata_string((uint8_t*)(&ata_ident->serial), config, 20); + } + else if (!strcmp("rev", xopts)) { + ata_string((uint8_t*)(&ata_ident->revision), config, 8); + } + else if (!strcmp("model", xopts)) { + ata_string((uint8_t*)(&ata_ident->model), config, 40); + } + else { + /* Pass all other 
options to blockif_open. */ + *--config = '='; + fprintf(fp, "%s%s", comma ? "," : "", xopts); + comma = 1; + } + } + else { + /* Pass all other options to blockif_open. */ + fprintf(fp, "%s%s", comma ? "," : "", xopts); + comma = 1; + } + optpos++; + } + free(uopt); + fclose(fp); + + DPRINTF("%s\n", bopt); + /* * Attempt to open the backing image. Use the PCI slot/func * and the port number for the identifier string. */ snprintf(bident, sizeof(bident), "%d:%d:%d", pi->pi_slot, pi->pi_func, p); - bctxt = blockif_open(opts, bident); + bctxt = blockif_open(bopt, bident); + free(bopt); + if (bctxt == NULL) { sc->ports = p; ret = 1; @@ -2380,17 +2460,7 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) (void) blockif_set_wce(bctxt, 1); #endif - /* - * Create an identifier for the backing file. - * Use parts of the md5 sum of the filename - */ - MD5Init(&mdctx); - MD5Update(&mdctx, opts, strlen(opts)); - MD5Final(digest, &mdctx); - snprintf(sc->port[p].ident, AHCI_PORT_IDENT, - "BHYVE-%02X%02X-%02X%02X-%02X%02X", - digest[0], digest[1], digest[2], digest[3], digest[4], - digest[5]); + ata_identify_init(&sc->port[p], atapi); /* * Allocate blockif request structures and add them diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c index 8f2c4d810f..cb7e074540 100644 --- a/usr/src/cmd/bhyve/pci_e82545.c +++ b/usr/src/cmd/bhyve/pci_e82545.c @@ -1660,18 +1660,18 @@ e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) break; case E1000_TDBAL(0): sc->esc_TDBAL = value & ~0xF; - if (sc->esc_tx_enabled) { - /* Apparently legal */ + if (sc->esc_tx_enabled) e82545_tx_update_tdba(sc); - } break; case E1000_TDBAH(0): - //assert(!sc->esc_tx_enabled); sc->esc_TDBAH = value; + if (sc->esc_tx_enabled) + e82545_tx_update_tdba(sc); break; case E1000_TDLEN(0): - //assert(!sc->esc_tx_enabled); sc->esc_TDLEN = value & ~0xFFF0007F; + if (sc->esc_tx_enabled) + e82545_tx_update_tdba(sc); break; case E1000_TDH(0): 
//assert(!sc->esc_tx_enabled); diff --git a/usr/src/cmd/bhyve/pci_emul.c b/usr/src/cmd/bhyve/pci_emul.c index c510116e19..90602f715b 100644 --- a/usr/src/cmd/bhyve/pci_emul.c +++ b/usr/src/cmd/bhyve/pci_emul.c @@ -463,14 +463,6 @@ pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, return (-1); } -int -pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, - uint64_t size) -{ - - return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); -} - /* * Register (or unregister) the MMIO or I/O region associated with the BAR * register 'idx' of an emulated pci device. @@ -595,8 +587,8 @@ update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) } int -pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, - enum pcibar_type type, uint64_t size) +pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, + uint64_t size) { uint64_t *baseptr = NULL; uint64_t limit = 0, lobits = 0; @@ -636,16 +628,10 @@ pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, * Some drivers do not work well if the 64-bit BAR is allocated * above 4GB. Allow for this by allocating small requests under * 4GB unless then allocation size is larger than some arbitrary - * number (32MB currently). + * number (128MB currently). 
*/ - if (size > 32 * 1024 * 1024) { - /* - * XXX special case for device requiring peer-peer DMA - */ - if (size == 0x100000000UL) - baseptr = &hostbase; - else - baseptr = &pci_emul_membase64; + if (size > 128 * 1024 * 1024) { + baseptr = &pci_emul_membase64; limit = PCI_EMUL_MEMLIMIT64; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | diff --git a/usr/src/cmd/bhyve/pci_emul.h b/usr/src/cmd/bhyve/pci_emul.h index d3dd9a2f46..63e3c89a95 100644 --- a/usr/src/cmd/bhyve/pci_emul.h +++ b/usr/src/cmd/bhyve/pci_emul.h @@ -222,8 +222,6 @@ int init_pci(struct vmctx *ctx); void pci_callback(void); int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size); -int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, - uint64_t hostbase, enum pcibar_type type, uint64_t size); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, diff --git a/usr/src/cmd/bhyve/pci_lpc.c b/usr/src/cmd/bhyve/pci_lpc.c index 50413250d3..9c8f25e89d 100644 --- a/usr/src/cmd/bhyve/pci_lpc.c +++ b/usr/src/cmd/bhyve/pci_lpc.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include "pci_emul.h" #include "pci_irq.h" #include "pci_lpc.h" +#include "pctestdev.h" #include "uart_emul.h" #define IO_ICU1 0x20 @@ -83,6 +84,8 @@ static struct lpc_uart_softc { static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" }; +static bool pctestdev_present; + /* * LPC device configuration is in the following form: * <lpc_device_name>[,<options>] @@ -110,6 +113,18 @@ lpc_device_parse(const char *opts) goto done; } } + if (strcasecmp(lpcdev, pctestdev_getname()) == 0) { + if (pctestdev_present) { + EPRINTLN("More than one %s device conf is " + "specified; only one is allowed.", + pctestdev_getname()); + } else if (pctestdev_parse(str) == 0) { + pctestdev_present = true; + error = 0; + free(cpy); + goto done; + } 
+ } } done: @@ -127,6 +142,7 @@ lpc_print_supported_devices() printf("bootrom\n"); for (i = 0; i < LPC_UART_NUM; i++) printf("%s\n", lpc_uart_names[i]); + printf("%s\n", pctestdev_getname()); } const char * @@ -250,6 +266,13 @@ lpc_init(struct vmctx *ctx) sc->enabled = 1; } + /* pc-testdev */ + if (pctestdev_present) { + error = pctestdev_init(ctx); + if (error) + return (error); + } + return (0); } diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c index 65d8d49b64..a0a8a9571d 100644 --- a/usr/src/cmd/bhyve/pci_nvme.c +++ b/usr/src/cmd/bhyve/pci_nvme.c @@ -3,6 +3,7 @@ * * Copyright (c) 2017 Shunsuke Mie * Copyright (c) 2018 Leon Dang + * Copyright (c) 2020 Chuck Tuffli * * Function crc16 Copyright (c) 2017, Fedor Uporov * Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c @@ -33,7 +34,7 @@ * bhyve PCIe-NVMe device emulation. * * options: - * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=# + * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#,dsm=<opt> * * accepted devpath: * /dev/blockdev @@ -46,6 +47,7 @@ * sectsz = sector size (defaults to blockif sector size) * ser = serial number (20-chars max) * eui64 = IEEE Extended Unique Identifier (8 byte value) + * dsm = DataSet Management support. Option is one of auto, enable,disable * */ @@ -57,6 +59,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <sys/errno.h> #include <sys/types.h> #include <net/ieee_oui.h> #ifndef __FreeBSD__ @@ -86,8 +89,8 @@ __FBSDID("$FreeBSD$"); static int nvme_debug = 0; -#define DPRINTF(params) if (nvme_debug) PRINTLN params -#define WPRINTF(params) PRINTLN params +#define DPRINTF(fmt, args...) if (nvme_debug) PRINTLN(fmt, ##args) +#define WPRINTF(fmt, args...) 
PRINTLN(fmt, ##args) /* defaults; can be overridden */ #define NVME_MSIX_BAR 4 @@ -99,9 +102,16 @@ static int nvme_debug = 0; #define NVME_QUEUES 16 #define NVME_MAX_QENTRIES 2048 +/* Memory Page size Minimum reported in CAP register */ +#define NVME_MPSMIN 0 +/* MPSMIN converted to bytes */ +#define NVME_MPSMIN_BYTES (1 << (12 + NVME_MPSMIN)) #define NVME_PRP2_ITEMS (PAGE_SIZE/sizeof(uint64_t)) -#define NVME_MAX_BLOCKIOVS 512 +#define NVME_MDTS 9 +/* Note the + 1 allows for the initial descriptor to not be page aligned */ +#define NVME_MAX_IOVEC ((1 << NVME_MDTS) + 1) +#define NVME_MAX_DATA_SIZE ((1 << NVME_MDTS) * NVME_MPSMIN_BYTES) /* This is a synthetic status code to indicate there is no status */ #define NVME_NO_STATUS 0xffff @@ -153,21 +163,21 @@ enum nvme_copy_dir { struct nvme_completion_queue { struct nvme_completion *qbase; + pthread_mutex_t mtx; uint32_t size; uint16_t tail; /* nvme progress */ uint16_t head; /* guest progress */ uint16_t intr_vec; uint32_t intr_en; - pthread_mutex_t mtx; }; struct nvme_submission_queue { struct nvme_command *qbase; + pthread_mutex_t mtx; uint32_t size; uint16_t head; /* nvme progress */ uint16_t tail; /* guest progress */ uint16_t cqid; /* completion queue id */ - int busy; /* queue is being processed */ int qpriority; }; @@ -186,6 +196,18 @@ struct pci_nvme_blockstore { uint32_t deallocate:1; }; +/* + * Calculate the number of additional page descriptors for guest IO requests + * based on the advertised Max Data Transfer (MDTS) and given the number of + * default iovec's in a struct blockif_req. + * + * Note the + 1 allows for the initial descriptor to not be page aligned. + */ +#define MDTS_PAD_SIZE \ + NVME_MAX_IOVEC > BLOCKIF_IOV_MAX ? 
\ + NVME_MAX_IOVEC - BLOCKIF_IOV_MAX : \ + 0 + struct pci_nvme_ioreq { struct pci_nvme_softc *sc; STAILQ_ENTRY(pci_nvme_ioreq) link; @@ -199,18 +221,11 @@ struct pci_nvme_ioreq { uint64_t prev_gpaddr; size_t prev_size; - - /* - * lock if all iovs consumed (big IO); - * complete transaction before continuing - */ - pthread_mutex_t mtx; - pthread_cond_t cv; + size_t bytes; struct blockif_req io_req; - /* pad to fit up to 512 page descriptors from guest IO request */ - struct iovec iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX]; + struct iovec iovpadding[MDTS_PAD_SIZE]; }; enum nvme_dsm_type { @@ -222,6 +237,28 @@ enum nvme_dsm_type { NVME_DATASET_MANAGEMENT_DISABLE, }; +struct pci_nvme_softc; +struct nvme_feature_obj; + +typedef void (*nvme_feature_cb)(struct pci_nvme_softc *, + struct nvme_feature_obj *, + struct nvme_command *, + struct nvme_completion *); + +struct nvme_feature_obj { + uint32_t cdw11; + nvme_feature_cb set; + nvme_feature_cb get; + bool namespace_specific; +}; + +#define NVME_FID_MAX (NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1) + +struct pci_nvme_aer { + STAILQ_ENTRY(pci_nvme_aer) link; + uint16_t cid; /* Command ID of the submitted AER */ +}; + struct pci_nvme_softc { struct pci_devinst *nsc_pi; @@ -241,6 +278,7 @@ struct pci_nvme_softc { uint32_t max_queues; /* max number of IO SQ's or CQ's */ uint32_t num_cqueues; uint32_t num_squeues; + bool num_q_is_set; /* Has host set Number of Queues */ struct pci_nvme_ioreq *ioreqs; STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */ @@ -255,16 +293,26 @@ struct pci_nvme_softc { struct nvme_completion_queue *compl_queues; struct nvme_submission_queue *submit_queues; - /* controller features */ - uint32_t intr_coales_aggr_time; /* 0x08: uS to delay intr */ - uint32_t intr_coales_aggr_thresh; /* 0x08: compl-Q entries */ - uint32_t async_ev_config; /* 0x0B: async event config */ + struct nvme_feature_obj feat[NVME_FID_MAX]; enum nvme_dsm_type dataset_management; + + /* Accounting for 
SMART data */ + __uint128_t read_data_units; + __uint128_t write_data_units; + __uint128_t read_commands; + __uint128_t write_commands; + uint32_t read_dunits_remainder; + uint32_t write_dunits_remainder; + + STAILQ_HEAD(, pci_nvme_aer) aer_list; + uint32_t aer_count; }; -static void pci_nvme_io_partial(struct blockif_req *br, int err); +static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *); +static void pci_nvme_release_ioreq(struct pci_nvme_softc *, struct pci_nvme_ioreq *); +static void pci_nvme_io_done(struct blockif_req *, int); /* Controller Configuration utils */ #define NVME_CC_GET_EN(cc) \ @@ -303,6 +351,19 @@ static void pci_nvme_io_partial(struct blockif_req *br, int err); #define NVME_ONCS_DSM (NVME_CTRLR_DATA_ONCS_DSM_MASK << \ NVME_CTRLR_DATA_ONCS_DSM_SHIFT) +static void nvme_feature_invalid_cb(struct pci_nvme_softc *, + struct nvme_feature_obj *, + struct nvme_command *, + struct nvme_completion *); +static void nvme_feature_num_queues(struct pci_nvme_softc *, + struct nvme_feature_obj *, + struct nvme_command *, + struct nvme_completion *); +static void nvme_feature_iv_config(struct pci_nvme_softc *, + struct nvme_feature_obj *, + struct nvme_command *, + struct nvme_completion *); + static __inline void cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { @@ -329,14 +390,60 @@ pci_nvme_status_genc(uint16_t *status, uint16_t code) pci_nvme_status_tc(status, NVME_SCT_GENERIC, code); } -static __inline void -pci_nvme_toggle_phase(uint16_t *status, int prev) +/* + * Initialize the requested number or IO Submission and Completion Queues. + * Admin queues are allocated implicitly. 
+ */ +static void +pci_nvme_init_queues(struct pci_nvme_softc *sc, uint32_t nsq, uint32_t ncq) { + uint32_t i; - if (prev) - *status &= ~NVME_STATUS_P; - else - *status |= NVME_STATUS_P; + /* + * Allocate and initialize the Submission Queues + */ + if (nsq > NVME_QUEUES) { + WPRINTF("%s: clamping number of SQ from %u to %u", + __func__, nsq, NVME_QUEUES); + nsq = NVME_QUEUES; + } + + sc->num_squeues = nsq; + + sc->submit_queues = calloc(sc->num_squeues + 1, + sizeof(struct nvme_submission_queue)); + if (sc->submit_queues == NULL) { + WPRINTF("%s: SQ allocation failed", __func__); + sc->num_squeues = 0; + } else { + struct nvme_submission_queue *sq = sc->submit_queues; + + for (i = 0; i < sc->num_squeues; i++) + pthread_mutex_init(&sq[i].mtx, NULL); + } + + /* + * Allocate and initialize the Completion Queues + */ + if (ncq > NVME_QUEUES) { + WPRINTF("%s: clamping number of CQ from %u to %u", + __func__, ncq, NVME_QUEUES); + ncq = NVME_QUEUES; + } + + sc->num_cqueues = ncq; + + sc->compl_queues = calloc(sc->num_cqueues + 1, + sizeof(struct nvme_completion_queue)); + if (sc->compl_queues == NULL) { + WPRINTF("%s: CQ allocation failed", __func__); + sc->num_cqueues = 0; + } else { + struct nvme_completion_queue *cq = sc->compl_queues; + + for (i = 0; i < sc->num_cqueues; i++) + pthread_mutex_init(&cq[i].mtx, NULL); + } } static void @@ -360,7 +467,7 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) cd->mic = 0; - cd->mdts = 9; /* max data transfer size (2^mdts * CAP.MPSMIN) */ + cd->mdts = NVME_MDTS; /* max data transfer size (2^mdts * CAP.MPSMIN) */ cd->ver = 0x00010300; @@ -368,6 +475,9 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) cd->acl = 2; cd->aerl = 4; + /* Advertise 1, Read-only firmware slot */ + cd->frmw = NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK | + (1 << NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT); cd->lpa = 0; /* TODO: support some simple things like SMART */ cd->elpe = 0; /* max error log page entries */ cd->npss = 1; /* number of power states support */ 
@@ -493,12 +603,136 @@ pci_nvme_init_logpages(struct pci_nvme_softc *sc) memset(&sc->err_log, 0, sizeof(sc->err_log)); memset(&sc->health_log, 0, sizeof(sc->health_log)); memset(&sc->fw_log, 0, sizeof(sc->fw_log)); + + /* Set read/write remainder to round up according to spec */ + sc->read_dunits_remainder = 999; + sc->write_dunits_remainder = 999; + + /* Set nominal Health values checked by implementations */ + sc->health_log.temperature = 310; + sc->health_log.available_spare = 100; + sc->health_log.available_spare_threshold = 10; +} + +static void +pci_nvme_init_features(struct pci_nvme_softc *sc) +{ + + sc->feat[0].set = nvme_feature_invalid_cb; + sc->feat[0].get = nvme_feature_invalid_cb; + + sc->feat[NVME_FEAT_LBA_RANGE_TYPE].namespace_specific = true; + sc->feat[NVME_FEAT_ERROR_RECOVERY].namespace_specific = true; + sc->feat[NVME_FEAT_NUMBER_OF_QUEUES].set = nvme_feature_num_queues; + sc->feat[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION].set = + nvme_feature_iv_config; + sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG].get = + nvme_feature_invalid_cb; + sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW].get = + nvme_feature_invalid_cb; +} + +static void +pci_nvme_aer_init(struct pci_nvme_softc *sc) +{ + + STAILQ_INIT(&sc->aer_list); + sc->aer_count = 0; +} + +static void +pci_nvme_aer_destroy(struct pci_nvme_softc *sc) +{ + struct pci_nvme_aer *aer = NULL; + + while (!STAILQ_EMPTY(&sc->aer_list)) { + aer = STAILQ_FIRST(&sc->aer_list); + STAILQ_REMOVE_HEAD(&sc->aer_list, link); + free(aer); + } + + pci_nvme_aer_init(sc); +} + +#ifdef __FreeBSD__ +static bool +pci_nvme_aer_available(struct pci_nvme_softc *sc) +{ + + return (!STAILQ_EMPTY(&sc->aer_list)); +} +#else +/* This is kept behind an ifdef while it's unused to appease the compiler. 
*/ +#endif + +static bool +pci_nvme_aer_limit_reached(struct pci_nvme_softc *sc) +{ + struct nvme_controller_data *cd = &sc->ctrldata; + + /* AERL is a zero based value while aer_count is one's based */ + return (sc->aer_count == (cd->aerl + 1)); +} + +/* + * Add an Async Event Request + * + * Stores an AER to be returned later if the Controller needs to notify the + * host of an event. + * Note that while the NVMe spec doesn't require Controllers to return AER's + * in order, this implementation does preserve the order. + */ +static int +pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid) +{ + struct pci_nvme_aer *aer = NULL; + + if (pci_nvme_aer_limit_reached(sc)) + return (-1); + + aer = calloc(1, sizeof(struct pci_nvme_aer)); + if (aer == NULL) + return (-1); + + sc->aer_count++; + + /* Save the Command ID for use in the completion message */ + aer->cid = cid; + STAILQ_INSERT_TAIL(&sc->aer_list, aer, link); + + return (0); } +/* + * Get an Async Event Request structure + * + * Returns a pointer to an AER previously submitted by the host or NULL if + * no AER's exist. Caller is responsible for freeing the returned struct. + */ +#ifdef __FreeBSD__ +static struct pci_nvme_aer * +pci_nvme_aer_get(struct pci_nvme_softc *sc) +{ + struct pci_nvme_aer *aer = NULL; + + aer = STAILQ_FIRST(&sc->aer_list); + if (aer != NULL) { + STAILQ_REMOVE_HEAD(&sc->aer_list, link); + sc->aer_count--; + } + + return (aer); +} +#else +/* This is kept behind an ifdef while it's unused to appease the compiler. 
*/ +#endif + static void pci_nvme_reset_locked(struct pci_nvme_softc *sc) { - DPRINTF(("%s", __func__)); + uint32_t i; + + DPRINTF("%s", __func__); sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | @@ -511,45 +745,28 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) sc->regs.cc = 0; sc->regs.csts = 0; - sc->num_cqueues = sc->num_squeues = sc->max_queues; - if (sc->submit_queues != NULL) { - for (int i = 0; i < sc->num_squeues + 1; i++) { - /* - * The Admin Submission Queue is at index 0. - * It must not be changed at reset otherwise the - * emulation will be out of sync with the guest. - */ - if (i != 0) { - sc->submit_queues[i].qbase = NULL; - sc->submit_queues[i].size = 0; - sc->submit_queues[i].cqid = 0; - } - sc->submit_queues[i].tail = 0; - sc->submit_queues[i].head = 0; - sc->submit_queues[i].busy = 0; - } - } else - sc->submit_queues = calloc(sc->num_squeues + 1, - sizeof(struct nvme_submission_queue)); - - if (sc->compl_queues != NULL) { - for (int i = 0; i < sc->num_cqueues + 1; i++) { - /* See Admin Submission Queue note above */ - if (i != 0) { - sc->compl_queues[i].qbase = NULL; - sc->compl_queues[i].size = 0; - } + assert(sc->submit_queues != NULL); - sc->compl_queues[i].tail = 0; - sc->compl_queues[i].head = 0; - } - } else { - sc->compl_queues = calloc(sc->num_cqueues + 1, - sizeof(struct nvme_completion_queue)); + for (i = 0; i < sc->num_squeues + 1; i++) { + sc->submit_queues[i].qbase = NULL; + sc->submit_queues[i].size = 0; + sc->submit_queues[i].cqid = 0; + sc->submit_queues[i].tail = 0; + sc->submit_queues[i].head = 0; + } + + assert(sc->compl_queues != NULL); - for (int i = 0; i < sc->num_cqueues + 1; i++) - pthread_mutex_init(&sc->compl_queues[i].mtx, NULL); + for (i = 0; i < sc->num_cqueues + 1; i++) { + sc->compl_queues[i].qbase = NULL; + sc->compl_queues[i].size = 0; + sc->compl_queues[i].tail = 0; + sc->compl_queues[i].head = 0; } + + sc->num_q_is_set = false; + + 
pci_nvme_aer_destroy(sc); } static void @@ -565,23 +782,25 @@ pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) { uint16_t acqs, asqs; - DPRINTF(("%s", __func__)); + DPRINTF("%s", __func__); asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, sizeof(struct nvme_command) * asqs); - DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p", - __func__, sc->regs.asq, sc->submit_queues[0].qbase)); + DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p", + __func__, sc->regs.asq, sc->submit_queues[0].qbase); acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & NVME_AQA_REG_ACQS_MASK) + 1; sc->compl_queues[0].size = acqs; sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, sizeof(struct nvme_completion) * acqs); - DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p", - __func__, sc->regs.acq, sc->compl_queues[0].qbase)); + sc->compl_queues[0].intr_en = NVME_CQ_INTEN; + + DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p", + __func__, sc->regs.acq, sc->compl_queues[0].qbase); } static int @@ -631,22 +850,63 @@ nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b, return (0); } +/* + * Write a Completion Queue Entry update + * + * Write the completion and update the doorbell value + */ +static void +pci_nvme_cq_update(struct pci_nvme_softc *sc, + struct nvme_completion_queue *cq, + uint32_t cdw0, + uint16_t cid, + uint16_t sqid, + uint16_t status) +{ + struct nvme_submission_queue *sq = &sc->submit_queues[sqid]; + struct nvme_completion *cqe; + + assert(cq->qbase != NULL); + + pthread_mutex_lock(&cq->mtx); + + cqe = &cq->qbase[cq->tail]; + + /* Flip the phase bit */ + status |= (cqe->status ^ NVME_STATUS_P) & NVME_STATUS_P_MASK; + + cqe->cdw0 = cdw0; + cqe->sqhd = sq->head; + cqe->sqid = sqid; + cqe->cid = cid; + cqe->status = status; + + cq->tail++; + if (cq->tail >= cq->size) { + cq->tail = 0; + } + + pthread_mutex_unlock(&cq->mtx); +} + 
static int nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; - DPRINTF(("%s DELETE_IO_SQ %u", __func__, qid)); - if (qid == 0 || qid > sc->num_squeues) { - WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u", - __func__, qid, sc->num_squeues)); + DPRINTF("%s DELETE_IO_SQ %u", __func__, qid); + if (qid == 0 || qid > sc->num_squeues || + (sc->submit_queues[qid].qbase == NULL)) { + WPRINTF("%s NOT PERMITTED queue id %u / num_squeues %u", + __func__, qid, sc->num_squeues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } sc->submit_queues[qid].qbase = NULL; + sc->submit_queues[qid].cqid = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } @@ -659,9 +919,10 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; struct nvme_submission_queue *nsq; - if ((qid == 0) || (qid > sc->num_squeues)) { - WPRINTF(("%s queue index %u > num_squeues %u", - __func__, qid, sc->num_squeues)); + if ((qid == 0) || (qid > sc->num_squeues) || + (sc->submit_queues[qid].qbase != NULL)) { + WPRINTF("%s queue index %u > num_squeues %u", + __func__, qid, sc->num_squeues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); @@ -670,26 +931,54 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, nsq = &sc->submit_queues[qid]; nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); + DPRINTF("%s size=%u (max=%u)", __func__, nsq->size, sc->max_qentries); + if ((nsq->size < 2) || (nsq->size > sc->max_qentries)) { + /* + * Queues must specify at least two entries + * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to + * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec + */ + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); 
+ return (1); + } + nsq->head = nsq->tail = 0; - nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, - sizeof(struct nvme_command) * (size_t)nsq->size); nsq->cqid = (command->cdw11 >> 16) & 0xffff; + if ((nsq->cqid == 0) || (nsq->cqid > sc->num_cqueues)) { + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_INVALID_QUEUE_IDENTIFIER); + return (1); + } + + if (sc->compl_queues[nsq->cqid].qbase == NULL) { + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_COMPLETION_QUEUE_INVALID); + return (1); + } + nsq->qpriority = (command->cdw11 >> 1) & 0x03; - DPRINTF(("%s sq %u size %u gaddr %p cqid %u", __func__, - qid, nsq->size, nsq->qbase, nsq->cqid)); + nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, + sizeof(struct nvme_command) * (size_t)nsq->size); + + DPRINTF("%s sq %u size %u gaddr %p cqid %u", __func__, + qid, nsq->size, nsq->qbase, nsq->cqid); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); - DPRINTF(("%s completed creating IOSQ qid %u", - __func__, qid)); + DPRINTF("%s completed creating IOSQ qid %u", + __func__, qid); } else { /* * Guest sent non-cont submission queue request. * This setting is unsupported by this emulation. 
*/ - WPRINTF(("%s unsupported non-contig (list-based) " - "create i/o submission queue", __func__)); + WPRINTF("%s unsupported non-contig (list-based) " + "create i/o submission queue", __func__); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } @@ -701,16 +990,27 @@ nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { uint16_t qid = command->cdw10 & 0xffff; + uint16_t sqid; - DPRINTF(("%s DELETE_IO_CQ %u", __func__, qid)); - if (qid == 0 || qid > sc->num_cqueues) { - WPRINTF(("%s queue index %u / num_cqueues %u", - __func__, qid, sc->num_cqueues)); + DPRINTF("%s DELETE_IO_CQ %u", __func__, qid); + if (qid == 0 || qid > sc->num_cqueues || + (sc->compl_queues[qid].qbase == NULL)) { + WPRINTF("%s queue index %u / num_cqueues %u", + __func__, qid, sc->num_cqueues); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); return (1); } + /* Deleting an Active CQ is an error */ + for (sqid = 1; sqid < sc->num_squeues + 1; sqid++) + if (sc->submit_queues[sqid].cqid == qid) { + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_INVALID_QUEUE_DELETION); + return (1); + } + sc->compl_queues[qid].qbase = NULL; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); @@ -720,40 +1020,58 @@ static int nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - if (command->cdw11 & NVME_CMD_CDW11_PC) { - uint16_t qid = command->cdw10 & 0xffff; - struct nvme_completion_queue *ncq; + struct nvme_completion_queue *ncq; + uint16_t qid = command->cdw10 & 0xffff; - if ((qid == 0) || (qid > sc->num_cqueues)) { - WPRINTF(("%s queue index %u > num_cqueues %u", - __func__, qid, sc->num_cqueues)); - pci_nvme_status_tc(&compl->status, - NVME_SCT_COMMAND_SPECIFIC, - NVME_SC_INVALID_QUEUE_IDENTIFIER); - return (1); - } + /* Only support Physically Contiguous queues */ + if ((command->cdw11 & 
NVME_CMD_CDW11_PC) == 0) { + WPRINTF("%s unsupported non-contig (list-based) " + "create i/o completion queue", + __func__); - ncq = &sc->compl_queues[qid]; - ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; - ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; - ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (1); + } - ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, - command->prp1, - sizeof(struct nvme_command) * (size_t)ncq->size); + if ((qid == 0) || (qid > sc->num_cqueues) || + (sc->compl_queues[qid].qbase != NULL)) { + WPRINTF("%s queue index %u > num_cqueues %u", + __func__, qid, sc->num_cqueues); + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_INVALID_QUEUE_IDENTIFIER); + return (1); + } + + ncq = &sc->compl_queues[qid]; + ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; + ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; + if (ncq->intr_vec > (sc->max_queues + 1)) { + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_INVALID_INTERRUPT_VECTOR); + return (1); + } - pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); - } else { - /* - * Non-contig completion queue unsupported. 
+ ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); + if ((ncq->size < 2) || (ncq->size > sc->max_qentries)) { + /* + * Queues must specify at least two entries + * NOTE: "MAXIMUM QUEUE SIZE EXCEEDED" was renamed to + * "INVALID QUEUE SIZE" in the NVM Express 1.3 Spec */ - WPRINTF(("%s unsupported non-contig (list-based) " - "create i/o completion queue", - __func__)); - - /* 0x12 = Invalid Use of Controller Memory Buffer */ - pci_nvme_status_genc(&compl->status, 0x12); + pci_nvme_status_tc(&compl->status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED); + return (1); } + ncq->head = ncq->tail = 0; + ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, + command->prp1, + sizeof(struct nvme_command) * (size_t)ncq->size); + + pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); + return (1); } @@ -762,33 +1080,53 @@ static int nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2; + uint32_t logsize = 0; uint8_t logpage = command->cdw10 & 0xFF; - DPRINTF(("%s log page %u len %u", __func__, logpage, logsize)); + DPRINTF("%s log page %u len %u", __func__, logpage, logsize); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); + /* + * Command specifies the number of dwords to return in fields NUMDU + * and NUMDL. This is a zero-based value. 
+ */ + logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1; + logsize *= sizeof(uint32_t); + switch (logpage) { case NVME_LOG_ERROR: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->err_log, logsize, + command->prp2, (uint8_t *)&sc->err_log, + MIN(logsize, sizeof(sc->err_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_HEALTH_INFORMATION: - /* TODO: present some smart info */ + pthread_mutex_lock(&sc->mtx); + memcpy(&sc->health_log.data_units_read, &sc->read_data_units, + sizeof(sc->health_log.data_units_read)); + memcpy(&sc->health_log.data_units_written, &sc->write_data_units, + sizeof(sc->health_log.data_units_written)); + memcpy(&sc->health_log.host_read_commands, &sc->read_commands, + sizeof(sc->health_log.host_read_commands)); + memcpy(&sc->health_log.host_write_commands, &sc->write_commands, + sizeof(sc->health_log.host_write_commands)); + pthread_mutex_unlock(&sc->mtx); + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->health_log, logsize, + command->prp2, (uint8_t *)&sc->health_log, + MIN(logsize, sizeof(sc->health_log)), NVME_COPY_TO_PRP); break; case NVME_LOG_FIRMWARE_SLOT: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->fw_log, logsize, + command->prp2, (uint8_t *)&sc->fw_log, + MIN(logsize, sizeof(sc->fw_log)), NVME_COPY_TO_PRP); break; default: - WPRINTF(("%s get log page %x command not supported", - __func__, logpage)); + DPRINTF("%s get log page %x command not supported", + __func__, logpage); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_LOG_PAGE); @@ -802,9 +1140,12 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { void *dest; + uint16_t status = 0; + + DPRINTF("%s identify 0x%x nsid 0x%x", __func__, + command->cdw10 & 0xFF, command->nsid); - DPRINTF(("%s identify 0x%x nsid 0x%x", __func__, - command->cdw10 & 0xFF, command->nsid)); + 
pci_nvme_status_genc(&status, NVME_SC_SUCCESS); switch (command->cdw10 & 0xFF) { case 0x00: /* return Identify Namespace data structure */ @@ -821,230 +1162,359 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(uint32_t) * 1024); + /* All unused entries shall be zero */ + bzero(dest, sizeof(uint32_t) * 1024); ((uint32_t *)dest)[0] = 1; - ((uint32_t *)dest)[1] = 0; break; - case 0x11: - pci_nvme_status_genc(&compl->status, - NVME_SC_INVALID_NAMESPACE_OR_FORMAT); - return (1); case 0x03: /* list of NSID structures in CDW1.NSID, 4096 bytes */ - case 0x10: - case 0x12: - case 0x13: - case 0x14: - case 0x15: + if (command->nsid != 1) { + pci_nvme_status_genc(&status, + NVME_SC_INVALID_NAMESPACE_OR_FORMAT); + break; + } + dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, + sizeof(uint32_t) * 1024); + /* All bytes after the descriptor shall be zero */ + bzero(dest, sizeof(uint32_t) * 1024); + + /* Return NIDT=1 (i.e. 
EUI64) descriptor */ + ((uint8_t *)dest)[0] = 1; + ((uint8_t *)dest)[1] = sizeof(uint64_t); + bcopy(sc->nsdata.eui64, ((uint8_t *)dest) + 4, sizeof(uint64_t)); + break; default: - DPRINTF(("%s unsupported identify command requested 0x%x", - __func__, command->cdw10 & 0xFF)); - pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); - return (1); + DPRINTF("%s unsupported identify command requested 0x%x", + __func__, command->cdw10 & 0xFF); + pci_nvme_status_genc(&status, NVME_SC_INVALID_FIELD); + break; } - pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); + compl->status = status; return (1); } -static int -nvme_set_feature_queues(struct pci_nvme_softc* sc, struct nvme_command* command, - struct nvme_completion* compl) +static const char * +nvme_fid_to_name(uint8_t fid) +{ + const char *name; + + switch (fid) { + case NVME_FEAT_ARBITRATION: + name = "Arbitration"; + break; + case NVME_FEAT_POWER_MANAGEMENT: + name = "Power Management"; + break; + case NVME_FEAT_LBA_RANGE_TYPE: + name = "LBA Range Type"; + break; + case NVME_FEAT_TEMPERATURE_THRESHOLD: + name = "Temperature Threshold"; + break; + case NVME_FEAT_ERROR_RECOVERY: + name = "Error Recovery"; + break; + case NVME_FEAT_VOLATILE_WRITE_CACHE: + name = "Volatile Write Cache"; + break; + case NVME_FEAT_NUMBER_OF_QUEUES: + name = "Number of Queues"; + break; + case NVME_FEAT_INTERRUPT_COALESCING: + name = "Interrupt Coalescing"; + break; + case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: + name = "Interrupt Vector Configuration"; + break; + case NVME_FEAT_WRITE_ATOMICITY: + name = "Write Atomicity Normal"; + break; + case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: + name = "Asynchronous Event Configuration"; + break; + case NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION: + name = "Autonomous Power State Transition"; + break; + case NVME_FEAT_HOST_MEMORY_BUFFER: + name = "Host Memory Buffer"; + break; + case NVME_FEAT_TIMESTAMP: + name = "Timestamp"; + break; + case NVME_FEAT_KEEP_ALIVE_TIMER: + name = "Keep 
Alive Timer"; + break; + case NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT: + name = "Host Controlled Thermal Management"; + break; + case NVME_FEAT_NON_OP_POWER_STATE_CONFIG: + name = "Non-Operation Power State Config"; + break; + case NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG: + name = "Read Recovery Level Config"; + break; + case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG: + name = "Predictable Latency Mode Config"; + break; + case NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW: + name = "Predictable Latency Mode Window"; + break; + case NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES: + name = "LBA Status Information Report Interval"; + break; + case NVME_FEAT_HOST_BEHAVIOR_SUPPORT: + name = "Host Behavior Support"; + break; + case NVME_FEAT_SANITIZE_CONFIG: + name = "Sanitize Config"; + break; + case NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION: + name = "Endurance Group Event Configuration"; + break; + case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: + name = "Software Progress Marker"; + break; + case NVME_FEAT_HOST_IDENTIFIER: + name = "Host Identifier"; + break; + case NVME_FEAT_RESERVATION_NOTIFICATION_MASK: + name = "Reservation Notification Mask"; + break; + case NVME_FEAT_RESERVATION_PERSISTENCE: + name = "Reservation Persistence"; + break; + case NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG: + name = "Namespace Write Protection Config"; + break; + default: + name = "Unknown"; + break; + } + + return (name); +} + +static void +nvme_feature_invalid_cb(struct pci_nvme_softc *sc, + struct nvme_feature_obj *feat, + struct nvme_command *command, + struct nvme_completion *compl) +{ + + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); +} + +static void +nvme_feature_iv_config(struct pci_nvme_softc *sc, + struct nvme_feature_obj *feat, + struct nvme_command *command, + struct nvme_completion *compl) +{ + uint32_t i; + uint32_t cdw11 = command->cdw11; + uint16_t iv; + bool cd; + + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + + iv = cdw11 & 0xffff; + cd = cdw11 
& (1 << 16); + + if (iv > (sc->max_queues + 1)) { + return; + } + + /* No Interrupt Coalescing (i.e. not Coalescing Disable) for Admin Q */ + if ((iv == 0) && !cd) + return; + + /* Requested Interrupt Vector must be used by a CQ */ + for (i = 0; i < sc->num_cqueues + 1; i++) { + if (sc->compl_queues[i].intr_vec == iv) { + pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); + } + } + +} + +static void +nvme_feature_num_queues(struct pci_nvme_softc *sc, + struct nvme_feature_obj *feat, + struct nvme_command *command, + struct nvme_completion *compl) { uint16_t nqr; /* Number of Queues Requested */ + if (sc->num_q_is_set) { + WPRINTF("%s: Number of Queues already set", __func__); + pci_nvme_status_genc(&compl->status, + NVME_SC_COMMAND_SEQUENCE_ERROR); + return; + } + nqr = command->cdw11 & 0xFFFF; if (nqr == 0xffff) { - WPRINTF(("%s: Illegal NSQR value %#x", __func__, nqr)); + WPRINTF("%s: Illegal NSQR value %#x", __func__, nqr); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); - return (-1); + return; } sc->num_squeues = ONE_BASED(nqr); if (sc->num_squeues > sc->max_queues) { - DPRINTF(("NSQR=%u is greater than max %u", sc->num_squeues, - sc->max_queues)); + DPRINTF("NSQR=%u is greater than max %u", sc->num_squeues, + sc->max_queues); sc->num_squeues = sc->max_queues; } nqr = (command->cdw11 >> 16) & 0xFFFF; if (nqr == 0xffff) { - WPRINTF(("%s: Illegal NCQR value %#x", __func__, nqr)); + WPRINTF("%s: Illegal NCQR value %#x", __func__, nqr); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); - return (-1); + return; } sc->num_cqueues = ONE_BASED(nqr); if (sc->num_cqueues > sc->max_queues) { - DPRINTF(("NCQR=%u is greater than max %u", sc->num_cqueues, - sc->max_queues)); + DPRINTF("NCQR=%u is greater than max %u", sc->num_cqueues, + sc->max_queues); sc->num_cqueues = sc->max_queues; } + /* Patch the command value which will be saved on callback's return */ + command->cdw11 = NVME_FEATURE_NUM_QUEUES(sc); compl->cdw0 = 
NVME_FEATURE_NUM_QUEUES(sc); - return (0); + sc->num_q_is_set = true; } static int -nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, - struct nvme_completion* compl) +nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command, + struct nvme_completion *compl) { - int feature = command->cdw10 & 0xFF; - uint32_t iv; + struct nvme_feature_obj *feat; + uint32_t nsid = command->nsid; + uint8_t fid = command->cdw10 & 0xFF; + + DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); + + if (fid >= NVME_FID_MAX) { + DPRINTF("%s invalid feature 0x%x", __func__, fid); + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (1); + } + feat = &sc->feat[fid]; + + if (!feat->namespace_specific && + !((nsid == 0) || (nsid == NVME_GLOBAL_NAMESPACE_TAG))) { + pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_FEATURE_NOT_NS_SPECIFIC); + return (1); + } - DPRINTF(("%s feature 0x%x", __func__, feature)); compl->cdw0 = 0; + pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); - switch (feature) { - case NVME_FEAT_ARBITRATION: - DPRINTF((" arbitration 0x%x", command->cdw11)); - break; - case NVME_FEAT_POWER_MANAGEMENT: - DPRINTF((" power management 0x%x", command->cdw11)); - break; - case NVME_FEAT_LBA_RANGE_TYPE: - DPRINTF((" lba range 0x%x", command->cdw11)); - break; - case NVME_FEAT_TEMPERATURE_THRESHOLD: - DPRINTF((" temperature threshold 0x%x", command->cdw11)); - break; - case NVME_FEAT_ERROR_RECOVERY: - DPRINTF((" error recovery 0x%x", command->cdw11)); - break; - case NVME_FEAT_VOLATILE_WRITE_CACHE: - DPRINTF((" volatile write cache 0x%x", command->cdw11)); - break; - case NVME_FEAT_NUMBER_OF_QUEUES: - nvme_set_feature_queues(sc, command, compl); - break; - case NVME_FEAT_INTERRUPT_COALESCING: - DPRINTF((" interrupt coalescing 0x%x", command->cdw11)); + if (feat->set) + feat->set(sc, feat, command, compl); - /* in uS */ - sc->intr_coales_aggr_time = ((command->cdw11 >> 8) 
& 0xFF)*100; + if (compl->status == NVME_SC_SUCCESS) + feat->cdw11 = command->cdw11; - sc->intr_coales_aggr_thresh = command->cdw11 & 0xFF; - break; - case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: - iv = command->cdw11 & 0xFFFF; - - DPRINTF((" interrupt vector configuration 0x%x", - command->cdw11)); - - for (uint32_t i = 0; i < sc->num_cqueues + 1; i++) { - if (sc->compl_queues[i].intr_vec == iv) { - if (command->cdw11 & (1 << 16)) - sc->compl_queues[i].intr_en |= - NVME_CQ_INTCOAL; - else - sc->compl_queues[i].intr_en &= - ~NVME_CQ_INTCOAL; - } - } - break; - case NVME_FEAT_WRITE_ATOMICITY: - DPRINTF((" write atomicity 0x%x", command->cdw11)); - break; - case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: - DPRINTF((" async event configuration 0x%x", - command->cdw11)); - sc->async_ev_config = command->cdw11; - break; - case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: - DPRINTF((" software progress marker 0x%x", - command->cdw11)); - break; - case 0x0C: - DPRINTF((" autonomous power state transition 0x%x", - command->cdw11)); - break; - default: - WPRINTF(("%s invalid feature", __func__)); + return (0); +} + +static int +nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, + struct nvme_completion* compl) +{ + struct nvme_feature_obj *feat; + uint8_t fid = command->cdw10 & 0xFF; + + DPRINTF("%s: Feature ID 0x%x (%s)", __func__, fid, nvme_fid_to_name(fid)); + + if (fid >= NVME_FID_MAX) { + DPRINTF("%s invalid feature 0x%x", __func__, fid); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } + compl->cdw0 = 0; pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); - return (1); + + feat = &sc->feat[fid]; + if (feat->get) { + feat->get(sc, feat, command, compl); + } + + if (compl->status == NVME_SC_SUCCESS) { + compl->cdw0 = feat->cdw11; + } + + return (0); } static int -nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, +nvme_opc_format_nvm(struct pci_nvme_softc* sc, struct nvme_command* command, struct 
nvme_completion* compl) { - int feature = command->cdw10 & 0xFF; + uint8_t ses, lbaf, pi; - DPRINTF(("%s feature 0x%x", __func__, feature)); + /* Only supports Secure Erase Setting - User Data Erase */ + ses = (command->cdw10 >> 9) & 0x7; + if (ses > 0x1) { + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (1); + } - compl->cdw0 = 0; + /* Only supports a single LBA Format */ + lbaf = command->cdw10 & 0xf; + if (lbaf != 0) { + pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_INVALID_FORMAT); + return (1); + } - switch (feature) { - case NVME_FEAT_ARBITRATION: - DPRINTF((" arbitration")); - break; - case NVME_FEAT_POWER_MANAGEMENT: - DPRINTF((" power management")); - break; - case NVME_FEAT_LBA_RANGE_TYPE: - DPRINTF((" lba range")); - break; - case NVME_FEAT_TEMPERATURE_THRESHOLD: - DPRINTF((" temperature threshold")); - switch ((command->cdw11 >> 20) & 0x3) { - case 0: - /* Over temp threshold */ - compl->cdw0 = 0xFFFF; - break; - case 1: - /* Under temp threshold */ - compl->cdw0 = 0; - break; - default: - WPRINTF((" invalid threshold type select")); + /* Doesn't support Protection Infomation */ + pi = (command->cdw10 >> 5) & 0x7; + if (pi != 0) { + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (1); + } + + if (sc->nvstore.type == NVME_STOR_RAM) { + if (sc->nvstore.ctx) + free(sc->nvstore.ctx); + sc->nvstore.ctx = calloc(1, sc->nvstore.size); + pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); + } else { + struct pci_nvme_ioreq *req; + int err; + + req = pci_nvme_get_ioreq(sc); + if (req == NULL) { pci_nvme_status_genc(&compl->status, - NVME_SC_INVALID_FIELD); + NVME_SC_INTERNAL_DEVICE_ERROR); + WPRINTF("%s: unable to allocate IO req", __func__); return (1); } - break; - case NVME_FEAT_ERROR_RECOVERY: - DPRINTF((" error recovery")); - break; - case NVME_FEAT_VOLATILE_WRITE_CACHE: - DPRINTF((" volatile write cache")); - break; - case NVME_FEAT_NUMBER_OF_QUEUES: - compl->cdw0 = 
NVME_FEATURE_NUM_QUEUES(sc); - - DPRINTF((" number of queues (submit %u, completion %u)", - compl->cdw0 & 0xFFFF, - (compl->cdw0 >> 16) & 0xFFFF)); + req->nvme_sq = &sc->submit_queues[0]; + req->sqid = 0; + req->opc = command->opc; + req->cid = command->cid; + req->nsid = command->nsid; + + req->io_req.br_offset = 0; + req->io_req.br_resid = sc->nvstore.size; + req->io_req.br_callback = pci_nvme_io_done; - break; - case NVME_FEAT_INTERRUPT_COALESCING: - DPRINTF((" interrupt coalescing")); - break; - case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: - DPRINTF((" interrupt vector configuration")); - break; - case NVME_FEAT_WRITE_ATOMICITY: - DPRINTF((" write atomicity")); - break; - case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: - DPRINTF((" async event configuration")); - sc->async_ev_config = command->cdw11; - break; - case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: - DPRINTF((" software progress marker")); - break; - case 0x0C: - DPRINTF((" autonomous power state transition")); - break; - default: - WPRINTF(("%s invalid feature 0x%x", __func__, feature)); - pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); - return (1); + err = blockif_delete(sc->nvstore.ctx, &req->io_req); + if (err) { + pci_nvme_status_genc(&compl->status, + NVME_SC_INTERNAL_DEVICE_ERROR); + pci_nvme_release_ioreq(sc, req); + } } - pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); return (1); } @@ -1052,8 +1522,8 @@ static int nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - DPRINTF(("%s submission queue %u, command ID 0x%x", __func__, - command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF)); + DPRINTF("%s submission queue %u, command ID 0x%x", __func__, + command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF); /* TODO: search for the command ID and abort it */ @@ -1062,25 +1532,34 @@ nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, return (1); } -#ifdef __FreeBSD__ static int nvme_opc_async_event_req(struct 
pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - DPRINTF(("%s async event request 0x%x", __func__, command->cdw11)); + DPRINTF("%s async event request 0x%x", __func__, command->cdw11); + + /* Don't exceed the Async Event Request Limit (AERL). */ + if (pci_nvme_aer_limit_reached(sc)) { + pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); + return (1); + } + + if (pci_nvme_aer_add(sc, command->cid)) { + pci_nvme_status_tc(&compl->status, NVME_SCT_GENERIC, + NVME_SC_INTERNAL_DEVICE_ERROR); + return (1); + } /* - * TODO: raise events when they happen based on the Set Features cmd. + * Raise events when they happen based on the Set Features cmd. * These events happen async, so only set completion successful if * there is an event reflective of the request to get event. */ - pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, - NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); + compl->status = NVME_NO_STATUS; + return (0); } -#else -/* This is kept behind an ifdef while it's unused to appease the compiler. 
*/ -#endif /* __FreeBSD__ */ static void pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) @@ -1091,20 +1570,15 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) struct nvme_completion_queue *cq; uint16_t sqhead; - DPRINTF(("%s index %u", __func__, (uint32_t)value)); + DPRINTF("%s index %u", __func__, (uint32_t)value); sq = &sc->submit_queues[0]; cq = &sc->compl_queues[0]; - sqhead = atomic_load_acq_short(&sq->head); + pthread_mutex_lock(&sq->mtx); - if (atomic_testandset_int(&sq->busy, 1)) { - DPRINTF(("%s SQ busy, head %u, tail %u", - __func__, sqhead, sq->tail)); - return; - } - - DPRINTF(("sqhead %u, tail %u", sqhead, sq->tail)); + sqhead = sq->head; + DPRINTF("sqhead %u, tail %u", sqhead, sq->tail); while (sqhead != atomic_load_acq_short(&sq->tail)) { cmd = &(sq->qbase)[sqhead]; @@ -1113,205 +1587,226 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) switch (cmd->opc) { case NVME_OPC_DELETE_IO_SQ: - DPRINTF(("%s command DELETE_IO_SQ", __func__)); + DPRINTF("%s command DELETE_IO_SQ", __func__); nvme_opc_delete_io_sq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_SQ: - DPRINTF(("%s command CREATE_IO_SQ", __func__)); + DPRINTF("%s command CREATE_IO_SQ", __func__); nvme_opc_create_io_sq(sc, cmd, &compl); break; case NVME_OPC_DELETE_IO_CQ: - DPRINTF(("%s command DELETE_IO_CQ", __func__)); + DPRINTF("%s command DELETE_IO_CQ", __func__); nvme_opc_delete_io_cq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_CQ: - DPRINTF(("%s command CREATE_IO_CQ", __func__)); + DPRINTF("%s command CREATE_IO_CQ", __func__); nvme_opc_create_io_cq(sc, cmd, &compl); break; case NVME_OPC_GET_LOG_PAGE: - DPRINTF(("%s command GET_LOG_PAGE", __func__)); + DPRINTF("%s command GET_LOG_PAGE", __func__); nvme_opc_get_log_page(sc, cmd, &compl); break; case NVME_OPC_IDENTIFY: - DPRINTF(("%s command IDENTIFY", __func__)); + DPRINTF("%s command IDENTIFY", __func__); nvme_opc_identify(sc, cmd, &compl); break; case NVME_OPC_ABORT: - 
DPRINTF(("%s command ABORT", __func__)); + DPRINTF("%s command ABORT", __func__); nvme_opc_abort(sc, cmd, &compl); break; case NVME_OPC_SET_FEATURES: - DPRINTF(("%s command SET_FEATURES", __func__)); + DPRINTF("%s command SET_FEATURES", __func__); nvme_opc_set_features(sc, cmd, &compl); break; case NVME_OPC_GET_FEATURES: - DPRINTF(("%s command GET_FEATURES", __func__)); + DPRINTF("%s command GET_FEATURES", __func__); nvme_opc_get_features(sc, cmd, &compl); break; + case NVME_OPC_FIRMWARE_ACTIVATE: + DPRINTF("%s command FIRMWARE_ACTIVATE", __func__); + pci_nvme_status_tc(&compl.status, + NVME_SCT_COMMAND_SPECIFIC, + NVME_SC_INVALID_FIRMWARE_SLOT); + break; case NVME_OPC_ASYNC_EVENT_REQUEST: - DPRINTF(("%s command ASYNC_EVENT_REQ", __func__)); - /* XXX dont care, unhandled for now + DPRINTF("%s command ASYNC_EVENT_REQ", __func__); nvme_opc_async_event_req(sc, cmd, &compl); - */ + break; + case NVME_OPC_FORMAT_NVM: + DPRINTF("%s command FORMAT_NVM", __func__); + if ((sc->ctrldata.oacs & + (1 << NVME_CTRLR_DATA_OACS_FORMAT_SHIFT)) == 0) { + pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); + } compl.status = NVME_NO_STATUS; + nvme_opc_format_nvm(sc, cmd, &compl); break; default: - WPRINTF(("0x%x command is not implemented", - cmd->opc)); + DPRINTF("0x%x command is not implemented", + cmd->opc); pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); } sqhead = (sqhead + 1) % sq->size; if (NVME_COMPLETION_VALID(compl)) { - struct nvme_completion *cp; - int phase; - - cp = &(cq->qbase)[cq->tail]; - cp->cdw0 = compl.cdw0; - cp->sqid = 0; - cp->sqhd = sqhead; - cp->cid = cmd->cid; - - phase = NVME_STATUS_GET_P(cp->status); - cp->status = compl.status; - pci_nvme_toggle_phase(&cp->status, phase); - - cq->tail = (cq->tail + 1) % cq->size; + pci_nvme_cq_update(sc, &sc->compl_queues[0], + compl.cdw0, + cmd->cid, + 0, /* SQID */ + compl.status); } } - DPRINTF(("setting sqhead %u", sqhead)); - atomic_store_short(&sq->head, sqhead); - atomic_store_int(&sq->busy, 
0); + DPRINTF("setting sqhead %u", sqhead); + sq->head = sqhead; if (cq->head != cq->tail) pci_generate_msix(sc->nsc_pi, 0); + pthread_mutex_unlock(&sq->mtx); } -static int -pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, - uint64_t gpaddr, size_t size, int do_write, uint64_t lba) +/* + * Update the Write and Read statistics reported in SMART data + * + * NVMe defines "data unit" as thousand's of 512 byte blocks and is rounded up. + * E.g. 1 data unit is 1 - 1,000 512 byte blocks. 3 data units are 2,001 - 3,000 + * 512 byte blocks. Rounding up is acheived by initializing the remainder to 999. + */ +static void +pci_nvme_stats_write_read_update(struct pci_nvme_softc *sc, uint8_t opc, + size_t bytes, uint16_t status) { - int iovidx; - if (req != NULL) { - /* concatenate contig block-iovs to minimize number of iovs */ - if ((req->prev_gpaddr + req->prev_size) == gpaddr) { - iovidx = req->io_req.br_iovcnt - 1; + pthread_mutex_lock(&sc->mtx); + switch (opc) { + case NVME_OPC_WRITE: + sc->write_commands++; + if (status != NVME_SC_SUCCESS) + break; + sc->write_dunits_remainder += (bytes / 512); + while (sc->write_dunits_remainder >= 1000) { + sc->write_data_units++; + sc->write_dunits_remainder -= 1000; + } + break; + case NVME_OPC_READ: + sc->read_commands++; + if (status != NVME_SC_SUCCESS) + break; + sc->read_dunits_remainder += (bytes / 512); + while (sc->read_dunits_remainder >= 1000) { + sc->read_data_units++; + sc->read_dunits_remainder -= 1000; + } + break; + default: + DPRINTF("%s: Invalid OPC 0x%02x for stats", __func__, opc); + break; + } + pthread_mutex_unlock(&sc->mtx); +} - req->io_req.br_iov[iovidx].iov_base = - paddr_guest2host(req->sc->nsc_pi->pi_vmctx, - req->prev_gpaddr, size); +/* + * Check if the combination of Starting LBA (slba) and Number of Logical + * Blocks (nlb) exceeds the range of the underlying storage. 
+ * + * Because NVMe specifies the SLBA in blocks as a uint64_t and blockif stores + * the capacity in bytes as a uint64_t, care must be taken to avoid integer + * overflow. + */ +static bool +pci_nvme_out_of_range(struct pci_nvme_blockstore *nvstore, uint64_t slba, + uint32_t nlb) +{ + size_t offset, bytes; - req->prev_size += size; - req->io_req.br_resid += size; + /* Overflow check of multiplying Starting LBA by the sector size */ + if (slba >> (64 - nvstore->sectsz_bits)) + return (true); - req->io_req.br_iov[iovidx].iov_len = req->prev_size; - } else { - pthread_mutex_lock(&req->mtx); + offset = slba << nvstore->sectsz_bits; + bytes = nlb << nvstore->sectsz_bits; - iovidx = req->io_req.br_iovcnt; - if (iovidx == NVME_MAX_BLOCKIOVS) { - int err = 0; + /* Overflow check of Number of Logical Blocks */ + if ((nvstore->size - offset) < bytes) + return (true); - DPRINTF(("large I/O, doing partial req")); + return (false); +} - iovidx = 0; - req->io_req.br_iovcnt = 0; +static int +pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, + uint64_t gpaddr, size_t size, int do_write, uint64_t lba) +{ + int iovidx; - req->io_req.br_callback = pci_nvme_io_partial; + if (req == NULL) + return (-1); - if (!do_write) - err = blockif_read(sc->nvstore.ctx, - &req->io_req); - else - err = blockif_write(sc->nvstore.ctx, - &req->io_req); + if (req->io_req.br_iovcnt == NVME_MAX_IOVEC) { + return (-1); + } - /* wait until req completes before cont */ - if (err == 0) - pthread_cond_wait(&req->cv, &req->mtx); - } - if (iovidx == 0) { - req->io_req.br_offset = lba; - req->io_req.br_resid = 0; - req->io_req.br_param = req; - } + /* concatenate contig block-iovs to minimize number of iovs */ + if ((req->prev_gpaddr + req->prev_size) == gpaddr) { + iovidx = req->io_req.br_iovcnt - 1; - req->io_req.br_iov[iovidx].iov_base = - paddr_guest2host(req->sc->nsc_pi->pi_vmctx, - gpaddr, size); + req->io_req.br_iov[iovidx].iov_base = + 
paddr_guest2host(req->sc->nsc_pi->pi_vmctx, + req->prev_gpaddr, size); - req->io_req.br_iov[iovidx].iov_len = size; + req->prev_size += size; + req->io_req.br_resid += size; - req->prev_gpaddr = gpaddr; - req->prev_size = size; - req->io_req.br_resid += size; + req->io_req.br_iov[iovidx].iov_len = req->prev_size; + } else { + iovidx = req->io_req.br_iovcnt; + if (iovidx == 0) { + req->io_req.br_offset = lba; + req->io_req.br_resid = 0; + req->io_req.br_param = req; + } - req->io_req.br_iovcnt++; + req->io_req.br_iov[iovidx].iov_base = + paddr_guest2host(req->sc->nsc_pi->pi_vmctx, + gpaddr, size); - pthread_mutex_unlock(&req->mtx); - } - } else { - /* RAM buffer: read/write directly */ - void *p = sc->nvstore.ctx; - void *gptr; + req->io_req.br_iov[iovidx].iov_len = size; - if ((lba + size) > sc->nvstore.size) { - WPRINTF(("%s write would overflow RAM", __func__)); - return (-1); - } + req->prev_gpaddr = gpaddr; + req->prev_size = size; + req->io_req.br_resid += size; - p = (void *)((uintptr_t)p + (uintptr_t)lba); - gptr = paddr_guest2host(sc->nsc_pi->pi_vmctx, gpaddr, size); - if (do_write) - memcpy(p, gptr, size); - else - memcpy(gptr, p, size); + req->io_req.br_iovcnt++; } + return (0); } static void pci_nvme_set_completion(struct pci_nvme_softc *sc, struct nvme_submission_queue *sq, int sqid, uint16_t cid, - uint32_t cdw0, uint16_t status, int ignore_busy) + uint32_t cdw0, uint16_t status) { struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; - struct nvme_completion *compl; - int phase; - DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", + DPRINTF("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), - NVME_STATUS_GET_SC(status))); - - pthread_mutex_lock(&cq->mtx); - - assert(cq->qbase != NULL); - - compl = &cq->qbase[cq->tail]; - - compl->cdw0 = cdw0; - compl->sqid = sqid; - compl->sqhd = atomic_load_acq_short(&sq->head); - compl->cid = cid; - - // toggle phase - phase = 
NVME_STATUS_GET_P(compl->status); - compl->status = status; - pci_nvme_toggle_phase(&compl->status, phase); + NVME_STATUS_GET_SC(status)); - cq->tail = (cq->tail + 1) % cq->size; - - pthread_mutex_unlock(&cq->mtx); + pci_nvme_cq_update(sc, cq, + 0, /* CDW0 */ + cid, + sqid, + status); if (cq->head != cq->tail) { if (cq->intr_en & NVME_CQ_INTEN) { pci_generate_msix(sc->nsc_pi, cq->intr_vec); } else { - DPRINTF(("%s: CQ%u interrupt disabled\n", - __func__, sq->cqid)); + DPRINTF("%s: CQ%u interrupt disabled", + __func__, sq->cqid); } } } @@ -1373,24 +1868,211 @@ pci_nvme_io_done(struct blockif_req *br, int err) struct nvme_submission_queue *sq = req->nvme_sq; uint16_t code, status = 0; - DPRINTF(("%s error %d %s", __func__, err, strerror(err))); + DPRINTF("%s error %d %s", __func__, err, strerror(err)); /* TODO return correct error */ code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; pci_nvme_status_genc(&status, code); - pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status, 0); + pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, 0, status); + pci_nvme_stats_write_read_update(req->sc, req->opc, + req->bytes, status); pci_nvme_release_ioreq(req->sc, req); } -static void -pci_nvme_io_partial(struct blockif_req *br, int err) +/* + * Implements the Flush command. The specification states: + * If a volatile write cache is not present, Flush commands complete + * successfully and have no effect + * in the description of the Volatile Write Cache (VWC) field of the Identify + * Controller data. Therefore, set status to Success if the command is + * not supported (i.e. RAM or as indicated by the blockif). 
+ */ +static bool +nvme_opc_flush(struct pci_nvme_softc *sc, + struct nvme_command *cmd, + struct pci_nvme_blockstore *nvstore, + struct pci_nvme_ioreq *req, + uint16_t *status) { - struct pci_nvme_ioreq *req = br->br_param; + bool pending = false; + + if (nvstore->type == NVME_STOR_RAM) { + pci_nvme_status_genc(status, NVME_SC_SUCCESS); + } else { + int err; + + req->io_req.br_callback = pci_nvme_io_done; + + err = blockif_flush(nvstore->ctx, &req->io_req); + switch (err) { + case 0: + pending = true; + break; + case EOPNOTSUPP: + pci_nvme_status_genc(status, NVME_SC_SUCCESS); + break; + default: + pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); + } + } - DPRINTF(("%s error %d %s", __func__, err, strerror(err))); + return (pending); +} + +static uint16_t +nvme_write_read_ram(struct pci_nvme_softc *sc, + struct pci_nvme_blockstore *nvstore, + uint64_t prp1, uint64_t prp2, + size_t offset, uint64_t bytes, + bool is_write) +{ + uint8_t *buf = nvstore->ctx; + enum nvme_copy_dir dir; + uint16_t status = 0; + + if (is_write) + dir = NVME_COPY_TO_PRP; + else + dir = NVME_COPY_FROM_PRP; - pthread_cond_signal(&req->cv); + if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, prp1, prp2, + buf + offset, bytes, dir)) + pci_nvme_status_genc(&status, + NVME_SC_DATA_TRANSFER_ERROR); + else + pci_nvme_status_genc(&status, NVME_SC_SUCCESS); + + return (status); +} + +static uint16_t +nvme_write_read_blockif(struct pci_nvme_softc *sc, + struct pci_nvme_blockstore *nvstore, + struct pci_nvme_ioreq *req, + uint64_t prp1, uint64_t prp2, + size_t offset, uint64_t bytes, + bool is_write) +{ + uint64_t size; + int err; + uint16_t status = NVME_NO_STATUS; + + size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes); + if (pci_nvme_append_iov_req(sc, req, prp1, + size, is_write, offset)) { + pci_nvme_status_genc(&status, + NVME_SC_DATA_TRANSFER_ERROR); + goto out; + } + + offset += size; + bytes -= size; + + if (bytes == 0) { + ; + } else if (bytes <= PAGE_SIZE) { + size = bytes; + if 
(pci_nvme_append_iov_req(sc, req, prp2, + size, is_write, offset)) { + pci_nvme_status_genc(&status, + NVME_SC_DATA_TRANSFER_ERROR); + goto out; + } + } else { + void *vmctx = sc->nsc_pi->pi_vmctx; + uint64_t *prp_list = &prp2; + uint64_t *last = prp_list; + + /* PRP2 is pointer to a physical region page list */ + while (bytes) { + /* Last entry in list points to the next list */ + if (prp_list == last) { + uint64_t prp = *prp_list; + + prp_list = paddr_guest2host(vmctx, prp, + PAGE_SIZE - (prp % PAGE_SIZE)); + last = prp_list + (NVME_PRP2_ITEMS - 1); + } + + size = MIN(bytes, PAGE_SIZE); + + if (pci_nvme_append_iov_req(sc, req, *prp_list, + size, is_write, offset)) { + pci_nvme_status_genc(&status, + NVME_SC_DATA_TRANSFER_ERROR); + goto out; + } + + offset += size; + bytes -= size; + + prp_list++; + } + } + req->io_req.br_callback = pci_nvme_io_done; + if (is_write) + err = blockif_write(nvstore->ctx, &req->io_req); + else + err = blockif_read(nvstore->ctx, &req->io_req); + + if (err) + pci_nvme_status_genc(&status, NVME_SC_DATA_TRANSFER_ERROR); +out: + return (status); +} + +static bool +nvme_opc_write_read(struct pci_nvme_softc *sc, + struct nvme_command *cmd, + struct pci_nvme_blockstore *nvstore, + struct pci_nvme_ioreq *req, + uint16_t *status) +{ + uint64_t lba, nblocks, bytes = 0; + size_t offset; + bool is_write = cmd->opc == NVME_OPC_WRITE; + bool pending = false; + + lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; + nblocks = (cmd->cdw12 & 0xFFFF) + 1; + if (pci_nvme_out_of_range(nvstore, lba, nblocks)) { + WPRINTF("%s command would exceed LBA range", __func__); + pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); + goto out; + } + + bytes = nblocks << nvstore->sectsz_bits; + if (bytes > NVME_MAX_DATA_SIZE) { + WPRINTF("%s command would exceed MDTS", __func__); + pci_nvme_status_genc(status, NVME_SC_INVALID_FIELD); + goto out; + } + + offset = lba << nvstore->sectsz_bits; + + req->bytes = bytes; + req->io_req.br_offset = lba; + + /* PRP bits 1:0 
must be zero */ + cmd->prp1 &= ~0x3UL; + cmd->prp2 &= ~0x3UL; + + if (nvstore->type == NVME_STOR_RAM) { + *status = nvme_write_read_ram(sc, nvstore, cmd->prp1, + cmd->prp2, offset, bytes, is_write); + } else { + *status = nvme_write_read_blockif(sc, nvstore, req, + cmd->prp1, cmd->prp2, offset, bytes, is_write); + + if (*status == NVME_NO_STATUS) + pending = true; + } +out: + if (!pending) + pci_nvme_stats_write_read_update(sc, cmd->opc, bytes, *status); + + return (pending); } static void @@ -1427,29 +2109,54 @@ pci_nvme_dealloc_sm(struct blockif_req *br, int err) if (done) { pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, - req->cid, 0, status, 0); + req->cid, 0, status); pci_nvme_release_ioreq(sc, req); } } -static int +static bool nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, struct nvme_command *cmd, struct pci_nvme_blockstore *nvstore, struct pci_nvme_ioreq *req, uint16_t *status) { - int err = -1; + struct nvme_dsm_range *range = NULL; + uint32_t nr, r, non_zero, dr; + int err; + bool pending = false; if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) { pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE); goto out; } + nr = cmd->cdw10 & 0xff; + + /* copy locally because a range entry could straddle PRPs */ + range = calloc(1, NVME_MAX_DSM_TRIM); + if (range == NULL) { + pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); + goto out; + } + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2, + (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP); + + /* Check for invalid ranges and the number of non-zero lengths */ + non_zero = 0; + for (r = 0; r <= nr; r++) { + if (pci_nvme_out_of_range(nvstore, + range[r].starting_lba, range[r].length)) { + pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE); + goto out; + } + if (range[r].length != 0) + non_zero++; + } + if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) { - struct nvme_dsm_range *range; - uint32_t nr, r; - int sectsz = sc->nvstore.sectsz; + size_t offset, bytes; + int sectsz_bits = 
sc->nvstore.sectsz_bits; /* * DSM calls are advisory only, and compliant controllers @@ -1460,23 +2167,20 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, goto out; } - if (req == NULL) { - pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); + /* If all ranges have a zero length, return Success */ + if (non_zero == 0) { + pci_nvme_status_genc(status, NVME_SC_SUCCESS); goto out; } - /* copy locally because a range entry could straddle PRPs */ - range = calloc(1, NVME_MAX_DSM_TRIM); - if (range == NULL) { + if (req == NULL) { pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); goto out; } - nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2, - (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP); - req->opc = cmd->opc; - req->cid = cmd->cid; - req->nsid = cmd->nsid; + offset = range[0].starting_lba << sectsz_bits; + bytes = range[0].length << sectsz_bits; + /* * If the request is for more than a single range, store * the ranges in the br_iov. Optimize for the common case @@ -1484,20 +2188,29 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, * * Note that NVMe Number of Ranges is a zero based value */ - nr = cmd->cdw10 & 0xff; - req->io_req.br_iovcnt = 0; - req->io_req.br_offset = range[0].starting_lba * sectsz; - req->io_req.br_resid = range[0].length * sectsz; + req->io_req.br_offset = offset; + req->io_req.br_resid = bytes; if (nr == 0) { req->io_req.br_callback = pci_nvme_io_done; } else { struct iovec *iov = req->io_req.br_iov; - for (r = 0; r <= nr; r++) { - iov[r].iov_base = (void *)(range[r].starting_lba * sectsz); - iov[r].iov_len = range[r].length * sectsz; + for (r = 0, dr = 0; r <= nr; r++) { + offset = range[r].starting_lba << sectsz_bits; + bytes = range[r].length << sectsz_bits; + if (bytes == 0) + continue; + + if ((nvstore->size - offset) < bytes) { + pci_nvme_status_genc(status, + NVME_SC_LBA_OUT_OF_RANGE); + goto out; + } + iov[dr].iov_base = (void *)offset; + iov[dr].iov_len = bytes; + dr++; } 
req->io_req.br_callback = pci_nvme_dealloc_sm; @@ -1506,17 +2219,18 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, * prev_size to track the number of entries */ req->prev_gpaddr = 0; - req->prev_size = r; + req->prev_size = dr; } err = blockif_delete(nvstore->ctx, &req->io_req); if (err) pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); - - free(range); + else + pending = true; } out: - return (err); + free(range); + return (pending); } static void @@ -1525,221 +2239,105 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) struct nvme_submission_queue *sq; uint16_t status = 0; uint16_t sqhead; - int err; /* handle all submissions up to sq->tail index */ sq = &sc->submit_queues[idx]; - if (atomic_testandset_int(&sq->busy, 1)) { - DPRINTF(("%s sqid %u busy", __func__, idx)); - return; - } + pthread_mutex_lock(&sq->mtx); - sqhead = atomic_load_acq_short(&sq->head); - - DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p", - idx, sqhead, sq->tail, sq->qbase)); + sqhead = sq->head; + DPRINTF("nvme_handle_io qid %u head %u tail %u cmdlist %p", + idx, sqhead, sq->tail, sq->qbase); while (sqhead != atomic_load_acq_short(&sq->tail)) { struct nvme_command *cmd; - struct pci_nvme_ioreq *req = NULL; - uint64_t lba; - uint64_t nblocks, bytes, size, cpsz; + struct pci_nvme_ioreq *req; + uint32_t nsid; + bool pending; - /* TODO: support scatter gather list handling */ + pending = false; + req = NULL; + status = 0; cmd = &sq->qbase[sqhead]; sqhead = (sqhead + 1) % sq->size; - lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10; - - if (cmd->opc == NVME_OPC_FLUSH) { - pci_nvme_status_genc(&status, NVME_SC_SUCCESS); - pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, - status, 1); - - continue; - } else if (cmd->opc == 0x08) { - /* TODO: write zeroes */ - WPRINTF(("%s write zeroes lba 0x%lx blocks %u", - __func__, lba, cmd->cdw12 & 0xFFFF)); - pci_nvme_status_genc(&status, NVME_SC_SUCCESS); - pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, - 
status, 1); - - continue; - } - - if (sc->nvstore.type == NVME_STOR_BLOCKIF) { - req = pci_nvme_get_ioreq(sc); - req->nvme_sq = sq; - req->sqid = idx; - } - - if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) { - if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req, - &status)) { - pci_nvme_set_completion(sc, sq, idx, cmd->cid, - 0, status, 1); - if (req) - pci_nvme_release_ioreq(sc, req); - } - continue; - } - - nblocks = (cmd->cdw12 & 0xFFFF) + 1; - - bytes = nblocks * sc->nvstore.sectsz; - - /* - * If data starts mid-page and flows into the next page, then - * increase page count - */ - - DPRINTF(("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu " - "(%lu-bytes)", - sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size, - cmd->opc == NVME_OPC_WRITE ? - "WRITE" : "READ", - lba, nblocks, bytes)); - - cmd->prp1 &= ~(0x03UL); - cmd->prp2 &= ~(0x03UL); - - DPRINTF((" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2)); - - size = bytes; - lba *= sc->nvstore.sectsz; - - cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE); - - if (cpsz > bytes) - cpsz = bytes; - - if (req != NULL) { - req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) | - cmd->cdw10; - req->opc = cmd->opc; - req->cid = cmd->cid; - req->nsid = cmd->nsid; - } - - err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz, - cmd->opc == NVME_OPC_WRITE, lba); - lba += cpsz; - size -= cpsz; - - if (size == 0) - goto iodone; - - if (size <= PAGE_SIZE) { - /* prp2 is second (and final) page in transfer */ - - err = pci_nvme_append_iov_req(sc, req, cmd->prp2, - size, - cmd->opc == NVME_OPC_WRITE, - lba); - } else { - uint64_t *prp_list; - int i; - - /* prp2 is pointer to a physical region page list */ - prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx, - cmd->prp2, PAGE_SIZE); - - i = 0; - while (size != 0) { - cpsz = MIN(size, PAGE_SIZE); - - /* - * Move to linked physical region page list - * in last item. 
- */ - if (i == (NVME_PRP2_ITEMS-1) && - size > PAGE_SIZE) { - assert((prp_list[i] & (PAGE_SIZE-1)) == 0); - prp_list = paddr_guest2host( - sc->nsc_pi->pi_vmctx, - prp_list[i], PAGE_SIZE); - i = 0; - } - if (prp_list[i] == 0) { - WPRINTF(("PRP2[%d] = 0 !!!", i)); - err = 1; - break; - } - - err = pci_nvme_append_iov_req(sc, req, - prp_list[i], cpsz, - cmd->opc == NVME_OPC_WRITE, lba); - if (err) - break; - - lba += cpsz; - size -= cpsz; - i++; - } - } - -iodone: - if (sc->nvstore.type == NVME_STOR_RAM) { - uint16_t code, status = 0; - - code = err ? NVME_SC_LBA_OUT_OF_RANGE : - NVME_SC_SUCCESS; - pci_nvme_status_genc(&status, code); - - pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, - status, 1); + nsid = le32toh(cmd->nsid); + if ((nsid == 0) || (nsid > sc->ctrldata.nn)) { + pci_nvme_status_genc(&status, + NVME_SC_INVALID_NAMESPACE_OR_FORMAT); + status |= + NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT; + goto complete; + } - continue; + req = pci_nvme_get_ioreq(sc); + if (req == NULL) { + pci_nvme_status_genc(&status, + NVME_SC_INTERNAL_DEVICE_ERROR); + WPRINTF("%s: unable to allocate IO req", __func__); + goto complete; } + req->nvme_sq = sq; + req->sqid = idx; + req->opc = cmd->opc; + req->cid = cmd->cid; + req->nsid = cmd->nsid; - - if (err) - goto do_error; - - req->io_req.br_callback = pci_nvme_io_done; - - err = 0; switch (cmd->opc) { + case NVME_OPC_FLUSH: + pending = nvme_opc_flush(sc, cmd, &sc->nvstore, + req, &status); + break; + case NVME_OPC_WRITE: case NVME_OPC_READ: - err = blockif_read(sc->nvstore.ctx, &req->io_req); + pending = nvme_opc_write_read(sc, cmd, &sc->nvstore, + req, &status); break; - case NVME_OPC_WRITE: - err = blockif_write(sc->nvstore.ctx, &req->io_req); + case NVME_OPC_WRITE_ZEROES: + /* TODO: write zeroes + WPRINTF("%s write zeroes lba 0x%lx blocks %u", + __func__, lba, cmd->cdw12 & 0xFFFF); */ + pci_nvme_status_genc(&status, NVME_SC_SUCCESS); break; - default: - WPRINTF(("%s unhandled io command 0x%x", - __func__, cmd->opc)); - 
err = 1; + case NVME_OPC_DATASET_MANAGEMENT: + pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, + req, &status); + break; + default: + WPRINTF("%s unhandled io command 0x%x", + __func__, cmd->opc); + pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE); } - -do_error: - if (err) { - uint16_t status = 0; - - pci_nvme_status_genc(&status, - NVME_SC_DATA_TRANSFER_ERROR); - +complete: + if (!pending) { pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, - status, 1); - pci_nvme_release_ioreq(sc, req); + status); + if (req != NULL) + pci_nvme_release_ioreq(sc, req); } } - atomic_store_short(&sq->head, sqhead); - atomic_store_int(&sq->busy, 0); + sq->head = sqhead; + + pthread_mutex_unlock(&sq->mtx); } static void pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t idx, int is_sq, uint64_t value) { - DPRINTF(("nvme doorbell %lu, %s, val 0x%lx", - idx, is_sq ? "SQ" : "CQ", value & 0xFFFF)); + DPRINTF("nvme doorbell %lu, %s, val 0x%lx", + idx, is_sq ? "SQ" : "CQ", value & 0xFFFF); if (is_sq) { + if (idx > sc->num_squeues) { + WPRINTF("%s queue index %lu overflow from " + "guest (max %u)", + __func__, idx, sc->num_squeues); + return; + } + atomic_store_short(&sc->submit_queues[idx].tail, (uint16_t)value); @@ -1748,22 +2346,23 @@ pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, } else { /* submission queue; handle new entries in SQ */ if (idx > sc->num_squeues) { - WPRINTF(("%s SQ index %lu overflow from " + WPRINTF("%s SQ index %lu overflow from " "guest (max %u)", - __func__, idx, sc->num_squeues)); + __func__, idx, sc->num_squeues); return; } pci_nvme_handle_io_cmd(sc, (uint16_t)idx); } } else { if (idx > sc->num_cqueues) { - WPRINTF(("%s queue index %lu overflow from " + WPRINTF("%s queue index %lu overflow from " "guest (max %u)", - __func__, idx, sc->num_cqueues)); + __func__, idx, sc->num_cqueues); return; } - sc->compl_queues[idx].head = (uint16_t)value; + atomic_store_short(&sc->compl_queues[idx].head, + 
(uint16_t)value); } } @@ -1774,46 +2373,46 @@ pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite) switch (offset) { case NVME_CR_CAP_LOW: - DPRINTF(("%s %s NVME_CR_CAP_LOW", func, s)); + DPRINTF("%s %s NVME_CR_CAP_LOW", func, s); break; case NVME_CR_CAP_HI: - DPRINTF(("%s %s NVME_CR_CAP_HI", func, s)); + DPRINTF("%s %s NVME_CR_CAP_HI", func, s); break; case NVME_CR_VS: - DPRINTF(("%s %s NVME_CR_VS", func, s)); + DPRINTF("%s %s NVME_CR_VS", func, s); break; case NVME_CR_INTMS: - DPRINTF(("%s %s NVME_CR_INTMS", func, s)); + DPRINTF("%s %s NVME_CR_INTMS", func, s); break; case NVME_CR_INTMC: - DPRINTF(("%s %s NVME_CR_INTMC", func, s)); + DPRINTF("%s %s NVME_CR_INTMC", func, s); break; case NVME_CR_CC: - DPRINTF(("%s %s NVME_CR_CC", func, s)); + DPRINTF("%s %s NVME_CR_CC", func, s); break; case NVME_CR_CSTS: - DPRINTF(("%s %s NVME_CR_CSTS", func, s)); + DPRINTF("%s %s NVME_CR_CSTS", func, s); break; case NVME_CR_NSSR: - DPRINTF(("%s %s NVME_CR_NSSR", func, s)); + DPRINTF("%s %s NVME_CR_NSSR", func, s); break; case NVME_CR_AQA: - DPRINTF(("%s %s NVME_CR_AQA", func, s)); + DPRINTF("%s %s NVME_CR_AQA", func, s); break; case NVME_CR_ASQ_LOW: - DPRINTF(("%s %s NVME_CR_ASQ_LOW", func, s)); + DPRINTF("%s %s NVME_CR_ASQ_LOW", func, s); break; case NVME_CR_ASQ_HI: - DPRINTF(("%s %s NVME_CR_ASQ_HI", func, s)); + DPRINTF("%s %s NVME_CR_ASQ_HI", func, s); break; case NVME_CR_ACQ_LOW: - DPRINTF(("%s %s NVME_CR_ACQ_LOW", func, s)); + DPRINTF("%s %s NVME_CR_ACQ_LOW", func, s); break; case NVME_CR_ACQ_HI: - DPRINTF(("%s %s NVME_CR_ACQ_HI", func, s)); + DPRINTF("%s %s NVME_CR_ACQ_HI", func, s); break; default: - DPRINTF(("unknown nvme bar-0 offset 0x%lx", offset)); + DPRINTF("unknown nvme bar-0 offset 0x%lx", offset); } } @@ -1830,9 +2429,9 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, int is_sq = (belloffset % 8) < 4; if (belloffset > ((sc->max_queues+1) * 8 - 4)) { - WPRINTF(("guest attempted an overflow write offset " + WPRINTF("guest 
attempted an overflow write offset " "0x%lx, val 0x%lx in %s", - offset, value, __func__)); + offset, value, __func__); return; } @@ -1840,13 +2439,13 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, return; } - DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx", - offset, size, value)); + DPRINTF("nvme-write offset 0x%lx, size %d, value 0x%lx", + offset, size, value); if (size != 4) { - WPRINTF(("guest wrote invalid size %d (offset 0x%lx, " + WPRINTF("guest wrote invalid size %d (offset 0x%lx, " "val 0x%lx) to bar0 in %s", - size, offset, value, __func__)); + size, offset, value, __func__); /* TODO: shutdown device */ return; } @@ -1872,12 +2471,12 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, case NVME_CR_CC: ccreg = (uint32_t)value; - DPRINTF(("%s NVME_CR_CC en %x css %x shn %x iosqes %u " + DPRINTF("%s NVME_CR_CC en %x css %x shn %x iosqes %u " "iocqes %u", __func__, NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg), NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg), - NVME_CC_GET_IOCQES(ccreg))); + NVME_CC_GET_IOCQES(ccreg)); if (NVME_CC_GET_SHN(ccreg)) { /* perform shutdown - flush out data to backend */ @@ -1931,8 +2530,8 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, (value << 32); break; default: - DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d", - __func__, offset, value, size)); + DPRINTF("%s unknown offset 0x%lx, value 0x%lx size %d", + __func__, offset, value, size); } pthread_mutex_unlock(&sc->mtx); } @@ -1945,8 +2544,8 @@ pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { - DPRINTF(("nvme-write baridx %d, msix: off 0x%lx, size %d, " - " value 0x%lx", baridx, offset, size, value)); + DPRINTF("nvme-write baridx %d, msix: off 0x%lx, size %d, " + " value 0x%lx", baridx, offset, size, value); pci_emul_msix_twrite(pi, offset, size, value); return; @@ -1958,8 +2557,8 @@ pci_nvme_write(struct 
vmctx *ctx, int vcpu, struct pci_devinst *pi, break; default: - DPRINTF(("%s unknown baridx %d, val 0x%lx", - __func__, baridx, value)); + DPRINTF("%s unknown baridx %d, val 0x%lx", + __func__, baridx, value); } } @@ -1977,7 +2576,7 @@ static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, pthread_mutex_unlock(&sc->mtx); } else { value = 0; - WPRINTF(("pci_nvme: read invalid offset %ld", offset)); + WPRINTF("pci_nvme: read invalid offset %ld", offset); } switch (size) { @@ -1992,8 +2591,8 @@ static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, break; } - DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x", - offset, size, (uint32_t)value)); + DPRINTF(" nvme-read offset 0x%lx, size %d -> value 0x%x", + offset, size, (uint32_t)value); return (value); } @@ -2008,8 +2607,8 @@ pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { - DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d", - baridx, offset, size)); + DPRINTF("nvme-read bar: %d, msix: regoff 0x%lx, size %d", + baridx, offset, size); return pci_emul_msix_tread(pi, offset, size); } @@ -2019,7 +2618,7 @@ pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, return pci_nvme_read_bar_0(sc, offset, size); default: - DPRINTF(("unknown bar %d, 0x%lx", baridx, offset)); + DPRINTF("unknown bar %d, 0x%lx", baridx, offset); } return (0); @@ -2162,10 +2761,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq)); for (int i = 0; i < sc->ioslots; i++) { STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link); - pthread_mutex_init(&sc->ioreqs[i].mtx, NULL); - pthread_cond_init(&sc->ioreqs[i].cv, NULL); } - sc->intr_coales_aggr_thresh = 1; pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A); pci_set_cfgdata16(pi, PCIR_VENDOR, 0xFB5D); @@ -2185,30 +2781,30 @@ pci_nvme_init(struct vmctx *ctx, struct 
pci_devinst *pi, char *opts) 2 * sizeof(uint32_t) * (sc->max_queues + 1); pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN); - DPRINTF(("nvme membar size: %u", pci_membar_sz)); + DPRINTF("nvme membar size: %u", pci_membar_sz); error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz); if (error) { - WPRINTF(("%s pci alloc mem bar failed", __func__)); + WPRINTF("%s pci alloc mem bar failed", __func__); goto done; } error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { - WPRINTF(("%s pci add msixcap failed", __func__)); + WPRINTF("%s pci add msixcap failed", __func__); goto done; } error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP); if (error) { - WPRINTF(("%s pci add Express capability failed", __func__)); + WPRINTF("%s pci add Express capability failed", __func__); goto done; } pthread_mutex_init(&sc->mtx, NULL); sem_init(&sc->iosemlock, 0, sc->ioslots); - pci_nvme_reset(sc); + pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues); /* * Controller data depends on Namespace data so initialize Namespace * data first. 
@@ -2216,6 +2812,11 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore); pci_nvme_init_ctrldata(sc); pci_nvme_init_logpages(sc); + pci_nvme_init_features(sc); + + pci_nvme_aer_init(sc); + + pci_nvme_reset(sc); pci_lintr_request(pi); diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c index 664d07b731..c777c56cb1 100644 --- a/usr/src/cmd/bhyve/pci_passthru.c +++ b/usr/src/cmd/bhyve/pci_passthru.c @@ -622,7 +622,7 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) sc->psc_bar[i].addr = base; /* Allocate the BAR in the guest I/O or MMIO space */ - error = pci_emul_alloc_pbar(pi, i, base, bartype, size); + error = pci_emul_alloc_bar(pi, i, bartype, size); if (error) return (-1); @@ -849,6 +849,10 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (error) err(1, "vm_setup_pptdev_msix"); } + } else { + error = vm_disable_pptdev_msix(ctx, sc->pptfd); + if (error) + err(1, "vm_disable_pptdev_msix"); } return (0); } diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index a34bd864be..27d743a770 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -510,7 +510,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sc->vbsc_cfg.vbc_writeback = 0; sc->vbsc_cfg.max_discard_sectors = VTBLK_MAX_DISCARD_SECT; sc->vbsc_cfg.max_discard_seg = VTBLK_MAX_DISCARD_SEG; - sc->vbsc_cfg.discard_sector_alignment = sectsz / VTBLK_BSIZE; + sc->vbsc_cfg.discard_sector_alignment = MAX(sectsz, sts) / VTBLK_BSIZE; /* * Should we move some of this into virtio.c? 
Could diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c index 3a1cc46a06..ded9ca90ea 100644 --- a/usr/src/cmd/bhyve/pci_virtio_net.c +++ b/usr/src/cmd/bhyve/pci_virtio_net.c @@ -179,6 +179,7 @@ struct pci_vtnet_softc { struct nm_desc *vsc_nmd; int vsc_rx_ready; + bool features_negotiated; /* protected by rx_mtx */ int resetting; /* protected by tx_mtx */ uint64_t vsc_features; /* negotiated features */ @@ -228,6 +229,8 @@ pci_vtnet_reset(void *vsc) /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); + sc->features_negotiated = false; + /* Set sc->resetting and give a chance to the TX thread to stop. */ pthread_mutex_lock(&sc->tx_mtx); sc->resetting = 1; @@ -348,6 +351,11 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) assert(sc->vsc_dlpifd != -1); #endif + /* Features must be negotiated */ + if (!sc->features_negotiated) { + return; + } + /* * But, will be called when the rx ring hasn't yet * been set up. @@ -558,6 +566,11 @@ pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) */ assert(sc->vsc_nmd != NULL); + /* Features must be negotiated */ + if (!sc->features_negotiated) { + return; + } + /* * But, will be called when the rx ring hasn't yet * been set up. @@ -678,11 +691,14 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) /* * A qnotify means that the rx process can now begin. + * Enable RX only if features are negotiated. 
*/ - if (sc->vsc_rx_ready == 0) { + pthread_mutex_lock(&sc->rx_mtx); + if (sc->vsc_rx_ready == 0 && sc->features_negotiated) { sc->vsc_rx_ready = 1; vq_kick_disable(vq); } + pthread_mutex_unlock(&sc->rx_mtx); } static void @@ -1132,6 +1148,10 @@ pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) /* non-merge rx header is 2 bytes shorter */ sc->rx_vhdrlen -= 2; } + + pthread_mutex_lock(&sc->rx_mtx); + sc->features_negotiated = true; + pthread_mutex_unlock(&sc->rx_mtx); } struct pci_devemu pci_de_vnet = { diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c index b92be4dec3..587e80a91c 100644 --- a/usr/src/cmd/bhyve/pci_xhci.c +++ b/usr/src/cmd/bhyve/pci_xhci.c @@ -1849,6 +1849,9 @@ retry: DPRINTF(("pci_xhci[%d]: xfer->ndata %u", __LINE__, xfer->ndata)); + if (xfer->ndata <= 0) + goto errout; + if (epid == 1) { err = USB_ERR_NOT_STARTED; if (dev->dev_ue->ue_request != NULL) @@ -1863,6 +1866,7 @@ retry: err = USB_TO_XHCI_ERR(err); if ((err == XHCI_TRB_ERROR_SUCCESS) || + (err == XHCI_TRB_ERROR_STALL) || (err == XHCI_TRB_ERROR_SHORT_PKT)) { err = pci_xhci_xfer_complete(sc, xfer, slot, epid, &do_intr); if (err != XHCI_TRB_ERROR_SUCCESS) @@ -2813,7 +2817,8 @@ pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sc->hcsparams2 = XHCI_SET_HCSP2_ERSTMAX(XHCI_ERST_MAX) | XHCI_SET_HCSP2_IST(0x04); sc->hcsparams3 = 0; /* no latency */ - sc->hccparams1 = XHCI_SET_HCCP1_NSS(1) | /* no 2nd-streams */ + sc->hccparams1 = XHCI_SET_HCCP1_AC64(1) | /* 64-bit addrs */ + XHCI_SET_HCCP1_NSS(1) | /* no 2nd-streams */ XHCI_SET_HCCP1_SPC(1) | /* short packet */ XHCI_SET_HCCP1_MAXPSA(XHCI_STREAMS_MAX); sc->hccparams2 = XHCI_SET_HCCP2_LEC(1) | diff --git a/usr/src/cmd/bhyve/pctestdev.c b/usr/src/cmd/bhyve/pctestdev.c new file mode 100644 index 0000000000..be445e5c75 --- /dev/null +++ b/usr/src/cmd/bhyve/pctestdev.c @@ -0,0 +1,270 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Adam Fenn <adam@fenn.io> + * + 
* Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * Emulation of selected legacy test/debug interfaces expected by KVM-unit-tests + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/mman.h> +#include <machine/vmm.h> + +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <vmmapi.h> + +#include "debug.h" +#include "inout.h" +#include "mem.h" +#include "pctestdev.h" + +#define DEBUGEXIT_BASE 0xf4 +#define DEBUGEXIT_LEN 4 +#define DEBUGEXIT_NAME "isa-debug-exit" + +#define IOMEM_BASE 0xff000000 +#define IOMEM_LEN 0x10000 +#define IOMEM_NAME "pc-testdev-iomem" + +#define IOPORT_BASE 0xe0 +#define IOPORT_LEN 4 +#define IOPORT_NAME "pc-testdev-ioport" + +#define IRQ_BASE 0x2000 +#define IRQ_IOAPIC_PINCOUNT_MIN 24 +#define IRQ_IOAPIC_PINCOUNT_MAX 32 +#define IRQ_NAME "pc-testdev-irq-line" + +#define PCTESTDEV_NAME "pc-testdev" + +static bool pctestdev_inited; +static uint8_t pctestdev_iomem_buf[IOMEM_LEN]; +static uint32_t pctestdev_ioport_data; + +static int pctestdev_debugexit_io(struct vmctx *ctx, int vcpu, int in, + int port, int bytes, uint32_t *eax, void *arg); +static int pctestdev_iomem_io(struct vmctx *ctx, int vcpu, int dir, + uint64_t addr, int size, uint64_t *val, void *arg1, + long arg2); +static int pctestdev_ioport_io(struct vmctx *ctx, int vcpu, int in, + int port, int bytes, uint32_t *eax, void *arg); +static int pctestdev_irq_io(struct vmctx *ctx, int vcpu, int in, + int port, int bytes, uint32_t *eax, void *arg); + +const char * +pctestdev_getname(void) +{ + return (PCTESTDEV_NAME); +} + +int +pctestdev_parse(const char *opts) +{ + if (opts != NULL && *opts != '\0') + return (-1); + + return (0); +} + +int +pctestdev_init(struct vmctx *ctx) +{ + struct mem_range iomem; + struct inout_port debugexit, ioport, irq; + int err, pincount; + + if (pctestdev_inited) { + EPRINTLN("Only one pc-testdev device is allowed."); + + return (-1); + } + + err = vm_ioapic_pincount(ctx, &pincount); + if 
(err != 0) { + EPRINTLN("pc-testdev: Failed to obtain IOAPIC pin count."); + + return (-1); + } + if (pincount < IRQ_IOAPIC_PINCOUNT_MIN || + pincount > IRQ_IOAPIC_PINCOUNT_MAX) { + EPRINTLN("pc-testdev: Unsupported IOAPIC pin count: %d.", + pincount); + + return (-1); + } + + debugexit.name = DEBUGEXIT_NAME; + debugexit.port = DEBUGEXIT_BASE; + debugexit.size = DEBUGEXIT_LEN; + debugexit.flags = IOPORT_F_INOUT; + debugexit.handler = pctestdev_debugexit_io; + debugexit.arg = NULL; + + iomem.name = IOMEM_NAME; + iomem.flags = MEM_F_RW | MEM_F_IMMUTABLE; + iomem.handler = pctestdev_iomem_io; + iomem.arg1 = NULL; + iomem.arg2 = 0; + iomem.base = IOMEM_BASE; + iomem.size = IOMEM_LEN; + + ioport.name = IOPORT_NAME; + ioport.port = IOPORT_BASE; + ioport.size = IOPORT_LEN; + ioport.flags = IOPORT_F_INOUT; + ioport.handler = pctestdev_ioport_io; + ioport.arg = NULL; + + irq.name = IRQ_NAME; + irq.port = IRQ_BASE; + irq.size = pincount; + irq.flags = IOPORT_F_INOUT; + irq.handler = pctestdev_irq_io; + irq.arg = NULL; + + err = register_inout(&debugexit); + if (err != 0) + goto fail; + + err = register_inout(&ioport); + if (err != 0) + goto fail_after_debugexit_reg; + + err = register_inout(&irq); + if (err != 0) + goto fail_after_ioport_reg; + + err = register_mem(&iomem); + if (err != 0) + goto fail_after_irq_reg; + + pctestdev_inited = true; + + return (0); + +fail_after_irq_reg: + (void)unregister_inout(&irq); + +fail_after_ioport_reg: + (void)unregister_inout(&ioport); + +fail_after_debugexit_reg: + (void)unregister_inout(&debugexit); + +fail: + return (err); +} + +static int +pctestdev_debugexit_io(struct vmctx *ctx, int vcpu, int in, int port, + int bytes, uint32_t *eax, void *arg) +{ + if (in) + *eax = 0; + else + exit((*eax << 1) | 1); + + return (0); +} + +static int +pctestdev_iomem_io(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, + int size, uint64_t *val, void *arg1, long arg2) +{ + uint64_t offset; + + if (addr + size > IOMEM_BASE + IOMEM_LEN) + return 
(-1); + + offset = addr - IOMEM_BASE; + if (dir == MEM_F_READ) { + (void)memcpy(val, pctestdev_iomem_buf + offset, size); + } else { + assert(dir == MEM_F_WRITE); + (void)memcpy(pctestdev_iomem_buf + offset, val, size); + } + + return (0); +} + +static int +pctestdev_ioport_io(struct vmctx *ctx, int vcpu, int in, int port, + int bytes, uint32_t *eax, void *arg) +{ + uint32_t mask; + int lsb; + + if (port + bytes > IOPORT_BASE + IOPORT_LEN) + return (-1); + + lsb = (port & 0x3) * 8; + mask = (-1UL >> (32 - (bytes * 8))) << lsb; + + if (in) + *eax = (pctestdev_ioport_data & mask) >> lsb; + else { + pctestdev_ioport_data &= ~mask; + pctestdev_ioport_data |= *eax << lsb; + } + + return (0); +} + +static int +pctestdev_irq_io(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + int irq; + + if (bytes != 1) + return (-1); + + if (in) { + *eax = 0; + return (0); + } else { + irq = port - IRQ_BASE; + if (irq < 16) { + if (*eax) + return (vm_isa_assert_irq(ctx, irq, irq)); + else + return (vm_isa_deassert_irq(ctx, irq, irq)); + } else { + if (*eax) + return (vm_ioapic_assert_irq(ctx, irq)); + else + return (vm_ioapic_deassert_irq(ctx, irq)); + } + } +} diff --git a/usr/src/cmd/bhyve/pctestdev.h b/usr/src/cmd/bhyve/pctestdev.h new file mode 100644 index 0000000000..c1c940146e --- /dev/null +++ b/usr/src/cmd/bhyve/pctestdev.h @@ -0,0 +1,43 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Adam Fenn <adam@fenn.io> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * Emulation of selected legacy test/debug interfaces expected by KVM-unit-tests + */ + +#ifndef _PCTESTDEV_H_ +#define _PCTESTDEV_H_ + +struct vmctx; + +const char *pctestdev_getname(void); +int pctestdev_init(struct vmctx *ctx); +int pctestdev_parse(const char *opts); + +#endif diff --git a/usr/src/cmd/bhyve/pm.c b/usr/src/cmd/bhyve/pm.c index fa162faab1..d2732242f9 100644 --- a/usr/src/cmd/bhyve/pm.c +++ b/usr/src/cmd/bhyve/pm.c @@ -211,7 +211,7 @@ pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, * the global lock, but ACPI-CA whines profusely if it * can't set GBL_EN. 
*/ - pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN); + pm1_enable = *eax & (PM1_RTC_EN | PM1_PWRBTN_EN | PM1_GBL_EN); sci_update(ctx); } pthread_mutex_unlock(&pm_lock); diff --git a/usr/src/cmd/bhyve/smbiostbl.c b/usr/src/cmd/bhyve/smbiostbl.c index 8af8a85755..3df2012f10 100644 --- a/usr/src/cmd/bhyve/smbiostbl.c +++ b/usr/src/cmd/bhyve/smbiostbl.c @@ -52,6 +52,10 @@ __FBSDID("$FreeBSD$"); #define SMBIOS_BASE 0xF1000 +#define FIRMWARE_VERSION "13.0" +/* The SMBIOS specification defines the date format to be mm/dd/yyyy */ +#define FIRMWARE_RELEASE_DATE "11/10/2020" + /* BHYVE_ACPI_BASE - SMBIOS_BASE) */ #define SMBIOS_MAX_LENGTH (0xF2400 - 0xF1000) @@ -324,9 +328,9 @@ struct smbios_table_type0 smbios_type0_template = { }; const char *smbios_type0_strings[] = { - "BHYVE", /* vendor string */ - "1.00", /* bios version string */ - "03/14/2014", /* bios release date string */ + "BHYVE", /* vendor string */ + FIRMWARE_VERSION, /* bios version string */ + FIRMWARE_RELEASE_DATE, /* bios release date string */ NULL }; @@ -347,12 +351,12 @@ static int smbios_type1_initializer(struct smbios_structure *template_entry, uint16_t *n, uint16_t *size); const char *smbios_type1_strings[] = { - " ", /* manufacturer string */ - "BHYVE", /* product name string */ - "1.0", /* version string */ - "None", /* serial number string */ - "None", /* sku string */ - " ", /* family name string */ + "illumos", /* manufacturer string */ + "BHYVE", /* product name string */ + "1.0", /* version string */ + "None", /* serial number string */ + "None", /* sku string */ + "Virtual Machine", /* family name string */ NULL }; @@ -375,7 +379,7 @@ struct smbios_table_type3 smbios_type3_template = { }; const char *smbios_type3_strings[] = { - " ", /* manufacturer string */ + "illumos", /* manufacturer string */ "1.0", /* version string */ "None", /* serial number string */ "None", /* asset tag string */ @@ -755,7 +759,7 @@ smbios_type19_initializer(struct smbios_structure *template_entry, type19 = (struct 
smbios_table_type19 *)curaddr; type19->arrayhand = type16_handle; type19->xsaddr = 4*GB; - type19->xeaddr = guest_himem; + type19->xeaddr = type19->xsaddr + guest_himem; } return (0); diff --git a/usr/src/cmd/bhyve/usb_mouse.c b/usr/src/cmd/bhyve/usb_mouse.c index 8af86fcdc7..7790fe0ec9 100644 --- a/usr/src/cmd/bhyve/usb_mouse.c +++ b/usr/src/cmd/bhyve/usb_mouse.c @@ -72,7 +72,7 @@ enum { }; static const char *umouse_desc_strings[] = { - "\x04\x09", + "\x09\x04", "BHYVE", "HID Tablet", "01", @@ -388,7 +388,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) "sizeof(umouse_dev_desc) %lu", len, sizeof(umouse_dev_desc))); if ((value & 0xFF) != 0) { - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; } if (len > sizeof(umouse_dev_desc)) { @@ -403,7 +403,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) case UDESC_CONFIG: DPRINTF(("umouse: (->UDESC_CONFIG)")); if ((value & 0xFF) != 0) { - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; } if (len > sizeof(umouse_confd)) { @@ -472,7 +472,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) default: DPRINTF(("umouse: unknown(%d)->ERROR", value >> 8)); - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; } eshort = data->blen > 0; @@ -496,7 +496,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; default: DPRINTF(("umouse: IO ERROR")); - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; } eshort = data->blen > 0; @@ -507,7 +507,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) if (index != 0) { DPRINTF(("umouse get_interface, invalid index %d", index)); - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; } @@ -578,7 +578,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) case UREQ(UR_SET_FEATURE, UT_WRITE_INTERFACE): case UREQ(UR_SET_FEATURE, UT_WRITE_ENDPOINT): DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_INTERFACE)")); - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; case UREQ(UR_SET_INTERFACE, 
UT_WRITE_INTERFACE): @@ -617,7 +617,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) memcpy(data->buf, &sc->um_report, len); data->bdone += len; } else { - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; goto done; } eshort = data->blen > 0; @@ -659,7 +659,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) default: DPRINTF(("**** umouse request unhandled")); - err = USB_ERR_IOERROR; + err = USB_ERR_STALLED; break; } diff --git a/usr/src/contrib/bhyve/dev/nvme/nvme.h b/usr/src/contrib/bhyve/dev/nvme/nvme.h index c7f6496426..6fbf2b758f 100644 --- a/usr/src/contrib/bhyve/dev/nvme/nvme.h +++ b/usr/src/contrib/bhyve/dev/nvme/nvme.h @@ -30,6 +30,18 @@ * Copyright 2019 Joyent, Inc. */ +/* + * illumos port notes: + * + * The upstream version of this file uses conditionals of the form + * #if _BYTE_ORDER != _LITTLE_ENDIAN + * Rather than keep this file in compat with only that little bit changed, + * this is locally patched below. + * + * There is also a static assertion which has been commented out due to a + * problem with smatch. + */ + #ifndef __NVME_H__ #define __NVME_H__ @@ -42,6 +54,8 @@ #define NVME_PASSTHROUGH_CMD _IOWR('n', 0, struct nvme_pt_command) #define NVME_RESET_CONTROLLER _IO('n', 1) +#define NVME_GET_NSID _IOR('n', 2, struct nvme_get_nsid) +#define NVME_GET_MAX_XFER_SIZE _IOR('n', 3, uint64_t) #define NVME_IO_TEST _IOWR('n', 100, struct nvme_io_test) #define NVME_BIO_TEST _IOWR('n', 101, struct nvme_io_test) @@ -59,8 +73,8 @@ */ #define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF) -/* Cap nvme to 1MB transfers driver explodes with larger sizes */ -#define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20)) +/* Cap transfers by the maximum addressable by page-sized PRP (4KB -> 2MB). 
*/ +#define NVME_MAX_XFER_SIZE MIN(maxphys, (PAGE_SIZE/8*PAGE_SIZE)) /* Register field definitions */ #define NVME_CAP_LO_REG_MQES_SHIFT (0) @@ -71,15 +85,51 @@ #define NVME_CAP_LO_REG_AMS_MASK (0x3) #define NVME_CAP_LO_REG_TO_SHIFT (24) #define NVME_CAP_LO_REG_TO_MASK (0xFF) +#define NVME_CAP_LO_MQES(x) \ + (((x) >> NVME_CAP_LO_REG_MQES_SHIFT) & NVME_CAP_LO_REG_MQES_MASK) +#define NVME_CAP_LO_CQR(x) \ + (((x) >> NVME_CAP_LO_REG_CQR_SHIFT) & NVME_CAP_LO_REG_CQR_MASK) +#define NVME_CAP_LO_AMS(x) \ + (((x) >> NVME_CAP_LO_REG_AMS_SHIFT) & NVME_CAP_LO_REG_AMS_MASK) +#define NVME_CAP_LO_TO(x) \ + (((x) >> NVME_CAP_LO_REG_TO_SHIFT) & NVME_CAP_LO_REG_TO_MASK) #define NVME_CAP_HI_REG_DSTRD_SHIFT (0) #define NVME_CAP_HI_REG_DSTRD_MASK (0xF) +#define NVME_CAP_HI_REG_NSSRS_SHIFT (4) +#define NVME_CAP_HI_REG_NSSRS_MASK (0x1) +#define NVME_CAP_HI_REG_CSS_SHIFT (5) +#define NVME_CAP_HI_REG_CSS_MASK (0xff) #define NVME_CAP_HI_REG_CSS_NVM_SHIFT (5) #define NVME_CAP_HI_REG_CSS_NVM_MASK (0x1) +#define NVME_CAP_HI_REG_BPS_SHIFT (13) +#define NVME_CAP_HI_REG_BPS_MASK (0x1) #define NVME_CAP_HI_REG_MPSMIN_SHIFT (16) #define NVME_CAP_HI_REG_MPSMIN_MASK (0xF) #define NVME_CAP_HI_REG_MPSMAX_SHIFT (20) #define NVME_CAP_HI_REG_MPSMAX_MASK (0xF) +#define NVME_CAP_HI_REG_PMRS_SHIFT (24) +#define NVME_CAP_HI_REG_PMRS_MASK (0x1) +#define NVME_CAP_HI_REG_CMBS_SHIFT (25) +#define NVME_CAP_HI_REG_CMBS_MASK (0x1) +#define NVME_CAP_HI_DSTRD(x) \ + (((x) >> NVME_CAP_HI_REG_DSTRD_SHIFT) & NVME_CAP_HI_REG_DSTRD_MASK) +#define NVME_CAP_HI_NSSRS(x) \ + (((x) >> NVME_CAP_HI_REG_NSSRS_SHIFT) & NVME_CAP_HI_REG_NSSRS_MASK) +#define NVME_CAP_HI_CSS(x) \ + (((x) >> NVME_CAP_HI_REG_CSS_SHIFT) & NVME_CAP_HI_REG_CSS_MASK) +#define NVME_CAP_HI_CSS_NVM(x) \ + (((x) >> NVME_CAP_HI_REG_CSS_NVM_SHIFT) & NVME_CAP_HI_REG_CSS_NVM_MASK) +#define NVME_CAP_HI_BPS(x) \ + (((x) >> NVME_CAP_HI_REG_BPS_SHIFT) & NVME_CAP_HI_REG_BPS_MASK) +#define NVME_CAP_HI_MPSMIN(x) \ + (((x) >> NVME_CAP_HI_REG_MPSMIN_SHIFT) & 
NVME_CAP_HI_REG_MPSMIN_MASK) +#define NVME_CAP_HI_MPSMAX(x) \ + (((x) >> NVME_CAP_HI_REG_MPSMAX_SHIFT) & NVME_CAP_HI_REG_MPSMAX_MASK) +#define NVME_CAP_HI_PMRS(x) \ + (((x) >> NVME_CAP_HI_REG_PMRS_SHIFT) & NVME_CAP_HI_REG_PMRS_MASK) +#define NVME_CAP_HI_CMBS(x) \ + (((x) >> NVME_CAP_HI_REG_CMBS_SHIFT) & NVME_CAP_HI_REG_CMBS_MASK) #define NVME_CC_REG_EN_SHIFT (0) #define NVME_CC_REG_EN_MASK (0x1) @@ -102,6 +152,10 @@ #define NVME_CSTS_REG_CFS_MASK (0x1) #define NVME_CSTS_REG_SHST_SHIFT (2) #define NVME_CSTS_REG_SHST_MASK (0x3) +#define NVME_CSTS_REG_NVSRO_SHIFT (4) +#define NVME_CSTS_REG_NVSRO_MASK (0x1) +#define NVME_CSTS_REG_PP_SHIFT (5) +#define NVME_CSTS_REG_PP_MASK (0x1) #define NVME_CSTS_GET_SHST(csts) (((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK) @@ -110,6 +164,36 @@ #define NVME_AQA_REG_ACQS_SHIFT (16) #define NVME_AQA_REG_ACQS_MASK (0xFFF) +#define NVME_PMRCAP_REG_RDS_SHIFT (3) +#define NVME_PMRCAP_REG_RDS_MASK (0x1) +#define NVME_PMRCAP_REG_WDS_SHIFT (4) +#define NVME_PMRCAP_REG_WDS_MASK (0x1) +#define NVME_PMRCAP_REG_BIR_SHIFT (5) +#define NVME_PMRCAP_REG_BIR_MASK (0x7) +#define NVME_PMRCAP_REG_PMRTU_SHIFT (8) +#define NVME_PMRCAP_REG_PMRTU_MASK (0x3) +#define NVME_PMRCAP_REG_PMRWBM_SHIFT (10) +#define NVME_PMRCAP_REG_PMRWBM_MASK (0xf) +#define NVME_PMRCAP_REG_PMRTO_SHIFT (16) +#define NVME_PMRCAP_REG_PMRTO_MASK (0xff) +#define NVME_PMRCAP_REG_CMSS_SHIFT (24) +#define NVME_PMRCAP_REG_CMSS_MASK (0x1) + +#define NVME_PMRCAP_RDS(x) \ + (((x) >> NVME_PMRCAP_REG_RDS_SHIFT) & NVME_PMRCAP_REG_RDS_MASK) +#define NVME_PMRCAP_WDS(x) \ + (((x) >> NVME_PMRCAP_REG_WDS_SHIFT) & NVME_PMRCAP_REG_WDS_MASK) +#define NVME_PMRCAP_BIR(x) \ + (((x) >> NVME_PMRCAP_REG_BIR_SHIFT) & NVME_PMRCAP_REG_BIR_MASK) +#define NVME_PMRCAP_PMRTU(x) \ + (((x) >> NVME_PMRCAP_REG_PMRTU_SHIFT) & NVME_PMRCAP_REG_PMRTU_MASK) +#define NVME_PMRCAP_PMRWBM(x) \ + (((x) >> NVME_PMRCAP_REG_PMRWBM_SHIFT) & NVME_PMRCAP_REG_PMRWBM_MASK) +#define NVME_PMRCAP_PMRTO(x) \ + (((x) >> 
NVME_PMRCAP_REG_PMRTO_SHIFT) & NVME_PMRCAP_REG_PMRTO_MASK) +#define NVME_PMRCAP_CMSS(x) \ + (((x) >> NVME_PMRCAP_REG_CMSS_SHIFT) & NVME_PMRCAP_REG_CMSS_MASK) + /* Command field definitions */ #define NVME_CMD_FUSE_SHIFT (8) @@ -121,6 +205,8 @@ #define NVME_STATUS_SC_MASK (0xFF) #define NVME_STATUS_SCT_SHIFT (9) #define NVME_STATUS_SCT_MASK (0x7) +#define NVME_STATUS_CRD_SHIFT (12) +#define NVME_STATUS_CRD_MASK (0x3) #define NVME_STATUS_M_SHIFT (14) #define NVME_STATUS_M_MASK (0x1) #define NVME_STATUS_DNR_SHIFT (15) @@ -161,6 +247,9 @@ /* SR-IOV Virtual Function */ #define NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT (2) #define NVME_CTRLR_DATA_MIC_SRIOVVF_MASK (0x1) +/* Asymmetric Namespace Access Reporting */ +#define NVME_CTRLR_DATA_MIC_ANAR_SHIFT (3) +#define NVME_CTRLR_DATA_MIC_ANAR_MASK (0x1) /** OACS - optional admin command support */ /* supports security send/receive commands */ @@ -190,6 +279,9 @@ /* supports Doorbell Buffer Config */ #define NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT (8) #define NVME_CTRLR_DATA_OACS_DBBUFFER_MASK (0x1) +/* supports Get LBA Status */ +#define NVME_CTRLR_DATA_OACS_GETLBA_SHIFT (9) +#define NVME_CTRLR_DATA_OACS_GETLBA_MASK (0x1) /** firmware updates */ /* first slot is read-only */ @@ -198,6 +290,9 @@ /* number of firmware slots */ #define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT (1) #define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK (0x7) +/* firmware activation without reset */ +#define NVME_CTRLR_DATA_FRMW_ACT_WO_RESET_SHIFT (4) +#define NVME_CTRLR_DATA_FRMW_ACT_WO_RESET_MASK (0x1) /** log page attributes */ /* per namespace smart/health log page */ @@ -214,6 +309,26 @@ #define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT (0) #define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK (0x1) +/** Sanitize Capabilities */ +/* Crypto Erase Support */ +#define NVME_CTRLR_DATA_SANICAP_CES_SHIFT (0) +#define NVME_CTRLR_DATA_SANICAP_CES_MASK (0x1) +/* Block Erase Support */ +#define NVME_CTRLR_DATA_SANICAP_BES_SHIFT (1) +#define NVME_CTRLR_DATA_SANICAP_BES_MASK (0x1) +/* 
Overwrite Support */ +#define NVME_CTRLR_DATA_SANICAP_OWS_SHIFT (2) +#define NVME_CTRLR_DATA_SANICAP_OWS_MASK (0x1) +/* No-Deallocate Inhibited */ +#define NVME_CTRLR_DATA_SANICAP_NDI_SHIFT (29) +#define NVME_CTRLR_DATA_SANICAP_NDI_MASK (0x1) +/* No-Deallocate Modifies Media After Sanitize */ +#define NVME_CTRLR_DATA_SANICAP_NODMMAS_SHIFT (30) +#define NVME_CTRLR_DATA_SANICAP_NODMMAS_MASK (0x3) +#define NVME_CTRLR_DATA_SANICAP_NODMMAS_UNDEF (0) +#define NVME_CTRLR_DATA_SANICAP_NODMMAS_NO (1) +#define NVME_CTRLR_DATA_SANICAP_NODMMAS_YES (2) + /** submission queue entry size */ #define NVME_CTRLR_DATA_SQES_MIN_SHIFT (0) #define NVME_CTRLR_DATA_SQES_MIN_MASK (0xF) @@ -241,6 +356,8 @@ #define NVME_CTRLR_DATA_ONCS_RESERV_MASK (0x1) #define NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT (6) #define NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK (0x1) +#define NVME_CTRLR_DATA_ONCS_VERIFY_SHIFT (7) +#define NVME_CTRLR_DATA_ONCS_VERIFY_MASK (0x1) /** Fused Operation Support */ #define NVME_CTRLR_DATA_FUSES_CNW_SHIFT (0) @@ -255,8 +372,15 @@ #define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK (0x1) /** volatile write cache */ +/* volatile write cache present */ #define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT (0) #define NVME_CTRLR_DATA_VWC_PRESENT_MASK (0x1) +/* flush all namespaces supported */ +#define NVME_CTRLR_DATA_VWC_ALL_SHIFT (1) +#define NVME_CTRLR_DATA_VWC_ALL_MASK (0x3) +#define NVME_CTRLR_DATA_VWC_ALL_UNKNOWN (0) +#define NVME_CTRLR_DATA_VWC_ALL_NO (2) +#define NVME_CTRLR_DATA_VWC_ALL_YES (3) /** namespace features */ /* thin provisioning */ @@ -271,6 +395,9 @@ /* NGUID and EUI64 fields are not reusable */ #define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_SHIFT (3) #define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_MASK (0x1) +/* NPWG, NPWA, NPDG, NPDA, and NOWS are valid */ +#define NVME_NS_DATA_NSFEAT_NPVALID_SHIFT (4) +#define NVME_NS_DATA_NSFEAT_NPVALID_MASK (0x1) /** formatted lba size */ #define NVME_NS_DATA_FLBAS_FORMAT_SHIFT (0) @@ -351,6 +478,20 @@ #define NVME_NS_DATA_FPI_SUPP_SHIFT (7) #define 
NVME_NS_DATA_FPI_SUPP_MASK (0x1) +/** Deallocate Logical Block Features */ +/* deallocated logical block read behavior */ +#define NVME_NS_DATA_DLFEAT_READ_SHIFT (0) +#define NVME_NS_DATA_DLFEAT_READ_MASK (0x07) +#define NVME_NS_DATA_DLFEAT_READ_NR (0x00) +#define NVME_NS_DATA_DLFEAT_READ_00 (0x01) +#define NVME_NS_DATA_DLFEAT_READ_FF (0x02) +/* supports the Deallocate bit in the Write Zeroes */ +#define NVME_NS_DATA_DLFEAT_DWZ_SHIFT (3) +#define NVME_NS_DATA_DLFEAT_DWZ_MASK (0x01) +/* Guard field for deallocated logical blocks is set to the CRC */ +#define NVME_NS_DATA_DLFEAT_GCRC_SHIFT (4) +#define NVME_NS_DATA_DLFEAT_GCRC_MASK (0x01) + /** lba format support */ /* metadata size */ #define NVME_NS_DATA_LBAF_MS_SHIFT (0) @@ -370,11 +511,42 @@ enum nvme_critical_warning_state { NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP = 0x10, }; #define NVME_CRIT_WARN_ST_RESERVED_MASK (0xE0) +#define NVME_ASYNC_EVENT_NS_ATTRIBUTE (0x100) +#define NVME_ASYNC_EVENT_FW_ACTIVATE (0x200) /* slot for current FW */ #define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT (0) #define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK (0x7) +/* Commands Supported and Effects */ +#define NVME_CE_PAGE_CSUP_SHIFT (0) +#define NVME_CE_PAGE_CSUP_MASK (0x1) +#define NVME_CE_PAGE_LBCC_SHIFT (1) +#define NVME_CE_PAGE_LBCC_MASK (0x1) +#define NVME_CE_PAGE_NCC_SHIFT (2) +#define NVME_CE_PAGE_NCC_MASK (0x1) +#define NVME_CE_PAGE_NIC_SHIFT (3) +#define NVME_CE_PAGE_NIC_MASK (0x1) +#define NVME_CE_PAGE_CCC_SHIFT (4) +#define NVME_CE_PAGE_CCC_MASK (0x1) +#define NVME_CE_PAGE_CSE_SHIFT (16) +#define NVME_CE_PAGE_CSE_MASK (0x7) +#define NVME_CE_PAGE_UUID_SHIFT (19) +#define NVME_CE_PAGE_UUID_MASK (0x1) + +/* Sanitize Status */ +#define NVME_SS_PAGE_SSTAT_STATUS_SHIFT (0) +#define NVME_SS_PAGE_SSTAT_STATUS_MASK (0x7) +#define NVME_SS_PAGE_SSTAT_STATUS_NEVER (0) +#define NVME_SS_PAGE_SSTAT_STATUS_COMPLETED (1) +#define NVME_SS_PAGE_SSTAT_STATUS_INPROG (2) +#define NVME_SS_PAGE_SSTAT_STATUS_FAILED (3) +#define 
NVME_SS_PAGE_SSTAT_STATUS_COMPLETEDWD (4) +#define NVME_SS_PAGE_SSTAT_PASSES_SHIFT (3) +#define NVME_SS_PAGE_SSTAT_PASSES_MASK (0x1f) +#define NVME_SS_PAGE_SSTAT_GDE_SHIFT (8) +#define NVME_SS_PAGE_SSTAT_GDE_MASK (0x1) + /* CC register SHN field values */ enum shn_value { NVME_SHN_NORMAL = 0x1, @@ -390,34 +562,37 @@ enum shst_value { struct nvme_registers { - /** controller capabilities */ - uint32_t cap_lo; - uint32_t cap_hi; - - uint32_t vs; /* version */ - uint32_t intms; /* interrupt mask set */ - uint32_t intmc; /* interrupt mask clear */ - - /** controller configuration */ - uint32_t cc; - - uint32_t reserved1; - - /** controller status */ - uint32_t csts; - - uint32_t reserved2; - - /** admin queue attributes */ - uint32_t aqa; - - uint64_t asq; /* admin submission queue base addr */ - uint64_t acq; /* admin completion queue base addr */ - uint32_t reserved3[0x3f2]; - + uint32_t cap_lo; /* controller capabilities */ + uint32_t cap_hi; + uint32_t vs; /* version */ + uint32_t intms; /* interrupt mask set */ + uint32_t intmc; /* interrupt mask clear */ + uint32_t cc; /* controller configuration */ + uint32_t reserved1; + uint32_t csts; /* controller status */ + uint32_t nssr; /* NVM Subsystem Reset */ + uint32_t aqa; /* admin queue attributes */ + uint64_t asq; /* admin submission queue base addr */ + uint64_t acq; /* admin completion queue base addr */ + uint32_t cmbloc; /* Controller Memory Buffer Location */ + uint32_t cmbsz; /* Controller Memory Buffer Size */ + uint32_t bpinfo; /* Boot Partition Information */ + uint32_t bprsel; /* Boot Partition Read Select */ + uint64_t bpmbl; /* Boot Partition Memory Buffer Location */ + uint64_t cmbmsc; /* Controller Memory Buffer Memory Space Control */ + uint32_t cmbsts; /* Controller Memory Buffer Status */ + uint8_t reserved3[3492]; /* 5Ch - DFFh */ + uint32_t pmrcap; /* Persistent Memory Capabilities */ + uint32_t pmrctl; /* Persistent Memory Region Control */ + uint32_t pmrsts; /* Persistent Memory Region Status 
*/ + uint32_t pmrebs; /* Persistent Memory Region Elasticity Buffer Size */ + uint32_t pmrswtp; /* Persistent Memory Region Sustained Write Throughput */ + uint32_t pmrmsc_lo; /* Persistent Memory Region Controller Memory Space Control */ + uint32_t pmrmsc_hi; + uint8_t reserved4[484]; /* E1Ch - FFFh */ struct { - uint32_t sq_tdbl; /* submission queue tail doorbell */ - uint32_t cq_hdbl; /* completion queue head doorbell */ + uint32_t sq_tdbl; /* submission queue tail doorbell */ + uint32_t cq_hdbl; /* completion queue head doorbell */ } doorbell[1] __packed; } __packed; @@ -458,7 +633,6 @@ struct nvme_command _Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command"); struct nvme_completion { - /* dword 0 */ uint32_t cdw0; /* command-specific */ @@ -492,6 +666,7 @@ enum nvme_status_code_type { NVME_SCT_GENERIC = 0x0, NVME_SCT_COMMAND_SPECIFIC = 0x1, NVME_SCT_MEDIA_ERROR = 0x2, + NVME_SCT_PATH_RELATED = 0x3, /* 0x3-0x6 - reserved */ NVME_SCT_VENDOR_SPECIFIC = 0x7, }; @@ -530,6 +705,9 @@ enum nvme_generic_command_status_code { NVME_SC_SANITIZE_IN_PROGRESS = 0x1d, NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID = 0x1e, NVME_SC_NOT_SUPPORTED_IN_CMB = 0x1f, + NVME_SC_NAMESPACE_IS_WRITE_PROTECTED = 0x20, + NVME_SC_COMMAND_INTERRUPTED = 0x21, + NVME_SC_TRANSIENT_TRANSPORT_ERROR = 0x22, NVME_SC_LBA_OUT_OF_RANGE = 0x80, NVME_SC_CAPACITY_EXCEEDED = 0x81, @@ -569,12 +747,15 @@ enum nvme_command_specific_status_code { NVME_SC_NS_NOT_ATTACHED = 0x1a, NVME_SC_THIN_PROV_NOT_SUPPORTED = 0x1b, NVME_SC_CTRLR_LIST_INVALID = 0x1c, - NVME_SC_SELT_TEST_IN_PROGRESS = 0x1d, + NVME_SC_SELF_TEST_IN_PROGRESS = 0x1d, NVME_SC_BOOT_PART_WRITE_PROHIB = 0x1e, NVME_SC_INVALID_CTRLR_ID = 0x1f, NVME_SC_INVALID_SEC_CTRLR_STATE = 0x20, NVME_SC_INVALID_NUM_OF_CTRLR_RESRC = 0x21, NVME_SC_INVALID_RESOURCE_ID = 0x22, + NVME_SC_SANITIZE_PROHIBITED_WPMRE = 0x23, + NVME_SC_ANA_GROUP_ID_INVALID = 0x24, + NVME_SC_ANA_ATTACH_FAILED = 0x25, NVME_SC_CONFLICTING_ATTRIBUTES = 0x80, 
NVME_SC_INVALID_PROTECTION_INFO = 0x81, @@ -593,6 +774,17 @@ enum nvme_media_error_status_code { NVME_SC_DEALLOCATED_OR_UNWRITTEN = 0x87, }; +/* path related status codes */ +enum nvme_path_related_status_code { + NVME_SC_INTERNAL_PATH_ERROR = 0x00, + NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS = 0x01, + NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE = 0x02, + NVME_SC_ASYMMETRIC_ACCESS_TRANSITION = 0x03, + NVME_SC_CONTROLLER_PATHING_ERROR = 0x60, + NVME_SC_HOST_PATHING_ERROR = 0x70, + NVME_SC_COMMAND_ABOTHED_BY_HOST = 0x71, +}; + /* admin opcodes */ enum nvme_admin_opcode { NVME_OPC_DELETE_IO_SQ = 0x00, @@ -612,20 +804,27 @@ enum nvme_admin_opcode { /* 0x0e-0x0f - reserved */ NVME_OPC_FIRMWARE_ACTIVATE = 0x10, NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD = 0x11, + /* 0x12-0x13 - reserved */ NVME_OPC_DEVICE_SELF_TEST = 0x14, NVME_OPC_NAMESPACE_ATTACHMENT = 0x15, + /* 0x16-0x17 - reserved */ NVME_OPC_KEEP_ALIVE = 0x18, NVME_OPC_DIRECTIVE_SEND = 0x19, NVME_OPC_DIRECTIVE_RECEIVE = 0x1a, + /* 0x1b - reserved */ NVME_OPC_VIRTUALIZATION_MANAGEMENT = 0x1c, NVME_OPC_NVME_MI_SEND = 0x1d, NVME_OPC_NVME_MI_RECEIVE = 0x1e, + /* 0x1f-0x7b - reserved */ NVME_OPC_DOORBELL_BUFFER_CONFIG = 0x7c, NVME_OPC_FORMAT_NVM = 0x80, NVME_OPC_SECURITY_SEND = 0x81, NVME_OPC_SECURITY_RECEIVE = 0x82, + /* 0x83 - reserved */ NVME_OPC_SANITIZE = 0x84, + /* 0x85 - reserved */ + NVME_OPC_GET_LBA_STATUS = 0x86, }; /* nvme nvm opcodes */ @@ -636,11 +835,11 @@ enum nvme_nvm_opcode { /* 0x03 - reserved */ NVME_OPC_WRITE_UNCORRECTABLE = 0x04, NVME_OPC_COMPARE = 0x05, - /* 0x06 - reserved */ + /* 0x06-0x07 - reserved */ NVME_OPC_WRITE_ZEROES = 0x08, - /* 0x07 - reserved */ NVME_OPC_DATASET_MANAGEMENT = 0x09, - /* 0x0a-0x0c - reserved */ + /* 0x0a-0x0b - reserved */ + NVME_OPC_VERIFY = 0x0c, NVME_OPC_RESERVATION_REGISTER = 0x0d, NVME_OPC_RESERVATION_REPORT = 0x0e, /* 0x0f-0x10 - reserved */ @@ -668,10 +867,21 @@ enum nvme_feature { NVME_FEAT_KEEP_ALIVE_TIMER = 0x0F, NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT = 0x10, 
NVME_FEAT_NON_OP_POWER_STATE_CONFIG = 0x11, - /* 0x12-0x77 - reserved */ + NVME_FEAT_READ_RECOVERY_LEVEL_CONFIG = 0x12, + NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG = 0x13, + NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW = 0x14, + NVME_FEAT_LBA_STATUS_INFORMATION_ATTRIBUTES = 0x15, + NVME_FEAT_HOST_BEHAVIOR_SUPPORT = 0x16, + NVME_FEAT_SANITIZE_CONFIG = 0x17, + NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION = 0x18, + /* 0x19-0x77 - reserved */ /* 0x78-0x7f - NVMe Management Interface */ NVME_FEAT_SOFTWARE_PROGRESS_MARKER = 0x80, - /* 0x81-0xBF - command set specific (reserved) */ + NVME_FEAT_HOST_IDENTIFIER = 0x81, + NVME_FEAT_RESERVATION_NOTIFICATION_MASK = 0x82, + NVME_FEAT_RESERVATION_PERSISTENCE = 0x83, + NVME_FEAT_NAMESPACE_WRITE_PROTECTION_CONFIG = 0x84, + /* 0x85-0xBF - command set specific (reserved) */ /* 0xC0-0xFF - vendor specific */ }; @@ -717,7 +927,6 @@ _Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_s #define NVME_FIRMWARE_REVISION_LENGTH 8 struct nvme_controller_data { - /* bytes 0-255: controller capabilities and features */ /** pci vendor id */ @@ -765,12 +974,27 @@ struct nvme_controller_data { /** Controller Attributes */ uint32_t ctratt; /* bitfield really */ - uint8_t reserved1[12]; + /** Read Recovery Levels Supported */ + uint16_t rrls; + + uint8_t reserved1[9]; + + /** Controller Type */ + uint8_t cntrltype; /** FRU Globally Unique Identifier */ uint8_t fguid[16]; - uint8_t reserved2[128]; + /** Command Retry Delay Time 1 */ + uint16_t crdt1; + + /** Command Retry Delay Time 2 */ + uint16_t crdt2; + + /** Command Retry Delay Time 3 */ + uint16_t crdt3; + + uint8_t reserved2[122]; /* bytes 256-511: admin command set attributes */ @@ -850,7 +1074,34 @@ struct nvme_controller_data { /** Sanitize Capabilities */ uint32_t sanicap; /* Really a bitfield */ - uint8_t reserved3[180]; + /** Host Memory Buffer Minimum Descriptor Entry Size */ + uint32_t hmminds; + + /** Host Memory Maximum Descriptors Entries */ + uint16_t 
hmmaxd; + + /** NVM Set Identifier Maximum */ + uint16_t nsetidmax; + + /** Endurance Group Identifier Maximum */ + uint16_t endgidmax; + + /** ANA Transition Time */ + uint8_t anatt; + + /** Asymmetric Namespace Access Capabilities */ + uint8_t anacap; + + /** ANA Group Identifier Maximum */ + uint32_t anagrpmax; + + /** Number of ANA Group Identifiers */ + uint32_t nanagrpid; + + /** Persistent Event Log Size */ + uint32_t pels; + + uint8_t reserved3[156]; /* bytes 512-703: nvm command set attributes */ /** submission queue entry size */ @@ -885,7 +1136,9 @@ struct nvme_controller_data { /** NVM Vendor Specific Command Configuration */ uint8_t nvscc; - uint8_t reserved5; + + /** Namespace Write Protection Capabilities */ + uint8_t nwpc; /** Atomic Compare & Write Unit */ uint16_t acwu; @@ -894,8 +1147,11 @@ struct nvme_controller_data { /** SGL Support */ uint32_t sgls; + /** Maximum Number of Allowed Namespaces */ + uint32_t mnan; + /* bytes 540-767: Reserved */ - uint8_t reserved7[228]; + uint8_t reserved7[224]; /** NVM Subsystem NVMe Qualified Name */ uint8_t subnqn[256]; @@ -916,7 +1172,6 @@ struct nvme_controller_data { _Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data"); struct nvme_namespace_data { - /** namespace size */ uint64_t nsze; @@ -980,8 +1235,38 @@ struct nvme_namespace_data { /** NVM Capacity */ uint8_t nvmcap[16]; - /* bytes 64-103: Reserved */ - uint8_t reserved5[40]; + /** Namespace Preferred Write Granularity */ + uint16_t npwg; + + /** Namespace Preferred Write Alignment */ + uint16_t npwa; + + /** Namespace Preferred Deallocate Granularity */ + uint16_t npdg; + + /** Namespace Preferred Deallocate Alignment */ + uint16_t npda; + + /** Namespace Optimal Write Size */ + uint16_t nows; + + /* bytes 74-91: Reserved */ + uint8_t reserved5[18]; + + /** ANA Group Identifier */ + uint32_t anagrpid; + + /* bytes 96-98: Reserved */ + uint8_t reserved6[3]; + + /** Namespace Attributes */ + uint8_t nsattr; 
+ + /** NVM Set Identifier */ + uint16_t nvmsetid; + + /** Endurance Group Identifier */ + uint16_t endgid; /** Namespace Globally Unique Identifier */ uint8_t nguid[16]; @@ -992,7 +1277,7 @@ struct nvme_namespace_data { /** lba format support */ uint32_t lbaf[16]; - uint8_t reserved6[192]; + uint8_t reserved7[192]; uint8_t vendor_specific[3712]; } __packed __aligned(4); @@ -1000,16 +1285,27 @@ struct nvme_namespace_data { _Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namepsace_data"); enum nvme_log_page { - /* 0x00 - reserved */ NVME_LOG_ERROR = 0x01, NVME_LOG_HEALTH_INFORMATION = 0x02, NVME_LOG_FIRMWARE_SLOT = 0x03, NVME_LOG_CHANGED_NAMESPACE = 0x04, NVME_LOG_COMMAND_EFFECT = 0x05, + NVME_LOG_DEVICE_SELF_TEST = 0x06, + NVME_LOG_TELEMETRY_HOST_INITIATED = 0x07, + NVME_LOG_TELEMETRY_CONTROLLER_INITIATED = 0x08, + NVME_LOG_ENDURANCE_GROUP_INFORMATION = 0x09, + NVME_LOG_PREDICTABLE_LATENCY_PER_NVM_SET = 0x0a, + NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE = 0x0b, + NVME_LOG_ASYMMETRIC_NAMESPAVE_ACCESS = 0x0c, + NVME_LOG_PERSISTENT_EVENT_LOG = 0x0d, + NVME_LOG_LBA_STATUS_INFORMATION = 0x0e, + NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE = 0x0f, /* 0x06-0x7F - reserved */ /* 0x80-0xBF - I/O command set specific */ NVME_LOG_RES_NOTIFICATION = 0x80, + NVME_LOG_SANITIZE_STATUS = 0x81, + /* 0x82-0xBF - reserved */ /* 0xC0-0xFF - vendor specific */ /* @@ -1029,7 +1325,6 @@ enum nvme_log_page { }; struct nvme_error_information_entry { - uint64_t error_count; uint16_t sqid; uint16_t cid; @@ -1038,13 +1333,16 @@ struct nvme_error_information_entry { uint64_t lba; uint32_t nsid; uint8_t vendor_specific; - uint8_t reserved[35]; + uint8_t trtype; + uint16_t reserved30; + uint64_t csi; + uint16_t ttsi; + uint8_t reserved[22]; } __packed __aligned(4); _Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry"); struct nvme_health_information_page { - uint8_t critical_warning; uint16_t temperature; 
uint8_t available_spare; @@ -1074,8 +1372,16 @@ struct nvme_health_information_page { uint32_t warning_temp_time; uint32_t error_temp_time; uint16_t temp_sensor[8]; - - uint8_t reserved2[296]; + /* Thermal Management Temperature 1 Transition Count */ + uint32_t tmt1tc; + /* Thermal Management Temperature 2 Transition Count */ + uint32_t tmt2tc; + /* Total Time For Thermal Management Temperature 1 */ + uint32_t ttftmt1; + /* Total Time For Thermal Management Temperature 2 */ + uint32_t ttftmt2; + + uint8_t reserved2[280]; } __packed __aligned(4); /* Currently sparse/smatch incorrectly packs this struct in some situations. */ @@ -1084,7 +1390,6 @@ _Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for #endif struct nvme_firmware_page { - uint8_t afi; uint8_t reserved[7]; uint64_t revision[7]; /* revisions for 7 slots */ @@ -1099,6 +1404,43 @@ struct nvme_ns_list { _Static_assert(sizeof(struct nvme_ns_list) == 4096, "bad size for nvme_ns_list"); +struct nvme_command_effects_page { + uint32_t acs[256]; + uint32_t iocs[256]; + uint8_t reserved[2048]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_command_effects_page) == 4096, + "bad size for nvme_command_effects_page"); + +struct nvme_res_notification_page { + uint64_t log_page_count; + uint8_t log_page_type; + uint8_t available_log_pages; + uint8_t reserved2; + uint32_t nsid; + uint8_t reserved[48]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_res_notification_page) == 64, + "bad size for nvme_res_notification_page"); + +struct nvme_sanitize_status_page { + uint16_t sprog; + uint16_t sstat; + uint32_t scdw10; + uint32_t etfo; + uint32_t etfbe; + uint32_t etfce; + uint32_t etfownd; + uint32_t etfbewnd; + uint32_t etfcewnd; + uint8_t reserved[480]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_sanitize_status_page) == 512, + "bad size for nvme_sanitize_status_page"); + struct intel_log_temp_stats { uint64_t current; @@ -1114,10 +1456,59 @@ 
struct intel_log_temp_stats _Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats"); +struct nvme_resv_reg_ctrlr +{ + uint16_t ctrlr_id; /* Controller ID */ + uint8_t rcsts; /* Reservation Status */ + uint8_t reserved3[5]; + uint64_t hostid; /* Host Identifier */ + uint64_t rkey; /* Reservation Key */ +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_resv_reg_ctrlr) == 24, "bad size for nvme_resv_reg_ctrlr"); + +struct nvme_resv_reg_ctrlr_ext +{ + uint16_t ctrlr_id; /* Controller ID */ + uint8_t rcsts; /* Reservation Status */ + uint8_t reserved3[5]; + uint64_t rkey; /* Reservation Key */ + uint64_t hostid[2]; /* Host Identifier */ + uint8_t reserved32[32]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_resv_reg_ctrlr_ext) == 64, "bad size for nvme_resv_reg_ctrlr_ext"); + +struct nvme_resv_status +{ + uint32_t gen; /* Generation */ + uint8_t rtype; /* Reservation Type */ + uint8_t regctl[2]; /* Number of Registered Controllers */ + uint8_t reserved7[2]; + uint8_t ptpls; /* Persist Through Power Loss State */ + uint8_t reserved10[14]; + struct nvme_resv_reg_ctrlr ctrlr[0]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_resv_status) == 24, "bad size for nvme_resv_status"); + +struct nvme_resv_status_ext +{ + uint32_t gen; /* Generation */ + uint8_t rtype; /* Reservation Type */ + uint8_t regctl[2]; /* Number of Registered Controllers */ + uint8_t reserved7[2]; + uint8_t ptpls; /* Persist Through Power Loss State */ + uint8_t reserved10[14]; + uint8_t reserved24[40]; + struct nvme_resv_reg_ctrlr_ext ctrlr[0]; +} __packed __aligned(4); + +_Static_assert(sizeof(struct nvme_resv_status_ext) == 64, "bad size for nvme_resv_status_ext"); + #define NVME_TEST_MAX_THREADS 128 struct nvme_io_test { - enum nvme_nvm_opcode opc; uint32_t size; uint32_t time; /* in seconds */ @@ -1127,7 +1518,6 @@ struct nvme_io_test { }; enum nvme_io_test_flags { - /* * Specifies whether 
dev_refthread/dev_relthread should be * called during NVME_BIO_TEST. Ignored for other test @@ -1137,7 +1527,6 @@ enum nvme_io_test_flags { }; struct nvme_pt_command { - /* * cmd is used to specify a passthrough command to a controller or * namespace. @@ -1189,6 +1578,17 @@ struct nvme_pt_command { struct mtx * driver_lock; }; +struct nvme_get_nsid { + char cdev[SPECNAMELEN + 1]; + uint32_t nsid; +}; + +struct nvme_hmb_desc { + uint64_t addr; + uint32_t size; + uint32_t reserved; +}; + #define nvme_completion_is_error(cpl) \ (NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0) @@ -1197,6 +1597,7 @@ void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen); #ifdef _KERNEL struct bio; +struct thread; struct nvme_namespace; struct nvme_controller; @@ -1223,6 +1624,8 @@ int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr, /* Admin functions */ void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature, uint32_t cdw11, + uint32_t cdw12, uint32_t cdw13, + uint32_t cdw14, uint32_t cdw15, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn, void *cb_arg); void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, @@ -1264,6 +1667,13 @@ void nvme_unregister_consumer(struct nvme_consumer *consumer); device_t nvme_ctrlr_get_device(struct nvme_controller *ctrlr); const struct nvme_controller_data * nvme_ctrlr_get_data(struct nvme_controller *ctrlr); +static inline bool +nvme_ctrlr_has_dataset_mgmt(const struct nvme_controller_data *cd) +{ + /* Assumes cd was byte swapped by nvme_controller_data_swapbytes() */ + return ((cd->oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) & + NVME_CTRLR_DATA_ONCS_DSM_MASK); +} /* Namespace helper functions */ uint32_t nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns); @@ -1279,6 +1689,8 @@ uint32_t nvme_ns_get_stripesize(struct nvme_namespace *ns); int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn); +int 
nvme_ns_ioctl_process(struct nvme_namespace *ns, u_long cmd, + caddr_t arg, int flag, struct thread *td); /* * Command building helper functions -- shared with CAM @@ -1335,8 +1747,9 @@ extern int nvme_use_nvd; /* Endianess conversion functions for NVMe structs */ static inline -void nvme_completion_swapbytes(struct nvme_completion *s) +void nvme_completion_swapbytes(struct nvme_completion *s __unused) { +#ifndef _LITTLE_ENDIAN s->cdw0 = le32toh(s->cdw0); /* omit rsvd1 */ @@ -1344,22 +1757,26 @@ void nvme_completion_swapbytes(struct nvme_completion *s) s->sqid = le16toh(s->sqid); /* omit cid */ s->status = le16toh(s->status); +#endif } static inline -void nvme_power_state_swapbytes(struct nvme_power_state *s) +void nvme_power_state_swapbytes(struct nvme_power_state *s __unused) { +#ifndef _LITTLE_ENDIAN s->mp = le16toh(s->mp); s->enlat = le32toh(s->enlat); s->exlat = le32toh(s->exlat); s->idlp = le16toh(s->idlp); s->actp = le16toh(s->actp); +#endif } static inline -void nvme_controller_data_swapbytes(struct nvme_controller_data *s) +void nvme_controller_data_swapbytes(struct nvme_controller_data *s __unused) { +#ifndef _LITTLE_ENDIAN int i; s->vid = le16toh(s->vid); @@ -1370,6 +1787,10 @@ void nvme_controller_data_swapbytes(struct nvme_controller_data *s) s->rtd3e = le32toh(s->rtd3e); s->oaes = le32toh(s->oaes); s->ctratt = le32toh(s->ctratt); + s->rrls = le16toh(s->rrls); + s->crdt1 = le16toh(s->crdt1); + s->crdt2 = le16toh(s->crdt2); + s->crdt3 = le16toh(s->crdt3); s->oacs = le16toh(s->oacs); s->wctemp = le16toh(s->wctemp); s->cctemp = le16toh(s->cctemp); @@ -1383,6 +1804,13 @@ void nvme_controller_data_swapbytes(struct nvme_controller_data *s) s->mntmt = le16toh(s->mntmt); s->mxtmt = le16toh(s->mxtmt); s->sanicap = le32toh(s->sanicap); + s->hmminds = le32toh(s->hmminds); + s->hmmaxd = le16toh(s->hmmaxd); + s->nsetidmax = le16toh(s->nsetidmax); + s->endgidmax = le16toh(s->endgidmax); + s->anagrpmax = le32toh(s->anagrpmax); + s->nanagrpid = le32toh(s->nanagrpid); 
+ s->pels = le32toh(s->pels); s->maxcmd = le16toh(s->maxcmd); s->nn = le32toh(s->nn); s->oncs = le16toh(s->oncs); @@ -1391,13 +1819,16 @@ void nvme_controller_data_swapbytes(struct nvme_controller_data *s) s->awupf = le16toh(s->awupf); s->acwu = le16toh(s->acwu); s->sgls = le32toh(s->sgls); + s->mnan = le32toh(s->mnan); for (i = 0; i < 32; i++) nvme_power_state_swapbytes(&s->power_state[i]); +#endif } static inline -void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s) +void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused) { +#ifndef _LITTLE_ENDIAN int i; s->nsze = le64toh(s->nsze); @@ -1410,13 +1841,24 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s) s->nabo = le16toh(s->nabo); s->nabspf = le16toh(s->nabspf); s->noiob = le16toh(s->noiob); + s->npwg = le16toh(s->npwg); + s->npwa = le16toh(s->npwa); + s->npdg = le16toh(s->npdg); + s->npda = le16toh(s->npda); + s->nows = le16toh(s->nows); + s->anagrpid = le32toh(s->anagrpid); + s->nvmsetid = le16toh(s->nvmsetid); + s->endgid = le16toh(s->endgid); for (i = 0; i < 16; i++) s->lbaf[i] = le32toh(s->lbaf[i]); +#endif } static inline -void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s) +void nvme_error_information_entry_swapbytes( + struct nvme_error_information_entry *s __unused) { +#ifndef _LITTLE_ENDIAN s->error_count = le64toh(s->error_count); s->sqid = le16toh(s->sqid); @@ -1425,18 +1867,14 @@ void nvme_error_information_entry_swapbytes(struct nvme_error_information_entry s->error_location = le16toh(s->error_location); s->lba = le64toh(s->lba); s->nsid = le32toh(s->nsid); + s->csi = le64toh(s->csi); + s->ttsi = le16toh(s->ttsi); +#endif } static inline -void nvme_le128toh(void *p) +void nvme_le128toh(void *p __unused) { - /* - * Upstream, this uses the following comparison: - * #if _BYTE_ORDER != _LITTLE_ENDIAN - * - * Rather than keep this file in compat with only that little bit - * changed, we'll just float a little patch here for 
now. - */ #ifndef _LITTLE_ENDIAN /* Swap 16 bytes in place */ char *tmp = (char*)p; @@ -1447,14 +1885,14 @@ void nvme_le128toh(void *p) tmp[i] = tmp[15-i]; tmp[15-i] = b; } -#else - (void)p; #endif } static inline -void nvme_health_information_page_swapbytes(struct nvme_health_information_page *s) +void nvme_health_information_page_swapbytes( + struct nvme_health_information_page *s __unused) { +#ifndef _LITTLE_ENDIAN int i; s->temperature = le16toh(s->temperature); @@ -1472,30 +1910,80 @@ void nvme_health_information_page_swapbytes(struct nvme_health_information_page s->error_temp_time = le32toh(s->error_temp_time); for (i = 0; i < 8; i++) s->temp_sensor[i] = le16toh(s->temp_sensor[i]); + s->tmt1tc = le32toh(s->tmt1tc); + s->tmt2tc = le32toh(s->tmt2tc); + s->ttftmt1 = le32toh(s->ttftmt1); + s->ttftmt2 = le32toh(s->ttftmt2); +#endif } - static inline -void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s) +void nvme_firmware_page_swapbytes(struct nvme_firmware_page *s __unused) { +#ifndef _LITTLE_ENDIAN int i; for (i = 0; i < 7; i++) s->revision[i] = le64toh(s->revision[i]); +#endif } static inline -void nvme_ns_list_swapbytes(struct nvme_ns_list *s) +void nvme_ns_list_swapbytes(struct nvme_ns_list *s __unused) { +#ifndef _LITTLE_ENDIAN int i; for (i = 0; i < 1024; i++) s->ns[i] = le32toh(s->ns[i]); +#endif +} + +static inline +void nvme_command_effects_page_swapbytes( + struct nvme_command_effects_page *s __unused) +{ +#ifndef _LITTLE_ENDIAN + int i; + + for (i = 0; i < 256; i++) + s->acs[i] = le32toh(s->acs[i]); + for (i = 0; i < 256; i++) + s->iocs[i] = le32toh(s->iocs[i]); +#endif +} + +static inline +void nvme_res_notification_page_swapbytes( + struct nvme_res_notification_page *s __unused) +{ +#ifndef _LITTLE_ENDIAN + s->log_page_count = le64toh(s->log_page_count); + s->nsid = le32toh(s->nsid); +#endif +} + +static inline +void nvme_sanitize_status_page_swapbytes( + struct nvme_sanitize_status_page *s __unused) +{ +#ifndef _LITTLE_ENDIAN + s->sprog = 
le16toh(s->sprog); + s->sstat = le16toh(s->sstat); + s->scdw10 = le32toh(s->scdw10); + s->etfo = le32toh(s->etfo); + s->etfbe = le32toh(s->etfbe); + s->etfce = le32toh(s->etfce); + s->etfownd = le32toh(s->etfownd); + s->etfbewnd = le32toh(s->etfbewnd); + s->etfcewnd = le32toh(s->etfcewnd); +#endif } static inline -void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s) +void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s __unused) { +#ifndef _LITTLE_ENDIAN s->current = le64toh(s->current); s->overtemp_flag_last = le64toh(s->overtemp_flag_last); @@ -1506,6 +1994,43 @@ void intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s) s->max_oper_temp = le64toh(s->max_oper_temp); s->min_oper_temp = le64toh(s->min_oper_temp); s->est_offset = le64toh(s->est_offset); +#endif +} + +static inline +void nvme_resv_status_swapbytes(struct nvme_resv_status *s __unused, + size_t size __unused) +{ +#ifndef _LITTLE_ENDIAN + u_int i, n; + + s->gen = le32toh(s->gen); + n = (s->regctl[1] << 8) | s->regctl[0]; + n = MIN(n, (size - sizeof(*s)) / sizeof(s->ctrlr[0])); /* cap at entries that fit after the fixed header; sizeof(*s), not the pointer size */ + for (i = 0; i < n; i++) { + s->ctrlr[i].ctrlr_id = le16toh(s->ctrlr[i].ctrlr_id); + s->ctrlr[i].hostid = le64toh(s->ctrlr[i].hostid); + s->ctrlr[i].rkey = le64toh(s->ctrlr[i].rkey); + } +#endif } + +static inline +void nvme_resv_status_ext_swapbytes(struct nvme_resv_status_ext *s __unused, + size_t size __unused) +{ +#ifndef _LITTLE_ENDIAN + u_int i, n; + + s->gen = le32toh(s->gen); + n = (s->regctl[1] << 8) | s->regctl[0]; + n = MIN(n, (size - sizeof(*s)) / sizeof(s->ctrlr[0])); /* cap at entries that fit after the fixed header; sizeof(*s), not the pointer size */ + for (i = 0; i < n; i++) { + s->ctrlr[i].ctrlr_id = le16toh(s->ctrlr[i].ctrlr_id); + s->ctrlr[i].rkey = le64toh(s->ctrlr[i].rkey); + nvme_le128toh((void *)s->ctrlr[i].hostid); + } +#endif } #endif /* __NVME_H__ */ diff --git a/usr/src/contrib/bhyve/sys/ata.h b/usr/src/contrib/bhyve/sys/ata.h index 223bd7b3eb..83eb089dbe 100644 --- a/usr/src/contrib/bhyve/sys/ata.h +++ b/usr/src/contrib/bhyve/sys/ata.h @@ -1,4 +1,6 
@@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org> * All rights reserved. * @@ -44,6 +46,7 @@ struct ata_params { #define ATA_ATAPI_TYPE_TAPE 0x0100 /* streaming tape */ #define ATA_ATAPI_TYPE_CDROM 0x0500 /* CD-ROM device */ #define ATA_ATAPI_TYPE_OPTICAL 0x0700 /* optical disk */ +#define ATA_ATAPI_REMOVABLE 0x0080 #define ATA_DRQ_MASK 0x0060 #define ATA_DRQ_SLOW 0x0000 /* cpu 3 ms delay */ #define ATA_DRQ_INTR 0x0020 /* interrupt 10 ms delay */ @@ -64,12 +67,13 @@ struct ata_params { /*023*/ u_int8_t revision[8]; /* firmware revision */ /*027*/ u_int8_t model[40]; /* model name */ /*047*/ u_int16_t sectors_intr; /* sectors per interrupt */ -/*048*/ u_int16_t usedmovsd; /* double word read/write? */ +/*048*/ u_int16_t tcg; /* Trusted Computing Group */ +#define ATA_SUPPORT_TCG 0x0001 /*049*/ u_int16_t capabilities1; #define ATA_SUPPORT_DMA 0x0100 #define ATA_SUPPORT_LBA 0x0200 -#define ATA_SUPPORT_IORDY 0x0400 -#define ATA_SUPPORT_IORDYDIS 0x0800 +#define ATA_SUPPORT_IORDYDIS 0x0400 +#define ATA_SUPPORT_IORDY 0x0800 #define ATA_SUPPORT_OVERLAP 0x4000 /*050*/ u_int16_t capabilities2; @@ -90,6 +94,12 @@ struct ata_params { /*057*/ u_int16_t current_size_1; /*058*/ u_int16_t current_size_2; /*059*/ u_int16_t multi; +#define ATA_SUPPORT_BLOCK_ERASE_EXT 0x8000 +#define ATA_SUPPORT_OVERWRITE_EXT 0x4000 +#define ATA_SUPPORT_CRYPTO_SCRAMBLE_EXT 0x2000 +#define ATA_SUPPORT_SANITIZE 0x1000 +#define ATA_SUPPORT_SANITIZE_ALLOWED 0x0800 +#define ATA_SUPPORT_ANTIFREEZE_LOCK_EXT 0x0400 #define ATA_MULTI_VALID 0x0100 /*060*/ u_int16_t lba_size_1; @@ -105,6 +115,7 @@ struct ata_params { /*069*/ u_int16_t support3; #define ATA_SUPPORT_RZAT 0x0020 #define ATA_SUPPORT_DRAT 0x4000 +#define ATA_ENCRYPTS_ALL_USER_DATA 0x0010 /* Self-encrypting drive */ #define ATA_SUPPORT_ZONE_MASK 0x0003 #define ATA_SUPPORT_ZONE_NR 0x0000 #define ATA_SUPPORT_ZONE_HOST_AWARE 0x0001 @@ -133,7 +144,8 @@ struct ata_params { /*77*/ 
u_int16_t satacapabilities2; #define ATA_SATA_CURR_GEN_MASK 0x0006 #define ATA_SUPPORT_NCQ_STREAM 0x0010 -#define ATA_SUPPORT_NCQ_QMANAGEMENT 0x0020 +#define ATA_SUPPORT_NCQ_NON_DATA 0x0020 +#define ATA_SUPPORT_NCQ_QMANAGEMENT ATA_SUPPORT_NCQ_NON_DATA #define ATA_SUPPORT_RCVSND_FPDMA_QUEUED 0x0040 /*78*/ u_int16_t satasupport; #define ATA_SUPPORT_NONZERO 0x0002 @@ -142,6 +154,7 @@ struct ata_params { #define ATA_SUPPORT_INORDERDATA 0x0010 #define ATA_SUPPORT_ASYNCNOTIF 0x0020 #define ATA_SUPPORT_SOFTSETPRESERVE 0x0040 +#define ATA_SUPPORT_NCQ_AUTOSENSE 0x0080 /*79*/ u_int16_t sataenabled; #define ATA_ENABLED_DAPST 0x0080 @@ -234,12 +247,15 @@ struct ata_params { #define ATA_SUPPORT_FREEFALL 0x0020 #define ATA_SUPPORT_SENSE_REPORT 0x0040 #define ATA_SUPPORT_EPC 0x0080 +#define ATA_SUPPORT_AMAX_ADDR 0x0100 +#define ATA_SUPPORT_DSN 0x0200 /*120*/ u_int16_t enabled2; #define ATA_ENABLED_WRITEREADVERIFY 0x0002 #define ATA_ENABLED_WRITEUNCORREXT 0x0004 #define ATA_ENABLED_FREEFALL 0x0020 #define ATA_ENABLED_SENSE_REPORT 0x0040 #define ATA_ENABLED_EPC 0x0080 +#define ATA_ENABLED_DSN 0x0200 u_int16_t reserved121[6]; /*127*/ u_int16_t removable_status; /*128*/ u_int16_t security_status; @@ -257,10 +273,23 @@ struct ata_params { /*162*/ u_int16_t cfa_kms_support; /*163*/ u_int16_t cfa_trueide_modes; /*164*/ u_int16_t cfa_memory_modes; - u_int16_t reserved165[4]; + u_int16_t reserved165[3]; +/*168*/ u_int16_t form_factor; +#define ATA_FORM_FACTOR_MASK 0x000f +#define ATA_FORM_FACTOR_NOT_REPORTED 0x0000 +#define ATA_FORM_FACTOR_5_25 0x0001 +#define ATA_FORM_FACTOR_3_5 0x0002 +#define ATA_FORM_FACTOR_2_5 0x0003 +#define ATA_FORM_FACTOR_1_8 0x0004 +#define ATA_FORM_FACTOR_SUB_1_8 0x0005 +#define ATA_FORM_FACTOR_MSATA 0x0006 +#define ATA_FORM_FACTOR_M_2 0x0007 +#define ATA_FORM_FACTOR_MICRO_SSD 0x0008 +#define ATA_FORM_FACTOR_C_FAST 0x0009 /*169*/ u_int16_t support_dsm; #define ATA_SUPPORT_DSM_TRIM 0x0001 - u_int16_t reserved170[6]; +/*170*/ u_int8_t product_id[8]; /* Additional 
Product Identifier */ + u_int16_t reserved174[2]; /*176*/ u_int8_t media_serial[60]; /*206*/ u_int16_t sct; u_int16_t reserved207[2]; @@ -283,7 +312,7 @@ struct ata_params { /*223*/ u_int16_t transport_minor; u_int16_t reserved224[31]; /*255*/ u_int16_t integrity; -} __packed; +} __packed __aligned(2); /* ATA Dataset Management */ #define ATA_DSM_BLK_SIZE 512 @@ -355,7 +384,6 @@ struct ata_params { #define ATA_SA600 0x49 #define ATA_DMA_MAX 0x4f - /* ATA commands */ #define ATA_NOP 0x00 /* NOP */ #define ATA_NF_FLUSHQUEUE 0x00 /* flush queued cmd's */ @@ -391,6 +419,12 @@ struct ata_params { #define ATA_READ_LOG_DMA_EXT 0x47 /* read log DMA ext - PIO Data-In */ #define ATA_ZAC_MANAGEMENT_IN 0x4a /* ZAC management in */ #define ATA_ZM_REPORT_ZONES 0x00 /* report zones */ +#define ATA_WRITE_LOG_DMA_EXT 0x57 /* WRITE LOG DMA EXT */ +#define ATA_TRUSTED_NON_DATA 0x5b /* TRUSTED NON-DATA */ +#define ATA_TRUSTED_RECEIVE 0x5c /* TRUSTED RECEIVE */ +#define ATA_TRUSTED_RECEIVE_DMA 0x5d /* TRUSTED RECEIVE DMA */ +#define ATA_TRUSTED_SEND 0x5e /* TRUSTED SEND */ +#define ATA_TRUSTED_SEND_DMA 0x5f /* TRUSTED SEND DMA */ #define ATA_READ_FPDMA_QUEUED 0x60 /* read DMA NCQ */ #define ATA_WRITE_FPDMA_QUEUED 0x61 /* write DMA NCQ */ #define ATA_NCQ_NON_DATA 0x63 /* NCQ non-data command */ @@ -410,15 +444,22 @@ struct ata_params { #define ATA_RFPDMA_ZAC_MGMT_IN 0x02 /* NCQ ZAC mgmt in w/data */ #define ATA_SEP_ATTN 0x67 /* SEP request */ #define ATA_SEEK 0x70 /* seek */ +#define ATA_AMAX_ADDR 0x78 /* Accessible Max Address */ +#define ATA_AMAX_ADDR_GET 0x00 /* GET NATIVE MAX ADDRESS EXT */ +#define ATA_AMAX_ADDR_SET 0x01 /* SET ACCESSIBLE MAX ADDRESS EXT */ +#define ATA_AMAX_ADDR_FREEZE 0x02 /* FREEZE ACCESSIBLE MAX ADDRESS EXT */ #define ATA_ZAC_MANAGEMENT_OUT 0x9f /* ZAC management out */ #define ATA_ZM_CLOSE_ZONE 0x01 /* close zone */ #define ATA_ZM_FINISH_ZONE 0x02 /* finish zone */ #define ATA_ZM_OPEN_ZONE 0x03 /* open zone */ #define ATA_ZM_RWP 0x04 /* reset write pointer */ 
+#define ATA_DOWNLOAD_MICROCODE 0x92 /* DOWNLOAD MICROCODE */ +#define ATA_DOWNLOAD_MICROCODE_DMA 0x93 /* DOWNLOAD MICROCODE DMA */ #define ATA_PACKET_CMD 0xa0 /* packet command */ #define ATA_ATAPI_IDENTIFY 0xa1 /* get ATAPI params*/ #define ATA_SERVICE 0xa2 /* service command */ #define ATA_SMART_CMD 0xb0 /* SMART command */ +#define ATA_SANITIZE 0xb4 /* sanitize device */ #define ATA_CFA_ERASE 0xc0 /* CFA erase */ #define ATA_READ_MUL 0xc4 /* read multi */ #define ATA_WRITE_MUL 0xc5 /* write multi */ @@ -437,8 +478,11 @@ struct ata_params { #define ATA_CHECK_POWER_MODE 0xe5 /* device power mode */ #define ATA_SLEEP 0xe6 /* sleep */ #define ATA_FLUSHCACHE 0xe7 /* flush cache to disk */ +#define ATA_WRITE_BUFFER 0xe8 /* write buffer */ #define ATA_WRITE_PM 0xe8 /* write portmultiplier */ +#define ATA_READ_BUFFER_DMA 0xe9 /* read buffer DMA */ #define ATA_FLUSHCACHE48 0xea /* flush cache to disk */ +#define ATA_WRITE_BUFFER_DMA 0xeb /* write buffer DMA */ #define ATA_ATA_IDENTIFY 0xec /* get ATA params */ #define ATA_SETFEATURES 0xef /* features command */ #define ATA_SF_ENAB_WCACHE 0x02 /* enable write cache */ @@ -463,7 +507,6 @@ struct ata_params { #define ATA_SF_DIS_SRVIRQ 0xde /* disable service interrupt */ #define ATA_SF_LPSAERC 0x62 /* Long Phys Sect Align ErrRep*/ #define ATA_SF_DSN 0x63 /* Device Stats Notification */ -#define ATA_CHECK_POWER_MODE 0xe5 /* Check Power Mode */ #define ATA_SECURITY_SET_PASSWORD 0xf1 /* set drive password */ #define ATA_SECURITY_UNLOCK 0xf2 /* unlock drive using passwd */ #define ATA_SECURITY_ERASE_PREPARE 0xf3 /* prepare to erase drive */ @@ -473,7 +516,6 @@ struct ata_params { #define ATA_READ_NATIVE_MAX_ADDRESS 0xf8 /* read native max address */ #define ATA_SET_MAX_ADDRESS 0xf9 /* set max address */ - /* ATAPI commands */ #define ATAPI_TEST_UNIT_READY 0x00 /* check if device is ready */ #define ATAPI_REZERO 0x01 /* rewind */ @@ -534,7 +576,6 @@ struct ata_params { #define ATAPI_READ_CD 0xbe /* read data */ #define 
ATAPI_POLL_DSC 0xff /* poll DSC status bit */ - struct ata_ioc_devices { int channel; char name[2][32]; @@ -585,7 +626,7 @@ struct atapi_sense { u_int8_t specific; /* sense key specific */ #define ATA_SENSE_SPEC_VALID 0x80 #define ATA_SENSE_SPEC_MASK 0x7f - + u_int8_t specific1; /* sense key specific */ u_int8_t specific2; /* sense key specific */ } __packed; @@ -682,7 +723,7 @@ struct atapi_sense { #define ATA_IDL_ATA_STRINGS 0x05 /* ATA Strings */ #define ATA_IDL_SECURITY 0x06 /* Security */ #define ATA_IDL_PARALLEL_ATA 0x07 /* Parallel ATA */ -#define ATA_IDL_SERIAL_ATA 0x08 /* Seiral ATA */ +#define ATA_IDL_SERIAL_ATA 0x08 /* Serial ATA */ #define ATA_IDL_ZDI 0x09 /* Zoned Device Information */ struct ata_gp_log_dir { @@ -965,7 +1006,6 @@ struct ata_security_password { #define IOCATAGSPINDOWN _IOR('a', 104, int) #define IOCATASSPINDOWN _IOW('a', 105, int) - struct ata_ioc_raid_config { int lun; int type; diff --git a/usr/src/data/zoneinfo/README.illumos b/usr/src/data/zoneinfo/README.illumos index 416a4a92ba..17d1808a3e 100644 --- a/usr/src/data/zoneinfo/README.illumos +++ b/usr/src/data/zoneinfo/README.illumos @@ -28,7 +28,7 @@ commands assume that you're inside of the directory you extracted the time zone files. $ cp africa antarctica asia australasia backward etcetera europe \ - northamerica pacificnew southamerica systemv $CODEMGR_WS/usr/src/data/zoneinfo + northamerica southamerica $CODEMGR_WS/usr/src/data/zoneinfo Next you need to copy the country tab and the zone tab files. These have different names in our source tree. @@ -65,12 +65,12 @@ occurred in zone.tab.txt and backward. The contents of backward describe hardlinks that need to exist in packaging. Specifically, if you have a line in the form of: -LINK Beleriand Gondolin +LINK Arda/Beleriand Arda/Gondolin -That instructs that Gondolin should be hardlinked to Beleriand. In ips parlance +That instructs that Gondolin should be hardlinked to Beleriand. 
In IPS parlance that'd be: -hardlink path=usr/share/lib/zoneinfo/Beleriand target=./Gondolin +hardlink path=usr/share/lib/zoneinfo/Arda/Gondolin target=../Arda/Beleriand The differences in zone.tab.txt describe the additions and removals of various file entries. If you're not friends with protocmp yet, this should help you out. diff --git a/usr/src/data/zoneinfo/africa b/usr/src/data/zoneinfo/africa index 724744f377..28168cfc17 100644 --- a/usr/src/data/zoneinfo/africa +++ b/usr/src/data/zoneinfo/africa @@ -64,7 +64,7 @@ # Corrections are welcome. # Algeria -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Algeria 1916 only - Jun 14 23:00s 1:00 S Rule Algeria 1916 1919 - Oct Sun>=1 23:00s 0 - Rule Algeria 1917 only - Mar 24 23:00s 1:00 S @@ -87,10 +87,9 @@ Rule Algeria 1978 only - Mar 24 1:00 1:00 S Rule Algeria 1978 only - Sep 22 3:00 0 - Rule Algeria 1980 only - Apr 25 0:00 1:00 S Rule Algeria 1980 only - Oct 31 2:00 0 - -# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's -# more precise 0:09:21. +# See Europe/Paris for PMT-related transitions. # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01 +Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 16 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time 0:00 Algeria WE%sT 1940 Feb 25 2:00 1:00 Algeria CE%sT 1946 Oct 7 @@ -176,7 +175,7 @@ Link Africa/Abidjan Atlantic/St_Helena # St Helena # Egypt was mean noon at the Great Pyramid, 2:04:30.5, but apparently this # did not apply to Cairo, Alexandria, or Port Said. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Egypt 1940 only - Jul 15 0:00 1:00 S Rule Egypt 1940 only - Oct 1 0:00 0 - Rule Egypt 1941 only - Apr 15 0:00 1:00 S @@ -387,36 +386,87 @@ Zone Africa/Cairo 2:05:09 - LMT 1900 Oct # Ghana -# From Paul Eggert (2018-01-30): -# Whitman says DST was observed from 1931 to "the present"; -# Shanks & Pottenger say 1936 to 1942 with 20 minutes of DST, -# with transitions on 09-01 and 12-31 at 00:00. -# Page 33 of Parish GCB, Colonial Reports - Annual. No. 1066. Gold -# Coast. Report for 1919. (March 1921), OCLC 784024077 -# http://libsysdigi.library.illinois.edu/ilharvest/africana/books2011-05/5530214/5530214_1919/5530214_1919_opt.pdf -# lists the Determination of the Time Ordinance, 1919, No. 18, -# "to advance the time observed locally by the space of twenty minutes -# during the last four months of each year; the object in view being -# to extend during those months the period of daylight-time available -# for evening recreation after office hours." -# Vanessa Ogle, The Global Transformation of Time, 1870-1950 (2015), p 33, -# writes "In 1919, the Gold Coast (Ghana as of 1957) made Greenwich -# time its legal time and simultaneously legalized a summer time of -# UTC - 00:20 minutes from March to October."; a footnote lists -# the ordinance as being dated 1919-11-24. -# The Crown Colonist, Volume 12 (1942), p 176, says "the Government -# intend advancing Gold Coast time half an hour ahead of G.M.T. -# The actual date of the alteration has not yet been announced." -# These sources are incomplete and contradictory. Possibly what is -# now Ghana observed different DST regimes in different years. For -# lack of better info, use Shanks except treat the minus sign as a -# typo, and assume DST started in 1920 not 1936. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Ghana 1920 1942 - Sep 1 0:00 0:20 - -Rule Ghana 1920 1942 - Dec 31 0:00 0 - +# From P Chan (2020-11-20): +# Interpretation Amendment Ordinance, 1915 (No.24 of 1915) [1915-11-02] +# Ordinances of the Gold Coast, Ashanti, Northern Territories 1915, p 69-71 +# https://books.google.com/books?id=ErA-AQAAIAAJ&pg=PA70 +# This Ordinance added "'Time' shall mean Greenwich Mean Time" to the +# Interpretation Ordinance, 1876. +# +# Determination of the Time Ordinance, 1919 (No. 18 of 1919) [1919-11-24] +# Ordinances of the Gold Coast, Ashanti, Northern Territories 1919, p 75-76 +# https://books.google.com/books?id=MbA-AQAAIAAJ&pg=PA75 +# This Ordinance removed the previous definition of time and introduced DST. +# +# Time Determination Ordinance (Cap. 214) +# The Laws of the Gold Coast (including Togoland Under British Mandate) +# Vol. II (1937), p 2328 +# https://books.google.com/books?id=Z7M-AQAAIAAJ&pg=PA2328 +# Revised edition of the 1919 Ordinance. +# +# Time Determination (Amendment) Ordinance, 1940 (No. 9 of 1940) [1940-04-06] +# Annual Volume of the Laws of the Gold Coast: +# Containing All Legislation Enacted During Year 1940, p 22 +# https://books.google.com/books?id=1ao-AQAAIAAJ&pg=PA22 +# This Ordinance changed the forward transition from September to May. +# +# Defence (Time Determination Ordinance Amendment) Regulations, 1942 +# (Regulations No. 6 of 1942) [1942-01-31, commenced on 1942-02-08] +# Annual Volume of the Laws of the Gold Coast: +# Containing All Legislation Enacted During Year 1942, p 48 +# https://books.google.com/books?id=Das-AQAAIAAJ&pg=PA48 +# These regulations advanced the [standard] time by thirty minutes. +# +# Defence (Time Determination Ordinance Amendment (No.2)) Regulations, +# 1942 (Regulations No. 
28 of 1942) [1942-04-25] +# Annual Volume of the Laws of the Gold Coast: +# Containing All Legislation Enacted During Year 1942, p 87 +# https://books.google.com/books?id=Das-AQAAIAAJ&pg=PA87 +# These regulations abolished DST and changed the time to GMT+0:30. +# +# Defence (Revocation) (No.4) Regulations, 1945 (Regulations No. 45 of +# 1945) [1945-10-24, commenced on 1946-01-06] +# Annual Volume of the Laws of the Gold Coast: +# Containing All Legislation Enacted During Year 1945, p 256 +# https://books.google.com/books?id=9as-AQAAIAAJ&pg=PA256 +# These regulations revoked the previous two sets of Regulations. +# +# Time Determination (Amendment) Ordinance, 1945 (No. 18 of 1945) [1946-01-06] +# Annual Volume of the Laws of the Gold Coast: +# Containing All Legislation Enacted During Year 1945, p 69 +# https://books.google.com/books?id=9as-AQAAIAAJ&pg=PA69 +# This Ordinance abolished DST. +# +# Time Determination (Amendment) Ordinance, 1950 (No. 26 of 1950) [1950-07-22] +# Annual Volume of the Laws of the Gold Coast: +# Containing All Legislation Enacted During Year 1950, p 35 +# https://books.google.com/books?id=e60-AQAAIAAJ&pg=PA35 +# This Ordinance restored DST but with thirty minutes offset. +# +# Time Determination Ordinance (Cap. 264) +# The Laws of the Gold Coast, Vol. V (1954), p 380 +# https://books.google.com/books?id=Mqc-AQAAIAAJ&pg=PA380 +# Revised edition of the Time Determination Ordinance. +# +# Time Determination (Amendment) Ordinance, 1956 (No. 21 of 1956) [1956-08-29] +# Annual Volume of the Ordinances of the Gold Coast Enacted During the +# Year 1956, p 83 +# https://books.google.com/books?id=VLE-AQAAIAAJ&pg=PA83 +# This Ordinance abolished DST. 
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Ghana 1919 only - Nov 24 0:00 0:20 +0020 +Rule Ghana 1920 1942 - Jan 1 2:00 0 GMT +Rule Ghana 1920 1939 - Sep 1 2:00 0:20 +0020 +Rule Ghana 1940 1941 - May 1 2:00 0:20 +0020 +Rule Ghana 1950 1955 - Sep 1 2:00 0:30 +0030 +Rule Ghana 1951 1956 - Jan 1 2:00 0 GMT + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Africa/Accra -0:00:52 - LMT 1918 - 0:00 Ghana GMT/+0020 +Zone Africa/Accra -0:00:52 - LMT 1915 Nov 2 + 0:00 Ghana %s 1942 Feb 8 + 0:30 - +0030 1946 Jan 6 + 0:00 Ghana %s # Guinea # See Africa/Abidjan. @@ -434,11 +484,54 @@ Zone Africa/Bissau -1:02:20 - LMT 1912 Jan 1 1:00u 0:00 - GMT # Kenya + +# From P Chan (2020-10-24): +# +# The standard time of GMT+2:30 was adopted in the East Africa Protectorate.... +# [The Official Gazette, 1908-05-01, p 274] +# https://books.google.com/books?id=e-cAC-sjPSEC&pg=PA274 +# +# At midnight on 30 June 1928 the clocks throughout Kenya was put forward +# half an hour by the Alteration of Time Ordinance, 1928. +# https://gazettes.africa/archive/ke/1928/ke-government-gazette-dated-1928-05-11-no-28.pdf +# [Ordinance No. 11 of 1928, The Offical Gazette, 1928-06-26, p 813] +# https://books.google.com/books?id=2S0S6os32ZUC&pg=PA813 +# +# The 1928 ordinance was repealed by the Alteration of Time (repeal) Ordinance, +# 1929 and the time was restored to GMT+2:30 at midnight on 4 January 1930. +# [Ordinance No. 97 of 1929, The Official Gazette, 1929-12-31, p 2701] +# https://books.google.com/books?id=_g18jIZQlwwC&pg=PA2701 +# +# The Alteration of Time Ordinance, 1936 changed the time to GMT+2:45 +# and repealed the previous ordinance at midnight on 31 December 1936. +# [The Official Gazette, 1936-07-21, p 705] +# https://books.google.com/books?id=K7j41z0aC5wC&pg=PA705 +# +# The Defence (Amendment of Laws No. 120) Regulations changed the time +# to GMT+3 at midnight on 31 July 1942. +# [Kenya Official Gazette Supplement No. 
32, 1942-07-21, p 331] +# https://books.google.com/books?hl=zh-TW&id=c_E-AQAAIAAJ&pg=PA331 +# The provision of the 1936 ordinance was not repealed and was later +# incorporated in the Interpretation and General Clauses Ordinance in 1948. +# Although it was overridden by the 1942 regulations. +# [The Laws of Kenya in force on 1948-09-21, Title I, Chapter 1, 31] +# https://dds.crl.edu/item/217517 (p.101) +# In 1950 the Interpretation and General Clauses Ordinance was amended to adopt +# GMT+3 permanently as the 1942 regulations were due to expire on 10 December. +# https://books.google.com/books?id=jvR8mUDAwR0C&pg=PA787 +# [Ordinance No. 44 of 1950, Kenya Ordinances 1950, Vol. XXIX, p 294] +# https://books.google.com/books?id=-_dQAQAAMAAJ&pg=PA294 + +# From Paul Eggert (2020-10-24): +# The 1908-05-01 announcement does not give an effective date, +# so just say "1908 May". + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Africa/Nairobi 2:27:16 - LMT 1928 Jul - 3:00 - EAT 1930 - 2:30 - +0230 1940 - 2:45 - +0245 1960 +Zone Africa/Nairobi 2:27:16 - LMT 1908 May + 2:30 - +0230 1928 Jun 30 24:00 + 3:00 - EAT 1930 Jan 4 24:00 + 2:30 - +0230 1936 Dec 31 24:00 + 2:45 - +0245 1942 Jul 31 24:00 3:00 - EAT Link Africa/Nairobi Africa/Addis_Ababa # Ethiopia Link Africa/Nairobi Africa/Asmara # Eritrea @@ -501,7 +594,7 @@ Zone Africa/Monrovia -0:43:08 - LMT 1882 # From Paul Eggert (2013-10-25): # For now, assume they're reverting to the pre-2012 rules of permanent UT +02. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Libya 1951 only - Oct 14 2:00 1:00 S Rule Libya 1952 only - Jan 1 0:00 0 - Rule Libya 1953 only - Oct 9 2:00 1:00 S @@ -624,7 +717,7 @@ Zone Africa/Tripoli 0:52:44 - LMT 1920 # "The trial ended on March 29, 2009, when the clocks moved back by one hour # at 2am (or 02:00) local time..." 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Mauritius 1982 only - Oct 10 0:00 1:00 - Rule Mauritius 1983 only - Mar 21 0:00 0 - Rule Mauritius 2008 only - Oct lastSun 2:00 1:00 - @@ -875,17 +968,30 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis # https://maroc-diplomatique.net/maroc-le-retour-a-lheure-gmt-est-prevu-dimanche-prochain/ # http://aujourdhui.ma/actualite/gmt1-retour-a-lheure-normale-dimanche-prochain-1 # -# From Paul Eggert (2020-04-14): +# From Milamber (2020-05-31) +# In Morocco (where I live), the end of Ramadan (Arabic month) is followed by +# the Eid al-Fitr, and concretely it's 1 or 2 day offs for the people (with +# traditional visiting of family, big lunches/dinners, etc.). So for this +# year the astronomical calculations don't include the following 2 days off in +# the calc. These 2 days fall in a Sunday/Monday, so it's not acceptable by +# people to have a time shift during these 2 days off. Perhaps you can modify +# the (predicted) rules for next years: if the end of Ramadan is a (probable) +# Friday or Saturday (and so the 2 days off are on a weekend), the next time +# shift will be the next weekend. +# +# From Paul Eggert (2020-05-31): # For now, guess that in the future Morocco will fall back at 03:00 # the last Sunday before Ramadan, and spring forward at 02:00 the -# first Sunday after the day after Ramadan. To implement this, -# transition dates for 2021 through 2087 were determined by running -# the following program under GNU Emacs 26.3. -# (let ((islamic-year 1442)) +# first Sunday after two days after Ramadan. To implement this, +# transition dates and times for 2019 through 2087 were determined by +# running the following program under GNU Emacs 26.3. (This algorithm +# also produces the correct transition dates for 2016 through 2018, +# though the times differ due to Morocco's time zone change in 2018.) 
+# (let ((islamic-year 1440)) # (require 'cal-islam) # (while (< islamic-year 1511) # (let ((a (calendar-islamic-to-absolute (list 9 1 islamic-year))) -# (b (1+ (calendar-islamic-to-absolute (list 10 1 islamic-year)))) +# (b (+ 2 (calendar-islamic-to-absolute (list 10 1 islamic-year)))) # (sunday 0)) # (while (/= sunday (mod (setq a (1- a)) 7))) # (while (/= sunday (mod b 7)) @@ -900,7 +1006,7 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis # (car (cdr (cdr b))) (calendar-month-name (car b) t) (car (cdr b))))) # (setq islamic-year (+ 1 islamic-year)))) -# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Morocco 1939 only - Sep 12 0:00 1:00 - Rule Morocco 1939 only - Nov 19 0:00 0 - Rule Morocco 1940 only - Feb 25 0:00 1:00 - @@ -951,7 +1057,7 @@ Rule Morocco 2021 only - May 16 2:00 0 - Rule Morocco 2022 only - Mar 27 3:00 -1:00 - Rule Morocco 2022 only - May 8 2:00 0 - Rule Morocco 2023 only - Mar 19 3:00 -1:00 - -Rule Morocco 2023 only - Apr 23 2:00 0 - +Rule Morocco 2023 only - Apr 30 2:00 0 - Rule Morocco 2024 only - Mar 10 3:00 -1:00 - Rule Morocco 2024 only - Apr 14 2:00 0 - Rule Morocco 2025 only - Feb 23 3:00 -1:00 - @@ -967,7 +1073,7 @@ Rule Morocco 2029 only - Feb 18 2:00 0 - Rule Morocco 2029 only - Dec 30 3:00 -1:00 - Rule Morocco 2030 only - Feb 10 2:00 0 - Rule Morocco 2030 only - Dec 22 3:00 -1:00 - -Rule Morocco 2031 only - Jan 26 2:00 0 - +Rule Morocco 2031 only - Feb 2 2:00 0 - Rule Morocco 2031 only - Dec 14 3:00 -1:00 - Rule Morocco 2032 only - Jan 18 2:00 0 - Rule Morocco 2032 only - Nov 28 3:00 -1:00 - @@ -983,7 +1089,7 @@ Rule Morocco 2036 only - Nov 23 2:00 0 - Rule Morocco 2037 only - Oct 4 3:00 -1:00 - Rule Morocco 2037 only - Nov 15 2:00 0 - Rule Morocco 2038 only - Sep 26 3:00 -1:00 - -Rule Morocco 2038 only - Oct 31 2:00 0 - +Rule Morocco 2038 only - Nov 7 2:00 0 - Rule Morocco 2039 only - Sep 18 3:00 -1:00 - Rule Morocco 2039 only - Oct 23 2:00 0 - Rule Morocco 2040 only - Sep 2 
3:00 -1:00 - @@ -999,7 +1105,7 @@ Rule Morocco 2044 only - Aug 28 2:00 0 - Rule Morocco 2045 only - Jul 9 3:00 -1:00 - Rule Morocco 2045 only - Aug 20 2:00 0 - Rule Morocco 2046 only - Jul 1 3:00 -1:00 - -Rule Morocco 2046 only - Aug 5 2:00 0 - +Rule Morocco 2046 only - Aug 12 2:00 0 - Rule Morocco 2047 only - Jun 23 3:00 -1:00 - Rule Morocco 2047 only - Jul 28 2:00 0 - Rule Morocco 2048 only - Jun 7 3:00 -1:00 - @@ -1015,7 +1121,7 @@ Rule Morocco 2052 only - Jun 2 2:00 0 - Rule Morocco 2053 only - Apr 13 3:00 -1:00 - Rule Morocco 2053 only - May 25 2:00 0 - Rule Morocco 2054 only - Apr 5 3:00 -1:00 - -Rule Morocco 2054 only - May 10 2:00 0 - +Rule Morocco 2054 only - May 17 2:00 0 - Rule Morocco 2055 only - Mar 28 3:00 -1:00 - Rule Morocco 2055 only - May 2 2:00 0 - Rule Morocco 2056 only - Mar 12 3:00 -1:00 - @@ -1031,7 +1137,7 @@ Rule Morocco 2060 only - Mar 7 2:00 0 - Rule Morocco 2061 only - Jan 16 3:00 -1:00 - Rule Morocco 2061 only - Feb 27 2:00 0 - Rule Morocco 2062 only - Jan 8 3:00 -1:00 - -Rule Morocco 2062 only - Feb 12 2:00 0 - +Rule Morocco 2062 only - Feb 19 2:00 0 - Rule Morocco 2062 only - Dec 31 3:00 -1:00 - Rule Morocco 2063 only - Feb 4 2:00 0 - Rule Morocco 2063 only - Dec 16 3:00 -1:00 - @@ -1047,7 +1153,7 @@ Rule Morocco 2067 only - Dec 11 2:00 0 - Rule Morocco 2068 only - Oct 21 3:00 -1:00 - Rule Morocco 2068 only - Dec 2 2:00 0 - Rule Morocco 2069 only - Oct 13 3:00 -1:00 - -Rule Morocco 2069 only - Nov 17 2:00 0 - +Rule Morocco 2069 only - Nov 24 2:00 0 - Rule Morocco 2070 only - Oct 5 3:00 -1:00 - Rule Morocco 2070 only - Nov 9 2:00 0 - Rule Morocco 2071 only - Sep 20 3:00 -1:00 - @@ -1063,7 +1169,7 @@ Rule Morocco 2075 only - Sep 15 2:00 0 - Rule Morocco 2076 only - Jul 26 3:00 -1:00 - Rule Morocco 2076 only - Sep 6 2:00 0 - Rule Morocco 2077 only - Jul 18 3:00 -1:00 - -Rule Morocco 2077 only - Aug 22 2:00 0 - +Rule Morocco 2077 only - Aug 29 2:00 0 - Rule Morocco 2078 only - Jul 10 3:00 -1:00 - Rule Morocco 2078 only - Aug 14 2:00 0 - 
Rule Morocco 2079 only - Jun 25 3:00 -1:00 - @@ -1073,13 +1179,13 @@ Rule Morocco 2080 only - Jul 21 2:00 0 - Rule Morocco 2081 only - Jun 1 3:00 -1:00 - Rule Morocco 2081 only - Jul 13 2:00 0 - Rule Morocco 2082 only - May 24 3:00 -1:00 - -Rule Morocco 2082 only - Jun 28 2:00 0 - +Rule Morocco 2082 only - Jul 5 2:00 0 - Rule Morocco 2083 only - May 16 3:00 -1:00 - Rule Morocco 2083 only - Jun 20 2:00 0 - Rule Morocco 2084 only - Apr 30 3:00 -1:00 - Rule Morocco 2084 only - Jun 11 2:00 0 - Rule Morocco 2085 only - Apr 22 3:00 -1:00 - -Rule Morocco 2085 only - May 27 2:00 0 - +Rule Morocco 2085 only - Jun 3 2:00 0 - Rule Morocco 2086 only - Apr 14 3:00 -1:00 - Rule Morocco 2086 only - May 19 2:00 0 - Rule Morocco 2087 only - Mar 30 3:00 -1:00 - @@ -1180,7 +1286,7 @@ Link Africa/Maputo Africa/Lusaka # Zambia # Use plain "WAT" and "CAT" for the time zone abbreviations, to be compatible # with Namibia's neighbors. -# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Vanguard section, for zic and other parsers that support negative DST. Rule Namibia 1994 only - Mar 21 0:00 -1:00 WAT Rule Namibia 1994 2017 - Sep Sun>=1 2:00 0 CAT @@ -1212,8 +1318,69 @@ Zone Africa/Windhoek 1:08:24 - LMT 1892 Feb 8 # See Africa/Lagos. # Nigeria + +# From P Chan (2020-12-03): +# GMT was adopted as the standard time of Lagos on 1905-07-01. +# Lagos Weekly Record, 1905-06-24, p 3 +# http://ddsnext.crl.edu/titles/31558#?c=0&m=668&s=0&cv=2&r=0&xywh=1446%2C5221%2C1931%2C1235 +# says "It is officially notified that on and after the 1st of July 1905 +# Greenwich Mean Solar Time will be adopted thought the Colony and +# Protectorate, and that it will be necessary to put all clocks 13 minutes and +# 35 seconds back, recording local mean time." +# +# It seemed that Lagos returned to LMT on 1908-07-01. 
+# [The Lagos Standard], 1908-07-01, p 5 +# http://ddsnext.crl.edu/titles/31556#?c=0&m=78&s=0&cv=4&r=0&xywh=-92%2C3590%2C3944%2C2523 +# says "Scarcely have the people become accustomed to this new time, when +# another official notice has now appeared announcing that from and after the +# 1st July next, return will be made to local mean time." +# +# From P Chan (2020-11-27): +# On 1914-01-01, standard time of GMT+0:30 was adopted for the unified Nigeria. +# Colonial Reports - Annual. No. 878. Nigeria. Report for 1914. (April 1916), +# p 27 +# https://libsysdigi.library.illinois.edu/ilharvest/Africana/Books2011-05/3064634/3064634_1914/3064634_1914_opt.pdf#page=27 +# "On January 1st [1914], a universal standard time for Nigeria was adopted, +# viz., half an hour fast on Greenwich mean time, corresponding to the meridian +# 7 [degrees] 30' E. long." +# Lloyd's Register of Shipping (1915) says "Hitherto the time observed in Lagos +# was the local mean time. On 1st January, 1914, standard time for the whole of +# Nigeria was introduced ... Lagos time has been advanced about 16 minutes +# accordingly." +# +# In 1919, standard time was changed to GMT+1. +# Interpretation Ordinance (Cap 2) +# The Laws of Nigeria, Containing the Ordinances of Nigeria, in Force on the +# 1st Day of January, 1923, Vol.I [p 16] +# https://books.google.com/books?id=BOMrAQAAMAAJ&pg=PA16 +# "The expression 'Standard time' means standard time as used in Nigeria: +# namely, 60 minutes in advance of Greenwich mean time. (As amended by 18 of +# 1919, s. 
2.)" +# From Tim Parenti (2020-12-10): +# The Lagos Weekly Record, 1919-09-20, p 3 details discussion on the first +# reading of this Bill by the Legislative Council of the Colony of Nigeria on +# Thursday 1919-08-28: +# http://ddsnext.crl.edu/titles/31558?terms&item_id=303484#?m=1118&c=1&s=0&cv=2&r=0&xywh=1261%2C3408%2C2994%2C1915 +# "The proposal is that the Globe should be divided into twelve zones East and +# West of Greenwich, of one hour each, Nigeria falling into the zone with a +# standard of one hour fast on Greenwich Mean Time. Nigeria standard time is +# now 30 minutes in advance of Greenwich Mean Time ... according to the new +# proposal, standard time will be advanced another 30 minutes". It was further +# proposed that the firing of the time guns likewise be adjusted by 30 minutes +# to compensate. +# From Tim Parenti (2020-12-10), per P Chan (2020-12-11): +# The text of Ordinance 18 of 1919, published in Nigeria Gazette, Vol 6, No 52, +# shows that the change was assented to the following day and took effect "on +# the 1st day of September, 1919." +# Nigeria Gazette and Supplements 1919 Jan-Dec, Reference: 73266B-40, +# img 245-246 +# https://microform.digital/boa/collections/77/volumes/539/nigeria-lagos-1887-1919 + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Africa/Lagos 0:13:36 - LMT 1919 Sep +Zone Africa/Lagos 0:13:35 - LMT 1905 Jul 1 + 0:00 - GMT 1908 Jul 1 + 0:13:35 - LMT 1914 Jan 1 + 0:30 - +0030 1919 Sep 1 1:00 - WAT Link Africa/Lagos Africa/Bangui # Central African Republic Link Africa/Lagos Africa/Brazzaville # Rep. of the Congo @@ -1286,8 +1453,21 @@ Zone Africa/Sao_Tome 0:26:56 - LMT 1884 # See Africa/Abidjan. # Seychelles + +# From P Chan (2020-11-27): +# Standard Time was adopted on 1907-01-01. +# +# Standard Time Ordinance (Chapter 237) +# The Laws of Seychelles in Force on the 31st December, 1971, Vol. 
6, p 571 +# https://books.google.com/books?id=efE-AQAAIAAJ&pg=PA571 +# +# From Tim Parenti (2020-12-05): +# A footnote on https://books.google.com/books?id=DYdDAQAAMAAJ&pg=PA1689 +# confirms that Ordinance No. 9 of 1906 "was brought into force on the 1st +# January, 1907." + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Indian/Mahe 3:41:48 - LMT 1906 Jun # Victoria +Zone Indian/Mahe 3:41:48 - LMT 1907 Jan 1 # Victoria 4:00 - +04 # From Paul Eggert (2001-05-30): # Aldabra, Farquhar, and Desroches, originally dependencies of the @@ -1303,7 +1483,7 @@ Zone Indian/Mahe 3:41:48 - LMT 1906 Jun # Victoria # See Africa/Nairobi. # South Africa -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule SA 1942 1943 - Sep Sun>=15 2:00 1:00 - Rule SA 1943 1944 - Mar Sun>=15 2:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1336,7 +1516,7 @@ Link Africa/Johannesburg Africa/Mbabane # Eswatini # Abdalla of NTC, archived at: # https://mm.icann.org/pipermail/tz/2017-October/025333.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Sudan 1970 only - May 1 0:00 1:00 S Rule Sudan 1970 1985 - Oct 15 0:00 0 - Rule Sudan 1971 only - Apr 30 0:00 1:00 S @@ -1347,11 +1527,17 @@ Zone Africa/Khartoum 2:10:08 - LMT 1931 3:00 - EAT 2017 Nov 1 2:00 - CAT +# From Steffen Thorsen (2021-01-18): +# "South Sudan will change its time zone by setting the clock back 1 +# hour on February 1, 2021...." +# from https://eyeradio.org/south-sudan-adopts-new-time-zone-makuei/ + # South Sudan # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Africa/Juba 2:06:28 - LMT 1931 2:00 Sudan CA%sT 2000 Jan 15 12:00 - 3:00 - EAT + 3:00 - EAT 2021 Feb 1 00:00 + 2:00 - CAT # Tanzania # See Africa/Nairobi. 
@@ -1424,7 +1610,7 @@ Zone Africa/Juba 2:06:28 - LMT 1931 # http://www.almadenahnews.com/newss/news.php?c=118&id=38036 # http://www.worldtimezone.com/dst_news/dst_news_tunis02.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Tunisia 1939 only - Apr 15 23:00s 1:00 S Rule Tunisia 1939 only - Nov 18 23:00s 0 - Rule Tunisia 1940 only - Feb 25 23:00s 1:00 S @@ -1451,9 +1637,7 @@ Rule Tunisia 2005 only - Sep 30 1:00s 0 - Rule Tunisia 2006 2008 - Mar lastSun 2:00s 1:00 S Rule Tunisia 2006 2008 - Oct lastSun 2:00s 0 - -# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's -# more precise 0:09:21. -# Shanks & Pottenger say the 1911 switch was on Mar 9; go with Howse's Mar 11. +# See Europe/Paris for PMT-related transitions. # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Africa/Tunis 0:40:44 - LMT 1881 May 12 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time diff --git a/usr/src/data/zoneinfo/antarctica b/usr/src/data/zoneinfo/antarctica index 205998381d..ed750a8992 100644 --- a/usr/src/data/zoneinfo/antarctica +++ b/usr/src/data/zoneinfo/antarctica @@ -70,15 +70,30 @@ # Australian Antarctica Division informed us that Casey changed time # zone to UTC+11 in "the morning of 22nd October 2016". +# From Steffen Thorsen (2020-10-02, as corrected): +# Based on information we have received from the Australian Antarctic +# Division, Casey station and Macquarie Island station will move to Tasmanian +# daylight savings time on Sunday 4 October. This will take effect from 0001 +# hrs on Sunday 4 October 2020 and will mean Casey and Macquarie Island will +# be on the same time zone as Hobart. 
Some past dates too for this 3 hour +# time change back and forth between UTC+8 and UTC+11 for Casey: +# - 2018 Oct 7 4:00 - 2019 Mar 17 3:00 - 2019 Oct 4 3:00 - 2020 Mar 8 3:00 +# and now - 2020 Oct 4 0:01 + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Antarctica/Casey 0 - -00 1969 - 8:00 - +08 2009 Oct 18 2:00 +Zone Antarctica/Casey 0 - -00 1969 + 8:00 - +08 2009 Oct 18 2:00 11:00 - +11 2010 Mar 5 2:00 - 8:00 - +08 2011 Oct 28 2:00 + 8:00 - +08 2011 Oct 28 2:00 11:00 - +11 2012 Feb 21 17:00u - 8:00 - +08 2016 Oct 22 + 8:00 - +08 2016 Oct 22 11:00 - +11 2018 Mar 11 4:00 - 8:00 - +08 + 8:00 - +08 2018 Oct 7 4:00 + 11:00 - +11 2019 Mar 17 3:00 + 8:00 - +08 2019 Oct 4 3:00 + 11:00 - +11 2020 Mar 8 3:00 + 8:00 - +08 2020 Oct 4 0:01 + 11:00 - +11 Zone Antarctica/Davis 0 - -00 1957 Jan 13 7:00 - +07 1964 Nov 0 - -00 1969 Feb @@ -224,7 +239,7 @@ Zone Antarctica/Syowa 0 - -00 1957 Jan 29 # suggested by Bengt-Inge Larsson comment them out for now, and approximate # with only UTC and CEST. Uncomment them when 2014b is more prevalent. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S #Rule Troll 2005 max - Mar 1 1:00u 1:00 +01 Rule Troll 2005 max - Mar lastSun 1:00u 2:00 +02 #Rule Troll 2005 max - Oct lastSun 1:00u 1:00 +01 diff --git a/usr/src/data/zoneinfo/asia b/usr/src/data/zoneinfo/asia index 106efad877..ed944130e9 100644 --- a/usr/src/data/zoneinfo/asia +++ b/usr/src/data/zoneinfo/asia @@ -70,7 +70,7 @@ ############################################################################### # These rules are stolen from the 'europe' file. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule EUAsia 1981 max - Mar lastSun 1:00u 1:00 S Rule EUAsia 1979 1995 - Sep lastSun 1:00u 0 - Rule EUAsia 1996 max - Oct lastSun 1:00u 0 - @@ -114,7 +114,7 @@ Zone Asia/Kabul 4:36:48 - LMT 1890 # or # (brief) # http://www.worldtimezone.com/dst_news/dst_news_armenia03.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Armenia 2011 only - Mar lastSun 2:00s 1:00 - Rule Armenia 2011 only - Oct lastSun 2:00s 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -140,7 +140,7 @@ Zone Asia/Yerevan 2:58:00 - LMT 1924 May 2 # http://vestnikkavkaza.net/news/Azerbaijani-Cabinet-of-Ministers-cancels-daylight-saving-time.html # http://en.apa.az/xeber_azerbaijan_abolishes_daylight_savings_ti_240862.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Azer 1997 2015 - Mar lastSun 4:00 1:00 - Rule Azer 1997 2015 - Oct lastSun 5:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -227,7 +227,7 @@ Zone Asia/Baku 3:19:24 - LMT 1924 May 2 # http://www.thedailystar.net/newDesign/latest_news.php?nid=22817 # http://www.worldtimezone.com/dst_news/dst_news_bangladesh06.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Dhaka 2009 only - Jun 19 23:00 1:00 - Rule Dhaka 2009 only - Dec 31 24:00 0 - @@ -303,7 +303,7 @@ Zone Asia/Yangon 6:24:47 - LMT 1880 # or Rangoon # generally esteemed a success, it was announced early in 1920 that it would # not be repeated." # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Shang 1919 only - Apr 12 24:00 1:00 D Rule Shang 1919 only - Sep 30 24:00 0 S @@ -399,7 +399,7 @@ Rule Shang 1919 only - Sep 30 24:00 0 S # the Yangtze river delta area during that period of time although the scope # of such use will need to be investigated to determine. 
# -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Shang 1940 only - Jun 1 0:00 1:00 D Rule Shang 1940 only - Oct 12 24:00 0 S Rule Shang 1941 only - Mar 15 0:00 1:00 D @@ -462,7 +462,7 @@ Rule Shang 1948 1949 - Sep 30 24:00 0 S #plan # to begin on 17 April. # http://data.people.com.cn/pic/101p/1988/04/1988041201.jpg -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule PRC 1986 only - May 4 2:00 1:00 D Rule PRC 1986 1991 - Sep Sun>=11 2:00 0 S Rule PRC 1987 1991 - Apr Sun>=11 2:00 1:00 D @@ -846,7 +846,7 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928 # or dates for the 1942 and 1945 transitions. # The Japanese occupation of Hong Kong began 1941-12-25. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule HK 1946 only - Apr 21 0:00 1:00 S Rule HK 1946 only - Dec 1 3:30s 0 - Rule HK 1947 only - Apr 13 3:30s 1:00 S @@ -973,7 +973,7 @@ Zone Asia/Hong_Kong 7:36:42 - LMT 1904 Oct 30 0:36:42 # until 1945-09-21 at 01:00, overriding Shanks & Pottenger. # Likewise, use Yu-Cheng Chuang's data for DST in Taiwan. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Taiwan 1946 only - May 15 0:00 1:00 D Rule Taiwan 1946 only - Oct 1 0:00 0 S Rule Taiwan 1947 only - Apr 15 0:00 1:00 D @@ -1099,7 +1099,7 @@ Zone Asia/Taipei 8:06:00 - LMT 1896 Jan 1 # The 1904 decree says that Macau changed from the meridian of # Fortaleza do Monte, presumably the basis for the 7:34:10 for LMT. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Macau 1942 1943 - Apr 30 23:00 1:00 - Rule Macau 1942 only - Nov 17 23:00 0 - Rule Macau 1943 only - Sep 30 23:00 0 S @@ -1157,7 +1157,7 @@ Zone Asia/Macau 7:34:10 - LMT 1904 Oct 30 # Cyprus to remain united in time. Cyprus Mail 2017-10-17. 
# https://cyprus-mail.com/2017/10/17/cyprus-remain-united-time/ -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cyprus 1975 only - Apr 13 0:00 1:00 S Rule Cyprus 1975 only - Oct 12 0:00 0 - Rule Cyprus 1976 only - May 15 0:00 1:00 S @@ -1534,7 +1534,7 @@ Zone Asia/Jayapura 9:22:48 - LMT 1932 Nov # be changed back to its previous state on the 24 hours of the # thirtieth day of Shahrivar. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Iran 1978 1980 - Mar 20 24:00 1:00 - Rule Iran 1978 only - Oct 20 24:00 0 - Rule Iran 1979 only - Sep 18 24:00 0 - @@ -1676,7 +1676,7 @@ Zone Asia/Tehran 3:25:44 - LMT 1916 # We have published a short article in English about the change: # https://www.timeanddate.com/news/time/iraq-dumps-daylight-saving.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Iraq 1982 only - May 1 0:00 1:00 - Rule Iraq 1982 1984 - Oct 1 0:00 0 - Rule Iraq 1983 only - Mar 31 0:00 1:00 - @@ -1699,6 +1699,10 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890 # Israel +# For more info about the motivation for DST in Israel, see: +# Barak Y. Israel's Daylight Saving Time controversy. Israel Affairs. +# 2020-08-11. https://doi.org/10.1080/13537121.2020.1806564 + # From Ephraim Silverberg (2001-01-11): # # I coined "IST/IDT" circa 1988. Until then there were three @@ -1719,40 +1723,180 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890 # high on my favorite-country list (and not only because my wife's # family is from India). 
-# From Shanks & Pottenger: -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Zion 1940 only - Jun 1 0:00 1:00 D -Rule Zion 1942 1944 - Nov 1 0:00 0 S -Rule Zion 1943 only - Apr 1 2:00 1:00 D -Rule Zion 1944 only - Apr 1 0:00 1:00 D -Rule Zion 1945 only - Apr 16 0:00 1:00 D -Rule Zion 1945 only - Nov 1 2:00 0 S -Rule Zion 1946 only - Apr 16 2:00 1:00 D -Rule Zion 1946 only - Nov 1 0:00 0 S -Rule Zion 1948 only - May 23 0:00 2:00 DD -Rule Zion 1948 only - Sep 1 0:00 1:00 D -Rule Zion 1948 1949 - Nov 1 2:00 0 S -Rule Zion 1949 only - May 1 0:00 1:00 D -Rule Zion 1950 only - Apr 16 0:00 1:00 D -Rule Zion 1950 only - Sep 15 3:00 0 S -Rule Zion 1951 only - Apr 1 0:00 1:00 D -Rule Zion 1951 only - Nov 11 3:00 0 S -Rule Zion 1952 only - Apr 20 2:00 1:00 D -Rule Zion 1952 only - Oct 19 3:00 0 S -Rule Zion 1953 only - Apr 12 2:00 1:00 D -Rule Zion 1953 only - Sep 13 3:00 0 S -Rule Zion 1954 only - Jun 13 0:00 1:00 D -Rule Zion 1954 only - Sep 12 0:00 0 S -Rule Zion 1955 only - Jun 11 2:00 1:00 D -Rule Zion 1955 only - Sep 11 0:00 0 S -Rule Zion 1956 only - Jun 3 0:00 1:00 D -Rule Zion 1956 only - Sep 30 3:00 0 S -Rule Zion 1957 only - Apr 29 2:00 1:00 D -Rule Zion 1957 only - Sep 22 0:00 0 S -Rule Zion 1974 only - Jul 7 0:00 1:00 D -Rule Zion 1974 only - Oct 13 0:00 0 S -Rule Zion 1975 only - Apr 20 0:00 1:00 D -Rule Zion 1975 only - Aug 31 0:00 0 S +# From P Chan (2020-10-27), with corrections: +# +# 1940-1946 Supplement No. 2 to the Palestine Gazette +# # issue page Order No. 
dated start end note +# 1 1010 729 67 of 1940 1940-05-22 1940-05-31* 1940-09-30* revoked by #2 +# 2 1013 758 73 of 1940 1940-05-31 1940-05-31 1940-09-30 +# 3 1055 1574 196 of 1940 1940-11-06 1940-11-16 1940-12-31 +# 4 1066 1811 208 of 1940 1940-12-17 1940-12-31 1941-12-31 +# 5 1156 1967 116 of 1941 1941-12-16 1941-12-31 1942-12-31* amended by #6 +# 6 1228 1608 86 of 1942 1942-10-14 1941-12-31 1942-10-31 +# 7 1256 279 21 of 1943 1943-03-18 1943-03-31 1943-10-31 +# 8 1323 249 19 of 1944 1944-03-13 1944-03-31 1944-10-31 +# 9 1402 328 20 of 1945 1945-04-05 1945-04-15 1945-10-31 +#10 1487 596 14 of 1946 1946-04-04 1946-04-15 1946-10-31 +# +# 1948 Iton Rishmi (Official Gazette of the Provisional Government) +# # issue page dated start end +#11 2 7 1948-05-20 1948-05-22 1948-10-31* +# ^This moved timezone to +04, replaced by #12 from 1948-08-31 24:00 GMT. +#12 17 (Annex B) 84 1948-08-22 1948-08-31 1948-10-31 +# +# 1949-2000 Kovetz HaTakanot (Collection of Regulations) +# # issue page dated start end note +#13 6 133 1949-03-23 1949-04-30 1949-10-31 +#14 80 755 1950-03-17 1950-04-15 1950-09-14 +#15 164 782 1951-03-22 1951-03-31 1951-09-29* amended by #16 +#16 206 1940 1951-09-23 ---------- 1951-10-22* amended by #17 +#17 212 78 1951-10-19 ---------- 1951-11-10 +#18 254 652 1952-03-03 1952-04-19 1952-09-27* amended by #19 +#19 300 11 1952-09-15 ---------- 1952-10-18 +#20 348 817 1953-03-03 1953-04-11 1953-09-12 +#21 420 385 1954-02-17 1954-06-12 1954-09-11 +#22 497 548 1955-01-14 1955-06-11 1955-09-10 +#23 591 608 1956-03-12 1956-06-02 1956-09-29 +#24 680 957 1957-02-08 1957-04-27 1957-09-21 +#25 3192 1418 1974-06-28 1974-07-06 1974-10-12 +#26 3322 1389 1975-04-03 1975-04-19 1975-08-30 +#27 4146 2089 1980-07-15 1980-08-02 1980-09-13 +#28 4604 1081 1984-02-22 1984-05-05* 1984-08-25* revoked by #29 +#29 4619 1312 1984-04-06 1984-05-05 1984-08-25 +#30 4744 475 1984-12-23 1985-04-13 1985-09-14* amended by #31 +#31 4851 1848 1985-08-18 ---------- 1985-08-31 +#32 4932 899 
1986-04-22 1986-05-17 1986-09-06 +#33 5013 580 1987-02-15 1987-04-18* 1987-08-22* revoked by #34 +#34 5021 744 1987-03-30 1987-04-14 1987-09-12 +#35 5096 659 1988-02-14 1988-04-09 1988-09-03 +#36 5167 514 1989-02-03 1989-04-29 1989-09-02 +#37 5248 375 1990-01-23 1990-03-24 1990-08-25 +#38 5335 612 1991-02-10 1991-03-09* 1991-08-31 amended by #39 +# 1992-03-28 1992-09-05 +#39 5339 709 1991-03-04 1991-03-23 ---------- +#40 5506 503 1993-02-18 1993-04-02 1993-09-05 +# 1994-04-01 1994-08-28 +# 1995-03-31 1995-09-03 +#41 5731 438 1996-01-01 1996-03-14 1996-09-15 +# 1997-03-13* 1997-09-18* overridden by 1997 Temp Prov +# 1998-03-19* 1998-09-17* revoked by #42 +#42 5853 1243 1997-09-18 1998-03-19 1998-09-05 +#43 5937 77 1998-10-18 1999-04-02 1999-09-03 +# 2000-04-14* 2000-09-15* revoked by #44 +# 2001-04-13* 2001-09-14* revoked by #44 +#44 6024 39 2000-03-14 2000-04-14 2000-10-22* overridden by 2000 Temp Prov +# 2001-04-06* 2001-10-10* overridden by 2000 Temp Prov +# 2002-03-29* 2002-10-29* overridden by 2000 Temp Prov +# +# These are laws enacted by the Knesset since the Minister could only alter the +# transition dates at least six months in advanced under the 1992 Law. +# dated start end +# 1997 Temporary Provisions 1997-03-06 1997-03-20 1997-09-13 +# 2000 Temporary Provisions 2000-07-28 ---------- 2000-10-06 +# 2001-04-09 2001-09-24 +# 2002-03-29 2002-10-07 +# 2003-03-28 2003-10-03 +# 2004-04-07 2004-09-22 +# Note: +# Transition times in 1940-1957 (#1-#24) were midnight GMT, +# in 1974-1998 (#25-#42 and the 1997 Temporary Provisions) were midnight, +# in 1999-April 2000 (#43,#44) were 02:00, +# in the 2000 Temporary Provisions were 01:00. 
+# +# ----------------------------------------------------------------------------- +# Links: +# 1 https://findit.library.yale.edu/images_layout/view?parentoid=15537490&increment=687 +# 2 https://findit.library.yale.edu/images_layout/view?parentoid=15537490&increment=716 +# 3 https://findit.library.yale.edu/images_layout/view?parentoid=15537491&increment=721 +# 4 https://findit.library.yale.edu/images_layout/view?parentoid=15537491&increment=958 +# 5 https://findit.library.yale.edu/images_layout/view?parentoid=15537502&increment=558 +# 6 https://findit.library.yale.edu/images_layout/view?parentoid=15537511&increment=105 +# 7 https://findit.library.yale.edu/images_layout/view?parentoid=15537516&increment=278 +# 8 https://findit.library.yale.edu/images_layout/view?parentoid=15537522&increment=248 +# 9 https://findit.library.yale.edu/images_layout/view?parentoid=15537530&increment=329 +#10 https://findit.library.yale.edu/images_layout/view?parentoid=15537537&increment=601 +#11 https://www.nevo.co.il/law_word/law12/er-002.pdf#page=3 +#12 https://www.nevo.co.il/law_word/law12/er-017-t2.pdf#page=4 +#13 https://www.nevo.co.il/law_word/law06/tak-0006.pdf#page=3 +#14 https://www.nevo.co.il/law_word/law06/tak-0080.pdf#page=7 +#15 https://www.nevo.co.il/law_word/law06/tak-0164.pdf#page=10 +#16 https://www.nevo.co.il/law_word/law06/tak-0206.pdf#page=4 +#17 https://www.nevo.co.il/law_word/law06/tak-0212.pdf#page=2 +#18 https://www.nevo.co.il/law_word/law06/tak-0254.pdf#page=4 +#19 https://www.nevo.co.il/law_word/law06/tak-0300.pdf#page=5 +#20 https://www.nevo.co.il/law_word/law06/tak-0348.pdf#page=3 +#21 https://www.nevo.co.il/law_word/law06/tak-0420.pdf#page=5 +#22 https://www.nevo.co.il/law_word/law06/tak-0497.pdf#page=10 +#23 https://www.nevo.co.il/law_word/law06/tak-0591.pdf#page=6 +#24 https://www.nevo.co.il/law_word/law06/tak-0680.pdf#page=3 +#25 https://www.nevo.co.il/law_word/law06/tak-3192.pdf#page=2 +#26 https://www.nevo.co.il/law_word/law06/tak-3322.pdf#page=5 +#27 
https://www.nevo.co.il/law_word/law06/tak-4146.pdf#page=2 +#28 https://www.nevo.co.il/law_word/law06/tak-4604.pdf#page=7 +#29 https://www.nevo.co.il/law_word/law06/tak-4619.pdf#page=2 +#30 https://www.nevo.co.il/law_word/law06/tak-4744.pdf#page=11 +#31 https://www.nevo.co.il/law_word/law06/tak-4851.pdf#page=2 +#32 https://www.nevo.co.il/law_word/law06/tak-4932.pdf#page=19 +#33 https://www.nevo.co.il/law_word/law06/tak-5013.pdf#page=8 +#34 https://www.nevo.co.il/law_word/law06/tak-5021.pdf#page=8 +#35 https://www.nevo.co.il/law_word/law06/tak-5096.pdf#page=3 +#36 https://www.nevo.co.il/law_word/law06/tak-5167.pdf#page=2 +#37 https://www.nevo.co.il/law_word/law06/tak-5248.pdf#page=7 +#38 https://www.nevo.co.il/law_word/law06/tak-5335.pdf#page=6 +#39 https://www.nevo.co.il/law_word/law06/tak-5339.pdf#page=7 +#40 https://www.nevo.co.il/law_word/law06/tak-5506.pdf#page=19 +#41 https://www.nevo.co.il/law_word/law06/tak-5731.pdf#page=2 +#42 https://www.nevo.co.il/law_word/law06/tak-5853.pdf#page=3 +#43 https://www.nevo.co.il/law_word/law06/tak-5937.pdf#page=9 +#44 https://www.nevo.co.il/law_word/law06/tak-6024.pdf#page=4 +# +# Time Determination (Temporary Provisions) Law, 1997 +# https://www.nevo.co.il/law_html/law19/p201_003.htm +# +# Time Determination (Temporary Provisions) Law, 2000 +# https://www.nevo.co.il/law_html/law19/p201_004.htm +# +# Time Determination Law, 1992 and amendments +# https://www.nevo.co.il/law_html/law01/p201_002.htm +# https://main.knesset.gov.il/Activity/Legislation/Laws/Pages/LawPrimary.aspx?lawitemid=2001174 + +# From Paul Eggert (2020-10-27): +# Several of the midnight transitions mentioned above are ambiguous; +# are they 00:00, 00:00s, 24:00, or 24:00s? When resolving these ambiguities, +# try to minimize changes from previous tzdb versions, for lack of better info. +# Commentary from previous versions is included below, to help explain this. 
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Zion 1940 only - May 31 24:00u 1:00 D +Rule Zion 1940 only - Sep 30 24:00u 0 S +Rule Zion 1940 only - Nov 16 24:00u 1:00 D +Rule Zion 1942 1946 - Oct 31 24:00u 0 S +Rule Zion 1943 1944 - Mar 31 24:00u 1:00 D +Rule Zion 1945 1946 - Apr 15 24:00u 1:00 D +Rule Zion 1948 only - May 22 24:00u 2:00 DD +Rule Zion 1948 only - Aug 31 24:00u 1:00 D +Rule Zion 1948 1949 - Oct 31 24:00u 0 S +Rule Zion 1949 only - Apr 30 24:00u 1:00 D +Rule Zion 1950 only - Apr 15 24:00u 1:00 D +Rule Zion 1950 only - Sep 14 24:00u 0 S +Rule Zion 1951 only - Mar 31 24:00u 1:00 D +Rule Zion 1951 only - Nov 10 24:00u 0 S +Rule Zion 1952 only - Apr 19 24:00u 1:00 D +Rule Zion 1952 only - Oct 18 24:00u 0 S +Rule Zion 1953 only - Apr 11 24:00u 1:00 D +Rule Zion 1953 only - Sep 12 24:00u 0 S +Rule Zion 1954 only - Jun 12 24:00u 1:00 D +Rule Zion 1954 only - Sep 11 24:00u 0 S +Rule Zion 1955 only - Jun 11 24:00u 1:00 D +Rule Zion 1955 only - Sep 10 24:00u 0 S +Rule Zion 1956 only - Jun 2 24:00u 1:00 D +Rule Zion 1956 only - Sep 29 24:00u 0 S +Rule Zion 1957 only - Apr 27 24:00u 1:00 D +Rule Zion 1957 only - Sep 21 24:00u 0 S +Rule Zion 1974 only - Jul 6 24:00 1:00 D +Rule Zion 1974 only - Oct 12 24:00 0 S +Rule Zion 1975 only - Apr 19 24:00 1:00 D +Rule Zion 1975 only - Aug 30 24:00 0 S # From Alois Treindl (2019-03-06): # http://www.moin.gov.il/Documents/שעון%20קיץ/clock-50-years-7-2014.pdf @@ -1765,25 +1909,24 @@ Rule Zion 1975 only - Aug 31 0:00 0 S # From Paul Eggert (2019-03-06): # Also see this thread about the moin.gov.il URL: # https://mm.icann.org/pipermail/tz/2018-November/027194.html -Rule Zion 1980 only - Aug 2 0:00 1:00 D -Rule Zion 1980 only - Sep 13 1:00 0 S -Rule Zion 1984 only - May 5 0:00 1:00 D -Rule Zion 1984 only - Aug 25 1:00 0 S - -# From Shanks & Pottenger: -Rule Zion 1985 only - Apr 14 0:00 1:00 D -Rule Zion 1985 only - Sep 15 0:00 0 S -Rule Zion 1986 only - May 18 0:00 1:00 D -Rule Zion 1986 only - Sep 7 0:00 0 S -Rule 
Zion 1987 only - Apr 15 0:00 1:00 D -Rule Zion 1987 only - Sep 13 0:00 0 S +Rule Zion 1980 only - Aug 2 24:00s 1:00 D +Rule Zion 1980 only - Sep 13 24:00s 0 S +Rule Zion 1984 only - May 5 24:00s 1:00 D +Rule Zion 1984 only - Aug 25 24:00s 0 S + +Rule Zion 1985 only - Apr 13 24:00 1:00 D +Rule Zion 1985 only - Aug 31 24:00 0 S +Rule Zion 1986 only - May 17 24:00 1:00 D +Rule Zion 1986 only - Sep 6 24:00 0 S +Rule Zion 1987 only - Apr 14 24:00 1:00 D +Rule Zion 1987 only - Sep 12 24:00 0 S # From Avigdor Finkelstein (2014-03-05): # I check the Parliament (Knesset) records and there it's stated that the # [1988] transition should take place on Saturday night, when the Sabbath # ends and changes to Sunday. -Rule Zion 1988 only - Apr 10 0:00 1:00 D -Rule Zion 1988 only - Sep 4 0:00 0 S +Rule Zion 1988 only - Apr 9 24:00 1:00 D +Rule Zion 1988 only - Sep 3 24:00 0 S # From Ephraim Silverberg # (1997-03-04, 1998-03-16, 1998-12-28, 2000-01-17, 2000-07-25, 2004-12-22, @@ -1812,15 +1955,15 @@ Rule Zion 1988 only - Sep 4 0:00 0 S # (except in 2002) is three nights before Yom Kippur [Day of Atonement] # (the eve of the 7th of Tishrei in the lunar Hebrew calendar). 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Zion 1989 only - Apr 30 0:00 1:00 D -Rule Zion 1989 only - Sep 3 0:00 0 S -Rule Zion 1990 only - Mar 25 0:00 1:00 D -Rule Zion 1990 only - Aug 26 0:00 0 S -Rule Zion 1991 only - Mar 24 0:00 1:00 D -Rule Zion 1991 only - Sep 1 0:00 0 S -Rule Zion 1992 only - Mar 29 0:00 1:00 D -Rule Zion 1992 only - Sep 6 0:00 0 S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Zion 1989 only - Apr 29 24:00 1:00 D +Rule Zion 1989 only - Sep 2 24:00 0 S +Rule Zion 1990 only - Mar 24 24:00 1:00 D +Rule Zion 1990 only - Aug 25 24:00 0 S +Rule Zion 1991 only - Mar 23 24:00 1:00 D +Rule Zion 1991 only - Aug 31 24:00 0 S +Rule Zion 1992 only - Mar 28 24:00 1:00 D +Rule Zion 1992 only - Sep 5 24:00 0 S Rule Zion 1993 only - Apr 2 0:00 1:00 D Rule Zion 1993 only - Sep 5 0:00 0 S @@ -1828,7 +1971,7 @@ Rule Zion 1993 only - Sep 5 0:00 0 S # Ministry of Interior, Jerusalem, Israel. The spokeswoman can be reached by # calling the office directly at 972-2-6701447 or 972-2-6701448. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 1994 only - Apr 1 0:00 1:00 D Rule Zion 1994 only - Aug 28 0:00 0 S Rule Zion 1995 only - Mar 31 0:00 1:00 D @@ -1848,11 +1991,11 @@ Rule Zion 1995 only - Sep 3 0:00 0 S # # where YYYY is the relevant year. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Zion 1996 only - Mar 15 0:00 1:00 D -Rule Zion 1996 only - Sep 16 0:00 0 S -Rule Zion 1997 only - Mar 21 0:00 1:00 D -Rule Zion 1997 only - Sep 14 0:00 0 S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Zion 1996 only - Mar 14 24:00 1:00 D +Rule Zion 1996 only - Sep 15 24:00 0 S +Rule Zion 1997 only - Mar 20 24:00 1:00 D +Rule Zion 1997 only - Sep 13 24:00 0 S Rule Zion 1998 only - Mar 20 0:00 1:00 D Rule Zion 1998 only - Sep 6 0:00 0 S Rule Zion 1999 only - Apr 2 2:00 1:00 D @@ -1871,7 +2014,7 @@ Rule Zion 1999 only - Sep 3 2:00 0 S # # ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 2000 only - Apr 14 2:00 1:00 D Rule Zion 2000 only - Oct 6 1:00 0 S Rule Zion 2001 only - Apr 9 1:00 1:00 D @@ -1893,7 +2036,7 @@ Rule Zion 2004 only - Sep 22 1:00 0 S # # ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2005+beyond.ps -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 2005 2012 - Apr Fri<=1 2:00 1:00 D Rule Zion 2005 only - Oct 9 2:00 0 S Rule Zion 2006 only - Oct 1 2:00 0 S @@ -1904,16 +2047,17 @@ Rule Zion 2010 only - Sep 12 2:00 0 S Rule Zion 2011 only - Oct 2 2:00 0 S Rule Zion 2012 only - Sep 23 2:00 0 S -# From Ephraim Silverberg (2013-06-27): -# On June 23, 2013, the Israeli government approved changes to the -# Time Decree Law. The next day, the changes passed the First Reading -# in the Knesset. The law is expected to pass the Second and Third -# (final) Readings by the beginning of September 2013. -# -# As of 2013, DST starts at 02:00 on the Friday before the last Sunday -# in March. DST ends at 02:00 on the last Sunday of October. 
- -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# From Ephraim Silverberg (2020-10-26): +# The current time law (2013) from the State of Israel can be viewed +# (in Hebrew) at: +# ftp://ftp.cs.huji.ac.il/pub/tz/israel/announcements/2013+law.pdf +# It translates to: +# Every year, in the period from the Friday before the last Sunday in +# the month of March at 02:00 a.m. until the last Sunday of the month +# of October at 02:00 a.m., Israel Time will be advanced an additional +# hour such that it will be UTC+3. + +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 2013 max - Mar Fri>=23 2:00 1:00 D Rule Zion 2013 max - Oct lastSun 2:00 0 S @@ -2013,7 +2157,7 @@ Zone Asia/Jerusalem 2:20:54 - LMT 1880 # do in any POSIX or C platform. The "25:00" assumes zic from 2007 or later, # which should be safe now. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Japan 1948 only - May Sat>=1 24:00 1:00 D Rule Japan 1948 1951 - Sep Sat>=8 25:00 0 S Rule Japan 1949 only - Apr Sat>=1 24:00 1:00 D @@ -2090,7 +2234,7 @@ Zone Asia/Tokyo 9:18:59 - LMT 1887 Dec 31 15:00u # From Paul Eggert (2013-12-11): # As Steffen suggested, consider the past 21-month experiment to be DST. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Jordan 1973 only - Jun 6 0:00 1:00 S Rule Jordan 1973 1975 - Oct 1 0:00 0 - Rule Jordan 1974 1977 - May 1 0:00 1:00 S @@ -2416,7 +2560,7 @@ Zone Asia/Oral 3:25:24 - LMT 1924 May 2 # or Ural'sk # Our government cancels daylight saving time 6th of August 2005. # From 2005-08-12 our GMT-offset is +6, w/o any daylight saving. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Kyrgyz 1992 1996 - Apr Sun>=7 0:00s 1:00 - Rule Kyrgyz 1992 1996 - Sep lastSun 0:00 0 - Rule Kyrgyz 1997 2005 - Mar lastSun 2:30 1:00 - @@ -2472,7 +2616,7 @@ Zone Asia/Bishkek 4:58:24 - LMT 1924 May 2 # follow and continued to use GMT+9:00 for interoperability. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule ROK 1948 only - Jun 1 0:00 1:00 D Rule ROK 1948 only - Sep 12 24:00 0 S Rule ROK 1949 only - Apr 3 0:00 1:00 D @@ -2560,7 +2704,7 @@ Zone Asia/Pyongyang 8:23:00 - LMT 1908 Apr 1 # Lebanon -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Lebanon 1920 only - Mar 28 0:00 1:00 S Rule Lebanon 1920 only - Oct 25 0:00 0 - Rule Lebanon 1921 only - Apr 3 0:00 1:00 S @@ -2590,7 +2734,7 @@ Zone Asia/Beirut 2:22:00 - LMT 1880 2:00 Lebanon EE%sT # Malaysia -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NBorneo 1935 1941 - Sep 14 0:00 0:20 - Rule NBorneo 1935 1941 - Dec 14 0:00 0 - # @@ -2735,7 +2879,7 @@ Zone Indian/Maldives 4:54:00 - LMT 1880 # Malé # September daylight saving time ends. 
Source: # http://zasag.mn/news/view/8969 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Mongol 1983 1984 - Apr 1 0:00 1:00 - Rule Mongol 1983 only - Oct 1 0:00 0 - # Shanks & Pottenger and IATA SSIM say 1990s switches occurred at 00:00, @@ -2923,7 +3067,7 @@ Zone Asia/Kathmandu 5:41:16 - LMT 1920 # "People laud PM's announcement to end DST" # http://www.app.com.pk/en_/index.php?option=com_content&task=view&id=99374&Itemid=2 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Pakistan 2002 only - Apr Sun>=2 0:00 1:00 S Rule Pakistan 2002 only - Oct Sun>=2 0:00 0 - Rule Pakistan 2008 only - Jun 1 0:00 1:00 S @@ -3217,15 +3361,42 @@ Zone Asia/Karachi 4:28:12 - LMT 1907 # From Sharef Mustafa (2019-10-18): # Palestine summer time will end on midnight Oct 26th 2019 ... -# http://www.palestinecabinet.gov.ps/website/ar/ViewDetails?ID=43948 # -# From Paul Eggert (2019-04-10): -# For now, guess spring-ahead transitions are March's last Friday at 00:00. +# From Steffen Thorsen (2020-10-20): +# Some sources such as these say, and display on clocks, that DST ended at +# midnight last year... +# https://www.amad.ps/ar/post/320006 +# +# From Tim Parenti (2020-10-20): +# The report of the Palestinian Cabinet meeting of 2019-10-14 confirms +# a decision on (translated): "The start of the winter time in Palestine, by +# delaying the clock by sixty minutes, starting from midnight on Friday / +# Saturday corresponding to 26/10/2019." +# http://www.palestinecabinet.gov.ps/portal/meeting/details/43948 + +# From Sharef Mustafa (2020-10-20): +# As per the palestinian cabinet announcement yesterday , the day light saving +# shall [end] on Oct 24th 2020 at 01:00AM by delaying the clock by 60 minutes. 
+# http://www.palestinecabinet.gov.ps/portal/Meeting/Details/51584 + +# From Tim Parenti (2020-10-20): +# Predict future fall transitions at 01:00 on the Saturday preceding October's +# last Sunday (i.e., Sat>=24). This is consistent with our predictions since +# 2016, although the time of the change differed slightly in 2019. + +# From Pierre Cashon (2020-10-20): +# The summer time this year started on March 28 at 00:00. +# https://wafa.ps/ar_page.aspx?id=GveQNZa872839351758aGveQNZ +# http://www.palestinecabinet.gov.ps/portal/meeting/details/50284 +# The winter time in 2015 started on October 23 at 01:00. +# https://wafa.ps/ar_page.aspx?id=CgpCdYa670694628582aCgpCdY +# http://www.palestinecabinet.gov.ps/portal/meeting/details/27583 # -# From Tim Parenti (2016-10-19): -# Predict fall transitions on October's last Saturday at 01:00 from now on. +# From Paul Eggert (2019-04-10): +# For now, guess spring-ahead transitions are at 00:00 on the Saturday +# preceding March's last Sunday (i.e., Sat>=24). 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule EgyptAsia 1957 only - May 10 0:00 1:00 S Rule EgyptAsia 1957 1958 - Oct 1 0:00 0 - Rule EgyptAsia 1958 only - May 1 0:00 1:00 S @@ -3239,10 +3410,10 @@ Rule Palestine 2004 only - Oct 1 1:00 0 - Rule Palestine 2005 only - Oct 4 2:00 0 - Rule Palestine 2006 2007 - Apr 1 0:00 1:00 S Rule Palestine 2006 only - Sep 22 0:00 0 - -Rule Palestine 2007 only - Sep Thu>=8 2:00 0 - +Rule Palestine 2007 only - Sep 13 2:00 0 - Rule Palestine 2008 2009 - Mar lastFri 0:00 1:00 S Rule Palestine 2008 only - Sep 1 0:00 0 - -Rule Palestine 2009 only - Sep Fri>=1 1:00 0 - +Rule Palestine 2009 only - Sep 4 1:00 0 - Rule Palestine 2010 only - Mar 26 0:00 1:00 S Rule Palestine 2010 only - Aug 11 0:00 0 - Rule Palestine 2011 only - Apr 1 0:01 1:00 S @@ -3251,12 +3422,16 @@ Rule Palestine 2011 only - Aug 30 0:00 1:00 S Rule Palestine 2011 only - Sep 30 0:00 0 - Rule Palestine 2012 2014 - Mar lastThu 24:00 1:00 S Rule Palestine 2012 only - Sep 21 1:00 0 - -Rule Palestine 2013 only - Sep Fri>=21 0:00 0 - -Rule Palestine 2014 2015 - Oct Fri>=21 0:00 0 - -Rule Palestine 2015 only - Mar lastFri 24:00 1:00 S +Rule Palestine 2013 only - Sep 27 0:00 0 - +Rule Palestine 2014 only - Oct 24 0:00 0 - +Rule Palestine 2015 only - Mar 28 0:00 1:00 S +Rule Palestine 2015 only - Oct 23 1:00 0 - Rule Palestine 2016 2018 - Mar Sat>=24 1:00 1:00 S -Rule Palestine 2016 max - Oct lastSat 1:00 0 - -Rule Palestine 2019 max - Mar lastFri 0:00 1:00 S +Rule Palestine 2016 2018 - Oct Sat>=24 1:00 0 - +Rule Palestine 2019 only - Mar 29 0:00 1:00 S +Rule Palestine 2019 only - Oct Sat>=24 0:00 0 - +Rule Palestine 2020 max - Mar Sat>=24 0:00 1:00 S +Rule Palestine 2020 max - Oct Sat>=24 1:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Asia/Gaza 2:17:52 - LMT 1900 Oct @@ -3325,7 +3500,7 @@ Zone Asia/Hebron 2:20:23 - LMT 1900 Oct # influence of the sources. 
There is no current abbreviation for DST, # so use "PDT", the usual American style. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Phil 1936 only - Nov 1 0:00 1:00 D Rule Phil 1937 only - Feb 1 0:00 0 S Rule Phil 1954 only - Apr 12 0:00 1:00 D @@ -3473,7 +3648,7 @@ Zone Asia/Colombo 5:19:24 - LMT 1880 5:30 - +0530 # Syria -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Syria 1920 1923 - Apr Sun>=15 2:00 1:00 S Rule Syria 1920 1923 - Oct Sun>=1 2:00 0 - Rule Syria 1962 only - Apr 29 2:00 1:00 S diff --git a/usr/src/data/zoneinfo/australasia b/usr/src/data/zoneinfo/australasia index ac44c04821..cf8a0638f8 100644 --- a/usr/src/data/zoneinfo/australasia +++ b/usr/src/data/zoneinfo/australasia @@ -13,17 +13,14 @@ # Please see the notes below for the controversy about "EST" versus "AEST" etc. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Aus 1917 only - Jan 1 0:01 1:00 D -Rule Aus 1917 only - Mar 25 2:00 0 S -Rule Aus 1942 only - Jan 1 2:00 1:00 D -Rule Aus 1942 only - Mar 29 2:00 0 S -Rule Aus 1942 only - Sep 27 2:00 1:00 D -Rule Aus 1943 1944 - Mar lastSun 2:00 0 S -Rule Aus 1943 only - Oct 3 2:00 1:00 D -# Go with Whitman and the Australian National Standards Commission, which -# says W Australia didn't use DST in 1943/1944. Ignore Whitman's claim that -# 1944/1945 was just like 1943/1944. 
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Aus 1917 only - Jan 1 2:00s 1:00 D +Rule Aus 1917 only - Mar lastSun 2:00s 0 S +Rule Aus 1942 only - Jan 1 2:00s 1:00 D +Rule Aus 1942 only - Mar lastSun 2:00s 0 S +Rule Aus 1942 only - Sep 27 2:00s 1:00 D +Rule Aus 1943 1944 - Mar lastSun 2:00s 0 S +Rule Aus 1943 only - Oct 3 2:00s 1:00 D # Zone NAME STDOFF RULES FORMAT [UNTIL] # Northern Territory @@ -32,7 +29,7 @@ Zone Australia/Darwin 8:43:20 - LMT 1895 Feb 9:30 Aus AC%sT # Western Australia # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AW 1974 only - Oct lastSun 2:00s 1:00 D Rule AW 1975 only - Mar Sun>=1 2:00s 0 S Rule AW 1983 only - Oct lastSun 2:00s 1:00 D @@ -70,7 +67,7 @@ Zone Australia/Eucla 8:35:28 - LMT 1895 Dec # applies to all of the Whitsundays. # http://www.australia.gov.au/about-australia/australian-story/austn-islands # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AQ 1971 only - Oct lastSun 2:00s 1:00 D Rule AQ 1972 only - Feb lastSun 2:00s 0 S Rule AQ 1989 1991 - Oct lastSun 2:00s 1:00 D @@ -86,7 +83,7 @@ Zone Australia/Lindeman 9:55:56 - LMT 1895 10:00 Holiday AE%sT # South Australia -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AS 1971 1985 - Oct lastSun 2:00s 1:00 D Rule AS 1986 only - Oct 19 2:00s 1:00 D Rule AS 1987 2007 - Oct lastSun 2:00s 1:00 D @@ -114,9 +111,13 @@ Zone Australia/Adelaide 9:14:20 - LMT 1895 Feb # http://www.bom.gov.au/climate/averages/tables/dst_times.shtml # says King Island didn't observe DST from WWII until late 1971. 
# -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule AT 1916 only - Oct Sun>=1 2:00s 1:00 D +Rule AT 1917 only - Mar lastSun 2:00s 0 S +Rule AT 1917 1918 - Oct Sun>=22 2:00s 1:00 D +Rule AT 1918 1919 - Mar Sun>=1 2:00s 0 S Rule AT 1967 only - Oct Sun>=1 2:00s 1:00 D -Rule AT 1968 only - Mar lastSun 2:00s 0 S +Rule AT 1968 only - Mar Sun>=29 2:00s 0 S Rule AT 1968 1985 - Oct lastSun 2:00s 1:00 D Rule AT 1969 1971 - Mar Sun>=8 2:00s 0 S Rule AT 1972 only - Feb lastSun 2:00s 0 S @@ -136,18 +137,12 @@ Rule AT 2007 only - Mar lastSun 2:00s 0 S Rule AT 2008 max - Apr Sun>=1 2:00s 0 S # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Australia/Hobart 9:49:16 - LMT 1895 Sep - 10:00 - AEST 1916 Oct 1 2:00 - 10:00 1:00 AEDT 1917 Feb + 10:00 AT AE%sT 1919 Oct 24 10:00 Aus AE%sT 1967 10:00 AT AE%sT -Zone Australia/Currie 9:35:28 - LMT 1895 Sep - 10:00 - AEST 1916 Oct 1 2:00 - 10:00 1:00 AEDT 1917 Feb - 10:00 Aus AE%sT 1971 Jul - 10:00 AT AE%sT # Victoria -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AV 1971 1985 - Oct lastSun 2:00s 1:00 D Rule AV 1972 only - Feb lastSun 2:00s 0 S Rule AV 1973 1985 - Mar Sun>=1 2:00s 0 S @@ -168,7 +163,7 @@ Zone Australia/Melbourne 9:39:52 - LMT 1895 Feb 10:00 AV AE%sT # New South Wales -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AN 1971 1985 - Oct lastSun 2:00s 1:00 D Rule AN 1972 only - Feb 27 2:00s 0 S Rule AN 1973 1981 - Mar Sun>=1 2:00s 0 S @@ -197,7 +192,7 @@ Zone Australia/Broken_Hill 9:25:48 - LMT 1895 Feb 9:30 AS AC%sT # Lord Howe Island -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule LH 1981 1984 - Oct lastSun 2:00 1:00 - Rule LH 1982 1985 - Mar Sun>=1 2:00 0 - Rule LH 1985 only - Oct lastSun 2:00 0:30 - @@ -252,8 +247,9 @@ Zone Antarctica/Macquarie 0 - -00 1899 Nov 10:00 Aus AE%sT 1919 Apr 1 0:00s 0 - -00 1948 Mar 25 10:00 Aus 
AE%sT 1967 - 10:00 AT AE%sT 2010 Apr 4 3:00 - 11:00 - +11 + 10:00 AT AE%sT 2010 + 10:00 1:00 AEDT 2011 + 10:00 AT AE%sT # Christmas # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -380,7 +376,20 @@ Zone Indian/Cocos 6:27:40 - LMT 1900 # From Michael Deckers (2019-08-06): # https://www.laws.gov.fj/LawsAsMade/downloadfile/848 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# From Raymond Kumar (2020-10-08): +# [DST in Fiji] is from December 20th 2020, till 17th January 2021. +# From Alan Mintz (2020-10-08): +# https://www.laws.gov.fj/LawsAsMade/GetFile/1071 +# From Tim Parenti (2020-10-08): +# https://www.fijivillage.com/news/Daylight-saving-from-Dec-20th-this-year-to-Jan-17th-2021-8rf4x5/ +# "Minister for Employment, Parveen Bala says they had never thought of +# stopping daylight saving. He says it was just to decide on when it should +# start and end. Bala says it is a short period..." +# Since the end date is still in line with our ongoing predictions, assume for +# now that the later-than-usual start date is a one-time departure from the +# recent second Sunday in November pattern. 
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Fiji 1998 1999 - Nov Sun>=1 2:00 1:00 - Rule Fiji 1999 2000 - Feb lastSun 3:00 0 - Rule Fiji 2009 only - Nov 29 2:00 1:00 - @@ -391,7 +400,9 @@ Rule Fiji 2012 2013 - Jan Sun>=18 3:00 0 - Rule Fiji 2014 only - Jan Sun>=18 2:00 0 - Rule Fiji 2014 2018 - Nov Sun>=1 2:00 1:00 - Rule Fiji 2015 max - Jan Sun>=12 3:00 0 - -Rule Fiji 2019 max - Nov Sun>=8 2:00 1:00 - +Rule Fiji 2019 only - Nov Sun>=8 2:00 1:00 - +Rule Fiji 2020 only - Dec 20 2:00 1:00 - +Rule Fiji 2021 max - Nov Sun>=8 2:00 1:00 - # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Pacific/Fiji 11:55:44 - LMT 1915 Oct 26 # Suva 12:00 Fiji +12/+13 @@ -409,7 +420,7 @@ Zone Pacific/Tahiti -9:58:16 - LMT 1912 Oct # Papeete # Guam -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # http://guamlegislature.com/Public_Laws_5th/PL05-025.pdf # http://documents.guam.gov/wp-content/uploads/E.O.-59-7-Guam-Daylight-Savings-Time-May-6-1959.pdf Rule Guam 1959 only - Jun 27 2:00 1:00 D @@ -520,7 +531,7 @@ Zone Pacific/Nauru 11:07:40 - LMT 1921 Jan 15 # Uaobe 12:00 - +12 # New Caledonia -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NC 1977 1978 - Dec Sun>=1 0:00 1:00 - Rule NC 1978 1979 - Feb 27 0:00 0 - Rule NC 1996 only - Dec 1 2:00s 1:00 - @@ -535,7 +546,7 @@ Zone Pacific/Noumea 11:05:48 - LMT 1912 Jan 13 # Nouméa # New Zealand -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NZ 1927 only - Nov 6 2:00 1:00 S Rule NZ 1928 only - Mar 4 2:00 0 M Rule NZ 1928 1933 - Oct Sun>=8 2:00 0:30 S @@ -587,7 +598,7 @@ Link Pacific/Auckland Antarctica/McMurdo # Cook Is # From Shanks & Pottenger: -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cook 1978 only - Nov 12 0:00 0:30 - Rule Cook 1979 1991 - Mar Sun>=1 0:00 0 - Rule Cook 1979 1990 - Oct lastSun 0:00 0:30 - @@ -732,7 +743,7 @@ Link 
Pacific/Pago_Pago Pacific/Midway # in US minor outlying islands # That web page currently lists transitions for 2012/3 and 2013/4. # Assume the pattern instituted in 2012 will continue indefinitely. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule WS 2010 only - Sep lastSun 0:00 1 - Rule WS 2011 only - Apr Sat>=1 4:00 0 - Rule WS 2011 only - Sep lastSat 3:00 1 - @@ -776,7 +787,7 @@ Zone Pacific/Fakaofo -11:24:56 - LMT 1901 13:00 - +13 # Tonga -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Tonga 1999 only - Oct 7 2:00s 1:00 - Rule Tonga 2000 only - Mar 19 2:00s 0 - Rule Tonga 2000 2001 - Nov Sun>=1 2:00 1:00 - @@ -857,13 +868,36 @@ Zone Pacific/Wake 11:06:28 - LMT 1901 # Vanuatu -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Vanuatu 1983 only - Sep 25 0:00 1:00 - -Rule Vanuatu 1984 1991 - Mar Sun>=23 0:00 0 - -Rule Vanuatu 1984 only - Oct 23 0:00 1:00 - -Rule Vanuatu 1985 1991 - Sep Sun>=23 0:00 1:00 - -Rule Vanuatu 1992 1993 - Jan Sun>=23 0:00 0 - -Rule Vanuatu 1992 only - Oct Sun>=23 0:00 1:00 - + +# From P Chan (2020-11-27): +# Joint Daylight Saving Regulation No 59 of 1973 +# New Hebrides Condominium Gazette No 336. December 1973 +# http://www.paclii.org/vu/other/VUNHGovGaz//1973/11.pdf#page=15 +# +# Joint Daylight Saving (Repeal) Regulation No 10 of 1974 +# New Hebrides Condominium Gazette No 336. March 1974 +# http://www.paclii.org/vu/other/VUNHGovGaz//1974/3.pdf#page=11 +# +# Summer Time Act No. 35 of 1982 [commenced 1983-09-01] +# http://www.paclii.org/vu/other/VUGovGaz/1982/32.pdf#page=48 +# +# Summer Time Act (Cap 157) +# Laws of the Republic of Vanuatu Revised Edition 1988 +# http://www.paclii.org/cgi-bin/sinodisp/vu/legis/consol_act1988/sta147/sta147.html +# +# Summer Time (Amendment) Act No. 6 of 1991 [commenced 1991-11-11] +# http://www.paclii.org/vu/legis/num_act/sta1991227/ +# +# Summer Time (Repeal) Act No. 
4 of 1993 [commenced 1993-05-03] +# http://www.paclii.org/vu/other/VUGovGaz/1993/15.pdf#page=59 + +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Vanuatu 1973 only - Dec 22 12:00u 1:00 - +Rule Vanuatu 1974 only - Mar 30 12:00u 0 - +Rule Vanuatu 1983 1991 - Sep Sat>=22 24:00 1:00 - +Rule Vanuatu 1984 1991 - Mar Sat>=22 24:00 0 - +Rule Vanuatu 1992 1993 - Jan Sat>=22 24:00 0 - +Rule Vanuatu 1992 only - Oct Sat>=22 24:00 1:00 - # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Pacific/Efate 11:13:16 - LMT 1912 Jan 13 # Vila 11:00 Vanuatu +11/+12 @@ -942,6 +976,25 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # Electronic Journal of Australian and New Zealand History (1997-03-03) # http://www.jcu.edu.au/aff/history/reviews/davison.htm +# From P Chan (2020-11-20): +# Daylight Saving Act 1916 (No. 40 of 1916) [1916-12-21, commenced 1917-01-01] +# http://classic.austlii.edu.au/au/legis/cth/num_act/dsa1916401916192/ +# +# Daylight Saving Repeal Act 1917 (No. 35 of 1917) [1917-09-25] +# http://classic.austlii.edu.au/au/legis/cth/num_act/dsra1917351917243/ +# +# Statutory Rules 1941, No. 323 [1941-12-24] +# https://www.legislation.gov.au/Details/C1941L00323 +# +# Statutory Rules 1942, No. 392 [1942-09-10] +# https://www.legislation.gov.au/Details/C1942L00392 +# +# Statutory Rules 1943, No. 241 [1943-09-29] +# https://www.legislation.gov.au/Details/C1943L00241 +# +# All transition times should be 02:00 standard time. + + # From Paul Eggert (2005-12-08): # Implementation Dates of Daylight Saving Time within Australia # http://www.bom.gov.au/climate/averages/tables/dst_times.shtml @@ -1334,6 +1387,27 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # Tasmania +# From P Chan (2020-11-20): +# Tasmania observed DST in 1916-1919. 
+# +# Daylight Saving Act, 1916 (7 Geo V, No 2) [1916-09-22] +# http://classic.austlii.edu.au/au/legis/tas/num_act/tdsa19167gvn2267/ +# +# Daylight Saving Amendment Act, 1917 (8 Geo V, No 5) [1917-10-01] +# http://classic.austlii.edu.au/au/legis/tas/num_act/tdsaa19178gvn5347/ +# +# Daylight Saving Act Repeal Act, 1919 (10 Geo V, No 9) [1919-10-24] +# http://classic.austlii.edu.au/au/legis/tas/num_act/tdsara191910gvn9339/ +# +# King Island is mentioned in the 1967 Act but not the 1968 Act. +# Therefore it possibly observed DST from 1968/69. +# +# Daylight Saving Act 1967 (No. 33 of 1967) [1967-09-22] +# http://classic.austlii.edu.au/au/legis/tas/num_act/dsa196733o1967211/ +# +# Daylight Saving Act 1968 (No. 42 of 1968) [1968-10-15] +# http://classic.austlii.edu.au/au/legis/tas/num_act/dsa196842o1968211/ + # The rules for 1967 through 1991 were reported by George Shepherd # via Simon Woodhead via Robert Elz (1991-03-06): # # The state of TASMANIA.. [Courtesy Tasmanian Dept of Premier + Cabinet ] diff --git a/usr/src/data/zoneinfo/backward b/usr/src/data/zoneinfo/backward index e13ae527a9..0c55be221d 100644 --- a/usr/src/data/zoneinfo/backward +++ b/usr/src/data/zoneinfo/backward @@ -49,6 +49,7 @@ Link Atlantic/Faroe Atlantic/Faeroe Link Europe/Oslo Atlantic/Jan_Mayen Link Australia/Sydney Australia/ACT Link Australia/Sydney Australia/Canberra +Link Australia/Hobart Australia/Currie Link Australia/Lord_Howe Australia/LHI Link Australia/Sydney Australia/NSW Link Australia/Darwin Australia/North diff --git a/usr/src/data/zoneinfo/etcetera b/usr/src/data/zoneinfo/etcetera index a1606bde84..1dc7411f04 100644 --- a/usr/src/data/zoneinfo/etcetera +++ b/usr/src/data/zoneinfo/etcetera @@ -3,12 +3,11 @@ # This file is in the public domain, so clarified as of # 2009-05-17 by Arthur David Olson. 
-# These entries are mostly present for historical reasons, so that -# people in areas not otherwise covered by the tz files could "zic -l" -# to a timezone that was right for their area. These days, the -# tz files cover almost all the inhabited world, and the only practical -# need now for the entries that are not on UTC are for ships at sea -# that cannot use POSIX TZ settings. +# These entries are for uses not otherwise covered by the tz database. +# Their main practical use is for platforms like Android that lack +# support for POSIX-style TZ strings. On such platforms these entries +# can be useful if the timezone database is wrong or if a ship or +# aircraft at sea is not in a timezone. # Starting with POSIX 1003.1-2001, the entries below are all # unnecessary as settings for the TZ environment variable. E.g., diff --git a/usr/src/data/zoneinfo/europe b/usr/src/data/zoneinfo/europe index 5593c6079d..bba4d56482 100644 --- a/usr/src/data/zoneinfo/europe +++ b/usr/src/data/zoneinfo/europe @@ -388,7 +388,7 @@ # http://www.irishstatutebook.ie/eli/1926/sro/919/made/en/print # http://www.irishstatutebook.ie/eli/1947/sro/71/made/en/print -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Summer Time Act, 1916 Rule GB-Eire 1916 only - May 21 2:00s 1:00 BST Rule GB-Eire 1916 only - Oct 1 2:00s 0 GMT @@ -529,7 +529,7 @@ Link Europe/London Europe/Isle_of_Man # The following is like GB-Eire and EU, except with standard time in # summer and negative daylight saving time in winter. It is for when # negative SAVE values are used. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Eire 1971 only - Oct 31 2:00u -1:00 - Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 - Rule Eire 1972 1980 - Oct Sun>=23 2:00u -1:00 - @@ -566,7 +566,7 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2 # predecessor organization, the European Communities. 
# For brevity they are called "EU rules" elsewhere in this file. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S Rule EU 1977 only - Sep lastSun 1:00u 0 - Rule EU 1978 only - Oct 1 1:00u 0 - @@ -606,13 +606,13 @@ Rule C-Eur 1944 only - Oct 2 2:00s 0 - # corrected in version 2008d). The circumstantial evidence is simply the # tz database itself, as seen below: # -# Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01 +# Zone Europe/Paris ... # 0:00 France WE%sT 1945 Sep 16 3:00 # -# Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 +# Zone Europe/Monaco ... # 0:00 France WE%sT 1945 Sep 16 3:00 # -# Zone Europe/Belgrade 1:22:00 - LMT 1884 +# Zone Europe/Belgrade ... # 1:00 1:00 CEST 1945 Sep 16 2:00s # # Rule France 1945 only - Sep 16 3:00 0 - @@ -658,7 +658,7 @@ Rule E-Eur 1996 max - Oct lastSun 0:00 0 - # # The 1917-1921 decree URLs are from Alexander Belopolsky (2016-08-23). -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Russia 1917 only - Jul 1 23:00 1:00 MST # Moscow Summer Time # # Decree No. 142 (1917-12-22) http://istmat.info/node/28137 @@ -772,7 +772,7 @@ Zone EET 2:00 EU EE%sT # Albania -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Albania 1940 only - Jun 16 0:00 1:00 S Rule Albania 1942 only - Nov 2 3:00 0 - Rule Albania 1943 only - Mar 29 2:00 1:00 S @@ -826,7 +826,7 @@ Zone Europe/Andorra 0:06:04 - LMT 1901 # In 1946 the end of DST was on Monday, 7 October 1946, at 3:00 am. # Shanks had this right. Source: Die Weltpresse, 5. Oktober 1946, page 5. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Austria 1920 only - Apr 5 2:00s 1:00 S Rule Austria 1920 only - Sep 13 2:00s 0 - Rule Austria 1946 only - Apr 14 2:00s 1:00 S @@ -913,7 +913,7 @@ Zone Europe/Minsk 1:50:16 - LMT 1880 # The 1918 rules are listed for completeness; they apply to unoccupied Belgium. # Assume Brussels switched to WET in 1918 when the armistice took effect. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Belgium 1918 only - Mar 9 0:00s 1:00 S Rule Belgium 1918 1919 - Oct Sat>=1 23:00s 0 - Rule Belgium 1919 only - Mar 1 23:00s 1:00 S @@ -973,7 +973,7 @@ Zone Europe/Brussels 0:17:30 - LMT 1880 # EET -> EETDST is in 03:00 Local time in last Sunday of March ... # EETDST -> EET is in 04:00 Local time in last Sunday of October # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Bulg 1979 only - Mar 31 23:00 1:00 S Rule Bulg 1979 only - Oct 1 1:00 0 - Rule Bulg 1980 1982 - Apr Sat>=1 23:00 1:00 S @@ -1005,7 +1005,7 @@ Zone Europe/Sofia 1:33:16 - LMT 1880 # We know of no English-language name for historical Czech winter time; # abbreviate it as "GMT", as it happened to be GMT. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Czech 1945 only - Apr Mon>=1 2:00s 1:00 S Rule Czech 1945 only - Oct 1 2:00s 0 - Rule Czech 1946 only - May 6 2:00s 1:00 S @@ -1029,17 +1029,16 @@ Zone Europe/Prague 0:57:44 - LMT 1850 # Denmark, Faroe Islands, and Greenland # From Jesper Nørgaard Welen (2005-04-26): -# http://www.hum.aau.dk/~poe/tid/tine/DanskTid.htm says that the law -# [introducing standard time] was in effect from 1894-01-01.... -# The page http://www.retsinfo.dk/_GETDOCI_/ACCN/A18930008330-REGL +# the law [introducing standard time] was in effect from 1894-01-01.... 
+# The page https://www.retsinformation.dk/eli/lta/1893/83 # confirms this, and states that the law was put forth 1893-03-29. # # The EU [actually, EEC and Euratom] treaty with effect from 1973: -# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19722110030-REGL +# https://www.retsinformation.dk/eli/lta/1972/21100 # # This provoked a new law from 1974 to make possible summer time changes # in subsequent decrees with the law -# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19740022330-REGL +# https://www.retsinformation.dk/eli/lta/1974/223 # # It seems however that no decree was set forward until 1980. I have # not found any decree, but in another related law, the effecting DST @@ -1051,7 +1050,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850 # The law is about the management of the extra hour, concerning # working hours reported and effect on obligatory-rest rules (which # was suspended on that night): -# http://www.retsinfo.dk/_GETDOCI_/ACCN/C19801120554-REGL +# https://web.archive.org/web/20140104053304/https://www.retsinformation.dk/Forms/R0710.aspx?id=60267 # From Jesper Nørgaard Welen (2005-06-11): # The Herning Folkeblad (1980-09-26) reported that the night between @@ -1061,7 +1060,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850 # Hence the "02:00" of the 1980 law refers to standard time, not # wall-clock time, and so the EU rules were in effect in 1980. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Denmark 1916 only - May 14 23:00 1:00 S Rule Denmark 1916 only - Sep 30 23:00 0 - Rule Denmark 1940 only - May 15 0:00 1:00 S @@ -1163,7 +1162,7 @@ Zone Atlantic/Faroe -0:27:04 - LMT 1908 Jan 11 # Tórshavn # http://naalakkersuisut.gl/~/media/Nanoq/Files/Attached%20Files/Engelske-tekster/Legislation/Executive%20Order%20National%20Park.rtf # It is their only National Park. 
# -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Thule 1991 1992 - Mar lastSun 2:00 1:00 D Rule Thule 1991 1992 - Sep lastSun 2:00 0 S Rule Thule 1993 2006 - Apr Sun>=1 2:00 1:00 D @@ -1294,7 +1293,7 @@ Zone Europe/Tallinn 1:39:00 - LMT 1880 # From Paul Eggert (2014-06-14): # Go with Oja over Shanks. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Finland 1942 only - Apr 2 24:00 1:00 S Rule Finland 1942 only - Oct 4 1:00 0 - Rule Finland 1981 1982 - Mar lastSun 2:00 1:00 S @@ -1326,10 +1325,58 @@ Link Europe/Helsinki Europe/Mariehamn # Françoise Gauquelin, Problèmes de l'heure résolus en astrologie, # Guy Trédaniel, Paris 1987 +# From Michael Deckers (2020-06-11): +# the law of 1891 <https://gallica.bnf.fr/ark:/12148/bpt6k64415343.texteImage> +# was published on 1891-03-15, so it could only take force on 1891-03-16. + +# From Michael Deckers (2020-06-10): +# Le Gaulois, 1911-03-11, page 1/6, online at +# https://www.retronews.fr/societe/echo-de-presse/2018/01/29/1911-change-lheure-de-paris +# ... [ Instantly, all pressure driven clock dials halted... Nine minutes and +# twenty-one seconds later the hands resumed their circular motion. ] +# There are also precise reports about how the change was prepared in train +# stations: all the publicly visible clocks stopped at midnight railway time +# (or were covered), only the chief of service had a watch, labeled +# "Heure ancienne", that he kept running until it reached 00:04:21, when +# he announced "Heure nouvelle". See the "Le Petit Journal 1911-03-11". 
+# https://gallica.bnf.fr/ark:/12148/bpt6k6192911/f1.item.zoom +# +# From Michael Deckers (2020-06-12): +# That "all French clocks stopped" for 00:09:21 is a misreading of French +# newspapers; this sort of adjustment applies only to certain +# remote-controlled clocks ("pendules pneumatiques", of which there existed +# perhaps a dozen in Paris, and which simply could not be set back remotely), +# but not to all the clocks in all French towns and villages. For instance, +# the following story in the "Courrier de Saône-et-Loire" 1911-03-11, page 2: +# only works if legal time was stepped back (was not monotone): ... +# [One can observe that children who had been born at midnight less 5 +# minutes and who had died at midnight of the old time, would turn out to +# be dead before being born, time having been set back and having +# suppressed 9 minutes and 25 seconds of their existence, that is, more +# than they could spend.] +# +# From Paul Eggert (2020-06-12): +# French time in railway stations was legally five minutes behind civil time, +# which explains why railway "old time" ran to 00:04:21 instead of to 00:09:21. +# The law's text (which Michael Deckers noted is at +# <https://gallica.bnf.fr/ark:/12148/bpt6k2022333z/f2>) says only that +# at 1911-03-11 00:00 legal time was that of Paris mean time delayed by +# nine minutes and twenty-one seconds, and does not say how the +# transition from Paris mean time was to occur. +# +# tzdb has no way to represent stopped clocks. As the railway practice +# was to keep a watch running on "old time" to decide when to restart +# the other clocks, this could be modeled as a transition for "old time" at +# 00:09:21. However, since the law was ambiguous and clocks outside railway +# stations were probably done haphazardly with the popular impression being +# that the transition was done at 00:00 "old time", simply leave the time +# blank; this causes zic to default to 00:00 "old time" which is good enough. 
+# Do something similar for the 1891-03-16 transition. There are similar +# problems in Algiers, Monaco and Tunis. # # Shank & Pottenger seem to use '24:00' ambiguously; resolve it with Whitman. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule France 1916 only - Jun 14 23:00s 1:00 S Rule France 1916 1919 - Oct Sun>=1 23:00s 0 - Rule France 1917 only - Mar 24 23:00s 1:00 S @@ -1389,13 +1436,11 @@ Rule France 1945 only - Sep 16 3:00 0 - # go with Excoffier's 28/3/76 0hUT and 25/9/76 23hUT. Rule France 1976 only - Mar 28 1:00 1:00 S Rule France 1976 only - Sep 26 1:00 0 - -# Shanks & Pottenger give 0:09:20 for Paris Mean Time, and Whitman 0:09:05, -# but Howse quotes the actual French legislation as saying 0:09:21. -# Go with Howse. Howse writes that the time in France was officially based +# Howse writes that the time in France was officially based # on PMT-0:09:21 until 1978-08-09, when the time base finally switched to UTC. # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01 - 0:09:21 - PMT 1911 Mar 11 0:01 # Paris MT +Zone Europe/Paris 0:09:21 - LMT 1891 Mar 16 + 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time # Shanks & Pottenger give 1940 Jun 14 0:00; go with Excoffier and Le Corre. 0:00 France WE%sT 1940 Jun 14 23:00 # Le Corre says Paris stuck with occupied-France time after the liberation; @@ -1424,7 +1469,7 @@ Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01 # this was equivalent to UT +03, not +04. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Germany 1946 only - Apr 14 2:00s 1:00 S Rule Germany 1946 only - Oct 7 2:00s 0 - Rule Germany 1947 1949 - Oct Sun>=1 2:00s 0 - @@ -1476,7 +1521,7 @@ Zone Europe/Gibraltar -0:21:24 - LMT 1880 Aug 2 0:00s 1:00 EU CE%sT # Greece -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Whitman gives 1932 Jul 5 - Nov 1; go with Shanks & Pottenger. 
Rule Greece 1932 only - Jul 7 0:00 1:00 S Rule Greece 1932 only - Sep 1 0:00 0 - @@ -1511,38 +1556,73 @@ Zone Europe/Athens 1:34:52 - LMT 1895 Sep 14 2:00 EU EE%sT # Hungary -# From Paul Eggert (2014-07-15): -# Dates for 1916-1945 are taken from: -# Oross A. Jelen a múlt jövője: a nyári időszámítás Magyarországon 1916-1945. -# National Archives of Hungary (2012-10-29). -# http://mnl.gov.hu/a_het_dokumentuma/a_nyari_idoszamitas_magyarorszagon_19161945.html -# This source does not always give times, which are taken from Shanks -# & Pottenger (which disagree about the dates). -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Hungary 1918 only - Apr 1 3:00 1:00 S -Rule Hungary 1918 only - Sep 16 3:00 0 - -Rule Hungary 1919 only - Apr 15 3:00 1:00 S -Rule Hungary 1919 only - Nov 24 3:00 0 - + +# From Michael Deckers (2020-06-09): +# an Austrian encyclopedia of railroads of 1913, online at +# http://www.zeno.org/Roell-1912/A/Eisenbahnzeit +# says that the switch [to CET] happened on 1890-11-01. + +# From Géza Nyáry (2020-06-07): +# Data for 1918-1983 are based on the archive database of Library Hungaricana. +# The dates are collected from original, scanned governmental orders, +# bulletins, instructions and public press. +# [See URLs below.]
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +# https://library.hungaricana.hu/hu/view/OGYK_RT_1918/?pg=238 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1919/?pg=808 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1920/?pg=201 +Rule Hungary 1918 1919 - Apr 15 2:00 1:00 S +Rule Hungary 1918 1920 - Sep Mon>=15 3:00 0 - +Rule Hungary 1920 only - Apr 5 2:00 1:00 S +# https://library.hungaricana.hu/hu/view/OGYK_RT_1945/?pg=882 Rule Hungary 1945 only - May 1 23:00 1:00 S -Rule Hungary 1945 only - Nov 1 0:00 0 - +Rule Hungary 1945 only - Nov 1 1:00 0 - +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_03/?pg=49 Rule Hungary 1946 only - Mar 31 2:00s 1:00 S -Rule Hungary 1946 1949 - Oct Sun>=1 2:00s 0 - +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_09/?pg=54 +Rule Hungary 1946 only - Oct 7 2:00 0 - +# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1947_04_1__001-123/?pg=90 +# https://library.hungaricana.hu/hu/view/DunantuliNaplo_1947_09/?pg=128 +# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1948_03_3__001-123/?pg=304 +# https://library.hungaricana.hu/hu/view/Zala_1948_09/?pg=64 +# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=53 +# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=160 +# https://library.hungaricana.hu/hu/view/UjSzo_1949_01-04/?pg=102 +# https://library.hungaricana.hu/hu/view/KeletMagyarorszag_1949_03/?pg=96 +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1949_09/?pg=94 Rule Hungary 1947 1949 - Apr Sun>=4 2:00s 1:00 S -Rule Hungary 1950 only - Apr 17 2:00s 1:00 S -Rule Hungary 1950 only - Oct 23 2:00s 0 - -Rule Hungary 1954 1955 - May 23 0:00 1:00 S -Rule Hungary 1954 1955 - Oct 3 0:00 0 - -Rule Hungary 1956 only - Jun Sun>=1 0:00 1:00 S -Rule Hungary 1956 only - Sep lastSun 0:00 0 - -Rule Hungary 1957 only - Jun Sun>=1 1:00 1:00 S -Rule Hungary 1957 only - Sep lastSun 3:00 0 - -Rule Hungary 1980 only - Apr 6 
1:00 1:00 S +Rule Hungary 1947 1949 - Oct Sun>=1 2:00s 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1954/?pg=513 +Rule Hungary 1954 only - May 23 0:00 1:00 S +Rule Hungary 1954 only - Oct 3 0:00 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1955/?pg=398 +Rule Hungary 1955 only - May 22 2:00 1:00 S +Rule Hungary 1955 only - Oct 2 3:00 0 - +# https://library.hungaricana.hu/hu/view/HevesMegyeiNepujsag_1956_06/?pg=0 +# https://library.hungaricana.hu/hu/view/EszakMagyarorszag_1956_06/?pg=6 +# https://library.hungaricana.hu/hu/view/SzolnokMegyeiNeplap_1957_04/?pg=120 +# https://library.hungaricana.hu/hu/view/PestMegyeiHirlap_1957_09/?pg=143 +Rule Hungary 1956 1957 - Jun Sun>=1 2:00 1:00 S +Rule Hungary 1956 1957 - Sep lastSun 3:00 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=189 +Rule Hungary 1980 only - Apr 6 0:00 1:00 S +Rule Hungary 1980 only - Sep 28 1:00 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=1227 +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1981_01/?pg=79 +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1982/?pg=115 +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1983/?pg=85 +Rule Hungary 1981 1983 - Mar lastSun 0:00 1:00 S +Rule Hungary 1981 1983 - Sep lastSun 1:00 0 - +# # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Europe/Budapest 1:16:20 - LMT 1890 Oct +Zone Europe/Budapest 1:16:20 - LMT 1890 Nov 1 1:00 C-Eur CE%sT 1918 - 1:00 Hungary CE%sT 1941 Apr 8 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1941/?pg=1204 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1942/?pg=3955 + 1:00 Hungary CE%sT 1941 Apr 7 23:00 1:00 C-Eur CE%sT 1945 - 1:00 Hungary CE%sT 1980 Sep 28 2:00s + 1:00 Hungary CE%sT 1984 1:00 EU CE%sT # Iceland @@ -1578,7 +1658,7 @@ Zone Europe/Budapest 1:16:20 - LMT 1890 Oct # The information below is taken from the 1988 Almanak; see # 
http://www.almanak.hi.is/klukkan.html # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Iceland 1917 1919 - Feb 19 23:00 1:00 - Rule Iceland 1917 only - Oct 21 1:00 0 - Rule Iceland 1918 1919 - Nov 16 1:00 0 - @@ -1670,7 +1750,7 @@ Zone Atlantic/Reykjavik -1:28 - LMT 1908 # to 1944-06-04; although Rome was an open city during this period, it # was effectively controlled by Germany. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Italy 1916 only - Jun 3 24:00 1:00 S Rule Italy 1916 1917 - Sep 30 24:00 0 - Rule Italy 1917 only - Mar 31 24:00 1:00 S @@ -1780,7 +1860,7 @@ Link Europe/Rome Europe/San_Marino # urged Lithuania and Estonia to adopt a similar time policy, but it # appears that they will not do so.... -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Latvia 1989 1996 - Mar lastSun 2:00s 1:00 S Rule Latvia 1989 1996 - Sep lastSun 2:00s 0 - @@ -1873,7 +1953,7 @@ Zone Europe/Vilnius 1:41:16 - LMT 1880 # Luxembourg # Whitman disagrees with most of these dates in minor ways; # go with Shanks & Pottenger. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Lux 1916 only - May 14 23:00 1:00 S Rule Lux 1916 only - Oct 1 1:00 0 - Rule Lux 1917 only - Apr 28 23:00 1:00 S @@ -1914,7 +1994,7 @@ Zone Europe/Luxembourg 0:24:36 - LMT 1904 Jun # From Paul Eggert (2016-10-21): # Assume 1900-1972 was like Rome, overriding Shanks. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Malta 1973 only - Mar 31 0:00s 1:00 S Rule Malta 1973 only - Sep 29 0:00s 0 - Rule Malta 1974 only - Apr 21 0:00s 1:00 S @@ -1987,7 +2067,7 @@ Zone Europe/Malta 0:58:04 - LMT 1893 Nov 2 0:00s # Valletta # says the 2014-03-30 spring-forward transition was at 02:00 local time. # Guess that since 1997 Moldova has switched one hour before the EU. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Moldova 1997 max - Mar lastSun 2:00 1:00 S Rule Moldova 1997 max - Oct lastSun 3:00 0 - @@ -2005,11 +2085,24 @@ Zone Europe/Chisinau 1:55:20 - LMT 1880 2:00 Moldova EE%sT # Monaco -# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's -# more precise 0:09:21. +# +# From Michael Deckers (2020-06-12): +# In the "Journal de Monaco" of 1892-05-24, online at +# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/b1c67c12c5af11b41ea888fb048e4fe8.pdf +# we read: ... +# [In virtue of a Sovereign Ordinance of the May 13 of the current [year], +# legal time in the Principality will be set to, from the date of June 1, +# 1892 onwards, to the meridian of Paris, as in France.] +# In the "Journal de Monaco" of 1911-03-28, online at +# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/de74ffb7db53d4f599059fe8f0ed482a.pdf +# we read an ordinance of 1911-03-16: ... +# [Legal time in the Principality will be set, from the date of promulgation +# of the present ordinance, to legal time in France.... Consequently, legal +# time will be retarded by 9 minutes and 21 seconds.] 
+# # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 - 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time +Zone Europe/Monaco 0:29:32 - LMT 1892 Jun 1 + 0:09:21 - PMT 1911 Mar 29 # Paris Mean Time 0:00 France WE%sT 1945 Sep 16 3:00 1:00 France CE%sT 1977 1:00 EU CE%sT @@ -2057,7 +2150,7 @@ Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 # The data entries before 1945 are taken from # https://www.staff.science.uu.nl/~gent0113/wettijd/wettijd.htm -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Neth 1916 only - May 1 0:00 1:00 NST # Netherlands Summer Time Rule Neth 1916 only - Oct 1 0:00 0 AMT # Amsterdam Mean Time Rule Neth 1917 only - Apr 16 2:00s 1:00 NST @@ -2094,7 +2187,7 @@ Zone Europe/Amsterdam 0:19:32 - LMT 1835 # Norway # http://met.no/met/met_lex/q_u/sommertid.html (2004-01) agrees with Shanks & # Pottenger. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Norway 1916 only - May 22 1:00 1:00 S Rule Norway 1916 only - Sep 30 0:00 0 - Rule Norway 1945 only - Apr 2 2:00s 1:00 S @@ -2163,7 +2256,7 @@ Link Europe/Oslo Arctic/Longyearbyen # The 1919 dates and times can be found in Tygodnik Urzędowy nr 1 (1919-03-20), # <http://www.wbc.poznan.pl/publication/32156> pp 1-2. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Poland 1918 1919 - Sep 16 2:00s 0 - Rule Poland 1919 only - Apr 15 2:00s 1:00 S Rule Poland 1944 only - Apr 3 2:00s 1:00 S @@ -2234,7 +2327,7 @@ Zone Europe/Warsaw 1:24:00 - LMT 1880 # Guess that the Azores changed to EU rules in 1992 (since that's when Portugal # harmonized with EU rules), and that they stayed +0:00 that winter. 
# -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # DSH writes that despite Decree 1,469 (1915), the change to the clocks was not # done every year, depending on what Spain did, because of railroad schedules. # Go with Shanks & Pottenger. @@ -2347,7 +2440,7 @@ Zone Atlantic/Madeira -1:07:36 - LMT 1884 # Funchal # assume that Romania and Moldova switched to EU rules in 1997, # the same year as Bulgaria. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Romania 1932 only - May 21 0:00s 1:00 S Rule Romania 1932 1939 - Oct Sun>=1 0:00s 0 - Rule Romania 1933 1939 - Apr Sun>=2 0:00s 1:00 S @@ -2799,6 +2892,19 @@ Zone Europe/Astrakhan 3:12:12 - LMT 1924 May # The law has been published today on # http://publication.pravo.gov.ru/Document/View/0001201810110037 +# From Alexander Krivenyshev (2020-11-27): +# The State Duma approved (Nov 24, 2020) the transition of the Volgograd +# region to the Moscow time zone.... +# https://sozd.duma.gov.ru/bill/1012130-7 +# +# From Stepan Golosunov (2020-12-05): +# Currently proposed text for the second reading (expected on December 8) ... +# changes the date to December 27. https://v1.ru/text/gorod/2020/12/04/69601031/ +# +# From Stepan Golosunov (2020-12-22): +# The law was published today on +# http://publication.pravo.gov.ru/Document/View/0001202012220002 + Zone Europe/Volgograd 2:57:40 - LMT 1920 Jan 3 3:00 - +03 1930 Jun 21 4:00 - +04 1961 Nov 11 @@ -2808,7 +2914,8 @@ Zone Europe/Volgograd 2:57:40 - LMT 1920 Jan 3 3:00 Russia +03/+04 2011 Mar 27 2:00s 4:00 - +04 2014 Oct 26 2:00s 3:00 - +03 2018 Oct 28 2:00s - 4:00 - +04 + 4:00 - +04 2020 Dec 27 2:00s + 3:00 - +03 # From Paul Eggert (2016-11-11): # Europe/Saratov covers: @@ -3445,14 +3552,14 @@ Link Europe/Prague Europe/Bratislava # fallback transition from the next day's 00:59... to 00:00. 
# From Michael Deckers (2016-12-15): -# The Royal Decree of 1900-06-26 quoted by Planesas, online at +# The Royal Decree of 1900-07-26 quoted by Planesas, online at # https://www.boe.es/datos/pdfs/BOE//1900/209/A00383-00384.pdf # says in its article 5 (my translation): # These dispositions will enter into force beginning with the # instant at which, according to the time indicated in article 1, # the 1st day of January of 1901 will begin. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Spain 1918 only - Apr 15 23:00 1:00 S Rule Spain 1918 1919 - Oct 6 24:00s 0 - Rule Spain 1919 only - Apr 6 23:00 1:00 S @@ -3589,7 +3696,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1 # By the end of the 18th century clocks and watches became commonplace # and their performance improved enormously. Communities began to keep # mean time in preference to apparent time - Geneva from 1780 .... -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # From Whitman (who writes "Midnight?"): # Rule Swiss 1940 only - Nov 2 0:00 1:00 S # Rule Swiss 1940 only - Dec 31 0:00 0 - @@ -3676,7 +3783,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1 # 1853-07-16, though it probably occurred at some other date in Zurich, and # legal civil time probably changed at still some other transition date. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Swiss 1941 1942 - May Mon>=1 1:00 1:00 S Rule Swiss 1941 1942 - Oct Mon>=1 2:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -3825,7 +3932,7 @@ Zone Europe/Zurich 0:34:08 - LMT 1853 Jul 16 # See above comment. # Although Google Translate misfires on that source, it looks like # Turkey reversed last month's decision, and so will stay at +03. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Turkey 1916 only - May 1 0:00 1:00 S Rule Turkey 1916 only - Oct 1 0:00 0 - Rule Turkey 1920 only - Mar 28 0:00 1:00 S @@ -3983,7 +4090,7 @@ Zone Europe/Kiev 2:02:04 - LMT 1880 2:00 1:00 EEST 1991 Sep 29 3:00 2:00 E-Eur EE%sT 1995 2:00 EU EE%sT -# Ruthenia used CET 1990/1991. +# Transcarpathia used CET 1990/1991. # "Uzhhorod" is the transliteration of the Rusyn/Ukrainian pronunciation, but # "Uzhgorod" is more common in English. Zone Europe/Uzhgorod 1:29:12 - LMT 1890 Oct diff --git a/usr/src/data/zoneinfo/northamerica b/usr/src/data/zoneinfo/northamerica index febf05bfdd..ddc575976e 100644 --- a/usr/src/data/zoneinfo/northamerica +++ b/usr/src/data/zoneinfo/northamerica @@ -170,7 +170,7 @@ # U.S. government action. So even though the "US" rules have changed # in the latest release, other countries won't be affected. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule US 1918 1919 - Mar lastSun 2:00 1:00 D Rule US 1918 1919 - Oct lastSun 2:00 0 S Rule US 1942 only - Feb 9 2:00 1:00 W # War @@ -347,7 +347,7 @@ Zone PST8PDT -8:00 US P%sT # Eastern time (i.e., -4:56:01.6) just before the 1883 switch. Round to the # nearest second. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule NYC 1920 only - Mar lastSun 2:00 1:00 D Rule NYC 1920 only - Oct lastSun 2:00 0 S Rule NYC 1921 1966 - Apr lastSun 2:00 1:00 D @@ -431,7 +431,7 @@ Zone America/New_York -4:56:02 - LMT 1883 Nov 18 12:03:58 # The Tennessean 2007-05-11, republished 2015-04-06. 
# https://www.tennessean.com/story/insider/extras/2015/04/06/archives-seigenthaler-for-100-years-the-tennessean-had-it-covered/25348545/ -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Chicago 1920 only - Jun 13 2:00 1:00 D Rule Chicago 1920 1921 - Oct lastSun 2:00 0 S Rule Chicago 1921 only - Mar lastSun 2:00 1:00 D @@ -500,7 +500,7 @@ Zone America/North_Dakota/Beulah -6:47:07 - LMT 1883 Nov 18 12:12:53 # El Paso Times. 2018-10-24 06:40 -06. # https://www.elpasotimes.com/story/news/local/el-paso/2018/10/24/el-pasoans-were-time-rebels-fought-stay-mountain-zone/1744509002/ # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Denver 1920 1921 - Mar lastSun 2:00 1:00 D Rule Denver 1920 only - Oct lastSun 2:00 0 S Rule Denver 1921 only - May 22 2:00 0 S @@ -553,7 +553,7 @@ Zone America/Denver -6:59:56 - LMT 1883 Nov 18 12:00:04 # https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1501&context=ca_ballot_props # https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1636&context=ca_ballot_props # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule CA 1948 only - Mar 14 2:01 1:00 D Rule CA 1949 only - Jan 1 2:00 0 S Rule CA 1950 1966 - Apr lastSun 1:00 1:00 D @@ -911,7 +911,7 @@ Zone America/Boise -7:44:49 - LMT 1883 Nov 18 12:15:11 # going to switch from Central to Eastern Time on March 11, 2007.... 
# http://www.indystar.com/apps/pbcs.dll/article?AID=/20070207/LOCAL190108/702070524/0/LOCAL -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Indianapolis 1941 only - Jun 22 2:00 1:00 D Rule Indianapolis 1941 1954 - Sep lastSun 2:00 0 S Rule Indianapolis 1946 1954 - Apr lastSun 2:00 1:00 D @@ -930,7 +930,7 @@ Zone America/Indiana/Indianapolis -5:44:38 - LMT 1883 Nov 18 12:15:22 # # Eastern Crawford County, Indiana, left its clocks alone in 1974, # as well as from 1976 through 2005. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Marengo 1951 only - Apr lastSun 2:00 1:00 D Rule Marengo 1951 only - Sep lastSun 2:00 0 S Rule Marengo 1954 1960 - Apr lastSun 2:00 1:00 D @@ -949,7 +949,7 @@ Zone America/Indiana/Marengo -5:45:23 - LMT 1883 Nov 18 12:14:37 # Daviess, Dubois, Knox, and Martin Counties, Indiana, # switched from eastern to central time in April 2006, then switched back # in November 2007. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Vincennes 1946 only - Apr lastSun 2:00 1:00 D Rule Vincennes 1946 only - Sep lastSun 2:00 0 S Rule Vincennes 1953 1954 - Apr lastSun 2:00 1:00 D @@ -974,7 +974,7 @@ Zone America/Indiana/Vincennes -5:50:07 - LMT 1883 Nov 18 12:09:53 # The Indianapolis News, Friday 27 October 1967 states that Perry County # returned to CST. It went again to EST on 27 April 1969, as documented by the # Indianapolis star of Saturday 26 April. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Perry 1955 only - May 1 0:00 1:00 D Rule Perry 1955 1960 - Sep lastSun 2:00 0 S Rule Perry 1956 1963 - Apr lastSun 2:00 1:00 D @@ -991,7 +991,7 @@ Zone America/Indiana/Tell_City -5:47:03 - LMT 1883 Nov 18 12:12:57 # # Pike County, Indiana moved from central to eastern time in 1977, # then switched back in 2006, then switched back again in 2007. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Pike 1955 only - May 1 0:00 1:00 D Rule Pike 1955 1960 - Sep lastSun 2:00 0 S Rule Pike 1956 1964 - Apr lastSun 2:00 1:00 D @@ -1012,7 +1012,7 @@ Zone America/Indiana/Petersburg -5:49:07 - LMT 1883 Nov 18 12:10:53 # An article on page A3 of the Sunday, 1991-10-27 Washington Post # notes that Starke County switched from Central time to Eastern time as of # 1991-10-27. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Starke 1947 1961 - Apr lastSun 2:00 1:00 D Rule Starke 1947 1954 - Sep lastSun 2:00 0 S Rule Starke 1955 1956 - Oct lastSun 2:00 0 S @@ -1029,7 +1029,7 @@ Zone America/Indiana/Knox -5:46:30 - LMT 1883 Nov 18 12:13:30 # # Pulaski County, Indiana, switched from eastern to central time in # April 2006 and then switched back in March 2007. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Pulaski 1946 1960 - Apr lastSun 2:00 1:00 D Rule Pulaski 1946 1954 - Sep lastSun 2:00 0 S Rule Pulaski 1955 1956 - Oct lastSun 2:00 0 S @@ -1071,7 +1071,7 @@ Zone America/Indiana/Vevay -5:40:16 - LMT 1883 Nov 18 12:19:44 # # Part of Kentucky left its clocks alone in 1974. # This also includes Clark, Floyd, and Harrison counties in Indiana. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Louisville 1921 only - May 1 2:00 1:00 D Rule Louisville 1921 only - Sep 1 2:00 0 S Rule Louisville 1941 only - Apr lastSun 2:00 1:00 D @@ -1185,7 +1185,7 @@ Zone America/Kentucky/Monticello -5:39:24 - LMT 1883 Nov 18 12:20:36 # election Michigan voters narrowly repealed DST, effective 1969. # # Most of Michigan observed DST from 1973 on, but was a bit late in 1975. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Detroit 1948 only - Apr lastSun 2:00 1:00 D Rule Detroit 1948 only - Sep lastSun 2:00 0 S # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1202,7 +1202,7 @@ Zone America/Detroit -5:32:11 - LMT 1905 # # Dickinson, Gogebic, Iron, and Menominee Counties, Michigan, # switched from EST to CST/CDT in 1973. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Menominee 1946 only - Apr lastSun 2:00 1:00 D Rule Menominee 1946 only - Sep lastSun 2:00 0 S Rule Menominee 1966 only - Apr lastSun 2:00 1:00 D @@ -1372,7 +1372,7 @@ Zone America/Menominee -5:50:27 - LMT 1885 Sep 18 12:00 # Oct 31, to Oct 27, 1918 (and Sunday is a more likely transition day # than Thursday) in all Canadian rulesets. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Canada 1918 only - Apr 14 2:00 1:00 D Rule Canada 1918 only - Oct 27 2:00 0 S Rule Canada 1942 only - Feb 9 2:00 1:00 W # War @@ -1395,7 +1395,7 @@ Rule Canada 2007 max - Nov Sun>=1 2:00 0 S # that follows the rules is the southeast corner, including Port Hope # Simpson and Mary's Harbour, but excluding, say, Black Tickle. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule StJohns 1917 only - Apr 8 2:00 1:00 D Rule StJohns 1917 only - Sep 17 2:00 0 S # Whitman gives 1919 Apr 5 and 1920 Apr 5; go with Shanks & Pottenger. @@ -1497,7 +1497,7 @@ Zone America/Goose_Bay -4:01:40 - LMT 1884 # Happy Valley-Goose Bay # bill say that it is "accommodating the customs and practices" of those # regions, which suggests that they have always been in-line with Halifax. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Halifax 1916 only - Apr 1 0:00 1:00 D Rule Halifax 1916 only - Oct 1 0:00 0 S Rule Halifax 1920 only - May 9 0:00 1:00 D @@ -1563,7 +1563,7 @@ Zone America/Glace_Bay -3:59:48 - LMT 1902 Jun 15 # clear that this was the case since at least 1993. # For now, assume it started in 1993. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Moncton 1933 1935 - Jun Sun>=8 1:00 1:00 D Rule Moncton 1933 1935 - Sep Sun>=8 1:00 0 S Rule Moncton 1936 1938 - Jun Sun>=1 1:00 1:00 D @@ -1772,7 +1772,7 @@ Zone America/Blanc-Sablon -3:48:28 - LMT 1884 # With some exceptions, the use of daylight saving may be said to be limited # to those cities and towns lying between Quebec city and Windsor, Ont. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Toronto 1919 only - Mar 30 23:30 1:00 D Rule Toronto 1919 only - Oct 26 0:00 0 S Rule Toronto 1920 only - May 2 2:00 1:00 D @@ -1870,7 +1870,7 @@ Zone America/Atikokan -6:06:28 - LMT 1895 # starting 1966. Since 02:00s is clearly correct for 1967 on, assume # it was also 02:00s in 1966. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Winn 1916 only - Apr 23 0:00 1:00 D Rule Winn 1916 only - Sep 17 0:00 0 S Rule Winn 1918 only - Apr 14 2:00 1:00 D @@ -1961,7 +1961,7 @@ Zone America/Winnipeg -6:28:36 - LMT 1887 Jul 16 # long and rather painful to read. # http://www.qp.gov.sk.ca/documents/English/Statutes/Statutes/T14.pdf -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Regina 1918 only - Apr 14 2:00 1:00 D Rule Regina 1918 only - Oct 27 2:00 0 S Rule Regina 1930 1934 - May Sun>=1 0:00 1:00 D @@ -2011,7 +2011,7 @@ Zone America/Swift_Current -7:11:20 - LMT 1905 Sep # Boyer JP. Forcing Choice: The Risky Reward of Referendums. Dundum. 2017. 
# ISBN 978-1459739123. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Edm 1918 1919 - Apr Sun>=8 2:00 1:00 D Rule Edm 1918 only - Oct 27 2:00 0 S Rule Edm 1919 only - May 27 2:00 0 S @@ -2120,7 +2120,7 @@ Zone America/Edmonton -7:33:52 - LMT 1906 Sep # https://searcharchives.vancouver.ca/daylight-saving-1918-starts-again-july-7-1941-start-d-s-sept-27-end-of-d-s-1941 # We have no further details, so omit them for now. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Vanc 1918 only - Apr 14 2:00 1:00 D Rule Vanc 1918 only - Oct 27 2:00 0 S Rule Vanc 1942 only - Feb 9 2:00 1:00 W # War @@ -2234,7 +2234,7 @@ Zone America/Creston -7:46:04 - LMT 1884 # to say eight hours behind Greenwich Time. # # * O.I.C. 1980/02 INTERPRETATION ACT -# [no online source found] +# https://mm.icann.org/pipermail/tz/attachments/20201125/d5adc93b/CAYTOIC1980-02DST1980-01-04-0001.pdf # # * Yukon Daylight Saving Time, YOIC 1987/56 # https://www.canlii.org/en/yk/laws/regu/yoic-1987-56/latest/yoic-1987-56.html @@ -2449,7 +2449,19 @@ Zone America/Creston -7:46:04 - LMT 1884 # consistency with nearby Dawson Creek, Creston, and Fort Nelson. # https://yukon.ca/en/news/yukon-end-seasonal-time-change -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# From Andrew G. Smith (2020-09-24): +# Yukon has completed its regulatory change to be on UTC -7 year-round.... +# http://www.gov.yk.ca/legislation/regs/oic2020_125.pdf +# What we have done is re-defined Yukon Standard Time, as we are +# authorized to do under section 33 of our Interpretation Act: +# http://www.gov.yk.ca/legislation/acts/interpretation_c.pdf +# +# From Paul Eggert (2020-09-24): +# tzdb uses the obsolete YST abbreviation for standard time in Yukon through +# about 1970, and uses PST for standard time in Yukon since then. Consistent +# with that, use MST for -07, the new standard time in Yukon effective Nov. 1. 
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NT_YK 1918 only - Apr 14 2:00 1:00 D Rule NT_YK 1918 only - Oct 27 2:00 0 S Rule NT_YK 1919 only - May 25 2:00 1:00 D @@ -2503,12 +2515,12 @@ Zone America/Inuvik 0 - -00 1953 # Inuvik founded Zone America/Whitehorse -9:00:12 - LMT 1900 Aug 20 -9:00 NT_YK Y%sT 1967 May 28 0:00 -8:00 NT_YK P%sT 1980 - -8:00 Canada P%sT 2020 Mar 8 2:00 + -8:00 Canada P%sT 2020 Nov 1 -7:00 - MST Zone America/Dawson -9:17:40 - LMT 1900 Aug 20 -9:00 NT_YK Y%sT 1973 Oct 28 0:00 -8:00 NT_YK P%sT 1980 - -8:00 Canada P%sT 2020 Mar 8 2:00 + -8:00 Canada P%sT 2020 Nov 1 -7:00 - MST @@ -2723,7 +2735,7 @@ Zone America/Dawson -9:17:40 - LMT 1900 Aug 20 # 5- The islands, reefs and keys shall take their timezone from the # longitude they are located at. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Mexico 1939 only - Feb 5 0:00 1:00 D Rule Mexico 1939 only - Jun 25 0:00 0 S Rule Mexico 1940 only - Dec 9 0:00 1:00 D @@ -2923,12 +2935,38 @@ Zone America/Tijuana -7:48:04 - LMT 1922 Jan 1 0:11:56 # # For 1899 Milne gives -5:09:29.5; round that. # +# From P Chan (2020-11-27, corrected on 2020-12-02): +# There were two periods of DST observed in 1942-1945: 1942-05-01 +# midnight to 1944-12-31 midnight and 1945-02-01 to 1945-10-17 midnight. +# "midnight" should mean 24:00 from the context. +# +# War Time Order 1942 [1942-05-01] and War Time (No. 2) Order 1942 [1942-09-29] +# Appendix to the Statutes of 7 George VI. and the Year 1942. p 34, 43 +# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA3-PA34 +# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA3-PA43 +# +# War Time Order 1943 [1943-03-31] and War Time Order 1944 [1943-12-29] +# Appendix to the Statutes of 8 George VI. and the Year 1943. 
p 9-10, 28-29 +# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA4-PA9 +# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA4-PA28 +# +# War Time Order 1945 [1945-01-31] and the Order which revoke War Time Order +# 1945 [1945-10-16] Appendix to the Statutes of 9 George VI. and the Year +# 1945. p 160, 247-248 +# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA6-PA160 +# https://books.google.com/books?id=5rlNAQAAIAAJ&pg=RA6-PA247 +# # From Sue Williams (2006-12-07): # The Bahamas announced about a month ago that they plan to change their DST # rules to sync with the U.S. starting in 2007.... # http://www.jonesbahamas.com/?c=45&a=10412 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Bahamas 1942 only - May 1 24:00 1:00 W +Rule Bahamas 1944 only - Dec 31 24:00 0 S +Rule Bahamas 1945 only - Feb 1 0:00 1:00 W +Rule Bahamas 1945 only - Aug 14 23:00u 1:00 P # Peace +Rule Bahamas 1945 only - Oct 17 24:00 0 S Rule Bahamas 1964 1975 - Oct lastSun 2:00 0 S Rule Bahamas 1964 1975 - Apr lastSun 2:00 1:00 D # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -2940,7 +2978,7 @@ Zone America/Nassau -5:09:30 - LMT 1912 Mar 2 # For 1899 Milne gives -3:58:29.2; round that. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Barb 1977 only - Jun 12 2:00 1:00 D Rule Barb 1977 1978 - Oct Sun>=1 2:00 0 S Rule Barb 1978 1980 - Apr Sun>=15 2:00 1:00 D @@ -2952,34 +2990,161 @@ Zone America/Barbados -3:58:29 - LMT 1924 # Bridgetown -4:00 Barb A%sT # Belize -# Whitman entirely disagrees with Shanks; go with Shanks & Pottenger. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Belize 1918 1942 - Oct Sun>=2 0:00 0:30 -0530 -Rule Belize 1919 1943 - Feb Sun>=9 0:00 0 CST + +# From P Chan (2020-11-03): +# Below are some laws related to the time in British Honduras/Belize: +# +# Definition of Time Ordinance, 1927 (No.4 of 1927) [1927-04-01] +# Ordinances of British Honduras Passed in the Year 1927, p 19-20 +# https://books.google.com/books?id=LqEpAQAAMAAJ&pg=RA3-PA19 +# +# Definition of Time (Amendment) Ordinance, 1942 (No. 5 of 1942) [1942-06-27] +# Ordinances of British Honduras Passed in the Year 1942, p 31-32 +# https://books.google.com/books?id=h6MpAQAAMAAJ&pg=RA6-PA95-IA44 +# +# Definition of Time Ordinance, 1945 (No. 19 of 1945) [1945-12-15] +# Ordinances of British Honduras Passed in the Year 1945, p 49-50 +# https://books.google.com/books?id=xaMpAQAAMAAJ&pg=RA2-PP1 +# +# Definition of Time Ordinance, 1947 (No. 1 of 1947) [1947-03-11] +# Ordinances of British Honduras Passed in the Year 1947, p 1-2 +# https://books.google.com/books?id=xaMpAQAAMAAJ&pg=RA3-PA1 +# +# Time (Definition of) Ordinance (Chapter 180) +# The Laws of British Honduras in Force on the 15th Day of September, 1958 , Volume IV, p 2580 +# https://books.google.com/books?id=v5QpAQAAMAAJ&pg=PA2580 +# +# Time (Definition of) (Amendment) Ordinance, 1968 (No. 13 of 1968) [1968-08-03] +# https://books.google.com/books?id=xij7KEB_58wC&pg=RA1-PA428-IA9 +# +# Definition of Time Act (Chapter 339) +# Law of Belize, Revised Edition 2000 +# http://www.belizelaw.org/web/lawadmin/PDF%20files/cap339.pdf + +# From Paul Eggert (2020-11-03): +# The transitions below are derived from P Chan's sources, except that the +# 1973 through 1983 transitions are from Shanks & Pottenger since we have +# no better data there. 
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Belize 1918 1941 - Oct Sat>=1 24:00 0:30 -0530 +Rule Belize 1919 1942 - Feb Sat>=8 24:00 0 CST +Rule Belize 1942 only - Jun 27 24:00 1:00 CWT +Rule Belize 1945 only - Aug 14 23:00u 1:00 CPT +Rule Belize 1945 only - Dec 15 24:00 0 CST +Rule Belize 1947 1967 - Oct Sat>=1 24:00 0:30 -0530 +Rule Belize 1948 1968 - Feb Sat>=8 24:00 0 CST Rule Belize 1973 only - Dec 5 0:00 1:00 CDT Rule Belize 1974 only - Feb 9 0:00 0 CST Rule Belize 1982 only - Dec 18 0:00 1:00 CDT Rule Belize 1983 only - Feb 12 0:00 0 CST # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone America/Belize -5:52:48 - LMT 1912 Apr +Zone America/Belize -5:52:48 - LMT 1912 Apr 1 -6:00 Belize %s # Bermuda +# From Paul Eggert (2020-11-24): # For 1899 Milne gives -4:19:18.3 as the meridian of the clock tower, -# Bermuda dockyard, Ireland I; round that. +# Bermuda dockyard, Ireland I. This agrees with standard offset given in the +# Daylight Saving Act, 1917 cited below. Round that to the nearest second. +# It is not known when this time became standard for Bermuda; guess 1890. +# The transition to -04 was specified by: +# 1930: The Time Zone Act, 1929 (1929: No. 39) [1929-11-08] +# https://books.google.com/books?id=7tdMAQAAIAAJ&pg=RA54-PP1 + +# From P Chan (2020-11-20): +# Most of the information can be found online from the Bermuda National +# Library - Digital Collection which includes The Royal Gazette (RG) until 1957 +# https://bnl.contentdm.oclc.org/digital/ +# I will cite the ID. For example, [10000] means +# https://bnl.contentdm.oclc.org/digital/collection/BermudaNP02/id/10000 +# +# 1917: Apr 5 midnight to Sep 30 midnight +# Daylight Saving Act, 1917 (1917 No. 13) [1917-04-02] +# Bermuda Acts and Resolves 1917, p 37-38 +# https://books.google.com/books?id=M-lCAQAAMAAJ&pg=PA36-IA2 +# RG, 1917-04-04, p 6 [42340] gives the spring forward date. +# +# 1918: Apr 13 midnight to Sep 15 midnight +# Daylight Saving Act, 1918 (1918 No. 
9) [1918-04-06] +# Bermuda Acts and Resolves 1917, p 13 +# https://books.google.com/books?id=K-lCAQAAMAAJ&pg=RA1-PA7 +# +# Note that local mean time was still used before 1930. +# +# During WWII, DST was introduced by Defence Regulations +# 1942: Jan 11 02:00 to Oct 18 02:00 [113646], [115726] +# 1943: Mar 21 02:00 to Oct 31 02:00 [116704], [118193] +# 1944: Mar 12 02:00 to Nov 5 02:00 [119225], [121593] +# 1945: Mar 11 02:00 to Nov 4 02:00 [122369], [124461] +# RG, 1942-01-08, p 2, 1942-10-12, p 2 , 1943-03-06, p 2, 1943-09-03, p 1, +# 1944-02-29, p 6, 1944-09-20, p 2, 1945-02-13, p 2, 1945-11-03, p 1 +# +# In 1946, the House of Assembly rejected DST twice. [128686], [128076] +# RG, 1946-03-16 p 1,1946-04-13 p 1 +# +# 1947: third Sunday in May 02:00 to second Sunday in September 02:00 +# DST in 1947 was defined in the Daylight Saving Act, 1947 (1947: No. 12) +# which expired at the end of the year. [125784] ,[132405], [144454], [138226] +# RG, 1947-02-27, p 1, 1947-05-15, p 1, 1947-09-13, p 1, 1947-12-30, p 1 +# +# 1948-1952: fourth Sunday in May 02:00 to first Sunday in September 02:00 +# DST in 1948 was defined in the Daylight Saving Act, 1948 (1948 : No. 12) +# which was set to expired at the end of the year but it was extended until +# the end of 1952 and was not further extended. +# [129802], [139403], [146008], [135240], [144330], [139049], [143309], +# [148271], [149773], [153589], [153802], [155924] +# RG, 1948-04-13, p 1, 1948-05-22, p 1, 1948-09-04, p 1, 1949-05-21, p1, +# 1949-09-03, p 1, 1950-05-27 p 1, 1950-09-02, p 1, 1951-05-27, p 1, +# 1951-09-01, p 1, 1952-05-23, p 1, 1952-09-26, p 1, 1952-12-21, p 8 +# +# In 1953-1955, the House of Assembly rejected DST each year. 
[158996], +# [162620], [166720] RG, 1953-05-02, p 1, 1954-04-01 p 1, 1955-03-12, p 1 +# +# 1956: fourth Sunday in May 02:00 to last Sunday in October 02:00 +# Time Zone (Seasonal Variation) Act, 1956 (1956: No.44) [1956-05-25] +# Bermuda Public Acts 1956, p 331-332 +# https://books.google.com/books?id=Xs1AlmD_cEwC&pg=PA63 +# +# The extension of the Act was rejected by the House of Assembly. [176218] +# RG, 1956-12-13, p 1 +# +# From the Chronological Table of Public and Private Acts up to 1985, it seems +# that there does not exist other Acts related to DST before 1973. +# https://books.google.com/books?id=r9hMAQAAIAAJ&pg=RA23-PA1 +# Public Acts of the Legislature of the Islands of Bermuda, Together with +# Statutory Instruments in Force Thereunder, Vol VII # From Dan Jones, reporting in The Royal Gazette (2006-06-26): - # Next year, however, clocks in the US will go forward on the second Sunday # in March, until the first Sunday in November. And, after the Time Zone # (Seasonal Variation) Bill 2006 was passed in the House of Assembly on # Friday, the same thing will happen in Bermuda. 
# http://www.theroyalgazette.com/apps/pbcs.dll/article?AID=/20060529/NEWS/105290135 +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +Rule Bermuda 1917 only - Apr 5 24:00 1:00 - +Rule Bermuda 1917 only - Sep 30 24:00 0 - +Rule Bermuda 1918 only - Apr 13 24:00 1:00 - +Rule Bermuda 1918 only - Sep 15 24:00 0 S +Rule Bermuda 1942 only - Jan 11 2:00 1:00 D +Rule Bermuda 1942 only - Oct 18 2:00 0 S +Rule Bermuda 1943 only - Mar 21 2:00 1:00 D +Rule Bermuda 1943 only - Oct 31 2:00 0 S +Rule Bermuda 1944 1945 - Mar Sun>=8 2:00 1:00 D +Rule Bermuda 1944 1945 - Nov Sun>=1 2:00 0 S +Rule Bermuda 1947 only - May Sun>=15 2:00 1:00 D +Rule Bermuda 1947 only - Sep Sun>=8 2:00 0 S +Rule Bermuda 1948 1952 - May Sun>=22 2:00 1:00 D +Rule Bermuda 1948 1952 - Sep Sun>=1 2:00 0 S +Rule Bermuda 1956 only - May Sun>=22 2:00 1:00 D +Rule Bermuda 1956 only - Oct lastSun 2:00 0 S + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Atlantic/Bermuda -4:19:18 - LMT 1930 Jan 1 2:00 # Hamilton - -4:00 - AST 1974 Apr 28 2:00 +Zone Atlantic/Bermuda -4:19:18 - LMT 1890 # Hamilton + -4:19:18 Bermuda BMT/BST 1930 Jan 1 2:00 + -4:00 Bermuda A%sT 1974 Apr 28 2:00 -4:00 Canada A%sT 1976 -4:00 US A%sT @@ -2990,7 +3155,7 @@ Zone Atlantic/Bermuda -4:19:18 - LMT 1930 Jan 1 2:00 # Hamilton # Milne gives -5:36:13.3 as San José mean time; round to nearest. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule CR 1979 1980 - Feb lastSun 0:00 1:00 D Rule CR 1979 1980 - Jun Sun>=1 0:00 0 S Rule CR 1991 1992 - Jan Sat>=15 0:00 1:00 D @@ -3164,7 +3329,7 @@ Zone America/Costa_Rica -5:36:13 - LMT 1890 # San José # From Paul Eggert (2012-11-03): # For now, assume the future rule is first Sunday in November. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cuba 1928 only - Jun 10 0:00 1:00 D Rule Cuba 1928 only - Oct 10 0:00 0 S Rule Cuba 1940 1942 - Jun Sun>=1 0:00 1:00 D @@ -3233,7 +3398,7 @@ Zone America/Havana -5:29:28 - LMT 1890 # decided to revert. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule DR 1966 only - Oct 30 0:00 1:00 EDT Rule DR 1967 only - Feb 28 0:00 0 EST Rule DR 1969 1973 - Oct lastSun 0:00 0:30 -0430 @@ -3250,7 +3415,7 @@ Zone America/Santo_Domingo -4:39:36 - LMT 1890 # El Salvador -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Salv 1987 1988 - May Sun>=1 0:00 1:00 D Rule Salv 1987 1988 - Sep lastSun 0:00 0 S # There are too many San Salvadors elsewhere, so use America/El_Salvador @@ -3279,7 +3444,7 @@ Zone America/El_Salvador -5:56:48 - LMT 1921 # San Salvador # (2006-04-19), says DST ends at 24:00. See # http://www.sieca.org.gt/Sitio_publico/Energeticos/Doc/Medidas/Cambio_Horario_Nac_190406.pdf -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Guat 1973 only - Nov 25 0:00 1:00 D Rule Guat 1974 only - Feb 24 0:00 0 S Rule Guat 1983 only - May 21 0:00 1:00 D @@ -3360,7 +3525,7 @@ Zone America/Guatemala -6:02:04 - LMT 1918 Oct 5 # I have not been able to find a more authoritative source: # https://www.haitilibre.com/en/news-20319-haiti-notices-time-change-in-haiti.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Haiti 1983 only - May 8 0:00 1:00 D Rule Haiti 1984 1987 - Apr lastSun 0:00 1:00 D Rule Haiti 1983 1987 - Oct lastSun 0:00 0 S @@ -3408,7 +3573,7 @@ Zone America/Port-au-Prince -4:49:20 - LMT 1890 # http://www.laprensahn.com/pais_nota.php?id04962=7386 # So it seems that Honduras will not enter DST this year.... 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Hond 1987 1988 - May Sun>=1 0:00 1:00 D Rule Hond 1987 1988 - Sep lastSun 0:00 0 S Rule Hond 2006 only - May Sun>=1 0:00 1:00 D @@ -3499,7 +3664,7 @@ Zone America/Martinique -4:04:20 - LMT 1890 # Fort-de-France # The natural sun time is restored in all the national territory, in that the # time is returned one hour at 01:00 am of October 1 of 2006. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Nic 1979 1980 - Mar Sun>=16 0:00 1:00 D Rule Nic 1979 1980 - Jun Mon>=23 0:00 0 S Rule Nic 2005 only - Apr 10 0:00 1:00 D @@ -3562,7 +3727,7 @@ Zone America/Miquelon -3:44:40 - LMT 1911 May 15 # St Pierre # "Eastern Standard Times Begins 2007 # Clocks are set back one hour at 2:00 a.m. local Daylight Saving Time" # indicating that the normal ET rules are followed. -# + # From Paul Eggert (2014-08-19): # The 2014-08-13 Cabinet meeting decided to stay on UT -04 year-round. See: # http://tcweeklynews.com/daylight-savings-time-to-be-maintained-p5353-127.htm @@ -3577,19 +3742,42 @@ Zone America/Miquelon -3:44:40 - LMT 1911 May 15 # St Pierre # during the summer months and Standard Time, also known as Local # Time, during the winter months with effect from April 2018 ... # https://www.gov.uk/government/news/turks-and-caicos-post-cabinet-meeting-statement--3 -# # From Paul Eggert (2017-08-26): # The date of effect of the spring 2018 change appears to be March 11, # which makes more sense. See: Hamilton D. Time change back # by March 2018 for TCI. Magnetic Media. 2017-08-25. # http://magneticmediatv.com/2017/08/time-change-back-by-march-2018-for-tci/ # +# From P Chan (2020-11-27): +# Standard Time Declaration Order 2015 (L.N. 15/2015) +# http://online.fliphtml5.com/fizd/czin/#p=2 +# +# Standard Time Declaration Order 2017 (L.N. 
31/2017) +# http://online.fliphtml5.com/fizd/dmcu/#p=2 +# +# From Tim Parenti (2020-12-05): +# Although L.N. 31/2017 reads that it "shall come into operation at 2:00 a.m. +# on 11th March 2018", a precise interpretation here poses some problems. The +# order states that "the standard time to be observed throughout the Turks and +# Caicos Islands shall be the same time zone as the Eastern United States of +# America" and further clarifies "[f]or the avoidance of doubt" that it +# "applies to the Eastern Standard Time as well as any changes thereto for +# Daylight Saving Time." However, as clocks in Turks and Caicos approached +# 02:00 -04, and thus the declared implementation time, it was still 01:00 EST +# (-05), as DST in the Eastern US would not start until an hour later. +# +# Since it is unlikely that those on the islands switched their clocks twice in +# the span of an hour, we assume instead that the adoption of EDT actually took +# effect once clocks in the Eastern US had sprung forward, from 03:00 -04. +# This discrepancy only affects the time zone abbreviation and DST flag for the +# intervening hour, not wall clock times, as -04 was maintained throughout. + # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone America/Grand_Turk -4:44:32 - LMT 1890 -5:07:10 - KMT 1912 Feb # Kingston Mean Time -5:00 - EST 1979 - -5:00 US E%sT 2015 Nov Sun>=1 2:00 - -4:00 - AST 2018 Mar 11 3:00 + -5:00 US E%sT 2015 Mar 8 2:00 + -4:00 - AST 2018 Mar 11 3:00 -5:00 US E%sT # British Virgin Is diff --git a/usr/src/data/zoneinfo/southamerica b/usr/src/data/zoneinfo/southamerica index 9b679c6143..aad8b2dbd1 100644 --- a/usr/src/data/zoneinfo/southamerica +++ b/usr/src/data/zoneinfo/southamerica @@ -48,7 +48,7 @@ # I am sending modifications to the Argentine time zone table... # AR was chosen because they are the ISO letters that represent Argentina. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Arg 1930 only - Dec 1 0:00 1:00 - Rule Arg 1931 only - Apr 1 0:00 0 - Rule Arg 1931 only - Oct 15 0:00 1:00 - @@ -769,7 +769,7 @@ Zone America/La_Paz -4:32:36 - LMT 1890 # From Paul Eggert (2013-10-17): # For now, assume western Amazonas will change as well. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Decree 20,466 <http://pcdsh01.on.br/HV20466.htm> (1931-10-01) # Decree 21,896 <http://pcdsh01.on.br/HV21896.htm> (1932-01-10) Rule Brazil 1931 only - Oct 3 11:00 1:00 - @@ -1258,7 +1258,7 @@ Zone America/Rio_Branco -4:31:12 - LMT 1914 # For now, assume that they will not revert, # since they have extended the expiration date once already. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Chile 1927 1931 - Sep 1 0:00 1:00 - Rule Chile 1928 1932 - Apr 1 0:00 0 - Rule Chile 1968 only - Nov 3 4:00u 1:00 - @@ -1358,7 +1358,7 @@ Zone Antarctica/Palmer 0 - -00 1965 # Milne gives 4:56:16.4 for Bogotá time in 1899; round to nearest. He writes, # "A variation of fifteen minutes in the public clocks of Bogota is not rare." -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule CO 1992 only - May 3 0:00 1:00 - Rule CO 1993 only - Apr 4 0:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1418,7 +1418,7 @@ Link America/Curacao America/Kralendijk # Caribbean Netherlands # (Not one step back), the clocks went back in 1993 and the experiment was not # repeated. For now, assume transitions were at 00:00 local time country-wide. 
# -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Ecuador 1992 only - Nov 28 0:00 1:00 - Rule Ecuador 1993 only - Feb 5 0:00 0 - # @@ -1512,7 +1512,7 @@ Zone Pacific/Galapagos -5:58:24 - LMT 1931 # Puerto Baquerizo Moreno # For now we will assume permanent -03 for the Falklands # until advised differently (to apply for 2012 and beyond, after the 2011 # experiment was apparently successful.) -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Falk 1937 1938 - Sep lastSun 0:00 1:00 - Rule Falk 1938 1942 - Mar Sun>=19 0:00 0 - Rule Falk 1939 only - Oct 1 0:00 1:00 - @@ -1558,7 +1558,7 @@ Zone America/Guyana -3:52:40 - LMT 1915 Mar # Georgetown # No time of the day is established for the adjustment, so people normally # adjust their clocks at 0 hour of the given dates. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Para 1975 1988 - Oct 1 0:00 1:00 - Rule Para 1975 1978 - Mar 1 0:00 0 - Rule Para 1979 1991 - Apr 1 0:00 0 - @@ -1651,7 +1651,7 @@ Zone America/Asuncion -3:50:40 - LMT 1890 # From Paul Eggert (2006-03-22): # Shanks & Pottenger don't have this transition. Assume 1986 was like 1987. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Peru 1938 only - Jan 1 0:00 1:00 - Rule Peru 1938 only - Apr 1 0:00 0 - Rule Peru 1938 1939 - Sep lastSun 0:00 1:00 - @@ -1747,7 +1747,7 @@ Link America/Port_of_Spain America/Tortola # Virgin Islands (UK) # https://www.impo.com.uy/diariooficial/1926/03/10/2 # https://www.impo.com.uy/diariooficial/1926/03/18/2 # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Uruguay 1923 1925 - Oct 1 0:00 0:30 - Rule Uruguay 1924 1926 - Apr 1 0:00 0 - # From Tim Parenti (2018-02-15): diff --git a/usr/src/data/zoneinfo/zone.tab.txt b/usr/src/data/zoneinfo/zone.tab.txt index 8d056e3748..1f0128f3b2 100644 --- a/usr/src/data/zoneinfo/zone.tab.txt +++ b/usr/src/data/zoneinfo/zone.tab.txt @@ -56,8 +56,7 @@ AS -1416-17042 Pacific/Pago_Pago AT +4813+01620 Europe/Vienna AU -3133+15905 Australia/Lord_Howe Lord Howe Island AU -5430+15857 Antarctica/Macquarie Macquarie Island -AU -4253+14719 Australia/Hobart Tasmania (most areas) -AU -3956+14352 Australia/Currie Tasmania (King Island) +AU -4253+14719 Australia/Hobart Tasmania AU -3749+14458 Australia/Melbourne Victoria AU -3352+15113 Australia/Sydney New South Wales (most areas) AU -3157+14127 Australia/Broken_Hill New South Wales (Yancowinna) @@ -130,9 +129,9 @@ CA +682059-1334300 America/Inuvik Mountain - NT (west) CA +4906-11631 America/Creston MST - BC (Creston) CA +5946-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John) CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson) +CA +6043-13503 America/Whitehorse MST - Yukon (east) +CA +6404-13925 America/Dawson MST - Yukon (west) CA +4916-12307 America/Vancouver Pacific - BC (most areas) -CA +6043-13503 America/Whitehorse Pacific - Yukon (east) -CA +6404-13925 America/Dawson Pacific - Yukon (west) CC -1210+09655 Indian/Cocos CD -0418+01518 Africa/Kinshasa Dem. Rep. of Congo (west) CD -1140+02728 Africa/Lubumbashi Dem. Rep. 
of Congo (east) @@ -337,8 +336,8 @@ RU +554521+0373704 Europe/Moscow MSK+00 - Moscow area # Programs should use zone1970.tab instead; see above. UA +4457+03406 Europe/Simferopol Crimea RU +5836+04939 Europe/Kirov MSK+00 - Kirov +RU +4844+04425 Europe/Volgograd MSK+00 - Volgograd RU +4621+04803 Europe/Astrakhan MSK+01 - Astrakhan -RU +4844+04425 Europe/Volgograd MSK+01 - Volgograd RU +5134+04602 Europe/Saratov MSK+01 - Saratov RU +5420+04824 Europe/Ulyanovsk MSK+01 - Ulyanovsk RU +5312+05009 Europe/Samara MSK+01 - Samara, Udmurtia diff --git a/usr/src/data/zoneinfo/zone_sun.tab b/usr/src/data/zoneinfo/zone_sun.tab index a82bb5fc93..35158d737f 100644 --- a/usr/src/data/zoneinfo/zone_sun.tab +++ b/usr/src/data/zoneinfo/zone_sun.tab @@ -79,8 +79,7 @@ AS -1416-17042 Pacific/Pago_Pago US/Samoa AT +4813+01620 Europe/Vienna - AU -3133+15905 Australia/Lord_Howe Australia/LHI Lord Howe Island AU -5430+15857 Antarctica/Macquarie - Macquarie Island -AU -4253+14719 Australia/Hobart - Tasmania (most areas) -AU -3956+14352 Australia/Currie - Tasmania (King Island) +AU -4253+14719 Australia/Hobart - Tasmania AU -3749+14458 Australia/Melbourne Australia/Victoria Victoria AU -3352+15113 Australia/Sydney - New South Wales (most areas) AU -3157+14127 Australia/Broken_Hill - New South Wales (Yancowinna) @@ -153,9 +152,9 @@ CA +682059-1334300 America/Inuvik - Mountain - NT (west) CA +4906-11631 America/Creston - MST - BC (Creston) CA +5946-12014 America/Dawson_Creek - MST - BC (Dawson Cr, Ft St John) CA +5848-12242 America/Fort_Nelson - MST - BC (Ft Nelson) +CA +6043-13503 America/Whitehorse - MST - Yukon (east) +CA +6404-13925 America/Dawson - MST - Yukon (west) CA +4916-12307 America/Vancouver - Pacific - BC (most areas) -CA +6043-13503 America/Whitehorse - Pacific - Yukon (east) -CA +6404-13925 America/Dawson - Pacific - Yukon (west) CC -1210+09655 Indian/Cocos - CD -0418+01518 Africa/Kinshasa - Dem. Rep. of Congo (west) CD -1140+02728 Africa/Lubumbashi - Dem. Rep. 
of Congo (east) @@ -356,8 +355,8 @@ RS +4450+02030 Europe/Belgrade - RU +5443+02030 Europe/Kaliningrad - MSK-01 - Kaliningrad RU +554521+0373704 Europe/Moscow - MSK+00 - Moscow area RU +5836+04939 Europe/Kirov - MSK+00 - Kirov +RU +4844+04425 Europe/Volgograd - MSK+00 - Volgograd RU +4621+04803 Europe/Astrakhan - MSK+01 - Astrakhan -RU +4844+04425 Europe/Volgograd - MSK+01 - Volgograd RU +5134+04602 Europe/Saratov - MSK+01 - Saratov RU +5420+04824 Europe/Ulyanovsk - MSK+01 - Ulyanovsk RU +5312+05009 Europe/Samara - MSK+01 - Samara, Udmurtia diff --git a/usr/src/lib/libdlpi/Makefile.com b/usr/src/lib/libdlpi/Makefile.com index f51dd0d1b4..aa9452428e 100644 --- a/usr/src/lib/libdlpi/Makefile.com +++ b/usr/src/lib/libdlpi/Makefile.com @@ -39,7 +39,6 @@ LDLIBS += -lc -linetutil -ldladm SRCDIR = ../common CFLAGS += $(CCVERBOSE) -CERRWARN += $(CNOWARN_UNINIT) CPPFLAGS += -I$(SRCDIR) -D_REENTRANT .KEEP_STATE: diff --git a/usr/src/lib/libdlpi/common/libdlpi.c b/usr/src/lib/libdlpi/common/libdlpi.c index d07eea70e7..11d975f694 100644 --- a/usr/src/lib/libdlpi/common/libdlpi.c +++ b/usr/src/lib/libdlpi/common/libdlpi.c @@ -1384,8 +1384,7 @@ i_dlpi_strgetmsg(dlpi_impl_t *dip, int msec, dlpi_msg_t *dlreplyp, data.maxlen = (databuf == NULL) ? 
sizeof (bufd): *datalenp; for (;;) { - if (!infinite) - start = NSEC2MSEC(gethrtime()); + start = NSEC2MSEC(gethrtime()); switch (poll(&pfd, 1, msec)) { default: diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers index 26cfd15426..be0a055490 100644 --- a/usr/src/lib/libvmmapi/common/mapfile-vers +++ b/usr/src/lib/libvmmapi/common/mapfile-vers @@ -34,7 +34,6 @@ $mapfile_version 2 SYMBOL_VERSION ILLUMOSprivate { global: vcpu_reset; - vm_active_cpus; vm_activate_cpu; vm_active_cpus; vm_apicid2vcpu; @@ -46,19 +45,18 @@ SYMBOL_VERSION ILLUMOSprivate { vm_copy_teardown; vm_copyin; vm_copyout; - vm_create_devmem; vm_create; vm_create_devmem; vm_debug_cpus; vm_destroy; - vm_destroy; + vm_disable_pptdev_msix; vm_get_capability; vm_get_desc; vm_get_device_fd; vm_get_devmem_offset; vm_get_gpa_pmap; - vm_get_hpet_capabilities; vm_get_highmem_size; + vm_get_hpet_capabilities; vm_get_intinfo; vm_get_lowmem_limit; vm_get_lowmem_size; @@ -77,10 +75,6 @@ SYMBOL_VERSION ILLUMOSprivate { vm_inject_exception; vm_inject_fault; vm_inject_nmi; - vm_isa_assert_irq; - vm_isa_deassert_irq; - vm_isa_pulse_irq; - vm_isa_set_irq_trigger; vm_ioapic_assert_irq; vm_ioapic_deassert_irq; vm_ioapic_pincount; @@ -98,8 +92,10 @@ SYMBOL_VERSION ILLUMOSprivate { vm_mmap_memseg; vm_open; vm_parse_memsize; + vm_pmtmr_set_location; vm_reinit; vm_restart_instruction; + vm_resume_cpu; vm_rtc_gettime; vm_rtc_read; vm_rtc_settime; @@ -119,9 +115,7 @@ SYMBOL_VERSION ILLUMOSprivate { vm_suspend; vm_suspend_cpu; vm_suspended_cpus; - vm_resume_cpu; vm_unassign_pptdev; - vm_pmtmr_set_location; vm_wrlock_cycle; vm_get_run_state; vm_set_run_state; diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index 0b22ca7522..fcb098a74f 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -1162,7 +1162,22 @@ vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func, return (error); } + 
+int +vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func) +{ + struct vm_pptdev ppt; + + bzero(&ppt, sizeof(ppt)); + ppt.bus = bus; + ppt.slot = slot; + ppt.func = func; + + return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt); +} + #else /* __FreeBSD__ */ + int vm_assign_pptdev(struct vmctx *ctx, int pptfd) { @@ -1238,6 +1253,15 @@ vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit, *msix_limit = pptlimits.msix_limit; return (error); } + +int +vm_disable_pptdev_msix(struct vmctx *ctx, int pptfd) +{ + struct vm_pptdev pptdev; + + pptdev.pptfd = pptfd; + return (ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &pptdev)); +} #endif /* __FreeBSD__ */ uint64_t * @@ -1905,7 +1929,8 @@ vm_get_ioctls(size_t *len) VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER, VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV, VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI, - VM_PPTDEV_MSIX, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC, + VM_PPTDEV_MSIX, VM_PPTDEV_DISABLE_MSIX, + VM_INJECT_NMI, VM_STATS, VM_STAT_DESC, VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE, VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA, VM_GLA2GPA_NOFAULT, diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index f7aaa02087..72e43a4e3d 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -225,6 +225,7 @@ int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); +int vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func); int vm_get_pptdev_limits(struct vmctx *ctx, int bus, int slot, int func, int *msi_limit, int *msix_limit); #else /* __FreeBSD__ */ @@ -236,6 +237,7 @@ int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int pptfd, uint64_t addr, uint64_t msg, int numvec); int vm_setup_pptdev_msix(struct vmctx *ctx, 
int vcpu, int pptfd, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); +int vm_disable_pptdev_msix(struct vmctx *ctx, int pptfd); int vm_get_pptdev_limits(struct vmctx *ctx, int pptfd, int *msi_limit, int *msix_limit); #endif /* __FreeBSD__ */ diff --git a/usr/src/man/man1/prctl.1 b/usr/src/man/man1/prctl.1 index 0e117a510b..5a91760ab2 100644 --- a/usr/src/man/man1/prctl.1 +++ b/usr/src/man/man1/prctl.1 @@ -1,14 +1,14 @@ '\" te .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved +.\" Copyright 2021 OmniOS Community Edition (OmniOSce) Association. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH PRCTL 1 "April 9, 2016" +.TH PRCTL 1 "Jan 23, 2021" .SH NAME prctl \- get or set the resource controls of running processes, tasks, and projects .SH SYNOPSIS -.LP .nf \fBprctl\fR [\fB-P\fR] [\fB-t\fR [basic | privileged | system]] [\fB-n\fR \fIname\fR [\fB-srx\fR] [\fB-v\fR \fIvalue\fR] [\fB-e\fR | \fB-d\fR \fIaction\fR] [\fB-p\fR \fIpid\fR]] @@ -16,7 +16,6 @@ projects .fi .SH DESCRIPTION -.LP The \fBprctl\fR utility allows the examination and modification of the resource controls associated with an active process, task, or project on the system. It allows access to the basic and privileged limits and the current usage on @@ -26,7 +25,6 @@ the specified entity. 
See \fBresource_controls\fR(5) for a description of the resource controls supported in the current release of the Solaris operating system. .SH OPTIONS -.LP The following options are supported: .sp .ne 2 @@ -236,7 +234,6 @@ If none of the \fB-s\fR, \fB-r\fR, \fB-x\fR, \fB-v\fR, \fB-d\fR, or \fB-e\fR options are specified, the invocation is considered a get operation. Otherwise, it is considered a modify operation. .SH OPERANDS -.LP The following operand is supported: .sp .ne 2 @@ -251,7 +248,6 @@ operation fails. If no \fIid\fR is specified, an error message is returned. .RE .SH EXAMPLES -.LP \fBExample 1 \fRDisplaying Current Resource Control Settings .sp .LP @@ -270,6 +266,9 @@ NAME PRIVILEGE VALUE FLAG ACTION RECIPIENT task.max-cpu-time usage 8s system 18.4Es inf none - +task.max-processes + usage 30 + system 2.15G max deny - task.max-lwps usage 39 system 2.15G max deny - @@ -294,6 +293,9 @@ privileged 508MB - deny - project.max-tasks usage 2 system 2.15G max deny - +project.max-processes + usage 30 + system 2.15G max deny - project.max-lwps usage 39 system 2.15G max deny - @@ -308,6 +310,8 @@ zone.max-sem-ids system 16.8M max deny - zone.max-msg-ids system 16.8M max deny - +zone.max-processes + system 2.15G max deny - zone.max-lwps system 2.15G max deny - zone.cpu-shares @@ -382,7 +386,7 @@ Using the target project, identify the resource limit value before the change: project 10: group.staff project.max-locked-memory privileged 256MB - deny - - system 16.0EB max deny - + system 16.0EB max deny - current limit is 256 Megabytes. .fi @@ -494,7 +498,6 @@ The cap can be lowered to 50% using: .sp .SH EXIT STATUS -.LP The following exit values are returned: .sp .ne 2 @@ -533,7 +536,6 @@ Process information and control files .RE .SH ATTRIBUTES -.LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -552,12 +554,10 @@ Interface Stability See below. The command-line syntax is Committed. The human-readable output is Uncommitted. 
The parsable output is Committed. .SH SEE ALSO -.LP \fBrctladm\fR(1M), \fBzonecfg\fR(1M), \fBsetrctl\fR(2), \fBrctlblk_get_local_action\fR(3C), \fBproject\fR(4), \fBattributes\fR(5), \fBresource_controls\fR(5) .SH NOTES -.LP The valid signals that can be set on a resource control block allowing local actions are \fBSIGABRT\fR, \fBSIGXRES\fR, \fBSIGHUP\fR, \fBSIGSTOP\fR, \fBSIGTERM\fR, and \fBSIGKILL\fR. Additionally, CPU time related controls can diff --git a/usr/src/man/man1m/zonecfg.1m b/usr/src/man/man1m/zonecfg.1m index bb1b72ac37..83b19005a2 100644 --- a/usr/src/man/man1m/zonecfg.1m +++ b/usr/src/man/man1m/zonecfg.1m @@ -2,14 +2,14 @@ .\" Copyright (c) 2004, 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2015 Joyent, Inc. .\" Copyright 2017 Peter Tribble +.\" Copyright 2021 OmniOS Community Edition (OmniOSce) Association. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH ZONECFG 1M "Jul 24, 2017" +.TH ZONECFG 1M "Jan 23, 2021" .SH NAME zonecfg \- set up zone configuration .SH SYNOPSIS -.LP .nf \fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR} .fi @@ -30,7 +30,6 @@ zonecfg \- set up zone configuration .fi .SH DESCRIPTION -.LP The \fBzonecfg\fR utility creates and modifies the configuration of a zone. 
Zone configuration consists of a number of resources and properties. .sp @@ -73,7 +72,6 @@ the \fBzonecfg\fR properties and resources. See the brand-specific man page for more details on each brand. For an overview of brands, see the \fBbrands\fR(5) man page. .SS "Resources" -.LP The following resource types are supported: .sp .ne 2 @@ -186,7 +184,6 @@ Delegation of administration to specific users. .RE .SS "Properties" -.LP Each resource type has one or more properties. There are also some global properties, that is, properties of the configuration as a whole, rather than of some particular resource. @@ -310,6 +307,16 @@ The following properties are supported: .ad .sp .6 .RS 4n +\fBmax-processes\fR +.RE + +.sp +.ne 2 +.na +\fB(global)\fR +.ad +.sp .6 +.RS 4n \fBmax-sem-ids\fR .RE @@ -792,6 +799,8 @@ is the preferred way to set the \fBzone.cpu-shares\fR rctl. .RS 4n The maximum number of LWPs simultaneously available to this zone. This property is the preferred way to set the \fBzone.max-lwps\fR rctl. +If \fBmax-processes\fR is not explicitly set then it will be set to the +same value as \fBmax-lwps\fR. .RE .sp @@ -808,6 +817,20 @@ the preferred way to set the \fBzone.max-msg-ids\fR rctl. .sp .ne 2 .na +\fBglobal: \fBmax-processes\fR\fR +.ad +.sp .6 +.RS 4n +The maximum number of processes simultaneously available to this zone. This +property is the preferred way to set the \fBzone.max-processes\fR rctl. +If \fBmax-lwps\fR is not explicitly set, then setting this property will +automatically set \fBmax-lwps\fR to 10 times the value of +\fBmax-processes\fR. 
+.RE + +.sp +.ne 2 +.na \fBglobal: \fBmax-sem-ids\fR\fR .ad .sp .6 @@ -1015,6 +1038,7 @@ resource property-name type (global) cpu-shares simple (global) max-lwps simple (global) max-msg-ids simple +(global) max-processes simple (global) max-sem-ids simple (global) max-shm-ids simple (global) max-shm-memory simple @@ -1071,7 +1095,6 @@ contain alphanumerics plus the hyphen (\fB-\fR), underscore (\fB_\fR), and dot use by the system. Finally, the "autoboot" global property must have a value of "true" or "false". .SS "Using Kernel Statistics to Monitor CPU Caps" -.LP Using the kernel statistics (\fBkstat\fR(3KSTAT)) module \fBcaps\fR, the system maintains information for all capped projects and zones. You can access this information by reading kernel statistics (\fBkstat\fR(3KSTAT)), specifying @@ -1223,7 +1246,6 @@ Name of the zone for which statistics are displayed. .LP See \fBEXAMPLES\fR for sample output from a \fBkstat\fR command. .SH OPTIONS -.LP The following options are supported: .sp .ne 2 @@ -1261,7 +1283,6 @@ Specify the uuid of a zone instead of the Zone name. .RE .SH SUBCOMMANDS -.LP You can use the \fBadd\fR and \fBselect\fR subcommands to select a specific resource, at which point the scope changes to that resource. The \fBend\fR and \fBcancel\fR subcommands are used to complete the resource specification, at @@ -1544,7 +1565,6 @@ option can be used to force the action. .RE .SH EXAMPLES -.LP \fBExample 1 \fRCreating the Environment for a New Zone .sp .LP @@ -1944,7 +1964,6 @@ for a specific project, the second for the same project within zone 1. .sp .SH EXIT STATUS -.LP The following exit values are returned: .sp .ne 2 @@ -1977,7 +1996,6 @@ Invalid usage. 
.RE .SH ATTRIBUTES -.LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp @@ -1992,7 +2010,6 @@ Interface Stability Volatile .TE .SH SEE ALSO -.LP \fBppriv\fR(1), \fBprctl\fR(1), \fBzlogin\fR(1), \fBkstat\fR(1M), \fBmount\fR(1M), \fBpooladm\fR(1M), \fBpoolcfg\fR(1M), \fBpoold\fR(1M), \fBrcapd\fR(1M), \fBrctladm\fR(1M), \fBsvcadm\fR(1M), \fBipadm\fR(1M), @@ -2005,5 +2022,4 @@ Interface Stability Volatile \fISystem Administration Guide: Solaris Containers-Resource Management, and Solaris Zones\fR .SH NOTES -.LP All character data used by \fBzonecfg\fR must be in US-ASCII encoding. diff --git a/usr/src/man/man5/resource_controls.5 b/usr/src/man/man5/resource_controls.5 index 8dba24173f..4cf7f5ed26 100644 --- a/usr/src/man/man5/resource_controls.5 +++ b/usr/src/man/man5/resource_controls.5 @@ -1,17 +1,16 @@ '\" te .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2017, Joyent, Inc. +.\" Copyright 2021 OmniOS Community Edition (OmniOSce) Association. .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH RESOURCE_CONTROLS 5 "April 28, 2017" +.TH RESOURCE_CONTROLS 5 "Jan 23, 2021" .SH NAME resource_controls \- resource controls available through projects and zones .SH DESCRIPTION -.LP -For projects the resource controls facility is configured through the project -database. See \fBproject\fR(4). For zones, resource controls are configured -through \fBzonecfg\fR(1M). You can set and modify resource controls through the +The resource controls facility is configured through the project database. See +\fBproject\fR(4). You can set and modify resource controls through the following utilities: .RS +4 .TP @@ -58,7 +57,7 @@ The following are the resource controls are available: .sp .ne 2 .na -\fB\fBprocess.max-address-space\fR\fR +\fBprocess.max-address-space\fR .ad .sp .6 .RS 4n @@ -69,7 +68,7 @@ available to this process, expressed as a number of bytes. .sp .ne 2 .na -\fB\fBprocess.max-core-size\fR\fR +\fBprocess.max-core-size\fR .ad .sp .6 .RS 4n @@ -80,7 +79,7 @@ bytes. .sp .ne 2 .na -\fB\fBprocess.max-cpu-time\fR\fR +\fBprocess.max-cpu-time\fR .ad .sp .6 .RS 4n @@ -91,7 +90,7 @@ seconds. .sp .ne 2 .na -\fB\fBprocess.max-data-size\fR\fR +\fBprocess.max-data-size\fR .ad .sp .6 .RS 4n @@ -101,7 +100,7 @@ Maximum heap memory available to this process, expressed as a number of bytes. .sp .ne 2 .na -\fB\fBprocess.max-file-descriptor\fR\fR +\fBprocess.max-file-descriptor\fR .ad .sp .6 .RS 4n @@ -112,7 +111,7 @@ integer. .sp .ne 2 .na -\fB\fBprocess.max-file-size\fR\fR +\fBprocess.max-file-size\fR .ad .sp .6 .RS 4n @@ -123,14 +122,14 @@ number of bytes. .sp .ne 2 .na -\fB\fBprocess.max-locked-memory\fR\fR +\fBprocess.max-locked-memory\fR .ad .sp .6 .RS 4n Total amount of physical memory that can be locked by this process, expressed as a number of bytes. 
This limit is not enforced for a process with the -\fB\fBPRIV_PROC_LOCK_MEMORY\fR\fR privilege. Because the ability to lock memory -is controlled by the \fB\fBPRIV_PROC_LOCK_MEMORY\fR\fR privilege within native +\fBPRIV_PROC_LOCK_MEMORY\fR privilege. Because the ability to lock memory +is controlled by the \fBPRIV_PROC_LOCK_MEMORY\fR privilege within native zones, this resource control is only useful within branded zones which might support a different policy for locking memory. .RE @@ -138,7 +137,7 @@ support a different policy for locking memory. .sp .ne 2 .na -\fB\fBprocess.max-msg-messages\fR\fR +\fBprocess.max-msg-messages\fR .ad .sp .6 .RS 4n @@ -149,7 +148,7 @@ control at \fBmsgget()\fR time), expressed as an integer. .sp .ne 2 .na -\fB\fBprocess.max-msg-qbytes\fR\fR +\fBprocess.max-msg-qbytes\fR .ad .sp .6 .RS 4n @@ -160,7 +159,7 @@ resource control at \fBmsgget()\fR time), expressed as a number of bytes. .sp .ne 2 .na -\fB\fBprocess.max-port-events\fR\fR +\fBprocess.max-port-events\fR .ad .sp .6 .RS 4n @@ -170,7 +169,7 @@ Maximum allowable number of events per event port, expressed as an integer. .sp .ne 2 .na -\fB\fBprocess.max-sem-nsems\fR\fR +\fBprocess.max-sem-nsems\fR .ad .sp .6 .RS 4n @@ -181,7 +180,7 @@ integer. .sp .ne 2 .na -\fB\fBprocess.max-sem-ops\fR\fR +\fBprocess.max-sem-ops\fR .ad .sp .6 .RS 4n @@ -193,7 +192,7 @@ integer, specifying the number of operations. .sp .ne 2 .na -\fB\fBprocess.max-sigqueue-size\fR\fR +\fBprocess.max-sigqueue-size\fR .ad .sp .6 .RS 4n @@ -203,7 +202,7 @@ Maximum number of outstanding queued signals. .sp .ne 2 .na -\fB\fBprocess.max-stack-size\fR\fR +\fBprocess.max-stack-size\fR .ad .sp .6 .RS 4n @@ -214,7 +213,7 @@ of bytes. .sp .ne 2 .na -\fB\fBproject.cpu-cap\fR\fR +\fBproject.cpu-cap\fR .ad .sp .6 .RS 4n @@ -228,7 +227,7 @@ action. .sp .ne 2 .na -\fB\fBproject.cpu-shares\fR\fR +\fBproject.cpu-shares\fR .ad .sp .6 .RS 4n @@ -240,7 +239,7 @@ resource control does not support the \fBsyslog\fR action. 
.sp .ne 2 .na -\fB\fBproject.max-contracts\fR\fR +\fBproject.max-contracts\fR .ad .sp .6 .RS 4n @@ -250,7 +249,7 @@ Maximum number of contracts allowed in a project, expressed as an integer. .sp .ne 2 .na -\fB\fBproject.max-crypto-memory\fR\fR +\fBproject.max-crypto-memory\fR .ad .sp .6 .RS 4n @@ -262,7 +261,7 @@ charged against this resource control. .sp .ne 2 .na -\fB\fBproject.max-locked-memory\fR\fR +\fBproject.max-locked-memory\fR .ad .sp .6 .RS 4n @@ -273,7 +272,7 @@ Total amount of physical memory locked by device drivers and user processes .sp .ne 2 .na -\fB\fBproject.max-lwps\fR\fR +\fBproject.max-lwps\fR .ad .sp .6 .RS 4n @@ -284,7 +283,7 @@ integer. .sp .ne 2 .na -\fB\fBproject.max-msg-ids\fR\fR +\fBproject.max-msg-ids\fR .ad .sp .6 .RS 4n @@ -295,30 +294,28 @@ integer. .sp .ne 2 .na -\fB\fBproject.max-port-ids\fR\fR +\fBproject.max-processes\fR .ad .sp .6 .RS 4n -Maximum allowable number of event ports, expressed as an integer. +Maximum number of processes simultaneously available to a project, expressed as +an integer. .RE .sp .ne 2 .na -\fB\fBproject.max-processes\fR\fR +\fBproject.max-port-ids\fR .ad .sp .6 .RS 4n -Maximum number of processes that can be active in a project. This rctl is -similar to \fBproject.max-lwps\fR, except that zombie processes are included. -This rctl prevents process-slot exhaustion which can occur due to an excessive -number of zombies. Expressed as an integer. +Maximum allowable number of event ports, expressed as an integer. .RE .sp .ne 2 .na -\fB\fBproject.max-sem-ids\fR\fR +\fBproject.max-sem-ids\fR .ad .sp .6 .RS 4n @@ -328,7 +325,7 @@ Maximum number of semaphore IDs allowed for a project, expressed as an integer. .sp .ne 2 .na -\fB\fBproject.max-shm-ids\fR\fR +\fBproject.max-shm-ids\fR .ad .sp .6 .RS 4n @@ -339,7 +336,7 @@ integer. .sp .ne 2 .na -\fB\fBproject.max-shm-memory\fR\fR +\fBproject.max-shm-memory\fR .ad .sp .6 .RS 4n @@ -350,7 +347,7 @@ bytes. 
.sp .ne 2 .na -\fB\fBproject.max-tasks\fR\fR +\fBproject.max-tasks\fR .ad .sp .6 .RS 4n @@ -360,7 +357,7 @@ Maximum number of tasks allowable in a project, expressed as an integer. .sp .ne 2 .na -\fB\fBproject.pool\fR\fR +\fBproject.pool\fR .ad .sp .6 .RS 4n @@ -370,7 +367,7 @@ Binds a specified resource pool with a project. .sp .ne 2 .na -\fB\fBrcap.max-rss\fR\fR +\fBrcap.max-rss\fR .ad .sp .6 .RS 4n @@ -381,7 +378,7 @@ in a project. .sp .ne 2 .na -\fB\fBtask.max-cpu-time\fR\fR +\fBtask.max-cpu-time\fR .ad .sp .6 .RS 4n @@ -392,7 +389,7 @@ number of seconds. .sp .ne 2 .na -\fB\fBtask.max-lwps\fR\fR +\fBtask.max-lwps\fR .ad .sp .6 .RS 4n @@ -401,12 +398,23 @@ expressed as an integer. .RE .sp +.ne 2 +.na +\fBtask.max-processes\fR +.ad +.sp .6 +.RS 4n +Maximum number of processes simultaneously available to this task, +expressed as an integer. +.RE + +.sp .LP The following zone-wide resource controls are available: .sp .ne 2 .na -\fB\fBzone.cpu-baseline\fR\fR +\fBzone.cpu-baseline\fR .ad .sp .6 .RS 4n @@ -420,7 +428,7 @@ This resource control does not support the \fBsyslog\fR action. .sp .ne 2 .na -\fB\fBzone.cpu-burst-time\fR\fR +\fBzone.cpu-burst-time\fR .ad .sp .6 .RS 4n @@ -433,7 +441,7 @@ This resource control does not support the \fBsyslog\fR action. .sp .ne 2 .na -\fB\fBzone.cpu-cap\fR\fR +\fBzone.cpu-cap\fR .ad .sp .6 .RS 4n @@ -447,7 +455,7 @@ not support the \fBsyslog\fR action. .sp .ne 2 .na -\fB\fBzone.cpu-shares\fR\fR +\fBzone.cpu-shares\fR .ad .sp .6 .RS 4n @@ -461,7 +469,7 @@ Expressed as an integer. This resource control does not support the .sp .ne 2 .na -\fB\fBzone.max-locked-memory\fR\fR +\fBzone.max-locked-memory\fR .ad .sp .6 .RS 4n @@ -471,7 +479,7 @@ Total amount of physical locked memory available to a zone. .sp .ne 2 .na -\fB\fBzone.max-lofi\fR\fR +\fBzone.max-lofi\fR .ad .sp .6 .RS 4n @@ -480,22 +488,20 @@ zone. Expressed as an integer. This resource control does not support the \fBsyslog\fR action. 
.RE -.sp -.ne 2 -.na -\fB\fBzone.max-lwps\fR\fR +\fBzone.max-lwps\fR .ad .sp .6 .RS 4n -Sets a limit on how many LWPs can be active in a zone. A zone's total LWPs -can be further subdivided among projects within the zone within the zone by -using \fBproject.max-lwps\fR entries. Expressed as an integer. +Enhances resource isolation by preventing too many LWPs in one zone from +affecting other zones. A zone's total LWPs can be further subdivided among +projects within the zone by using \fBproject.max-lwps\fR entries. Expressed as +an integer. .RE .sp .ne 2 .na -\fB\fBzone.max-msg-ids\fR\fR +\fBzone.max-msg-ids\fR .ad .sp .6 .RS 4n @@ -506,7 +512,7 @@ integer. .sp .ne 2 .na -\fB\fBzone.max-physical-memory\fR\fR +\fBzone.max-physical-memory\fR .ad .sp .6 .RS 4n @@ -514,26 +520,25 @@ Sets a limit on the amount of physical memory (RSS) that can be used by a zone before resident pages start being forcibly paged out. The unit used is bytes. Expressed as an integer. This resource control does not support the \fBsyslog\fR action. +\fBzone.max-processes\fR +.ad +.sp .6 +.RS 4n +Enhances resource isolation by preventing too many processes in one zone from +affecting other zones. A zone's total processes can be further subdivided among +projects within the zone by using \fBproject.max-processes\fR entries. +Expressed as an integer. .RE .sp .ne 2 .na -\fB\fBzone.max-processes\fR\fR -.ad -.sp .6 -.RS 4n -Maximum number of processes that can be active in a zone. This rctl is -similar to \fBzone.max-lwps\fR, except that zombie processes are included. -This rctl prevents process-slot exhaustion which can occur due to an excessive -number of zombies. This rctl can be further subdivided among projects within -the zone using \fBproject.max-processes\fR. Expressed as an integer. .RE .sp .ne 2 .na -\fB\fBzone.max-sem-ids\fR\fR +\fBzone.max-sem-ids\fR .ad .sp .6 .RS 4n @@ -543,7 +548,7 @@ Maximum number of semaphore IDs allowed for a zone, expressed as an integer. 
.sp .ne 2 .na -\fB\fBzone.max-shm-ids\fR\fR +\fBzone.max-shm-ids\fR .ad .sp .6 .RS 4n @@ -554,7 +559,7 @@ integer. .sp .ne 2 .na -\fB\fBzone.max-shm-memory\fR\fR +\fBzone.max-shm-memory\fR .ad .sp .6 .RS 4n @@ -565,7 +570,7 @@ bytes. .sp .ne 2 .na -\fB\fBzone.max-swap\fR\fR +\fBzone.max-swap\fR .ad .sp .6 .RS 4n @@ -589,7 +594,6 @@ as an integer. This resource control does not support the \fBsyslog\fR action. .LP See \fBzones\fR(5). .SS "Units Used in Resource Controls" -.LP Resource controls can be expressed as in units of size (bytes), time (seconds), or as a count (integer). These units use the strings specified below. .sp @@ -674,7 +678,6 @@ Note that unit modifiers (for example, \fB5G\fR) are accepted by the \fBprctl\fR(1), \fBprojadd\fR(1M), and \fBprojmod\fR(1M) commands. You cannot use unit modifiers in the project database itself. .SS "Resource Control Values and Privilege Levels" -.LP A threshold value on a resource control constitutes a point at which local actions can be triggered or global actions, such as logging, can occur. .sp @@ -684,7 +687,7 @@ level. The privilege level must be one of the following three types: .sp .ne 2 .na -\fB\fBbasic\fR\fR +\fBbasic\fR .ad .sp .6 .RS 4n @@ -694,7 +697,7 @@ Can be modified by the owner of the calling process. .sp .ne 2 .na -\fB\fBprivileged\fR\fR +\fBprivileged\fR .ad .sp .6 .RS 4n @@ -705,7 +708,7 @@ or by \fBprctl\fR(1) (requiring \fBproc_owner\fR privilege). .sp .ne 2 .na -\fB\fBsystem\fR\fR +\fBsystem\fR .ad .sp .6 .RS 4n @@ -742,7 +745,6 @@ task.max-lwps=(priv,1K,deny) .in -2 .SS "Global and Local Actions on Resource Control Values" -.LP There are two categories of actions on resource control values: global and local. .sp @@ -828,7 +830,7 @@ follows: .sp .ne 2 .na -\fB\fBnone\fR\fR +\fBnone\fR .ad .sp .6 .RS 4n @@ -842,7 +844,7 @@ the process exceeding the threshold is not affected. .sp .ne 2 .na -\fB\fBdeny\fR\fR +\fBdeny\fR .ad .sp .6 .RS 4n @@ -855,7 +857,7 @@ control value. See the \fBfork\fR(2). 
.sp .ne 2 .na -\fB\fBsignal=\fR\fR +\fBsignal=\fR .ad .sp .6 .RS 4n @@ -884,7 +886,7 @@ The following are the signals available to resource control values: .sp .ne 2 .na -\fB\fBSIGABRT\fR\fR +\fBSIGABRT\fR .ad .sp .6 .RS 4n @@ -894,7 +896,7 @@ Terminate the process. .sp .ne 2 .na -\fB\fBSIGHUP\fR\fR +\fBSIGHUP\fR .ad .sp .6 .RS 4n @@ -905,7 +907,7 @@ the process group that controls the terminal. .sp .ne 2 .na -\fB\fBSIGTERM\fR\fR +\fBSIGTERM\fR .ad .sp .6 .RS 4n @@ -915,7 +917,7 @@ Terminate the process. Termination signal sent by software. .sp .ne 2 .na -\fB\fBSIGKILL\fR\fR +\fBSIGKILL\fR .ad .sp .6 .RS 4n @@ -925,7 +927,7 @@ Terminate the process and kill the program. .sp .ne 2 .na -\fB\fBSIGSTOP\fR\fR +\fBSIGSTOP\fR .ad .sp .6 .RS 4n @@ -935,7 +937,7 @@ Stop the process. Job control signal. .sp .ne 2 .na -\fB\fBSIGXRES\fR\fR +\fBSIGXRES\fR .ad .sp .6 .RS 4n @@ -945,7 +947,7 @@ Resource control limit exceeded. Generated by resource control facility. .sp .ne 2 .na -\fB\fBSIGXFSZ\fR\fR +\fBSIGXFSZ\fR .ad .sp .6 .RS 4n @@ -957,7 +959,7 @@ controls with the \fBRCTL_GLOBAL_FILE_SIZE\fR property .sp .ne 2 .na -\fB\fBSIGXCPU\fR\fR +\fBSIGXCPU\fR .ad .sp .6 .RS 4n @@ -967,7 +969,6 @@ controls with the \fBRCTL_GLOBAL_CPUTIME\fR property .RE .SS "Resource Control Flags and Properties" -.LP Each resource control on the system has a certain set of associated properties. This set of properties is defined as a set of flags, which are associated with all controlled instances of that resource. Global flags cannot be modified, but @@ -1007,7 +1008,7 @@ The global flags indicate the following: .sp .ne 2 .na -\fB\fBlowerable\fR\fR +\fBlowerable\fR .ad .sp .6 .RS 4n @@ -1018,7 +1019,7 @@ control. .sp .ne 2 .na -\fB\fBno-deny\fR\fR +\fBno-deny\fR .ad .sp .6 .RS 4n @@ -1029,7 +1030,7 @@ denied. .sp .ne 2 .na -\fB\fBcpu-time\fR\fR +\fBcpu-time\fR .ad .sp .6 .RS 4n @@ -1040,7 +1041,7 @@ are reached. 
.sp .ne 2 .na -\fB\fBseconds\fR\fR +\fBseconds\fR .ad .sp .6 .RS 4n @@ -1071,7 +1072,6 @@ resource control. An \fBinf\fR value has an infinite quantity. The value is never enforced. Hence, as configured, both threshold quantities represent infinite values that are never exceeded. .SS "Resource Control Enforcement" -.LP More than one resource control can exist on a resource. A resource control can exist at each containment level in the process model. If resource controls are active on the same resource at different container levels, the smallest @@ -1079,7 +1079,6 @@ container's control is enforced first. Thus, action is taken on \fBprocess.max-cpu-time\fR before \fBtask.max-cpu-time\fR if both controls are encountered simultaneously. .SH ATTRIBUTES -.LP See \fBattributes\fR(5) for a description of the following attributes: .sp @@ -1094,7 +1093,6 @@ Interface Stability Evolving .TE .SH SEE ALSO -.LP \fBprctl\fR(1), \fBpooladm\fR(1M), \fBpoolcfg\fR(1M), \fBprojadd\fR(1M), \fBprojmod\fR(1M), \fBrctladm\fR(1M), \fBsetrctl\fR(2), \fBrctlblk_set_value\fR(3C), \fBlibpool\fR(3LIB), \fBproject\fR(4), diff --git a/usr/src/pkg/manifests/system-data-zoneinfo.mf b/usr/src/pkg/manifests/system-data-zoneinfo.mf index 94e0512a28..5ba745d762 100644 --- a/usr/src/pkg/manifests/system-data-zoneinfo.mf +++ b/usr/src/pkg/manifests/system-data-zoneinfo.mf @@ -17,7 +17,7 @@ # set name=pkg.fmri \ - value=pkg:/system/data/zoneinfo@2020.1,$(PKGVERS_BUILTON)-$(PKGVERS_BRANCH) + value=pkg:/system/data/zoneinfo@2021.1,$(PKGVERS_BUILTON)-$(PKGVERS_BRANCH) set name=pkg.description value="timezone information" set name=pkg.summary value="Timezone Information" set name=info.classification value=org.opensolaris.category.2008:System/Core @@ -312,7 +312,6 @@ file path=usr/share/lib/zoneinfo/Atlantic/Stanley file path=usr/share/lib/zoneinfo/Australia/Adelaide file path=usr/share/lib/zoneinfo/Australia/Brisbane file path=usr/share/lib/zoneinfo/Australia/Broken_Hill -file 
path=usr/share/lib/zoneinfo/Australia/Currie file path=usr/share/lib/zoneinfo/Australia/Darwin file path=usr/share/lib/zoneinfo/Australia/Eucla file path=usr/share/lib/zoneinfo/Australia/Hobart @@ -589,6 +588,8 @@ hardlink path=usr/share/lib/zoneinfo/Atlantic/St_Helena \ hardlink path=usr/share/lib/zoneinfo/Australia/ACT target=../Australia/Sydney hardlink path=usr/share/lib/zoneinfo/Australia/Canberra \ target=../Australia/Sydney +hardlink path=usr/share/lib/zoneinfo/Australia/Currie \ + target=../Australia/Hobart hardlink path=usr/share/lib/zoneinfo/Australia/LHI \ target=../Australia/Lord_Howe hardlink path=usr/share/lib/zoneinfo/Australia/NSW target=../Australia/Sydney diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 9981263343..1539777403 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -52,6 +52,7 @@ #include <sys/abd.h> #include <sys/cityhash.h> #include <sys/dsl_crypt.h> +#include <sys/stdbool.h> /* * ========================================================================== @@ -1859,10 +1860,36 @@ zio_execute(zio_t *zio) return; } +#ifdef _KERNEL + /* + * The I/O pipeline is a part of the machinery responsible for + * evacuation of memory pages to disk when we are under + * sufficient memory pressure for pageout to run. By setting + * this flag, allocations may dip into pages in the pageout + * reserved pool in order to try to make forward progress. + */ + bool set_pushpage = false; + if (!(curthread->t_flag & T_PUSHPAGE)) { + /* + * We can be called recursively, so we need to remember + * if this frame was the one that first set the flag or + * not. 
+ */ + set_pushpage = true; + curthread->t_flag |= T_PUSHPAGE; + } +#endif + zio->io_stage = stage; zio->io_pipeline_trace |= zio->io_stage; rv = zio_pipeline[highbit64(stage) - 1](zio); +#ifdef _KERNEL + if (set_pushpage) { + curthread->t_flag &= ~T_PUSHPAGE; + } +#endif + if (rv == ZIO_PIPELINE_STOP) return; diff --git a/usr/src/uts/common/io/usb/usba/usbai.c b/usr/src/uts/common/io/usb/usba/usbai.c index e1a6b4dfcd..9ba8e53cd0 100644 --- a/usr/src/uts/common/io/usb/usba/usbai.c +++ b/usr/src/uts/common/io/usb/usba/usbai.c @@ -78,6 +78,8 @@ static char *usba_debug_buf = NULL; /* The debug buf */ static char *usba_buf_sptr, *usba_buf_eptr; static hrtime_t usba_last_timestamp; /* last time stamp in trace */ +usb_dev_cap_t usb_cap; + /* USBA framework initializations */ void usba_usbai_initialization() @@ -690,7 +692,7 @@ static int usba_handle_device_remote_wakeup(dev_info_t *dip, int cmd) { int rval; - uint8_t bmRequest = USB_DEV_REQ_HOST_TO_DEV; + uint8_t bmRequest = USB_DEV_REQ_HOST_TO_DEV; uchar_t bRequest; uint16_t wIndex = 0; usb_cr_t completion_reason = 0; @@ -752,7 +754,7 @@ int usb_handle_remote_wakeup(dev_info_t *dip, int cmd) { usb_cfg_descr_t cfg_descr; - uchar_t *usb_cfg; /* buf for config descriptor */ + uchar_t *usb_cfg; /* buf for config descriptor */ size_t cfg_length; int rval; @@ -789,15 +791,15 @@ usb_handle_remote_wakeup(dev_info_t *dip, int cmd) int usb_create_pm_components(dev_info_t *dip, uint_t *pwr_states) { - uchar_t *usb_cfg; /* buf for config descriptor */ + uchar_t *usb_cfg; /* buf for config descriptor */ usb_cfg_descr_t cfg_descr; size_t cfg_length; usba_cfg_pwr_descr_t confpwr_descr; usba_if_pwr_descr_t ifpwr_descr; - uint8_t cfg_attrib; + uint8_t cfg_attrib; int i, lvl, rval; int n_prop = 0; - uint8_t *ptr; + uint8_t *ptr; char *drvname; char str[USBA_POWER_STR_SIZE]; char *pm_comp[USBA_N_PMCOMP]; diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c index 6961a2ff4f..c57f8a7d2c 100644 --- 
a/usr/src/uts/common/os/main.c +++ b/usr/src/uts/common/os/main.c @@ -565,7 +565,7 @@ main(void) /* * Set the scan rate and other parameters of the paging subsystem. */ - setupclock(0); + setupclock(); /* * Initialize process 0's lwp directory and lwpid hash table. diff --git a/usr/src/uts/common/os/mem_config.c b/usr/src/uts/common/os/mem_config.c index 285b76347b..fd74dd3092 100644 --- a/usr/src/uts/common/os/mem_config.c +++ b/usr/src/uts/common/os/mem_config.c @@ -509,7 +509,7 @@ mapalloc: * Recalculate the paging parameters now total_pages has changed. * This will also cause the clock hands to be reset before next use. */ - setupclock(1); + setupclock(); memsegs_unlock(1); @@ -2700,7 +2700,7 @@ kphysm_del_cleanup(struct mem_handle *mhp) * Recalculate the paging parameters now total_pages has changed. * This will also cause the clock hands to be reset before next use. */ - setupclock(1); + setupclock(); memsegs_unlock(1); diff --git a/usr/src/uts/common/os/vm_pageout.c b/usr/src/uts/common/os/vm_pageout.c index 2a93cd9061..45e90c34c3 100644 --- a/usr/src/uts/common/os/vm_pageout.c +++ b/usr/src/uts/common/os/vm_pageout.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ /* @@ -74,84 +74,210 @@ #include <vm/pvn.h> #include <vm/seg_kmem.h> -static int checkpage(page_t *, int); +/* + * FREE MEMORY MANAGEMENT + * + * Management of the pool of free pages is a tricky business. There are + * several critical threshold values which constrain our allocation of new + * pages and inform the rate of paging out of memory to swap. These threshold + * values, and the behaviour they induce, are described below in descending + * order of size -- and thus increasing order of severity! + * + * +---------------------------------------------------- physmem (all memory) + * | + * | Ordinarily there are no particular constraints placed on page + * v allocation. 
The page scanner is not running and page_create_va() + * | will effectively grant all page requests (whether from the kernel + * | or from user processes) without artificial delay. + * | + * +------------------------ lotsfree (1.56% of physmem, min. 16MB, max. 2GB) + * | + * | When we have less than "lotsfree" pages, pageout_scanner() is + * v signalled by schedpaging() to begin looking for pages that can + * | be evicted to disk to bring us back above lotsfree. At this + * | stage there is still no constraint on allocation of free pages. + * | + * | For small systems, we set a lower bound of 16MB for lotsfree; + * v this is the natural value for a system with 1GB memory. This is + * | to ensure that the pageout reserve pool contains at least 4MB + * | for use by ZFS. + * | + * | For systems with a large amount of memory, we constrain lotsfree + * | to be at most 2GB (with a pageout reserve of around 0.5GB), as + * v at some point the required slack relates more closely to the + * | rate at which paging can occur than to the total amount of memory. + * | + * +------------------- desfree (1/2 of lotsfree, 0.78% of physmem, min. 8MB) + * | + * | When we drop below desfree, a number of kernel facilities will + * v wait before allocating more memory, under the assumption that + * | pageout or reaping will make progress and free up some memory. + * | This behaviour is not especially coordinated; look for comparisons + * | of desfree and freemem. + * | + * | In addition to various attempts at advisory caution, clock() + * | will wake up the thread that is ordinarily parked in sched(). + * | This routine is responsible for the heavy-handed swapping out + * v of entire processes in an attempt to arrest the slide of free + * | memory. See comments in sched.c for more details. + * | + * +----- minfree & throttlefree (3/4 of desfree, 0.59% of physmem, min. 6MB) + * | + * | These two separate tunables have, by default, the same value. 
+ * v Various parts of the kernel use minfree to signal the need for + * | more aggressive reclamation of memory, and sched() is more + * | aggressive at swapping processes out. + * | + * | If free memory falls below throttlefree, page_create_va() will + * | use page_create_throttle() to begin holding most requests for + * | new pages while pageout and reaping free up memory. Sleeping + * v allocations (e.g., KM_SLEEP) are held here while we wait for + * | more memory. Non-sleeping allocations are generally allowed to + * | proceed, unless their priority is explicitly lowered with + * | KM_NORMALPRI. + * | + * +------- pageout_reserve (3/4 of throttlefree, 0.44% of physmem, min. 4MB) + * | + * | When we hit throttlefree, the situation is already dire. The + * v system is generally paging out memory and swapping out entire + * | processes in order to free up memory for continued operation. + * | + * | Unfortunately, evicting memory to disk generally requires short + * | term use of additional memory; e.g., allocation of buffers for + * | storage drivers, updating maps of free and used blocks, etc. + * | As such, pageout_reserve is the number of pages that we keep in + * | special reserve for use by pageout() and sched() and by any + * v other parts of the kernel that need to be working for those to + * | make forward progress such as the ZFS I/O pipeline. + * | + * | When we are below pageout_reserve, we fail or hold any allocation + * | that has not explicitly requested access to the reserve pool. + * | Access to the reserve is generally granted via the KM_PUSHPAGE + * | flag, or by marking a thread T_PUSHPAGE such that all allocations + * | can implicitly tap the reserve. For more details, see the + * v NOMEMWAIT() macro, the T_PUSHPAGE thread flag, the KM_PUSHPAGE + * | and VM_PUSHPAGE allocation flags, and page_create_throttle(). 
+ * | + * +---------------------------------------------------------- no free memory + * | + * | If we have arrived here, things are very bad indeed. It is + * v surprisingly difficult to tell if this condition is even fatal, + * | as enough memory may have been granted to pageout() and to the + * | ZFS I/O pipeline that requests for eviction that have already been + * | made will complete and free up memory some time soon. + * | + * | If free memory does not materialise, the system generally remains + * | deadlocked. The pageout_deadman() below is run once per second + * | from clock(), seeking to limit the amount of time a single request + * v to page out can be blocked before the system panics to get a crash + * | dump and return to service. + * | + * +------------------------------------------------------------------------- + */ /* * The following parameters control operation of the page replacement - * algorithm. They are initialized to 0, and then computed at boot time - * based on the size of the system. If they are patched non-zero in - * a loaded vmunix they are left alone and may thus be changed per system - * using mdb on the loaded system. + * algorithm. They are initialized to 0, and then computed at boot time based + * on the size of the system; see setupclock(). If they are patched non-zero + * in a loaded vmunix they are left alone and may thus be changed per system + * using "mdb -kw" on the loaded system. */ pgcnt_t slowscan = 0; pgcnt_t fastscan = 0; static pgcnt_t handspreadpages = 0; -static int loopfraction = 2; + +/* + * looppages: + * Cached copy of the total number of pages in the system (total_pages). + * + * loopfraction: + * Divisor used to relate fastscan to looppages in setupclock(). 
+ */ +static uint_t loopfraction = 2; static pgcnt_t looppages; -/* See comment below describing 4% and 80% */ -static int min_percent_cpu = 4; -static int max_percent_cpu = 80; + +static uint_t min_percent_cpu = 4; +static uint_t max_percent_cpu = 80; static pgcnt_t maxfastscan = 0; static pgcnt_t maxslowscan = 100; -pgcnt_t maxpgio = 0; -pgcnt_t minfree = 0; -pgcnt_t desfree = 0; -pgcnt_t lotsfree = 0; -pgcnt_t needfree = 0; -pgcnt_t throttlefree = 0; -pgcnt_t pageout_reserve = 0; +#define MEGABYTES (1024ULL * 1024ULL) -pgcnt_t deficit; -pgcnt_t nscan; -pgcnt_t desscan; +/* + * pageout_threshold_style: + * set to 1 to use the previous default threshold size calculation; + * i.e., each threshold is half of the next largest value. + */ +uint_t pageout_threshold_style = 0; + +/* + * The operator may override these tunables to request a different minimum or + * maximum lotsfree value, or to change the divisor we use for automatic + * sizing. + * + * By default, we make lotsfree 1/64th of the total memory in the machine. The + * minimum and maximum are specified in bytes, rather than pages; a zero value + * means the default values (below) are used. + */ +uint_t lotsfree_fraction = 64; +pgcnt_t lotsfree_min = 0; +pgcnt_t lotsfree_max = 0; + +#define LOTSFREE_MIN_DEFAULT (16 * MEGABYTES) +#define LOTSFREE_MAX_DEFAULT (2048 * MEGABYTES) + +/* + * If these tunables are set to non-zero values in /etc/system, and provided + * the value is not larger than the threshold above, the specified value will + * be used directly without any additional calculation or adjustment. The boot + * time value of these overrides is preserved in the "clockinit" struct. More + * detail is available in the comment at the top of the file. 
+ */ +pgcnt_t maxpgio = 0; +pgcnt_t minfree = 0; +pgcnt_t desfree = 0; +pgcnt_t lotsfree = 0; +pgcnt_t needfree = 0; +pgcnt_t throttlefree = 0; +pgcnt_t pageout_reserve = 0; + +pgcnt_t deficit; +pgcnt_t nscan; +pgcnt_t desscan; /* kstats */ uint64_t low_mem_scan; uint64_t zone_cap_scan; uint64_t n_throttle; -clock_t zone_pageout_ticks; /* tunable to change zone pagescan ticks */ - /* - * Values for min_pageout_ticks, max_pageout_ticks and pageout_ticks - * are the number of ticks in each wakeup cycle that gives the - * equivalent of some underlying %CPU duty cycle. - * - * For example, when RATETOSCHEDPAGING is 4 (the default), then schedpaging() - * will run 4 times/sec to update pageout scanning parameters and kickoff - * the pageout_scanner() thread if necessary. - * - * Given hz is 100, min_pageout_ticks will be set to 1 (1% of a CPU). When - * pageout_ticks is set to min_pageout_ticks, then the total CPU time consumed - * by the scanner in a 1 second interval is 4% of a CPU (RATETOSCHEDPAGING * 1). - * - * Given hz is 100, max_pageout_ticks will be set to 20 (20% of a CPU). When - * pageout_ticks is set to max_pageout_ticks, then the total CPU time consumed - * by the scanner in a 1 second interval is 80% of a CPU - * (RATETOSCHEDPAGING * 20). There is no point making max_pageout_ticks >25 - * since schedpaging() runs RATETOSCHEDPAGING (4) times/sec. + * Values for min_pageout_nsec, max_pageout_nsec, pageout_nsec and + * zone_pageout_nsec are the number of nanoseconds in each wakeup cycle + * that gives the equivalent of some underlying %CPU duty cycle. * - * If hz is 1000, then min_pageout_ticks will be 10 and max_pageout_ticks - * will be 200, so the CPU percentages are the same as when hz is 100. + * min_pageout_nsec: + * nanoseconds/wakeup equivalent of min_percent_cpu. * - * min_pageout_ticks: - * ticks/wakeup equivalent of min_percent_cpu. + * max_pageout_nsec: + * nanoseconds/wakeup equivalent of max_percent_cpu. 
* - * max_pageout_ticks: - * ticks/wakeup equivalent of max_percent_cpu. - * - * pageout_ticks: - * Number of clock ticks budgeted for each wakeup cycle. + * pageout_nsec: + * Number of nanoseconds budgeted for each wakeup cycle. * Computed each time around by schedpaging(). - * Varies between min_pageout_ticks .. max_pageout_ticks, + * Varies between min_pageout_nsec and max_pageout_nsec, * depending on memory pressure or zones over their cap. + * + * zone_pageout_nsec: + * Number of nanoseconds budget for each cycle when a zone + * is over its memory cap. If this is zero, then the value + * of max_pageout_nsec is used instead. */ -static clock_t min_pageout_ticks; -static clock_t max_pageout_ticks; -static clock_t pageout_ticks; +static hrtime_t min_pageout_nsec; +static hrtime_t max_pageout_nsec; +static hrtime_t pageout_nsec; +static hrtime_t zone_pageout_nsec; #define MAX_PSCAN_THREADS 16 static boolean_t reset_hands[MAX_PSCAN_THREADS]; @@ -169,38 +295,38 @@ uint_t pageout_reset_cnt = 64; /* num. cycles for pageout_scanner hand reset */ uint_t n_page_scanners; static pgcnt_t pscan_region_sz; /* informational only */ - #define PAGES_POLL_MASK 1023 /* * pageout_sample_lim: - * The limit on the number of samples needed to establish a value - * for new pageout parameters, fastscan, slowscan, and handspreadpages. + * The limit on the number of samples needed to establish a value for new + * pageout parameters: fastscan, slowscan, pageout_new_spread, and + * handspreadpages. * * pageout_sample_cnt: - * Current sample number. Once the sample gets large enough, - * set new values for handspreadpages, fastscan and slowscan. + * Current sample number. Once the sample gets large enough, set new + * values for handspreadpages, pageout_new_spread, fastscan and slowscan. * * pageout_sample_pages: * The accumulated number of pages scanned during sampling. * * pageout_sample_etime: - * The accumulated number of nanoseconds for the sample. 
+ * The accumulated nanoseconds for the sample. * * pageout_rate: - * Rate in pages/second, computed at the end of sampling. + * Rate in pages/nanosecond, computed at the end of sampling. * * pageout_new_spread: - * The new value to use for maxfastscan and (perhaps) handspreadpages. - * Intended to be the number pages that can be scanned per sec using ~10% - * of a CPU. Calculated after enough samples have been taken. - * pageout_rate / 10 + * Initially zero while the system scan rate is measured by + * pageout_scanner(), which then sets this value once per system boot after + * enough samples have been recorded (pageout_sample_cnt). Once set, this + * new value is used for fastscan and handspreadpages. */ typedef hrtime_t hrrate_t; -static uint_t pageout_sample_lim = 4; -static uint_t pageout_sample_cnt = 0; +static uint64_t pageout_sample_lim = 4; +static uint64_t pageout_sample_cnt = 0; static pgcnt_t pageout_sample_pages = 0; static hrrate_t pageout_rate = 0; static pgcnt_t pageout_new_spread = 0; @@ -211,12 +337,12 @@ static hrtime_t pageout_sample_etime = 0; #define PAGE_SCAN_STARTUP (pageout_sample_cnt < pageout_sample_lim) /* - * Record number of times a pageout_scanner wakeup cycle finished because it + * Record number of times a pageout_scanner() wakeup cycle finished because it * timed out (exceeded its CPU budget), rather than because it visited * its budgeted number of pages. This is only done when scanning under low * free memory conditions, not when scanning for zones over their cap. */ -uint64_t pageout_timeouts = 0; +uint64_t pageout_timeouts = 0; #ifdef VM_STATS static struct pageoutvmstats_str { @@ -231,10 +357,56 @@ static struct pageoutvmstats_str { kmutex_t memavail_lock; kcondvar_t memavail_cv; -/* - * The size of the clock loop. 
- */ -#define LOOPPAGES total_pages +typedef enum pageout_hand { + POH_FRONT = 1, + POH_BACK, +} pageout_hand_t; + +typedef enum { + CKP_INELIGIBLE, + CKP_NOT_FREED, + CKP_FREED, +} checkpage_result_t; + +static checkpage_result_t checkpage(page_t *, pageout_hand_t); + +static struct clockinit { + bool ci_init; + pgcnt_t ci_lotsfree_min; + pgcnt_t ci_lotsfree_max; + pgcnt_t ci_lotsfree; + pgcnt_t ci_desfree; + pgcnt_t ci_minfree; + pgcnt_t ci_throttlefree; + pgcnt_t ci_pageout_reserve; + pgcnt_t ci_maxpgio; + pgcnt_t ci_maxfastscan; + pgcnt_t ci_fastscan; + pgcnt_t ci_slowscan; + pgcnt_t ci_handspreadpages; +} clockinit = { .ci_init = false }; + +static pgcnt_t +clamp(pgcnt_t value, pgcnt_t minimum, pgcnt_t maximum) +{ + if (value < minimum) { + return (minimum); + } else if (value > maximum) { + return (maximum); + } else { + return (value); + } +} + +static pgcnt_t +tune(pgcnt_t initval, pgcnt_t initval_ceiling, pgcnt_t defval) +{ + if (initval == 0 || initval >= initval_ceiling) { + return (defval); + } else { + return (initval); + } +} /* * Local boolean to control scanning when zones are over their cap. Avoids @@ -248,108 +420,82 @@ kcondvar_t memavail_cv; static boolean_t zones_over = B_FALSE; /* - * Set up the paging constants for the page scanner clock-hand algorithm. - * Called at startup after the system is initialized and the amount of memory - * and number of paging devices is known (recalc will be 0). Called again once - * PAGE_SCAN_STARTUP is true after the scanner has collected enough samples - * (recalc will be 1). - * - * Will also be called after a memory dynamic reconfiguration operation and - * recalc will be 1 in those cases too. + * Set up the paging constants for the clock algorithm used by + * pageout_scanner(), and by the virtual memory system overall. See the + * comments at the top of this file for more information about the threshold + * values and system responses to memory pressure. 
* - * lotsfree is 1/64 of memory, but at least 512K (ha!). - * desfree is 1/2 of lotsfree. - * minfree is 1/2 of desfree. + * This routine is called once by main() at startup, after the initial size of + * physical memory is determined. It may be called again later if memory is + * added to or removed from the system, or if new measurements of the page scan + * rate become available. */ void -setupclock(int recalc) +setupclock(void) { uint_t i; pgcnt_t sz, tmp; + pgcnt_t defval; + bool half = (pageout_threshold_style == 1); + bool recalc = true; - static spgcnt_t init_lfree, init_dfree, init_mfree; - static spgcnt_t init_tfree, init_preserve, init_mpgio; - static spgcnt_t init_mfscan, init_fscan, init_sscan, init_hspages; - - looppages = LOOPPAGES; + looppages = total_pages; /* - * setupclock can be called to recalculate the paging - * parameters in the case of dynamic reconfiguration of memory. - * So to make sure we make the proper calculations, if such a - * situation should arise, we save away the initial values - * of each parameter so we can recall them when needed. This - * way we don't lose the settings an admin might have made - * through the /etc/system file. + * The operator may have provided specific values for some of the + * tunables via /etc/system. On our first call, we preserve those + * values so that they can be used for subsequent recalculations. + * + * A value of zero for any tunable means we will use the default + * sizing. 
*/ - if (!recalc) { - init_lfree = lotsfree; - init_dfree = desfree; - init_mfree = minfree; - init_tfree = throttlefree; - init_preserve = pageout_reserve; - init_mpgio = maxpgio; - init_mfscan = maxfastscan; - init_fscan = fastscan; - init_sscan = slowscan; - init_hspages = handspreadpages; + if (!clockinit.ci_init) { + clockinit.ci_init = true; + + clockinit.ci_lotsfree_min = lotsfree_min; + clockinit.ci_lotsfree_max = lotsfree_max; + clockinit.ci_lotsfree = lotsfree; + clockinit.ci_desfree = desfree; + clockinit.ci_minfree = minfree; + clockinit.ci_throttlefree = throttlefree; + clockinit.ci_pageout_reserve = pageout_reserve; + clockinit.ci_maxpgio = maxpgio; + clockinit.ci_maxfastscan = maxfastscan; + clockinit.ci_fastscan = fastscan; + clockinit.ci_slowscan = slowscan; + clockinit.ci_handspreadpages = handspreadpages; + + /* + * The first call does not trigger a recalculation, only + * subsequent calls. + */ + recalc = false; } /* - * Set up thresholds for paging: + * Configure paging threshold values. For more details on what each + * threshold signifies, see the comments at the top of this file. */ + lotsfree_max = tune(clockinit.ci_lotsfree_max, looppages, + btop(LOTSFREE_MAX_DEFAULT)); + lotsfree_min = tune(clockinit.ci_lotsfree_min, lotsfree_max, + btop(LOTSFREE_MIN_DEFAULT)); - /* - * Lotsfree is threshold where paging daemon turns on. - */ - if (init_lfree == 0 || init_lfree >= looppages) - lotsfree = MAX(looppages / 64, btop(512 * 1024)); - else - lotsfree = init_lfree; + lotsfree = tune(clockinit.ci_lotsfree, looppages, + clamp(looppages / lotsfree_fraction, lotsfree_min, lotsfree_max)); - /* - * Desfree is amount of memory desired free. - * If less than this for extended period, start swapping. - */ - if (init_dfree == 0 || init_dfree >= lotsfree) - desfree = lotsfree / 2; - else - desfree = init_dfree; + desfree = tune(clockinit.ci_desfree, lotsfree, + lotsfree / 2); - /* - * Minfree is minimal amount of free memory which is tolerable. 
- */ - if (init_mfree == 0 || init_mfree >= desfree) - minfree = desfree / 2; - else - minfree = init_mfree; + minfree = tune(clockinit.ci_minfree, desfree, + half ? desfree / 2 : 3 * desfree / 4); - /* - * Throttlefree is the point at which we start throttling - * PG_WAIT requests until enough memory becomes available. - */ - if (init_tfree == 0 || init_tfree >= desfree) - throttlefree = minfree; - else - throttlefree = init_tfree; + throttlefree = tune(clockinit.ci_throttlefree, desfree, + minfree); - /* - * Pageout_reserve is the number of pages that we keep in - * stock for pageout's own use. Having a few such pages - * provides insurance against system deadlock due to - * pageout needing pages. When freemem < pageout_reserve, - * non-blocking allocations are denied to any threads - * other than pageout and sched. (At some point we might - * want to consider a per-thread flag like T_PUSHING_PAGES - * to indicate that a thread is part of the page-pushing - * dance (e.g. an interrupt thread) and thus is entitled - * to the same special dispensation we accord pageout.) - */ - if (init_preserve == 0 || init_preserve >= throttlefree) - pageout_reserve = throttlefree / 2; - else - pageout_reserve = init_preserve; + pageout_reserve = tune(clockinit.ci_pageout_reserve, throttlefree, + half ? throttlefree / 2 : 3 * throttlefree / 4); /* * Maxpgio thresholds how much paging is acceptable. @@ -358,131 +504,146 @@ setupclock(int recalc) * * XXX - Does not account for multiple swap devices. */ - if (init_mpgio == 0) + if (clockinit.ci_maxpgio == 0) { maxpgio = (DISKRPM * 2) / 3; - else - maxpgio = init_mpgio; + } else { + maxpgio = clockinit.ci_maxpgio; + } /* - * When the system is in a low memory state, the page scan rate varies - * between fastscan and slowscan based on the amount of free memory - * available. When only zones are over their memory cap, the scan rate - * is always fastscan. 
- * - * The fastscan rate should be set based on the number pages that can - * be scanned per sec using ~10% of a CPU. Since this value depends on - * the processor, MMU, Ghz etc., it must be determined dynamically. - * - * When the scanner first starts up, fastscan will be set to 0 and - * maxfastscan will be set to MAXHANDSPREADPAGES (64MB, in pages). - * However, once the scanner has collected enough samples, then fastscan - * is set to be the smaller of 1/2 of memory (looppages / loopfraction) - * or maxfastscan (which is set from pageout_new_spread). Thus, - * MAXHANDSPREADPAGES is irrelevant after the scanner is fully - * initialized. - * - * pageout_new_spread is calculated when the scanner first starts - * running. During this initial sampling period the nscan_limit - * is set to the total_pages of system memory. Thus, the scanner could - * theoretically scan all of memory in one pass. However, each sample - * is also limited by the %CPU budget. This is controlled by - * pageout_ticks which is set in schedpaging(). During the sampling - * period, pageout_ticks is set to max_pageout_ticks. This tick value - * is derived from the max_percent_cpu (80%) described above. On a - * system with more than a small amount of memory (~8GB), the scanner's - * %CPU will be the limiting factor in calculating pageout_new_spread. + * The clock scan rate varies between fastscan and slowscan + * based on the amount of free memory available. Fastscan + * rate should be set based on the number pages that can be + * scanned per sec using ~10% of processor time. Since this + * value depends on the processor, MMU, Mhz etc., it is + * difficult to determine it in a generic manner for all + * architectures. * - * At the end of the sampling period, the pageout_rate indicates how - * many pages could be scanned per second. The pageout_new_spread is - * then set to be 1/10th of that (i.e. approximating 10% of a CPU). 
- * Of course, this value could still be more than the physical memory - * on the system. If so, fastscan is set to 1/2 of memory, as - * mentioned above. + * Instead of trying to determine the number of pages scanned + * per sec for every processor, fastscan is set to be the smaller + * of 1/2 of memory or MAXHANDSPREADPAGES and the sampling + * time is limited to ~4% of processor time. * - * All of this leads up to the setting of handspreadpages, which is - * set to fastscan. This is the distance, in pages, between the front - * and back hands during scanning. It will dictate which pages will - * be considered "hot" on the backhand and which pages will be "cold" - * and reclaimed + * Setting fastscan to be 1/2 of memory allows pageout to scan + * all of memory in ~2 secs. This implies that user pages not + * accessed within 1 sec (assuming, handspreadpages == fastscan) + * can be reclaimed when free memory is very low. Stealing pages + * not accessed within 1 sec seems reasonable and ensures that + * active user processes don't thrash. * - * If the scanner is limited by desscan, then at the highest rate it - * will scan up to fastscan/RATETOSCHEDPAGING pages per cycle. If the - * scanner is limited by the %CPU, then at the highest rate (20% of a - * CPU per cycle) the number of pages scanned could be much less. + * Smaller values of fastscan result in scanning fewer pages + * every second and consequently pageout may not be able to free + * sufficient memory to maintain the minimum threshold. Larger + * values of fastscan result in scanning a lot more pages which + * could lead to thrashing and higher CPU usage. * - * Thus, if the scanner is limited by desscan, then the handspreadpages - * setting means 1sec between the front and back hands, but if the - * scanner is limited by %CPU, it could be several seconds between the - * two hands. 
+ * Fastscan needs to be limited to a maximum value and should not + * scale with memory to prevent pageout from consuming too much + * time for scanning on slow CPU's and avoid thrashing, as a + * result of scanning too many pages, on faster CPU's. + * The value of 64 Meg was chosen for MAXHANDSPREADPAGES + * (the upper bound for fastscan) based on the average number + * of pages that can potentially be scanned in ~1 sec (using ~4% + * of the CPU) on some of the following machines that currently + * run Solaris 2.x: * - * The basic assumption is that at the worst case, stealing pages - * not accessed within 1 sec seems reasonable and ensures that active - * user processes don't thrash. This is especially true when the system - * is in a low memory state. + * average memory scanned in ~1 sec * - * There are some additional factors to consider for the case of - * scanning when zones are over their cap. In this situation it is - * also likely that the machine will have a large physical memory which - * will take many seconds to fully scan (due to the %CPU and desscan - * limits per cycle). It is probable that there will be few (or 0) - * pages attributed to these zones in any single scanning cycle. The - * result is that reclaiming enough pages for these zones might take - * several additional seconds (this is generally not a problem since - * the zone physical cap is just a soft cap). + * 25 Mhz SS1+: 23 Meg + * LX: 37 Meg + * 50 Mhz SC2000: 68 Meg * - * This is similar to the typical multi-processor situation in which - * pageout is often unable to maintain the minimum paging thresholds - * under heavy load due to the fact that user processes running on - * other CPU's can be dirtying memory at a much faster pace than - * pageout can find pages to free. 
+ * 40 Mhz 486: 26 Meg + * 66 Mhz 486: 42 Meg * - * One potential approach to address both of these cases is to enable - * more than one CPU to run the page scanner, in such a manner that the - * various clock hands don't overlap. However, this also makes it more - * difficult to determine the values for fastscan, slowscan and - * handspreadpages. This is left as a future enhancement, if necessary. - * - * When free memory falls just below lotsfree, the scan rate goes from - * 0 to slowscan (i.e., the page scanner starts running). This + * When free memory falls just below lotsfree, the scan rate + * goes from 0 to slowscan (i.e., pageout starts running). This * transition needs to be smooth and is achieved by ensuring that * pageout scans a small number of pages to satisfy the transient * memory demand. This is set to not exceed 100 pages/sec (25 per * wakeup) since scanning that many pages has no noticible impact * on system performance. * - * The swapper is currently used to free up memory when pageout is - * unable to meet memory demands. It does this by swapping out entire - * processes. In addition to freeing up memory, swapping also reduces - * the demand for memory because the swapped out processes cannot - * run, and thereby consume memory. However, this is a pathological - * state and performance will generally be considered unacceptable. + * In addition to setting fastscan and slowscan, pageout is + * limited to using ~4% of the CPU. This results in increasing + * the time taken to scan all of memory, which in turn means that + * user processes have a better opportunity of preventing their + * pages from being stolen. This has a positive effect on + * interactive and overall system performance when memory demand + * is high. 
+ * + * Thus, the rate at which pages are scanned for replacement will + * vary linearly between slowscan and the number of pages that + * can be scanned using ~4% of processor time instead of varying + * linearly between slowscan and fastscan. + * + * Also, the processor time used by pageout will vary from ~1% + * at slowscan to ~4% at fastscan instead of varying between + * ~1% at slowscan and ~10% at fastscan. + * + * The values chosen for the various VM parameters (fastscan, + * handspreadpages, etc) are not universally true for all machines, + * but appear to be a good rule of thumb for the machines we've + * tested. They have the following ranges: + * + * cpu speed: 20 to 70 Mhz + * page size: 4K to 8K + * memory size: 16M to 5G + * page scan rate: 4000 - 17400 4K pages per sec + * + * The values need to be re-examined for machines which don't + * fall into the various ranges (e.g., slower or faster CPUs, + * smaller or larger pagesizes etc) shown above. + * + * On an MP machine, pageout is often unable to maintain the + * minimum paging thresholds under heavy load. This is due to + * the fact that user processes running on other CPU's can be + * dirtying memory at a much faster pace than pageout can find + * pages to free. The memory demands could be met by enabling + * more than one CPU to run the clock algorithm in such a manner + * that the various clock hands don't overlap. This also makes + * it more difficult to determine the values for fastscan, slowscan + * and handspreadpages. + * + * The swapper is currently used to free up memory when pageout + * is unable to meet memory demands by swapping out processes. + * In addition to freeing up memory, swapping also reduces the + * demand for memory by preventing user processes from running + * and thereby consuming memory. 
*/ - if (init_mfscan == 0) { - if (pageout_new_spread != 0) + if (clockinit.ci_maxfastscan == 0) { + if (pageout_new_spread != 0) { maxfastscan = pageout_new_spread; - else + } else { maxfastscan = MAXHANDSPREADPAGES; + } } else { - maxfastscan = init_mfscan; + maxfastscan = clockinit.ci_maxfastscan; } - if (init_fscan == 0) { + + if (clockinit.ci_fastscan == 0) { fastscan = MIN(looppages / loopfraction, maxfastscan); } else { - fastscan = init_fscan; - if (fastscan > looppages / loopfraction) - fastscan = looppages / loopfraction; + fastscan = clockinit.ci_fastscan; + } + + if (fastscan > looppages / loopfraction) { + fastscan = looppages / loopfraction; } /* * Set slow scan time to 1/10 the fast scan time, but * not to exceed maxslowscan. */ - if (init_sscan == 0) + if (clockinit.ci_slowscan == 0) { slowscan = MIN(fastscan / 10, maxslowscan); - else - slowscan = init_sscan; - if (slowscan > fastscan / 2) + } else { + slowscan = clockinit.ci_slowscan; + } + + if (slowscan > fastscan / 2) { slowscan = fastscan / 2; + } /* * Handspreadpages is distance (in pages) between front and back @@ -491,10 +652,12 @@ setupclock(int recalc) * decreases as the scan rate rises. It must be < the amount * of pageable memory. * - * Since pageout is limited to the %CPU per cycle, setting - * handspreadpages to be "fastscan" results in the front hand being - * a few secs (varies based on the processor speed) ahead of the back - * hand at fastscan rates. + * Since pageout is limited to ~4% of the CPU, setting handspreadpages + * to be "fastscan" results in the front hand being a few secs + * (varies based on the processor speed) ahead of the back hand + * at fastscan rates. This distance can be further reduced, if + * necessary, by increasing the processor time used by pageout + * to be more than ~4% and preferrably not more than ~10%. * * As a result, user processes have a much better chance of * referencing their pages before the back hand examines them. 
@@ -502,21 +665,23 @@ setupclock(int recalc) * the freelist since pageout does not end up freeing pages which * may be referenced a sec later. */ - if (init_hspages == 0) + if (clockinit.ci_handspreadpages == 0) { handspreadpages = fastscan; - else - handspreadpages = init_hspages; + } else { + handspreadpages = clockinit.ci_handspreadpages; + } /* * Make sure that back hand follows front hand by at least - * 1/RATETOSCHEDPAGING seconds. Without this test, it is possible - * for the back hand to look at a page during the same wakeup of - * the pageout daemon in which the front hand cleared its ref bit. + * 1/SCHEDPAGING_HZ seconds. Without this test, it is possible for the + * back hand to look at a page during the same wakeup of the pageout + * daemon in which the front hand cleared its ref bit. */ - if (handspreadpages >= looppages) + if (handspreadpages >= looppages) { handspreadpages = looppages - 1; + } - if (recalc == 0) { + if (!recalc) { /* * Setup basic values at initialization. */ @@ -574,20 +739,16 @@ setupclock(int recalc) * Pageout scheduling. * * Schedpaging controls the rate at which the page out daemon runs by - * setting the global variables pageout_ticks and desscan RATETOSCHEDPAGING - * times a second. The pageout_ticks variable controls the percent of one - * CPU that each page scanner thread should consume (see min_percent_cpu - * and max_percent_cpu descriptions). The desscan variable records the number - * of pages pageout should examine in its next pass; schedpaging sets this - * value based on the amount of currently available memory. In addtition, the - * nscan variable records the number of pages pageout has examined in its - * current pass; schedpaging resets this value to zero each time it runs. + * setting the global variables nscan and desscan SCHEDPAGING_HZ + * times a second. Nscan records the number of pages pageout has examined + * in its current pass; schedpaging() resets this value to zero each time + * it runs. 
Desscan records the number of pages pageout should examine + * in its next pass; schedpaging() sets this value based on the amount of + * currently available memory. */ +#define SCHEDPAGING_HZ 4 -#define RATETOSCHEDPAGING 4 /* times/second */ - -/* held while pageout_scanner or schedpaging are modifying shared data */ -static kmutex_t pageout_mutex; +static kmutex_t pageout_mutex; /* held while pageout or schedpaging running */ /* * Pool of available async pageout putpage requests. @@ -663,7 +824,7 @@ schedpaging(void *arg) * happens, desscan becomes negative and pageout_scanner() * stops paging out. */ - if ((needfree) && (pageout_new_spread == 0)) { + if (needfree > 0 && pageout_new_spread == 0) { /* * If we've not yet collected enough samples to * calculate a spread, kick into high gear anytime @@ -672,7 +833,7 @@ schedpaging(void *arg) * the %CPU will limit the scan. That will also be * maxed out below. */ - desscan = fastscan / RATETOSCHEDPAGING; + desscan = fastscan / SCHEDPAGING_HZ; } else { /* * Once we've calculated a spread based on system @@ -684,7 +845,7 @@ schedpaging(void *arg) slowstmp = slowscan * vavail; faststmp = fastscan * (lotsfree - vavail); result = (slowstmp + faststmp) / - nz(lotsfree) / RATETOSCHEDPAGING; + nz(lotsfree) / SCHEDPAGING_HZ; desscan = (pgcnt_t)result; } @@ -693,11 +854,11 @@ schedpaging(void *arg) * spread, also kick %CPU to the max. */ if (pageout_new_spread == 0) { - pageout_ticks = max_pageout_ticks; + pageout_nsec = max_pageout_nsec; } else { - pageout_ticks = min_pageout_ticks + + pageout_nsec = min_pageout_nsec + (lotsfree - vavail) * - (max_pageout_ticks - min_pageout_ticks) / + (max_pageout_nsec - min_pageout_nsec) / nz(lotsfree); } @@ -756,6 +917,10 @@ schedpaging(void *arg) if (freemem < lotsfree + needfree || PAGE_SCAN_STARTUP) { if (!PAGE_SCAN_STARTUP) low_mem_scan++; + /* + * Either we need more memory, or we still need to + * measure the average scan rate. Wake the scanner. 
+ */ DTRACE_PROBE(schedpage__wake__low); WAKE_PAGEOUT_SCANNER(); @@ -769,10 +934,10 @@ schedpaging(void *arg) * Increase the scanning CPU% to the max. This implies * 80% of one CPU/sec if the scanner can run each * opportunity. Can also be tuned via setting - * zone_pageout_ticks in /etc/system or with mdb. + * zone_pageout_nsec in /etc/system or with mdb. */ - pageout_ticks = (zone_pageout_ticks != 0) ? - zone_pageout_ticks : max_pageout_ticks; + pageout_nsec = (zone_pageout_nsec != 0) ? + zone_pageout_nsec : max_pageout_nsec; zones_over = B_TRUE; zone_cap_scan++; @@ -805,16 +970,18 @@ schedpaging(void *arg) if (kmem_avail() > 0) cv_broadcast(&memavail_cv); - (void) timeout(schedpaging, arg, hz / RATETOSCHEDPAGING); + (void) timeout(schedpaging, arg, hz / SCHEDPAGING_HZ); } pgcnt_t pushes; ulong_t push_list_size; /* # of requests on pageout queue */ -#define FRONT 1 -#define BACK 2 - -int dopageout = 1; /* /etc/system tunable to disable page reclamation */ +/* + * Paging out should always be enabled. This tunable exists to hold pageout + * for debugging purposes. If set to 0, pageout_scanner() will go back to + * sleep each time it is woken by schedpaging(). + */ +uint_t dopageout = 1; /* * The page out daemon, which runs as process 2. @@ -886,14 +1053,15 @@ pageout() kmem_zalloc(async_list_size * sizeof (struct async_reqs), KM_SLEEP); req_freelist = push_req; - for (i = 0; i < async_list_size - 1; i++) + for (i = 0; i < async_list_size - 1; i++) { push_req[i].a_next = &push_req[i + 1]; + } pageout_pri = curthread->t_pri; /* Create the (first) pageout scanner thread. */ - (void) lwp_kernel_create(proc_pageout, pageout_scanner, (void *) 0, - TS_RUN, pageout_pri - 1); + (void) lwp_kernel_create(proc_pageout, pageout_scanner, NULL, TS_RUN, + pageout_pri - 1); /* * kick off pageout scheduler. @@ -911,7 +1079,7 @@ pageout() /* * Limit pushes to avoid saturating pageout devices. 
*/ - max_pushes = maxpgio / RATETOSCHEDPAGING; + max_pushes = maxpgio / SCHEDPAGING_HZ; CALLB_CPR_INIT(&cprinfo, &push_lock, callb_generic_cpr, "pageout"); for (;;) { @@ -954,14 +1122,14 @@ static void pageout_scanner(void *a) { struct page *fronthand, *backhand; - uint_t count, iter = 0; + uint_t laps, iter = 0; callb_cpr_t cprinfo; pgcnt_t nscan_cnt, nscan_limit; pgcnt_t pcount; uint_t inst = (uint_t)(uintptr_t)a; hrtime_t sample_start, sample_end; - clock_t pageout_lbolt; kmutex_t pscan_mutex; + bool sampling; VERIFY3U(inst, <, MAX_PSCAN_THREADS); @@ -970,10 +1138,17 @@ pageout_scanner(void *a) CALLB_CPR_INIT(&cprinfo, &pscan_mutex, callb_generic_cpr, "poscan"); mutex_enter(&pscan_mutex); - min_pageout_ticks = MAX(1, - ((hz * min_percent_cpu) / 100) / RATETOSCHEDPAGING); - max_pageout_ticks = MAX(min_pageout_ticks, - ((hz * max_percent_cpu) / 100) / RATETOSCHEDPAGING); + /* + * Establish the minimum and maximum length of time to be spent + * scanning pages per wakeup, limiting the scanner duty cycle. The + * input percentage values (0-100) must be converted to a fraction of + * the number of nanoseconds in a second of wall time, then further + * scaled down by the number of scanner wakeups in a second: + */ + min_pageout_nsec = MAX(1, + NANOSEC * min_percent_cpu / 100 / SCHEDPAGING_HZ); + max_pageout_nsec = MAX(min_pageout_nsec, + NANOSEC * max_percent_cpu / 100 / SCHEDPAGING_HZ); loop: cv_signal_pageout(); @@ -982,9 +1157,17 @@ loop: cv_wait(&proc_pageout->p_cv, &pscan_mutex); CALLB_CPR_SAFE_END(&cprinfo, &pscan_mutex); - if (!dopageout) + /* + * Check if pageout has been disabled for debugging purposes: + */ + if (!dopageout) { goto loop; + } + /* + * One may reset the clock hands for debugging purposes. Hands will + * also be reset if memory is added to or removed from the system. 
+ */ if (reset_hands[inst]) { struct page *first; pgcnt_t offset = total_pages / n_page_scanners; @@ -1024,29 +1207,38 @@ loop: } } - /* - * This CPU kstat is only incremented here and we're obviously on this - * CPU, so no lock. - */ CPU_STATS_ADDQ(CPU, vm, pgrrun, 1); - count = 0; - /* Kernel probe */ - TNF_PROBE_2(pageout_scan_start, "vm pagedaemon", /* CSTYLED */, - tnf_ulong, pages_free, freemem, tnf_ulong, pages_needed, needfree); + /* + * Keep track of the number of times we have scanned all the way around + * the loop: + */ + laps = 0; + /* + * Track the number of pages visited during this scan so that we can + * periodically measure our duty cycle. + */ pcount = 0; nscan_cnt = 0; + if (PAGE_SCAN_STARTUP) { + /* + * We need to measure the rate at which the system is able to + * scan pages of memory. Each of these initial samples is a + * scan of all system memory, regardless of whether or not we + * are experiencing memory pressure. + */ nscan_limit = total_pages; + sampling = true; } else { nscan_limit = desscan; + sampling = false; } DTRACE_PROBE4(pageout__start, pgcnt_t, nscan_limit, uint_t, inst, page_t *, backhand, page_t *, fronthand); - pageout_lbolt = ddi_get_lbolt(); sample_start = gethrtime(); /* @@ -1056,24 +1248,29 @@ loop: * 2) there is not enough free memory * 3) during page scan startup when determining sample data */ - while (nscan_cnt < nscan_limit && - (zones_over || - freemem < lotsfree + needfree || - PAGE_SCAN_STARTUP)) { - int rvfront, rvback; + while (nscan_cnt < nscan_limit) { + checkpage_result_t rvfront, rvback; + + if (!sampling && !zones_over && + freemem >= lotsfree + needfree) { + /* + * We are not sampling and enough memory has become + * available that scanning is no longer required. + */ + break; + } DTRACE_PROBE2(pageout__loop, pgcnt_t, pcount, uint_t, inst); /* - * Check to see if we have exceeded our %CPU budget - * for this wakeup, but not on every single page visited, - * just every once in a while. 
+ * Periodically check to see if we have exceeded the CPU duty + * cycle for a single wakeup. */ if ((pcount & PAGES_POLL_MASK) == PAGES_POLL_MASK) { - clock_t pageout_cycle_ticks; + hrtime_t pageout_cycle_nsec; - pageout_cycle_ticks = ddi_get_lbolt() - pageout_lbolt; - if (pageout_cycle_ticks >= pageout_ticks) { + pageout_cycle_nsec = gethrtime() - sample_start; + if (pageout_cycle_nsec >= pageout_nsec) { /* * This is where we normally break out of the * loop when scanning zones or sampling. @@ -1088,12 +1285,14 @@ loop: /* * If checkpage manages to add a page to the free list, - * we give ourselves another couple of trips around memory. + * we give ourselves another couple of trips around the loop. */ - if ((rvfront = checkpage(fronthand, FRONT)) == 1) - count = 0; - if ((rvback = checkpage(backhand, BACK)) == 1) - count = 0; + if ((rvfront = checkpage(fronthand, POH_FRONT)) == CKP_FREED) { + laps = 0; + } + if ((rvback = checkpage(backhand, POH_BACK)) == CKP_FREED) { + laps = 0; + } ++pcount; @@ -1106,17 +1305,18 @@ loop: /* * Don't include ineligible pages in the number scanned. */ - if (rvfront != -1 || rvback != -1) + if (rvfront != CKP_INELIGIBLE || rvback != CKP_INELIGIBLE) { nscan_cnt++; + } backhand = page_next(backhand); + fronthand = page_next(fronthand); /* - * backhand update and wraparound check are done separately - * because lint barks when it finds an empty "if" body + * The front hand has wrapped around to the first page in the + * loop. */ - - if ((fronthand = page_next(fronthand)) == page_first()) { + if (fronthand == page_first()) { DTRACE_PROBE1(pageout__wrap__front, uint_t, inst); /* @@ -1139,12 +1339,12 @@ loop: * If scanning only because zones are over their cap, * then wrapping is common and we simply keep going. */ - if (freemem < lotsfree + needfree && ++count > 1) { + if (freemem < lotsfree + needfree && ++laps > 1) { /* * The system is low on memory. * Extremely unlikely, but it happens. 
- * We went around memory at least once - * and didn't reclaim enough. + * We went around the loop at least once + * and didn't get far enough. * If we are still skipping `highly shared' * pages, skip fewer of them. Otherwise, * give up till the next clock tick. @@ -1154,11 +1354,6 @@ loop: po_share <<= 1; mutex_exit(&pageout_mutex); } else { - /* - * Really a "goto loop", but if someone - * is tracing or TNF_PROBE_ing, hit - * those probes first. - */ mutex_exit(&pageout_mutex); break; } @@ -1173,11 +1368,6 @@ loop: DTRACE_PROBE3(pageout__loop__end, pgcnt_t, nscan_cnt, pgcnt_t, pcount, uint_t, inst); - /* Kernel probe */ - TNF_PROBE_2(pageout_scan_end, "vm pagedaemon", /* CSTYLED */, - tnf_ulong, pages_scanned, nscan_cnt, tnf_ulong, pages_free, - freemem); - /* * The following two blocks are only relevant when the scanner is * first started up. After the scanner runs for a while, neither of @@ -1187,23 +1377,25 @@ loop: * only during initial scanning when there is a single page scanner * thread running. Thus, we don't use any locking. */ - if (PAGE_SCAN_STARTUP) { - VERIFY3U(inst, ==, 0); - pageout_sample_pages += pcount; - pageout_sample_etime += sample_end - sample_start; - ++pageout_sample_cnt; - - } else if (pageout_new_spread == 0) { - uint_t i; - - /* - * We have run enough samples, set the spread. - */ + if (pageout_new_spread == 0) { VERIFY3U(inst, ==, 0); - pageout_rate = (hrrate_t)pageout_sample_pages * - (hrrate_t)(NANOSEC) / pageout_sample_etime; - pageout_new_spread = pageout_rate / 10; - setupclock(1); + if (PAGE_SCAN_STARTUP) { + /* + * Continue accumulating samples until we have enough + * to get a reasonable value for average scan rate: + */ + pageout_sample_pages += pcount; + pageout_sample_etime += sample_end - sample_start; + ++pageout_sample_cnt; + } else { + /* + * We have run enough samples, set the spread. 
+ */ + pageout_rate = (hrrate_t)pageout_sample_pages * + (hrrate_t)(NANOSEC) / pageout_sample_etime; + pageout_new_spread = pageout_rate / 10; + setupclock(); + } } goto loop; @@ -1265,16 +1457,17 @@ reset: * Look at the page at hand. If it is locked (e.g., for physical i/o), * system (u., page table) or free, then leave it alone. Otherwise, * if we are running the front hand, turn off the page's reference bit. - * If running the back hand, check whether the page has been reclaimed. - * If not, free the page, pushing it to disk first if necessary. + * If the proc is over maxrss, we take it. If running the back hand, + * check whether the page has been reclaimed. If not, free the page, + * pushing it to disk first if necessary. * * Return values: - * -1 if the page is not a candidate at all, - * 0 if not freed, or - * 1 if we freed it. + * CKP_INELIGIBLE if the page is not a candidate at all, + * CKP_NOT_FREED if the page was not freed, or + * CKP_FREED if we freed it. */ -static int -checkpage(struct page *pp, int whichhand) +static checkpage_result_t +checkpage(struct page *pp, pageout_hand_t whichhand) { int ppattr; int isfs = 0; @@ -1296,21 +1489,21 @@ checkpage(struct page *pp, int whichhand) if (PP_ISKAS(pp) || PAGE_LOCKED(pp) || PP_ISFREE(pp) || pp->p_lckcnt != 0 || pp->p_cowcnt != 0 || hat_page_checkshare(pp, po_share)) { - return (-1); + return (CKP_INELIGIBLE); } if (!page_trylock(pp, SE_EXCL)) { /* * Skip the page if we can't acquire the "exclusive" lock. */ - return (-1); + return (CKP_INELIGIBLE); } else if (PP_ISFREE(pp)) { /* * It became free between the above check and our actually - * locking the page. Oh, well there will be other pages. + * locking the page. Oh well, there will be other pages. 
*/ page_unlock(pp); - return (-1); + return (CKP_INELIGIBLE); } /* @@ -1320,7 +1513,7 @@ checkpage(struct page *pp, int whichhand) */ if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { page_unlock(pp); - return (-1); + return (CKP_INELIGIBLE); } if (zones_over) { @@ -1333,7 +1526,7 @@ checkpage(struct page *pp, int whichhand) * Leave the page alone. */ page_unlock(pp); - return (-1); + return (CKP_INELIGIBLE); } zid = pp->p_zoneid; } @@ -1355,34 +1548,44 @@ checkpage(struct page *pp, int whichhand) * The back hand examines the REF bit and always considers * SHARED pages as referenced. */ - if (whichhand == FRONT) + if (whichhand == POH_FRONT) { pagesync_flag = HAT_SYNC_ZERORM; - else + } else { pagesync_flag = HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_REF | HAT_SYNC_STOPON_SHARED; + } ppattr = hat_pagesync(pp, pagesync_flag); recheck: /* - * If page is referenced; fronthand makes unreferenced and reclaimable. - * For the backhand, a process referenced the page since the front hand - * went by, so it's not a candidate for freeing up. + * If page is referenced; make unreferenced but reclaimable. + * If this page is not referenced, then it must be reclaimable + * and we can add it to the free list. */ if (ppattr & P_REF) { - DTRACE_PROBE2(pageout__isref, page_t *, pp, int, whichhand); - if (whichhand == FRONT) { + DTRACE_PROBE2(pageout__isref, page_t *, pp, + pageout_hand_t, whichhand); + + if (whichhand == POH_FRONT) { + /* + * Checking of rss or madvise flags needed here... + * + * If not "well-behaved", fall through into the code + * for not referenced. + */ hat_clrref(pp); } + + /* + * Somebody referenced the page since the front + * hand went by, so it's not a candidate for + * freeing up. + */ page_unlock(pp); - return (0); + return (CKP_NOT_FREED); } - /* - * This page is not referenced, so it must be reclaimable and we can - * add it to the free list. This can be done by either hand. 
- */ - VM_STAT_ADD(pageoutvmstats.checkpage[0]); /* @@ -1393,31 +1596,32 @@ recheck: if (!page_try_demote_pages(pp)) { VM_STAT_ADD(pageoutvmstats.checkpage[1]); page_unlock(pp); - return (-1); + return (CKP_INELIGIBLE); } + ASSERT(pp->p_szc == 0); VM_STAT_ADD(pageoutvmstats.checkpage[2]); + /* - * since page_try_demote_pages() could have unloaded some + * Since page_try_demote_pages() could have unloaded some * mappings it makes sense to reload ppattr. */ ppattr = hat_page_getattr(pp, P_MOD | P_REF); } /* - * If the page is currently dirty, we have to arrange - * to have it cleaned before it can be freed. + * If the page is currently dirty, we have to arrange to have it + * cleaned before it can be freed. * * XXX - ASSERT(pp->p_vnode != NULL); */ - if ((ppattr & P_MOD) && pp->p_vnode) { + if ((ppattr & P_MOD) && pp->p_vnode != NULL) { struct vnode *vp = pp->p_vnode; u_offset_t offset = pp->p_offset; /* - * Note: There is no possibility to test for process being - * swapped out or about to exit since we can't get back to - * process(es) from the page. + * XXX - Test for process being swapped out or about to exit? + * [Can't get back to process(es) using the page.] */ /* @@ -1429,34 +1633,33 @@ recheck: page_unlock(pp); /* - * Queue i/o request for the pageout thread. + * Queue I/O request for the pageout thread. */ if (!queue_io_request(vp, offset)) { VN_RELE(vp); - return (0); + return (CKP_NOT_FREED); } if (isfs) { zone_pageout_stat(zid, ZPO_DIRTY); } else { zone_pageout_stat(zid, ZPO_ANONDIRTY); } - return (1); + return (CKP_FREED); } /* - * Now we unload all the translations, - * and put the page back on to the free list. - * If the page was used (referenced or modified) after - * the pagesync but before it was unloaded we catch it - * and handle the page properly. + * Now we unload all the translations and put the page back on to the + * free list. 
If the page was used (referenced or modified) after the + * pagesync but before it was unloaded we catch it and handle the page + * properly. */ - DTRACE_PROBE2(pageout__free, page_t *, pp, int, whichhand); + DTRACE_PROBE2(pageout__free, page_t *, pp, pageout_hand_t, whichhand); (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); ppattr = hat_page_getattr(pp, P_MOD | P_REF); - if ((ppattr & P_REF) || ((ppattr & P_MOD) && pp->p_vnode)) + if ((ppattr & P_REF) || ((ppattr & P_MOD) && pp->p_vnode != NULL)) { goto recheck; + } - /*LINTED: constant in conditional context*/ VN_DISPOSE(pp, B_FREE, 0, kcred); CPU_STATS_ADD_K(vm, dfree, 1); @@ -1473,7 +1676,7 @@ recheck: zone_pageout_stat(zid, ZPO_ANON); } - return (1); /* freed a page! */ + return (CKP_FREED); } /* diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 830cacd9b3..3ca82366a0 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -385,6 +385,7 @@ typedef struct _kthread { #define T_VFPARENT 0x4000 /* thread is vfork parent, must call vfwait */ #define T_DONTDTRACE 0x8000 /* disable DTrace probes */ #define T_KFPU 0x10000 /* kernel FPU active */ +#define T_PUSHPAGE 0x20000 /* this thread may be assisting pageout */ /* * Flags in t_proc_flag. 
diff --git a/usr/src/uts/common/sys/usb/usba/usba_impl.h b/usr/src/uts/common/sys/usb/usba/usba_impl.h index ddb6f7346d..8ad72b4f05 100644 --- a/usr/src/uts/common/sys/usb/usba/usba_impl.h +++ b/usr/src/uts/common/sys/usb/usba/usba_impl.h @@ -327,7 +327,7 @@ void usba_free_binary_object_store(usba_device_t *); #define DPRINT_MASK_REGISTER 0x00000040 #define DPRINT_MASK_DEVDB 0x00000080 #define DPRINT_MASK_WHCDI 0x00000100 -#define DPRINT_MASK_ALL 0xFFFFFFFF +#define DPRINT_MASK_ALL 0xFFFFFFFF typedef struct usba_log_handle_impl { dev_info_t *lh_dip; @@ -445,7 +445,7 @@ typedef struct usb_dev_cap { usb_dev_driver_callback_t usba_dev_driver_cb; } usb_dev_cap_t; -usb_dev_cap_t usb_cap; +extern usb_dev_cap_t usb_cap; _NOTE(SCHEME_PROTECTS_DATA("unique device capture data", usb_cap)) #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/vm.h b/usr/src/uts/common/sys/vm.h index 0f7dfa9fd0..b32a789d36 100644 --- a/usr/src/uts/common/sys/vm.h +++ b/usr/src/uts/common/sys/vm.h @@ -50,7 +50,7 @@ extern "C" { #if defined(_KERNEL) #include <sys/vnode.h> -void setupclock(int); +void setupclock(void); void pageout(void); void cv_signal_pageout(void); int queue_io_request(struct vnode *, u_offset_t); diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h index 2292310bda..daf76f9f51 100644 --- a/usr/src/uts/common/sys/vmsystm.h +++ b/usr/src/uts/common/sys/vmsystm.h @@ -76,13 +76,14 @@ extern pgcnt_t pageout_reserve; /* point at which we deny non-PG_WAIT calls */ extern pgcnt_t pages_before_pager; /* XXX */ /* - * TRUE if the pageout daemon, fsflush daemon or the scheduler. These - * processes can't sleep while trying to free up memory since a deadlock - * will occur if they do sleep. + * TRUE if the pageout daemon, fsflush daemon, or the scheduler. These threads + * can't sleep while trying to free up memory since a deadlock will occur if + * they do sleep. 
*/ #define NOMEMWAIT() (ttoproc(curthread) == proc_pageout || \ ttoproc(curthread) == proc_fsflush || \ - ttoproc(curthread) == proc_sched) + ttoproc(curthread) == proc_sched || \ + (curthread->t_flag & T_PUSHPAGE) != 0) /* insure non-zero */ #define nz(x) ((x) != 0 ? (x) : 1) diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync deleted file mode 100644 index 1b766008a8..0000000000 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ /dev/null @@ -1,37 +0,0 @@ -The bhyve kernel module and its associated userland consumers have been updated -to the latest upstream FreeBSD sources as of: - -commit 8ade7383cafed0f7555cac16ef7f9e956e46eaeb -Author: grehan <grehan@FreeBSD.org> -Date: Mon May 25 06:25:31 2020 +0000 - - Fix pci-passthru MSI issues with OpenBSD guests - - - Return 2 x 16-bit registers in the correct byte order - for a 4-byte read that spans the CMD/STATUS register. - This reversal was hiding the capabilities-list, which prevented - the MSI capability from being found for XHCI passthru. - - - Reorganize MSI/MSI-x config writes so that a 4-byte write at the - capability offset would have the read-only portion skipped. - This prevented MSI interrupts from being enabled. - - Reported and extensively tested by Anatoli (me at anatoli dot ws) - - PR: 245392 - Reported by: Anatoli (me at anatoli dot ws) - Reviewed by: jhb (bhyve) - Approved by: jhb, bz (mentor) - MFC after: 1 week - Differential Revision: https://reviews.freebsd.org/D24951 - -Divergence Notes: -A previous sync skipped commit c8edafdabc27533d9c51eddc2896e772c16d965c which -introduced a generic backend functionality to network devices. Without that in -place, subsequent updates reflect the absence of that subsystem. Integrating -net backends has not been a priority, given the common use of viona on illumos. - -The draft Save/Restore functionality, added in FreeBSD commit -d3e4e512238b072fb9282e024610b981ba679869, has not been synced into illumos bhyve -yet. 
It is not built by default in FreeBSD, so we're not interested in taking -it until it successfully endures more in-depth testing. diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c index a01b06446d..e2f298ae09 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c +++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c @@ -356,7 +356,6 @@ amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id, cmd = amdvi_get_cmd_tail(softc); KASSERT(cmd != NULL, ("Cmd is NULL")); - cmd->opcode = AMDVI_INVD_PAGE_OPCODE; cmd->word1 = domain_id; /* @@ -729,7 +728,6 @@ amdvi_print_pci_cap(device_t dev) struct amdvi_softc *softc; uint32_t off, cap; - softc = device_get_softc(dev); off = softc->cap_off; @@ -869,7 +867,6 @@ amdvi_alloc_intr_resources(struct amdvi_softc *softc) return (0); } - static void amdvi_print_dev_cap(struct amdvi_softc *softc) { @@ -1121,7 +1118,6 @@ amdvi_free_ptp(uint64_t *ptp, int level) amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i] & AMDVI_PT_MASK), level - 1); - } free(ptp, M_AMDVI); diff --git a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c index 11925582ef..96241be8f4 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c +++ b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c @@ -105,7 +105,6 @@ ivrs_hdr_iterate_tbl(ivhd_iter_t iter, void *arg) default: printf("AMD-Vi:Not IVHD/IVMD type(%d)", ivrs_hdr->Type); - } ivrs_hdr = (ACPI_IVRS_HEADER *)((uint8_t *)ivrs_hdr + diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 94dce3fa47..62823b3a65 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -475,7 +475,6 @@ vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa, svm_enable_intercept(sc, vcpu, VMCB_CR_INTCPT, mask); } - /* * Intercept everything when tracing guest exceptions otherwise * just intercept machine check exception. 
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_support.s b/usr/src/uts/i86pc/io/vmm/amd/svm_support.s index c1537b1544..278dd5c5cb 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_support.s +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_support.s @@ -37,10 +37,6 @@ /* Porting note: This is named 'svm_support.S' upstream. */ -#define VMLOAD .byte 0x0f, 0x01, 0xda -#define VMRUN .byte 0x0f, 0x01, 0xd8 -#define VMSAVE .byte 0x0f, 0x01, 0xdb - /* * Flush scratch registers to avoid lingering guest state being used for @@ -87,7 +83,7 @@ ENTRY_NP(svm_launch) movq %rsi, SVMSTK_RSI(%rsp) movq %rdi, SVMSTK_RDI(%rsp) - /* VMLOAD and VMRUN expect the VMCB physaddr in %rax */ + /* Save the physical address of the VMCB in %rax */ movq %rdi, %rax /* Restore guest state. */ @@ -106,9 +102,9 @@ ENTRY_NP(svm_launch) movq SCTX_RDI(%rsi), %rdi movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */ - VMLOAD - VMRUN - VMSAVE + vmload %rax + vmrun %rax + vmsave %rax /* Grab the svm_regctx pointer */ movq SVMSTK_RSI(%rsp), %rax diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c index 2f715bcc42..02446862ea 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.c +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c @@ -825,31 +825,44 @@ fail: return (B_FALSE); } - -static struct pptdev * -ppt_findf(int fd) +static int +ppt_findf(struct vm *vm, int fd, struct pptdev **pptp) { struct pptdev *ppt = NULL; file_t *fp; vattr_t va; + int err = 0; - if ((fp = getf(fd)) == NULL) { - return (NULL); - } + ASSERT(MUTEX_HELD(&pptdev_mtx)); + + if ((fp = getf(fd)) == NULL) + return (EBADF); va.va_mask = AT_RDEV; if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 || - getmajor(va.va_rdev) != ppt_major) + getmajor(va.va_rdev) != ppt_major) { + err = EBADF; goto fail; + } ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev)); - if (ppt != NULL) - return (ppt); + if (ppt == NULL) { + err = EBADF; + goto fail; + } + + if (ppt->vm != vm) { + err = EBUSY; + goto fail; + } + + *pptp = ppt; 
+ return (0); fail: releasef(fd); - return (NULL); + return (err); } static void @@ -992,16 +1005,11 @@ ppt_assign_device(struct vm *vm, int pptfd) int err = 0; mutex_enter(&pptdev_mtx); - ppt = ppt_findf(pptfd); - if (ppt == NULL) { + /* Passing NULL requires the device to be unowned. */ + err = ppt_findf(NULL, pptfd, &ppt); + if (err != 0) { mutex_exit(&pptdev_mtx); - return (EBADF); - } - - /* Only one VM may own a device at any given time */ - if (ppt->vm != NULL && ppt->vm != vm) { - err = EBUSY; - goto done; + return (err); } if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) { @@ -1091,20 +1099,14 @@ ppt_unassign_device(struct vm *vm, int pptfd) int err = 0; mutex_enter(&pptdev_mtx); - ppt = ppt_findf(pptfd); - if (ppt == NULL) { + err = ppt_findf(vm, pptfd, &ppt); + if (err != 0) { mutex_exit(&pptdev_mtx); - return (EBADF); + return (err); } - /* If this device is not owned by this 'vm' then bail out. */ - if (ppt->vm != vm) { - err = EBUSY; - goto done; - } ppt_do_unassign(ppt); -done: releasef(pptfd); mutex_exit(&pptdev_mtx); return (err); @@ -1135,14 +1137,10 @@ ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len, int err = 0; mutex_enter(&pptdev_mtx); - ppt = ppt_findf(pptfd); - if (ppt == NULL) { + err = ppt_findf(vm, pptfd, &ppt); + if (err != 0) { mutex_exit(&pptdev_mtx); - return (EBADF); - } - if (ppt->vm != vm) { - err = EBUSY; - goto done; + return (err); } /* @@ -1208,13 +1206,14 @@ ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg, return (EINVAL); mutex_enter(&pptdev_mtx); - ppt = ppt_findf(pptfd); - if (ppt == NULL) { + err = ppt_findf(vm, pptfd, &ppt); + if (err != 0) { mutex_exit(&pptdev_mtx); - return (EBADF); + return (err); } - if (ppt->vm != vm) { - /* Make sure we own this device */ + + /* Reject attempts to enable MSI while MSI-X is active. 
*/ + if (ppt->msix.num_msgs != 0 && numvec != 0) { err = EBUSY; goto done; } @@ -1308,13 +1307,14 @@ ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr, int err = 0; mutex_enter(&pptdev_mtx); - ppt = ppt_findf(pptfd); - if (ppt == NULL) { + err = ppt_findf(vm, pptfd, &ppt); + if (err != 0) { mutex_exit(&pptdev_mtx); - return (EBADF); + return (err); } - /* Make sure we own this device */ - if (ppt->vm != vm) { + + /* Reject attempts to enable MSI-X while MSI is active. */ + if (ppt->msi.num_msgs != 0) { err = EBUSY; goto done; } @@ -1410,14 +1410,10 @@ ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit) int err = 0; mutex_enter(&pptdev_mtx); - ppt = ppt_findf(pptfd); - if (ppt == NULL) { + err = ppt_findf(vm, pptfd, &ppt); + if (err != 0) { mutex_exit(&pptdev_mtx); - return (EBADF); - } - if (ppt->vm != vm) { - err = EBUSY; - goto done; + return (err); } if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI, @@ -1429,7 +1425,26 @@ ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit) *msixlimit = -1; } -done: + releasef(pptfd); + mutex_exit(&pptdev_mtx); + return (err); +} + +int +ppt_disable_msix(struct vm *vm, int pptfd) +{ + struct pptdev *ppt; + int err = 0; + + mutex_enter(&pptdev_mtx); + err = ppt_findf(vm, pptfd, &ppt); + if (err != 0) { + mutex_exit(&pptdev_mtx); + return (err); + } + + ppt_teardown_msix(ppt); + releasef(pptfd); mutex_exit(&pptdev_mtx); return (err); diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.h b/usr/src/uts/i86pc/io/vmm/io/ppt.h index 979c0e18ac..72a768c085 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.h +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.h @@ -38,6 +38,7 @@ int ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg, int numvec); int ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); +int ppt_disable_msix(struct vm *vm, int pptfd); int ppt_assigned_devices(struct vm *vm); boolean_t 
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa); int ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit); diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile b/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile index 708818d78e..1b08b06b58 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.mapfile @@ -41,6 +41,7 @@ SYMBOL_VERSION ILLUMOSprivate { ppt_assigned_devices; ppt_is_mmio; ppt_assign_device; + ppt_disable_msix; ppt_unassign_device; ppt_unassign_all; ppt_map_mmio; diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c index d8cfc1beb6..024aa076f7 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c +++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c @@ -76,7 +76,6 @@ struct vatpit_callout_arg { int channel_num; }; - struct channel { int mode; uint16_t initial; /* initial counter value */ @@ -293,7 +292,6 @@ pit_readback(struct vatpit *vatpit, uint8_t cmd) return (error); } - static int vatpit_update_mode(struct vatpit *vatpit, uint8_t val) { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index df127d4021..7b04aed61a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -490,6 +490,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_RTC_WRITE: case VM_RTC_SETTIME: case VM_RTC_GETTIME: + case VM_PPTDEV_DISABLE_MSIX: #ifndef __FreeBSD__ case VM_DEVMEM_GETOFFSET: #endif @@ -616,6 +617,16 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, pptmsix.vector_control); break; } + case VM_PPTDEV_DISABLE_MSIX: { + struct vm_pptdev pptdev; + + if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) { + error = EFAULT; + break; + } + error = ppt_disable_msix(sc->vmm_vm, pptdev.pptfd); + break; + } case VM_MAP_PPTDEV_MMIO: { struct vm_pptdev_mmio pptmmio; diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index f5d031bfd4..f4a68636b3 100644 --- 
a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -368,6 +368,7 @@ struct vm_run_state { #define VM_SUSPEND_CPU (VMM_IOC_BASE | 0x1d) #define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e) +#define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f) #define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff) diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 03fe983372..d6d44e57af 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -1303,6 +1303,7 @@ fcnname/**/_info: \ WSTUB(ppt, ppt_map_mmio, nomod_einval); WSTUB(ppt, ppt_setup_msi, nomod_einval); WSTUB(ppt, ppt_setup_msix, nomod_einval); + WSTUB(ppt, ppt_disable_msix, nomod_einval); WSTUB(ppt, ppt_assigned_devices, nomod_zero); WSTUB(ppt, ppt_is_mmio, nomod_zero); WSTUB(ppt, ppt_assign_device, nomod_einval); |