diff options
Diffstat (limited to 'usr/src/uts/common/cpr')
-rw-r--r-- | usr/src/uts/common/cpr/cpr_driver.c | 131 | ||||
-rw-r--r-- | usr/src/uts/common/cpr/cpr_dump.c | 51 | ||||
-rw-r--r-- | usr/src/uts/common/cpr/cpr_main.c | 692 | ||||
-rw-r--r-- | usr/src/uts/common/cpr/cpr_misc.c | 32 | ||||
-rw-r--r-- | usr/src/uts/common/cpr/cpr_mod.c | 191 | ||||
-rw-r--r-- | usr/src/uts/common/cpr/cpr_stat.c | 25 | ||||
-rw-r--r-- | usr/src/uts/common/cpr/cpr_uthread.c | 36 |
7 files changed, 953 insertions, 205 deletions
diff --git a/usr/src/uts/common/cpr/cpr_driver.c b/usr/src/uts/common/cpr/cpr_driver.c index a23a9cbf7c..442473c7ca 100644 --- a/usr/src/uts/common/cpr/cpr_driver.c +++ b/usr/src/uts/common/cpr/cpr_driver.c @@ -45,6 +45,18 @@ extern int devi_attach(dev_info_t *, int); static char *devi_string(dev_info_t *, char *); static int cpr_is_real_device(dev_info_t *); +/* + * Xen uses this code to suspend _all_ drivers quickly and easily. + * Suspend and Resume uses it for the same reason, but also has + * to contend with some platform specific code that Xen does not. + * it is also used as a test entry point for developers/testers to + * execute code without going through a complete suspend. So additions + * that have platform implications shall need #if[n]def's. + */ +#ifndef __xpv +extern void i_cpr_save_configuration(dev_info_t *); +extern void i_cpr_restore_configuration(dev_info_t *); +#endif /* * Traverse the dev info tree: @@ -70,22 +82,52 @@ cpr_suspend_devices(dev_info_t *dip) devi_string(dip, buf)); ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0); - if (!i_ddi_devi_attached(dip)) +#ifndef __xpv + i_cpr_save_configuration(dip); +#endif + + + if (!i_ddi_devi_attached(dip)) { error = DDI_FAILURE; - else - error = devi_detach(dip, DDI_SUSPEND); + } else { +#ifndef __xpv + if (cpr_test_point != DEVICE_SUSPEND_TO_RAM || + (cpr_test_point == DEVICE_SUSPEND_TO_RAM && + cpr_device == ddi_driver_major(dip))) { +#endif + error = devi_detach(dip, DDI_SUSPEND); +#ifndef __xpv + } else { + error = DDI_SUCCESS; + } +#endif + } - if (error == DDI_SUCCESS) + if (error == DDI_SUCCESS) { DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED; + } + else { CPR_DEBUG(CPR_DEBUG2, "WARNING: Unable to suspend device %s\n", devi_string(dip, buf)); cpr_err(CE_WARN, "Unable to suspend device %s.", - devi_string(dip, buf)); + devi_string(dip, buf)); cpr_err(CE_WARN, "Device is busy or does not " - "support suspend/resume."); - return (ENXIO); + "support suspend/resume."); +#ifndef 
__xpv + /* + * the device has failed to suspend; however, + * if cpr_test_point == FORCE_SUSPEND_TO_RAM, then + * after putting out the warning message above, + * we carry on as if suspending the device had + * been successful + */ + if (cpr_test_point == FORCE_SUSPEND_TO_RAM) + DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED; + else +#endif + return (ENXIO); } } return (0); @@ -124,13 +166,27 @@ cpr_resume_devices(dev_info_t *start, int resume_failed) DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED; /* + * Always attempt to restore device configuration before + * attempting resume + */ +#ifndef __xpv + i_cpr_restore_configuration(dip); +#endif + + /* * There may be background attaches happening on devices * that were not originally suspended by cpr, so resume * only devices that were suspended by cpr. Also, stop * resuming after the first resume failure, but traverse - * the entire tree to clear the suspend flag. + * the entire tree to clear the suspend flag unless the + * FORCE_SUSPEND_TO_RAM test point is set. 
*/ +#ifndef __xpv + if (did_suspend && (!error || + cpr_test_point == FORCE_SUSPEND_TO_RAM)) { +#else if (did_suspend && !error) { +#endif CPR_DEBUG(CPR_DEBUG2, "Resuming device %s\n", devi_string(dip, buf)); /* @@ -146,17 +202,28 @@ cpr_resume_devices(dev_info_t *start, int resume_failed) cpr_err(CE_WARN, "Skipping %s, device " "not ready for resume", devi_string(dip, buf)); - } else if (devi_attach(dip, DDI_RESUME) != - DDI_SUCCESS) { - CPR_DEBUG(CPR_DEBUG2, - "WARNING: Unable to resume device %s\n", - devi_string(dip, buf)); - cpr_err(CE_WARN, "Unable to resume device %s", - devi_string(dip, buf)); - error = ENXIO; +#ifndef __xpv + } else if (cpr_test_point != DEVICE_SUSPEND_TO_RAM || + (cpr_test_point == DEVICE_SUSPEND_TO_RAM && + cpr_device == ddi_driver_major(dip))) { +#else + } else { +#endif + if (devi_attach(dip, DDI_RESUME) != + DDI_SUCCESS) { + error = ENXIO; + } } } + if (error == ENXIO) { + CPR_DEBUG(CPR_DEBUG2, + "WARNING: Unable to resume device %s\n", + devi_string(dip, buf)); + cpr_err(CE_WARN, "Unable to resume device %s", + devi_string(dip, buf)); + } + error = cpr_resume_devices(ddi_get_child(dip), error); last = dip; } @@ -176,10 +243,8 @@ devi_string(dev_info_t *devi, char *buf) name = ddi_node_name(devi); address = ddi_get_name_addr(devi); - size = (name == NULL) ? - strlen("<null name>") : strlen(name); - size += (address == NULL) ? - strlen("<null>") : strlen(address); + size = (name == NULL) ? strlen("<null name>") : strlen(name); + size += (address == NULL) ? strlen("<null>") : strlen(address); /* * Make sure that we don't over-run the buffer. @@ -237,29 +302,3 @@ cpr_is_real_device(dev_info_t *dip) return (1); } } - -/* - * Power down the system. - */ -void -cpr_power_down(void) -{ -#if defined(__sparc) - /* - * XXX This platform firmware implementation dependency - * doesn't belong in common code! - */ - int is_defined = 0; - char *wordexists = "p\" power-off\" find nip swap l! 
"; - char *req = "power-off"; - - /* - * is_defined has value -1 when defined - */ - prom_interpret(wordexists, (uintptr_t)&is_defined, 0, 0, 0, 0); - if (is_defined) { - CPR_DEBUG(CPR_DEBUG1, "\ncpr: %s...\n", req); - prom_interpret(req, 0, 0, 0, 0, 0); - } -#endif -} diff --git a/usr/src/uts/common/cpr/cpr_dump.c b/usr/src/uts/common/cpr/cpr_dump.c index 99f5cea43d..28fee49bf9 100644 --- a/usr/src/uts/common/cpr/cpr_dump.c +++ b/usr/src/uts/common/cpr/cpr_dump.c @@ -54,17 +54,20 @@ #include <sys/ddi.h> #include <sys/panic.h> #include <sys/thread.h> +#include <sys/note.h> /* Local defines and variables */ #define BTOb(bytes) ((bytes) << 3) /* Bytes to bits, log2(NBBY) */ #define bTOB(bits) ((bits) >> 3) /* bits to Bytes, log2(NBBY) */ +#if defined(__sparc) static uint_t cpr_pages_tobe_dumped; static uint_t cpr_regular_pgs_dumped; - static int cpr_dump_regular_pages(vnode_t *); static int cpr_count_upages(int, bitfunc_t); static int cpr_compress_and_write(vnode_t *, uint_t, pfn_t, pgcnt_t); +#endif + int cpr_flush_write(vnode_t *); int cpr_contig_pages(vnode_t *, int); @@ -75,6 +78,8 @@ extern size_t cpr_get_devsize(dev_t); extern int i_cpr_dump_setup(vnode_t *); extern int i_cpr_blockzero(char *, char **, int *, vnode_t *); extern int cpr_test_mode; +int cpr_setbit(pfn_t, int); +int cpr_clrbit(pfn_t, int); ctrm_t cpr_term; @@ -87,13 +92,16 @@ int cpr_nbitmaps; char *cpr_pagedata; /* page buffer for compression / tmp copy */ size_t cpr_pagedata_size; /* page buffer size in bytes */ +#if defined(__sparc) static char *cpr_wptr; /* keep track of where to write to next */ static int cpr_file_bn; /* cpr state-file block offset */ static int cpr_disk_writes_ok; static size_t cpr_dev_space = 0; +#endif char cpr_pagecopy[CPR_MAXCONTIG * MMU_PAGESIZE]; +#if defined(__sparc) /* * On some platforms bcopy may modify the thread structure * during bcopy (eg, to prevent cpu migration). 
If the @@ -194,6 +202,7 @@ cpr_write_header(vnode_t *vp) struct cpr_dump_desc cdump; pgcnt_t bitmap_pages; pgcnt_t kpages, vpages, upages; + pgcnt_t cpr_count_kpages(int mapflag, bitfunc_t bitfunc); cdump.cdd_magic = (uint_t)CPR_DUMP_MAGIC; cdump.cdd_version = CPR_VERSION; @@ -237,19 +246,20 @@ cpr_write_header(vnode_t *vp) * Roundup will be done in the file allocation code. */ STAT->cs_nocomp_statefsz = sizeof (cdd_t) + sizeof (cmd_t) + - (sizeof (cbd_t) * cdump.cdd_bitmaprec) + - (sizeof (cpd_t) * cdump.cdd_dumppgsize) + - mmu_ptob(cdump.cdd_dumppgsize + bitmap_pages); + (sizeof (cbd_t) * cdump.cdd_bitmaprec) + + (sizeof (cpd_t) * cdump.cdd_dumppgsize) + + mmu_ptob(cdump.cdd_dumppgsize + bitmap_pages); /* * If the estimated statefile is not big enough, * go retry now to save un-necessary operations. */ if (!(CPR->c_flags & C_COMPRESSING) && - (STAT->cs_nocomp_statefsz > STAT->cs_est_statefsz)) { + (STAT->cs_nocomp_statefsz > STAT->cs_est_statefsz)) { if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7)) - prom_printf("cpr_write_header: STAT->cs_nocomp_statefsz > " - "STAT->cs_est_statefsz\n"); + prom_printf("cpr_write_header: " + "STAT->cs_nocomp_statefsz > " + "STAT->cs_est_statefsz\n"); return (ENOSPC); } @@ -272,10 +282,10 @@ cpr_write_terminator(vnode_t *vp) /* count the last one (flush) */ cpr_term.real_statef_size = STAT->cs_real_statefsz + - btod(cpr_wptr - cpr_buf) * DEV_BSIZE; + btod(cpr_wptr - cpr_buf) * DEV_BSIZE; CPR_DEBUG(CPR_DEBUG9, "cpr_dump: Real Statefile Size: %ld\n", - STAT->cs_real_statefsz); + STAT->cs_real_statefsz); cpr_tod_get(&cpr_term.tm_shutdown); @@ -382,6 +392,7 @@ cpr_write_statefile(vnode_t *vp) return (error); } +#endif /* @@ -393,9 +404,13 @@ cpr_write_statefile(vnode_t *vp) * - writes the remaining user pages * - writes the kernel pages */ +#if defined(__x86) + _NOTE(ARGSUSED(0)) +#endif int cpr_dump(vnode_t *vp) { +#if defined(__sparc) int error; if (cpr_buf == NULL) { @@ -484,11 +499,13 @@ cpr_dump(vnode_t *vp) if (error = 
i_cpr_blockzero(cpr_buf, &cpr_wptr, &cpr_file_bn, vp)) return (error); +#endif return (0); } +#if defined(__sparc) /* * cpr_xwalk() is called many 100x with a range within kvseg or kvseg_reloc; * a page-count from each range is accumulated at arg->pages. @@ -633,7 +650,8 @@ cpr_sparse_seg_check(struct seg *seg) for (; ste->st_seg; ste++) { tseg = (ste->st_addrtype == KSEG_PTR_ADDR) ? - *ste->st_seg : (struct seg *)ste->st_seg; + *ste->st_seg : (struct seg *)ste->st_seg; + if (seg == tseg) return (ste); } @@ -690,7 +708,8 @@ cpr_count_kpages(int mapflag, bitfunc_t bitfunc) CPR_DEBUG(CPR_DEBUG9, "cpr_count_kpages: kas_cnt=%ld\n", kas_cnt); CPR_DEBUG(CPR_DEBUG7, "\ncpr_count_kpages: %ld pages, 0x%lx bytes\n", - kas_cnt, mmu_ptob(kas_cnt)); + kas_cnt, mmu_ptob(kas_cnt)); + return (kas_cnt); } @@ -796,7 +815,7 @@ cpr_count_upages(int mapflag, bitfunc_t bitfunc) extern struct vnode prom_ppages; if (pp->p_vnode == NULL || PP_ISKAS(pp) || pp->p_vnode == &prom_ppages || - PP_ISFREE(pp) && PP_ISAGED(pp)) + PP_ISFREE(pp) && PP_ISAGED(pp)) #else if (pp->p_vnode == NULL || PP_ISKAS(pp) || PP_ISFREE(pp) && PP_ISAGED(pp)) @@ -813,9 +832,10 @@ cpr_count_upages(int mapflag, bitfunc_t bitfunc) STAT->cs_upage2statef = dcnt; CPR_DEBUG(CPR_DEBUG9, "cpr_count_upages: dirty=%ld total=%ld\n", - dcnt, tcnt); + dcnt, tcnt); CPR_DEBUG(CPR_DEBUG7, "cpr_count_upages: %ld pages, 0x%lx bytes\n", - dcnt, mmu_ptob(dcnt)); + dcnt, mmu_ptob(dcnt)); + return (dcnt); } @@ -907,7 +927,7 @@ cpr_compress_and_write(vnode_t *vp, uint_t va, pfn_t pfn, pgcnt_t npg) i_cpr_mapin(CPR->c_mapping_area, npg, pfn); CPR_DEBUG(CPR_DEBUG3, "mapped-in %ld pages, vaddr 0x%p, pfn 0x%lx\n", - npg, CPR->c_mapping_area, pfn); + npg, CPR->c_mapping_area, pfn); /* * Fill cpr page descriptor. 
@@ -1181,3 +1201,4 @@ cpr_dump_regular_pages(vnode_t *vp) CPR_DEBUG(CPR_DEBUG7, "cpr_dump_regular_pages() done.\n"); return (error); } +#endif diff --git a/usr/src/uts/common/cpr/cpr_main.c b/usr/src/uts/common/cpr/cpr_main.c index 6669469681..65e911cb11 100644 --- a/usr/src/uts/common/cpr/cpr_main.c +++ b/usr/src/uts/common/cpr/cpr_main.c @@ -25,7 +25,6 @@ #pragma ident "%Z%%M% %I% %E% SMI" - /* * This module contains the guts of checkpoint-resume mechanism. * All code in this module is platform independent. @@ -51,6 +50,10 @@ #include <sys/reboot.h> #include <sys/kdi.h> #include <sys/promif.h> +#include <sys/srn.h> +#include <sys/cpr_impl.h> + +#define PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm) extern struct cpr_terminator cpr_term; @@ -63,18 +66,47 @@ extern void cpr_set_bitmap_size(void); extern void cpr_stat_init(); extern void cpr_statef_close(void); extern void flush_windows(void); +extern void (*srn_signal)(int, int); +extern void init_cpu_syscall(struct cpu *); +extern void i_cpr_pre_resume_cpus(); +extern void i_cpr_post_resume_cpus(); extern int pm_powering_down; - -static int cpr_suspend(void); -static int cpr_resume(void); -static void cpr_suspend_init(void); +extern kmutex_t srn_clone_lock; +extern int srn_inuse; + +static int cpr_suspend(int); +static int cpr_resume(int); +static void cpr_suspend_init(int); +#if defined(__x86) +static int cpr_suspend_cpus(void); +static void cpr_resume_cpus(void); +#endif +static int cpr_all_online(void); +static void cpr_restore_offline(void); cpr_time_t wholecycle_tv; int cpr_suspend_succeeded; pfn_t curthreadpfn; int curthreadremapped; +extern cpuset_t cpu_ready_set; +extern void *(*cpu_pause_func)(void *); + +extern processorid_t i_cpr_bootcpuid(void); +extern cpu_t *i_cpr_bootcpu(void); +extern void tsc_adjust_delta(hrtime_t tdelta); +extern void tsc_resume(void); +extern int tsc_resume_in_cyclic; + +/* + * Set this variable to 1, to have device drivers resume in an + * uniprocessor environment. 
This is to allow drivers that assume + * that they resume on a UP machine to continue to work. Should be + * deprecated once the broken drivers are fixed + */ +int cpr_resume_uniproc = 0; + /* * save or restore abort_enable; this prevents a drop * to kadb or prom during cpr_resume_devices() when @@ -101,23 +133,73 @@ cpr_sae(int stash) * returned back to here and it then calls the resume routine. */ int -cpr_main(void) +cpr_main(int sleeptype) { - label_t saveq = ttolwp(curthread)->lwp_qsav; - int rc; + int rc, rc2; + label_t saveq; + klwp_t *tlwp = ttolwp(curthread); - if (rc = cpr_default_setup(1)) - return (rc); + if (sleeptype == CPR_TODISK) { + if ((rc = cpr_default_setup(1)) != 0) + return (rc); + ASSERT(tlwp); + saveq = tlwp->lwp_qsav; + } + + if (sleeptype == CPR_TORAM) { + rc = cpr_suspend(sleeptype); + PMD(PMD_SX, ("cpr_suspend rets %x\n", rc)) + if (rc == 0) { + int i_cpr_power_down(int sleeptype); + + /* + * From this point on, we should be at a high + * spl, interrupts disabled, and all but one + * cpus paused (effectively UP/single threaded). + * So this is where we want to put ASSERTS() + * to let us know otherwise. + */ + ASSERT(cpus_paused()); + /* + * Now do the work of actually putting this + * machine to sleep! + */ + rc = i_cpr_power_down(sleeptype); + if (rc == 0) { + PMD(PMD_SX, ("back from succssful suspend\n")) + } + /* + * We do care about the return value from cpr_resume + * at this point, as it will tell us if one of the + * resume functions failed (cpr_resume_devices()). + * However, for this to return and _not_ panic, means + * that we must be in one of the test functions. So + * check for that and return an appropriate message. 
+ */ + rc2 = cpr_resume(sleeptype); + if (rc2 != 0) { + ASSERT(cpr_test_point > 0); + cmn_err(CE_NOTE, + "cpr_resume returned non-zero: %d\n", rc2); + PMD(PMD_SX, ("cpr_resume rets %x\n", rc2)) + } + ASSERT(!cpus_paused()); + } else { + PMD(PMD_SX, ("failed suspend, resuming\n")) + rc = cpr_resume(sleeptype); + } + return (rc); + } /* - * Remember where we are for resume + * Remember where we are for resume after reboot */ - if (!setjmp(&ttolwp(curthread)->lwp_qsav)) { + if (!setjmp(&tlwp->lwp_qsav)) { /* * try to checkpoint the system, if failed return back * to userland, otherwise power off. */ - rc = cpr_suspend(); + rc = cpr_suspend(sleeptype); if (rc || cpr_reusable_mode) { /* * We don't really want to go down, or @@ -125,22 +207,28 @@ cpr_main(void) * to put the system back to an operable state then * return back to userland. */ - (void) cpr_resume(); + PMD(PMD_SX, ("failed suspend, resuming\n")) + (void) cpr_resume(sleeptype); + PMD(PMD_SX, ("back from failed suspend resume\n")) } } else { /* * This is the resumed side of longjmp, restore the previous * longjmp pointer if there is one so this will be transparent * to the world. 
+ * This path is only for CPR_TODISK, where we reboot */ - ttolwp(curthread)->lwp_qsav = saveq; + ASSERT(sleeptype == CPR_TODISK); + tlwp->lwp_qsav = saveq; CPR->c_flags &= ~C_SUSPENDING; CPR->c_flags |= C_RESUMING; /* * resume the system back to the original state */ - rc = cpr_resume(); + rc = cpr_resume(sleeptype); + PMD(PMD_SX, ("back from successful suspend; resume rets %x\n", + rc)) } (void) cpr_default_setup(0); @@ -149,6 +237,8 @@ cpr_main(void) } +#if defined(__sparc) + /* * check/disable or re-enable UFS logging */ @@ -180,8 +270,7 @@ cpr_log_status(int enable, int *svstat, vnode_t *vp) *svstat = status; if (cpr_debug & CPR_DEBUG5) { mntpt = vfs_getmntpoint(vp->v_vfsp); - CPR_DEBUG(CPR_DEBUG5, - "%s: \"%s\", logging status = %d\n", + errp("%s: \"%s\", logging status = %d\n", str, refstr_value(mntpt), status); refstr_rele(mntpt); }; @@ -207,11 +296,10 @@ cpr_log_status(int enable, int *svstat, vnode_t *vp) } else { if (cpr_debug & CPR_DEBUG5) { mntpt = vfs_getmntpoint(vp->v_vfsp); - CPR_DEBUG(CPR_DEBUG5, - "%s: \"%s\", logging is now %sd\n", + errp("%s: \"%s\", logging is now %sd\n", str, refstr_value(mntpt), able); refstr_rele(mntpt); - } + }; } } @@ -223,7 +311,6 @@ cpr_log_status(int enable, int *svstat, vnode_t *vp) *svstat = -1; } - /* * enable/disable UFS logging on filesystems containing cpr_default_path * and cpr statefile. since the statefile can be on any fs, that fs @@ -234,6 +321,7 @@ cpr_log_status(int enable, int *svstat, vnode_t *vp) * file outside of rootfs would cause errors during cprboot, plus cpr and * fsck problems with the new fs if logging were enabled. 
*/ + static int cpr_ufs_logging(int enable) { @@ -274,6 +362,7 @@ cpr_ufs_logging(int enable) return (0); } +#endif /* @@ -288,6 +377,54 @@ cpr_lock_mgr(void (*service)(void)) (*service)(); } +int +cpr_suspend_cpus(void) +{ + cpu_t *bootcpu; + int ret = 0; + extern void *i_cpr_save_context(void *arg); + + mutex_enter(&cpu_lock); + + /* + * if bootcpu is offline bring it back online + */ + bootcpu = i_cpr_bootcpu(); + + /* + * the machine could not have booted without a bootcpu + */ + ASSERT(bootcpu != NULL); + + /* + * bring all the offline cpus online + */ + if ((ret = cpr_all_online())) { + mutex_exit(&cpu_lock); + return (ret); + } + + /* + * Set the affinity to be the boot processor. + * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus() + */ + affinity_set(i_cpr_bootcpuid()); + + ASSERT(CPU->cpu_id == 0); + + PMD(PMD_SX, ("curthread running on bootcpu\n")) + + /* + * pause all other running CPUs and save the CPU state at the same time + */ + cpu_pause_func = i_cpr_save_context; + pause_cpus(NULL); + + mutex_exit(&cpu_lock); + + return (0); +} + /* * Take the system down to a checkpointable state and write * the state file, the following are sequentially executed: @@ -301,41 +438,69 @@ cpr_lock_mgr(void (*service)(void)) * - suspend all devices * - block intrpts * - dump system state and memory to state file + * - SPARC code will not be called with CPR_TORAM, caller filters */ static int -cpr_suspend(void) +cpr_suspend(int sleeptype) { - int sf_realloc, rc, skt_rc, nverr; +#if defined(__sparc) + int sf_realloc, nverr; +#endif + int rc = 0; + int skt_rc = 0; + PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype)) cpr_set_substate(C_ST_SUSPEND_BEGIN); - cpr_suspend_init(); + cpr_suspend_init(sleeptype); cpr_save_time(); cpr_tod_get(&wholecycle_tv); CPR_STAT_EVENT_START("Suspend Total"); + i_cpr_alloc_cpus(); + +#if defined(__sparc) + ASSERT(sleeptype == CPR_TODISK); if (!cpr_reusable_mode) { /* - * We need to validate default file before fs functionality - * 
is disabled. + * We need to validate default file before fs + * functionality is disabled. */ if (rc = cpr_validate_definfo(0)) return (rc); } - i_cpr_save_machdep_info(); +#endif + PMD(PMD_SX, ("cpr_suspend: stop scans\n")) /* Stop PM scans ASAP */ (void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT); pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND, NULL, NULL, PM_DEP_WAIT, NULL, 0); +#if defined(__sparc) + ASSERT(sleeptype == CPR_TODISK); cpr_set_substate(C_ST_MP_OFFLINE); if (rc = cpr_mp_offline()) return (rc); +#endif + /* + * Ask Xorg to suspend the frame buffer, and wait for it to happen + */ + mutex_enter(&srn_clone_lock); + if (srn_signal) { + PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., " + "SRN_SUSPEND_REQ)\n")) + srn_inuse = 1; /* because *(srn_signal) cv_waits */ + (*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ); + srn_inuse = 0; + } else { + PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n")) + } + mutex_exit(&srn_clone_lock); /* * Ask the user threads to stop by themselves, but @@ -346,11 +511,13 @@ cpr_suspend(void) CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads..."); CPR_STAT_EVENT_START(" stop users"); cpr_set_substate(C_ST_STOP_USER_THREADS); + PMD(PMD_SX, ("cpr_suspend: stop user threads\n")) if (rc = cpr_stop_user_threads()) return (rc); CPR_STAT_EVENT_END(" stop users"); CPR_DEBUG(CPR_DEBUG1, "done\n"); + PMD(PMD_SX, ("cpr_suspend: save direct levels\n")) pm_save_direct_levels(); /* @@ -360,10 +527,14 @@ cpr_suspend(void) */ (void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT); + PMD(PMD_SX, ("cpr_suspend: send notice\n")) +#ifndef DEBUG cpr_send_notice(); if (cpr_debug) prom_printf("\n"); +#endif + PMD(PMD_SX, ("cpr_suspend: POST USER callback\n")) (void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT); /* @@ -373,9 +544,12 @@ cpr_suspend(void) * a kernel thread. 
*/ cpr_set_substate(C_ST_PM_REATTACH_NOINVOL); + PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n")) if (!pm_reattach_noinvol()) return (ENXIO); +#if defined(__sparc) + ASSERT(sleeptype == CPR_TODISK); /* * if ufs logging is enabled, we need to disable before * stopping kernel threads so that ufs delete and roll @@ -398,8 +572,8 @@ cpr_suspend(void) alloc_statefile: /* - * If our last state was C_ST_DUMP_NOSPC, we're trying to realloc - * the statefile, otherwise this is the first attempt. + * If our last state was C_ST_DUMP_NOSPC, we're trying to + * realloc the statefile, otherwise this is the first attempt. */ sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0; @@ -407,7 +581,7 @@ alloc_statefile: cpr_set_substate(C_ST_STATEF_ALLOC); if (rc = cpr_alloc_statefile(sf_realloc)) { if (sf_realloc) - prom_printf("realloc failed\n"); + errp("realloc failed\n"); return (rc); } CPR_STAT_EVENT_END(" alloc statefile"); @@ -415,9 +589,10 @@ alloc_statefile: /* * Sync the filesystem to preserve its integrity. * - * This sync is also used to flush out all B_DELWRI buffers (fs cache) - * which are mapped and neither dirty nor referenced before - * cpr_invalidate_pages destroys them. fsflush does similar thing. + * This sync is also used to flush out all B_DELWRI buffers + * (fs cache) which are mapped and neither dirty nor referenced + * before cpr_invalidate_pages destroys them. + * fsflush does similar thing. */ sync(); @@ -425,16 +600,18 @@ alloc_statefile: * destroy all clean file mapped kernel pages */ CPR_STAT_EVENT_START(" clean pages"); - CPR_DEBUG(CPR_DEBUG1, "cleaning up mapped pages..."); + CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages...")); (void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT); - CPR_DEBUG(CPR_DEBUG1, "done\n"); + CPR_DEBUG(CPR_DEBUG1, ("done\n")); CPR_STAT_EVENT_END(" clean pages"); +#endif /* * Hooks needed by lock manager prior to suspending. * Refer to code for more comments. 
*/ + PMD(PMD_SX, ("cpr_suspend: lock mgr\n")) cpr_lock_mgr(lm_cprsuspend); /* @@ -444,6 +621,7 @@ alloc_statefile: CPR_DEBUG(CPR_DEBUG1, "suspending drivers..."); cpr_set_substate(C_ST_SUSPEND_DEVICES); pm_powering_down = 1; + PMD(PMD_SX, ("cpr_suspend: suspending devices\n")) rc = cpr_suspend_devices(ddi_root_node()); pm_powering_down = 0; if (rc) @@ -455,36 +633,61 @@ alloc_statefile: * Stop all daemon activities */ cpr_set_substate(C_ST_STOP_KERNEL_THREADS); + PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n")) if (skt_rc = cpr_stop_kernel_threads()) return (skt_rc); + PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n")) (void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT); + PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n")) pm_reattach_noinvol_fini(); cpr_sae(1); + PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n")) (void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT); - /* - * It's safer to do tod_get before we disable all intr. - */ - CPR_STAT_EVENT_START(" write statefile"); + if (sleeptype == CPR_TODISK) { + /* + * It's safer to do tod_get before we disable all intr. + */ + CPR_STAT_EVENT_START(" write statefile"); + } /* * it's time to ignore the outside world, stop the real time * clock and disable any further intrpt activity. 
*/ + PMD(PMD_SX, ("cpr_suspend: handle xc\n")) i_cpr_handle_xc(1); /* turn it on to disable xc assertion */ mutex_enter(&cpu_lock); + PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n")) cyclic_suspend(); mutex_exit(&cpu_lock); - mon_clock_stop(); - mon_clock_unshare(); - mon_clock_start(); + /* + * Due to the different methods of resuming the system between + * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image) + * and CPR_TORAM (restart via reset into existing kernel image) + * cpus are not suspended and restored in the SPARC case, since it + * is necessary to restart the cpus and pause them before restoring + * the OBP image + */ + +#if defined(__x86) + + /* pause aux cpus */ + PMD(PMD_SX, ("pause aux cpus\n")) + + cpr_set_substate(C_ST_MP_PAUSED); + if ((rc = cpr_suspend_cpus()) != 0) + return (rc); +#endif + + PMD(PMD_SX, ("cpr_suspend: stop intr\n")) i_cpr_stop_intr(); CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n"); @@ -494,16 +697,28 @@ alloc_statefile: * it must be up now. */ ASSERT(pm_cfb_is_up()); + PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n")) prom_suspend_prepost(); +#if defined(__sparc) /* * getting ready to write ourself out, flush the register * windows to make sure that our stack is good when we * come back on the resume side. */ flush_windows(); +#endif /* + * For S3, we're done + */ + if (sleeptype == CPR_TORAM) { + PMD(PMD_SX, ("cpr_suspend rets %x\n", rc)) + cpr_set_substate(C_ST_NODUMP); + return (rc); + } +#if defined(__sparc) + /* * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!! 
* * The system is quiesced at this point, we are ready to either dump @@ -535,7 +750,7 @@ alloc_statefile: if (rc == ENOSPC) { cpr_set_substate(C_ST_DUMP_NOSPC); - (void) cpr_resume(); + (void) cpr_resume(sleeptype); goto alloc_statefile; } else if (rc == 0) { if (cpr_reusable_mode) { @@ -544,9 +759,97 @@ alloc_statefile: } else rc = cpr_set_properties(1); } +#endif + PMD(PMD_SX, ("cpr_suspend: return %d\n", rc)) return (rc); } +void +cpr_resume_cpus(void) +{ + /* + * this is a cut down version of start_other_cpus() + * just do the initialization to wake the other cpus + */ + +#if defined(__x86) + /* + * Initialize our syscall handlers + */ + init_cpu_syscall(CPU); + +#endif + + i_cpr_pre_resume_cpus(); + + /* + * Restart the paused cpus + */ + mutex_enter(&cpu_lock); + start_cpus(); + mutex_exit(&cpu_lock); + + /* + * clear the affinity set in cpr_suspend_cpus() + */ + affinity_clear(); + + i_cpr_post_resume_cpus(); + + mutex_enter(&cpu_lock); + /* + * Restore this cpu to use the regular cpu_pause(), so that + * online and offline will work correctly + */ + cpu_pause_func = NULL; + + /* + * offline all the cpus that were brought online during suspend + */ + cpr_restore_offline(); + + /* + * clear the affinity set in cpr_suspend_cpus() + */ + affinity_clear(); + + mutex_exit(&cpu_lock); +} + +void +cpr_unpause_cpus(void) +{ + /* + * Now restore the system back to what it was before we suspended + */ + + PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n")) + + mutex_enter(&cpu_lock); + + /* + * Restore this cpu to use the regular cpu_pause(), so that + * online and offline will work correctly + */ + cpu_pause_func = NULL; + + /* + * Restart the paused cpus + */ + start_cpus(); + + /* + * offline all the cpus that were brought online during suspend + */ + cpr_restore_offline(); + + /* + * clear the affinity set in cpr_suspend_cpus() + */ + affinity_clear(); + + mutex_exit(&cpu_lock); +} /* * Bring the system back up from a checkpoint, at this point @@ -559,7 +862,7 
@@ alloc_statefile: * - put all threads back on run queue */ static int -cpr_resume(void) +cpr_resume(int sleeptype) { cpr_time_t pwron_tv, *ctp; char *str; @@ -570,6 +873,7 @@ cpr_resume(void) * that was suspended to a different level. */ CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n"); + PMD(PMD_SX, ("cpr_resume %x\n", sleeptype)) /* * Note: @@ -584,12 +888,14 @@ cpr_resume(void) * and the one that caused the failure, if necessary." */ switch (CPR->c_substate) { +#if defined(__sparc) case C_ST_DUMP: /* * This is most likely a full-fledged cpr_resume after * a complete and successful cpr suspend. Just roll back * everything. */ + ASSERT(sleeptype == CPR_TODISK); break; case C_ST_REUSABLE: @@ -605,46 +911,60 @@ cpr_resume(void) * is possible that a need for roll back of a state * change arises between these exit points. */ + ASSERT(sleeptype == CPR_TODISK); goto rb_dump; +#endif + + case C_ST_NODUMP: + PMD(PMD_SX, ("cpr_resume: NODUMP\n")) + goto rb_nodump; case C_ST_STOP_KERNEL_THREADS: + PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n")) goto rb_stop_kernel_threads; case C_ST_SUSPEND_DEVICES: + PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n")) goto rb_suspend_devices; +#if defined(__sparc) case C_ST_STATEF_ALLOC: + ASSERT(sleeptype == CPR_TODISK); goto rb_statef_alloc; case C_ST_DISABLE_UFS_LOGGING: + ASSERT(sleeptype == CPR_TODISK); goto rb_disable_ufs_logging; +#endif case C_ST_PM_REATTACH_NOINVOL: + PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n")) goto rb_pm_reattach_noinvol; case C_ST_STOP_USER_THREADS: + PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n")) goto rb_stop_user_threads; +#if defined(__sparc) case C_ST_MP_OFFLINE: + PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n")) goto rb_mp_offline; +#endif + +#if defined(__x86) + case C_ST_MP_PAUSED: + PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n")) + goto rb_mp_paused; +#endif + default: + PMD(PMD_SX, ("cpr_resume: others\n")) goto rb_others; } rb_all: /* - * setup debugger trapping. 
- */ - if (cpr_suspend_succeeded) - i_cpr_set_tbr(); - - /* - * tell prom to monitor keys before the kernel comes alive - */ - mon_clock_start(); - - /* * perform platform-dependent initialization */ if (cpr_suspend_succeeded) @@ -659,33 +979,65 @@ rb_dump: * * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!! */ +rb_nodump: + /* + * If we did suspend to RAM, we didn't generate a dump + */ + PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n")) (void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME); - if (cpr_suspend_succeeded) + if (cpr_suspend_succeeded) { + PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n")) (void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME); + } prom_resume_prepost(); +#if !defined(__sparc) + /* + * Need to sync the software clock with the hardware clock. + * On Sparc, this occurs in the sparc-specific cbe. However + * on x86 this needs to be handled _before_ we bring other cpu's + * back online. So we call a resume function in timestamp.c + */ + if (tsc_resume_in_cyclic == 0) + tsc_resume(); +#endif + +#if defined(__sparc) if (cpr_suspend_succeeded && (boothowto & RB_DEBUG)) kdi_dvec_cpr_restart(); +#endif + + +#if defined(__x86) +rb_mp_paused: + PT(PT_RMPO); + PMD(PMD_SX, ("resume aux cpus\n")) + + if (cpr_suspend_succeeded) { + cpr_resume_cpus(); + } else { + cpr_unpause_cpus(); + } +#endif /* * let the tmp callout catch up. 
*/ + PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n")) (void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME); i_cpr_enable_intr(); - mon_clock_stop(); - mon_clock_share(); - mutex_enter(&cpu_lock); + PMD(PMD_SX, ("cpr_resume: cyclic resume\n")) cyclic_resume(); mutex_exit(&cpu_lock); - mon_clock_start(); - + PMD(PMD_SX, ("cpr_resume: handle xc\n")) i_cpr_handle_xc(0); /* turn it off to allow xc assertion */ + PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n")) (void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME); /* @@ -701,7 +1053,8 @@ rb_dump: cpr_convert_promtime(&pwron_tv); ctp = &cpr_term.tm_shutdown; - CPR_STAT_EVENT_END_TMZ(" write statefile", ctp); + if (sleeptype == CPR_TODISK) + CPR_STAT_EVENT_END_TMZ(" write statefile", ctp); CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp); CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv); @@ -726,62 +1079,116 @@ rb_stop_kernel_threads: * disabled before starting kernel threads, we don't want * modunload thread to start changing device tree underneath. */ + PMD(PMD_SX, ("cpr_resume: modunload disable\n")) modunload_disable(); + PMD(PMD_SX, ("cpr_resume: start kernel threads\n")) cpr_start_kernel_threads(); rb_suspend_devices: CPR_DEBUG(CPR_DEBUG1, "resuming devices..."); CPR_STAT_EVENT_START(" start drivers"); + PMD(PMD_SX, + ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n", + cpr_resume_uniproc)) + +#if defined(__x86) + /* + * If cpr_resume_uniproc is set, then pause all the other cpus + * apart from the current cpu, so that broken drivers that think + * that they are on a uniprocessor machine will resume + */ + if (cpr_resume_uniproc) { + mutex_enter(&cpu_lock); + pause_cpus(NULL); + mutex_exit(&cpu_lock); + } +#endif + /* * The policy here is to continue resume everything we can if we did * not successfully finish suspend; and panic if we are coming back * from a fully suspended system. 
*/ + PMD(PMD_SX, ("cpr_resume: resume devices\n")) rc = cpr_resume_devices(ddi_root_node(), 0); cpr_sae(0); str = "Failed to resume one or more devices."; - if (rc && CPR->c_substate == C_ST_DUMP) - cpr_err(CE_PANIC, str); - else if (rc) - cpr_err(CE_WARN, str); + + if (rc) { + if (CPR->c_substate == C_ST_DUMP || + (sleeptype == CPR_TORAM && + CPR->c_substate == C_ST_NODUMP)) { + if (cpr_test_point == FORCE_SUSPEND_TO_RAM) { + PMD(PMD_SX, ("cpr_resume: resume device " + "warn\n")) + cpr_err(CE_WARN, str); + } else { + PMD(PMD_SX, ("cpr_resume: resume device " + "panic\n")) + cpr_err(CE_PANIC, str); + } + } else { + PMD(PMD_SX, ("cpr_resume: resume device warn\n")) + cpr_err(CE_WARN, str); + } + } + CPR_STAT_EVENT_END(" start drivers"); CPR_DEBUG(CPR_DEBUG1, "done\n"); +#if defined(__x86) + /* + * If cpr_resume_uniproc is set, then unpause all the processors + * that were paused before resuming the drivers + */ + if (cpr_resume_uniproc) { + mutex_enter(&cpu_lock); + start_cpus(); + mutex_exit(&cpu_lock); + } +#endif + /* * If we had disabled modunloading in this cpr resume cycle (i.e. we * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable * modunloading now. */ - if (CPR->c_substate != C_ST_SUSPEND_DEVICES) + if (CPR->c_substate != C_ST_SUSPEND_DEVICES) { + PMD(PMD_SX, ("cpr_resume: modload enable\n")) modunload_enable(); + } /* * Hooks needed by lock manager prior to resuming. * Refer to code for more comments. */ + PMD(PMD_SX, ("cpr_resume: lock mgr\n")) cpr_lock_mgr(lm_cprresume); +#if defined(__sparc) /* * This is a partial (half) resume during cpr suspend, we * haven't yet given up on the suspend. On return from here, * cpr_suspend() will try to reallocate and retry the suspend. 
*/ if (CPR->c_substate == C_ST_DUMP_NOSPC) { - mon_clock_stop(); return (0); } + if (sleeptype == CPR_TODISK) { rb_statef_alloc: - cpr_statef_close(); + cpr_statef_close(); rb_disable_ufs_logging: - /* - * if ufs logging was disabled, re-enable - */ - (void) cpr_ufs_logging(1); + /* + * if ufs logging was disabled, re-enable + */ + (void) cpr_ufs_logging(1); + } +#endif rb_pm_reattach_noinvol: /* @@ -795,44 +1202,64 @@ rb_pm_reattach_noinvol: CPR->c_substate == C_ST_STATEF_ALLOC || CPR->c_substate == C_ST_SUSPEND_DEVICES || CPR->c_substate == C_ST_STOP_KERNEL_THREADS) { + PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n")) pm_reattach_noinvol_fini(); } + PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n")) (void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME); + PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n")) (void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME); + PMD(PMD_SX, ("cpr_resume: restore direct levels\n")) pm_restore_direct_levels(); rb_stop_user_threads: CPR_DEBUG(CPR_DEBUG1, "starting user threads..."); + PMD(PMD_SX, ("cpr_resume: starting user threads\n")) cpr_start_user_threads(); CPR_DEBUG(CPR_DEBUG1, "done\n"); + /* + * Ask Xorg to resume the frame buffer, and wait for it to happen + */ + mutex_enter(&srn_clone_lock); + if (srn_signal) { + PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., " + "SRN_NORMAL_RESUME)\n")) + srn_inuse = 1; /* because (*srn_signal) cv_waits */ + (*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME); + srn_inuse = 0; + } else { + PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n")) + } + mutex_exit(&srn_clone_lock); +#if defined(__sparc) rb_mp_offline: if (cpr_mp_online()) cpr_err(CE_WARN, "Failed to online all the processors."); +#endif rb_others: - pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL, PM_DEP_WAIT, - NULL, 0); + PMD(PMD_SX, ("cpr_resume: dep thread\n")) + pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL, + PM_DEP_WAIT, NULL, 0); + PMD(PMD_SX, ("cpr_resume: CPR PM 
callback\n")) (void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME); - /* - * now that all the drivers are going, kernel kbd driver can - * take over, turn off prom monitor clock - */ - mon_clock_stop(); - if (cpr_suspend_succeeded) { - cpr_restore_time(); cpr_stat_record_events(); } - if (!cpr_reusable_mode) +#if defined(__sparc) + if (sleeptype == CPR_TODISK && !cpr_reusable_mode) cpr_clear_definfo(); +#endif + i_cpr_free_cpus(); CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW..."); + PMD(PMD_SX, ("cpr_resume: SIGTHAW\n")) cpr_signal_user(SIGTHAW); CPR_DEBUG(CPR_DEBUG1, "done\n"); @@ -854,11 +1281,12 @@ rb_others: CPR_STAT_EVENT_PRINT(); #endif /* CPR_STAT */ + PMD(PMD_SX, ("cpr_resume returns %x\n", rc)) return (rc); } static void -cpr_suspend_init(void) +cpr_suspend_init(int sleeptype) { cpr_time_t *ctp; @@ -880,15 +1308,93 @@ cpr_suspend_init(void) ctp = &cpr_term.tm_cprboot_end; bzero(ctp, sizeof (*ctp)); + if (sleeptype == CPR_TODISK) { + /* + * Lookup the physical address of our thread structure. + * This should never be invalid and the entire thread structure + * is expected to reside within the same pfn. + */ + curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread); + ASSERT(curthreadpfn != PFN_INVALID); + ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat, + (caddr_t)curthread + sizeof (kthread_t) - 1)); + } + + cpr_suspend_succeeded = 0; +} + +/* + * bring all the offline cpus online + */ +static int +cpr_all_online(void) +{ + int rc = 0; + +#ifdef __sparc /* - * Lookup the physical address of our thread structure. This should - * never be invalid and the entire thread structure is expected - * to reside within the same pfn. 
+ * do nothing */ - curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread); - ASSERT(curthreadpfn != PFN_INVALID); - ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat, - (caddr_t)curthread + sizeof (kthread_t) - 1)); +#else + + cpu_t *cp; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + cp = cpu_list; + do { + cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE; + if (!CPU_ACTIVE(cp)) { + if ((rc = cpu_online(cp)) != 0) + break; + CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE); + } + } while ((cp = cp->cpu_next) != cpu_list); + + if (rc) { + /* + * an online operation failed so offline the cpus + * that were onlined above to restore the system + * to its original state + */ + cpr_restore_offline(); + } +#endif + return (rc); +} + +/* + * offline all the cpus that were brought online by cpr_all_online() + */ +static void +cpr_restore_offline(void) +{ + +#ifdef __sparc + /* + * do nothing + */ +#else + + cpu_t *cp; + int rc = 0; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + cp = cpu_list; + do { + if (CPU_CPR_IS_ONLINE(cp)) { + rc = cpu_offline(cp, 0); + /* + * this offline should work, since the cpu was + * offline originally and was successfully onlined + * by cpr_all_online() + */ + ASSERT(rc == 0); + cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE; + } + } while ((cp = cp->cpu_next) != cpu_list); + +#endif - cpr_suspend_succeeded = 0; } diff --git a/usr/src/uts/common/cpr/cpr_misc.c b/usr/src/uts/common/cpr/cpr_misc.c index 936e3e9565..1ec0452c81 100644 --- a/usr/src/uts/common/cpr/cpr_misc.c +++ b/usr/src/uts/common/cpr/cpr_misc.c @@ -38,6 +38,7 @@ #include <sys/kmem.h> #include <sys/cpr.h> #include <sys/conf.h> +#include <sys/machclock.h> /* * CPR miscellaneous support routines @@ -61,11 +62,14 @@ extern char *cpr_pagedata; extern int cpr_bufs_allocated; extern int cpr_bitmaps_allocated; +#if defined(__sparc) static struct cprconfig cprconfig; static int cprconfig_loaded = 0; static int cpr_statefile_ok(vnode_t *, int); static int cpr_p_online(cpu_t *, int); static void cpr_save_mp_state(void); +#endif + int 
cpr_is_ufs(struct vfs *); char cpr_default_path[] = CPR_DEFAULT; @@ -112,6 +116,10 @@ cpr_init(int fcn) CPR->c_flags |= C_REUSABLE; else CPR->c_flags |= C_SUSPENDING; + if (fcn == AD_SUSPEND_TO_RAM || fcn == DEV_SUSPEND_TO_RAM) { + return (0); + } +#if defined(__sparc) if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ) CPR->c_flags |= C_COMPRESSING; /* @@ -126,6 +134,7 @@ cpr_init(int fcn) if (cpr_debug & CPR_DEBUG3) cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing " "kas\n", (void *)CPR->c_mapping_area); +#endif return (0); } @@ -157,6 +166,7 @@ cpr_done(void) } +#if defined(__sparc) /* * reads config data into cprconfig */ @@ -815,6 +825,7 @@ cpr_get_reusable_mode(void) return (0); } +#endif /* * clock/time related routines @@ -828,7 +839,7 @@ cpr_tod_get(cpr_time_t *ctp) timestruc_t ts; mutex_enter(&tod_lock); - ts = tod_get(); + ts = TODOP_GET(tod_ops); mutex_exit(&tod_lock); ctp->tv_sec = (time32_t)ts.tv_sec; ctp->tv_nsec = (int32_t)ts.tv_nsec; @@ -857,6 +868,7 @@ cpr_restore_time(void) clkset(cpr_time_stamp); } +#if defined(__sparc) /* * CPU ONLINE/OFFLINE CODE */ @@ -1104,20 +1116,20 @@ cpr_reusable_mount_check(void) } /* - * Force a fresh read of the cprinfo per uadmin 3 call + * return statefile offset in DEV_BSIZE units */ -void -cpr_forget_cprconfig(void) +int +cpr_statefile_offset(void) { - cprconfig_loaded = 0; + return (cpr_statefile_is_spec() ? btod(CPR_SPEC_OFFSET) : 0); } - /* - * return statefile offset in DEV_BSIZE units + * Force a fresh read of the cprinfo per uadmin 3 call */ -int -cpr_statefile_offset(void) +void +cpr_forget_cprconfig(void) { - return (cpr_statefile_is_spec() ? 
btod(CPR_SPEC_OFFSET) : 0); + cprconfig_loaded = 0; } +#endif diff --git a/usr/src/uts/common/cpr/cpr_mod.c b/usr/src/uts/common/cpr/cpr_mod.c index 365f102a2b..9358a6ab3a 100644 --- a/usr/src/uts/common/cpr/cpr_mod.c +++ b/usr/src/uts/common/cpr/cpr_mod.c @@ -42,16 +42,23 @@ #include <sys/autoconf.h> #include <sys/machsystm.h> -extern int i_cpr_is_supported(void); +extern int i_cpr_is_supported(int sleeptype); extern int cpr_is_ufs(struct vfs *); extern int cpr_check_spec_statefile(void); extern int cpr_reusable_mount_check(void); -extern void cpr_forget_cprconfig(void); extern int i_cpr_reusable_supported(void); extern int i_cpr_reusefini(void); - extern struct mod_ops mod_miscops; +extern int cpr_init(int); +extern void cpr_done(void); +extern void i_cpr_stop_other_cpus(void); +extern int i_cpr_power_down(); + +#if defined(__sparc) +extern void cpr_forget_cprconfig(void); +#endif + static struct modlmisc modlmisc = { &mod_miscops, "checkpoint resume" }; @@ -68,6 +75,9 @@ kmutex_t cpr_slock; /* cpr serial lock */ cpr_t cpr_state; int cpr_debug; int cpr_test_mode; /* true if called via uadmin testmode */ +int cpr_test_point = LOOP_BACK_NONE; /* cpr test point */ +int cpr_mp_enable = 0; /* set to 1 to enable MP suspend */ +major_t cpr_device = 0; /* major number for S3 on one device */ /* * All the loadable module related code follows @@ -100,9 +110,25 @@ _info(struct modinfo *modinfop) return (mod_info(&modlinkage, modinfop)); } +static +int +atoi(char *p) +{ + int i; + + i = (*p++ - '0'); + + while (*p != '\0') + i = 10 * i + (*p++ - '0'); + + return (i); +} + int -cpr(int fcn) +cpr(int fcn, void *mdep) { + +#if defined(__sparc) static const char noswapstr[] = "reusable statefile requires " "that no swap area be configured.\n"; static const char blockstr[] = "reusable statefile must be " @@ -112,11 +138,71 @@ cpr(int fcn) "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) " "to exit reusable statefile mode.\n"; static const char modefmt[] = "%s in reusable 
mode.\n"; +#endif register int rc = 0; - extern int cpr_init(int); - extern void cpr_done(void); + int cpr_sleeptype; /* + * First, reject commands that we don't (yet) support on this arch. + * This is easier to understand broken out like this than grotting + * through the second switch below. + */ + + switch (fcn) { +#if defined(__sparc) + case AD_CHECK_SUSPEND_TO_RAM: + case AD_SUSPEND_TO_RAM: + return (ENOTSUP); + case AD_CHECK_SUSPEND_TO_DISK: + case AD_SUSPEND_TO_DISK: + case AD_CPR_REUSEINIT: + case AD_CPR_NOCOMPRESS: + case AD_CPR_FORCE: + case AD_CPR_REUSABLE: + case AD_CPR_REUSEFINI: + case AD_CPR_TESTZ: + case AD_CPR_TESTNOZ: + case AD_CPR_TESTHALT: + case AD_CPR_SUSP_DEVICES: + cpr_sleeptype = CPR_TODISK; + break; +#endif +#if defined(__x86) + case AD_CHECK_SUSPEND_TO_DISK: + case AD_SUSPEND_TO_DISK: + case AD_CPR_REUSEINIT: + case AD_CPR_NOCOMPRESS: + case AD_CPR_FORCE: + case AD_CPR_REUSABLE: + case AD_CPR_REUSEFINI: + case AD_CPR_TESTZ: + case AD_CPR_TESTNOZ: + case AD_CPR_TESTHALT: + case AD_CPR_PRINT: + return (ENOTSUP); + /* The DEV_* values need to be removed after sys-syspend is fixed */ + case DEV_CHECK_SUSPEND_TO_RAM: + case DEV_SUSPEND_TO_RAM: + case AD_CPR_SUSP_DEVICES: + case AD_CHECK_SUSPEND_TO_RAM: + case AD_SUSPEND_TO_RAM: + case AD_LOOPBACK_SUSPEND_TO_RAM_PASS: + case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL: + case AD_FORCE_SUSPEND_TO_RAM: + case AD_DEVICE_SUSPEND_TO_RAM: + /* + * if MP then do not support suspend to RAM, however override + * the MP restriction if cpr_mp_enable has been set + */ + if (ncpus > 1 && cpr_mp_enable == 0) + return (ENOTSUP); + else + cpr_sleeptype = CPR_TORAM; + break; +#endif + } +#if defined(__sparc) + /* * Need to know if we're in reusable mode, but we will likely have * rebooted since REUSEINIT, so we have to get the info from the * file system @@ -125,8 +211,11 @@ cpr(int fcn) cpr_reusable_mode = cpr_get_reusable_mode(); cpr_forget_cprconfig(); +#endif + switch (fcn) { +#if defined(__sparc) case 
AD_CPR_REUSEINIT: if (!i_cpr_reusable_supported()) return (ENOTSUP); @@ -188,7 +277,7 @@ cpr(int fcn) break; case AD_CPR_CHECK: - if (!i_cpr_is_supported() || cpr_reusable_mode) + if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode) return (ENOTSUP); return (0); @@ -196,6 +285,7 @@ cpr(int fcn) CPR_STAT_EVENT_END("POST CPR DELAY"); cpr_stat_event_print(); return (0); +#endif case AD_CPR_DEBUG0: cpr_debug = 0; @@ -215,13 +305,55 @@ cpr(int fcn) cpr_debug |= CPR_DEBUG6; return (0); + /* The DEV_* values need to be removed after sys-syspend is fixed */ + case DEV_CHECK_SUSPEND_TO_RAM: + case DEV_SUSPEND_TO_RAM: + case AD_CHECK_SUSPEND_TO_RAM: + case AD_SUSPEND_TO_RAM: + cpr_test_point = LOOP_BACK_NONE; + break; + + case AD_LOOPBACK_SUSPEND_TO_RAM_PASS: + cpr_test_point = LOOP_BACK_PASS; + break; + + case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL: + cpr_test_point = LOOP_BACK_FAIL; + break; + + case AD_FORCE_SUSPEND_TO_RAM: + cpr_test_point = FORCE_SUSPEND_TO_RAM; + break; + + case AD_DEVICE_SUSPEND_TO_RAM: + cpr_test_point = DEVICE_SUSPEND_TO_RAM; + cpr_device = (major_t)atoi((char *)mdep); + break; + + case AD_CPR_SUSP_DEVICES: + cpr_test_point = FORCE_SUSPEND_TO_RAM; + if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS) + cmn_err(CE_WARN, + "Some devices did not suspend " + "and may be unusable"); + (void) cpr_resume_devices(ddi_root_node(), 0); + return (0); + default: return (ENOTSUP); } - if (!i_cpr_is_supported() || !cpr_is_ufs(rootvfs)) + if (!i_cpr_is_supported(cpr_sleeptype) || + (cpr_sleeptype == CPR_TODISK && !cpr_is_ufs(rootvfs))) return (ENOTSUP); + if (fcn == AD_CHECK_SUSPEND_TO_RAM || + fcn == DEV_CHECK_SUSPEND_TO_RAM) { + ASSERT(i_cpr_is_supported(cpr_sleeptype)); + return (0); + } + +#if defined(__sparc) if (fcn == AD_CPR_REUSEINIT) { if (mutex_tryenter(&cpr_slock) == 0) return (EBUSY); @@ -247,6 +379,7 @@ cpr(int fcn) mutex_exit(&cpr_slock); return (rc); } +#endif /* * acquire cpr serial lock and init cpr state structure. 
@@ -254,23 +387,39 @@ cpr(int fcn) if (rc = cpr_init(fcn)) return (rc); +#if defined(__sparc) if (fcn == AD_CPR_REUSABLE) { if ((rc = i_cpr_check_cprinfo()) != 0) { mutex_exit(&cpr_slock); return (rc); } } +#endif /* * Call the main cpr routine. If we are successful, we will be coming * down from the resume side, otherwise we are still in suspend. */ cpr_err(CE_CONT, "System is being suspended"); - if (rc = cpr_main()) { + if (rc = cpr_main(cpr_sleeptype)) { CPR->c_flags |= C_ERROR; + PMD(PMD_SX, ("cpr: Suspend operation failed.\n")) cpr_err(CE_NOTE, "Suspend operation failed."); } else if (CPR->c_flags & C_SUSPENDING) { - extern void cpr_power_down(); + + /* + * In the suspend to RAM case, by the time we get + * control back we're already resumed + */ + if (cpr_sleeptype == CPR_TORAM) { + PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n")) + cpr_done(); + return (rc); + } + +#if defined(__sparc) + + PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n")) /* * Back from a successful checkpoint */ @@ -280,6 +429,7 @@ cpr(int fcn) } /* make sure there are no more changes to the device tree */ + PMD(PMD_SX, ("cpr: dev tree freeze\n")) devtree_freeze(); /* @@ -288,7 +438,9 @@ cpr(int fcn) * for us to be preempted, we're essentially single threaded * from here on out. */ - stop_other_cpus(); + PMD(PMD_SX, ("cpr: stop other cpus\n")) + i_cpr_stop_other_cpus(); + PMD(PMD_SX, ("cpr: spl6\n")) (void) spl6(); /* @@ -296,24 +448,27 @@ cpr(int fcn) * be called when there are no other threads that could be * accessing devices */ + PMD(PMD_SX, ("cpr: reset leaves\n")) reset_leaves(); /* - * If cpr_power_down() succeeds, it'll not return. + * If i_cpr_power_down() succeeds, it'll not return * * Drives with write-cache enabled need to flush * their cache. 
*/ - if (fcn != AD_CPR_TESTHALT) - cpr_power_down(); - + if (fcn != AD_CPR_TESTHALT) { + PMD(PMD_SX, ("cpr: power down\n")) + (void) i_cpr_power_down(cpr_sleeptype); + } + ASSERT(cpr_sleeptype == CPR_TODISK); + /* currently CPR_TODISK comes back via a boot path */ CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n"); halt(NULL); /* NOTREACHED */ +#endif } - /* - * For resuming: release resources and the serial lock. - */ + PMD(PMD_SX, ("cpr: cpr done\n")) cpr_done(); return (rc); } diff --git a/usr/src/uts/common/cpr/cpr_stat.c b/usr/src/uts/common/cpr/cpr_stat.c index 264bb4c9c7..9992f23c82 100644 --- a/usr/src/uts/common/cpr/cpr_stat.c +++ b/usr/src/uts/common/cpr/cpr_stat.c @@ -28,7 +28,6 @@ #include <sys/types.h> #include <sys/ddi.h> #include <sys/pte.h> -#include <sys/intreg.h> #include <sys/cpr.h> /* @@ -111,7 +110,7 @@ cpr_stat_event_end(char *name, cpr_time_t *ctp) cep->ce_sec.etime = tv.tv_sec; cep->ce_sec.ltime = cep->ce_sec.etime - cep->ce_sec.stime; cep->ce_sec.mtime = ((cep->ce_sec.mtime * (cep->ce_ntests - 1)) + - cep->ce_sec.ltime) / cep->ce_ntests; + cep->ce_sec.ltime) / cep->ce_ntests; /* * calculate 100*milliseconds @@ -158,10 +157,10 @@ cpr_stat_record_events() STAT->cs_real_statefsz = cpr_term.real_statef_size; cur_comprate = ((longlong_t)((longlong_t) - STAT->cs_nocomp_statefsz*100)/ - STAT->cs_real_statefsz); + STAT->cs_nocomp_statefsz*100)/ + STAT->cs_real_statefsz); if (STAT->cs_min_comprate == 0 || - (STAT->cs_min_comprate > cur_comprate)) + (STAT->cs_min_comprate > cur_comprate)) STAT->cs_min_comprate = cur_comprate; } } @@ -203,25 +202,25 @@ cpr_stat_event_print() */ printf("\nMISCELLANEOUS STATISTICS INFORMATION (units in KBytes)\n\n"); printf("\tUser Pages w/o Swapspace:\t%8lu (%lu pages)\n", - cp->cs_nosw_pages*PAGESIZE/1000, cp->cs_nosw_pages); + cp->cs_nosw_pages*PAGESIZE/1000, cp->cs_nosw_pages); printf("\tTotal Upages Saved to Statefile:%8d (%d pages)\n", - cp->cs_upage2statef*PAGESIZE/1000, cp->cs_upage2statef); + 
cp->cs_upage2statef*PAGESIZE/1000, cp->cs_upage2statef); if (cp->cs_mclustsz) printf("\tAverage Cluster Size:\t\t%8d (%d.%1d%1d pages)\n\n", - cp->cs_mclustsz/1000, cp->cs_mclustsz/PAGESIZE, - ((cp->cs_mclustsz%PAGESIZE)*10/PAGESIZE), - ((cp->cs_mclustsz%PAGESIZE)*100/PAGESIZE)%10); + cp->cs_mclustsz/1000, cp->cs_mclustsz/PAGESIZE, + ((cp->cs_mclustsz%PAGESIZE)*10/PAGESIZE), + ((cp->cs_mclustsz%PAGESIZE)*100/PAGESIZE)%10); printf("\tKernel Memory Size:\t\t%8lu\n", cp->cs_nocomp_statefsz/1000); printf("\tEstimated Statefile Size:\t%8lu\n", cp->cs_est_statefsz/1000); printf("\tActual Statefile Size:\t\t%8lu\n", cp->cs_real_statefsz/1000); if (cp->cs_real_statefsz) { int min = cp->cs_min_comprate; int new = ((longlong_t)((longlong_t) - cp->cs_nocomp_statefsz*100)/cp->cs_real_statefsz); + cp->cs_nocomp_statefsz*100)/cp->cs_real_statefsz); printf("\tCompression Ratio:\t\t%5d.%1d%1d (worst %d.%1d%1d)\n", - new/100, (new%100)/10, new%10, - min/100, (min%100)/10, min%10); + new/100, (new%100)/10, new%10, + min/100, (min%100)/10, min%10); } } diff --git a/usr/src/uts/common/cpr/cpr_uthread.c b/usr/src/uts/common/cpr/cpr_uthread.c index 49ea1dfb1f..e2da80d5b8 100644 --- a/usr/src/uts/common/cpr/cpr_uthread.c +++ b/usr/src/uts/common/cpr/cpr_uthread.c @@ -59,7 +59,7 @@ cpr_signal_user(int sig) for (p = practive; p; p = p->p_next) { /* only user threads */ if (p->p_exec == NULL || p->p_stat == SZOMB || - p == proc_init || p == ttoproc(curthread)) + p == proc_init || p == ttoproc(curthread)) continue; mutex_enter(&p->p_lock); @@ -87,7 +87,7 @@ cpr_stop_user_threads() return (ESRCH); cpr_stop_user(count * count * CPR_UTSTOP_WAIT); } while (cpr_check_user_threads() && - (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE)); + (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE)); return (0); } @@ -194,11 +194,11 @@ cpr_check_user_threads() CPR_DEBUG(CPR_DEBUG1, "Suspend failed: " "cannot stop uthread\n"); cpr_err(CE_WARN, "Suspend cannot stop " - "process %s (%p:%x).", - 
ttoproc(tp)->p_user.u_psargs, (void *)tp, - tp->t_state); + "process %s (%p:%x).", + ttoproc(tp)->p_user.u_psargs, (void *)tp, + tp->t_state); cpr_err(CE_WARN, "Process may be waiting for" - " network request, please try again."); + " network request, please try again."); } CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x " @@ -284,8 +284,6 @@ int cpr_stop_kernel_threads(void) { caddr_t name; - kthread_id_t tp; - proc_t *p; callb_lock_table(); /* Note: we unlock the table in resume. */ @@ -298,6 +296,25 @@ cpr_stop_kernel_threads(void) return (EBUSY); } + CPR_DEBUG(CPR_DEBUG1, ("done\n")); + return (0); +} + +/* + * Check to see that kernel threads are stopped. + * This should be called while CPU's are paused, and the caller is + * effectively running single user, or else we are virtually guaranteed + * to fail. The routine should not ASSERT on the paused state or spl + * level, as there may be a use for this to verify that things are running + * again. + */ +int +cpr_threads_are_stopped(void) +{ + caddr_t name; + kthread_id_t tp; + proc_t *p; + /* * We think we stopped all the kernel threads. Just in case * someone is not playing by the rules, take a spin through @@ -320,8 +337,7 @@ cpr_stop_kernel_threads(void) return (EBUSY); } } while ((tp = tp->t_next) != curthread); - mutex_exit(&pidlock); - CPR_DEBUG(CPR_DEBUG1, "done\n"); + mutex_exit(&pidlock); return (0); } |