Diffstat (limited to 'usr/src/uts/sun4u/ngdr/io/dr_mem.c')
-rw-r--r--	usr/src/uts/sun4u/ngdr/io/dr_mem.c	2934
1 file changed, 2934 insertions, 0 deletions
diff --git a/usr/src/uts/sun4u/ngdr/io/dr_mem.c b/usr/src/uts/sun4u/ngdr/io/dr_mem.c
new file mode 100644
index 0000000000..e876db93b5
--- /dev/null
+++ b/usr/src/uts/sun4u/ngdr/io/dr_mem.c
@@ -0,0 +1,2934 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * DR memory support routines.
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/dditypes.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/sysmacros.h>
+#include <sys/machsystm.h>
+#include <sys/spitregs.h>
+#include <sys/cpuvar.h>
+#include <sys/promif.h>
+#include <vm/seg_kmem.h>
+#include <sys/lgrp.h>
+#include <sys/platform_module.h>
+
+#include <vm/page.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+
+extern struct memlist *phys_install;
+
+/* TODO: push this reference below drmach line */
+extern int kcage_on;
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "%M% %d";
+
+static int dr_post_detach_mem_unit(dr_mem_unit_t *mp);
+static int dr_reserve_mem_spans(memhandle_t *mhp,
+ struct memlist *mlist);
+static int dr_select_mem_target(dr_handle_t *hp,
+ dr_mem_unit_t *mp, struct memlist *ml);
+static void dr_init_mem_unit_data(dr_mem_unit_t *mp);
+
+static struct memlist *memlist_dup(struct memlist *);
+static int memlist_canfit(struct memlist *s_mlist,
+ struct memlist *t_mlist);
+static struct memlist *memlist_del_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+static struct memlist *memlist_cat_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+
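+/*
+ * Used after copy-rename (see dr_move_memory) to unretire pages whose
+ * faulty flag was cleared.
+ */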
+extern void page_unretire_pages(void);
+
+/*
+ * dr_mem_unit_t.sbm_flags
+ */
+#define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */
+#define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */
+#define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */
+#define DR_MFLAG_MEMUPSIZE 0x08 /* move from big to small board */
+#define DR_MFLAG_MEMDOWNSIZE 0x10 /* move from small to big board */
+#define DR_MFLAG_MEMRESIZE 0x18 /* move to different size board */
+#define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */
+#define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */
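+/* note: DR_MFLAG_MEMRESIZE is the union of MEMUPSIZE and MEMDOWNSIZE (0x08 | 0x10) */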
+
+/* helper macros: pfn <-> 64-bit physical byte address conversions */
+#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
+#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
+
+static struct memlist *
+dr_get_memlist(dr_mem_unit_t *mp)
+{
+ struct memlist *mlist = NULL;
+ sbd_error_t *err;
+ static fn_t f = "dr_get_memlist";
+
+ PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
+
+ /*
+ * Return cached memlist, if present.
+ * This memlist will be present following an
+ * unconfigure (a.k.a. detach) of this memunit.
+ * It should only be used in the case where a configure
+ * is bringing this memunit back in without going
+ * through the disconnect and connect states.
+ */
+ if (mp->sbm_mlist) {
+ PR_MEM("%s: found cached memlist\n", f);
+
+ mlist = memlist_dup(mp->sbm_mlist);
+ } else {
+ uint64_t basepa = _ptob64(mp->sbm_basepfn);
+
+ /* attempt to construct a memlist using phys_install */
+
+ /* round down to slice base address */
+ basepa &= ~(mp->sbm_slice_size - 1);
+
+ /* get a copy of phys_install to edit */
+ memlist_read_lock();
+ mlist = memlist_dup(phys_install);
+ memlist_read_unlock();
+
+ /* trim lower irrelevant span */
+ if (mlist)
+ mlist = memlist_del_span(mlist, 0ull, basepa);
+
+ /* trim upper irrelevant span */
+ if (mlist) {
+ uint64_t endpa;
+
+ basepa += mp->sbm_slice_size;
+ endpa = _ptob64(physmax + 1);
+ if (endpa > basepa)
+ mlist = memlist_del_span(
+ mlist,
+ basepa,
+ endpa - basepa);
+ }
+
+ if (mlist) {
+ /* successfully built a memlist */
+ PR_MEM("%s: derived memlist from phys_install\n", f);
+ }
+
+ /* if no mlist yet, try platform layer */
+ if (!mlist) {
+ err = drmach_mem_get_memlist(
+ mp->sbm_cm.sbdev_id, &mlist);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mlist = NULL; /* paranoia */
+ }
+ }
+ }
+
+ PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(mlist);
+
+ return (mlist);
+}
+
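+/*
+ * Completion state shared between dr_release_mem() and the
+ * dr_mem_del_done() callback: the callback records the result of the
+ * kphysm memory delete and signals the waiting thread.
+ */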
+typedef struct {
+ kcondvar_t cond;
+ kmutex_t lock;
+ int error;
+ int done;
+} dr_release_mem_sync_t;
+
+/*
+ * Memory has been logically removed by the time this routine is called.
+ */
+static void
+dr_mem_del_done(void *arg, int error)
+{
+ dr_release_mem_sync_t *ds = arg;
+
+ mutex_enter(&ds->lock);
+ ds->error = error;
+ ds->done = 1;
+ cv_signal(&ds->cond);
+ mutex_exit(&ds->lock);
+}
+
+/*
+ * When we reach here the memory being drained should have
+ * already been reserved in dr_pre_release_mem().
+ * Our only task here is to kick off the "drain" and wait
+ * for it to finish.
+ */
+void
+dr_release_mem(dr_common_unit_t *cp)
+{
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
+ int err;
+ dr_release_mem_sync_t rms;
+ static fn_t f = "dr_release_mem";
+
+ /* check that this memory unit has been reserved */
+ if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+ return;
+ }
+
+ bzero((void *) &rms, sizeof (rms));
+
+ mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
+
+ mutex_enter(&rms.lock);
+ err = kphysm_del_start(mp->sbm_memhandle,
+ dr_mem_del_done, (void *) &rms);
+ if (err == KPHYSM_OK) {
+ /* wait for completion or interrupt */
+ while (!rms.done) {
+ if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
+ /* then there is a pending UNIX signal */
+ (void) kphysm_del_cancel(mp->sbm_memhandle);
+
+ /* wait for completion */
+ while (!rms.done)
+ cv_wait(&rms.cond, &rms.lock);
+ }
+ }
+ /* get the result of the memory delete operation */
+ err = rms.error;
+ }
+ mutex_exit(&rms.lock);
+
+ cv_destroy(&rms.cond);
+ mutex_destroy(&rms.lock);
+
+ if (err != KPHYSM_OK) {
+ int e_code;
+
+ switch (err) {
+ case KPHYSM_ENOWORK:
+ e_code = ESBD_NOERROR;
+ break;
+
+ case KPHYSM_EHANDLE:
+ case KPHYSM_ESEQUENCE:
+ e_code = ESBD_INTERNAL;
+ break;
+
+ case KPHYSM_ENOTVIABLE:
+ e_code = ESBD_MEM_NOTVIABLE;
+ break;
+
+ case KPHYSM_EREFUSED:
+ e_code = ESBD_MEM_REFUSED;
+ break;
+
+ case KPHYSM_ENONRELOC:
+ e_code = ESBD_MEM_NONRELOC;
+ break;
+
+ case KPHYSM_ECANCELLED:
+ e_code = ESBD_MEM_CANCELLED;
+ break;
+
+ case KPHYSM_ERESOURCE:
+ e_code = ESBD_MEMFAIL;
+ break;
+
+ default:
+ cmn_err(CE_WARN,
+ "%s: unexpected kphysm error code %d,"
+ " id 0x%p",
+ f, err, mp->sbm_cm.sbdev_id);
+
+ e_code = ESBD_IO;
+ break;
+ }
+
+ if (e_code != ESBD_NOERROR) {
+ dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
+ }
+ }
+}
+
+void
+dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
+ struct memlist *ml, *mc;
+ sbd_error_t *err;
+ static fn_t f = "dr_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ dr_lock_status(hp->h_bd);
+ err = drmach_configure(cp->sbdev_id, 0);
+ dr_unlock_status(hp->h_bd);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ return;
+ }
+
+ ml = dr_get_memlist(mp);
+ for (mc = ml; mc; mc = mc->next) {
+ int rv;
+ sbd_error_t *err;
+
+ rv = kphysm_add_memory_dynamic(
+ (pfn_t)(mc->address >> PAGESHIFT),
+ (pgcnt_t)(mc->size >> PAGESHIFT));
+ if (rv != KPHYSM_OK) {
+ /*
+ * translate kphysm error and
+ * store in devlist error
+ */
+ switch (rv) {
+ case KPHYSM_ERESOURCE:
+ rv = ESBD_NOMEM;
+ break;
+
+ case KPHYSM_EFAULT:
+ rv = ESBD_FAULT;
+ break;
+
+ default:
+ rv = ESBD_INTERNAL;
+ break;
+ }
+
+ if (rv == ESBD_INTERNAL) {
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+ } else
+ dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
+ break;
+ }
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id, mc->address, mc->size);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ break;
+ }
+ }
+
+ memlist_delete(ml);
+
+ /* back out if configure failed */
+ if (mp->sbm_cm.sbdev_error != NULL) {
+ dr_lock_status(hp->h_bd);
+ err = drmach_unconfigure(cp->sbdev_id, DRMACH_DEVI_REMOVE);
+ if (err)
+ sbd_err_clear(&err);
+ dr_unlock_status(hp->h_bd);
+ }
+}
+
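+/* data pattern written over deleted memory by dr_mem_ecache_scrub() */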
+#define DR_SCRUB_VALUE 0x0d0e0a0d0b0e0e0fULL
+
+static void
+dr_mem_ecache_scrub(dr_mem_unit_t *mp, struct memlist *mlist)
+{
+#ifdef DEBUG
+ clock_t stime = lbolt;
+#endif /* DEBUG */
+
+ struct memlist *ml;
+ uint64_t scrub_value = DR_SCRUB_VALUE;
+ processorid_t cpuid;
+ static fn_t f = "dr_mem_ecache_scrub";
+
+ cpuid = drmach_mem_cpu_affinity(mp->sbm_cm.sbdev_id);
+ affinity_set(cpuid);
+
+ PR_MEM("%s: using proc %d, memlist...\n", f,
+ (cpuid == CPU_CURRENT) ? CPU->cpu_id : cpuid);
+ PR_MEMLIST_DUMP(mlist);
+
+ for (ml = mlist; ml; ml = ml->next) {
+ uint64_t dst_pa;
+ uint64_t nbytes;
+
+ /* calculate the destination physical address */
+ dst_pa = ml->address;
+ if (ml->address & PAGEOFFSET)
+ cmn_err(CE_WARN,
+ "%s: address (0x%llx) not on "
+ "page boundary", f, ml->address);
+
+ nbytes = ml->size;
+ if (ml->size & PAGEOFFSET)
+ cmn_err(CE_WARN,
+ "%s: size (0x%llx) not on "
+ "page boundary", f, ml->size);
+
+ /*LINTED*/
+ while (nbytes > 0) {
+ /* write 64 bits to dst_pa */
+ stdphys(dst_pa, scrub_value);
+
+ /* increment/decrement by cacheline sizes */
+ dst_pa += DRMACH_COHERENCY_UNIT;
+ nbytes -= DRMACH_COHERENCY_UNIT;
+ }
+ }
+
+ /*
+ * flush this cpu's ecache and take care to ensure
+ * that all of its bus transactions have retired.
+ */
+ drmach_cpu_flush_ecache_sync();
+
+ affinity_clear();
+
+#ifdef DEBUG
+ stime = lbolt - stime;
+ PR_MEM("%s: scrub ticks = %ld (%ld secs)\n", f, stime, stime / hz);
+#endif /* DEBUG */
+}
+
+/*
+ * This function marks as clean all the faulty pages that belong to the
+ * board being copy-renamed, since they are not likely to be bad pages
+ * after the rename. This includes the retired pages on the board.
+ */
+
+static void
+dr_memlist_clrpages(struct memlist *r_ml)
+{
+ struct memlist *t_ml;
+ page_t *pp, *epp;
+ pfn_t pfn, epfn;
+ struct memseg *seg;
+
+ if (r_ml == NULL)
+ return;
+
+ for (t_ml = r_ml; (t_ml != NULL); t_ml = t_ml->next) {
+ pfn = _b64top(t_ml->address);
+ epfn = _b64top(t_ml->address + t_ml->size);
+
+ for (seg = memsegs; seg != NULL; seg = seg->next) {
+ if (pfn >= seg->pages_end || epfn < seg->pages_base)
+ continue;
+
+ pp = seg->pages;
+ if (pfn > seg->pages_base)
+ pp += pfn - seg->pages_base;
+
+ epp = seg->epages;
+ if (epfn < seg->pages_end)
+ epp -= seg->pages_end - epfn;
+
+ ASSERT(pp < epp);
+ while (pp < epp) {
+ if (page_isfaulty((page_t *)pp))
+ page_clrtoxic_flag((page_t *)pp,
+ PAGE_IS_FAULTY);
+ pp++;
+ }
+ }
+ }
+}
+
+static int
+dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
+{
+ time_t copytime;
+ drmachid_t cr_id;
+ dr_sr_handle_t *srhp;
+ struct memlist *c_ml, *d_ml, *r_ml;
+ sbd_error_t *err;
+ static fn_t f = "dr_move_memory";
+
+ PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
+ f,
+ s_mp->sbm_cm.sbdev_path,
+ t_mp->sbm_cm.sbdev_path);
+
+ ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
+ ASSERT(s_mp->sbm_peer == t_mp);
+ ASSERT(s_mp->sbm_mlist);
+
+ ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
+ ASSERT(t_mp->sbm_peer == s_mp);
+
+ /*
+ * create a memlist of spans to copy by removing
+ * the spans that have been deleted, if any, from
+ * the full source board memlist. s_mp->sbm_del_mlist
+ * will be NULL if there were no spans deleted from
+ * the source board.
+ */
+ c_ml = memlist_dup(s_mp->sbm_mlist);
+ d_ml = s_mp->sbm_del_mlist;
+ while (d_ml != NULL) {
+ c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
+ d_ml = d_ml->next;
+ }
+
+ /*
+ * create a copy of the memlist to be used for retiring pages.
+ */
+ r_ml = memlist_dup(c_ml);
+
+ affinity_set(drmach_mem_cpu_affinity(t_mp->sbm_cm.sbdev_id));
+
+ err = drmach_copy_rename_init(
+ t_mp->sbm_cm.sbdev_id, _ptob64(t_mp->sbm_slice_offset),
+ s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
+ if (err) {
+ DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
+ affinity_clear();
+ memlist_delete(r_ml);
+ return (-1);
+ }
+
+ srhp = dr_get_sr_handle(hp);
+ ASSERT(srhp);
+
+ copytime = lbolt;
+
+ /* Quiesce the OS. */
+ if (dr_suspend(srhp)) {
+ cmn_err(CE_WARN, "%s: failed to quiesce OS"
+ " for copy-rename", f);
+
+ dr_release_sr_handle(srhp);
+ err = drmach_copy_rename_fini(cr_id);
+ if (err) {
+ /*
+ * no error is expected since the program has
+ * not yet run.
+ */
+
+ /* catch this in debug kernels */
+ ASSERT(0);
+
+ sbd_err_clear(&err);
+ }
+
+ /* suspend error reached via hp */
+ s_mp->sbm_cm.sbdev_error = hp->h_err;
+ hp->h_err = NULL;
+
+ affinity_clear();
+ memlist_delete(r_ml);
+ return (-1);
+ }
+
+ /*
+ * Rename memory for lgroup.
+ * Source and target board numbers are packaged in arg.
+ */
+ {
+ dr_board_t *t_bp, *s_bp;
+
+ s_bp = s_mp->sbm_cm.sbdev_bp;
+ t_bp = t_mp->sbm_cm.sbdev_bp;
+
+ lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
+ (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
+ }
+
+ drmach_copy_rename(cr_id);
+
+ /*
+ * Clear pages that have been marked as faulty since we are
+ * changing the physical memory for the pages.
+ */
+ dr_memlist_clrpages(r_ml);
+
+ /* Resume the OS. */
+ dr_resume(srhp);
+
+ copytime = lbolt - copytime;
+
+ dr_release_sr_handle(srhp);
+ err = drmach_copy_rename_fini(cr_id);
+ if (err)
+ DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
+
+ affinity_clear();
+
+ PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
+ f, copytime, copytime / hz);
+
+ memlist_delete(r_ml);
+
+ /* Unretire any pages cleared after copy-rename */
+ page_unretire_pages();
+
+ /* return -1 if dr_suspend or copy/rename recorded an error */
+ return (err == NULL ? 0 : -1);
+}
+
+/*
+ * If the detaching node contains memory that is "non-permanent",
+ * then the memory addresses are simply cleared. If the memory
+ * is non-relocatable, then do a copy-rename.
+ */
+void
+dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ int rv = 0;
+ dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp;
+ dr_mem_unit_t *t_mp;
+ dr_state_t state;
+ static fn_t f = "dr_detach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ /* lookup target mem unit and target board structure, if any */
+ if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
+ t_mp = s_mp->sbm_peer;
+ ASSERT(t_mp != NULL);
+ ASSERT(t_mp->sbm_peer == s_mp);
+ } else {
+ t_mp = NULL;
+ }
+
+ /* verify mem unit's state is UNREFERENCED */
+ state = s_mp->sbm_cm.sbdev_state;
+ if (state != DR_STATE_UNREFERENCED) {
+ dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
+ return;
+ }
+
+ /* verify target mem unit's state is UNREFERENCED, if any */
+ if (t_mp != NULL) {
+ state = t_mp->sbm_cm.sbdev_state;
+ if (state != DR_STATE_UNREFERENCED) {
+ dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
+ return;
+ }
+ }
+
+ /*
+ * Scrub deleted memory. This will cause all cachelines
+ * referencing the memory to only be in the local cpu's
+ * ecache.
+ */
+ if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
+ /* no del mlist for src<=dst mem size copy/rename */
+ if (s_mp->sbm_del_mlist)
+ dr_mem_ecache_scrub(s_mp, s_mp->sbm_del_mlist);
+ }
+ if (t_mp != NULL && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
+ ASSERT(t_mp->sbm_del_mlist);
+ dr_mem_ecache_scrub(t_mp, t_mp->sbm_del_mlist);
+ }
+
+ /*
+ * If there is no target board (no copy/rename was needed), then
+ * we're done!
+ */
+ if (t_mp == NULL) {
+ sbd_error_t *err;
+ /*
+ * Reprogram interconnect hardware and disable
+ * memory controllers for memory node that's going away.
+ */
+
+ err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
+ rv = -1;
+ }
+ } else {
+ rv = dr_move_memory(hp, s_mp, t_mp);
+ PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
+ f,
+ rv ? "FAILED" : "COMPLETED",
+ s_mp->sbm_cm.sbdev_bp->b_num,
+ t_mp->sbm_cm.sbdev_bp->b_num);
+
+ if (rv != 0)
+ (void) dr_cancel_mem(s_mp);
+ }
+
+ if (rv == 0) {
+ sbd_error_t *err;
+
+ dr_lock_status(hp->h_bd);
+ err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id,
+ DRMACH_DEVI_REMOVE);
+ dr_unlock_status(hp->h_bd);
+ if (err)
+ sbd_err_clear(&err);
+ }
+}
+
+#ifndef _STARFIRE
+/*
+ * XXX workaround for certain lab configurations (see also starcat drmach.c)
+ * Temporary code to get around observed incorrect results from
+ * kphysm_del_span_query when the queried span contains address ranges
+ * not occupied by memory lying between ranges that do have memory.
+ * This routine acts as a wrapper to kphysm_del_span_query. It builds
+ * a memlist from phys_install of spans that exist between base and
+ * base + npages, inclusively. Kphysm_del_span_query is called for each
+ * node in the memlist with the results accumulated in *mp.
+ */
+static int
+dr_del_span_query(pfn_t base, pgcnt_t npages, memquery_t *mp)
+{
+ uint64_t pa = _ptob64(base);
+ uint64_t sm = ~ (137438953472ull - 1);	/* 2^37 == 128GB slice mask */
+ uint64_t sa = pa & sm;
+ struct memlist *mlist, *ml;
+ int rv;
+
+ npages = npages; /* silence lint */
+ memlist_read_lock();
+ mlist = memlist_dup(phys_install);
+ memlist_read_unlock();
+
+again:
+ for (ml = mlist; ml; ml = ml->next) {
+ if ((ml->address & sm) != sa) {
+ mlist = memlist_del_span(mlist, ml->address, ml->size);
+ goto again;
+ }
+ }
+
+ mp->phys_pages = 0;
+ mp->managed = 0;
+ mp->nonrelocatable = 0;
+ mp->first_nonrelocatable = (pfn_t)-1; /* XXX */
+ mp->last_nonrelocatable = 0;
+
+ for (ml = mlist; ml; ml = ml->next) {
+ memquery_t mq;
+
+ rv = kphysm_del_span_query(
+ _b64top(ml->address), _b64top(ml->size), &mq);
+ if (rv)
+ break;
+
+ mp->phys_pages += mq.phys_pages;
+ mp->managed += mq.managed;
+ mp->nonrelocatable += mq.nonrelocatable;
+
+ if (mq.nonrelocatable != 0) {
+ if (mq.first_nonrelocatable < mp->first_nonrelocatable)
+ mp->first_nonrelocatable =
+ mq.first_nonrelocatable;
+ if (mq.last_nonrelocatable > mp->last_nonrelocatable)
+ mp->last_nonrelocatable =
+ mq.last_nonrelocatable;
+ }
+ }
+
+ if (mp->nonrelocatable == 0)
+ mp->first_nonrelocatable = 0; /* XXX */
+
+ memlist_delete(mlist);
+ return (rv);
+}
+
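+/*
+ * Redirect all remaining kphysm_del_span_query() calls in this file
+ * to the wrapper above.
+ */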
+#define kphysm_del_span_query dr_del_span_query
+#endif /* _STARFIRE */
+
+/*
+ * NOTE: This routine is only partially smart about multiple
+ * mem-units. Need to make mem-status structure smart
+ * about them also.
+ */
+int
+dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int m, mix;
+ memdelstat_t mdst;
+ memquery_t mq;
+ dr_board_t *bp;
+ dr_mem_unit_t *mp;
+ sbd_mem_stat_t *msp;
+ static fn_t f = "dr_mem_status";
+
+ bp = hp->h_bd;
+ devset &= DR_DEVS_PRESENT(bp);
+
+ for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
+ int rv;
+ sbd_error_t *err;
+ drmach_status_t pstat;
+ dr_mem_unit_t *p_mp;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
+ continue;
+
+ mp = dr_get_mem_unit(bp, m);
+
+ if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
+ continue;
+
+ /* fetch platform status */
+ err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ continue;
+ }
+
+ msp = &dsp->d_mem;
+ bzero((caddr_t)msp, sizeof (*msp));
+
+ strncpy(msp->ms_cm.c_id.c_name, pstat.type,
+ sizeof (msp->ms_cm.c_id.c_name));
+ msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
+ msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
+ msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
+ msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
+ msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
+ msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
+
+ msp->ms_totpages = mp->sbm_npages;
+ msp->ms_basepfn = mp->sbm_basepfn;
+ msp->ms_pageslost = mp->sbm_pageslost;
+ msp->ms_cage_enabled = kcage_on;
+
+ if (mp->sbm_flags & DR_MFLAG_RESERVED)
+ p_mp = mp->sbm_peer;
+ else
+ p_mp = NULL;
+
+ if (p_mp == NULL) {
+ msp->ms_peer_is_target = 0;
+ msp->ms_peer_ap_id[0] = '\0';
+ } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ char *minor;
+
+ /*
+ * b_dip doesn't have to be held for ddi_pathname()
+ * because the board struct (dr_board_t) will be
+ * destroyed before b_dip detaches.
+ */
+ (void) ddi_pathname(bp->b_dip, path);
+ minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
+
+ snprintf(msp->ms_peer_ap_id,
+ sizeof (msp->ms_peer_ap_id), "%s%s",
+ path, (minor == NULL) ? "" : minor);
+
+ kmem_free(path, MAXPATHLEN);
+
+ if (p_mp->sbm_flags & DR_MFLAG_TARGET)
+ msp->ms_peer_is_target = 1;
+ }
+
+ if (mp->sbm_flags & DR_MFLAG_RELOWNER)
+ rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
+ else
+ rv = KPHYSM_EHANDLE; /* force 'if' to fail */
+
+ if (rv == KPHYSM_OK) {
+ /*
+ * Any pages above managed are "free",
+ * i.e. they have been collected.
+ */
+ msp->ms_detpages += (uint_t)(mdst.collected +
+ mdst.phys_pages - mdst.managed);
+ } else {
+ /*
+ * If we're UNREFERENCED or UNCONFIGURED,
+ * then the number of detached pages is
+ * however many pages are on the board.
+ * I.e. detached = not in use by OS.
+ */
+ switch (msp->ms_cm.c_ostate) {
+ /*
+ * changed to use cfgadm states
+ *
+ * was:
+ * case DR_STATE_UNREFERENCED:
+ * case DR_STATE_UNCONFIGURED:
+ */
+ case SBD_STAT_UNCONFIGURED:
+ msp->ms_detpages = msp->ms_totpages;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /*
+ * kphysm_del_span_query can report non-reloc pages = total
+ * pages for memory that is not yet configured
+ */
+ if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
+
+ rv = kphysm_del_span_query(mp->sbm_basepfn,
+ mp->sbm_npages, &mq);
+
+ if (rv == KPHYSM_OK) {
+ msp->ms_managed_pages = mq.managed;
+ msp->ms_noreloc_pages = mq.nonrelocatable;
+ msp->ms_noreloc_first =
+ mq.first_nonrelocatable;
+ msp->ms_noreloc_last =
+ mq.last_nonrelocatable;
+ msp->ms_cm.c_sflags = 0;
+ if (mq.nonrelocatable) {
+ SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
+ msp->ms_cm.c_sflags);
+ }
+ } else {
+ PR_MEM("%s: kphysm_del_span_query() = %d\n",
+ f, rv);
+ }
+ }
+
+ /*
+ * Check source unit state during copy-rename
+ */
+ if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
+ (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
+ mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
+ msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
+
+ mix++;
+ dsp++;
+ }
+
+ return (mix);
+}
+
+int
+dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ int err_flag = 0;
+ int d;
+ sbd_error_t *err;
+ static fn_t f = "dr_pre_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ dr_state_t state;
+
+ cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
+
+ state = mp->sbm_cm.sbdev_state;
+ switch (state) {
+ case DR_STATE_UNCONFIGURED:
+ PR_MEM("%s: recovering from UNCONFIG for %s\n",
+ f,
+ mp->sbm_cm.sbdev_path);
+
+ /* use memlist cached by dr_post_detach_mem_unit */
+ ASSERT(mp->sbm_mlist != NULL);
+ PR_MEM("%s: re-configuring cached memlist for %s:\n",
+ f, mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(mp->sbm_mlist);
+
+ /* kphysm del handle should have been freed */
+ ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
+
+ /*FALLTHROUGH*/
+
+ case DR_STATE_CONNECTED:
+ PR_MEM("%s: reprogramming mem hardware on %s\n",
+ f, mp->sbm_cm.sbdev_bp->b_path);
+
+ PR_MEM("%s: enabling %s\n",
+ f, mp->sbm_cm.sbdev_path);
+
+ err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ err_flag = 1;
+ }
+ break;
+
+ default:
+ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
+ err_flag = 1;
+ break;
+ }
+
+ /* exit for loop if error encountered */
+ if (err_flag)
+ break;
+ }
+
+ return (err_flag ? -1 : 0);
+}
+
+int
+dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ int d;
+ static fn_t f = "dr_post_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ struct memlist *mlist, *ml;
+
+ mlist = dr_get_memlist(mp);
+ if (mlist == NULL) {
+ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL);
+ continue;
+ }
+
+ /*
+ * Verify the memory really did successfully attach
+ * by checking for its existence in phys_install.
+ */
+ memlist_read_lock();
+ if (memlist_intersect(phys_install, mlist) == 0) {
+ memlist_read_unlock();
+
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+
+ PR_MEM("%s: %s memlist not in phys_install",
+ f, mp->sbm_cm.sbdev_path);
+
+ memlist_delete(mlist);
+ continue;
+ }
+ memlist_read_unlock();
+
+ for (ml = mlist; ml != NULL; ml = ml->next) {
+ sbd_error_t *err;
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id,
+ ml->address,
+ ml->size);
+ if (err)
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ }
+
+ memlist_delete(mlist);
+
+ /*
+ * Destroy cached memlist, if any.
+ * There will be a cached memlist in sbm_mlist if
+ * this board is being configured directly after
+ * an unconfigure.
+ * To support this transition, dr_post_detach_mem
+ * left a copy of the last known memlist in sbm_mlist.
+ * This memlist could differ from any derived from
+ * hardware if while this memunit was last configured
+ * the system detected and deleted bad pages from
+ * phys_install. The location of those bad pages
+ * will be reflected in the cached memlist.
+ */
+ if (mp->sbm_mlist) {
+ memlist_delete(mp->sbm_mlist);
+ mp->sbm_mlist = NULL;
+ }
+
+/*
+ * TODO: why is this call to dr_init_mem_unit_data here?
+ * this has been done at discovery or connect time, so this is
+ * probably redundant and unnecessary.
+ */
+ dr_init_mem_unit_data(mp);
+ }
+
+ return (0);
+}
+
+int
+dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ int d;
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+
+ cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
+ }
+
+ return (0);
+}
+
+
+int
+dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ int d, rv;
+ static fn_t f = "dr_post_detach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ rv = 0;
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+
+ ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
+
+ if (dr_post_detach_mem_unit(mp))
+ rv = -1;
+ }
+
+ return (rv);
+}
+
+static void
+dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
+{
+ static fn_t f = "dr_add_memory_spans";
+
+ PR_MEM("%s...", f);
+ PR_MEMLIST_DUMP(ml);
+
+#ifdef DEBUG
+ memlist_read_lock();
+ if (memlist_intersect(phys_install, ml)) {
+ PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
+ }
+ memlist_read_unlock();
+#endif
+
+ for (; ml; ml = ml->next) {
+ pfn_t base;
+ pgcnt_t npgs;
+ int rv;
+ sbd_error_t *err;
+
+ base = _b64top(ml->address);
+ npgs = _b64top(ml->size);
+
+ rv = kphysm_add_memory_dynamic(base, npgs);
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id,
+ ml->address,
+ ml->size);
+
+ if (err)
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+
+ if (rv != KPHYSM_OK) {
+ cmn_err(CE_WARN, "%s:"
+ " unexpected kphysm_add_memory_dynamic"
+ " return value %d;"
+ " basepfn=0x%lx, npages=%ld\n",
+ f, rv, base, npgs);
+
+ continue;
+ }
+ }
+}
+
+static int
+dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
+{
+ uint64_t sz = s_mp->sbm_slice_size;
+ uint64_t sm = sz - 1;
+ /* old and new below refer to PAs before and after copy-rename */
+ uint64_t s_old_basepa, s_new_basepa;
+ uint64_t t_old_basepa, t_new_basepa;
+ uint64_t t_new_smallsize = 0;
+ dr_mem_unit_t *t_mp, *x_mp;
+ struct memlist *ml;
+ int rv;
+ sbd_error_t *err;
+ static fn_t f = "dr_post_detach_mem_unit";
+
+ PR_MEM("%s...\n", f);
+
+ /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
+ PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
+ f, s_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
+
+ /* sanity check */
+ ASSERT(s_mp->sbm_del_mlist == NULL ||
+ (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
+
+ if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
+ t_mp = s_mp->sbm_peer;
+ ASSERT(t_mp != NULL);
+ ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
+ ASSERT(t_mp->sbm_peer == s_mp);
+
+ ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
+ ASSERT(t_mp->sbm_del_mlist);
+
+ PR_MEM("%s: target %s: deleted memlist:\n",
+ f, t_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
+ } else {
+ /* this is no target unit */
+ t_mp = NULL;
+ }
+
+ /*
+ * Verify the memory really did successfully detach
+ * by checking for its non-existence in phys_install.
+ */
+ rv = 0;
+ memlist_read_lock();
+ if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
+ x_mp = s_mp;
+ rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
+ }
+ if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
+ x_mp = t_mp;
+ rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
+ }
+ memlist_read_unlock();
+
+ if (rv) {
+ /* error: memlist still in phys_install */
+ DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
+ }
+
+ /*
+ * clean mem unit state and bail out if an error has been recorded.
+ */
+ rv = 0;
+ if (s_mp->sbm_cm.sbdev_error) {
+ PR_MEM("%s: %s flags=%x", f,
+ s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
+ DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
+ DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
+ dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
+ rv = -1;
+ }
+ if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
+ PR_MEM("%s: %s flags=%x", f,
+ s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
+ DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
+ DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
+ dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
+ rv = -1;
+ }
+ if (rv)
+ goto cleanup;
+
+ s_old_basepa = _ptob64(s_mp->sbm_basepfn);
+ err = drmach_mem_get_base_physaddr(s_mp->sbm_cm.sbdev_id,
+ &s_new_basepa);
+ ASSERT(err == NULL);
+
+ PR_MEM("%s:s_old_basepa: 0x%llx\n", f, s_old_basepa);
+ PR_MEM("%s:s_new_basepa: 0x%llx\n", f, s_new_basepa);
+
+ if (t_mp != NULL) {
+ struct memlist *s_copy_mlist;
+
+ t_old_basepa = _ptob64(t_mp->sbm_basepfn);
+ err = drmach_mem_get_base_physaddr(t_mp->sbm_cm.sbdev_id,
+ &t_new_basepa);
+ ASSERT(err == NULL);
+
+ PR_MEM("%s:t_old_basepa: 0x%llx\n", f, t_old_basepa);
+ PR_MEM("%s:t_new_basepa: 0x%llx\n", f, t_new_basepa);
+
+ /*
+ * Construct copy list with original source addresses.
+ * Used to add back excess target mem.
+ */
+ s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
+ for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
+ s_copy_mlist = memlist_del_span(s_copy_mlist,
+ ml->address, ml->size);
+ }
+
+ PR_MEM("%s: source copy list:\n:", f);
+ PR_MEMLIST_DUMP(s_copy_mlist);
+
+ /*
+ * We had to swap mem-units, so update
+ * memlists accordingly with new base
+ * addresses.
+ */
+ for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
+ ml->address -= t_old_basepa;
+ ml->address += t_new_basepa;
+ }
+
+ /*
+ * There is no need to explicitly rename the target delete
+ * memlist, because sbm_del_mlist and sbm_mlist always
+ * point to the same memlist for a copy/rename operation.
+ */
+ ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
+
+ PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
+ PR_MEMLIST_DUMP(t_mp->sbm_mlist);
+
+ for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
+ ml->address -= s_old_basepa;
+ ml->address += s_new_basepa;
+ }
+
+ PR_MEM("%s: renamed source memlist:\n", f);
+ PR_MEMLIST_DUMP(s_mp->sbm_mlist);
+
+ /*
+ * Keep track of dynamically added segments
+ * since they cannot be split if we need to delete
+ * excess source memory later for this board.
+ */
+ if (t_mp->sbm_dyn_segs)
+ memlist_delete(t_mp->sbm_dyn_segs);
+ t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
+ s_mp->sbm_dyn_segs = NULL;
+
+ /*
+ * If the target memory range with the new target base PA
+ * extends beyond the usable slice, prevent any "target excess"
+ * from being added back after this copy/rename and
+ * calculate the new smaller size of the target board
+ * to be set as part of target cleanup. The base + npages
+ * must only include the range of memory up to the end of
+ * this slice. This will only be used after a category 4
+ * large-to-small target type copy/rename - see comments
+ * in dr_select_mem_target.
+ */
+ if (((t_new_basepa & sm) + _ptob64(t_mp->sbm_npages)) > sz) {
+ t_new_smallsize = sz - (t_new_basepa & sm);
+ }
+
+ if (s_mp->sbm_flags & DR_MFLAG_MEMRESIZE &&
+ t_new_smallsize == 0) {
+ struct memlist *t_excess_mlist;
+
+ /*
+ * Add back excess target memory.
+ * Subtract out the portion of the target memory
+ * node that was taken over by the source memory
+ * node.
+ */
+ t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
+ for (ml = s_copy_mlist; ml; ml = ml->next) {
+ t_excess_mlist =
+ memlist_del_span(t_excess_mlist,
+ ml->address, ml->size);
+ }
+
+ /*
+ * Update dynamically added segs
+ */
+ for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
+ t_mp->sbm_dyn_segs =
+ memlist_del_span(t_mp->sbm_dyn_segs,
+ ml->address, ml->size);
+ }
+ for (ml = t_excess_mlist; ml; ml = ml->next) {
+ t_mp->sbm_dyn_segs =
+ memlist_cat_span(t_mp->sbm_dyn_segs,
+ ml->address, ml->size);
+ }
+ PR_MEM("%s: %s: updated dynamic seg list:\n",
+ f, t_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
+
+ PR_MEM("%s: adding back remaining portion"
+ " of %s, memlist:\n",
+ f, t_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(t_excess_mlist);
+
+ dr_add_memory_spans(s_mp, t_excess_mlist);
+ memlist_delete(t_excess_mlist);
+ }
+ memlist_delete(s_copy_mlist);
+
+#ifdef DEBUG
+ /*
+ * Renaming s_mp->sbm_del_mlist is not necessary. This
+ * list is not used beyond this point, and in fact, is
+ * disposed of at the end of this function.
+ */
+ for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
+ ml->address -= s_old_basepa;
+ ml->address += s_new_basepa;
+ }
+
+ PR_MEM("%s: renamed source delete memlist", f);
+ PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
+#endif
+
+ }
+
+ if (t_mp != NULL) {
+ /* delete target's entire address space */
+ err = drmach_mem_del_span(
+ t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
+ if (err)
+ DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
+ ASSERT(err == NULL);
+
+ /*
+ * After the copy/rename, the original address space
+ * for the source board (which is now located on the
+ * target board) may now have some excess to be deleted.
+ * The amount is calculated by masking the slice
+ * info and keeping the slice offset from t_new_basepa.
+ */
+ err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
+ s_old_basepa & ~ sm, t_new_basepa & sm);
+ if (err)
+ DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
+ ASSERT(err == NULL);
+
+ } else {
+ /* delete board's entire address space */
+ err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
+ s_old_basepa & ~ sm, sz);
+ if (err)
+ DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
+ ASSERT(err == NULL);
+ }
+
+cleanup:
+ /* clean up target mem unit */
+ if (t_mp != NULL) {
+ memlist_delete(t_mp->sbm_del_mlist);
+ /* no need to delete sbm_mlist, it shares sbm_del_mlist */
+
+ t_mp->sbm_del_mlist = NULL;
+ t_mp->sbm_mlist = NULL;
+ t_mp->sbm_peer = NULL;
+ t_mp->sbm_flags = 0;
+ t_mp->sbm_cm.sbdev_busy = 0;
+ dr_init_mem_unit_data(t_mp);
+
+ /* reduce target size if new PAs go past end of usable slice */
+ if (t_new_smallsize > 0) {
+ t_mp->sbm_npages = _b64top(t_new_smallsize);
+ PR_MEM("%s: target new size 0x%llx bytes\n",
+ f, t_new_smallsize);
+ }
+ }
+ if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
+ /*
+ * now that copy/rename has completed, undo this
+ * work that was done in dr_release_mem_done.
+ */
+ DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
+ DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
+ dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
+ }
+
+ /*
+ * clean up (source) board's mem unit structure.
+ * NOTE: sbm_mlist is retained if no error has been recorded (in other
+ * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
+ * referred to elsewhere as the cached memlist. The cached memlist
+ * is used to re-attach (configure back in) this memunit from the
+ * unconfigured state. The memlist is retained because it may
+ * represent bad pages that were detected while the memory was
+ * configured into the OS. The OS deletes bad pages from phys_install.
+ * Those deletes, if any, will be represented in the cached mlist.
+ */
+ if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
+ memlist_delete(s_mp->sbm_del_mlist);
+
+ if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
+ memlist_delete(s_mp->sbm_mlist);
+ s_mp->sbm_mlist = NULL;
+ }
+
+ if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
+ memlist_delete(s_mp->sbm_dyn_segs);
+ s_mp->sbm_dyn_segs = NULL;
+ }
+
+ s_mp->sbm_del_mlist = NULL;
+ s_mp->sbm_peer = NULL;
+ s_mp->sbm_flags = 0;
+ s_mp->sbm_cm.sbdev_busy = 0;
+ dr_init_mem_unit_data(s_mp);
+
+ PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(s_mp->sbm_mlist);
+
+ return (0);
+}
+
+/*
+ * Successful return from this function will have the memory
+ * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
+ * and waiting. This routine's job is to select the memory that
+ * actually has to be released (detached) which may not necessarily
+ * be the same memory node that came in via devlist[],
+ * i.e. a copy-rename is needed.
+ */
+int
+dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+ int err_flag = 0;
+ static fn_t f = "dr_pre_release_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ int rv;
+ memquery_t mq;
+ struct memlist *ml;
+
+ if (mp->sbm_cm.sbdev_error) {
+ err_flag = 1;
+ continue;
+ } else if (!kcage_on) {
+ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
+ err_flag = 1;
+ continue;
+ }
+
+ if (mp->sbm_flags & DR_MFLAG_RESERVED) {
+ /*
+ * Board is currently involved in a delete
+ * memory operation. Can't detach this guy until
+ * that operation completes.
+ */
+ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
+ err_flag = 1;
+ break;
+ }
+
+ /*
+ * Check whether the detaching memory requires a
+ * copy-rename.
+ */
+ ASSERT(mp->sbm_npages != 0);
+ rv = kphysm_del_span_query(
+ mp->sbm_basepfn, mp->sbm_npages, &mq);
+ if (rv != KPHYSM_OK) {
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+ err_flag = 1;
+ break;
+ }
+
+ if (mq.nonrelocatable != 0) {
+ if (!(dr_cmd_flags(hp) &
+ (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
+ /* caller wasn't prompted for a suspend */
+ dr_dev_err(CE_WARN, &mp->sbm_cm,
+ ESBD_QUIESCE_REQD);
+ err_flag = 1;
+ break;
+ }
+ }
+
+ /* flags should be clean at this time */
+ ASSERT(mp->sbm_flags == 0);
+
+ ASSERT(mp->sbm_mlist == NULL); /* should be null */
+ ASSERT(mp->sbm_del_mlist == NULL); /* should be null */
+ if (mp->sbm_mlist != NULL) {
+ memlist_delete(mp->sbm_mlist);
+ mp->sbm_mlist = NULL;
+ }
+
+ ml = dr_get_memlist(mp);
+ if (ml == NULL) {
+ err_flag = 1;
+ PR_MEM("%s: no memlist found for %s\n",
+ f, mp->sbm_cm.sbdev_path);
+ continue;
+ }
+
+ /* allocate a kphysm handle */
+ rv = kphysm_del_gethandle(&mp->sbm_memhandle);
+ if (rv != KPHYSM_OK) {
+ memlist_delete(ml);
+
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+ err_flag = 1;
+ break;
+ }
+ mp->sbm_flags |= DR_MFLAG_RELOWNER;
+
+ if ((mq.nonrelocatable != 0) ||
+ dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
+ /*
+ * Either the detaching memory node contains
+ * non-reloc memory or we failed to reserve the
+ * detaching memory node (which did _not_ have
+ * any non-reloc memory, i.e. some non-reloc mem
+ * got onboard).
+ */
+
+ if (dr_select_mem_target(hp, mp, ml)) {
+ int rv;
+
+ /*
+ * We had no luck locating a target
+ * memory node to be the recipient of
+ * the non-reloc memory on the node
+ * we're trying to detach.
+ * Clean up by disposing of the mem handle
+ * and the mem list.
+ */
+ rv = kphysm_del_release(mp->sbm_memhandle);
+ if (rv != KPHYSM_OK) {
+ /*
+ * can do nothing but complain
+ * and hope it is helpful for debug
+ */
+ cmn_err(CE_WARN, "%s: unexpected"
+ " kphysm_del_release return"
+ " value %d",
+ f, rv);
+ }
+ mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
+
+ memlist_delete(ml);
+
+ /* make sure sbm_flags is clean */
+ ASSERT(mp->sbm_flags == 0);
+
+ dr_dev_err(CE_WARN,
+ &mp->sbm_cm, ESBD_NO_TARGET);
+
+ err_flag = 1;
+ break;
+ }
+
+ /*
+ * ml is not memlist_delete'd here because
+ * it has been assigned to mp->sbm_mlist
+ * by dr_select_mem_target.
+ */
+ } else {
+ /* no target needed to detach this board */
+ mp->sbm_flags |= DR_MFLAG_RESERVED;
+ mp->sbm_peer = NULL;
+ mp->sbm_del_mlist = ml;
+ mp->sbm_mlist = ml;
+ mp->sbm_cm.sbdev_busy = 1;
+ }
+#ifdef DEBUG
+ ASSERT(mp->sbm_mlist != NULL);
+
+ if (mp->sbm_flags & DR_MFLAG_SOURCE) {
+ PR_MEM("%s: release of %s requires copy/rename;"
+ " selected target board %s\n",
+ f,
+ mp->sbm_cm.sbdev_path,
+ mp->sbm_peer->sbm_cm.sbdev_path);
+ } else {
+ PR_MEM("%s: copy/rename not required to release %s\n",
+ f, mp->sbm_cm.sbdev_path);
+ }
+
+ ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
+ ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
+#endif
+ }
+
+ return (err_flag ? -1 : 0);
+}
+
+void
+dr_release_mem_done(dr_common_unit_t *cp)
+{
+ dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp;
+ dr_mem_unit_t *t_mp, *mp;
+ int rv;
+ static fn_t f = "dr_release_mem_done";
+
+ /*
+ * This unit will be flagged with DR_MFLAG_SOURCE, if it
+ * has a target unit.
+ */
+ if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
+ t_mp = s_mp->sbm_peer;
+ ASSERT(t_mp != NULL);
+ ASSERT(t_mp->sbm_peer == s_mp);
+ ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
+ ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
+ } else {
+ /* this is no target unit */
+ t_mp = NULL;
+ }
+
+ /* free delete handle */
+ ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
+ ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
+ rv = kphysm_del_release(s_mp->sbm_memhandle);
+ if (rv != KPHYSM_OK) {
+ /*
+ * can do nothing but complain
+ * and hope it is helpful for debug
+ */
+ cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
+ " return value %d", f, rv);
+ }
+ s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
+
+ /*
+ * If an error was encountered during release, clean up
+ * the source (and target, if present) unit data.
+ */
+/* XXX Can we know that sbdev_error was encountered during release? */
+ if (s_mp->sbm_cm.sbdev_error != NULL) {
+ PR_MEM("%s: %s: error %d noted\n",
+ f,
+ s_mp->sbm_cm.sbdev_path,
+ s_mp->sbm_cm.sbdev_error->e_code);
+
+ if (t_mp != NULL) {
+ ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
+ t_mp->sbm_del_mlist = NULL;
+
+ if (t_mp->sbm_mlist != NULL) {
+ memlist_delete(t_mp->sbm_mlist);
+ t_mp->sbm_mlist = NULL;
+ }
+
+ t_mp->sbm_peer = NULL;
+ t_mp->sbm_flags = 0;
+ t_mp->sbm_cm.sbdev_busy = 0;
+ }
+
+ if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
+ memlist_delete(s_mp->sbm_del_mlist);
+ s_mp->sbm_del_mlist = NULL;
+
+ if (s_mp->sbm_mlist != NULL) {
+ memlist_delete(s_mp->sbm_mlist);
+ s_mp->sbm_mlist = NULL;
+ }
+
+ s_mp->sbm_peer = NULL;
+ s_mp->sbm_flags = 0;
+ s_mp->sbm_cm.sbdev_busy = 0;
+
+ /* bail out */
+ return;
+ }
+
+ DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
+ dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
+
+ if (t_mp != NULL) {
+ /*
+ * the kphysm delete operation that drained the source
+ * board also drained this target board. Since the source
+ * board drain is now known to have succeeded, we know this
+ * target board is drained too.
+ *
+ * Because DR_DEV_SET_RELEASED and dr_device_transition
+ * are done here, dr_release_dev_done should not
+ * fail.
+ */
+ DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
+ dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
+
+ /*
+ * NOTE: do not transition target's board state,
+ * even if the mem-unit was the last configured
+ * unit of the board. When copy/rename completes,
+ * this mem-unit will be transitioned back to
+ * the configured state. In the meantime, the
+ * board's state must remain as is.
+ */
+ }
+
+ /* if board(s) had deleted memory, verify it is gone */
+ rv = 0;
+ memlist_read_lock();
+ if (s_mp->sbm_del_mlist != NULL) {
+ mp = s_mp;
+ rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
+ }
+ if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
+ mp = t_mp;
+ rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
+ }
+ memlist_read_unlock();
+ if (rv) {
+ cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
+ "deleted memory still found in phys_install",
+ f,
+ (mp == t_mp ? "target " : ""),
+ mp->sbm_cm.sbdev_bp->b_num,
+ mp->sbm_cm.sbdev_unum);
+
+ DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
+ return;
+ }
+
+ s_mp->sbm_flags |= DR_MFLAG_RELDONE;
+ if (t_mp != NULL)
+ t_mp->sbm_flags |= DR_MFLAG_RELDONE;
+
+ /* this should not fail */
+ if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
+ /* catch this in debug kernels */
+ ASSERT(0);
+ return;
+ }
+
+ PR_MEM("%s: marking %s release DONE\n",
+ f, s_mp->sbm_cm.sbdev_path);
+
+ s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
+
+ if (t_mp != NULL) {
+ /* should not fail */
+ rv = dr_release_dev_done(&t_mp->sbm_cm);
+ if (rv != 0) {
+ /* catch this in debug kernels */
+ ASSERT(0);
+ return;
+ }
+
+ PR_MEM("%s: marking %s release DONE\n",
+ f, t_mp->sbm_cm.sbdev_path);
+
+ t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
+ }
+}
+
+/*ARGSUSED*/
+int
+dr_disconnect_mem(dr_mem_unit_t *mp)
+{
+ static fn_t f = "dr_disconnect_mem";
+ update_membounds_t umb;
+
+#ifdef DEBUG
+ int state = mp->sbm_cm.sbdev_state;
+ ASSERT(state == DR_STATE_CONNECTED ||
+ state == DR_STATE_UNCONFIGURED);
+#endif
+
+ PR_MEM("%s...\n", f);
+
+ if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
+ memlist_delete(mp->sbm_del_mlist);
+ mp->sbm_del_mlist = NULL;
+
+ if (mp->sbm_mlist) {
+ memlist_delete(mp->sbm_mlist);
+ mp->sbm_mlist = NULL;
+ }
+
+ /*
+ * Remove memory from lgroup
+ * For now, only board info is required.
+ */
+ umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
+ umb.u_base = (uint64_t)-1;
+ umb.u_len = (uint64_t)-1;
+
+ lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
+
+ return (0);
+}
+
+int
+dr_cancel_mem(dr_mem_unit_t *s_mp)
+{
+ dr_mem_unit_t *t_mp;
+ dr_state_t state;
+ static fn_t f = "dr_cancel_mem";
+
+ state = s_mp->sbm_cm.sbdev_state;
+
+ if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
+ /* must cancel source board, not target board */
+ /* TODO: set error */
+ return (-1);
+ } else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
+ t_mp = s_mp->sbm_peer;
+ ASSERT(t_mp != NULL);
+ ASSERT(t_mp->sbm_peer == s_mp);
+
+ /* must always match the source board's state */
+/* TODO: is this assertion correct? */
+ ASSERT(t_mp->sbm_cm.sbdev_state == state);
+ } else {
+ /* this is no target unit */
+ t_mp = NULL;
+ }
+
+ switch (state) {
+ case DR_STATE_UNREFERENCED: /* state set by dr_release_dev_done */
+ ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
+
+ if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
+ PR_MEM("%s: undoing target %s memory delete\n",
+ f, t_mp->sbm_cm.sbdev_path);
+ dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
+
+ DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
+ }
+
+ if (s_mp->sbm_del_mlist != NULL) {
+ PR_MEM("%s: undoing %s memory delete\n",
+ f, s_mp->sbm_cm.sbdev_path);
+
+ dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
+ }
+
+ /*FALLTHROUGH*/
+
+/* TODO: should no longer be possible to see the release state here */
+ case DR_STATE_RELEASE: /* state set by dr_release_mem_done */
+
+ ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
+
+ if (t_mp != NULL) {
+ ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
+ t_mp->sbm_del_mlist = NULL;
+
+ if (t_mp->sbm_mlist != NULL) {
+ memlist_delete(t_mp->sbm_mlist);
+ t_mp->sbm_mlist = NULL;
+ }
+
+ t_mp->sbm_peer = NULL;
+ t_mp->sbm_flags = 0;
+ t_mp->sbm_cm.sbdev_busy = 0;
+ dr_init_mem_unit_data(t_mp);
+
+ DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
+
+ dr_device_transition(
+ &t_mp->sbm_cm, DR_STATE_CONFIGURED);
+ }
+
+ if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
+ memlist_delete(s_mp->sbm_del_mlist);
+ s_mp->sbm_del_mlist = NULL;
+
+ if (s_mp->sbm_mlist != NULL) {
+ memlist_delete(s_mp->sbm_mlist);
+ s_mp->sbm_mlist = NULL;
+ }
+
+ s_mp->sbm_peer = NULL;
+ s_mp->sbm_flags = 0;
+ s_mp->sbm_cm.sbdev_busy = 0;
+ dr_init_mem_unit_data(s_mp);
+
+ return (0);
+
+ default:
+ PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
+ f, (int)state, s_mp->sbm_cm.sbdev_path);
+
+ return (-1);
+ }
+ /*NOTREACHED*/
+}
+
+void
+dr_init_mem_unit(dr_mem_unit_t *mp)
+{
+ dr_state_t new_state;
+
+
+ if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ mp->sbm_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ mp->sbm_cm.sbdev_cond = SBD_COND_OK;
+ } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
+ new_state = DR_STATE_OCCUPIED;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ }
+
+ if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
+ dr_init_mem_unit_data(mp);
+
+ /* delay transition until fully initialized */
+ dr_device_transition(&mp->sbm_cm, new_state);
+}
+
+static void
+dr_init_mem_unit_data(dr_mem_unit_t *mp)
+{
+ drmachid_t id = mp->sbm_cm.sbdev_id;
+ uint64_t bytes;
+ sbd_error_t *err;
+ static fn_t f = "dr_init_mem_unit_data";
+ update_membounds_t umb;
+
+ PR_MEM("%s...\n", f);
+
+ /* a little sanity checking */
+ ASSERT(mp->sbm_peer == NULL);
+ ASSERT(mp->sbm_flags == 0);
+
+ /* get basepfn of mem unit */
+ err = drmach_mem_get_base_physaddr(id, &bytes);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mp->sbm_basepfn = (pfn_t)-1;
+ } else
+ mp->sbm_basepfn = _b64top(bytes);
+
+ /* attempt to get number of pages from PDA */
+ err = drmach_mem_get_size(id, &bytes);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mp->sbm_npages = 0;
+ } else
+ mp->sbm_npages = _b64top(bytes);
+
+ /* if didn't work, calculate using memlist */
+ if (mp->sbm_npages == 0) {
+ struct memlist *ml, *mlist;
+ /*
+ * Either we couldn't open the PDA or our
+ * PDA has garbage in it. We must have the
+ * page count consistent, and whatever the
+ * OS states takes precedence over the PDA,
+ * so let's check the kernel.
+ */
+/* TODO: curious comment. it suggests pda query should happen if this fails */
+ PR_MEM("%s: PDA query failed for npages."
+ " Checking memlist for %s\n",
+ f, mp->sbm_cm.sbdev_path);
+
+ mlist = dr_get_memlist(mp);
+ for (ml = mlist; ml; ml = ml->next)
+ mp->sbm_npages += btop(ml->size);
+ memlist_delete(mlist);
+ }
+
+ err = drmach_mem_get_alignment(id, &bytes);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mp->sbm_alignment_mask = 0;
+ } else
+ mp->sbm_alignment_mask = _b64top(bytes);
+
+ err = drmach_mem_get_slice_size(id, &bytes);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mp->sbm_slice_size = 0; /* paranoia */
+ } else
+ mp->sbm_slice_size = bytes;
+
+ /*
+ * Add memory to lgroup
+ */
+ umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
+ umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
+ umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
+
+ lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
+
+ PR_MEM("%s: %s (basepfn = 0x%x, npgs = %d)\n",
+ f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
+}
+
+static int
+dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
+{
+ int err;
+ pfn_t base;
+ pgcnt_t npgs;
+ struct memlist *mc;
+ static fn_t f = "dr_reserve_mem_spans";
+
+ PR_MEM("%s...\n", f);
+
+ /*
+ * Walk the supplied memlist scheduling each span for removal
+ * with kphysm_del_span. It is possible that a span may intersect
+ * an area occupied by the cage.
+ */
+ for (mc = ml; mc != NULL; mc = mc->next) {
+ base = _b64top(mc->address);
+ npgs = _b64top(mc->size);
+
+ err = kphysm_del_span(*mhp, base, npgs);
+ if (err != KPHYSM_OK) {
+ cmn_err(CE_WARN, "%s memory reserve failed."
+ " unexpected kphysm_del_span return value %d;"
+ " basepfn=0x%lx npages=%ld",
+ f, err, base, npgs);
+
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+/* debug counters */
+int dr_smt_realigned;
+int dr_smt_preference[4];
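+/* dr_smt_preference[] tallies candidate mem-units per preference category (0..3) */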
+
+#ifdef DEBUG
+uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
+#endif
+
+/*
+ * Find and reserve a copy/rename target board suitable for the
+ * given source board.
+ * All boards in the system are examined and categorized in relation to
+ * their memory size versus the source board's memory size. Order of
+ * preference is:
+ * 1st: board has same memory size
+ * 2nd: board has larger memory size
+ * 3rd: board has smaller memory size
+ * 4th: board has smaller memory size, available memory will be reduced.
+ * Boards in categories 3 and 4 will have their MCs reprogrammed so that the
+ * address span to which the MC responds appropriately covers the
+ * nonrelocatable span of the source board.
+ */
+static int
+dr_select_mem_target(dr_handle_t *hp,
+ dr_mem_unit_t *s_mp, struct memlist *s_ml)
+{
+ pgcnt_t sz = _b64top(s_mp->sbm_slice_size);
+ pgcnt_t sm = sz - 1; /* mem_slice_mask */
+ pfn_t s_phi, t_phi;
+
+ int n_sets = 4; /* same, larger, smaller, clipped */
+ int preference; /* lower value is higher preference */
+ int n_units_per_set;
+ int idx;
+ dr_mem_unit_t **sets;
+
+ int t_bd;
+ int t_unit;
+ int rv;
+ int allow_src_memrange_modify;
+ int allow_targ_memrange_modify;
+ drmachid_t t_id;
+ dr_board_t *s_bp, *t_bp;
+ dr_mem_unit_t *t_mp, *c_mp;
+ struct memlist *d_ml, *t_ml, *x_ml;
+ memquery_t s_mq = {0};
+ static fn_t f = "dr_select_mem_target";
+
+ PR_MEM("%s...\n", f);
+
+ ASSERT(s_ml != NULL);
+
+ n_units_per_set = MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD;
+ sets = GETSTRUCT(dr_mem_unit_t *, n_units_per_set * n_sets);
+
+ s_bp = hp->h_bd;
+ /* calculate the offset into the slice of the last source board pfn */
+ ASSERT(s_mp->sbm_npages != 0);
+ s_phi = (s_mp->sbm_basepfn + s_mp->sbm_npages - 1) & sm;
+
+ allow_src_memrange_modify = drmach_allow_memrange_modify(s_bp->b_id);
+
+ /*
+ * Make one pass through all memory units on all boards
+ * and categorize them with respect to the source board.
+ */
+ for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
+ /*
+ * The board structs are a contiguous array
+ * so we take advantage of that to find the
+ * correct board struct pointer for a given
+ * board number.
+ */
+ t_bp = dr_lookup_board(t_bd);
+
+ /* source board can not be its own target */
+ if (s_bp->b_num == t_bp->b_num)
+ continue;
+
+ for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
+
+ t_mp = dr_get_mem_unit(t_bp, t_unit);
+
+ /* this memory node must be attached */
+ if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
+ continue;
+
+ /* source unit can not be its own target */
+ if (s_mp == t_mp) {
+ /* catch this in debug kernels */
+ ASSERT(0);
+ continue;
+ }
+
+ /*
+ * this memory node must not already be reserved
+ * by some other memory delete operation.
+ */
+ if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
+ continue;
+
+ /*
+ * Categorize the memory node.
+ * If this is a smaller memory node, create a
+ * temporary, edited copy of the source board's
+ * memlist containing only the span of the non-
+ * relocatable pages.
+ */
+ t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
+ t_id = t_mp->sbm_cm.sbdev_bp->b_id;
+ allow_targ_memrange_modify =
+ drmach_allow_memrange_modify(t_id);
+ if (t_mp->sbm_npages == s_mp->sbm_npages &&
+ t_phi == s_phi) {
+ preference = 0;
+ t_mp->sbm_slice_offset = 0;
+ } else if (t_mp->sbm_npages > s_mp->sbm_npages &&
+ t_phi > s_phi) {
+ /*
+ * Selecting this target will require modifying
+ * the source and/or target physical address
+ * ranges. Skip if not supported by platform.
+ */
+ if (!allow_src_memrange_modify ||
+ !allow_targ_memrange_modify) {
+ PR_MEM("%s: skip target %s, memory "
+ "range relocation not supported "
+ "by platform\n", f,
+ t_mp->sbm_cm.sbdev_path);
+ continue;
+ }
+ preference = 1;
+ t_mp->sbm_slice_offset = 0;
+ } else {
+ pfn_t pfn = 0;
+
+ /*
+ * Selecting this target will require modifying
+ * the source and/or target physical address
+ * ranges. Skip if not supported by platform.
+ */
+ if (!allow_src_memrange_modify ||
+ !allow_targ_memrange_modify) {
+ PR_MEM("%s: skip target %s, memory "
+ "range relocation not supported "
+ "by platform\n", f,
+ t_mp->sbm_cm.sbdev_path);
+ continue;
+ }
+
+ /*
+ * Check if its mc can be programmed to relocate
+ * the active address range to match the
+ * nonrelocatable span of the source board.
+ */
+ preference = 2;
+
+ if (s_mq.phys_pages == 0) {
+ /*
+ * find non-relocatable span on
+ * source board.
+ */
+ rv = kphysm_del_span_query(
+ s_mp->sbm_basepfn,
+ s_mp->sbm_npages, &s_mq);
+ if (rv != KPHYSM_OK) {
+ PR_MEM("%s: %s: unexpected"
+ " kphysm_del_span_query"
+ " return value %d;"
+ " basepfn 0x%lx, npages %ld\n",
+ f,
+ s_mp->sbm_cm.sbdev_path,
+ rv,
+ s_mp->sbm_basepfn,
+ s_mp->sbm_npages);
+
+ /* paranoia */
+ s_mq.phys_pages = 0;
+
+ continue;
+ }
+
+ /* more paranoia */
+ ASSERT(s_mq.phys_pages != 0);
+ ASSERT(s_mq.nonrelocatable != 0);
+
+					/*
+					 * This should not happen.  If it
+					 * does, it simply means that we
+					 * cannot proceed with qualifying
+					 * this target candidate.
+					 */
+ if (s_mq.nonrelocatable == 0)
+ continue;
+
+ PR_MEM("%s: %s: nonrelocatable"
+ " span (0x%lx..0x%lx)\n",
+ f,
+ s_mp->sbm_cm.sbdev_path,
+ s_mq.first_nonrelocatable,
+ s_mq.last_nonrelocatable);
+ }
+
+ /*
+ * Round down the starting pfn of the
+ * nonrelocatable span on the source board
+ * to nearest programmable boundary possible
+ * with this target candidate.
+ */
+ pfn = s_mq.first_nonrelocatable &
+ ~t_mp->sbm_alignment_mask;
+
+ /* skip candidate if memory is too small */
+ if (pfn + t_mp->sbm_npages <
+ s_mq.last_nonrelocatable)
+ continue;
+
+ /*
+ * reprogramming an mc to relocate its
+ * active address range means the beginning
+			 * address to which the DIMMs respond will
+ * be somewhere above the slice boundary
+ * address. The larger the size of memory
+ * on this unit, the more likely part of it
+ * will exist beyond the end of the slice.
+ * The portion of the memory that does is
+			 * unavailable to the system until the mc is
+			 * reprogrammed to a more favorable base
+ * address.
+ * An attempt is made to avoid the loss by
+ * recalculating the mc base address relative
+ * to the end of the slice. This may produce
+ * a more favorable result. If not, we lower
+ * the board's preference rating so that it
+			 * is one of the last candidate boards to be
+ * considered.
+ */
+ if ((pfn + t_mp->sbm_npages) & ~sm) {
+ pfn_t p;
+
+ ASSERT(sz >= t_mp->sbm_npages);
+
+ /*
+ * calculate an alternative starting
+ * address relative to the end of the
+ * slice's address space.
+ */
+ p = pfn & ~sm;
+ p = p + (sz - t_mp->sbm_npages);
+ p = p & ~t_mp->sbm_alignment_mask;
+
+ if ((p > s_mq.first_nonrelocatable) ||
+ (p + t_mp->sbm_npages <
+ s_mq.last_nonrelocatable)) {
+
+ /*
+ * alternative starting addr
+ * won't work. Lower preference
+ * rating of this board, since
+ * some number of pages will
+					 * be unavailable for use.
+ */
+ preference = 3;
+ } else {
+ dr_smt_realigned++;
+ pfn = p;
+ }
+ }
+
+ /*
+ * translate calculated pfn to an offset
+ * relative to the slice boundary. If the
+ * candidate board is selected, this offset
+ * will be used to calculate the values
+ * programmed into the mc.
+ */
+ t_mp->sbm_slice_offset = pfn & sm;
+ PR_MEM("%s: %s:"
+ " proposed mc offset 0x%lx\n",
+ f,
+ t_mp->sbm_cm.sbdev_path,
+ t_mp->sbm_slice_offset);
+ }
+
+ dr_smt_preference[preference]++;
+
+ /* calculate index to start of preference set */
+ idx = n_units_per_set * preference;
+ /* calculate offset to respective element */
+ idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
+
+ ASSERT(idx < n_units_per_set * n_sets);
+ sets[idx] = t_mp;
+ }
+ }
+
+ /*
+ * NOTE: this would be a good place to sort each candidate
+	 * set into some desired order, e.g. memory size in ascending
+ * order. Without an additional sorting step here, the order
+ * within a set is ascending board number order.
+ */
+
+ c_mp = NULL;
+ x_ml = NULL;
+ t_ml = NULL;
+ for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
+ memquery_t mq;
+
+ /* cleanup t_ml after previous pass */
+ if (t_ml != NULL) {
+ memlist_delete(t_ml);
+ t_ml = NULL;
+ }
+
+ /* get candidate target board mem unit */
+ t_mp = sets[idx];
+ if (t_mp == NULL)
+ continue;
+
+ /* get target board memlist */
+ t_ml = dr_get_memlist(t_mp);
+ if (t_ml == NULL) {
+ cmn_err(CE_WARN, "%s: no memlist for"
+			    " board %d, mem-unit %d",
+ f,
+ t_mp->sbm_cm.sbdev_bp->b_num,
+ t_mp->sbm_cm.sbdev_unum);
+
+ continue;
+ }
+
+ /* get appropriate source board memlist */
+ t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
+ if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
+ spgcnt_t excess;
+
+ /*
+ * make a copy of the source board memlist
+ * then edit it to remove the spans that
+ * are outside the calculated span of
+ * [pfn..s_mq.last_nonrelocatable].
+ */
+ if (x_ml != NULL)
+ memlist_delete(x_ml);
+
+ x_ml = memlist_dup(s_ml);
+ if (x_ml == NULL) {
+ PR_MEM("%s: memlist_dup failed\n", f);
+ /* TODO: should abort */
+ continue;
+ }
+
+ /* trim off lower portion */
+ excess = t_mp->sbm_slice_offset -
+ (s_mp->sbm_basepfn & sm);
+
+ if (excess > 0) {
+ x_ml = memlist_del_span(
+ x_ml,
+ _ptob64(s_mp->sbm_basepfn),
+ _ptob64(excess));
+ }
+ ASSERT(x_ml);
+
+ /*
+ * Since this candidate target board is smaller
+ * than the source board, s_mq must have been
+ * initialized in previous loop while processing
+ * this or some other candidate board.
+ * FIXME: this is weak.
+ */
+ ASSERT(s_mq.phys_pages != 0);
+
+ /* trim off upper portion */
+ excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
+ - (s_mq.last_nonrelocatable + 1);
+ if (excess > 0) {
+ pfn_t p;
+
+ p = s_mq.last_nonrelocatable + 1;
+ x_ml = memlist_del_span(
+ x_ml,
+ _ptob64(p),
+ _ptob64(excess));
+ }
+
+ PR_MEM("%s: %s: edited source memlist:\n",
+ f, s_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(x_ml);
+
+#ifdef DEBUG
+ /* sanity check memlist */
+ d_ml = x_ml;
+ while (d_ml->next != NULL)
+ d_ml = d_ml->next;
+
+ ASSERT(d_ml->address + d_ml->size ==
+ _ptob64(s_mq.last_nonrelocatable + 1));
+#endif
+
+ /*
+ * x_ml now describes only the portion of the
+ * source board that will be moved during the
+ * copy/rename operation.
+ */
+ d_ml = x_ml;
+ } else {
+ /* use original memlist; all spans will be moved */
+ d_ml = s_ml;
+ }
+
+ /* verify target can support source memory spans. */
+ if (memlist_canfit(d_ml, t_ml) == 0) {
+ PR_MEM("%s: source memlist won't"
+ " fit in target memlist\n", f);
+ PR_MEM("%s: source memlist:\n", f);
+ PR_MEMLIST_DUMP(d_ml);
+ PR_MEM("%s: target memlist:\n", f);
+ PR_MEMLIST_DUMP(t_ml);
+
+ continue;
+ }
+
+ /* NOTE: the value of d_ml is not used beyond this point */
+
+		PR_MEM("%s: checking for no-reloc in %s,"
+ " basepfn=0x%lx, npages=%ld\n",
+ f,
+ t_mp->sbm_cm.sbdev_path,
+ t_mp->sbm_basepfn,
+ t_mp->sbm_npages);
+
+ rv = kphysm_del_span_query(
+ t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
+ if (rv != KPHYSM_OK) {
+ PR_MEM("%s: kphysm_del_span_query:"
+ " unexpected return value %d\n", f, rv);
+
+ continue;
+ }
+
+ if (mq.nonrelocatable != 0) {
+ PR_MEM("%s: candidate %s has"
+ " nonrelocatable span [0x%lx..0x%lx]\n",
+ f,
+ t_mp->sbm_cm.sbdev_path,
+ mq.first_nonrelocatable,
+ mq.last_nonrelocatable);
+
+ continue;
+ }
+
+#ifdef DEBUG
+ /*
+ * This is a debug tool for excluding certain boards
+ * from being selected as a target board candidate.
+ * dr_ignore_board is only tested by this driver.
+ * It must be set with adb, obp, /etc/system or your
+ * favorite debugger.
+ */
+ if (dr_ignore_board &
+ (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
+ PR_MEM("%s: dr_ignore_board flag set,"
+ " ignoring %s as candidate\n",
+ f, t_mp->sbm_cm.sbdev_path);
+ continue;
+ }
+#endif
+
+ /*
+ * Reserve excess source board memory, if any.
+ *
+ * When the number of pages on the candidate target
+ * board is less than the number of pages on the source,
+		 * then some spans of the source board's address space
+		 * will clearly not be covered by physical memory after the
+ * copy/rename completes. The following code block
+ * schedules those spans to be deleted.
+ */
+ if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
+ pfn_t pfn;
+ uint64_t s_del_pa;
+ struct memlist *ml;
+
+ d_ml = memlist_dup(s_ml);
+ if (d_ml == NULL) {
+			PR_MEM("%s: can't dup src brd memlist\n", f);
+ /* TODO: should abort */
+ continue;
+ }
+
+ /* calculate base pfn relative to target board */
+ pfn = s_mp->sbm_basepfn & ~sm;
+ pfn += t_mp->sbm_slice_offset;
+
+			/*
+			 * A dynamically added segment cannot be split.  If
+			 * the proposed delete boundary falls inside one, move
+			 * the boundary down to the start of that segment so
+			 * the entire segment is deleted.
+			 */
+ s_del_pa = _ptob64(pfn + t_mp->sbm_npages);
+ PR_MEM("%s: proposed src delete pa=0x%lx\n", f,
+ s_del_pa);
+ PR_MEM("%s: checking for split of dyn seg list:\n", f);
+ PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
+ for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
+ if (s_del_pa > ml->address &&
+ s_del_pa < ml->address + ml->size) {
+ s_del_pa = ml->address;
+ break;
+ }
+ }
+
+ /* remove span that will reside on candidate board */
+ d_ml = memlist_del_span(d_ml, _ptob64(pfn),
+ s_del_pa - _ptob64(pfn));
+
+ PR_MEM("%s: %s: reserving src brd memlist:\n",
+ f, s_mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(d_ml);
+
+ /* reserve excess spans */
+ if (dr_reserve_mem_spans(
+ &s_mp->sbm_memhandle, d_ml) != 0) {
+
+ /* likely more non-reloc pages appeared */
+ /* TODO: restart from top? */
+ continue;
+ }
+ } else {
+ /* no excess source board memory */
+ d_ml = NULL;
+ }
+
+ s_mp->sbm_flags |= DR_MFLAG_RESERVED;
+
+ /*
+ * reserve all memory on target board.
+ * NOTE: source board's memhandle is used.
+ *
+ * If this succeeds (eq 0), then target selection is
+ * complete and all unwanted memory spans, both source and
+ * target, have been reserved. Loop is terminated.
+ */
+ if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
+ PR_MEM("%s: %s: target board memory reserved\n",
+ f, t_mp->sbm_cm.sbdev_path);
+
+ /* a candidate target board is now reserved */
+ t_mp->sbm_flags |= DR_MFLAG_RESERVED;
+ c_mp = t_mp;
+
+ /* *** EXITING LOOP *** */
+ break;
+ }
+
+ /* did not successfully reserve the target board. */
+ PR_MEM("%s: could not reserve target %s\n",
+ f, t_mp->sbm_cm.sbdev_path);
+
+ /*
+ * NOTE: an undo of the dr_reserve_mem_span work
+ * will happen automatically when the memhandle
+ * (s_mp->sbm_memhandle) is kphysm_del_release'd.
+ */
+
+ s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
+ }
+
+ /* clean up after memlist editing logic */
+ if (x_ml != NULL)
+ memlist_delete(x_ml);
+
+ FREESTRUCT(sets, dr_mem_unit_t *, n_units_per_set * n_sets);
+
+ /*
+ * c_mp will be NULL when the entire sets[] array
+ * has been searched without reserving a target board.
+ */
+ if (c_mp == NULL) {
+ PR_MEM("%s: %s: target selection failed.\n",
+ f, s_mp->sbm_cm.sbdev_path);
+
+ if (t_ml != NULL)
+ memlist_delete(t_ml);
+
+ return (-1);
+ }
+
+ PR_MEM("%s: found target %s for source %s\n",
+ f,
+ c_mp->sbm_cm.sbdev_path,
+ s_mp->sbm_cm.sbdev_path);
+
+ s_mp->sbm_peer = c_mp;
+ s_mp->sbm_flags |= DR_MFLAG_SOURCE;
+ s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */
+ s_mp->sbm_mlist = s_ml;
+ s_mp->sbm_cm.sbdev_busy = 1;
+
+ c_mp->sbm_peer = s_mp;
+ c_mp->sbm_flags |= DR_MFLAG_TARGET;
+ c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */
+ c_mp->sbm_mlist = t_ml;
+ c_mp->sbm_cm.sbdev_busy = 1;
+
+ s_mp->sbm_flags &= ~DR_MFLAG_MEMRESIZE;
+ if (c_mp->sbm_npages > s_mp->sbm_npages) {
+ s_mp->sbm_flags |= DR_MFLAG_MEMUPSIZE;
+		PR_MEM("%s: upsize detected (source=%ld < target=%ld)\n",
+		    f, s_mp->sbm_npages, c_mp->sbm_npages);
+ } else if (c_mp->sbm_npages < s_mp->sbm_npages) {
+ s_mp->sbm_flags |= DR_MFLAG_MEMDOWNSIZE;
+		PR_MEM("%s: downsize detected (source=%ld > target=%ld)\n",
+		    f, s_mp->sbm_npages, c_mp->sbm_npages);
+ }
+
+ return (0);
+}
+
+/*
+ * Memlist support.
+ */
+static struct memlist *
+memlist_dup(struct memlist *mlist)
+{
+ struct memlist *hl = NULL, *tl, **mlp;
+
+ if (mlist == NULL)
+ return (NULL);
+
+ mlp = &hl;
+ tl = *mlp;
+ for (; mlist; mlist = mlist->next) {
+ *mlp = GETSTRUCT(struct memlist, 1);
+ (*mlp)->address = mlist->address;
+ (*mlp)->size = mlist->size;
+ (*mlp)->prev = tl;
+ tl = *mlp;
+ mlp = &((*mlp)->next);
+ }
+ *mlp = NULL;
+
+ return (hl);
+}
+
+/*
+ * Determine whether the source memlist (s_mlist) will
+ * fit into the target memlist (t_mlist) in terms of
+ * size and holes (i.e. based on same relative base address).
+ */
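+/*
+ * Illustrative example: a source list of {0x100..0x200, 0x300..0x400}
+ * fits a target list of {0x800..0xc00} because, once both are rebased
+ * to zero, every source span lies within a target span; it does not fit
+ * {0x800..0x900, 0xa00..0xa80} because the rebased second source span
+ * 0x200..0x300 extends past the end of the target span 0x200..0x280.
+ */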
+static int
+memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
+{
+ int rv = 0;
+ uint64_t s_basepa, t_basepa;
+ struct memlist *s_ml, *t_ml;
+
+ if ((s_mlist == NULL) || (t_mlist == NULL))
+ return (0);
+
+ /*
+ * Base both memlists on common base address (0).
+ */
+ s_basepa = s_mlist->address;
+ t_basepa = t_mlist->address;
+
+ for (s_ml = s_mlist; s_ml; s_ml = s_ml->next)
+ s_ml->address -= s_basepa;
+
+ for (t_ml = t_mlist; t_ml; t_ml = t_ml->next)
+ t_ml->address -= t_basepa;
+
+ s_ml = s_mlist;
+ for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
+ uint64_t s_start, s_end;
+ uint64_t t_start, t_end;
+
+ t_start = t_ml->address;
+ t_end = t_start + t_ml->size;
+
+ for (; s_ml; s_ml = s_ml->next) {
+ s_start = s_ml->address;
+ s_end = s_start + s_ml->size;
+
+ if ((s_start < t_start) || (s_end > t_end))
+ break;
+ }
+ }
+ /*
+	 * If we ran out of source memlist chunks, that means
+ * we found a home for all of them.
+ */
+ if (s_ml == NULL)
+ rv = 1;
+
+ /*
+ * Need to add base addresses back since memlists
+ * are probably in use by caller.
+ */
+ for (s_ml = s_mlist; s_ml; s_ml = s_ml->next)
+ s_ml->address += s_basepa;
+
+ for (t_ml = t_mlist; t_ml; t_ml = t_ml->next)
+ t_ml->address += t_basepa;
+
+ return (rv);
+}
+
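+/*
+ * Remove the span [base, base + len) from mlist, trimming or splitting
+ * entries as needed and freeing any entry left empty.  For example
+ * (illustrative): deleting the span 0x2000..0x3000 from a single entry
+ * covering 0x1000..0x4000 leaves two entries, 0x1000..0x2000 and
+ * 0x3000..0x4000.
+ */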
+static struct memlist *
+memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ uint64_t end;
+ struct memlist *ml, *tl, *nlp;
+
+ if (mlist == NULL)
+ return (NULL);
+
+ end = base + len;
+ if ((end <= mlist->address) || (base == end))
+ return (mlist);
+
+ for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
+ uint64_t mend;
+
+ nlp = ml->next;
+
+ if (end <= ml->address)
+ break;
+
+ mend = ml->address + ml->size;
+ if (base < mend) {
+ if (base <= ml->address) {
+ ml->address = end;
+ if (end >= mend)
+ ml->size = 0ull;
+ else
+ ml->size = mend - ml->address;
+ } else {
+ ml->size = base - ml->address;
+ if (end < mend) {
+ struct memlist *nl;
+ /*
+					 * splitting a memlist entry.
+ */
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->address = end;
+ nl->size = mend - nl->address;
+ if ((nl->next = nlp) != NULL)
+ nlp->prev = nl;
+ nl->prev = ml;
+ ml->next = nl;
+ nlp = nl;
+ }
+ }
+ if (ml->size == 0ull) {
+ if (ml == mlist) {
+ if ((mlist = nlp) != NULL)
+ nlp->prev = NULL;
+ FREESTRUCT(ml, struct memlist, 1);
+ if (mlist == NULL)
+ break;
+ ml = nlp;
+ } else {
+ if ((tl->next = nlp) != NULL)
+ nlp->prev = tl;
+ FREESTRUCT(ml, struct memlist, 1);
+ ml = tl;
+ }
+ }
+ }
+ }
+
+ return (mlist);
+}
+
+/*
+ * add span without merging
+ */
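+/*
+ * The new entry is linked in ascending base-address order but is never
+ * coalesced with its neighbors, even when spans are contiguous.  For
+ * example (illustrative): adding the span 0x2000..0x3000 to a list that
+ * holds only 0x1000..0x2000 yields two entries rather than a single
+ * merged 0x1000..0x3000 entry.
+ */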
+static struct memlist *
+memlist_cat_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ struct memlist *ml, *tl, *nl;
+
+ if (len == 0ull)
+ return (NULL);
+
+ if (mlist == NULL) {
+ mlist = GETSTRUCT(struct memlist, 1);
+ mlist->address = base;
+ mlist->size = len;
+ mlist->next = mlist->prev = NULL;
+
+ return (mlist);
+ }
+
+ for (tl = ml = mlist; ml; tl = ml, ml = ml->next) {
+ if (base < ml->address) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->address = base;
+ nl->size = len;
+ nl->next = ml;
+ if ((nl->prev = ml->prev) != NULL)
+ nl->prev->next = nl;
+ ml->prev = nl;
+ if (mlist == ml)
+ mlist = nl;
+ break;
+ }
+ }
+
+ if (ml == NULL) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->address = base;
+ nl->size = len;
+ nl->next = NULL;
+ nl->prev = tl;
+ tl->next = nl;
+ }
+
+ return (mlist);
+}