diff options
Diffstat (limited to 'usr/src/uts/sun4u/io/sbd_mem.c')
| -rw-r--r-- | usr/src/uts/sun4u/io/sbd_mem.c | 2711 |
1 file changed, 2711 insertions, 0 deletions
diff --git a/usr/src/uts/sun4u/io/sbd_mem.c b/usr/src/uts/sun4u/io/sbd_mem.c new file mode 100644 index 0000000000..add43318dc --- /dev/null +++ b/usr/src/uts/sun4u/io/sbd_mem.c @@ -0,0 +1,2711 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * memory support routines for sbd. 
+ */ + +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/dditypes.h> +#include <sys/kmem.h> +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/ddi_impldefs.h> +#include <sys/sysmacros.h> +#include <sys/machsystm.h> +#include <sys/spitregs.h> +#include <sys/cpuvar.h> +#include <sys/cpu_module.h> +#include <sys/promif.h> +#include <sys/memlist_impl.h> +#include <sys/mem_cage.h> +#include <sys/lgrp.h> +#include <sys/platform_module.h> +#include <vm/seg_kmem.h> + +#include <sys/sbdpriv.h> + +#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) +#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) + +static int sbd_post_detach_mem_unit(sbd_mem_unit_t *mp, + sbderror_t *ep); +static int sbd_reserve_mem_spans(memhandle_t *mhp, + struct memlist *mlist); +static int sbd_check_boundaries(struct memlist *orig_memlist, + sbd_mem_unit_t *s_mp, + sbd_mem_unit_t *t_mp); +static int sbd_select_mem_target(sbd_handle_t *hp, + sbd_mem_unit_t *mp, struct memlist *ml); +static void sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t + *ep); +static int memlist_canfit(struct memlist *s_mlist, + struct memlist *t_mlist); +static void sbd_mem_cleanup(sbd_mem_unit_t *s_mp, + sbd_mem_unit_t *t_mp, sbderror_t *ep); +static void sbd_flush_ecache(uint64_t a, uint64_t b); + +struct memlist * +sbd_get_memlist(sbd_mem_unit_t *mp, sbderror_t *ep) +{ + struct memlist *mlist; + static fn_t f = "sbd_get_memlist"; + sbd_board_t *sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp; + sbdp_handle_t *hdp; + sbd_handle_t *hp = MACHBD2HD(sbp); + + PR_MEM("%s...\n", f); + + /* + * Return cached memlist, if present. + * This memlist will be present following an + * unconfigure (a.k.a: detach) of this memunit. + * It should only be used in the case were a configure + * is bringing this memunit back in without going + * through the disconnect and connect states. 
+ */ + if (mp->sbm_mlist) { + PR_MEM("%s: found cached memlist\n", f); + + mlist = memlist_dup(mp->sbm_mlist); + } else { + /* attempt to construct a memlist using phys_install */ + + /* + * NOTE: this code block assumes only one memunit per + * board. This is currently safe because the function + * sbd_init_mem_devlist() forces this assumption to be + * valid. + */ + + /* round down to slice base address */ + /* build mlist from the lower layer */ + hdp = sbd_get_sbdp_handle(sbp, hp); + mlist = sbdp_get_memlist(hdp, mp->sbm_cm.sbdev_dip); + if (mlist == NULL) { + SBD_GET_PERR(hdp->h_err, ep); + PR_MEM("sbd:%s: failed to get memlist for " + "dip (0x%p) ecode %d errno %d", f, + (void *)mp->sbm_cm.sbdev_dip, + ep->e_code, ep->e_errno); + sbd_release_sbdp_handle(hdp); + return (NULL); + } + sbd_release_sbdp_handle(hdp); + } + + PR_MEM("%s: memlist for mem-unit (%d.%d), dip 0x%p:\n", + f, sbp->sb_num, + mp->sbm_cm.sbdev_unum, + (void *)mp->sbm_cm.sbdev_dip); + SBD_MEMLIST_DUMP(mlist); + + return (mlist); +} + +int +sbd_pre_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum) +{ + int err_flag = 0; + sbderror_t *ep = SBD_HD2ERR(hp); + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + int d, i; + sbdp_handle_t *hdp; + static fn_t f = "sbd_pre_attach_mem"; + + PR_MEM("%s...\n", f); + + SBD_SET_ERR(ep, 0); + hdp = sbd_get_sbdp_handle(sbp, hp); + + for (d = 0; d < devnum; d++) { + sbd_mem_unit_t *mp; + int unit; + dev_info_t *dip; + sbd_istate_t state; + int rv; + + /* sbd_get_devlist will not devlist element w/ dip of 0 */ + ASSERT(devlist[d].dv_dip != NULL); + + dip = devlist[d].dv_dip; + unit = sbdp_get_unit_num(hdp, dip); + if (unit == -1) { + if (hp->h_flags & SBD_IOCTL_FLAG_FORCE) + continue; + else { + SBD_GET_PERR(hdp->h_err, ep); + err_flag = 1; + break; + } + } + + mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + ASSERT(mp->sbm_cm.sbdev_sbp == sbp); + ASSERT(unit == mp->sbm_cm.sbdev_unum); + + PR_MEM("sbd: OS attach mem-unit (%d.%d)\n", + sbp->sb_num, + 
mp->sbm_cm.sbdev_unum); + + state = mp->sbm_cm.sbdev_state; + switch (state) { + case SBD_STATE_UNCONFIGURED: + /* use memlist cached by sbd_post_detach_mem_unit */ + if (mp->sbm_mlist != NULL) { + PR_MEM("%s: recovering from UNCONFIG" + " mem-unit (%d.%d)\n", + f, sbp->sb_num, + mp->sbm_cm.sbdev_unum); + + PR_MEM("%s: re-configure cached memlist:\n", f); + SBD_MEMLIST_DUMP(mp->sbm_mlist); + + /* + * kphysm del handle should have been freed + */ + ASSERT((mp->sbm_flags & SBD_MFLAG_RELOWNER) + == 0); + } else { + if (hp->h_flags & SBD_IOCTL_FLAG_FORCE) + continue; + else { + SBD_GET_PERR(hdp->h_err, ep); + err_flag = 1; + PR_MEM("%s: mem-unit (%d.%d)" + " unusable\n", + f, sbp->sb_num, + mp->sbm_cm.sbdev_unum); + break; + } + } + + /*FALLTHROUGH*/ + + case SBD_STATE_CONNECTED: + PR_MEM("%s: reprogramming mem hardware (board %d)\n", + f, sbp->sb_num); + + for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) { + if (mp->sbm_dip[i] == NULL) + continue; + dip = mp->sbm_dip[i]; + + PR_MEM("%s: enabling mc 0x%p on board %d\n", + f, (void *)dip, sbp->sb_num); + + rv = sbdphw_enable_memctrl(hdp, dip); + if (rv < 0) { + SBD_GET_PERR(hdp->h_err, ep); + cmn_err(CE_WARN, + "%s: failed to program mem ctrlr %p on " + "board %d", f, (void *)mp->sbm_dip[i], + sbp->sb_num); + err_flag = 1; + } + } + break; + + default: + cmn_err(CE_WARN, + "%s: unexpected state (%d) for mem-unit " + "(%d.%d)", f, state, sbp->sb_num, + mp->sbm_cm.sbdev_unum); + if (SBD_GET_ERR(ep) == 0) { + SBD_SET_ERR(ep, ESBD_STATE); + err_flag = 1; + } + break; + } + + /* exit for loop if error encountered */ + if (err_flag) { + SBD_SET_ERRSTR(ep, + sbp->sb_mempath[mp->sbm_cm.sbdev_unum]); + break; + } + } + sbd_release_sbdp_handle(hdp); + + return (err_flag ? 
-1 : 0); +} + +int +sbd_post_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum) +{ + int d; + sbdp_handle_t *hdp; + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + sbderror_t *ep = SBD_HD2ERR(hp); + static fn_t f = "sbd_post_attach_mem"; + + PR_MEM("%s...\n", f); + hdp = sbd_get_sbdp_handle(sbp, hp); + + for (d = 0; d < devnum; d++) { + sbd_mem_unit_t *mp; + dev_info_t *dip; + int unit; + struct memlist *mlist, *ml; + + /* sbd_get_devlist will not devlist element w/ dip of 0 */ + ASSERT(devlist[d].dv_dip != NULL); + + dip = devlist[d].dv_dip; + unit = sbdp_get_unit_num(hdp, dip); + if (unit == -1) { + if (hp->h_flags & SBD_IOCTL_FLAG_FORCE) + continue; + else { + SBD_GET_PERR(hdp->h_err, ep); + break; + } + } + + mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + mlist = sbd_get_memlist(mp, ep); + if (mlist == NULL) { + cmn_err(CE_WARN, + "%s: no memlist for mem-unit (%d.%d)", + f, + sbp->sb_num, + mp->sbm_cm.sbdev_unum); + + if (SBD_GET_ERR(ep) == 0) { + SBD_SET_ERR(ep, ESBD_MEMFAIL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + } + + continue; + } + + /* + * Verify the memory really did successfully attach + * by checking for its existence in phys_install. + */ + + memlist_read_lock(); + if (memlist_intersect(phys_install, mlist) == 0) { + memlist_read_unlock(); + + cmn_err(CE_WARN, + "%s: mem-unit (%d.%d) memlist not in" + " phys_install", f, sbp->sb_num, + mp->sbm_cm.sbdev_unum); + + if (SBD_GET_ERR(ep) == 0) { + SBD_SET_ERR(ep, ESBD_INTERNAL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + } + + memlist_delete(mlist); + continue; + } + memlist_read_unlock(); + + for (ml = mlist; ml != NULL; ml = ml->next) { + sbdp_mem_add_span(hdp, ml->address, ml->size); + } + + memlist_delete(mlist); + + /* + * Destroy cached memlist, if any. + * There will be a cached memlist in sbm_mlist if + * this board is being configured directly after + * an unconfigure. + * To support this transition, sbd_post_detach_mem + * left a copy of the last known memlist in sbm_mlist. 
+ * This memlist could differ from any derived from + * hardware if while this memunit was last configured + * the system detected and deleted bad pages from + * phys_install. The location of those bad pages + * will be reflected in the cached memlist. + */ + if (mp->sbm_mlist) { + memlist_delete(mp->sbm_mlist); + mp->sbm_mlist = NULL; + } + sbd_init_mem_unit_data(mp, ep); + } + + sbd_release_sbdp_handle(hdp); + return (0); +} + +int +sbd_pre_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum) +{ + int d; + int unit; + sbdp_handle_t *hdp; + sbderror_t *ep = SBD_HD2ERR(hp); + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + dev_info_t *dip; + + hdp = sbd_get_sbdp_handle(sbp, hp); + + for (d = 0; d < devnum; d++) { + sbd_mem_unit_t *mp; + + /* sbd_get_devlist will not devlist element w/ dip of 0 */ + ASSERT(devlist[d].dv_dip != NULL); + + dip = devlist[d].dv_dip; + unit = sbdp_get_unit_num(hdp, dip); + if (unit == -1) { + if (hp->h_flags & SBD_IOCTL_FLAG_FORCE) + continue; + else { + SBD_GET_PERR(hdp->h_err, ep); + sbd_release_sbdp_handle(hdp); + return (-1); + } + } + + mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + /* sanity check */ + ASSERT(mp->sbm_cm.sbdev_sbp == sbp); + ASSERT(unit == mp->sbm_cm.sbdev_unum); + + PR_MEM("sbd: OS detach mem-unit (%d.%d)\n", + sbp->sb_num, mp->sbm_cm.sbdev_unum); + } + + sbd_release_sbdp_handle(hdp); + return (0); +} + +int +sbd_post_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum) +{ + int d, rv; + sbdp_handle_t *hdp; + sbd_board_t *sbp; + sbd_mem_unit_t *s_mp, *t_mp; + static fn_t f = "sbd_post_detach_mem"; + + PR_MEM("%s...\n", f); + + sbp = SBDH2BD(hp->h_sbd); + + hdp = sbd_get_sbdp_handle(sbp, hp); + + + rv = 0; + for (d = 0; d < devnum; d++) { + sbderror_t *ep; + dev_info_t *dip; + int unit; + + /* sbd_get_devlist will not devlist element w/ dip of 0 */ + ASSERT(devlist[d].dv_dip != NULL); + + ep = &devlist[d].dv_error; + if ((SBD_GET_ERR(SBD_HD2ERR(hp)) != 0) || + (sbd_set_err_in_hdl(hp, ep) == 0)) { + rv 
= -1; + } + + dip = devlist[d].dv_dip; + unit = sbdp_get_unit_num(hdp, dip); + if (unit == -1) { + if (hp->h_flags & SBD_IOCTL_FLAG_FORCE) + continue; + else { + if (rv != -1) + SBD_GET_PERR(hdp->h_err, ep); + break; + } + } + + s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + ASSERT(s_mp->sbm_cm.sbdev_sbp == sbp); + + if (rv == -1) { + if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) { + t_mp = s_mp->sbm_peer; + } else { + /* this is no target unit */ + t_mp = NULL; + } + + sbd_mem_cleanup(s_mp, t_mp, ep); + } else if (sbd_post_detach_mem_unit(s_mp, ep)) + rv = -1; + } + + sbd_release_sbdp_handle(hdp); + return (rv); +} + +static void +sbd_add_memory_spans(sbd_board_t *sbp, struct memlist *ml) +{ + sbdp_handle_t *hdp; + static fn_t f = "sbd_add_memory_spans"; + + PR_MEM("%s...", f); + SBD_MEMLIST_DUMP(ml); + +#ifdef DEBUG + memlist_read_lock(); + if (memlist_intersect(phys_install, ml)) { + PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f); + } + memlist_read_unlock(); +#endif + hdp = sbd_get_sbdp_handle(NULL, NULL); + + for (; ml; ml = ml->next) { + update_membounds_t umb; + pfn_t base; + pgcnt_t npgs; + int rv; + + base = _b64top(ml->address); + npgs = _b64top(ml->size); + + umb.u_board = sbp->sb_num; + umb.u_base = (uint64_t)base << MMU_PAGESHIFT; + umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT; + + lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb); + rv = kphysm_add_memory_dynamic(base, npgs); + + (void) sbdp_mem_add_span(hdp, ml->address, ml->size); + + if (rv != KPHYSM_OK) { + cmn_err(CE_WARN, "sbd:%s:" + " unexpected kphysm_add_memory_dynamic" + " return value %d;" + " basepfn=0x%lx, npages=%ld\n", + f, rv, base, npgs); + + continue; + } + kcage_range_lock(); + rv = kcage_range_add(base, npgs, 1); + kcage_range_unlock(); + if (rv != 0) + continue; + } + sbd_release_sbdp_handle(hdp); +} + +/* hack for test scripts. 
*** remove before code finalized *** */ +int sbd_last_target; + +static int +sbd_post_detach_mem_unit(sbd_mem_unit_t *s_mp, sbderror_t *ep) +{ + uint64_t sz; + uint64_t sm; + uint64_t t_basepa; + uint64_t tmp_basepa; + uint64_t s_basepa; + sbd_board_t *sbp; + sbdp_handle_t *hdp; + uint64_t s_nbytes; + uint64_t s_new_basepa; + sbd_mem_unit_t *t_mp, *x_mp; + struct memlist *ml; + int rv; + static fn_t f = "sbd_post_detach_mem_unit"; + sbd_handle_t *hp; + + PR_MEM("%s...\n", f); + + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + hp = MACHBD2HD(sbp); + hdp = sbd_get_sbdp_handle(sbp, hp); + + if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) { + cmn_err(CE_WARN, + "sbd:%s: no alignment for mem-unit (%d.%d)", + f, sbp->sb_num, s_mp->sbm_cm.sbdev_unum); + SBD_GET_PERR(hdp->h_err, ep); + sbd_release_sbdp_handle(hdp); + return (-1); + } + sm = sz - 1; + + /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */ + PR_MEM("%s: brd %d: deleted memlist (EMPTY maybe okay):\n", + f, sbp->sb_num); + SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist); + + /* sanity check */ + ASSERT(s_mp->sbm_del_mlist == NULL || + (s_mp->sbm_flags & SBD_MFLAG_RELDONE) != 0); + + if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) { + t_mp = s_mp->sbm_peer; + + ASSERT(t_mp != NULL); + ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET); + ASSERT(t_mp->sbm_peer == s_mp); + + ASSERT(t_mp->sbm_flags & SBD_MFLAG_RELDONE); + ASSERT(t_mp->sbm_del_mlist); + + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + PR_MEM("%s: target brd %d: deleted memlist:\n", + f, sbp->sb_num); + SBD_MEMLIST_DUMP(t_mp->sbm_del_mlist); + } else { + /* this is no target unit */ + t_mp = NULL; + } + + /* + * Verify the memory really did successfully detach + * by checking for its non-existence in phys_install. 
+ */ + rv = 0; + memlist_read_lock(); + if (s_mp->sbm_flags & SBD_MFLAG_RELDONE) { + x_mp = s_mp; + rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); + } + if (rv == 0 && t_mp && (t_mp->sbm_flags & SBD_MFLAG_RELDONE)) { + x_mp = t_mp; + rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); + } + memlist_read_unlock(); + + if (rv) { + sbp = (sbd_board_t *)x_mp->sbm_cm.sbdev_sbp; + + cmn_err(CE_WARN, + "%s: %smem-unit (%d.%d) memlist still in phys_install", + f, + x_mp == t_mp ? "target " : "", + sbp->sb_num, + x_mp->sbm_cm.sbdev_unum); + SBD_SET_ERR(ep, ESBD_INTERNAL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[x_mp->sbm_cm.sbdev_unum]); + sbd_release_sbdp_handle(hdp); + return (-1); + } + + s_basepa = _ptob64(s_mp->sbm_basepfn); + s_nbytes = _ptob64(s_mp->sbm_npages); + + if (t_mp != NULL) { + t_basepa = _ptob64(t_mp->sbm_basepfn); + s_new_basepa = (s_basepa & ~ sm) + + _ptob64(t_mp->sbm_slice_offset); + + /* + * We had to swap mem-units, so update + * memlists accordingly with new base + * addresses. + */ + for (ml = t_mp->sbm_mlist; ml; ml = ml->next) { + ml->address -= t_basepa; + ml->address += s_new_basepa; + } + + /* + * There is no need to explicitly rename the target delete + * memlist, because sbm_del_mlist and sbm_mlist always + * point to the same memlist for a copy/rename operation. + */ + ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); + + PR_MEM("%s: renamed target memlist and delete memlist", f); + SBD_MEMLIST_DUMP(t_mp->sbm_mlist); + + for (ml = s_mp->sbm_mlist; ml; ml = ml->next) { + ml->address -= s_basepa; + ml->address += t_basepa; + } + + PR_MEM("%s: renamed source memlist", f); + SBD_MEMLIST_DUMP(s_mp->sbm_mlist); + +#ifdef DEBUG + ASSERT(s_mp->sbm_mlist != s_mp->sbm_del_mlist); + /* + * Renaming s_mp->sbm_del_mlist is not necessary. This + * list is not used beyond this point, and in fact, is + * disposed of at the end of this function. 
+ */ + for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { + ml->address -= s_basepa; + ml->address += t_basepa; + } + + PR_MEM("%s: renamed source delete memlist", f); + SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist); +#endif + + if (s_mp->sbm_flags & SBD_MFLAG_MEMUPSIZE) { + struct memlist *nl; + int mlret; + + /* + * We had to perform a copy-rename from a + * small memory node to a big memory node. + * Need to add back the remaining memory on + * the big board that wasn't used by that + * from the small board during the copy. + * Subtract out the portion of the target memory + * node that was taken over by the source memory + * node. + */ + nl = memlist_dup(t_mp->sbm_mlist); + mlret = memlist_delete_span(s_basepa, s_nbytes, &nl); + PR_MEM("%s: mlret = %d\n", f, mlret); + + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + PR_MEM("%s: adding back remaining portion" + " of mem-unit (%d.%d), memlist:\n", + f, sbp->sb_num, + t_mp->sbm_cm.sbdev_unum); + + SBD_MEMLIST_DUMP(nl); + + sbd_add_memory_spans(sbp, nl); + + memlist_delete(nl); + } + } + + + if (t_mp != NULL) { + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + hdp->h_board = sbp->sb_num; + /* delete target's entire address space */ + tmp_basepa = t_basepa & ~ sm; + rv = sbdp_mem_del_span(hdp, tmp_basepa, sz); + ASSERT(rv == 0); + + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + hdp->h_board = sbp->sb_num; + tmp_basepa = s_basepa & ~ sm; + sz = s_new_basepa & sm; + /* delete source board's vacant address space */ + rv = sbdp_mem_del_span(hdp, tmp_basepa, sz); + ASSERT(rv == 0); + } else { + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + hdp->h_board = sbp->sb_num; + tmp_basepa = s_basepa & ~ sm; + /* delete board's entire address space */ + rv = sbdp_mem_del_span(hdp, tmp_basepa, sz); + ASSERT(rv == 0); + } + +#ifdef LINT + rv = rv; +#endif + + sbd_mem_cleanup(s_mp, t_mp, ep); + + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + PR_MEM("%s: board %d's memlist:", f, sbp->sb_num); + SBD_MEMLIST_DUMP(s_mp->sbm_mlist); + + 
sbd_release_sbdp_handle(hdp); + return (0); +} + +static void +sbd_mem_cleanup(sbd_mem_unit_t *s_mp, sbd_mem_unit_t *t_mp, sbderror_t *ep) +{ + sbd_board_t *sbp; + + /* clean up target mem unit */ + if (t_mp != NULL) { + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + + ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); + /* + * sbm_del_mlist and sbm_mlist point at the same list + * We only need to delete one and then set both pointers + * to NULL + */ + memlist_delete(t_mp->sbm_del_mlist); + + t_mp->sbm_del_mlist = NULL; + t_mp->sbm_mlist = NULL; + t_mp->sbm_peer = NULL; + t_mp->sbm_flags = 0; + t_mp->sbm_cm.sbdev_busy = 0; + sbd_init_mem_unit_data(t_mp, ep); + + /* + * now that copy/rename has completed, undo this + * work that was done in sbd_release_mem_done. + */ + /* + * If error don't set the target to configured + */ + if (SBD_GET_ERR(ep) == 0) { + SBD_DEV_CLR_UNREFERENCED(sbp, SBD_COMP_MEM, 0); + SBD_DEV_CLR_RELEASED(sbp, SBD_COMP_MEM, 0); + SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, 0, + SBD_STATE_CONFIGURED); + } + +/* hack for test scripts. *** remove before code finalized *** */ +sbd_last_target = sbp->sb_num; + } + + /* + * clean up (source) board's mem unit structure. + * NOTE: sbm_mlist is retained. It is referred to as the + * cached memlist. The cached memlist is used to re-attach + * (configure back in) this memunit from the unconfigured + * state. + */ + if (s_mp != NULL) { + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + + /* + * Don't want to call memlist_delete for sbm_del_mlist, + * since that list points to the sbm_list + */ + s_mp->sbm_del_mlist = NULL; + s_mp->sbm_peer = NULL; + s_mp->sbm_flags = 0; + s_mp->sbm_cm.sbdev_busy = 0; + sbd_init_mem_unit_data(s_mp, ep); + } +} + +/* + * Successful return from this function will have the memory + * handle in sbp->sb_dev[..mem-unit...].sbm_memhandle allocated + * and waiting. 
This routine's job is to select the memory that + * actually has to be released (detached) which may not necessarily + * be the same memory node that came in in devlist[], + * i.e. a copy-rename is needed. + */ +int +sbd_pre_release_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum) +{ + extern int kcage_on; + int d; + int err_flag = 0; + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + sbderror_t *ep = SBD_HD2ERR(hp); + sbderror_t *lep; + static fn_t f = "sbd_pre_release_mem"; + + PR_MEM("%s...\n", f); + + if (kcage_on == 0) { + /* + * Can't Detach memory if Cage is OFF. + */ + cmn_err(CE_WARN, "%s: kernel cage is disabled", f); + SBD_SET_ERR(ep, ESBD_KCAGE_OFF); + return (-1); + } + + for (d = 0; d < devnum; d++) { + int rv; + memquery_t mq; + sbd_mem_unit_t *mp; + struct memlist *ml; + + /* sbd_get_devlist will not devlist element w/ dip of 0 */ + ASSERT(devlist[d].dv_dip != NULL); + + mp = SBD_GET_BOARD_MEMUNIT(sbp, d); + + /* + * If all the mem unit is marked as failed then don't allow the + * operation + */ + if (mp->sbm_cm.sbdev_cond == SBD_COND_FAILED) { + SBD_SET_ERR(ep, ESBD_STATE); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]); + err_flag = -1; + break; + } + + ASSERT(d == mp->sbm_cm.sbdev_unum); + + /* + * if interleave is set to across boards fail the op + */ + if (mp->sbm_interleave) { + SBD_SET_ERR(ep, ESBD_MEMINTLV); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]); + err_flag = -1; + break; + } + + lep = &devlist[d].dv_error; + if (SBD_GET_ERR(lep) != 0) { + err_flag = -1; + (void) sbd_set_err_in_hdl(hp, lep); + break; + } + + if (mp->sbm_flags & SBD_MFLAG_RESERVED) { + /* + * Board is currently involved in a delete + * memory operation. Can't detach this guy until + * that operation completes. 
+ */ + cmn_err(CE_WARN, + "%s: ineligible mem-unit (%d.%d) for detach", + f, sbp->sb_num, + mp->sbm_cm.sbdev_unum); + + SBD_SET_ERR(lep, ESBD_INVAL); + SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]); + (void) sbd_set_err_in_hdl(hp, lep); + err_flag = -1; + break; + } + + /* + * Check whether the detaching memory requires a + * copy-rename. + */ + ASSERT(mp->sbm_npages != 0); + rv = kphysm_del_span_query( + mp->sbm_basepfn, mp->sbm_npages, &mq); + if (rv != KPHYSM_OK) { + cmn_err(CE_WARN, + "%s: unexpected kphysm_del_span_query" + " return value %d;" + " basepfn 0x%lx, npages 0x%lx," + " mem-unit (%d.%d), dip 0x%p", + f, + rv, + mp->sbm_basepfn, + mp->sbm_npages, + sbp->sb_num, + mp->sbm_cm.sbdev_unum, + (void *)mp->sbm_cm.sbdev_dip); + + SBD_SET_ERR(lep, ESBD_INTERNAL); + SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]); + (void) sbd_set_err_in_hdl(hp, lep); + err_flag = -1; + break; + } + + if (mq.nonrelocatable != 0) { + if (!(hp->h_iap->i_flags & SBD_FLAG_QUIESCE_OKAY)) { + /* caller wasn't prompted for a suspend */ + SBD_SET_ERR(lep, ESBD_QUIESCE_REQD); + SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]); + (void) sbd_set_err_in_hdl(hp, lep); + err_flag = 1; + break; + } + } + + /* flags should be clean at this time */ + ASSERT(mp->sbm_flags == 0); + + ASSERT(mp->sbm_del_mlist == NULL); /* should be null */ + + if (mp->sbm_mlist != NULL) { + memlist_delete(mp->sbm_mlist); + mp->sbm_mlist = NULL; + } + + ml = sbd_get_memlist(mp, lep); + (void) sbd_set_err_in_hdl(hp, lep); + if (ml == NULL) { + PR_MEM("%s: no memlist found for board %d\n", + f, sbp->sb_num); + err_flag = -1; + break; + } + + /* allocate a kphysm handle */ + rv = kphysm_del_gethandle(&mp->sbm_memhandle); + if (rv != KPHYSM_OK) { + memlist_delete(ml); + + cmn_err(CE_WARN, + "%s: unexpected kphysm_del_gethandle" + " return value %d", f, rv); + + SBD_SET_ERR(lep, ESBD_INTERNAL); + SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]); + (void) sbd_set_err_in_hdl(hp, lep); + err_flag = -1; + break; + } + mp->sbm_flags |= SBD_MFLAG_RELOWNER; 
+ + if ((mq.nonrelocatable != 0) || + sbd_reserve_mem_spans(&mp->sbm_memhandle, ml)) { + /* + * Either the detaching memory node contains + * non-reloc memory or we failed to reserve the + * detaching memory node (which did _not_ have + * any non-reloc memory, i.e. some non-reloc mem + * got onboard). + */ + + if (sbd_select_mem_target(hp, mp, ml)) { + int rv; + + /* + * We had no luck locating a target + * memory node to be the recipient of + * the non-reloc memory on the node + * we're trying to detach. + * Clean up be disposing the mem handle + * and the mem list. + */ + rv = kphysm_del_release(mp->sbm_memhandle); + if (rv != KPHYSM_OK) { + /* + * can do nothing but complain + * and hope helpful for debug + */ + cmn_err(CE_WARN, "sbd:%s: unexpected" + " kphysm_del_release return" + " value %d", + f, rv); + } + mp->sbm_flags &= ~SBD_MFLAG_RELOWNER; + + memlist_delete(ml); + + /* make sure sbm_flags is clean */ + ASSERT(mp->sbm_flags == 0); + + cmn_err(CE_WARN, + "%s: no available target for " + "mem-unit (%d.%d)", + f, sbp->sb_num, + mp->sbm_cm.sbdev_unum); + + SBD_SET_ERR(lep, ESBD_NO_TARGET); + SBD_SET_ERRSTR(lep, + sbp->sb_mempath[mp->sbm_cm.sbdev_unum]); + (void) sbd_set_err_in_hdl(hp, lep); + + err_flag = -1; + break; + } + + /* + * ml is not memlist_deleted here because + * it has been assigned to mp->sbm_mlist + * by sbd_select_mem_target. 
+ */ + } else { + /* no target needed to detach this board */ + mp->sbm_flags |= SBD_MFLAG_RESERVED; + mp->sbm_peer = NULL; + mp->sbm_del_mlist = ml; + mp->sbm_mlist = ml; + mp->sbm_cm.sbdev_busy = 1; + } +#ifdef DEBUG + ASSERT(mp->sbm_mlist != NULL); + + if (mp->sbm_flags & SBD_MFLAG_SOURCE) { + int src, targ; + + sbp = (sbd_board_t *) + mp->sbm_peer->sbm_cm.sbdev_sbp; + targ = sbp->sb_num; + sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp; + src = sbp->sb_num; + PR_MEM("%s: release of board %d requires copy/rename;" + " selected target board %d\n", + f, src, targ); + } else { + sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp; + PR_MEM("%s: copy/rename not required to release" + " board %d\n", f, sbp->sb_num); + } + + ASSERT(mp->sbm_flags & SBD_MFLAG_RELOWNER); + ASSERT(mp->sbm_flags & SBD_MFLAG_RESERVED); +#endif + } + + return (err_flag); +} + +void +sbd_release_mem_done(sbd_handle_t *hp, int unit) +{ + sbd_mem_unit_t *s_mp, *t_mp, *mp; + sbderror_t *ep = SBD_HD2ERR(hp); + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + int rv; + static fn_t f = "sbd_release_mem_done"; + + s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + + /* + * This unit will be flagged with SBD_MFLAG_SOURCE, if it + * has a target unit. 
+ */ + if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) { + t_mp = s_mp->sbm_peer; + ASSERT(t_mp != NULL); + ASSERT(t_mp->sbm_peer == s_mp); + ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET); + ASSERT(t_mp->sbm_flags & SBD_MFLAG_RESERVED); + } else { + /* this is no target unit */ + t_mp = NULL; + } + + /* free delete handle */ + ASSERT(s_mp->sbm_flags & SBD_MFLAG_RELOWNER); + ASSERT(s_mp->sbm_flags & SBD_MFLAG_RESERVED); + + rv = kphysm_del_release(s_mp->sbm_memhandle); + if (rv != KPHYSM_OK) { + /* + * can do nothing but complain + * and hope helpful for debug + */ + cmn_err(CE_WARN, "sbd:%s: unexpected kphysm_del_release" + " return value %d", f, rv); + } + s_mp->sbm_flags &= ~SBD_MFLAG_RELOWNER; + + /* + * If an error was encountered during release, clean up + * the source (and target, if present) unit data. + */ + if (SBD_GET_ERR(ep) != 0) { + + PR_MEM("%s: unit %d.%d: error %d noted\n", + f, sbp->sb_num, + s_mp->sbm_cm.sbdev_unum, + SBD_GET_ERR(ep)); + + sbd_mem_cleanup(s_mp, t_mp, ep); + + /* bail out */ + return; + } + + SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM, unit); + SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_RELEASE); + + if (t_mp != NULL) { + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + /* + * the kphysm delete operation that drained the source + * board also drained this target board. Since the source + * board drain is now known to have succeeded, we know this + * target board is drained too. + */ + SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum); + SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum, + SBD_STATE_RELEASE); + + /* + * NOTE: do not transition target's board state, + * even if the mem-unit was the last configure + * unit of the board. When copy/rename completes + * this mem-unit will transitioned back to + * the configured state. In the meantime, the + * board's must remain as is. 
+ */ + } + + /* if board(s) had deleted memory, verify it is gone */ + rv = 0; + memlist_read_lock(); + if (s_mp->sbm_del_mlist != NULL) { + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + mp = s_mp; + rv = memlist_intersect(phys_install, mp->sbm_del_mlist); + } + if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) { + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + mp = t_mp; + rv = memlist_intersect(phys_install, mp->sbm_del_mlist); + } + memlist_read_unlock(); + if (rv) { + cmn_err(CE_WARN, "sbd:%s: %smem-unit (%d.%d): " + "deleted memory still found in phys_install", + f, + (mp == t_mp ? "target " : ""), + sbp->sb_num, + mp->sbm_cm.sbdev_unum); + + SBD_SET_ERR(ep, ESBD_INTERNAL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[mp->sbm_cm.sbdev_unum]); + return; + } + + s_mp->sbm_flags |= SBD_MFLAG_RELDONE; + if (t_mp != NULL) { + t_mp->sbm_flags &= ~SBD_MFLAG_RESERVED; + t_mp->sbm_flags |= SBD_MFLAG_RELDONE; + } + + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + + SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM, unit); + SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_UNREFERENCED); + + PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n", + f, sbp->sb_num, + s_mp->sbm_cm.sbdev_unum); + + s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED; + + if (t_mp != NULL) { + sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + + SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum); + SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum, + SBD_STATE_UNREFERENCED); + + sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + + PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n", + f, sbp->sb_num, + t_mp->sbm_cm.sbdev_unum); + + t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED; + } +} + +int +sbd_disconnect_mem(sbd_handle_t *hp, int unit) +{ + static fn_t f = "sbd_disconnect_mem"; + sbd_mem_unit_t *mp; + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + + mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + ASSERT(mp->sbm_cm.sbdev_state == SBD_STATE_CONNECTED || + 
mp->sbm_cm.sbdev_state == SBD_STATE_UNCONFIGURED); + + PR_MEM("%s...\n", f); + + if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist) + memlist_delete(mp->sbm_del_mlist); + mp->sbm_del_mlist = NULL; + + if (mp->sbm_mlist) { + memlist_delete(mp->sbm_mlist); + mp->sbm_mlist = NULL; + } + + return (0); +} + +int +sbd_cancel_mem(sbd_handle_t *hp, int unit) +{ + sbd_mem_unit_t *s_mp, *t_mp; + sbd_istate_t state; + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + sbd_board_t *tsbp; + static fn_t f = "sbd_cancel_mem"; + sbderror_t *ep = SBD_HD2ERR(hp); + + s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + state = s_mp->sbm_cm.sbdev_state; + + if (s_mp->sbm_flags & SBD_MFLAG_TARGET) { + /* must cancel source board, not target board */ + SBD_SET_ERR(ep, ESBD_INTERNAL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + return (-1); + } else if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) { + t_mp = s_mp->sbm_peer; + tsbp = t_mp->sbm_cm.sbdev_sbp; + ASSERT(t_mp != NULL); + ASSERT(t_mp->sbm_peer == s_mp); + + /* must always match the source board's state */ + ASSERT(t_mp->sbm_cm.sbdev_state == state); + } else { + /* this is no target unit */ + t_mp = NULL; + } + + switch (state) { + case SBD_STATE_UNREFERENCED: /* state set by sbd_release_mem_done */ + ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0); + + if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) { + PR_MEM("%s: undoing target board %d memory delete\n", + f, tsbp->sb_num); + sbd_add_memory_spans(tsbp, t_mp->sbm_del_mlist); + SBD_DEV_CLR_UNREFERENCED(tsbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum); + } + + if (s_mp->sbm_del_mlist != NULL) { + PR_MEM("%s: undoing board %d memory delete\n", + f, sbp->sb_num); + sbd_add_memory_spans(sbp, s_mp->sbm_del_mlist); + } + + /*FALLTHROUGH*/ + + case SBD_STATE_CONFIGURED: + /* + * we got here because of an error early in the release process + * Just leave the memory as is and report the error + */ + + ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0); + + if (t_mp != NULL) { + 
ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); + t_mp->sbm_del_mlist = NULL; + + if (t_mp->sbm_mlist != NULL) { + memlist_delete(t_mp->sbm_mlist); + t_mp->sbm_mlist = NULL; + } + + t_mp->sbm_peer = NULL; + t_mp->sbm_flags = 0; + t_mp->sbm_cm.sbdev_busy = 0; + sbd_init_mem_unit_data(t_mp, ep); + + SBD_DEV_CLR_RELEASED(tsbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum); + + SBD_DEVICE_TRANSITION(tsbp, SBD_COMP_MEM, + t_mp->sbm_cm.sbdev_unum, + SBD_STATE_CONFIGURED); + } + + if (s_mp->sbm_del_mlist != s_mp->sbm_mlist) + memlist_delete(s_mp->sbm_del_mlist); + s_mp->sbm_del_mlist = NULL; + + if (s_mp->sbm_mlist != NULL) { + memlist_delete(s_mp->sbm_mlist); + s_mp->sbm_mlist = NULL; + } + + s_mp->sbm_peer = NULL; + s_mp->sbm_flags = 0; + s_mp->sbm_cm.sbdev_busy = 0; + sbd_init_mem_unit_data(s_mp, ep); + + return (0); + default: + PR_MEM("%s: WARNING unexpected state (%d) for " + "mem-unit %d.%d\n", + f, + (int)state, + sbp->sb_num, + s_mp->sbm_cm.sbdev_unum); + + return (-1); + } + /*NOTREACHED*/ +} + +void +sbd_init_mem_unit(sbd_board_t *sbp, int unit, sbderror_t *ep) +{ + sbd_istate_t new_state; + sbd_mem_unit_t *mp; + dev_info_t *cur_mc_dip; + int failed_mcs = 0, present_mcs = 0; + sbd_cond_t mc_cond; + int i; + + mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + if (SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_MEM, unit)) { + new_state = SBD_STATE_CONFIGURED; + } else if (SBD_DEV_IS_PRESENT(sbp, SBD_COMP_MEM, unit)) { + new_state = SBD_STATE_CONNECTED; + } else if (mp->sbm_cm.sbdev_dip != NULL) { + new_state = SBD_STATE_OCCUPIED; + } else { + new_state = SBD_STATE_EMPTY; + } + + /* + * Check all the possible memory nodes on the board. If all of them + * have a failed status mark memory as failed. 
 Otherwise mem is ok
	 */
	if (!sbp->sb_memaccess_ok) {
		/* cannot probe MC condition; leave it undetermined */
		mp->sbm_cm.sbdev_cond = SBD_COND_UNKNOWN;
		return;
	}

	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
		cur_mc_dip = mp->sbm_dip[i];

		if (cur_mc_dip == NULL)
			continue;

		present_mcs |= (1 << i);

		mc_cond = sbd_get_comp_cond(cur_mc_dip);
		if (mc_cond == SBD_COND_FAILED) {
			failed_mcs |= (1 << i);
		}
	}

	if (failed_mcs == present_mcs) {
		/*
		 * All mem nodes failed, therefore mark all mem
		 * as failed
		 */
		mp->sbm_cm.sbdev_cond = SBD_COND_FAILED;
	} else {
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	}

	sbd_init_mem_unit_data(mp, ep);

	/*
	 * Any changes to this memory unit should be performed above
	 * this call to ensure the unit is fully initialized
	 * before transitioning to the new state.
	 */
	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, new_state);

}

/*
 * Populate a mem-unit's base pfn, page count, alignment mask and
 * interleave information via the sbdp layer.  On any sbdp failure
 * the sbdp error is propagated into *ep and the unit data is left
 * partially initialized.
 */
static void
sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t *ep)
{
	uint64_t	basepa;
	uint64_t	sz;
	sbd_board_t	*sbp = mp->sbm_cm.sbdev_sbp;
	sbdp_handle_t	*hdp;
	static fn_t	f = "sbd_init_mem_unit_data";
	sbd_handle_t	*hp = MACHBD2HD(sbp);

	PR_MEM("%s...\n", f);

	/* a little sanity checking */
	ASSERT(mp->sbm_peer == NULL);
	ASSERT(mp->sbm_flags == 0);

	hdp = sbd_get_sbdp_handle(sbp, hp);

	/* get basepfn of mem unit */
	if (sbdphw_get_base_physaddr(hdp, mp->sbm_cm.sbdev_dip, &basepa)) {
		cmn_err(CE_WARN, "sbd:%s: failed to get physaddr"
			" for mem-unit (%d.%d)",
			f,
			sbp->sb_num,
			mp->sbm_cm.sbdev_unum);
		SBD_GET_PERR(hdp->h_err, ep);
		sbd_release_sbdp_handle(hdp);
		return;
	}
	mp->sbm_basepfn = _b64top(basepa);

	/* attempt to get number of pages from PDA */
	mp->sbm_npages = sbdp_get_mem_size(hdp);

	/* if didn't work, calculate using memlist */
	if (mp->sbm_npages == 0) {
		struct memlist *ml, *mlist;
		mlist = sbd_get_memlist(mp, ep);
		for (ml = mlist; ml; ml = ml->next)
			mp->sbm_npages += btop(ml->size);
		memlist_delete(mlist);
	}


	if 
(sbdp_get_mem_alignment(hdp, mp->sbm_cm.sbdev_dip, &sz)) {
		cmn_err(CE_WARN,
			"sbd:%s: no alignment for mem-unit (%d.%d)",
			f, sbp->sb_num, mp->sbm_cm.sbdev_unum);
		SBD_GET_PERR(hdp->h_err, ep);
		sbd_release_sbdp_handle(hdp);
		return;
	}
	/* alignment is returned in bytes; store it as a page mask */
	mp->sbm_alignment_mask = _b64top(sz);


	mp->sbm_interleave = sbdp_isinterleaved(hdp,
	    mp->sbm_cm.sbdev_dip);

	PR_MEM("%s: board %d (basepfn = 0x%lx, npgs = 0x%lx interleave %d)\n",
		f, sbp->sb_num,
		mp->sbm_basepfn,
		mp->sbm_npages,
		mp->sbm_interleave);

	sbd_release_sbdp_handle(hdp);
}

/*
 * Schedule every span of the supplied memlist for deletion on the
 * given memhandle.  Returns 0 on success, -1 if any span is refused.
 */
static int
sbd_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
{
	int		err;
	pfn_t		base;
	pgcnt_t		npgs;
	struct memlist	*mc;
	static fn_t	f = "sbd_reserve_mem_spans";

	PR_MEM("%s...\n", f);

	/*
	 * Walk the supplied memlist scheduling each span for removal
	 * with kphysm_del_span.  It is possible that a span may intersect
	 * an area occupied by the cage.
	 */
	for (mc = ml; mc != NULL; mc = mc->next) {
		base = _b64top(mc->address);
		npgs = _b64top(mc->size);

		err = kphysm_del_span(*mhp, base, npgs);
		if (err != KPHYSM_OK) {
			cmn_err(CE_WARN, "sbd:%s memory reserve failed."
				" unexpected kphysm_del_span return value %d;"
				" basepfn=0x%lx npages=%ld",
				f, err, base, npgs);
			return (-1);
		}
	}
	return (0);
}

/* debug counters */
int sbd_smt_realigned;
int sbd_smt_preference[4];

#ifdef DEBUG
uint_t sbd_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif

/*
 * Verify that there is no memory overlapping if copy-rename is
 * done with the selected target board.
 *
 * Returns 0 if OK, -1 otherwise.
 */
static int
sbd_check_boundaries(struct memlist *orig_memlist, sbd_mem_unit_t *s_mp,
	sbd_mem_unit_t *t_mp)
{
	struct memlist	*new_memlist;
	int		mlret;
	static fn_t	f = "sbd_check_boundaries";

	/* work on a private copy so the system memlist is untouched */
	new_memlist = memlist_dup(orig_memlist);
	if (new_memlist == NULL) {
		PR_MEM("%s: can't dup original memlist\n", f);
		return (-1);
	}

	/* remove both units' current spans ... */
	mlret = memlist_delete_span(
		_ptob64(s_mp->sbm_basepfn),
		_ptob64(s_mp->sbm_npages),
		&new_memlist);
	if (mlret != MEML_SPANOP_OK) {
		PR_MEM("%s: del s/s mlret = %d\n", f, mlret);
		goto check_done;
	}

	mlret = memlist_delete_span(
		_ptob64(t_mp->sbm_basepfn),
		_ptob64(t_mp->sbm_npages),
		&new_memlist);
	if (mlret != MEML_SPANOP_OK) {
		PR_MEM("%s: del t/t mlret = %d\n", f, mlret);
		goto check_done;
	}

	/*
	 * ... then re-add them with swapped sizes, simulating the
	 * post-rename layout.  Any failure here indicates overlap.
	 */
	mlret = memlist_add_span(
		_ptob64(t_mp->sbm_basepfn),
		_ptob64(s_mp->sbm_npages),
		&new_memlist);
	if (mlret != MEML_SPANOP_OK) {
		PR_MEM("%s: add t/s mlret = %d\n", f, mlret);
		goto check_done;
	}

	mlret = memlist_add_span(
		_ptob64(s_mp->sbm_basepfn),
		_ptob64(t_mp->sbm_npages),
		&new_memlist);
	if (mlret != MEML_SPANOP_OK) {
		PR_MEM("%s: add s/t mlret = %d\n", f, mlret);
	}

check_done:
	memlist_delete(new_memlist);

	if (mlret == MEML_SPANOP_OK)
		return (0);
	else
		return (-1);
}

/*
 * Find and reserve a copy/rename target board suitable for the
 * given source board.
 * All boards in the system are examined and categorized in relation to
 * their memory size versus the source board's memory size.  Order of
 * preference is:
 *	1st: board has same memory size
 * 	2nd: board has larger memory size
 *	3rd: board has smaller memory size
 *	4th: board has smaller memory size, available memory will be reduced.
 * Boards in category 3 and 4 will have their MC's reprogrammed to locate the
 * span to which the MC responds to address span that appropriately covers
 * the nonrelocatable span of the source board.
+ */ +static int +sbd_select_mem_target(sbd_handle_t *hp, + sbd_mem_unit_t *s_mp, struct memlist *s_ml) +{ + uint64_t sz; + pgcnt_t sm; + int n_sets = 4; /* same, larger, smaller, clipped */ + int preference; /* lower value is higher preference */ + int n_units_per_set; + int idx; + sbd_mem_unit_t **sets; + sbdp_handle_t *hdp; + int t_bd; + sbd_softstate_t *softsp; + int t_unit; + int max_boards; + int rv; + sbd_board_t *s_sbp, *t_sbp; + sbd_mem_unit_t *t_mp, *c_mp; + struct memlist *d_ml, *t_ml, *x_ml; + memquery_t s_mq = {0}; + static fn_t f = "sbd_select_mem_target"; + + PR_MEM("%s...\n", f); + + ASSERT(s_ml != NULL); + + s_sbp = s_mp->sbm_cm.sbdev_sbp; + + hdp = sbd_get_sbdp_handle(s_sbp, hp); + + if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) { + sbderror_t *ep = SBD_HD2ERR(hp); + cmn_err(CE_WARN, + "sbd:%s: no alignment for mem-unit (%d.%d)", + f, s_sbp->sb_num, s_mp->sbm_cm.sbdev_unum); + SBD_GET_PERR(hdp->h_err, ep); + sbd_release_sbdp_handle(hdp); + return (-1); + } + sm = sz - 1; + sbd_release_sbdp_handle(hdp); + + softsp = (sbd_softstate_t *)s_sbp->sb_softsp; + + max_boards = softsp->max_boards; + n_units_per_set = max_boards * MAX_MEM_UNITS_PER_BOARD; + sets = GETSTRUCT(sbd_mem_unit_t *, n_units_per_set * n_sets); + + /* + * Make one pass through all memory units on all boards + * and categorize them with respect to the source board. + */ + for (t_bd = 0; t_bd < max_boards; t_bd++) { + /* + * The board structs are a contiguous array + * so we take advantage of that to find the + * correct board struct pointer for a given + * board number. 
+ */ + t_sbp = (sbd_board_t *)softsp->sbd_boardlist; + t_sbp += t_bd; + + /* source board can not be its own target */ + if (s_sbp->sb_num == t_sbp->sb_num) + continue; + + for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) { + + t_mp = SBD_GET_BOARD_MEMUNIT(t_sbp, t_unit); + + /* this memory node must be attached */ + if (!SBD_DEV_IS_ATTACHED(t_sbp, SBD_COMP_MEM, t_unit)) + continue; + + /* source unit can not be its own target */ + if (s_mp == t_mp) { + /* catch this in debug kernels */ + ASSERT(0); + continue; + } + + /* + * this memory node must not already be reserved + * by some other memory delete operation. + */ + if (t_mp->sbm_flags & SBD_MFLAG_RESERVED) + continue; + + /* + * categorize the memory node + * If this is a smaller memory node, create a + * temporary, edited copy of the source board's + * memlist containing only the span of the non- + * relocatable pages. + */ + if (t_mp->sbm_npages == s_mp->sbm_npages) { + preference = 0; + t_mp->sbm_slice_offset = 0; + } else if (t_mp->sbm_npages > s_mp->sbm_npages) { + preference = 1; + t_mp->sbm_slice_offset = 0; + } else { + /* + * We do not allow other options right now + */ + continue; + } + + sbd_smt_preference[preference]++; + + /* calculate index to start of preference set */ + idx = n_units_per_set * preference; + /* calculate offset to respective element */ + idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit; + + ASSERT(idx < n_units_per_set * n_sets); + sets[idx] = t_mp; + } + } + + /* + * NOTE: this would be a good place to sort each candidate + * set in to some desired order, e.g. memory size in ascending + * order. Without an additional sorting step here, the order + * within a set is ascending board number order. 
+ */ + + c_mp = NULL; + x_ml = NULL; + t_ml = NULL; + for (idx = 0; idx < n_units_per_set * n_sets; idx++) { + memquery_t mq; + + /* cleanup t_ml after previous pass */ + if (t_ml != NULL) { + memlist_delete(t_ml); + t_ml = NULL; + } + + /* get candidate target board mem unit */ + t_mp = sets[idx]; + if (t_mp == NULL) + continue; + + t_sbp = t_mp->sbm_cm.sbdev_sbp; + + /* get target board memlist */ + t_ml = sbd_get_memlist(t_mp, SBD_HD2ERR(hp)); + if (t_ml == NULL) { + cmn_err(CE_WARN, "sbd:%s: no memlist for" + " mem-unit %d, board %d", + f, + t_sbp->sb_num, + t_mp->sbm_cm.sbdev_unum); + + continue; + } + + /* get appropriate source board memlist */ + if (t_mp->sbm_npages < s_mp->sbm_npages) { + spgcnt_t excess; + + /* + * make a copy of the source board memlist + * then edit it to remove the spans that + * are outside the calculated span of + * [pfn..s_mq.last_nonrelocatable]. + */ + if (x_ml != NULL) + memlist_delete(x_ml); + + x_ml = memlist_dup(s_ml); + if (x_ml == NULL) { + PR_MEM("%s: memlist_dup failed\n", f); + /* TODO: should abort */ + continue; + } + + /* trim off lower portion */ + excess = t_mp->sbm_slice_offset; + if (excess > 0) { + int mlret; + + mlret = memlist_delete_span( + _ptob64(s_mp->sbm_basepfn), + _ptob64(excess), + &x_ml); + PR_MEM("%s: mlret = %d\n", f, mlret); + } + + /* + * Since this candidate target board is smaller + * than the source board, s_mq must have been + * initialized in previous loop while processing + * this or some other candidate board. + * FIXME: this is weak. 
+ */ + ASSERT(s_mq.phys_pages != 0); + + /* trim off upper portion */ + excess = (s_mp->sbm_basepfn + s_mp->sbm_npages) + - (s_mq.last_nonrelocatable + 1); + if (excess > 0) { + pfn_t p; + int mlret; + + p = s_mq.last_nonrelocatable + 1; + p -= excess; + + mlret = memlist_delete_span( + _ptob64(p), + _ptob64(excess), + &x_ml); + PR_MEM("%s: mlret = %d\n", f, mlret); + } + + PR_MEM("%s: brd %d: edited source memlist:\n", + f, s_sbp->sb_num); + SBD_MEMLIST_DUMP(x_ml); + +#ifdef DEBUG + /* sanity check memlist */ + d_ml = x_ml; + while (d_ml->next != NULL) + d_ml = d_ml->next; + ASSERT(x_ml->address == _ptob64(s_mp->sbm_basepfn) + + _ptob64(t_mp->sbm_slice_offset)); + ASSERT(d_ml->address + d_ml->size == + _ptob64(s_mq.last_nonrelocatable + 1)); +#endif + + /* + * x_ml now describes only the portion of the + * source board that will be moved during the + * copy/rename operation. + */ + d_ml = x_ml; + } else { + /* use original memlist; all spans will be moved */ + d_ml = s_ml; + } + + /* verify target can support source memory spans. 
*/ + if (memlist_canfit(d_ml, t_ml) == 0) { + PR_MEM("%s: source memlist won't" + " fit in target memlist\n", f); + PR_MEM("%s: source memlist:\n", f); + SBD_MEMLIST_DUMP(d_ml); + PR_MEM("%s: target memlist:\n", f); + SBD_MEMLIST_DUMP(t_ml); + + continue; + } + + /* NOTE: the value of d_ml is not used beyond this point */ + + PR_MEM("%s: checking for no-reloc on board %d, " + " basepfn=0x%lx, npages=%ld\n", + f, + t_sbp->sb_num, + t_mp->sbm_basepfn, + t_mp->sbm_npages); + + rv = kphysm_del_span_query( + t_mp->sbm_basepfn, t_mp->sbm_npages, &mq); + if (rv != KPHYSM_OK) { + PR_MEM("%s: kphysm_del_span_query:" + " unexpected return value %d\n", f, rv); + + continue; + } + + if (mq.nonrelocatable != 0) { + PR_MEM("%s: candidate board %d has" + " nonrelocatable span [0x%lx..0x%lx]\n", + f, + t_sbp->sb_num, + mq.first_nonrelocatable, + mq.last_nonrelocatable); + + continue; + } + +#ifdef DEBUG + /* + * This is a debug tool for excluding certain boards + * from being selected as a target board candidate. + * sbd_ignore_board is only tested by this driver. + * It must be set with adb, obp, /etc/system or your + * favorite debugger. + */ + if (sbd_ignore_board & + (1 << (t_sbp->sb_num - 1))) { + PR_MEM("%s: sbd_ignore_board flag set," + " ignoring board %d as candidate\n", + f, t_sbp->sb_num); + continue; + } +#endif + + /* + * Make sure there is no memory overlap if this + * target board is used for copy-rename. + */ + if (sbd_check_boundaries(phys_install, s_mp, t_mp) != 0) + continue; + + /* + * Reserve excess source board memory, if any. + * + * When the number of pages on the candidate target + * board is less than the number of pages on the source, + * then some spans (clearly) of the source board's address + * space will not be covered by physical memory after the + * copy/rename completes. The following code block + * schedules those spans to be deleted. 
+ */ + if (t_mp->sbm_npages < s_mp->sbm_npages) { + pfn_t pfn; + int mlret; + + d_ml = memlist_dup(s_ml); + if (d_ml == NULL) { + PR_MEM("%s: cant dup src brd memlist\n", f); + /* TODO: should abort */ + continue; + } + + /* calculate base pfn relative to target board */ + pfn = s_mp->sbm_basepfn & ~sm; + pfn += t_mp->sbm_slice_offset; + + /* remove span that will reside on candidate board */ + mlret = memlist_delete_span( + _ptob64(pfn), + _ptob64(t_mp->sbm_npages), + &d_ml); + PR_MEM("%s: mlret = %d\n", f, mlret); + + PR_MEM("%s: brd %d: reserving src brd memlist:\n", + f, s_sbp->sb_num); + SBD_MEMLIST_DUMP(d_ml); + + /* reserve excess spans */ + if (sbd_reserve_mem_spans( + &s_mp->sbm_memhandle, d_ml) != 0) { + + /* likely more non-reloc pages appeared */ + /* TODO: restart from top? */ + continue; + } + } else { + /* no excess source board memory */ + d_ml = NULL; + } + + s_mp->sbm_flags |= SBD_MFLAG_RESERVED; + + /* + * reserve all memory on target board. + * NOTE: source board's memhandle is used. + * + * If this succeeds (eq 0), then target selection is + * complete and all unwanted memory spans, both source and + * target, have been reserved. Loop is terminated. + */ + if (sbd_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) { + PR_MEM("%s: brd %d: target board memory reserved\n", + f, t_sbp->sb_num); + + /* a candidate target board is now reserved */ + t_mp->sbm_flags |= SBD_MFLAG_RESERVED; + c_mp = t_mp; + + /* *** EXITING LOOP *** */ + break; + } + + /* did not successfully reserve the target board. */ + PR_MEM("%s: could not reserve target board %d\n", + f, t_sbp->sb_num); + + /* + * NOTE: an undo of the sbd_reserve_mem_span work + * will happen automatically when the memhandle + * (s_mp->sbm_memhandle) is kphysm_del_release'd. 
+ */ + + s_mp->sbm_flags &= ~SBD_MFLAG_RESERVED; + } + + /* clean up after memlist editing logic */ + if (x_ml != NULL) + memlist_delete(x_ml); + + FREESTRUCT(sets, sbd_mem_unit_t *, n_units_per_set * n_sets); + + /* + * c_mp will be NULL when the entire sets[] array + * has been searched without reserving a target board. + */ + if (c_mp == NULL) { + PR_MEM("%s: brd %d: target selection failed.\n", + f, s_sbp->sb_num); + + if (t_ml != NULL) + memlist_delete(t_ml); + + return (-1); + } + + PR_MEM("%s: found target board %d for source board %d\n", + f, + t_sbp->sb_num, + s_sbp->sb_num); + + s_mp->sbm_peer = c_mp; + s_mp->sbm_flags |= SBD_MFLAG_SOURCE; + s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */ + s_mp->sbm_mlist = s_ml; + s_mp->sbm_cm.sbdev_busy = 1; + + c_mp->sbm_peer = s_mp; + c_mp->sbm_flags |= SBD_MFLAG_TARGET; + c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */ + c_mp->sbm_mlist = t_ml; + c_mp->sbm_cm.sbdev_busy = 1; + + s_mp->sbm_flags &= ~SBD_MFLAG_MEMRESIZE; + if (c_mp->sbm_npages > s_mp->sbm_npages) { + s_mp->sbm_flags |= SBD_MFLAG_MEMUPSIZE; + PR_MEM("%s: upsize (source pgs 0x%lx < target pgs 0x%lx)\n", + f, s_mp->sbm_npages, c_mp->sbm_npages); + } else if (c_mp->sbm_npages < s_mp->sbm_npages) { + s_mp->sbm_flags |= SBD_MFLAG_MEMDOWNSIZE; + PR_MEM("%s: downsize (source pgs 0x%lx > target pgs 0x%lx)\n", + f, s_mp->sbm_npages, c_mp->sbm_npages); + } + + return (0); +} + +int +sbd_move_memory(sbd_handle_t *hp, sbd_board_t *s_bp, sbd_board_t *t_bp) +{ + int ret; + sbdp_handle_t *hdp; + sbderror_t *ep = SBD_HD2ERR(hp); + + hdp = sbd_get_sbdp_handle(s_bp, hp); + + ret = sbdp_move_memory(hdp, t_bp->sb_num); + if (ret != 0) + SBD_GET_PERR(hdp->h_err, ep); + + sbd_release_sbdp_handle(hdp); + + return (ret); +} + +/* + * Memlist support. 
+ */ +void +memlist_delete(struct memlist *mlist) +{ + sbdp_handle_t *hdp; + + hdp = sbd_get_sbdp_handle(NULL, NULL); + + (void) sbdp_del_memlist(hdp, mlist); + + sbd_release_sbdp_handle(hdp); +} + +struct memlist * +memlist_dup(struct memlist *mlist) +{ + struct memlist *hl, *prev; + + if (mlist == NULL) + return (NULL); + + prev = NULL; + hl = NULL; + for (; mlist; mlist = mlist->next) { + struct memlist *mp; + + mp = memlist_get_one(); + if (mp == NULL) { + if (hl != NULL) + memlist_free_list(hl); + hl = NULL; + break; + } + mp->address = mlist->address; + mp->size = mlist->size; + mp->next = NULL; + mp->prev = prev; + + if (prev == NULL) + hl = mp; + else + prev->next = mp; + prev = mp; + } + + return (hl); +} + +void +memlist_dump(struct memlist *mlist) +{ + register struct memlist *ml; + + if (mlist == NULL) { + PR_MEM("memlist> EMPTY\n"); + } else { + for (ml = mlist; ml; ml = ml->next) + PR_MEM("memlist> 0x%" PRIx64 " " + "0x%" PRIx64 " \n", + ml->address, ml->size); + } +} + +int +memlist_intersect(struct memlist *al, struct memlist *bl) +{ + uint64_t astart, aend, bstart, bend; + + if ((al == NULL) || (bl == NULL)) + return (0); + + aend = al->address + al->size; + bstart = bl->address; + bend = bl->address + bl->size; + + while (al && bl) { + while (al && (aend <= bstart)) + if ((al = al->next) != NULL) + aend = al->address + al->size; + if (al == NULL) + return (0); + + if ((astart = al->address) <= bstart) + return (1); + + while (bl && (bend <= astart)) + if ((bl = bl->next) != NULL) + bend = bl->address + bl->size; + if (bl == NULL) + return (0); + + if ((bstart = bl->address) <= astart) + return (1); + } + + return (0); +} + +/* + * Determine whether the source memlist (s_mlist) will + * fit into the target memlist (t_mlist) in terms of + * size and holes (i.e. based on same relative base address). 
+ */ +static int +memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist) +{ + int rv = 0; + uint64_t s_basepa, t_basepa; + struct memlist *s_ml, *t_ml; + + if ((s_mlist == NULL) || (t_mlist == NULL)) + return (0); + + /* + * Base both memlists on common base address (0). + */ + s_basepa = s_mlist->address; + t_basepa = t_mlist->address; + + for (s_ml = s_mlist; s_ml; s_ml = s_ml->next) + s_ml->address -= s_basepa; + + for (t_ml = t_mlist; t_ml; t_ml = t_ml->next) + t_ml->address -= t_basepa; + + s_ml = s_mlist; + for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) { + uint64_t s_start, s_end; + uint64_t t_start, t_end; + + t_start = t_ml->address; + t_end = t_start + t_ml->size; + + for (; s_ml; s_ml = s_ml->next) { + s_start = s_ml->address; + s_end = s_start + s_ml->size; + + if ((s_start < t_start) || (s_end > t_end)) + break; + } + } + /* + * If we ran out of source memlist chunks that mean + * we found a home for all of them. + */ + if (s_ml == NULL) + rv = 1; + + /* + * Need to add base addresses back since memlists + * are probably in use by caller. 
+ */ + for (s_ml = s_mlist; s_ml; s_ml = s_ml->next) + s_ml->address += s_basepa; + + for (t_ml = t_mlist; t_ml; t_ml = t_ml->next) + t_ml->address += t_basepa; + + return (rv); +} + +void +sbd_attach_mem(sbd_handle_t *hp, sbderror_t *ep) +{ + sbd_mem_unit_t *mp; + dev_info_t *dip; + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + sbdp_handle_t *hdp; + int err, unit; + struct memlist *ml, *mc; + static fn_t f = "sbd_attach_mem"; + int i; + + PR_MEM("%s...\n", f); + + /* + * all four cpus have to be attached before + * configuring mem + */ + for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) { + sbd_cpu_unit_t *cpup; + struct cpu *cp; + + if (!SBD_DEV_IS_PRESENT(sbp, SBD_COMP_CPU, i)) + continue; + + if (!SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_CPU, i)) + goto error; + + cpup = SBD_GET_BOARD_CPUUNIT(sbp, i); + + if (cpup == NULL) + goto error; + + mutex_enter(&cpu_lock); + cp = cpu_get(cpup->sbc_cpu_id); + if (cp == NULL) { + mutex_exit(&cpu_lock); + cmn_err(CE_WARN, + "sbd:%s: cpu_get failed for cpu %d", + f, cpup->sbc_cpu_id); + goto error; + } + if (cpu_is_poweredoff(cp)) { + mutex_exit(&cpu_lock); + goto error; + } + mutex_exit(&cpu_lock); + continue; + +error: + SBD_SET_ERR(ep, ESBD_CPUONLINE); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[i]); + (void) sbd_set_err_in_hdl(hp, ep); + return; + } + + dip = *(sbp->sb_devlist[NIX(SBD_COMP_MEM)]); + + hdp = sbd_get_sbdp_handle(sbp, hp); + unit = sbdp_get_unit_num(hdp, dip); + if (unit < 0) { + SBD_GET_PERR(hdp->h_err, ep); + sbd_release_sbdp_handle(hdp); + return; + } + + ASSERT(sbp->sb_mempath[unit] != NULL); + ASSERT(e_ddi_branch_held(dip)); + + (void) ddi_pathname(dip, sbp->sb_mempath[unit]); + + mp = SBD_GET_BOARD_MEMUNIT(sbp, unit); + + ml = sbd_get_memlist(mp, ep); + if (ml == NULL) { + cmn_err(CE_WARN, + "sbd:%s: failed to get memlist for " + "board %d", f, sbp->sb_num); + /* + * Need to record an error and return. 
+ */ + SBD_SET_ERR(ep, ESBD_MEMFAIL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + sbd_release_sbdp_handle(hdp); + return; + } + + SBD_MEMLIST_DUMP(ml); + err = 0; + for (mc = ml; mc; mc = mc->next) { + update_membounds_t umb; + pfn_t base; + pgcnt_t npgs; + + base = (pfn_t)(mc->address >> PAGESHIFT); + npgs = (pgcnt_t)(mc->size >> PAGESHIFT); + + umb.u_board = sbp->sb_num; + umb.u_base = (uint64_t)base << MMU_PAGESHIFT; + umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT; + + lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb); + err = kphysm_add_memory_dynamic(base, npgs); + + if (err != KPHYSM_OK) { + cmn_err(CE_WARN, + "%s: kphysm_add_memory_dynamic fail %d", f, err); + + /* translate kphysm error */ + switch (err) { + case KPHYSM_ERESOURCE: + err = ESBD_NOMEM; + break; + + case KPHYSM_EFAULT: + err = ESBD_FAULT; + break; + + default: + err = ESBD_INVAL; + break; + } + break; + } + + kcage_range_lock(); + err = kcage_range_add(base, npgs, 1); + kcage_range_unlock(); + if (err != 0) { + cmn_err(CE_WARN, + "%s: kcage_range_add fail %d", f, err); + + /* Translate kcage error. */ + switch (err) { + case ENOMEM: + err = ESBD_NOMEM; + break; + default: + err = ESBD_INVAL; + break; + } + break; + } + (void) sbdp_mem_add_span(hdp, mc->address, mc->size); + } + + if (err != 0) { + SBD_SET_ERR(ep, err); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + } + + memlist_delete(ml); + sbd_release_sbdp_handle(hdp); + + /* + * Now attach all mem devinfo nodes to the device tree. 
+ */ + for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) { + if (mp->sbm_dip[i] == NULL) + continue; + ASSERT(e_ddi_branch_held(mp->sbm_dip[i])); + if (e_ddi_branch_configure(mp->sbm_dip[i], NULL, 0) && + SBD_GET_ERR(ep) == 0) { + SBD_SET_ERR(ep, ESBD_INVAL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + } + } +} + +typedef struct { + kcondvar_t cond; + kmutex_t lock; + int error; + int done; +} sbd_release_mem_sync_t; + +/* + * When we reach here the memory being drained should have + * already been reserved in sbd_pre_release_mem(). + * Our only task here is to kick off the "drain". + * Returns -1 when error encountered or zero for success. + */ +int +sbd_release_mem(sbd_handle_t *hp, dev_info_t *dip, int unit) +{ + memhandle_t mh; + int err; + int cancel_flag = 0; + int e_code = 0; + sbd_board_t *sbp = SBDH2BD(hp->h_sbd); + sbd_release_mem_sync_t rms; + static fn_t f = "sbd_release_mem"; + + /* + * If this node has a scheduled memory delete operation, + * it will have a memhandle. If it does have a memhandle (the + * return value of sbd_get_memhandle is zero when true), + * then perform the delete. 
+ */ + + if ((cancel_flag = sbd_get_memhandle(hp, dip, &mh)) != 0) { + cmn_err(CE_WARN, "%s: couldn't get the memhandle\n", f); + return (cancel_flag); + } + + bzero((void *) &rms, sizeof (rms)); + + mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL); + cv_init(&rms.cond, NULL, CV_DRIVER, NULL); + + mutex_enter(&rms.lock); + err = kphysm_del_start(mh, sbd_release_memory_done, (void *) &rms); + if (err == KPHYSM_OK) { + /* wait for completion */ + while (!rms.done) { + if (cancel_flag) { + /* previously canceled */ + cv_wait(&rms.cond, &rms.lock); + } else if (cv_wait_sig(&rms.cond, &rms.lock) == 0) { + /* interrupted: cancel and wait */ + cancel_flag = -1; + (void) kphysm_del_cancel(mh); + } + } + /* get the result of the memory delete operation */ + err = rms.error; + } else { + (void) kphysm_del_release(mh); + } + + mutex_exit(&rms.lock); + + cv_destroy(&rms.cond); + mutex_destroy(&rms.lock); + + if (err != KPHYSM_OK) { + switch (err) { + case KPHYSM_ENOWORK: + e_code = ESBD_NOERROR; + break; + + case KPHYSM_EHANDLE: + case KPHYSM_ESEQUENCE: + e_code = ESBD_INTERNAL; + break; + + case KPHYSM_ENOTVIABLE: + e_code = ESBD_MEM_NOTVIABLE; + break; + + case KPHYSM_EREFUSED: + e_code = ESBD_MEM_REFUSED; + break; + + case KPHYSM_ENONRELOC: + e_code = ESBD_MEM_NONRELOC; + break; + + case KPHYSM_ECANCELLED: + e_code = ESBD_MEM_CANCELLED; + break; + + case KPHYSM_ERESOURCE: + e_code = ESBD_MEMFAIL; + break; + + default: + cmn_err(CE_WARN, "sbd:%s:" + " unexpected kphysm error code %d," + " dip 0x%p", + f, err, (void *)dip); + + e_code = ESBD_IO; + break; + } + + if (e_code != 0) { + cancel_flag = -1; + SBD_SET_ERR(SBD_HD2ERR(hp), e_code); + SBD_SET_ERRSTR(SBD_HD2ERR(hp), sbp->sb_mempath[unit]); + } + } + + return (cancel_flag); +} + +/* + * Memory has been logically removed by the time this routine is called. 
+ */ +void +sbd_release_memory_done(void *arg, int error) +{ + sbd_release_mem_sync_t *ds = arg; + + mutex_enter(&ds->lock); + ds->error = error; + ds->done = 1; + cv_signal(&ds->cond); + mutex_exit(&ds->lock); +} + +/* + * If detaching node contains memory that is "non-permanent" + * then the memory adr's are simply cleared. If the memory + * is non-relocatable, then do a copy-rename. + */ +int +sbd_detach_memory(sbd_handle_t *hp, sbderror_t *ep, sbd_mem_unit_t *s_mp, + int unit) +{ + int rv; + sbd_mem_unit_t *t_mp; + sbd_istate_t state; + sbdp_handle_t *hdp; + sbd_board_t *sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp; + sbd_board_t *tbp; + static fn_t f = "sbd_detach_memory"; + + PR_MEM("%s...\n", f); + + /* lookup target mem unit and target board structure, if any */ + if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) { + t_mp = s_mp->sbm_peer; + ASSERT(t_mp != NULL); + ASSERT(t_mp->sbm_peer == s_mp); + tbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp; + } else { + t_mp = NULL; + } + + /* verify mem unit's state is UNREFERENCED */ + state = s_mp->sbm_cm.sbdev_state; + if (state != SBD_STATE_UNREFERENCED) { + cmn_err(CE_WARN, "%s: invalid state transition for" + " mem-unit (%d.%d)", + f, + sbp->sb_num, + s_mp->sbm_cm.sbdev_unum); + SBD_SET_ERR(ep, ESBD_STATE); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + return (-1); + } + + /* verify target mem unit's state is UNREFERENCED, if any */ + if (t_mp != NULL) { + state = t_mp->sbm_cm.sbdev_state; + if (state != SBD_STATE_UNREFERENCED) { + cmn_err(CE_WARN, "%s: invalid state transition for" + " target mem-unit (%d.%d)", + f, + tbp->sb_num, + t_mp->sbm_cm.sbdev_unum); + SBD_SET_ERR(ep, ESBD_STATE); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + return (-1); + } + } + + /* + * Displacement flush all ecaches in the system. + * That's the fastest way to remove all cache references + * to the detaching memory. 
+ */ + xc_all(sbd_flush_ecache, 0, 0); + + hdp = sbd_get_sbdp_handle(sbp, hp); + + /* + * If there is no target board (no copy/rename was needed), then + * we're done! + */ + if (t_mp == NULL) { + /* + * Reprogram interconnect hardware and disable + * memory controllers for memory node that's going away. + */ + + rv = sbdphw_disable_memctrl(hdp, s_mp->sbm_cm.sbdev_dip); + if (rv) { + cmn_err(CE_WARN, + "%s: failed to deprogram mem-unit (%d.%d)," + " dip 0x%p", + f, + sbp->sb_num, + s_mp->sbm_cm.sbdev_unum, + (void *)s_mp->sbm_cm.sbdev_dip); + /* + * Make sure we don't rewrite an sbdp error + */ + if (SBD_GET_ERR(ep) != 0) { + SBD_SET_ERR(ep, ESBD_HW_PROGRAM); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + } + } + } else { + rv = sbd_move_memory(hp, sbp, tbp); + if (rv) { + int i; + + cmn_err(CE_WARN, "%s: failed to move memory" + " from board %d to board %d", + f, + sbp->sb_num, + tbp->sb_num); + /* + * Make sure we don't rewrite an sbdp error + */ + if (SBD_GET_ERR(ep) != 0) { + SBD_SET_ERR(ep, ESBD_INTERNAL); + SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]); + } + /* + * If we failed here, it means that the target board's + * memory has been unconfigured. We need to configure + * it back + */ + for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) { + int unit; + dev_info_t *dip; + dev_info_t **devlist; + + + devlist = tbp->sb_devlist[NIX(SBD_COMP_MEM)]; + dip = devlist[i]; + sbd_reset_error_sbdph(hdp); + unit = sbdp_get_unit_num(hdp, dip); + + /* + * We already saved the error that created + * this mess. If we fail, make sure not + * to overwrite the original error + */ + if (unit == -1) { + continue; + } + if (sbd_cancel_mem(hp, unit) != 0) + continue; + + t_mp->sbm_flags = 0; + /* + * clean up + */ + sbd_mem_cleanup(s_mp, t_mp, ep); + if (s_mp->sbm_mlist) { + memlist_delete(s_mp->sbm_mlist); + s_mp->sbm_mlist = NULL; + } + + SBD_DEVICE_TRANSITION(tbp, SBD_COMP_MEM, + unit, SBD_STATE_CONFIGURED); + } + } + + PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n", + f, + rv ? 
"FAILED" : "COMPLETED", + sbp->sb_num, + tbp->sb_num); + } + + if (rv == 0) { + update_membounds_t umb; + + umb.u_board = sbp->sb_num; + umb.u_base = (uint64_t)-1; + umb.u_len = (uint64_t)-1; + + lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb); + } + + sbd_release_sbdp_handle(hdp); + return (rv); +} + +/*ARGSUSED*/ +static void +sbd_flush_ecache(uint64_t a, uint64_t b) +{ + cpu_flush_ecache(); +} |
