summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/Targetdirs7
-rw-r--r--usr/src/cmd/Makefile3
-rw-r--r--usr/src/cmd/Makefile.cmd11
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kctl/kctl.h11
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_auxv.h20
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_fault.c15
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_kctl.h17
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_main.c27
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_promif.c23
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_promif.h10
-rw-r--r--usr/src/cmd/mdb/sparc/kmdb/kaif_activate.c78
-rw-r--r--usr/src/cmd/mdb/sparc/kmdb/kctl/kctl_isadep.c52
-rw-r--r--usr/src/cmd/mdb/sparc/v9/kmdb/kaif_handlers.s314
-rw-r--r--usr/src/cmd/mdb/sun4v/Makefile.kmdb25
-rw-r--r--usr/src/cmd/mdb/sun4v/modules/Makefile11
-rw-r--r--usr/src/cmd/mdb/sun4v/modules/vdsk/Makefile30
-rw-r--r--usr/src/cmd/mdb/sun4v/modules/vdsk/v9/Makefile44
-rw-r--r--usr/src/cmd/mdb/sun4v/modules/vdsk/vdsk.c105
-rw-r--r--usr/src/cmd/mdb/sun4v/v9/Makefile.kmdb8
-rw-r--r--usr/src/cmd/mdb/sun4v/v9/kmdb/Makefile10
-rw-r--r--usr/src/cmd/picl/plugins/sun4v/mdesc/Makefile18
-rw-r--r--usr/src/cmd/picl/plugins/sun4v/mdesc/cpu_prop_update.c15
-rw-r--r--usr/src/cmd/picl/plugins/sun4v/mdesc/dr.c517
-rw-r--r--usr/src/cmd/picl/plugins/sun4v/mdesc/init.c47
-rw-r--r--usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.c75
-rw-r--r--usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.h23
-rw-r--r--usr/src/cmd/vntsd/Makefile74
-rw-r--r--usr/src/cmd/vntsd/chars.h87
-rw-r--r--usr/src/cmd/vntsd/cmd.c486
-rw-r--r--usr/src/cmd/vntsd/common.c654
-rw-r--r--usr/src/cmd/vntsd/console.c721
-rw-r--r--usr/src/cmd/vntsd/listen.c285
-rw-r--r--usr/src/cmd/vntsd/queue.c288
-rw-r--r--usr/src/cmd/vntsd/read.c265
-rw-r--r--usr/src/cmd/vntsd/svc-vntsd64
-rw-r--r--usr/src/cmd/vntsd/vcc.h75
-rw-r--r--usr/src/cmd/vntsd/vntsd.c582
-rw-r--r--usr/src/cmd/vntsd/vntsd.h476
-rw-r--r--usr/src/cmd/vntsd/vntsd.xml94
-rw-r--r--usr/src/cmd/vntsd/vntsdvcc.c633
-rw-r--r--usr/src/cmd/vntsd/write.c251
-rw-r--r--usr/src/common/mdesc/mdesc_diff.c602
-rw-r--r--usr/src/common/mdesc/mdesc_fini.c12
-rw-r--r--usr/src/common/mdesc/mdesc_getbinsize.c45
-rw-r--r--usr/src/common/mdesc/mdesc_getgen.c45
-rw-r--r--usr/src/common/mdesc/mdesc_init_intern.c49
-rw-r--r--usr/src/common/mdesc/mdesc_rootnode.c10
-rw-r--r--usr/src/common/mdesc/mdesc_scandag.c11
-rw-r--r--usr/src/lib/libpcp/common/libpcp.c300
-rw-r--r--usr/src/pkgdefs/Makefile2
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/Makefile38
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/i.manifest76
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/pkginfo.tmpl55
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/postinstall136
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/preremove58
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/prototype_com52
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/prototype_sparc79
-rw-r--r--usr/src/pkgdefs/SUNWldomr.v/r.manifest83
-rw-r--r--usr/src/pkgdefs/SUNWldomu.v/Makefile38
-rw-r--r--usr/src/pkgdefs/SUNWldomu.v/depend56
-rw-r--r--usr/src/pkgdefs/SUNWldomu.v/pkginfo.tmpl55
-rw-r--r--usr/src/pkgdefs/SUNWldomu.v/prototype_com48
-rw-r--r--usr/src/pkgdefs/SUNWldomu.v/prototype_sparc54
-rw-r--r--usr/src/pkgdefs/SUNWmdb/prototype_sparc1
-rw-r--r--usr/src/pkgdefs/SUNWmdbr/prototype_sparc1
-rw-r--r--usr/src/pkgdefs/etc/exception_list_i3861
-rw-r--r--usr/src/tools/scripts/bfu.sh3
-rw-r--r--usr/src/uts/common/sys/mdesc.h60
-rw-r--r--usr/src/uts/common/sys/mdesc_impl.h12
-rw-r--r--usr/src/uts/sfmmu/ml/sfmmu_kdi.s23
-rw-r--r--usr/src/uts/sun4/io/trapstat.c19
-rw-r--r--usr/src/uts/sun4/os/ddi_impl.c829
-rw-r--r--usr/src/uts/sun4/os/mlsetup.c8
-rw-r--r--usr/src/uts/sun4/os/mp_startup.c7
-rw-r--r--usr/src/uts/sun4/os/startup.c19
-rw-r--r--usr/src/uts/sun4u/os/mach_ddi_impl.c835
-rw-r--r--usr/src/uts/sun4u/os/mach_startup.c35
-rw-r--r--usr/src/uts/sun4v/Makefile.files37
-rw-r--r--usr/src/uts/sun4v/Makefile.rules7
-rw-r--r--usr/src/uts/sun4v/Makefile.sun4v.shared39
-rw-r--r--usr/src/uts/sun4v/cnex/Makefile99
-rw-r--r--usr/src/uts/sun4v/cpu/common_asm.s10
-rw-r--r--usr/src/uts/sun4v/cpu/generic.c260
-rw-r--r--usr/src/uts/sun4v/cpu/niagara.c152
-rw-r--r--usr/src/uts/sun4v/dr_cpu/Makefile93
-rw-r--r--usr/src/uts/sun4v/ds/Makefile97
-rw-r--r--usr/src/uts/sun4v/fault_iso/Makefile97
-rw-r--r--usr/src/uts/sun4v/io/cnex.c1133
-rw-r--r--usr/src/uts/sun4v/io/dr_cpu.c1151
-rw-r--r--usr/src/uts/sun4v/io/dr_util.c206
-rw-r--r--usr/src/uts/sun4v/io/ds.c2728
-rw-r--r--usr/src/uts/sun4v/io/fault_iso.c453
-rw-r--r--usr/src/uts/sun4v/io/ldc.c5609
-rw-r--r--usr/src/uts/sun4v/io/mdeg.c914
-rw-r--r--usr/src/uts/sun4v/io/mdesc.c217
-rw-r--r--usr/src/uts/sun4v/io/platsvc.c371
-rw-r--r--usr/src/uts/sun4v/io/qcn.c16
-rw-r--r--usr/src/uts/sun4v/io/vcc.c2406
-rw-r--r--usr/src/uts/sun4v/io/vdc.c3560
-rw-r--r--usr/src/uts/sun4v/io/vds.c2013
-rw-r--r--usr/src/uts/sun4v/io/vldc.c1581
-rw-r--r--usr/src/uts/sun4v/io/vnet.c1049
-rw-r--r--usr/src/uts/sun4v/io/vnet_gen.c4899
-rw-r--r--usr/src/uts/sun4v/io/vnex.c22
-rw-r--r--usr/src/uts/sun4v/io/vsw.c6959
-rw-r--r--usr/src/uts/sun4v/ldc/Makefile96
-rw-r--r--usr/src/uts/sun4v/ml/hcall.s546
-rw-r--r--usr/src/uts/sun4v/ml/mach_offsets.in21
-rw-r--r--usr/src/uts/sun4v/ml/mach_proc_init.s211
-rw-r--r--usr/src/uts/sun4v/ml/mach_subr_asm.s81
-rw-r--r--usr/src/uts/sun4v/ml/trap_table.s41
-rw-r--r--usr/src/uts/sun4v/os/fillsysinfo.c958
-rw-r--r--usr/src/uts/sun4v/os/hsvc.c10
-rw-r--r--usr/src/uts/sun4v/os/intrq.c128
-rw-r--r--usr/src/uts/sun4v/os/lpad.c231
-rw-r--r--usr/src/uts/sun4v/os/mach_cpu_states.c53
-rw-r--r--usr/src/uts/sun4v/os/mach_descrip.c845
-rw-r--r--usr/src/uts/sun4v/os/mach_mp_startup.c127
-rw-r--r--usr/src/uts/sun4v/os/mach_mp_states.c240
-rw-r--r--usr/src/uts/sun4v/os/mach_startup.c56
-rw-r--r--usr/src/uts/sun4v/platsvc/Makefile97
-rw-r--r--usr/src/uts/sun4v/promif/promif_asr.c75
-rw-r--r--usr/src/uts/sun4v/promif/promif_cpu.c122
-rw-r--r--usr/src/uts/sun4v/promif/promif_emul.c268
-rw-r--r--usr/src/uts/sun4v/promif/promif_interp.c42
-rw-r--r--usr/src/uts/sun4v/promif/promif_io.c220
-rw-r--r--usr/src/uts/sun4v/promif/promif_key.c58
-rw-r--r--usr/src/uts/sun4v/promif/promif_mon.c203
-rw-r--r--usr/src/uts/sun4v/promif/promif_node.c293
-rw-r--r--usr/src/uts/sun4v/promif/promif_power_off.c45
-rw-r--r--usr/src/uts/sun4v/promif/promif_prop.c327
-rw-r--r--usr/src/uts/sun4v/promif/promif_reboot.c115
-rw-r--r--usr/src/uts/sun4v/promif/promif_stree.c455
-rw-r--r--usr/src/uts/sun4v/promif/promif_test.c51
-rw-r--r--usr/src/uts/sun4v/promif/promif_version.c82
-rw-r--r--usr/src/uts/sun4v/sys/cnex.h98
-rw-r--r--usr/src/uts/sun4v/sys/cpu_module.h19
-rw-r--r--usr/src/uts/sun4v/sys/dr_cpu.h93
-rw-r--r--usr/src/uts/sun4v/sys/dr_util.h108
-rw-r--r--usr/src/uts/sun4v/sys/ds.h114
-rw-r--r--usr/src/uts/sun4v/sys/ds_impl.h332
-rw-r--r--usr/src/uts/sun4v/sys/error.h13
-rw-r--r--usr/src/uts/sun4v/sys/fault_iso.h96
-rw-r--r--usr/src/uts/sun4v/sys/hsvc.h1
-rw-r--r--usr/src/uts/sun4v/sys/hypervisor_api.h138
-rw-r--r--usr/src/uts/sun4v/sys/ldc.h221
-rw-r--r--usr/src/uts/sun4v/sys/ldc_impl.h487
-rw-r--r--usr/src/uts/sun4v/sys/ldoms.h61
-rw-r--r--usr/src/uts/sun4v/sys/lpad.h95
-rw-r--r--usr/src/uts/sun4v/sys/mach_descrip.h68
-rw-r--r--usr/src/uts/sun4v/sys/machcpuvar.h1
-rw-r--r--usr/src/uts/sun4v/sys/machparam.h41
-rw-r--r--usr/src/uts/sun4v/sys/machsystm.h26
-rw-r--r--usr/src/uts/sun4v/sys/mdeg.h120
-rw-r--r--usr/src/uts/sun4v/sys/mmu.h6
-rw-r--r--usr/src/uts/sun4v/sys/ncp.h220
-rw-r--r--usr/src/uts/sun4v/sys/ncs.h69
-rw-r--r--usr/src/uts/sun4v/sys/platsvc.h95
-rw-r--r--usr/src/uts/sun4v/sys/promif_impl.h144
-rw-r--r--usr/src/uts/sun4v/sys/varconfig.h89
-rw-r--r--usr/src/uts/sun4v/sys/vcc.h110
-rw-r--r--usr/src/uts/sun4v/sys/vcc_impl.h304
-rw-r--r--usr/src/uts/sun4v/sys/vdc.h260
-rw-r--r--usr/src/uts/sun4v/sys/vdsk_common.h194
-rw-r--r--usr/src/uts/sun4v/sys/vdsk_mailbox.h100
-rw-r--r--usr/src/uts/sun4v/sys/vio_common.h55
-rw-r--r--usr/src/uts/sun4v/sys/vio_mailbox.h331
-rw-r--r--usr/src/uts/sun4v/sys/vldc.h91
-rw-r--r--usr/src/uts/sun4v/sys/vldc_impl.h133
-rw-r--r--usr/src/uts/sun4v/sys/vnet.h118
-rw-r--r--usr/src/uts/sun4v/sys/vnet_common.h76
-rw-r--r--usr/src/uts/sun4v/sys/vnet_gen.h337
-rw-r--r--usr/src/uts/sun4v/sys/vnet_mailbox.h95
-rw-r--r--usr/src/uts/sun4v/sys/vnet_proxy.h133
-rw-r--r--usr/src/uts/sun4v/sys/vnetmsg.h81
-rw-r--r--usr/src/uts/sun4v/sys/vsw.h455
-rw-r--r--usr/src/uts/sun4v/sys/vsw_fdb.h64
-rw-r--r--usr/src/uts/sun4v/vcc/Makefile106
-rw-r--r--usr/src/uts/sun4v/vdc/Makefile108
-rw-r--r--usr/src/uts/sun4v/vds/Makefile106
-rw-r--r--usr/src/uts/sun4v/vldc/Makefile101
-rw-r--r--usr/src/uts/sun4v/vnet/Makefile105
-rw-r--r--usr/src/uts/sun4v/vsw/Makefile112
183 files changed, 56870 insertions, 2043 deletions
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index 036b228d34..35ab557045 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -159,6 +159,7 @@ ROOT.SYS= \
/var/svc/manifest/platform \
/var/svc/manifest/platform/i86pc \
/var/svc/manifest/platform/sun4u \
+ /var/svc/manifest/platform/sun4v \
/var/svc/manifest/site \
/var/svc/profile
@@ -170,7 +171,13 @@ XROOT.BIN= \
/usr/lib/inet/wanboot
# EXPORT DELETE END
+i386_ROOT.BIN=
+
+sparc_ROOT.BIN= \
+ /usr/lib/ldoms
+
ROOT.BIN= \
+ $($(MACH)_ROOT.BIN) \
/etc/saf \
/etc/sma \
/etc/sma/snmp \
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 0e4a192222..ccf9b44d6d 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -456,6 +456,7 @@ sparc_SUBDIRS= \
sckmd \
sf880drd \
stmsboot \
+ vntsd \
wrsmconf \
wrsmstat
@@ -692,6 +693,7 @@ sparc_MSGSUBDIRS= \
prtdscp \
prtfru \
stmsboot \
+ vntsd \
wrsmconf \
wrsmstat
@@ -765,6 +767,7 @@ MANIFEST_TOPDIRS= \
syseventd \
syslogd \
utmpd \
+ vntsd \
ypcmd \
zoneadmd
diff --git a/usr/src/cmd/Makefile.cmd b/usr/src/cmd/Makefile.cmd
index b7c8ec8f83..6ea1b083b5 100644
--- a/usr/src/cmd/Makefile.cmd
+++ b/usr/src/cmd/Makefile.cmd
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -209,6 +208,7 @@ ROOTSVCNETWORKSSL= $(ROOTSVCNETWORK)/ssl
ROOTSVCPLATFORM= $(ROOTVARSVCMANIFEST)/platform
ROOTSVCPLATFORMI86PC= $(ROOTSVCPLATFORM)/i86pc
ROOTSVCPLATFORMSUN4U= $(ROOTSVCPLATFORM)/sun4u
+ROOTSVCPLATFORMSUN4V= $(ROOTSVCPLATFORM)/sun4v
ROOTSVCAPPLICATION= $(ROOTVARSVCMANIFEST)/application
ROOTSVCAPPLICATIONMANAGEMENT= $(ROOTSVCAPPLICATION)/management
ROOTSVCAPPLICATIONSECURITY= $(ROOTSVCAPPLICATION)/security
@@ -388,6 +388,9 @@ $(ROOTSVCPLATFORMI86PC)/%: %
$(ROOTSVCPLATFORMSUN4U)/%: %
$(INS.file)
+$(ROOTSVCPLATFORMSUN4V)/%: %
+ $(INS.file)
+
$(ROOTMAN1)/%: %.sunman
$(INS.rename)
diff --git a/usr/src/cmd/mdb/common/kmdb/kctl/kctl.h b/usr/src/cmd/mdb/common/kmdb/kctl/kctl.h
index fd2c0c7701..612ed8db13 100644
--- a/usr/src/cmd/mdb/common/kmdb/kctl/kctl.h
+++ b/usr/src/cmd/mdb/common/kmdb/kctl/kctl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -106,6 +105,10 @@ extern void kctl_auxv_init(kmdb_auxv_t *, const char *, const char **, void *);
extern void kctl_auxv_init_isadep(kmdb_auxv_t *, void *);
extern void kctl_auxv_fini(kmdb_auxv_t *);
extern void kctl_auxv_fini_isadep(kmdb_auxv_t *);
+#ifdef sun4v
+extern void kctl_auxv_set_promif(kmdb_auxv_t *);
+extern void kctl_switch_promif(void);
+#endif
extern void kctl_wrintr(void);
extern void kctl_wrintr_fire(void);
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_auxv.h b/usr/src/cmd/mdb/common/kmdb/kmdb_auxv.h
index 7faf9b980b..bf3cc8fdf1 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_auxv.h
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_auxv.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -53,6 +52,9 @@
#include <gelf.h>
#include <sys/machelf.h>
#include <sys/kdi.h>
+#ifdef sun4v
+#include <sys/obpdefs.h>
+#endif
#ifdef __cplusplus
extern "C" {
@@ -104,6 +106,16 @@ typedef struct kmdb_auxv {
#ifdef __sparc
void (*kav_ktrap_install)(int, void (*)(void)); /* Add to krnl trptbl */
void (*kav_ktrap_restore)(void); /* Restore krnl trap hdlrs */
+#ifdef sun4v
+ uint_t kav_domaining; /* Domaining status */
+ caddr_t kav_promif_root; /* PROM shadow tree root */
+ ihandle_t kav_promif_in; /* PROM input dev instance */
+ ihandle_t kav_promif_out; /* PROM output dev instance */
+ phandle_t kav_promif_pin; /* PROM input dev package */
+ phandle_t kav_promif_pout; /* PROM output dev package */
+ pnode_t kav_promif_chosennode; /* PROM "/chosen" node */
+ pnode_t kav_promif_optionsnode; /* PROM "/options" node */
+#endif
#endif
} kmdb_auxv_t;
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_fault.c b/usr/src/cmd/mdb/common/kmdb/kmdb_fault.c
index e595f7be02..06f7c2927f 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_fault.c
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_fault.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -78,7 +78,10 @@ kmdb_fault(kreg_t tt, kreg_t pc, kreg_t sp, int cpuid)
for (;;) {
mdb_iob_printf(mdb.m_err, "\n%s: "
#if defined(__sparc)
- "(o)bp, (p)anic"
+#ifndef sun4v
+ "(o)bp, "
+#endif /* sun4v */
+ "(p)anic"
#else
"reboo(t)"
#endif
@@ -98,8 +101,10 @@ kmdb_fault(kreg_t tt, kreg_t pc, kreg_t sp, int cpuid)
continue;
#endif
+#ifndef sun4v
case 'o':
case 'O':
+#endif /* sun4v */
case 't':
case 'T':
kmdb_dpi_enter_mon();
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_kctl.h b/usr/src/cmd/mdb/common/kmdb/kmdb_kctl.h
index 54b7df2938..ad772338a5 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_kctl.h
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_kctl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -53,6 +52,16 @@ extern "C" {
extern int kmdb_init(const char *, kmdb_auxv_t *);
+/*
+ * This function should only be defined for sun4v. However the mdb build
+ * uses a custom tool (hdr2map) to generate mapfile from header files but
+ * this tool does not take care of preprocessor directives and functions
+ * are included into the mapfile whatever the architecture is and even
+ * if there is an #ifdef sun4v. So we always declare this function but it
+ * has a fake definition for all architecture but sun4v.
+ */
+extern void kmdb_init_promif(char *, kmdb_auxv_t *);
+
extern void kmdb_activate(kdi_debugvec_t **, uint_t);
extern void kmdb_deactivate(void);
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_main.c b/usr/src/cmd/mdb/common/kmdb/kmdb_main.c
index 1e34f218f2..7cc1d3f7b0 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_main.c
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_main.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -276,6 +275,28 @@ kmdb_init(const char *execname, kmdb_auxv_t *kav)
return (0);
}
+#ifdef sun4v
+
+void
+kmdb_init_promif(char *pgmname, kmdb_auxv_t *kav)
+{
+ kmdb_prom_init_promif(pgmname, kav);
+}
+
+#else
+
+/*ARGSUSED*/
+void
+kmdb_init_promif(char *pgmname, kmdb_auxv_t *kav)
+{
+ /*
+ * Fake function for non sun4v. See comments in kmdb_ctl.h
+ */
+ ASSERT(0);
+}
+
+#endif
+
/*
* First-time kmdb startup. Run when kmdb has control of the machine for the
* first time.
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c b/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c
index 50d65677cd..0757ccb48d 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c
@@ -18,7 +18,6 @@
*
* CDDL HEADER END
*/
-
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -29,6 +28,9 @@
#include <sys/types.h>
#include <sys/termios.h>
#include <sys/promif.h>
+#ifdef sun4v
+#include <sys/promif_impl.h>
+#endif
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
@@ -754,7 +756,14 @@ kmdb_prom_assfail(const char *assertion, const char *file, int line)
void
kmdb_prom_init_begin(char *pgmname, kmdb_auxv_t *kav)
{
+#ifdef sun4v
+ if (kav->kav_domaining)
+ kmdb_prom_init_promif(pgmname, kav);
+ else
+ prom_init(pgmname, kav->kav_romp);
+#else
prom_init(pgmname, kav->kav_romp);
+#endif
/* Initialize the interrupt ring buffer */
kmdb_prom_readbuf_head = kmdb_prom_readbuf_tail;
@@ -764,6 +773,18 @@ kmdb_prom_init_begin(char *pgmname, kmdb_auxv_t *kav)
#endif
}
+#ifdef sun4v
+void
+kmdb_prom_init_promif(char *pgmname, kmdb_auxv_t *kav)
+{
+ ASSERT(kav->kav_domaining);
+ cif_init(pgmname, kav->kav_promif_root,
+ kav->kav_promif_in, kav->kav_promif_out,
+ kav->kav_promif_pin, kav->kav_promif_pout,
+ kav->kav_promif_chosennode, kav->kav_promif_optionsnode);
+}
+#endif
+
/*
* Conclude the initialization of the debugger/PROM interface. Memory
* allocation and the global `mdb' object are now available.
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_promif.h b/usr/src/cmd/mdb/common/kmdb/kmdb_promif.h
index bdbadb5996..baca42f615 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_promif.h
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_promif.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,6 +41,9 @@ extern "C" {
extern void kmdb_prom_init_begin(char *, kmdb_auxv_t *);
extern void kmdb_prom_init_finish(kmdb_auxv_t *);
+#ifdef sun4v
+extern void kmdb_prom_init_promif(char *, kmdb_auxv_t *);
+#endif
extern ssize_t kmdb_prom_read(void *, size_t, struct termios *);
extern ssize_t kmdb_prom_write(const void *, size_t, struct termios *);
extern ihandle_t kmdb_prom_get_handle(char *);
diff --git a/usr/src/cmd/mdb/sparc/kmdb/kaif_activate.c b/usr/src/cmd/mdb/sparc/kmdb/kaif_activate.c
index 971ff13dd9..a17431d53f 100644
--- a/usr/src/cmd/mdb/sparc/kmdb/kaif_activate.c
+++ b/usr/src/cmd/mdb/sparc/kmdb/kaif_activate.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -131,7 +131,34 @@ kaif_install_generic(caddr_t tgt, caddr_t arg)
bcopy((caddr_t)kaif_hdlr_generic, tgt, 32);
}
-#ifndef sun4v
+#ifdef sun4v
+
+/*ARGSUSED*/
+static void
+kaif_install_goto_tt64(caddr_t tgt, caddr_t arg)
+{
+ /* LINTED - pointer alignment */
+ uint32_t *hdlr = (uint32_t *)tgt;
+ uint32_t disp = (T_FAST_INSTR_MMU_MISS - T_INSTR_MMU_MISS) * 0x20;
+
+ *hdlr++ = 0x10480000 | (disp >> 2); /* ba,pt (to tt64) */
+ *hdlr++ = 0x01000000; /* nop */
+}
+
+/*ARGSUSED*/
+static void
+kaif_install_goto_tt68(caddr_t tgt, caddr_t arg)
+{
+ /* LINTED - pointer alignment */
+ uint32_t *hdlr = (uint32_t *)tgt;
+ uint32_t disp = (T_FAST_DATA_MMU_MISS - T_DATA_MMU_MISS) * 0x20;
+
+ *hdlr++ = 0x10480000 | (disp >> 2); /* ba,pt (to tt68) */
+ *hdlr++ = 0x01000000; /* nop */
+}
+
+#endif /* sun4v */
+
static void
kaif_install_dmmumiss(caddr_t tgt, caddr_t vatotte)
{
@@ -159,25 +186,31 @@ kaif_install_immumiss(caddr_t tgt, caddr_t vatotte)
*patch++ |= (uintptr_t)vatotte >> 10;
*patch |= ((uintptr_t)vatotte) & 0x3ff;
}
-#endif /* sun4v */
static struct kaif_trap_handlers {
uint_t th_tt;
void (*th_install)(caddr_t, caddr_t);
} kaif_trap_handlers[] = {
{ T_INSTR_EXCEPTION, kaif_install_generic },
+#ifdef sun4v
+ { T_INSTR_MMU_MISS, kaif_install_goto_tt64 },
+#endif
{ T_IDIV0, kaif_install_generic },
{ T_DATA_EXCEPTION, kaif_install_generic },
+#ifdef sun4v
+ { T_DATA_MMU_MISS, kaif_install_goto_tt68 },
+#endif
{ T_DATA_ERROR, kaif_install_generic },
{ T_ALIGNMENT, kaif_install_generic },
-#ifdef sun4v
-#else /* sun4v */
{ T_FAST_INSTR_MMU_MISS, kaif_install_immumiss },
{ T_FAST_DATA_MMU_MISS, kaif_install_dmmumiss },
{ T_FAST_DATA_MMU_PROT, kaif_install_generic },
+#ifdef sun4v
+ { T_INSTR_MMU_MISS + T_TL1, kaif_install_goto_tt64 },
+ { T_DATA_MMU_MISS + T_TL1, kaif_install_goto_tt68 },
+#endif
{ T_FAST_INSTR_MMU_MISS + T_TL1, kaif_install_immumiss },
{ T_FAST_DATA_MMU_MISS + T_TL1, kaif_install_dmmumiss },
-#endif /* sun4v */
{ 0 }
};
@@ -189,34 +222,27 @@ kaif_trap_init(void)
int i;
/*
+ * sun4u:
* We rely upon OBP for the handling of a great many traps. As such,
* we begin by populating our table with pointers to OBP's handlers.
* We then copy in our own handlers where appropriate. At some point,
* when we provide the bulk of the handlers, this process will be
* reversed.
+ *
+ * sun4v:
+ * The sun4v kernel dismisses OBP at boot. Both fast and slow TLB
+ * misses are handled by KMDB. Breakpoint traps go directly KMDB.
+ * All other trap entries are redirected to their respective
+ * trap implemenation within the Solaris trap table.
*/
for (i = 0; i < kaif_tba_native_sz; i += 0x20) {
/* LINTED - pointer alignment */
uint32_t *hdlr = (uint32_t *)(kaif_tba_native + i);
#ifdef sun4v
- uint32_t tt = i/0x20;
-
- /*
- * We use obp's tl0 handlers. Sine kmdb installs itsdebug
- * hook in obp, if obp cannot handle any traps, such as
- * user enter an invalid address in kmdb, obp will call
- * kmdb's callback and the control goes back to kmdb.
- * For tl>0 traps, kernel's trap handlers are good at
- * handling these on sun4v.
- */
- if (tt >= T_TL1)
- brtgt = (uintptr_t)(kaif_tba_kernel + i);
- else
- brtgt = (uintptr_t)(kaif_tba_obp + i);
-#else /* !sun4v */
+ brtgt = (uintptr_t)(kaif_tba_kernel + i);
+#else
brtgt = (uintptr_t)(kaif_tba_obp + i);
-#endif /* sun4v */
-
+#endif
*hdlr++ = 0x03000000 | (brtgt >> 10); /* sethi brtgt, %g1 */
*hdlr++ = 0x81c06000 | (brtgt & 0x3ff); /* jmp %g1 + brtgt */
*hdlr++ = 0x01000000; /* nop */
diff --git a/usr/src/cmd/mdb/sparc/kmdb/kctl/kctl_isadep.c b/usr/src/cmd/mdb/sparc/kmdb/kctl/kctl_isadep.c
index 4c14d32067..d849441ac1 100644
--- a/usr/src/cmd/mdb/sparc/kmdb/kctl/kctl_isadep.c
+++ b/usr/src/cmd/mdb/sparc/kmdb/kctl/kctl_isadep.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -36,6 +35,11 @@
#include <sys/cpuvar.h>
#include <sys/kobj.h>
#include <sys/kobj_impl.h>
+#ifdef sun4v
+#include <sys/ldoms.h>
+#include <sys/promif_impl.h>
+#include <kmdb/kmdb_kctl.h>
+#endif
#include <kmdb/kctl/kctl.h>
@@ -229,8 +233,48 @@ kctl_auxv_init_isadep(kmdb_auxv_t *kav, void *romp)
kav->kav_ktrap_install = kctl_ktrap_install;
kav->kav_ktrap_restore = kctl_ktrap_restore;
+#ifdef sun4v
+ if (kctl.kctl_boot_loaded) {
+ /*
+ * When booting kmdb, kmdb starts before domaining is
+ * enabled and before the cif handler is changed to the
+ * kernel cif handler. So we start kmdb with using the
+ * OBP and we will change this when the cif handler is
+ * installed.
+ */
+ kav->kav_domaining = 0;
+ } else {
+ kctl_auxv_set_promif(kav);
+ }
+#endif
}
+#ifdef sun4v
+
+void
+kctl_auxv_set_promif(kmdb_auxv_t *kav)
+{
+ kav->kav_domaining = domaining_enabled;
+ kav->kav_promif_root = promif_stree_getroot();
+ kav->kav_promif_in = prom_stdin_ihandle();
+ kav->kav_promif_out = prom_stdout_ihandle();
+ kav->kav_promif_pin = prom_stdin_node();
+ kav->kav_promif_pout = prom_stdout_node();
+ kav->kav_promif_chosennode = prom_chosennode();
+ kav->kav_promif_optionsnode = prom_finddevice("/options");
+}
+
+void
+kctl_switch_promif(void)
+{
+ kmdb_auxv_t kav;
+
+ kctl_auxv_set_promif(&kav);
+ kmdb_init_promif(NULL, &kav);
+}
+
+#endif
+
/*ARGSUSED*/
void
kctl_auxv_fini_isadep(kmdb_auxv_t *auxv)
diff --git a/usr/src/cmd/mdb/sparc/v9/kmdb/kaif_handlers.s b/usr/src/cmd/mdb/sparc/v9/kmdb/kaif_handlers.s
index 1d7c9eaf8e..a90f7b2e4f 100644
--- a/usr/src/cmd/mdb/sparc/v9/kmdb/kaif_handlers.s
+++ b/usr/src/cmd/mdb/sparc/v9/kmdb/kaif_handlers.s
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -31,6 +31,20 @@
#include <sys/machtrap.h>
#include <sys/privregs.h>
#include <sys/mmu.h>
+#include <vm/mach_sfmmu.h>
+
+#if defined(sun4v) && !defined(lint)
+#include <sys/machparam.h>
+#endif
+
+#if defined(sun4v) && defined(KMDB_TRAPCOUNT)
+/*
+ * The sun4v implemenations of the fast miss handlers are larger than those
+ * of their sun4u kin. This is unfortunate because there is not enough space
+ * remaining in the respective trap table entries for this debug feature.
+ */
+#error "KMDB_TRAPCOUNT not supported on sun4v"
+#endif
/*
* This file contains the trap handlers that will be copied to kmdb's trap
@@ -50,12 +64,7 @@
#if defined(lint)
#include <kmdb/kaif.h>
-#endif /* lint */
-#if defined(lint)
-
-#ifdef sun4v
-#else /* sun4v */
void
kaif_hdlr_dmiss(void)
{
@@ -65,51 +74,149 @@ void
kaif_itlb_handler(void)
{
}
-#endif /* sun4v */
-#else /* lint */
+
+#else /* lint */
#ifdef sun4v
+
+#define GET_MMU_D_ADDR_CTX(daddr, ctx) \
+ MMU_FAULT_STATUS_AREA(ctx); \
+ ldx [ctx + MMFSA_D_ADDR], daddr; \
+ ldx [ctx + MMFSA_D_CTX], ctx
+
+#define GET_MMU_I_ADDR_CTX(iaddr, ctx) \
+ MMU_FAULT_STATUS_AREA(ctx); \
+ ldx [ctx + MMFSA_I_ADDR], iaddr; \
+ ldx [ctx + MMFSA_I_CTX], ctx
+
+/*
+ * KAIF_ITLB_STUFF
+ * derived from ITLB_STUFF in uts/sun4v/vm/mach_sfmmu.h
+ *
+ * Load ITLB entry
+ *
+ * In:
+ * tte = reg containing tte
+ * ouch = branch target label used if hcall fails (sun4v only)
+ * scr1, scr2, scr3, scr4 = scratch registers (must not be %o0-%o3)
+ */
+#define KAIF_ITLB_STUFF(tte, ouch, scr1, scr2, scr3, scr4) \
+ mov %o0, scr1; \
+ mov %o1, scr2; \
+ mov %o2, scr3; \
+ mov %o3, scr4; \
+ MMU_FAULT_STATUS_AREA(%o2); \
+ ldx [%o2 + MMFSA_I_ADDR], %o0; \
+ ldx [%o2 + MMFSA_I_CTX], %o1; \
+ srlx %o0, PAGESHIFT, %o0; \
+ sllx %o0, PAGESHIFT, %o0; \
+ mov tte, %o2; \
+ mov MAP_ITLB, %o3; \
+ ta MMU_MAP_ADDR; \
+ /* BEGIN CSTYLED */ \
+ brnz,a,pn %o0, ouch; \
+ nop; \
+ /* END CSTYLED */ \
+ mov scr1, %o0; \
+ mov scr2, %o1; \
+ mov scr3, %o2; \
+ mov scr4, %o3
+
+/*
+ * KAIF_DTLB_STUFF
+ * derived from DTLB_STUFF in uts/sun4v/vm/mach_sfmmu.h
+ *
+ * Load DTLB entry
+ *
+ * In:
+ * tte = reg containing tte
+ * ouch = branch target label used if hcall fails (sun4v only)
+ * scr1, scr2, scr3, scr4 = scratch registers (must not be %o0-%o3)
+ */
+#define KAIF_DTLB_STUFF(tte, ouch, scr1, scr2, scr3, scr4) \
+ mov %o0, scr1; \
+ mov %o1, scr2; \
+ mov %o2, scr3; \
+ mov %o3, scr4; \
+ MMU_FAULT_STATUS_AREA(%o2); \
+ ldx [%o2 + MMFSA_D_ADDR], %o0; \
+ ldx [%o2 + MMFSA_D_CTX], %o1; \
+ srlx %o0, PAGESHIFT, %o0; \
+ sllx %o0, PAGESHIFT, %o0; \
+ mov tte, %o2; \
+ mov MAP_DTLB, %o3; \
+ ta MMU_MAP_ADDR; \
+ /* BEGIN CSTYLED */ \
+ brnz,a,pn %o0, ouch; \
+ nop; \
+ /* END CSTYLED */ \
+ mov scr1, %o0; \
+ mov scr2, %o1; \
+ mov scr3, %o2; \
+ mov scr4, %o3
+
#else /* sun4v */
- .global kaif_hdlr_dmiss_patch
- .global kaif_hdlr_imiss_patch
+#define GET_MMU_D_ADDR_CTX(daddr, ctx) \
+ mov MMU_TAG_ACCESS, ctx; \
+ ldxa [ctx]ASI_DMMU, daddr; \
+ sllx daddr, TAGACC_CTX_LSHIFT, ctx; \
+ srlx ctx, TAGACC_CTX_LSHIFT, ctx
+
+#define GET_MMU_I_ADDR_CTX(iaddr, ctx) \
+ rdpr %tpc, iaddr; \
+ ldxa [%g0]ASI_IMMU, ctx; \
+ srlx ctx, TTARGET_CTX_SHIFT, ctx
+
+#define KAIF_DTLB_STUFF(tte, ouch, scr1, scr2, scr3, scr4) \
+ DTLB_STUFF(tte, scr1, scr2, scr3, scr4)
+
+#define KAIF_ITLB_STUFF(tte, ouch, scr1, scr2, scr3, scr4) \
+ ITLB_STUFF(tte, scr1, scr2, scr3, scr4)
+
+#endif /* sun4v */
+
+/*
+ * KAIF_CALL_KDI_VATOTTE
+ *
+ * Use kdi_vatotte to look up the tte. We don't bother stripping the
+ * context, as it won't change the tte we get.
+ *
+ * The two instruction at patch_lbl are modified during runtime
+ * by kaif to point to kdi_vatotte
+ *
+ * Clobbers all globals.
+ * Returns tte in %g1 if successful, otherwise 0 in %g1
+ * Leaves address of next instruction following this macro in scr1
+ */
+#define KAIF_CALL_KDI_VATOTTE(addr, ctx, patch_lbl, scr0, scr1) \
+ .global patch_lbl; \
+patch_lbl: \
+ sethi %hi(0), scr0; \
+ or scr0, %lo(0), scr0; \
+ jmpl scr0, scr1; \
+ add scr1, 8, scr1
+
- /*
- * This routine must be exactly 32 instructions long.
- */
ENTRY_NP(kaif_hdlr_dmiss)
- mov MMU_TAG_ACCESS, %g1
- ldxa [%g1]ASI_DMMU, %g1 /* %g1 = addr|ctx */
- sllx %g1, TAGACC_CTX_LSHIFT, %g2 /* strip addr */
- srlx %g2, TAGACC_CTX_LSHIFT, %g2 /* %g2 = ctx */
-
- /*
- * Use kdi_vatotte to look up the tte. We don't bother stripping the
- * context, as it won't change the tte we get.
- */
-kaif_hdlr_dmiss_patch:
- sethi %hi(0), %g3 /* set by kaif to kdi_vatotte */
- or %g3, %lo(0), %g3
- jmpl %g3, %g7 /* uses all regs, ret to %g7, tte or 0 in %g1 */
- add %g7, 8, %g7 /* adjust return */
+ GET_MMU_D_ADDR_CTX(%g1, %g2)
- brz %g1, 1f
+ KAIF_CALL_KDI_VATOTTE(%g1, %g2, kaif_hdlr_dmiss_patch, %g3, %g7)
+0: brz %g1, 1f
nop
/*
* kdi_vatotte gave us a TTE to use. Load it up and head back
* into the world, but first bump a counter.
*/
-#ifdef KMDB_TRAPCOUNT
- ldx [%g7 + 0x40], %g2 /* Trap counter. See top comment */
+
+#ifdef KMDB_TRAPCOUNT /* Trap counter. See top comment */
+ ldx [%g7 + .count-0b], %g2
add %g2, 1, %g2
- stx %g2, [%g7 + 0x40]
-#else
- nop
- nop
- nop
+ stx %g2, [%g7 + .count-0b]
#endif
- stxa %g1, [%g0]ASI_DTLB_IN
+
+ KAIF_DTLB_STUFF(%g1, 1f, %g2, %g3, %g4, %g5)
retry
1: /*
@@ -126,63 +233,47 @@ kaif_hdlr_dmiss_patch:
* find the TTE for the debugger without missing.
*/
-#ifdef KMDB_TRAPCOUNT
- mov MMU_TAG_ACCESS, %g1 /* Trap address "counter". */
- ldxa [%g1]ASI_DMMU, %g1
- stx %g1, [%g7 + 0x48]
-#else
- nop
- nop
- nop
+#ifdef KMDB_TRAPCOUNT /* Trap address "counter". */
+ GET_MMU_D_ADDR(%g2, %g3)
+ stx %g2, [%g7 + .daddr-0b]
+ stx %g1, [%g7 + .ecode-0b]
#endif
- mov PTSTATE_KERN_COMMON | PSTATE_AG, %g3
- wrpr %g3, %pstate
- sethi %hi(kaif_dtrap), %g4
- jmp %g4 + %lo(kaif_dtrap)
+ sethi %hi(kaif_dtrap), %g1
+ jmp %g1 + %lo(kaif_dtrap)
nop
- unimp 0
- unimp 0 /* counter goes here (base + 0x60) */
- unimp 0
- unimp 0 /* miss address goes here (base + 0x68) */
- unimp 0
- unimp 0
- unimp 0
- unimp 0
- unimp 0
+ /* NOTREACHED */
+
+#ifdef KMDB_TRAPCOUNT
+ .align 8
+.count: .xword 0 /* counter goes here */
+.daddr: .xword 0 /* miss address goes here */
+.ecode: .xword 0 /* sun4v: g1 contains err code */
+#endif
+
+ .align 32*4 /* force length to 32 instr. */
SET_SIZE(kaif_hdlr_dmiss)
- /*
- * This routine must be exactly 32 instructions long.
- */
- ENTRY_NP(kaif_hdlr_imiss)
- rdpr %tpc, %g1
- ldxa [%g0]ASI_IMMU, %g2
- srlx %g2, TTARGET_CTX_SHIFT, %g2
-kaif_hdlr_imiss_patch:
- sethi %hi(0), %g3 /* set by kaif to kdi_vatotte */
- or %g3, %lo(0), %g3
- jmpl %g3, %g7 /* uses all regs, ret to %g7, tte or 0 in %g1 */
- add %g7, 8, %g7 /* adjust return */
- brz %g1, 1f
+ ENTRY_NP(kaif_hdlr_imiss)
+ GET_MMU_I_ADDR_CTX(%g1, %g2)
+
+ KAIF_CALL_KDI_VATOTTE(%g1, %g2, kaif_hdlr_imiss_patch, %g3, %g7)
+0: brz %g1, 1f
nop
/*
* kdi_vatotte gave us a TTE to use. Load it up and head back
* into the world, but first bump a counter.
*/
-#ifdef KMDB_TRAPCOUNT
- ldx [%g7 + 0x3c], %g2 /* Trap counter. See top comment */
+#ifdef KMDB_TRAPCOUNT /* Trap counter. See top comment */
+ ldx [%g7 + .count-0b], %g2
add %g2, 1, %g2
- stx %g2, [%g7 + 0x3c]
-#else
- nop
- nop
- nop
+ stx %g2, [%g7 + .count-0b]
#endif
- stxa %g1, [%g0]ASI_ITLB_IN
+
+ KAIF_ITLB_STUFF(%g1, 1f, %g2, %g3, %g4, %g5)
retry
1: /*
@@ -197,42 +288,41 @@ kaif_hdlr_imiss_patch:
* We will only reach this point at TL=1, as kdi_vatotte will always
* find the TTE for the debugger without missing.
*/
- rdpr %pstate, %g1
- or %g0, PTSTATE_KERN_COMMON | PSTATE_AG, %g2
- set kaif_dtrap, %g3
- jmp %g3
- wrpr %g2, %pstate
- unimp 0
- unimp 0
- unimp 0 /* counter goes here */
- unimp 0
- unimp 0
- unimp 0
- unimp 0
- unimp 0
- unimp 0
- unimp 0
- unimp 0
- unimp 0
+
+ sethi %hi(kaif_dtrap), %g1
+ jmp %g1 + %lo(kaif_dtrap)
+ nop
+ /* NOTREACHED */
+
+#ifdef KMDB_TRAPCOUNT
+ .align 8
+.count: .xword 0
+#endif
+
+ .align 32*4 /* force length to 32 instr. */
SET_SIZE(kaif_hdlr_imiss)
-#endif /* sun4v */
+
+
ENTRY_NP(kaif_hdlr_generic)
-#ifdef KMDB_TRAPCOUNT
- rd %pc, %g3 /* Trap counter. See top comment */
- ld [%g3 + 0x1c], %g4
+#ifdef KMDB_TRAPCOUNT /* Trap counter. See top comment */
+0: rd %pc, %g3
+ ldx [%g3 + .count-0b], %g4
add %g4, 1, %g4
- st %g4, [%g3 + 0x1c]
-#else
- nop
- nop
- nop
+ stx %g4, [%g3 + .count-0b]
+#endif
+
+ sethi %hi(kaif_dtrap), %g1
+ jmp %g1 + %lo(kaif_dtrap)
nop
+ /* NOTREACHED */
+
+#ifdef KMDB_TRAPCOUNT
+ .align 8
+.count: .xword 0 /* counter goes here */
#endif
- sethi %hi(kaif_dtrap), %g3
- jmp %g3 + %lo(kaif_dtrap)
- rdpr %pstate, %g1
- unimp 0 /* counter goes here */
+
+ .align 32*4 /* force length to 32 instr. */
SET_SIZE(kaif_hdlr_generic)
-#endif
+#endif /* lint */
diff --git a/usr/src/cmd/mdb/sun4v/Makefile.kmdb b/usr/src/cmd/mdb/sun4v/Makefile.kmdb
index 43471fe808..d307d5f6f8 100644
--- a/usr/src/cmd/mdb/sun4v/Makefile.kmdb
+++ b/usr/src/cmd/mdb/sun4v/Makefile.kmdb
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -46,10 +45,19 @@ PROMSRCS += \
prom_printf.c \
prom_prop.c \
prom_putchar.c \
+ prom_reboot.c \
prom_sparc.c \
prom_stdin.c \
prom_stdout.c \
- prom_string.c
+ prom_string.c \
+ promif_emul.c \
+ promif_interp.c \
+ promif_io.c \
+ promif_mon.c \
+ promif_node.c \
+ promif_prop.c \
+ promif_reboot.c \
+ promif_stree.c
KMDBSRCS += \
kaif.c \
@@ -113,6 +121,10 @@ MAPFILE_SOURCES = \
$(COMPILE.c) $<
$(CTFCONVERT_O)
+%.o: ../../../../../uts/sun4v/promif/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
%.ln: ../../../../../psm/promif/ieee1275/common/%.c
$(LINT.c) -c $<
@@ -121,3 +133,6 @@ MAPFILE_SOURCES = \
%.ln: ../../../../../psm/promif/ieee1275/sun4u/%.c
$(LINT.c) -c $<
+
+%.ln: ../../../../../uts/sun4v/promif/%.c
+ $(LINT.c) -c $<
diff --git a/usr/src/cmd/mdb/sun4v/modules/Makefile b/usr/src/cmd/mdb/sun4v/modules/Makefile
index 77d76d05b0..957bcfcfbb 100644
--- a/usr/src/cmd/mdb/sun4v/modules/Makefile
+++ b/usr/src/cmd/mdb/sun4v/modules/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -19,11 +18,13 @@
#
# CDDL HEADER END
#
+
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
+#
-SUBDIRS = unix
+SUBDIRS = unix vdsk
include ../../Makefile.subdirs
diff --git a/usr/src/cmd/mdb/sun4v/modules/vdsk/Makefile b/usr/src/cmd/mdb/sun4v/modules/vdsk/Makefile
new file mode 100644
index 0000000000..4c5460e696
--- /dev/null
+++ b/usr/src/cmd/mdb/sun4v/modules/vdsk/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+SUBDIRS = v9
+include ../../../Makefile.subdirs
diff --git a/usr/src/cmd/mdb/sun4v/modules/vdsk/v9/Makefile b/usr/src/cmd/mdb/sun4v/modules/vdsk/v9/Makefile
new file mode 100644
index 0000000000..a449a6b174
--- /dev/null
+++ b/usr/src/cmd/mdb/sun4v/modules/vdsk/v9/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+MODULE = vdsk.so
+MDBTGT = kvm
+
+MODSRCS = vdsk.c
+
+include ../../../../../Makefile.cmd
+include ../../../../../Makefile.cmd.64
+include ../../../../sparc/Makefile.sparcv9
+include ../../../Makefile.sun4v
+include ../../../../Makefile.module
+
+MODSRCS_DIR = ../
+
+CPPFLAGS += -DMP -D_MACHDEP
+CPPFLAGS += -Dsun4v
+CPPFLAGS += -I$(SRC)/uts/sun4v
diff --git a/usr/src/cmd/mdb/sun4v/modules/vdsk/vdsk.c b/usr/src/cmd/mdb/sun4v/modules/vdsk/vdsk.c
new file mode 100644
index 0000000000..b43c3f9d95
--- /dev/null
+++ b/usr/src/cmd/mdb/sun4v/modules/vdsk/vdsk.c
@@ -0,0 +1,105 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * This module provides debugging tools for the LDoms vDisk drivers
+ * (vds and vdc).
+ */
+
+#include <sys/mdb_modapi.h>
+
+#include <sys/vdsk_common.h>
+
+/*
+ */
+int
+vd_dring_entry_walk_init(mdb_walk_state_t *wsp)
+{
+ /* Must have a start addr. */
+ if (wsp->walk_addr == NULL) {
+ mdb_warn("Descriptor Ring base address required\n");
+
+ return (WALK_ERR);
+ }
+
+ return (WALK_NEXT);
+}
+
+
+/*
+ * Generic entry walker step routine.
+ */
+int
+vd_dring_entry_walk_step(mdb_walk_state_t *wsp)
+{
+ static int entry_count = 0;
+ int status;
+ vd_dring_entry_t dring_entry;
+
+ if (mdb_vread(&dring_entry, VD_DRING_ENTRY_SZ,
+ (uintptr_t)wsp->walk_addr) == -1) {
+ mdb_warn("failed to read vd_dring_entry_t at %p",
+ wsp->walk_addr);
+
+ return (WALK_ERR);
+ }
+
+ status = wsp->walk_callback(wsp->walk_addr, &dring_entry,
+ wsp->walk_cbdata);
+ wsp->walk_addr = (uintptr_t)(wsp->walk_addr + VD_DRING_ENTRY_SZ);
+
+ /* Check if we're at the last element */
+ if (++entry_count >= VD_DRING_LEN) {
+ /* reset counter for next call to this walker */
+ entry_count = 0;
+
+ return (WALK_DONE);
+ }
+
+ return (status);
+}
+
+/*
+ * MDB module linkage information:
+ */
+
+static const mdb_walker_t walkers[] = {
+ { "vd_dring_entry", "walk vDisk public Descriptor Ring entries",
+ vd_dring_entry_walk_init, vd_dring_entry_walk_step, NULL, NULL },
+ { NULL }
+};
+
+static const mdb_modinfo_t modinfo = {
+ MDB_API_VERSION, NULL, walkers
+};
+
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ return (&modinfo);
+}
diff --git a/usr/src/cmd/mdb/sun4v/v9/Makefile.kmdb b/usr/src/cmd/mdb/sun4v/v9/Makefile.kmdb
index 32fc72077b..3e45c49ee2 100644
--- a/usr/src/cmd/mdb/sun4v/v9/Makefile.kmdb
+++ b/usr/src/cmd/mdb/sun4v/v9/Makefile.kmdb
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
@@ -35,6 +34,7 @@ PROMINCDIRS += $(SRC)/uts/sun4u
KMDBML += \
client_handler.s \
+ hcall.s \
kaif_handlers.s \
kaif_invoke.s \
kaif_resume.s \
diff --git a/usr/src/cmd/mdb/sun4v/v9/kmdb/Makefile b/usr/src/cmd/mdb/sun4v/v9/kmdb/Makefile
index c7c5450a95..fb865e9e03 100644
--- a/usr/src/cmd/mdb/sun4v/v9/kmdb/Makefile
+++ b/usr/src/cmd/mdb/sun4v/v9/kmdb/Makefile
@@ -18,6 +18,7 @@
#
# CDDL HEADER END
#
+
#
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
@@ -58,6 +59,9 @@ KMDB_FPTEST = \
# will be added for the trap table, and each handler installed by KMDB will use
# its padding to keep a trap count. See kaif_handlers.s.
#
+# NOTE: not currently supported by the sun4v fast miss handlers. See
+# ../../../sparc/v9/kmdb/kaif_handlers.s to verify before use.
+#
#TRAPCOUNT = -DKMDB_TRAPCOUNT
CPPFLAGS += -I../../../sparc/mdb -I.. -I$(SRC)/uts/sun4 -I$(SRC)/uts/sun4v
@@ -146,6 +150,9 @@ kmdb_context_off.h: ../../../sparc/kmdb/kmdb_context_off.in
$(COMPILE.c) $<
$(CTFCONVERT_O)
+%.o: ../../../../../uts/sun4v/ml/%.s
+ $(COMPILE.s) -o $@ $<
+
#
# Lint
#
@@ -189,6 +196,9 @@ kmdb_context_off.h: ../../../sparc/kmdb/kmdb_context_off.in
%.ln: $(SRC)/common/net/util/%.c
$(LINT.c) -c $<
+%.ln: ../../../../../uts/sun4v/ml/%.s
+ $(LINT.s) -c $<
+
#
# Installation targets
#
diff --git a/usr/src/cmd/picl/plugins/sun4v/mdesc/Makefile b/usr/src/cmd/picl/plugins/sun4v/mdesc/Makefile
index 45e9bd2d98..2fc2704617 100644
--- a/usr/src/cmd/picl/plugins/sun4v/mdesc/Makefile
+++ b/usr/src/cmd/picl/plugins/sun4v/mdesc/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -19,12 +18,13 @@
#
# CDDL HEADER END
#
+
#
-# ident "%Z%%M% %I% %E% SMI"
-#
-# Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# ident "%Z%%M% %I% %E% SMI"
+#
# cmd/picl/plugins/sun4v/mdesc/Makefile
#
LIBRARY= libmdescplugin.a
@@ -32,7 +32,7 @@ VERS= .1
OBJS_DIR= pics
-OBJECTS= mdescplugin.o init.o \
+OBJECTS= mdescplugin.o init.o dr.o \
cpu_prop_update.o disk_discovery.o \
mdesc_findname.o mdesc_findnodeprop.o \
mdesc_fini.o mdesc_getpropstr.o \
@@ -48,7 +48,7 @@ include $(SRC)/cmd/picl/plugins/Makefile.com
SRCS= $(OBJECTS:%.o=%.c)
-LINT_SRC= ./mdescplugin.c ./init.c \
+LINT_SRC= ./mdescplugin.c ./init.c ./dr.c \
./cpu_prop_update.c ./disk_discovery.c \
$(SRC)/common/mdesc/mdesc_findname.c \
$(SRC)/common/mdesc/mdesc_findnodeprop.c \
@@ -85,7 +85,7 @@ LDLIBS += -L$(SRC)/lib/libpicl/$(MACH) -L$(SRC)/lib/libpicltree/$(MACH)
LDLIBS += -L$(ROOT)/usr/lib/picl/plugins -L$(ROOT)/usr/lib/sparcv9
DYNFLAGS += -R$(DYNFLAGS_COM)
-LDLIBS += -lc -lpicltree -lrt -lpicldevtree -lcfgadm -lnvpair
+LDLIBS += -lc -lpicltree -ldevinfo -lrt -lpicldevtree -lcfgadm -lnvpair
LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN -v
diff --git a/usr/src/cmd/picl/plugins/sun4v/mdesc/cpu_prop_update.c b/usr/src/cmd/picl/plugins/sun4v/mdesc/cpu_prop_update.c
index c7e47d21d1..7e6428fa96 100644
--- a/usr/src/cmd/picl/plugins/sun4v/mdesc/cpu_prop_update.c
+++ b/usr/src/cmd/picl/plugins/sun4v/mdesc/cpu_prop_update.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -213,16 +212,16 @@ add_cpu_prop(picl_nodehdl_t node, void *args)
int x, num_nodes;
int ncpus, ncaches, ntlbs;
int status;
- int reg_prop[4], reg;
+ int reg_prop[SUN4V_CPU_REGSIZE], cpuid;
uint64_t int_value;
- status = ptree_get_propval_by_name(node, "reg", reg_prop,
+ status = ptree_get_propval_by_name(node, OBP_REG, reg_prop,
sizeof (reg_prop));
if (status != PICL_SUCCESS) {
return (PICL_WALK_TERMINATE);
}
- reg = reg_prop[0] & 0x3f;
+ cpuid = CFGHDL_TO_CPUID(reg_prop[0]);
/*
* Allocate space for our searches.
@@ -266,7 +265,7 @@ add_cpu_prop(picl_nodehdl_t node, void *args)
continue;
}
- if (int_value != reg)
+ if (int_value != cpuid)
continue;
add_md_prop(node, sizeof (int_value), "cpuid", &int_value,
diff --git a/usr/src/cmd/picl/plugins/sun4v/mdesc/dr.c b/usr/src/cmd/picl/plugins/sun4v/mdesc/dr.c
new file mode 100644
index 0000000000..5323a23264
--- /dev/null
+++ b/usr/src/cmd/picl/plugins/sun4v/mdesc/dr.c
@@ -0,0 +1,517 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "mdescplugin.h"
+
+static di_prom_handle_t ph = DI_PROM_HANDLE_NIL;
+
+typedef struct cpu_lookup {
+ di_node_t di_node;
+ picl_nodehdl_t nodeh;
+ int result;
+} cpu_lookup_t;
+
+extern int add_cpu_prop(picl_nodehdl_t node, void *args);
+extern md_t *mdesc_devinit(void);
+
+/*
+ * This function is identical to the one in the picldevtree plugin.
+ * Unfortunately we can't just reuse that code.
+ */
+static int
+add_string_list_prop(picl_nodehdl_t nodeh, char *name, char *strlist,
+ unsigned int nrows)
+{
+ ptree_propinfo_t propinfo;
+ picl_prophdl_t proph;
+ picl_prophdl_t tblh;
+ int err;
+ unsigned int i;
+ unsigned int j;
+ picl_prophdl_t *proprow;
+ int len;
+
+#define NCOLS_IN_STRING_TABLE 1
+
+ err = ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_TABLE, PICL_READ, sizeof (picl_prophdl_t), name,
+ NULL, NULL);
+ if (err != PICL_SUCCESS)
+ return (err);
+
+ err = ptree_create_table(&tblh);
+ if (err != PICL_SUCCESS)
+ return (err);
+
+ err = ptree_create_and_add_prop(nodeh, &propinfo, &tblh, &proph);
+ if (err != PICL_SUCCESS)
+ return (err);
+
+ proprow = alloca(sizeof (picl_prophdl_t) * nrows);
+ if (proprow == NULL) {
+ (void) ptree_destroy_prop(proph);
+ return (PICL_FAILURE);
+ }
+
+ for (j = 0; j < nrows; ++j) {
+ len = strlen(strlist) + 1;
+ err = ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_CHARSTRING, PICL_READ, len, name,
+ NULL, NULL);
+ if (err != PICL_SUCCESS)
+ break;
+ err = ptree_create_prop(&propinfo, strlist, &proprow[j]);
+ if (err != PICL_SUCCESS)
+ break;
+ strlist += len;
+ err = ptree_add_row_to_table(tblh, NCOLS_IN_STRING_TABLE,
+ &proprow[j]);
+ if (err != PICL_SUCCESS)
+ break;
+ }
+
+ if (err != PICL_SUCCESS) {
+ for (i = 0; i < j; ++i)
+ (void) ptree_destroy_prop(proprow[i]);
+ (void) ptree_delete_prop(proph);
+ (void) ptree_destroy_prop(proph);
+ return (err);
+ }
+
+ return (PICL_SUCCESS);
+}
+
+/*
+ * This function is identical to the one in the picldevtree plugin.
+ * Unfortunately we can't just reuse that code.
+ */
+static void
+add_devinfo_props(picl_nodehdl_t nodeh, di_node_t di_node)
+{
+ int instance;
+ char *di_val;
+ di_prop_t di_prop;
+ int di_ptype;
+ ptree_propinfo_t propinfo;
+
+ instance = di_instance(di_node);
+ (void) ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_INT, PICL_READ, sizeof (instance), PICL_PROP_INSTANCE,
+ NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo, &instance, NULL);
+
+ di_val = di_bus_addr(di_node);
+ if (di_val) {
+ (void) ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_CHARSTRING, PICL_READ, strlen(di_val) + 1,
+ PICL_PROP_BUS_ADDR, NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo, di_val,
+ NULL);
+ }
+
+ di_val = di_binding_name(di_node);
+ if (di_val) {
+ (void) ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_CHARSTRING, PICL_READ, strlen(di_val) + 1,
+ PICL_PROP_BINDING_NAME, NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo, di_val,
+ NULL);
+ }
+
+ di_val = di_driver_name(di_node);
+ if (di_val) {
+ (void) ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_CHARSTRING, PICL_READ, strlen(di_val) + 1,
+ PICL_PROP_DRIVER_NAME, NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo, di_val,
+ NULL);
+ }
+
+ di_val = di_devfs_path(di_node);
+ if (di_val) {
+ (void) ptree_init_propinfo(&propinfo, PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_CHARSTRING, PICL_READ, strlen(di_val) + 1,
+ PICL_PROP_DEVFS_PATH, NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo, di_val,
+ NULL);
+ di_devfs_path_free(di_val);
+ }
+
+ for (di_prop = di_prop_next(di_node, DI_PROP_NIL);
+ di_prop != DI_PROP_NIL;
+ di_prop = di_prop_next(di_node, di_prop)) {
+
+ di_val = di_prop_name(di_prop);
+ di_ptype = di_prop_type(di_prop);
+ switch (di_ptype) {
+ case DI_PROP_TYPE_BOOLEAN:
+ (void) ptree_init_propinfo(&propinfo,
+ PTREE_PROPINFO_VERSION, PICL_PTYPE_VOID,
+ PICL_READ, (size_t)0, di_val, NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo,
+ NULL, NULL);
+ break;
+ case DI_PROP_TYPE_INT: {
+ int *idata;
+ int len;
+
+ len = di_prop_ints(di_prop, &idata);
+ if (len < 0)
+ /* Received error, so ignore prop */
+ break;
+
+ if (len == 1)
+ (void) ptree_init_propinfo(&propinfo,
+ PTREE_PROPINFO_VERSION, PICL_PTYPE_INT,
+ PICL_READ, len * sizeof (int), di_val,
+ NULL, NULL);
+ else
+ (void) ptree_init_propinfo(&propinfo,
+ PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_BYTEARRAY, PICL_READ,
+ len * sizeof (int), di_val,
+ NULL, NULL);
+
+ (void) ptree_create_and_add_prop(nodeh, &propinfo,
+ idata, NULL);
+ }
+ break;
+ case DI_PROP_TYPE_STRING: {
+ char *sdata;
+ int len;
+
+ len = di_prop_strings(di_prop, &sdata);
+ if (len < 0)
+ break;
+
+ if (len == 1) {
+ (void) ptree_init_propinfo(&propinfo,
+ PTREE_PROPINFO_VERSION,
+ PICL_PTYPE_CHARSTRING, PICL_READ,
+ strlen(sdata) + 1, di_val,
+ NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh,
+ &propinfo, sdata, NULL);
+ } else {
+ (void) add_string_list_prop(nodeh, di_val,
+ sdata, len);
+ }
+ }
+ break;
+ case DI_PROP_TYPE_BYTE: {
+ int len;
+ unsigned char *bdata;
+
+ len = di_prop_bytes(di_prop, &bdata);
+ if (len < 0)
+ break;
+ (void) ptree_init_propinfo(&propinfo,
+ PTREE_PROPINFO_VERSION, PICL_PTYPE_BYTEARRAY,
+ PICL_READ, len, di_val, NULL, NULL);
+ (void) ptree_create_and_add_prop(nodeh, &propinfo,
+ bdata, NULL);
+ }
+ break;
+ case DI_PROP_TYPE_UNKNOWN:
+ break;
+ case DI_PROP_TYPE_UNDEF_IT:
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * Create a picl node of type cpu and fill it.
+ * properties are filled from both the device tree and the
+ * Machine description.
+ */
+static int
+construct_cpu_node(picl_nodehdl_t plath, di_node_t dn)
+{
+ int err;
+ char *nodename;
+ picl_nodehdl_t anodeh;
+
+ nodename = di_node_name(dn); /* PICL_PROP_NAME */
+
+ err = ptree_create_and_add_node(plath, nodename, PICL_CLASS_CPU,
+ &anodeh);
+ if (err != PICL_SUCCESS)
+ return (err);
+
+ add_devinfo_props(anodeh, dn);
+ (void) add_cpu_prop(anodeh, NULL);
+
+ return (err);
+}
+
+/*
+ * Given a devinfo node find its reg property.
+ */
+static int
+get_reg_prop(di_node_t dn, int **pdata)
+{
+ int dret = 0;
+
+ dret = di_prop_lookup_ints(DDI_DEV_T_ANY, dn, OBP_REG, pdata);
+ if (dret > 0)
+ return (dret);
+
+ if (!ph)
+ return (0);
+ dret = di_prom_prop_lookup_ints(ph, dn, OBP_REG, pdata);
+ return (dret < 0? 0 : dret);
+}
+
+/*
+ * Given a devinfo cpu node find its cpuid property.
+ */
+int
+get_cpuid(di_node_t di_node)
+{
+ int len;
+ int *idata;
+ int dcpuid = -1;
+
+ len = get_reg_prop(di_node, &idata);
+
+ if (len != SUN4V_CPU_REGSIZE)
+ return (dcpuid);
+ if (len == SUN4V_CPU_REGSIZE)
+ dcpuid = CFGHDL_TO_CPUID(idata[0]);
+
+ return (dcpuid);
+}
+
+int
+find_cpu(di_node_t node, int cpuid)
+{
+ int dcpuid;
+ di_node_t cnode;
+ char *nodename;
+
+ for (cnode = di_child_node(node); cnode != DI_NODE_NIL;
+ cnode = di_sibling_node(cnode)) {
+ nodename = di_node_name(cnode);
+ if (nodename == NULL)
+ continue;
+ if (strcmp(nodename, OBP_CPU) == 0) {
+ dcpuid = get_cpuid(cnode);
+ if (dcpuid == cpuid) {
+ return (1);
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Callback to the ptree walk function during remove_cpus.
+ * As a part of the args receives a picl nodeh, searches
+ * the device tree for a cpu whose cpuid matches the picl cpu node.
+ * Sets arg struct's result to 1 if it failed to match and terminates
+ * the walk.
+ */
+static int
+remove_cpu_candidate(picl_nodehdl_t nodeh, void *c_args)
+{
+ di_node_t di_node;
+ cpu_lookup_t *cpu_arg;
+ int err;
+ int pcpuid;
+ int reg_prop[SUN4V_CPU_REGSIZE];
+
+ if (c_args == NULL)
+ return (PICL_INVALIDARG);
+
+ cpu_arg = c_args;
+ di_node = cpu_arg->di_node;
+
+ err = ptree_get_propval_by_name(nodeh, OBP_REG, reg_prop,
+ sizeof (reg_prop));
+
+ if (err != PICL_SUCCESS) {
+ return (PICL_WALK_CONTINUE);
+ }
+
+ pcpuid = CFGHDL_TO_CPUID(reg_prop[0]);
+
+ if (!find_cpu(di_node, pcpuid)) {
+ cpu_arg->result = 1;
+ cpu_arg->nodeh = nodeh;
+ return (PICL_WALK_TERMINATE);
+ }
+
+ cpu_arg->result = 0;
+ return (PICL_WALK_CONTINUE);
+}
+
+/*
+ * Given the start node of the device tree.
+ * find all cpus in the picl tree that don't have
+ * device tree counterparts and remove them.
+ */
+static void
+remove_cpus(di_node_t di_start)
+{
+ int err;
+ picl_nodehdl_t plath;
+ cpu_lookup_t cpu_arg;
+
+ err = ptree_get_node_by_path(PLATFORM_PATH, &plath);
+ if (err != PICL_SUCCESS)
+ return;
+
+ do {
+ cpu_arg.di_node = di_start;
+ cpu_arg.nodeh = 0;
+ cpu_arg.result = 0;
+
+ if (ptree_walk_tree_by_class(plath,
+ PICL_CLASS_CPU, &cpu_arg, remove_cpu_candidate)
+ != PICL_SUCCESS)
+ return;
+
+ if (cpu_arg.result == 1) {
+ err = ptree_delete_node(cpu_arg.nodeh);
+ if (err == PICL_SUCCESS)
+ ptree_destroy_node(cpu_arg.nodeh);
+ }
+ } while (cpu_arg.result);
+}
+
+/*
+ * Callback to the ptree walk function during add_cpus.
+ * As a part of the args receives a cpu di_node, compares
+ * each picl cpu node's cpuid to the device tree node's cpuid.
+ * Sets arg struct's result to 1 on a match.
+ */
+static int
+cpu_exists(picl_nodehdl_t nodeh, void *c_args)
+{
+ di_node_t di_node;
+ cpu_lookup_t *cpu_arg;
+ int err;
+ int dcpuid, pcpuid;
+ int reg_prop[4];
+
+ if (c_args == NULL)
+ return (PICL_INVALIDARG);
+
+ cpu_arg = c_args;
+ di_node = cpu_arg->di_node;
+ dcpuid = get_cpuid(di_node);
+
+ err = ptree_get_propval_by_name(nodeh, OBP_REG, reg_prop,
+ sizeof (reg_prop));
+
+ if (err != PICL_SUCCESS)
+ return (PICL_WALK_CONTINUE);
+
+ pcpuid = CFGHDL_TO_CPUID(reg_prop[0]);
+
+ if (dcpuid == pcpuid) {
+ cpu_arg->result = 1;
+ return (PICL_WALK_TERMINATE);
+ }
+
+ cpu_arg->result = 0;
+ return (PICL_WALK_CONTINUE);
+}
+
+/*
+ * Given the root node of the device tree.
+ * compare it to the picl tree and add to it cpus
+ * that are new.
+ */
+static void
+add_cpus(di_node_t di_node)
+{
+ int err;
+ di_node_t cnode;
+ picl_nodehdl_t plath;
+ cpu_lookup_t cpu_arg;
+ char *nodename;
+
+ err = ptree_get_node_by_path(PLATFORM_PATH, &plath);
+ if (err != PICL_SUCCESS)
+ return;
+
+ for (cnode = di_child_node(di_node); cnode != DI_NODE_NIL;
+ cnode = di_sibling_node(cnode)) {
+ nodename = di_node_name(cnode);
+ if (nodename == NULL)
+ continue;
+ if (strcmp(nodename, OBP_CPU) == 0) {
+ cpu_arg.di_node = cnode;
+
+ if (ptree_walk_tree_by_class(plath,
+ PICL_CLASS_CPU, &cpu_arg, cpu_exists)
+ != PICL_SUCCESS)
+ return;
+
+ if (cpu_arg.result == 0)
+ /*
+ * Didn't find a matching cpu, add it.
+ */
+ (void) construct_cpu_node(plath,
+ cnode);
+ }
+ }
+}
+
+/*
+ * Handle DR events. Only supports cpu add and remove.
+ */
+int
+update_devices(char *dev, int op)
+{
+ di_node_t di_root;
+
+ if ((di_root = di_init("/", DINFOCPYALL)) == DI_NODE_NIL)
+ return (PICL_FAILURE);
+
+ if ((ph = di_prom_init()) == NULL)
+ return (PICL_FAILURE);
+
+ if (op == DEV_ADD) {
+ if (strcmp(dev, OBP_CPU) == 0)
+ add_cpus(di_root);
+ }
+
+ if (op == DEV_REMOVE) {
+ if (strcmp(dev, OBP_CPU) == 0)
+ remove_cpus(di_root);
+ }
+
+ di_fini(di_root);
+ di_prom_fini(ph);
+ return (PICL_SUCCESS);
+}
diff --git a/usr/src/cmd/picl/plugins/sun4v/mdesc/init.c b/usr/src/cmd/picl/plugins/sun4v/mdesc/init.c
index 8b6a7f2af3..d9e52a293f 100644
--- a/usr/src/cmd/picl/plugins/sun4v/mdesc/init.c
+++ b/usr/src/cmd/picl/plugins/sun4v/mdesc/init.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -43,17 +43,20 @@
#define SIZE 8192
static void mdesc_free(void *bufp, size_t size);
+uint8_t *md_bufp;
md_t *
mdesc_devinit(void)
{
int fh;
- uint8_t *bufp = NULL;
int res;
int size;
int offset;
md_t *mdp;
+ if (md_bufp != NULL)
+ return (NULL);
+
fh = open(MDESC_PATH, O_RDONLY, 0);
if (fh < 0) {
return (NULL);
@@ -62,8 +65,8 @@ mdesc_devinit(void)
size = SIZE; /* initial size */
offset = 0;
- bufp = malloc(size);
- if (NULL == bufp) {
+ md_bufp = malloc(size);
+ if (NULL == md_bufp) {
return (NULL);
}
@@ -76,18 +79,18 @@ mdesc_devinit(void)
while (len < SIZE) {
size += SIZE;
- bufp = realloc(bufp, size);
- if (NULL == bufp)
+ md_bufp = realloc(md_bufp, size);
+ if (NULL == md_bufp)
return (NULL);
len = size - offset;
}
do {
- res = read(fh, bufp+offset, len);
+ res = read(fh, md_bufp + offset, len);
} while ((res < 0) && (errno == EAGAIN));
if (res < 0) {
- free(bufp);
+ free(md_bufp);
return (NULL);
}
@@ -96,13 +99,13 @@ mdesc_devinit(void)
(void) close(fh);
- bufp = realloc(bufp, offset);
- if (NULL == bufp)
+ md_bufp = realloc(md_bufp, offset);
+ if (NULL == md_bufp)
return (NULL);
- mdp = md_init_intern((uint64_t *)bufp, malloc, mdesc_free);
+ mdp = md_init_intern((uint64_t *)md_bufp, malloc, mdesc_free);
if (NULL == mdp) {
- free(bufp);
+ free(md_bufp);
return (NULL);
}
@@ -113,5 +116,17 @@ mdesc_devinit(void)
void
mdesc_free(void *bufp, size_t size)
{
- free(bufp);
+ if (bufp)
+ free(bufp);
+}
+
+void
+mdesc_devfini(md_t *mdp)
+{
+ if (mdp)
+ (void) md_fini(mdp);
+
+ if (md_bufp)
+ free(md_bufp);
+ md_bufp = NULL;
}
diff --git a/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.c b/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.c
index 3af30d6678..1883993934 100644
--- a/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.c
+++ b/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -50,6 +50,8 @@ void mdescplugin_fini(void);
extern int add_cpu_prop(picl_nodehdl_t node, void *args);
extern int disk_discovery(void);
extern md_t *mdesc_devinit(void);
+extern void mdesc_devfini(md_t *mdp);
+extern int update_devices(char *dev, int op);
picld_plugin_reg_t mdescplugin_reg = {
PICLD_PLUGIN_VERSION_1,
@@ -91,6 +93,65 @@ find_disk(picl_nodehdl_t node, void *args)
}
/*
+ * DR event handler
+ * respond to the picl events:
+ * PICLEVENT_DR_AP_STATE_CHANGE
+ */
+static void
+dr_handler(const char *ename, const void *earg, size_t size, void *cookie)
+{
+ nvlist_t *nvlp = NULL;
+ char *dtype;
+ char *ap_id;
+ char *hint;
+
+
+ if (strcmp(ename, PICLEVENT_DR_AP_STATE_CHANGE) != 0) {
+ return;
+ }
+
+ if (nvlist_unpack((char *)earg, size, &nvlp, NULL)) {
+ return;
+ }
+
+ if (nvlist_lookup_string(nvlp, PICLEVENTARG_DATA_TYPE, &dtype)) {
+ nvlist_free(nvlp);
+ return;
+ }
+
+ if (strcmp(dtype, PICLEVENTARG_PICLEVENT_DATA) != 0) {
+ nvlist_free(nvlp);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvlp, PICLEVENTARG_AP_ID, &ap_id)) {
+ nvlist_free(nvlp);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvlp, PICLEVENTARG_HINT, &hint)) {
+ nvlist_free(nvlp);
+ return;
+ }
+
+ mdp = mdesc_devinit();
+ if (mdp == NULL) {
+ nvlist_free(nvlp);
+ return;
+ }
+
+ rootnode = md_root_node(mdp);
+
+ if (strcmp(hint, DR_HINT_INSERT) == 0)
+ (void) update_devices(ap_id, DEV_ADD);
+ else if (strcmp(hint, DR_HINT_REMOVE) == 0)
+ (void) update_devices(ap_id, DEV_REMOVE);
+
+ mdesc_devfini(mdp);
+ nvlist_free(nvlp);
+}
+
+/*
* Discovery event handler
* respond to the picl events:
* PICLEVENT_SYSEVENT_DEVICE_ADDED
@@ -170,8 +231,10 @@ mdescplugin_init(void)
dsc_handler, NULL);
(void) ptree_register_handler(PICLEVENT_SYSEVENT_DEVICE_REMOVED,
dsc_handler, NULL);
+ (void) ptree_register_handler(PICLEVENT_DR_AP_STATE_CHANGE,
+ dr_handler, NULL);
- (void) md_fini(mdp);
+ mdesc_devfini(mdp);
}
void
@@ -182,6 +245,8 @@ mdescplugin_fini(void)
dsc_handler, NULL);
(void) ptree_unregister_handler(PICLEVENT_SYSEVENT_DEVICE_REMOVED,
dsc_handler, NULL);
+ (void) ptree_unregister_handler(PICLEVENT_DR_AP_STATE_CHANGE,
+ dr_handler, NULL);
}
void
diff --git a/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.h b/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.h
index 073e9ff825..437fbecbbf 100644
--- a/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.h
+++ b/usr/src/cmd/picl/plugins/sun4v/mdesc/mdescplugin.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,6 +29,10 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#include <picl.h>
#include <picltree.h>
#include <picldefs.h>
@@ -50,6 +54,9 @@
#include <dirent.h>
#include <config_admin.h>
#include <sys/param.h>
+#include <libdevinfo.h>
+#include <sys/systeminfo.h>
+#include <sys/sysevent/dr.h>
#define MAXSTRLEN 256
#define ICACHE_FLAG 0x01
@@ -58,5 +65,13 @@
#define DISK_DISCOVERY_NAME "disk_discovery"
#define CONFIGURED "configured"
#define UNCONFIGURED "unconfigured"
+#define DEV_ADD 0
+#define DEV_REMOVE 1
+#define SUN4V_CPU_REGSIZE 4
+#define CFGHDL_TO_CPUID(x) (x & ~(0xful << 28))
+
+#ifdef __cplusplus
+}
+#endif
#endif /* _MDESCPLUGIN_H */
diff --git a/usr/src/cmd/vntsd/Makefile b/usr/src/cmd/vntsd/Makefile
new file mode 100644
index 0000000000..9cbd356516
--- /dev/null
+++ b/usr/src/cmd/vntsd/Makefile
@@ -0,0 +1,74 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG = vntsd
+SRCS = cmd.c common.c console.c listen.c queue.c read.c vntsd.c vntsdvcc.c \
+ write.c
+OBJS = $(SRCS:.c=.o)
+
+include ../Makefile.cmd
+
+POFILES = $(SRCS:.c=.po)
+POFILE = $(PROG)_msg.po
+
+MANIFEST = vntsd.xml
+SVCMETHOD = svc-vntsd
+
+CFLAGS += $(CCVERBOSE)
+
+LDLIBS += -lsocket -lnsl
+
+ROOTCMDDIR = $(ROOTLIB)/ldoms
+ROOTMANIFESTDIR = $(ROOTSVCPLATFORMSUN4V)
+$(ROOTMANIFEST) := FILEMODE = 0444
+
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all \
+ $(ROOTCMD) \
+ $(ROOTMANIFEST) \
+ $(ROOTSVCMETHOD)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(CAT) $(POFILES) > $@
+
+check: $(CHKMANIFEST)
+
+lint: lint_SRCS
+
+clean:
+ $(RM) $(OBJS)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vntsd/chars.h b/usr/src/cmd/vntsd/chars.h
new file mode 100644
index 0000000000..66abce66b7
--- /dev/null
+++ b/usr/src/cmd/vntsd/chars.h
@@ -0,0 +1,87 @@
+
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _CHARS_H
+#define _CHARS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CR 13
+#define LF 10
+
+
+/* telnet protocol command support */
+#define BEL 7 /* not support */
+#define BS 8 /* supported */
+#define HT 9 /* eoln */
+#define VT 11 /* not support */
+#define FF 12 /* not support */
+#define STOP 18
+#define START 19
+
+#define SE 240 /* end of subnegotiation params */
+#define NOP 241
+#define DM 242 /* Data Mark not support */
+#define BRK 243 /* terminal support */
+#define IP 244 /* control-C */
+#define AO 245 /* abort output not support */
+#define AYT 246 /* Are you there */
+#define EC 247 /* Erase character - not support */
+#define EL 248 /* Erase line - not support */
+#define GA 249 /* Go ahead. */
+#define SB 250 /* Subnegotiation of the indicated option */
+#define WILL 251 /* will do */
+#define WONT 252 /* refuse */
+#define DO 253 /* request do */
+#define DONT 254 /* request do not do */
+#define IAC 255 /* command */
+
+
+
+/* telnet options */
+
+#define TEL_ECHO 1
+#define SUPRESS 3
+#define STATUS 5
+#define TM 6 /* timing mark - not supported */
+#define TERM_TYPE 24 /* Terminal type -not supported */
+#define WIN_SIZE 31 /* window size - not supported */
+#define TERM_SP 32 /* terminal speed - not supported */
+#define FC 33 /* remote flow control - not supported */
+#define LINEMODE 34 /* line mode */
+#define ENV 36 /* environment variables */
+
+#define VNTSD_DAEMON_CMD '~'
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CHARS_H */
diff --git a/usr/src/cmd/vntsd/cmd.c b/usr/src/cmd/vntsd/cmd.c
new file mode 100644
index 0000000000..faabebd613
--- /dev/null
+++ b/usr/src/cmd/vntsd/cmd.c
@@ -0,0 +1,486 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Vntsd handles two types of special commands: telnet
+ * commands and vntsd special commands.
+ * The telnet commands supported are:
+ * WILL
+ * WONT
+ * DO
+ * DONT
+ * TEL_ECHO
+ * SUPRESS
+ * LINEMODE
+ * BRK
+ * AYT
+ * HT
+ *
+ * Vntsd special commands are:
+ * send break (~#)
+ * exit (~.)
+ * force write access (~w)
+ * cycle console down (~n)
+ * cycle console up (~p)
+ * help (~?)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <ctype.h>
+#include <sys/termio.h>
+#include <libintl.h>
+#include <syslog.h>
+#include "vntsd.h"
+#include "chars.h"
+
+char vntsd_eol[] = { CR, LF, 0};
+
+typedef int (*e_func_t)(vntsd_client_t *clientp);
+/* structure for daemon special cmd */
+typedef struct {
+ char e_char; /* char to match on */
+ char *e_help; /* help string */
+ e_func_t e_func; /* command */
+} esctable_t;
+
+/* genbrk() - send a break to vcc driver */
+static int
+genbrk(vntsd_client_t *clientp)
+{
+
+ vntsd_cons_t *consp;
+
+ assert(clientp);
+ assert(clientp->cons);
+
+ consp = clientp->cons;
+ D1(stderr, "t@%d genbrk fd=%d sockfd %d\n", thr_self(),
+ consp->vcc_fd, clientp->sockfd);
+
+ assert(consp->clientpq != NULL);
+ if (consp->clientpq->handle != clientp) {
+ /* reader */
+ return (vntsd_write_line(clientp,
+ gettext(VNTSD_NO_WRITE_ACCESS_MSG)));
+ }
+
+ /* writer */
+ if (ioctl(consp->vcc_fd, TCSBRK, NULL)) {
+ return (VNTSD_ERR_VCC_IOCTL);
+ }
+
+ return (VNTSD_STATUS_CONTINUE);
+}
+
+/*
+ * console_forward() - cycle client to the next console
+ * in the group queue.
+ */
+static int
+console_forward(void)
+{
+ return (VNTSD_STATUS_MOV_CONS_FORWARD);
+}
+
+/*
+ * console_backward() - cycle client to the previous
+ * console in the group queue.
+ */
+static int
+console_backward(void)
+{
+ return (VNTSD_STATUS_MOV_CONS_BACKWARD);
+}
+
+/* acquire_write() - acquire write access to a console. */
+static int
+acquire_write(vntsd_client_t *clientp)
+{
+ int rv;
+ int yes_no = 1;
+ vntsd_cons_t *consp;
+
+ assert(clientp);
+ consp = clientp->cons;
+ assert(consp);
+
+ if (consp->clientpq->handle == clientp) {
+ /* client is a writer */
+ if ((rv = vntsd_write_line(clientp,
+ gettext("You have write permission"))) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+
+ }
+ return (VNTSD_STATUS_CONTINUE);
+ }
+
+ /* message to client */
+ if ((rv = vntsd_write_client(clientp, vntsd_eol, VNTSD_EOL_LEN))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /*
+ * TRANSLATION_NOTE
+ * The following string should be formatted to fit on multiple lines
+ * assuming a line width of at most 78 characters. There must be no
+ * trailing newline.
+ */
+ if ((rv = vntsd_write_lines(clientp,
+ gettext("Warning: another user currently "
+ "has write permission\nto this console and forcibly removing "
+ "him/her will terminate\nany current write action and all work "
+ "will be lost."))) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /* get client yes no */
+ if ((rv = vntsd_write_client(clientp, vntsd_eol,
+ VNTSD_EOL_LEN)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_get_yes_no(clientp,
+ gettext("Would you like to continue?"),
+ &yes_no)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if (yes_no == B_FALSE) {
+ /* client changed mind; no need to acquire write access */
+ return (VNTSD_STATUS_CONTINUE);
+ }
+
+ return (VNTSD_STATUS_ACQUIRE_WRITER);
+}
+
+/* client_exit() - disconnect client from the console. */
+static int
+client_exit(void)
+{
+ return (VNTSD_STATUS_RESELECT_CONS);
+}
+
+static int daemon_cmd_help(vntsd_client_t *clientp);
+
+/* table for daemon commands */
+
+static esctable_t etable[] = {
+
+ /* send a break to vcc */
+ {'#', "send break", genbrk},
+
+ /* exit */
+ {'.', "exit from this console", (e_func_t)client_exit},
+
+ /* acquire write access */
+ {'w', "force write access", acquire_write},
+
+ /* connect to next console in queue */
+ {'n', "console down", (e_func_t)console_forward},
+
+ /* connect to previous console in queue */
+ {'p', "console up", (e_func_t)console_backward},
+
+ /* help must be next to last */
+ {'?', "_", daemon_cmd_help},
+
+ /* table terminator */
+ {0, 0, 0}
+};
+
+void
+vntsd_init_esctable_msgs(void)
+{
+ esctable_t *p;
+
+ for (p = etable; p->e_char != '\0'; p++) {
+ p->e_help = gettext(p->e_help);
+ }
+}
+
+/* daemon_cmd_help() - print help. */
+static int
+daemon_cmd_help(vntsd_client_t *clientp)
+{
+ esctable_t *p;
+ int rv;
+ char buf[VNTSD_LINE_LEN];
+
+ if ((rv = vntsd_write_client(clientp, vntsd_eol,
+ VNTSD_EOL_LEN)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /*
+ * TRANSLATION_NOTE
+ * VNTSD is the name of the VNTS daemon and should not be translated.
+ */
+ if ((rv = vntsd_write_line(clientp, gettext("VNTSD commands"))) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ for (p = etable; p->e_char; p++) {
+ (void) snprintf(buf, sizeof (buf),
+ "~%c --%s", p->e_char, p->e_help);
+
+ if ((rv = vntsd_write_line(clientp, buf)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+ }
+
+ return (VNTSD_STATUS_CONTINUE);
+}
+
+/* exit from daemon command */
+static int
+exit_daemon_cmd(vntsd_client_t *clientp, int rv)
+{
+ (void) mutex_lock(&clientp->lock);
+ clientp->status &= ~VNTSD_CLIENT_DISABLE_DAEMON_CMD;
+ (void) mutex_unlock(&clientp->lock);
+ return (rv);
+}
+
+/* vntsd_process_daemon_cmd() - special commands */
+int
+vntsd_process_daemon_cmd(vntsd_client_t *clientp, char c)
+{
+ esctable_t *p;
+ int rv;
+
+ if (c != VNTSD_DAEMON_CMD) {
+ /* not a daemon command */
+ return (VNTSD_SUCCESS);
+ }
+
+ if (clientp->status & VNTSD_CLIENT_DISABLE_DAEMON_CMD) {
+ return (VNTSD_STATUS_CONTINUE);
+ }
+
+ /* no reentry to process_daemon_cmd */
+ (void) mutex_lock(&clientp->lock);
+ clientp->status |= VNTSD_CLIENT_DISABLE_DAEMON_CMD;
+ (void) mutex_unlock(&clientp->lock);
+
+ D3(stderr, "t@%d process_daemon_cmd %d %d \n", thr_self(),
+ clientp->cons->vcc_fd, clientp->sockfd);
+
+ /* read in command */
+ if ((rv = vntsd_read_char(clientp, &c)) != VNTSD_SUCCESS) {
+ return (exit_daemon_cmd(clientp, rv));
+ }
+
+ for (p = etable; p->e_char; p++) {
+ if (p->e_char == c) {
+ /* found match */
+ assert(p->e_func);
+ rv = (*p->e_func)(clientp);
+ return (exit_daemon_cmd(clientp, rv));
+ }
+ }
+
+ /* no match, print out the help */
+ p--;
+ assert(p->e_char == '?');
+ rv = (*p->e_func)(clientp);
+
+ return (exit_daemon_cmd(clientp, rv));
+
+}
+
+/* vntsd_set_telnet_options() - change telnet client to character mode. */
+int
+vntsd_set_telnet_options(int fd)
+{
+ /* set client telnet options */
+ uint8_t buf[] = {IAC, DONT, LINEMODE, IAC, WILL, SUPRESS, IAC, WILL,
+ TEL_ECHO, IAC, DONT, TERM_TYPE, IAC, DONT, TERM_SP,
+ IAC, DONT, STATUS, IAC, DONT, FC, IAC, DONT, TM, IAC, DONT, ENV,
+ IAC, DONT, WIN_SIZE};
+
+ return (vntsd_write_fd(fd, (char *)buf, 30));
+}
+
+/* vntsd_telnet_cmd() process telnet commands */
+int
+vntsd_telnet_cmd(vntsd_client_t *clientp, char c)
+{
+ uint8_t buf[4];
+ char cmd;
+ int rv = VNTSD_STATUS_CONTINUE;
+
+ bzero(buf, 4);
+
+ if ((uint8_t)c != IAC) {
+ /* not telnet cmd */
+ return (VNTSD_SUCCESS);
+ }
+
+ if ((rv = vntsd_read_char(clientp, &cmd)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_read_char(clientp, &c)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+
+ switch ((uint8_t)cmd) {
+
+ case WILL:
+
+ switch ((uint8_t)c) {
+ case TEL_ECHO:
+ case SUPRESS:
+ case LINEMODE:
+ break;
+ default:
+ syslog(LOG_ERR, "not support telnet WILL %x\n", c);
+ break;
+ }
+ break;
+
+ case WONT:
+
+ switch ((uint8_t)c) {
+ case TEL_ECHO:
+ case SUPRESS:
+ case LINEMODE:
+ default:
+ syslog(LOG_ERR, "not support telnet WONT %x\n", c);
+ break;
+ }
+ break;
+
+ case DO:
+ case DONT:
+
+ buf[0] = IAC;
+ buf[1] = WILL;
+ buf[2] = c;
+ rv = vntsd_write_client(clientp, (char *)buf, 3);
+
+ break;
+
+ case BRK:
+
+ /* send break to vcc */
+ rv = genbrk(clientp);
+ break;
+
+ case IP:
+
+ break;
+
+ case AYT:
+
+ rv = vntsd_write_client(clientp, &c, 1);
+ break;
+
+ case HT:
+ return (VNTSD_STATUS_CONTINUE);
+
+ default:
+ syslog(LOG_ERR, "not support telnet ctrl %x\n", c);
+ break;
+ }
+
+ if (rv == VNTSD_SUCCESS) {
+ return (VNTSD_STATUS_CONTINUE);
+ } else {
+ return (rv);
+ }
+}
+
+
+/*
+ * vntsd_ctrl_cmd() - control keys
+ * read and write suspend are supported.
+ */
+int
+vntsd_ctrl_cmd(vntsd_client_t *clientp, char c)
+{
+ int cmd;
+
+ D3(stderr, "t@%d vntsd_ctrl_cmd%d %d\n", thr_self(),
+ clientp->cons->vcc_fd, clientp->sockfd);
+
+ if ((c != START) && (c != STOP)) {
+ /* not a supported control command */
+ return (VNTSD_SUCCESS);
+ }
+
+ if (c == START) {
+
+ D3(stderr, "t@%d client restart\n", thr_self());
+
+ /* send resume read */
+ cmd = 1;
+
+ if (ioctl(clientp->cons->vcc_fd, TCXONC, &cmd)) {
+ return (VNTSD_STATUS_VCC_IO_ERR);
+ }
+
+ /* send resume write */
+ cmd = 3;
+
+ if (ioctl(clientp->cons->vcc_fd, TCXONC, &cmd)) {
+ return (VNTSD_STATUS_VCC_IO_ERR);
+ }
+ }
+
+ if (c == STOP) {
+ D3(stderr, "t@%d client suspend\n", thr_self());
+
+ /* send suspend read */
+ cmd = 0;
+
+ if (ioctl(clientp->cons->vcc_fd, TCXONC, &cmd)) {
+ return (VNTSD_STATUS_VCC_IO_ERR);
+ }
+
+ /* send suspend write */
+ cmd = 2;
+
+ if (ioctl(clientp->cons->vcc_fd, TCXONC, &cmd)) {
+ perror("ioctl TCXONC");
+ return (VNTSD_STATUS_VCC_IO_ERR);
+ }
+ }
+
+ return (VNTSD_STATUS_CONTINUE);
+}
diff --git a/usr/src/cmd/vntsd/common.c b/usr/src/cmd/vntsd/common.c
new file mode 100644
index 0000000000..2cbad73309
--- /dev/null
+++ b/usr/src/cmd/vntsd/common.c
@@ -0,0 +1,654 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * supporting modules.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/sem.h>
+#include <sys/poll.h>
+#include <wait.h>
+#include <time.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <signal.h>
+#include <ctype.h>
+#include <langinfo.h>
+#include <libintl.h>
+#include <syslog.h>
+#include "vntsd.h"
+#include "chars.h"
+
+/* vntsd_write_line() - write a line to TCP client */
+int
+vntsd_write_line(vntsd_client_t *clientp, char *line)
+{
+ int rv;
+
+ rv = vntsd_write_client(clientp, line, strlen(line));
+ if (rv == VNTSD_SUCCESS) {
+ rv = vntsd_write_client(clientp, vntsd_eol, VNTSD_EOL_LEN);
+ }
+
+ return (rv);
+}
+
+/* vntsd_write_lines() write one or more lines to client. */
+int
+vntsd_write_lines(vntsd_client_t *clientp, char *lines)
+{
+ char *buf;
+ char *line;
+ char *endofline;
+
+ buf = strdup(lines);
+ if (buf == NULL) {
+ return (VNTSD_ERR_NO_MEM);
+ }
+
+ line = buf;
+
+ while ((line != NULL) && (*line != '\0')) {
+
+ endofline = strchr(line, '\n');
+ if (endofline != NULL) {
+ *endofline = '\0';
+ }
+
+ (void) vntsd_write_line(clientp, line);
+
+ if (endofline != NULL)
+ line = endofline + 1;
+ else
+ line = NULL;
+ }
+
+ free(buf);
+ return (VNTSD_SUCCESS);
+}
+
+/* vntsd_get_yes_no() - read in a "y" or "n" */
+int
+vntsd_get_yes_no(vntsd_client_t *clientp, char *msg, int *yes_no)
+{
+ char c;
+ char yesno[8];
+ int rv;
+
+ /* create [y/n] prompt */
+ (void) snprintf(yesno, sizeof (yesno), "[%c/%c] ",
+ *nl_langinfo(YESSTR), *nl_langinfo(NOSTR));
+
+ for (; ; ) {
+ if ((rv = vntsd_write_client(clientp, msg, strlen(msg)))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_write_client(clientp, yesno, strlen(yesno))) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_read_data(clientp, &c))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /* echo */
+ if ((rv = vntsd_write_client(clientp, &c, 1)) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_write_client(clientp, vntsd_eol,
+ VNTSD_EOL_LEN)) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ c = tolower(c);
+
+ if (c == *nl_langinfo(YESSTR)) {
+ *yes_no = B_TRUE;
+ return (VNTSD_SUCCESS);
+ }
+
+ if (c == *nl_langinfo(NOSTR)) {
+ *yes_no = B_FALSE;
+ return (VNTSD_SUCCESS);
+ }
+
+ if ((rv = vntsd_write_line(clientp,
+ gettext("Invalid response. Try again.")))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+ }
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+/* vntsd_open_vcc() - open a vcc port */
+int
+vntsd_open_vcc(char *dev_name, uint_t cons_no)
+{
+ int drvfd;
+ int sz;
+ char *path;
+ sz = strlen(VCC_DEVICE_PATH) + strlen(dev_name)+1;
+
+ path = calloc(sz, 1);
+
+ if (path == NULL) {
+ return (-1);
+ }
+
+ (void) snprintf(path, sz-1, VCC_DEVICE_PATH, dev_name);
+
+ for (; ; ) {
+ drvfd = open(path, O_RDWR);
+
+ if ((drvfd < 0) && (errno == EAGAIN)) {
+ if (vntsd_vcc_ioctl(VCC_FORCE_CLOSE, cons_no, &cons_no)
+ != VNTSD_SUCCESS) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+
+
+ if (drvfd < 0) {
+ D1(stderr, "t@%d open_vcc@%s exit\n", thr_self(), dev_name);
+ free(path);
+ return (-1);
+ }
+
+ free(path);
+ return (drvfd);
+}
+
+/* vntsd_cons_by_consno() - match a console structure to cons no */
+boolean_t
+vntsd_cons_by_consno(vntsd_cons_t *consp, int *cons_id)
+{
+ if (consp->status & VNTSD_CONS_DELETED) {
+ return (B_FALSE);
+ }
+ return (consp->cons_no == *cons_id);
+}
+
+/* vntsd_write_client() write to telnet client */
+int
+vntsd_write_client(vntsd_client_t *client, char *buffer, size_t sz)
+{
+ int rv;
+
+
+ /* write to client */
+ rv = vntsd_write_fd(client->sockfd, buffer, sz);
+
+ /* client has output, reset timer */
+ vntsd_reset_timer(client->cons_tid);
+
+ return (rv);
+}
+
+/* vntsd_write_fd() write to tcp socket file descriptor */
+int
+vntsd_write_fd(int fd, void *buf, size_t sz)
+{
+ int n;
+
+ while (sz > 0) {
+ n = write(fd, buf, sz);
+ if (n < 0) {
+ if (errno == EINTR) {
+ return (VNTSD_STATUS_INTR);
+ }
+
+ return (VNTSD_STATUS_CLIENT_QUIT);
+ }
+
+ if (n == 0) {
+ return (VNTSD_STATUS_CLIENT_QUIT);
+ }
+
+ buf = (caddr_t)buf + n;
+ sz -= n;
+ }
+ return (VNTSD_SUCCESS);
+
+}
+
+/*
+ * vntsd_read_char() - read a char from TCP client. Returns:
+ * VNTSD_SUCCESS, VNTSD_STATUS_CLIENT_QUIT or VNTSD_STATUS_INTR
+ */
+int
+vntsd_read_char(vntsd_client_t *clientp, char *c)
+{
+ int n;
+ vntsd_timeout_t tmo;
+ int rv;
+
+ tmo.tid = thr_self();
+ tmo.minutes = 0;
+ tmo.clientp = clientp;
+
+ /* attach to timer */
+ if ((rv = vntsd_attach_timer(&tmo)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ n = read(clientp->sockfd, c, 1);
+
+ /* detach from timer */
+ if ((rv = vntsd_detach_timer(&tmo)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if (n == 1) {
+ return (VNTSD_SUCCESS);
+ }
+
+ if (n == 0) {
+ return (VNTSD_STATUS_CLIENT_QUIT);
+ }
+
+ /*
+ * read error or wake up by signal, either console is being removed or
+ * timeout occurs.
+ */
+ if (errno == EINTR) {
+ return (VNTSD_STATUS_INTR);
+ }
+
+ /* any other error, we close client */
+ return (VNTSD_STATUS_CLIENT_QUIT);
+}
+
+/*
+ * vntsd_read_data() - handle special commands
+ * such as telnet, daemon and ctrl cmds. Returns:
+ * from vntsd_read_char:
+ * VNTSD_STATUS_CLIENT_QUIT
+ * VNTSD_STATUS_INTR
+ * from vntsd_process_daemon_cmd:
+ * VNTSD_STATUS_RESELECT_CONS
+ * VNTSD_STATUS_MOV_CONS_FORWARD
+ * VNTSD_STATUS_MOV_CONS_BACKWARD
+ * VNTSD_STATUS_ACQUIRE_WRITER
+ * VNTSD_STATUS_CONTINUE
+ * from vntsd_telnet_cmd
+ * VNTSD_STATUS_CONTINUE
+ */
+int
+vntsd_read_data(vntsd_client_t *clientp, char *c)
+{
+ int rv;
+
+ for (; ; ) {
+ if ((rv = vntsd_read_char(clientp, c)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /* daemon cmd? */
+ rv = vntsd_process_daemon_cmd(clientp, *c);
+
+ if (rv == VNTSD_SUCCESS) {
+ /* telnet cmd? */
+ rv = vntsd_telnet_cmd(clientp, *c);
+ }
+
+ if (rv == VNTSD_STATUS_CONTINUE) {
+ continue;
+ }
+
+ return (rv);
+ }
+
+ /*NOTREACHED*/
+ return (0);
+}
+/* vntsd_read_line() - read a line from TCP client */
+int
+vntsd_read_line(vntsd_client_t *clientp, char *buf, int *in_sz)
+{
+ char c;
+ int rv;
+ int out_sz = 0;
+
+
+ for (; ; ) {
+
+ if ((rv = vntsd_read_data(clientp, &c)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if (c == BS) {
+ /* back */
+ if ((rv = vntsd_write_client(clientp, &c, 1)) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ c = ' ';
+ if ((rv = vntsd_write_client(clientp, &c, 1)) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ buf--;
+ out_sz--;
+ continue;
+ }
+ /* echo */
+ if ((rv = vntsd_write_client(clientp, &c, 1)) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ *buf++ = c;
+ out_sz++;
+
+ if (c == CR) {
+ /* end of line */
+ *in_sz = out_sz;
+ return (VNTSD_SUCCESS);
+ }
+
+ if (out_sz == *in_sz) {
+ return (VNTSD_SUCCESS);
+ }
+ }
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+/* free a client */
+void
+vntsd_free_client(vntsd_client_t *clientp)
+{
+
+ if (clientp->sockfd != -1) {
+ (void) close(clientp->sockfd);
+ }
+
+ (void) mutex_destroy(&clientp->lock);
+
+ free(clientp);
+}
+
+
+/* check if a vcc console port still ok */
+boolean_t
+vntsd_vcc_cons_alive(vntsd_cons_t *consp)
+{
+ vcc_console_t vcc_cons;
+ int rv;
+
+ assert(consp);
+ assert(consp->group);
+
+ /* construct current configuration */
+ (void) strncpy(vcc_cons.domain_name, consp->domain_name, MAXPATHLEN);
+ (void) strncpy(vcc_cons.group_name, consp->group->group_name,
+ MAXPATHLEN);
+ vcc_cons.tcp_port = consp->group->tcp_port;
+ vcc_cons.cons_no = consp->cons_no;
+
+ /* call vcc to verify */
+ rv = vntsd_vcc_ioctl(VCC_CONS_STATUS, consp->cons_no, &vcc_cons);
+ if (rv != VNTSD_SUCCESS) {
+ return (B_FALSE);
+ }
+
+ if (vcc_cons.cons_no == -1) {
+ /* port is gone */
+ return (B_FALSE);
+ }
+
+ /* port is ok */
+ return (B_TRUE);
+
+}
+
+/* add to total if a console is alive */
+static boolean_t
+total_cons(vntsd_cons_t *consp, int *num_cons)
+{
+ int rv;
+
+ assert(consp->group);
+ rv = vntsd_vcc_err(consp);
+ if (rv == VNTSD_STATUS_CONTINUE) {
+ (*num_cons)++;
+ }
+ return (B_FALSE);
+}
+
+
+/* total alive consoles in a group */
+int
+vntsd_chk_group_total_cons(vntsd_group_t *groupp)
+{
+ uint_t num_cons = 0;
+
+ (void) vntsd_que_find(groupp->conspq, (compare_func_t)total_cons,
+ &num_cons);
+ return (num_cons);
+}
+
+/* vntsd_log() log function for errors */
+void
+vntsd_log(vntsd_status_t status, char *msg)
+{
+ char *status_msg = NULL;
+ int critical = 0;
+
+ switch (status) {
+
+ case VNTSD_SUCCESS:
+ status_msg = "STATUS_OK";
+ break;
+
+ case VNTSD_STATUS_CONTINUE:
+ status_msg = "CONTINUE";
+ break;
+
+ case VNTSD_STATUS_EXIT_SIG:
+ critical = 1;
+ status_msg = "KILL SIGNAL RECV";
+ break;
+
+ case VNTSD_STATUS_SIG:
+ status_msg = "SIG RECV";
+ break;
+
+ case VNTSD_STATUS_NO_HOST_NAME:
+ status_msg = "Warining NO HOST NAME";
+ break;
+
+ case VNTSD_STATUS_CLIENT_QUIT:
+ status_msg = "CLIENT CLOSED GROUP CONNECTION";
+ break;
+
+ case VNTSD_STATUS_RESELECT_CONS:
+ status_msg = "CLIENT RESELECTS CONSOLE";
+ break;
+
+ case VNTSD_STATUS_VCC_IO_ERR:
+ status_msg = "CONSOLE WAS DELETED";
+ break;
+
+ case VNTSD_STATUS_MOV_CONS_FORWARD:
+ status_msg = "MOVE CONSOLE FORWARD";
+ break;
+
+ case VNTSD_STATUS_MOV_CONS_BACKWARD:
+ status_msg = "MOVE CONSOLE BACKWARD";
+ break;
+
+ case VNTSD_STATUS_ACQUIRE_WRITER:
+ status_msg = "FORCE CONSOLE WRITE";
+ break;
+
+ case VNTSD_STATUS_INTR:
+ status_msg = "RECV SIGNAL";
+ break;
+
+ case VNTSD_STATUS_DISCONN_CONS:
+ status_msg = "DELETING CONSOLE";
+ break;
+
+ case VNTSD_STATUS_NO_CONS:
+ status_msg = "GROUP HAS NO CONSOLE";
+ break;
+
+ case VNTSD_ERR_NO_MEM:
+ critical = 1;
+ status_msg = "NO MEMORY";
+ break;
+
+ case VNTSD_ERR_NO_DRV:
+ critical = 1;
+ status_msg = "NO VCC DRIVER";
+ break;
+
+ case VNTSD_ERR_WRITE_CLIENT:
+ status_msg = "WRITE CLIENT ERR";
+ break;
+
+ case VNTSD_ERR_EL_NOT_FOUND:
+ critical = 1;
+ status_msg = "ELEMENT_NOT_FOUND";
+ break;
+
+ case VNTSD_ERR_VCC_CTRL_DATA:
+ critical = 1;
+ status_msg = "VCC CTRL DATA ERROR";
+ break;
+
+ case VNTSD_ERR_VCC_POLL:
+ critical = 1;
+ status_msg = "VCC POLL ERROR";
+ break;
+
+ case VNTSD_ERR_VCC_IOCTL:
+ critical = 1;
+ status_msg = "VCC IOCTL ERROR";
+ break;
+
+ case VNTSD_ERR_VCC_GRP_NAME:
+ critical = 1;
+ status_msg = "VCC GROUP NAME ERROR";
+ break;
+
+ case VNTSD_ERR_CREATE_LISTEN_THR:
+ critical = 1;
+ status_msg = "FAIL TO CREATE LISTEN THREAD";
+ break;
+
+ case VNTSD_ERR_CREATE_WR_THR:
+ critical = 1;
+ status_msg = "FAIL TO CREATE WRITE THREAD";
+ break;
+
+ case VNTSD_ERR_ADD_CONS_FAILED:
+ critical = 1;
+ status_msg = "FAIL TO ADD A CONSOLE";
+ break;
+
+ case VNTSD_ERR_LISTEN_SOCKET:
+ critical = 1;
+ status_msg = "LISTEN SOCKET ERROR";
+ break;
+
+ case VNTSD_ERR_LISTEN_OPTS:
+ critical = 1;
+ status_msg = "SET SOCKET OPTIONS ERROR";
+ break;
+
+ case VNTSD_ERR_LISTEN_BIND:
+ critical = 1;
+ status_msg = "BIND SOCKET ERROR";
+ break;
+
+ case VNTSD_STATUS_ACCEPT_ERR:
+ critical = 1;
+ status_msg = "LISTEN ACCEPT ERROR";
+ break;
+
+ case VNTSD_ERR_CREATE_CONS_THR:
+ critical = 1;
+ status_msg = "CREATE CONSOLE THREAD ERROR ";
+ break;
+
+ case VNTSD_ERR_SIG:
+ critical = 1;
+ status_msg = "RECV UNKNOWN SIG";
+ break;
+
+ case VNTSD_ERR_UNKNOWN_CMD:
+ critical = 1;
+ status_msg = "RECV UNKNOWN COMMAND";
+ break;
+
+ case VNTSD_ERR_CLIENT_TIMEOUT:
+ status_msg = "CLOSE CLIENT BECAUSE TIMEOUT";
+ break;
+ default:
+ status_msg = "Unknown status recv";
+ break;
+ }
+
+
+ if (critical) {
+ syslog(LOG_ERR, "%s: thread[%d] %s\n", status_msg,
+ thr_self(), msg);
+ }
+#ifdef DEBUG
+ DERR(stderr, "%s: thread[%d] %s\n", status_msg,
+ thr_self(), msg);
+ syslog(LOG_ERR, "%s: thread[%d] %s\n", status_msg, thr_self(), msg);
+#endif
+}
diff --git a/usr/src/cmd/vntsd/console.c b/usr/src/cmd/vntsd/console.c
new file mode 100644
index 0000000000..4b7c145e0e
--- /dev/null
+++ b/usr/src/cmd/vntsd/console.c
@@ -0,0 +1,721 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The listen thread creates a console thread whenever a tcp client makes
+ * a connection to its port. In the console thread, if there are multiple
+ * consoles in the group, the client is asked to select a console. A write
+ * thread for a console is created when the first client connects to a
+ * selected console, and the console thread becomes that client's read thread.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <synch.h>
+#include <signal.h>
+#include <assert.h>
+#include <ctype.h>
+#include <syslog.h>
+#include <libintl.h>
+#include <netdb.h>
+#include "vntsd.h"
+#include "chars.h"
+
+/* display domain names in the group */
+static boolean_t
+display_domain_name(vntsd_cons_t *consp, int *fd)
+{
+ char buf[VNTSD_LINE_LEN];
+ char *status;
+
+
+ if (consp->clientpq != NULL) {
+ status = gettext("connected");
+ } else if (consp->status & VNTSD_CONS_DELETED) {
+ status = gettext("removing...");
+ } else {
+ status = gettext("online");
+ }
+
+ (void) snprintf(buf, sizeof (buf), "%-20d%-30s%-25s%s",
+ consp->cons_no, consp->domain_name, status, vntsd_eol);
+
+ return (vntsd_write_fd(*fd, buf, strlen(buf)) != VNTSD_SUCCESS);
+}
+
+/* output connected message to tcp client */
+static int
+write_connect_msg(vntsd_client_t *clientp, char *group_name,
+ char *domain_name)
+{
+
+ int rv = VNTSD_SUCCESS;
+ char buf[VNTSD_LINE_LEN];
+
+ if ((rv = vntsd_write_client(clientp, vntsd_eol, VNTSD_EOL_LEN)) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ (void) snprintf(buf, sizeof (buf),
+ gettext("Connecting to console \"%s\" in group \"%s\" ...."),
+ domain_name, group_name);
+
+ if ((rv = vntsd_write_line(clientp, buf)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_write_line(clientp,
+ gettext("Press ~? for control options .."))) !=
+ VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ return (VNTSD_SUCCESS);
+}
+
+static int
+create_write_thread(vntsd_cons_t *consp)
+{
+
+ assert(consp);
+
+ /* create write thread for the console */
+ (void) mutex_lock(&consp->lock);
+ if (thr_create(NULL, 0, (thr_func_t)vntsd_write_thread,
+ (void *)consp, NULL, &consp->wr_tid)) {
+
+ DERR(stderr, "t@%d create_rd_wr_thread@%d: "
+ "create write thread failed\n",
+ thr_self(), consp->cons_no);
+ (void) close(consp->vcc_fd);
+ consp->vcc_fd = -1;
+ (void) mutex_unlock(&consp->lock);
+
+ return (VNTSD_ERR_CREATE_WR_THR);
+ }
+ (void) mutex_unlock(&consp->lock);
+ return (VNTSD_SUCCESS);
+}
+
+/* Display all domain consoles in a group. */
+static int
+list_all_domains(vntsd_group_t *groupp, vntsd_client_t *clientp)
+{
+ char vntsd_line[VNTSD_LINE_LEN];
+ int rv = VNTSD_SUCCESS;
+
+ if ((rv = vntsd_write_client(clientp, vntsd_eol, VNTSD_EOL_LEN))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /*
+ * TRANSLATION_NOTE
+ * The following three strings of the form "DOMAIN .." are table
+ * headers and should be all uppercase.
+ */
+ (void) snprintf(vntsd_line, sizeof (vntsd_line),
+ "%-20s%-30s%-25s",
+ gettext("DOMAIN ID"), gettext("DOMAIN NAME"),
+ gettext("DOMAIN STATE"));
+
+ if ((rv = vntsd_write_line(clientp, vntsd_line)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ (void) mutex_lock(&groupp->lock);
+
+ if (vntsd_que_find(groupp->conspq, (compare_func_t)display_domain_name,
+ &(clientp->sockfd)) != NULL) {
+ rv = VNTSD_ERR_WRITE_CLIENT;
+ }
+
+ (void) mutex_unlock(&groupp->lock);
+
+ return (rv);
+}
+
+/* display help */
+static int
+display_help(vntsd_client_t *clientp)
+{
+	int rv = VNTSD_SUCCESS;
+	char *bufp;
+
+	if ((rv = vntsd_write_client(clientp, vntsd_eol, VNTSD_EOL_LEN))
+	    != VNTSD_SUCCESS) {
+		return (rv);
+	}
+
+	/*
+	 * TRANSLATION_NOTE
+	 * The following three strings of the form ". -- ..." are help
+	 * messages for single character commands. Do not translate the
+	 * character before the --.
+	 */
+	bufp = gettext("h -- this help");
+
+	if ((rv = vntsd_write_line(clientp, bufp)) != VNTSD_SUCCESS) {
+		return (rv);
+	}
+
+	bufp = gettext("l -- list of consoles");
+
+	if ((rv = vntsd_write_line(clientp, bufp)) != VNTSD_SUCCESS) {
+		return (rv);
+	}
+
+	bufp = gettext("q -- quit");
+
+	if ((rv = vntsd_write_line(clientp, bufp)) != VNTSD_SUCCESS) {
+		return (rv);
+	}
+
+	/*
+	 * TRANSLATION_NOTE
+	 * In the following string, "id" is a short mnemonic for
+	 * "identifier" and both occurrences should be translated.
+	 */
+
+	bufp = gettext("[c[c ]]{id} -- connect to console of domain {id}");
+
+	if ((rv = vntsd_write_line(clientp, bufp)) != VNTSD_SUCCESS) {
+		return (rv);
+	}
+
+	return (VNTSD_SUCCESS);
+}
+
+/* select a console to connect */
+static int
+select_cons(vntsd_group_t *groupp, int num_cons, vntsd_cons_t **consp,
+    vntsd_client_t *clientp, char c)
+{
+	int cons_no = -2;
+	int n;
+	int i;
+	char buf[VNTSD_LINE_LEN];
+	int rv;
+
+
+	(void) mutex_lock(&groupp->lock);
+	if (groupp->num_cons == 0) {
+		(void) mutex_unlock(&groupp->lock);
+		/* no console in this group */
+		return (VNTSD_STATUS_NO_CONS);
+	}
+	(void) mutex_unlock(&groupp->lock);
+
+	if (num_cons == 1) {
+		/* bypass selecting console */
+		*consp = (vntsd_cons_t *)(groupp->conspq->handle);
+		return (VNTSD_SUCCESS);
+	}
+
+
+	if (isdigit(c)) {
+		/* {id} input */
+		cons_no = c - '0';
+	} else if (c == 'c') {
+		/* c{id} or c {id} input */
+		cons_no = -1;
+	} else if (!isspace(c)) {
+		return (VNTSD_ERR_INVALID_INPUT);
+	}
+
+	/* get client selections */
+	n = VNTSD_LINE_LEN;
+
+	if ((rv = vntsd_read_line(clientp, buf, &n)) != VNTSD_SUCCESS) {
+		return (rv);
+	}
+
+	/* parse command */
+	for (i = 0; i < n; i++) {
+		if (cons_no == -1) {
+			/* c{id} */
+			cons_no = atoi(buf + i);
+			break;
+		}
+
+		if (isspace(buf[i]) && cons_no == -2) {
+			/* skip space */
+			continue;
+		}
+
+		if (buf[i] == 'c') {
+			/* c{id} or c {id} */
+			cons_no = -1;
+		} else if (buf[i] == CR) {
+			break;
+		} else {
+			return (VNTSD_ERR_INVALID_INPUT);
+		}
+	}
+
+	if (cons_no < 0) {
+		return (VNTSD_ERR_INVALID_INPUT);
+	}
+
+	/* get selected console */
+	(void) mutex_lock(&groupp->lock);
+
+	*consp = (vntsd_cons_t *)vntsd_que_find(groupp->conspq,
+	    (compare_func_t)vntsd_cons_by_consno, &cons_no);
+
+	if (*consp == NULL) {
+		/* during console selection, the console has been deleted */
+		(void) mutex_unlock(&groupp->lock);
+		return (VNTSD_ERR_INVALID_INPUT);
+	}
+	if ((*consp)->status & VNTSD_CONS_DELETED) {
+		/* console deleted: drop the group lock before returning */
+		(void) mutex_unlock(&groupp->lock);
+		return (VNTSD_ERR_INVALID_INPUT);
+	}
+
+	(void) mutex_unlock(&groupp->lock);
+
+	return (VNTSD_SUCCESS);
+}
+
+/* check whether there is a matching console in the group */
+static boolean_t
+find_cons_in_group(vntsd_cons_t *consp_in_group, vntsd_cons_t *consp)
+{
+ if (consp_in_group == consp) {
+ return (B_TRUE);
+ } else {
+ return (B_FALSE);
+ }
+}
+
+/* connect a client to a console */
+static int
+connect_cons(vntsd_cons_t *consp, vntsd_client_t *clientp)
+{
+ int rv, rv1;
+ vntsd_group_t *groupp;
+
+ assert(consp);
+ groupp = consp->group;
+ assert(groupp);
+ assert(clientp);
+
+ (void) mutex_lock(&groupp->lock);
+
+ /* check if console is valid */
+ consp = vntsd_que_find(groupp->conspq,
+ (compare_func_t)find_cons_in_group, consp);
+
+ if (consp == NULL) {
+ (void) mutex_unlock(&groupp->lock);
+ return (VNTSD_STATUS_NO_CONS);
+ }
+ if (consp->status & VNTSD_CONS_DELETED) {
+ (void) mutex_unlock(&groupp->lock);
+ return (VNTSD_STATUS_NO_CONS);
+ }
+
+ (void) mutex_lock(&consp->lock);
+ (void) mutex_lock(&clientp->lock);
+
+
+ clientp->cons = consp;
+
+ /* enable daemon cmd */
+ clientp->status &= ~VNTSD_CLIENT_DISABLE_DAEMON_CMD;
+
+ if (consp->clientpq == NULL) {
+ /* first connect to console - a writer */
+ assert(consp->vcc_fd == -1);
+ /* open vcc */
+ consp->vcc_fd = vntsd_open_vcc(consp->dev_name, consp->cons_no);
+ if (consp->vcc_fd < 0) {
+ (void) mutex_unlock(&clientp->lock);
+ (void) mutex_unlock(&consp->lock);
+ (void) mutex_unlock(&groupp->lock);
+ assert(consp->group);
+ return (vntsd_vcc_err(consp));
+ }
+ }
+
+ (void) mutex_unlock(&clientp->lock);
+
+ /*
+ * move the client from group's no console selected queue
+ * to cons queue
+ */
+
+ rv = vntsd_que_rm(&groupp->no_cons_clientpq, clientp);
+ assert(rv == VNTSD_SUCCESS);
+
+ rv = vntsd_que_append(&consp->clientpq, clientp);
+ (void) mutex_unlock(&groupp->lock);
+
+ if (rv != VNTSD_SUCCESS) {
+ if (consp->clientpq->handle == clientp) {
+ /* writer */
+ (void) close(consp->vcc_fd);
+ consp->vcc_fd = -1;
+ }
+
+ (void) mutex_unlock(&consp->lock);
+ return (rv);
+ }
+
+ (void) mutex_unlock(&consp->lock);
+
+ if (consp->clientpq->handle == clientp) {
+ /* create a write thread */
+ rv = create_write_thread(consp);
+ if (rv != VNTSD_SUCCESS) {
+ return (rv);
+ }
+ }
+
+ /* write connecting message */
+ if ((rv = write_connect_msg(clientp, consp->group->group_name,
+ consp->domain_name)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ /* process input from client */
+ rv = vntsd_read(clientp);
+
+ /* client disconnected from the console */
+ (void) mutex_lock(&groupp->lock);
+
+ /* remove client from console queue */
+ (void) mutex_lock(&consp->lock);
+ rv1 = vntsd_que_rm(&consp->clientpq, clientp);
+ assert(rv1 == VNTSD_SUCCESS);
+
+ /* append client to group's no console selected queue */
+ rv1 = vntsd_que_append(&groupp->no_cons_clientpq, clientp);
+ (void) mutex_unlock(&groupp->lock);
+
+ if (consp->clientpq == NULL) {
+ /* clean up console since there is no client connected to it */
+ assert(consp->vcc_fd != -1);
+
+ /* close vcc port */
+ (void) close(consp->vcc_fd);
+ consp->vcc_fd = -1;
+
+ /* force write thread to exit */
+ assert(consp->wr_tid != (thread_t)-1);
+ (void) thr_kill(consp->wr_tid, SIGUSR1);
+ (void) mutex_unlock(&consp->lock);
+ (void) thr_join(consp->wr_tid, NULL, NULL);
+ (void) mutex_lock(&consp->lock);
+ }
+
+ if (consp->status & VNTSD_CONS_SIG_WAIT) {
+ /* console is waiting for client to disconnect */
+ (void) cond_signal(&consp->cvp);
+ }
+
+ (void) mutex_unlock(&consp->lock);
+
+ return (rv1 == VNTSD_SUCCESS ? rv : rv1);
+
+}
+
+/* read command line input */
+static int
+read_cmd(vntsd_client_t *clientp, char *prompt, char *cmd)
+{
+ int rv;
+
+ /* disable daemon special command */
+ (void) mutex_lock(&clientp->lock);
+ clientp->status |= VNTSD_CLIENT_DISABLE_DAEMON_CMD;
+ (void) mutex_unlock(&clientp->lock);
+
+ if ((rv = vntsd_write_client(clientp, vntsd_eol, VNTSD_EOL_LEN))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_write_client(clientp, prompt, strlen(prompt)))
+ != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ if ((rv = vntsd_read_data(clientp, cmd)) != VNTSD_SUCCESS) {
+ return (rv);
+ }
+ if (*cmd == BS) {
+ return (VNTSD_SUCCESS);
+ }
+
+ rv = vntsd_write_client(clientp, cmd, 1);
+
+ *cmd = tolower(*cmd);
+
+ return (rv);
+}
+
+/* reset client for selecting a console in the group */
+static void
+client_init(vntsd_client_t *clientp)
+{
+ (void) mutex_lock(&clientp->lock);
+ clientp->cons = NULL;
+ clientp->status = 0;
+ (void) mutex_unlock(&clientp->lock);
+}
+
+/* clean up client and exit the thread */
+static void
+client_fini(vntsd_group_t *groupp, vntsd_client_t *clientp)
+{
+
+ assert(groupp);
+ assert(clientp);
+
+	/* disconnect client from tcp port */
+ assert(clientp->sockfd != -1);
+ (void) close(clientp->sockfd);
+
+ (void) mutex_lock(&groupp->lock);
+ (void) vntsd_que_rm(&groupp->no_cons_clientpq, clientp);
+
+ if ((groupp->no_cons_clientpq == NULL) &&
+ (groupp->status & VNTSD_GROUP_SIG_WAIT)) {
+ /* group is waiting to be deleted */
+ groupp->status &= ~VNTSD_GROUP_SIG_WAIT;
+ (void) cond_signal(&groupp->cvp);
+ }
+ (void) mutex_unlock(&groupp->lock);
+
+ (void) mutex_destroy(&clientp->lock);
+ free(clientp);
+
+ thr_exit(0);
+}
+
+/* check client's status. exit if client quits or fatal errors */
+static void
+console_chk_status(vntsd_group_t *groupp, vntsd_client_t *clientp, int status)
+{
+ char err_msg[VNTSD_LINE_LEN];
+
+ D1(stderr, "t@%d console_chk_status() status=%d "
+ "client status=%x num consoles=%d \n",
+ thr_self(), status, clientp->status, groupp->num_cons);
+
+ (void) snprintf(err_msg, VNTSD_LINE_LEN, "console_chk_status client%d"
+ " num_cos=%d", clientp->sockfd, groupp->num_cons);
+
+ if (groupp->num_cons == 0) {
+ /* no more console in the group */
+ client_fini(groupp, clientp);
+ }
+
+ if (status == VNTSD_STATUS_INTR) {
+ /* reason for signal? */
+ status = vntsd_cons_chk_intr(clientp);
+ }
+
+ switch (status) {
+
+ case VNTSD_STATUS_CLIENT_QUIT:
+ client_fini(groupp, clientp);
+ return;
+
+ case VNTSD_STATUS_RESELECT_CONS:
+ assert(clientp->cons);
+ if ((groupp->num_cons == 1) &&
+ (groupp->conspq->handle == clientp->cons)) {
+ /* no other selection available */
+ client_fini(groupp, clientp);
+ } else {
+ client_init(clientp);
+ }
+ return;
+
+ case VNTSD_STATUS_VCC_IO_ERR:
+ if ((clientp->status & VNTSD_CLIENT_CONS_DELETED) == 0) {
+ /* check if console was deleted */
+ status = vntsd_vcc_err(clientp->cons);
+ }
+
+ if (status != VNTSD_STATUS_CONTINUE) {
+ /* console was deleted */
+ if (groupp->num_cons == 1) {
+ client_fini(groupp, clientp);
+ }
+ }
+
+ /* console is ok */
+ client_init(clientp);
+ return;
+
+ case VNTSD_STATUS_MOV_CONS_FORWARD:
+ case VNTSD_STATUS_MOV_CONS_BACKWARD:
+ if (groupp->num_cons == 1) {
+ /* same console */
+ return;
+ }
+
+ /* get selected console */
+ (void) mutex_lock(&(clientp->cons->group->lock));
+ clientp->cons = vntsd_que_pos(clientp->cons->group->conspq,
+ clientp->cons,
+ (status == VNTSD_STATUS_MOV_CONS_FORWARD)?(1):(-1));
+ (void) mutex_unlock(&(clientp->cons->group->lock));
+ return;
+
+ case VNTSD_SUCCESS:
+ case VNTSD_STATUS_CONTINUE:
+ case VNTSD_STATUS_NO_CONS:
+ client_init(clientp);
+ return;
+
+ case VNTSD_ERR_INVALID_INPUT:
+ return;
+
+ default:
+ /* fatal error */
+ vntsd_log(status, err_msg);
+ client_fini(groupp, clientp);
+ return;
+ }
+}
+
+/* console thread */
+void *
+vntsd_console_thread(vntsd_thr_arg_t *argp)
+{
+ vntsd_group_t *groupp;
+ vntsd_cons_t *consp;
+ vntsd_client_t *clientp;
+
+ char buf[MAXHOSTNAMELEN];
+ char prompt[72];
+ char cmd;
+ int rv = VNTSD_SUCCESS;
+ int num_cons;
+
+
+ groupp = (vntsd_group_t *)argp->handle;
+ clientp = (vntsd_client_t *)argp->arg;
+
+ assert(groupp);
+ assert(clientp);
+
+ /* check if group is removed */
+
+ D1(stderr, "t@%d get_client_sel@%lld:client@%d\n", thr_self(),
+ groupp->tcp_port, clientp->sockfd);
+
+ bzero(buf, MAXHOSTNAMELEN);
+
+ /* host name */
+ if (gethostname(buf, MAXHOSTNAMELEN)) {
+ vntsd_log(VNTSD_STATUS_NO_HOST_NAME, "vntsd_console_thread()");
+ (void) snprintf(buf, sizeof (buf), "unkown host");
+ }
+
+ if (snprintf(prompt, sizeof (prompt),
+ "%s-vnts-%s: h,l,{id},c{id},c {id},q:",
+ buf, groupp->group_name) >= sizeof (prompt)) {
+ /* long prompt doesn't fit, use short one */
+ (void) snprintf(prompt, sizeof (prompt),
+ "vnts: h,l,{id},c{id},c {id}, q:");
+ }
+
+
+ for (;;) {
+ cmd = ' ';
+ D1(stderr, "t@%d console_thread()@%lld:client@%d\n", thr_self(),
+ groupp->tcp_port, clientp->sockfd);
+
+ num_cons = vntsd_chk_group_total_cons(groupp);
+
+ if ((num_cons > 1) && (clientp->cons == NULL)) {
+ /* console to connect to */
+ rv = read_cmd(clientp, prompt, &cmd);
+ /* check error and may exit */
+ console_chk_status(groupp, clientp, rv);
+ }
+
+ switch (cmd) {
+
+ case 'l':
+
+ /* list domain names */
+ rv = list_all_domains(groupp, clientp);
+ break;
+
+
+ case 'q':
+
+ rv = VNTSD_STATUS_CLIENT_QUIT;
+ break;
+
+ case 'h':
+ rv = display_help(clientp);
+ break;
+
+ default:
+ /* select console */
+ if (clientp->cons == NULL) {
+ rv = select_cons(groupp, num_cons,
+ &consp, clientp, cmd);
+ if (rv == VNTSD_ERR_INVALID_INPUT) {
+ rv = display_help(clientp);
+ break;
+ }
+ } else {
+ consp = clientp->cons;
+ }
+ assert(consp);
+
+ /* connect to console */
+ rv = connect_cons(consp, clientp);
+ D1(stderr, "t@%d console_thread()"
+ "connect_cons returns %d\n",
+ thr_self(), rv);
+ break;
+
+ }
+ /* check error and may exit */
+ console_chk_status(groupp, clientp, rv);
+ }
+
+ /*NOTREACHED*/
+ return (NULL);
+}
diff --git a/usr/src/cmd/vntsd/listen.c b/usr/src/cmd/vntsd/listen.c
new file mode 100644
index 0000000000..358e2665aa
--- /dev/null
+++ b/usr/src/cmd/vntsd/listen.c
@@ -0,0 +1,285 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Each group has a listen thread. It is created at the time
+ * of a group creation and destroyed when a group does not have
+ * any console associated with it.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <assert.h>
+#include <signal.h>
+#include <ctype.h>
+#include <syslog.h>
+#include "vntsd.h"
+
+/*
+ * check the state of the listen thread. exit if there is a fatal error
+ * or the group is removed.
+ */
+static void
+listen_chk_status(vntsd_group_t *groupp, int status)
+{
+ char err_msg[VNTSD_LINE_LEN];
+
+
+ D1(stderr, "t@%d listen_chk_status() status=%d group=%s "
+ "tcp=%lld group status = %x\n", thr_self(), status,
+ groupp->group_name, groupp->tcp_port, groupp->status);
+
+ (void) snprintf(err_msg, sizeof (err_msg),
+ "Group:%s TCP port %lld status %x",
+ groupp->group_name, groupp->tcp_port, groupp->status);
+
+
+ switch (status) {
+
+ case VNTSD_SUCCESS:
+ return;
+
+ case VNTSD_STATUS_INTR:
+ assert(groupp->status & VNTSD_GROUP_SIG_WAIT);
+ /* close listen socket */
+ (void) mutex_lock(&groupp->lock);
+ (void) close(groupp->sockfd);
+ groupp->sockfd = -1;
+
+ /* let group know */
+ groupp->status &= ~VNTSD_GROUP_SIG_WAIT;
+ (void) cond_signal(&groupp->cvp);
+
+ (void) mutex_unlock(&groupp->lock);
+ /* exit thread */
+ thr_exit(0);
+ break;
+
+ case VNTSD_STATUS_ACCEPT_ERR:
+ return;
+
+ case VNTSD_STATUS_NO_CONS:
+ default:
+ /* fatal, exit thread */
+
+ (void) mutex_lock(&groupp->lock);
+ (void) close(groupp->sockfd);
+ groupp->sockfd = -1;
+ (void) mutex_unlock(&groupp->lock);
+ vntsd_log(status, err_msg);
+ vntsd_clean_group(groupp);
+
+ thr_exit(0);
+ break;
+ }
+}
+
+/* allocate and initialize listening socket. */
+static int
+open_socket(int port_no, int *sockfd)
+{
+
+ struct sockaddr_in addr;
+ int on;
+
+
+ /* allocate a socket */
+ *sockfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (*sockfd < 0) {
+ if (errno == EINTR) {
+ return (VNTSD_STATUS_INTR);
+ }
+ return (VNTSD_ERR_LISTEN_SOCKET);
+ }
+
+#ifdef DEBUG
+ /* set reuse local socket address */
+ on = 1;
+ if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on))) {
+ return (VNTSD_ERR_LISTEN_OPTS);
+ }
+#endif
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = (vntsd_ip_addr()).s_addr;
+ addr.sin_port = htons(port_no);
+
+ /* bind socket */
+ if (bind(*sockfd, (struct sockaddr *)&addr, sizeof (addr)) < 0) {
+ if (errno == EINTR) {
+ return (VNTSD_STATUS_INTR);
+ }
+ return (VNTSD_ERR_LISTEN_BIND);
+
+ }
+
+ if (listen(*sockfd, VNTSD_MAX_SOCKETS) == -1) {
+ if (errno == EINTR) {
+ return (VNTSD_STATUS_INTR);
+ }
+ return (VNTSD_ERR_LISTEN_BIND);
+ }
+
+ D1(stderr, "t@%d open_socket() sockfd=%d\n", thr_self(), *sockfd);
+ return (VNTSD_SUCCESS);
+}
+
+/* create console selection thread */
+static int
+create_console_thread(vntsd_group_t *groupp, int sockfd)
+{
+ vntsd_client_t *clientp;
+ vntsd_thr_arg_t arg;
+ int rv;
+
+
+ assert(groupp);
+ D1(stderr, "t@%d create_console_thread@%lld:client@%d\n", thr_self(),
+ groupp->tcp_port, sockfd);
+
+ /* allocate a new client */
+ clientp = (vntsd_client_t *)malloc(sizeof (vntsd_client_t));
+ if (clientp == NULL) {
+ return (VNTSD_ERR_NO_MEM);
+ }
+
+ /* initialize the client */
+ bzero(clientp, sizeof (vntsd_client_t));
+
+ clientp->sockfd = sockfd;
+ clientp->cons_tid = (thread_t)-1;
+
+ (void) mutex_init(&clientp->lock, USYNC_THREAD|LOCK_ERRORCHECK, NULL);
+
+ /* append client to group */
+ (void) mutex_lock(&groupp->lock);
+
+ if ((rv = vntsd_que_append(&groupp->no_cons_clientpq, clientp))
+ != VNTSD_SUCCESS) {
+ (void) mutex_unlock(&groupp->lock);
+ vntsd_free_client(clientp);
+ return (rv);
+ }
+
+ (void) mutex_unlock(&groupp->lock);
+
+ (void) mutex_lock(&clientp->lock);
+
+ /* parameters for console thread */
+ bzero(&arg, sizeof (arg));
+
+ arg.handle = groupp;
+ arg.arg = clientp;
+
+ /* create console selection thread */
+ if (thr_create(NULL, 0, (thr_func_t)vntsd_console_thread,
+ &arg, THR_DETACHED, &clientp->cons_tid)) {
+
+ (void) mutex_unlock(&clientp->lock);
+ (void) mutex_lock(&groupp->lock);
+ (void) vntsd_que_rm(&groupp->no_cons_clientpq, clientp);
+ (void) mutex_unlock(&groupp->lock);
+ vntsd_free_client(clientp);
+
+ return (VNTSD_ERR_CREATE_CONS_THR);
+ }
+
+ (void) mutex_unlock(&clientp->lock);
+
+ return (VNTSD_SUCCESS);
+}
+
+/* listen thread */
+void *
+vntsd_listen_thread(vntsd_group_t *groupp)
+{
+
+ int newsockfd;
+ size_t clilen;
+ struct sockaddr_in cli_addr;
+ int rv;
+ int num_cons;
+
+ assert(groupp);
+
+ D1(stderr, "t@%d listen@%lld\n", thr_self(), groupp->tcp_port);
+
+
+ /* initialize listen socket */
+ (void) mutex_lock(&groupp->lock);
+ rv = open_socket(groupp->tcp_port, &groupp->sockfd);
+ (void) mutex_unlock(&groupp->lock);
+ listen_chk_status(groupp, rv);
+
+ for (; ; ) {
+
+ clilen = sizeof (cli_addr);
+
+ /* listen to the socket */
+ newsockfd = accept(groupp->sockfd, (struct sockaddr *)&cli_addr,
+ &clilen);
+
+ D1(stderr, "t@%d listen_thread() connected sockfd=%d\n",
+ thr_self(), newsockfd);
+
+ if (newsockfd <= 0) {
+
+ if (errno == EINTR) {
+ listen_chk_status(groupp, VNTSD_STATUS_INTR);
+ } else {
+ listen_chk_status(groupp,
+ VNTSD_STATUS_ACCEPT_ERR);
+ }
+ continue;
+ }
+ num_cons = vntsd_chk_group_total_cons(groupp);
+ if (num_cons == 0) {
+ (void) close(newsockfd);
+ listen_chk_status(groupp, VNTSD_STATUS_NO_CONS);
+ }
+
+ /* a connection is established */
+ rv = vntsd_set_telnet_options(newsockfd);
+ if (rv != VNTSD_SUCCESS) {
+ (void) close(newsockfd);
+ listen_chk_status(groupp, rv);
+ }
+ rv = create_console_thread(groupp, newsockfd);
+ if (rv != VNTSD_SUCCESS) {
+ (void) close(newsockfd);
+ listen_chk_status(groupp, rv);
+ }
+ }
+
+ /*NOTREACHED*/
+ return (NULL);
+}
diff --git a/usr/src/cmd/vntsd/queue.c b/usr/src/cmd/vntsd/queue.c
new file mode 100644
index 0000000000..4d50428198
--- /dev/null
+++ b/usr/src/cmd/vntsd/queue.c
@@ -0,0 +1,288 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * utility for vntsd queue handling
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/sem.h>
+#include <wait.h>
+#include <time.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <signal.h>
+#include "vntsd.h"
+
+/* alloc_que_el() allocates a queue element */
+static vntsd_que_t *
+alloc_que_el(void *handle)
+{
+ vntsd_que_t *el;
+
+ /* allocate a queue element */
+ el = (vntsd_que_t *)malloc(sizeof (vntsd_que_t));
+ if (el == NULL) {
+ return (NULL);
+ }
+
+
+ el->nextp = NULL;
+ el->prevp = NULL;
+ el->handle = handle;
+
+ return (el);
+}
+
+/* vntsd_que_append() appends an element to a queue */
+int
+vntsd_que_append(vntsd_que_t **que_hd, void *handle)
+{
+ vntsd_que_t *p;
+ vntsd_que_t *el;
+
+ assert(que_hd);
+ assert(handle);
+
+ /* allocate a queue element */
+ el = alloc_que_el(handle);
+
+ if (el == NULL) {
+ return (VNTSD_ERR_NO_MEM);
+ }
+
+ p = *que_hd;
+
+ if (p == NULL) {
+ /* first one */
+ *que_hd = el;
+ } else {
+ /* walk to the last one */
+ while (p->nextp != NULL)
+ p = p->nextp;
+ p->nextp = el;
+ }
+
+ el->prevp = p;
+
+ return (VNTSD_SUCCESS);
+}
+
+/* vntsd_que_insert_after() inserts an element after the given handle */
+int
+vntsd_que_insert_after(vntsd_que_t *que, void *handle, void *next)
+{
+	vntsd_que_t *q, *el;
+
+	assert(que);
+	q = que;
+
+	/* locate the element that holds the handle */
+	while (q != NULL) {
+		if (q->handle == handle) {
+			break;
+		}
+		q = q->nextp;
+	}
+
+	if (q == NULL) {
+		/* not in queue */
+		return (VNTSD_ERR_EL_NOT_FOUND);
+	}
+
+	el = alloc_que_el(next);
+	if (el == NULL) {
+		return (VNTSD_ERR_NO_MEM);
+	}
+
+	/* link in after q; also fix the back link of the next element */
+	el->nextp = q->nextp;
+	q->nextp = el;
+	el->prevp = q;
+	if (el->nextp != NULL) {
+		el->nextp->prevp = el;
+	}
+	return (VNTSD_SUCCESS);
+}
+
+
+
+/* vntsd_que_rm() removes the element matching handle from a queue */
+int
+vntsd_que_rm(vntsd_que_t **que_hd, void *handle)
+{
+	vntsd_que_t *p = *que_hd;
+	vntsd_que_t *prevp = NULL;
+
+	/* locate the element whose handle matches */
+	while (p != NULL) {
+		if (p->handle == handle) {
+			break;
+		}
+		prevp = p;
+		p = p->nextp;
+	}
+
+	if (p == NULL) {
+		/* not found */
+		return (VNTSD_ERR_EL_NOT_FOUND);
+	}
+
+	/* unlink: fix the forward link ... */
+	if (p == *que_hd) {
+		/* first one */
+		*que_hd = p->nextp;
+	} else {
+		prevp->nextp = p->nextp;
+	}
+
+	/* ... and the back link of the successor, if any */
+	if (p->nextp != NULL) {
+		p->nextp->prevp = prevp;
+	}
+
+	/*
+	 * Free only the queue element; the handle itself
+	 * remains owned by the caller.
+	 */
+	free(p);
+	return (VNTSD_SUCCESS);
+}
+
+/* vntsd_que_walk() - walk queue and apply function to each element */
+void *
+vntsd_que_walk(vntsd_que_t *que_hd, el_func_t el_func)
+{
+	vntsd_que_t *p = que_hd;
+
+	while (p != NULL) {
+		if ((*el_func)(p->handle)) {
+			return (p->handle);
+		}
+		p = p->nextp;
+	}
+	/* no match: return no handle (NULL), not an int status code */
+	return (NULL);
+}
+
+
+/* vntsd_que_find() finds first match */
+void *
+vntsd_que_find(vntsd_que_t *que_hd, compare_func_t compare_func, void *data)
+{
+ vntsd_que_t *p = que_hd;
+
+ assert(compare_func != NULL);
+ while (p != NULL) {
+ if ((*compare_func)(p->handle, data)) {
+ /* found match */
+ return (p->handle);
+ }
+
+ p = p->nextp;
+ }
+
+ /* not found */
+ return (NULL);
+}
+
+/* vntsd_free_que() frees entire queue */
+void
+vntsd_free_que(vntsd_que_t **q, clean_func_t clean_func)
+{
+ vntsd_que_t *p;
+
+ while (*q != NULL) {
+ p = *q;
+
+ *q = p->nextp;
+
+ if (clean_func) {
+ /* clean func will free the handle */
+ (*clean_func)(p->handle);
+ } else {
+ free(p->handle);
+ }
+
+ free(p);
+ }
+}
+
+/*
+ * vntsd_que_pos() matches a handle and returns a handle located at "pos"
+ * relative to the matched handle. pos supported are 1 or -1.
+ */
+void *
+vntsd_que_pos(vntsd_que_t *que_hd, void *handle, int pos)
+{
+ vntsd_que_t *p = que_hd;
+
+ assert((pos == 1) || (pos == -1));
+
+
+ while (p != NULL) {
+ if (p->handle == handle) {
+ /* find match */
+ if (pos == 1) {
+ /* forward 1 */
+ if (p->nextp != NULL) {
+ return (p->nextp->handle);
+ }
+
+ /* last one go to first */
+ return (que_hd->handle);
+
+ } else {
+ /* backward 1 */
+ if (p->prevp != NULL) {
+ return (p->prevp->handle);
+ }
+
+ /* first one, return last one */
+ while (p->nextp != NULL) {
+ p = p->nextp;
+ }
+
+ assert(p != NULL);
+ assert(p->handle != NULL);
+ return (p->handle);
+
+ }
+ }
+ p = p->nextp;
+ }
+
+ DERR(stderr, "t@%d vntsd_que_pos can not find handle \n",
+ thr_self());
+
+ return (NULL);
+}
diff --git a/usr/src/cmd/vntsd/read.c b/usr/src/cmd/vntsd/read.c
new file mode 100644
index 0000000000..c5431a0ac1
--- /dev/null
+++ b/usr/src/cmd/vntsd/read.c
@@ -0,0 +1,265 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
/*
 * read thread - Read from the tcp client and write to the vcc driver.
 * There is one writer and multiple readers per console. The first client
 * that connects to a console gets write access. An error message is
 * returned to readers if they attempt to input commands. The read thread
 * accepts special daemon commands from all clients.
 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <synch.h>
+#include <signal.h>
+#include <assert.h>
+#include <ctype.h>
+#include <syslog.h>
+#include <libintl.h>
+#include "vntsd.h"
+#include "chars.h"
+
+/* write_vcc() - write to vcc virtual console */
+static int
+write_vcc(vntsd_client_t *clientp, char c)
+{
+ int n;
+
+
+ assert(clientp);
+ assert(clientp->cons);
+
+ if (c == 0) {
+ return (VNTSD_SUCCESS);
+ }
+ n = write(clientp->cons->vcc_fd, &c, 1);
+
+ if (n < 0) {
+ /* write error */
+ if (errno == EINTR) {
+ return (vntsd_cons_chk_intr(clientp));
+ }
+
+ return (VNTSD_STATUS_VCC_IO_ERR);
+ }
+
+ assert(n != 0);
+ return (VNTSD_SUCCESS);
+
+}
+
/*
 * acquire_writer() - the calling client is going to be the writer.
 * The writer is, by convention, the client at the head of the console's
 * client queue: move clientp to the head, demote the current writer to
 * second place, and warn the demoted writer that its connection is now
 * read-only.  Returns VNTSD_SUCCESS or an error status from the queue
 * operations / write.
 */
static int
acquire_writer(vntsd_client_t *clientp)
{
	vntsd_cons_t *consp;
	vntsd_client_t *writerp;
	int rv;

	D1(stderr, "t@%d:acuire_writer :client@%d\n", thr_self(),
	    clientp->sockfd);

	assert(clientp != NULL);
	consp = clientp->cons;

	assert(consp);

	(void) mutex_lock(&consp->lock);

	assert(consp->clientpq != NULL);
	if (consp->clientpq->handle == clientp) {
		/* clientp is a writer already */
		(void) mutex_unlock(&consp->lock);
		return (VNTSD_SUCCESS);
	}

	/* current writer is at the head of the console's client queue */
	writerp = (vntsd_client_t *)(consp->clientpq->handle);

	/* lock order here: console, old writer, then new writer */
	(void) mutex_lock(&writerp->lock);

	/* take clientp out of its current position in the queue */
	rv = vntsd_que_rm(&(consp->clientpq), clientp);
	assert(rv == VNTSD_SUCCESS);

	(void) mutex_lock(&clientp->lock);

	/* move client to be first in the console queue */
	consp->clientpq->handle = clientp;

	/* move previous writer to be the second in the queue */
	rv = vntsd_que_insert_after(consp->clientpq, clientp, writerp);

	(void) mutex_unlock(&consp->lock);
	(void) mutex_unlock(&writerp->lock);
	(void) mutex_unlock(&clientp->lock);

	if (rv != VNTSD_SUCCESS) {
		return (rv);
	}

	/*
	 * Write the warning message to the demoted writer.  All locks are
	 * released before this write: it may block on the socket.
	 */
	if ((rv = vntsd_write_line(writerp,
	    gettext("Warning: Console connection forced into read-only mode")))
	    != VNTSD_SUCCESS) {
		return (rv);
	}

	return (VNTSD_SUCCESS);
}
+
+/* interrupt handler */
+int
+vntsd_cons_chk_intr(vntsd_client_t *clientp)
+{
+
+ if (clientp->status & VNTSD_CLIENT_TIMEOUT) {
+ return (VNTSD_STATUS_CLIENT_QUIT);
+ }
+ if (clientp->status & VNTSD_CLIENT_CONS_DELETED) {
+ return (VNTSD_STATUS_RESELECT_CONS);
+ }
+
+ if (clientp->status & VNTSD_CLIENT_IO_ERR) {
+ return (VNTSD_STATUS_CLIENT_QUIT);
+ }
+ return (VNTSD_STATUS_CONTINUE);
+}
+
+/* read from client */
+static int
+read_char(vntsd_client_t *clientp, char *c)
+{
+ int rv;
+
+ for (; ; ) {
+
+ rv = vntsd_read_data(clientp, c);
+
+ switch (rv) {
+ case VNTSD_STATUS_CONTINUE:
+ break;
+
+ case VNTSD_STATUS_ACQUIRE_WRITER:
+ rv = acquire_writer(clientp);
+ if (rv != VNTSD_SUCCESS) {
+ return (rv);
+ }
+ break;
+ default:
+ return (rv);
+ }
+ }
+}
+
+/* vntsd_read worker */
+int
+vntsd_read(vntsd_client_t *clientp)
+{
+ char c;
+ int rv;
+
+
+ assert(clientp);
+ D3(stderr, "t@%d vntsd_read@%d\n", thr_self(), clientp->sockfd);
+
+ for (; ; ) {
+
+ /* client input */
+ rv = read_char(clientp, &c);
+
+ if (rv == VNTSD_STATUS_INTR) {
+ rv = vntsd_cons_chk_intr(clientp);
+ }
+
+ if (rv != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ assert(clientp->cons);
+ if (clientp->cons->clientpq->handle != clientp) {
+ /* reader - print error message */
+ if ((c != CR) && (c != LF)) {
+ rv = vntsd_write_line(clientp,
+ gettext(VNTSD_NO_WRITE_ACCESS_MSG));
+
+ /* check errors and may exit */
+ if (rv == VNTSD_STATUS_INTR) {
+ rv = vntsd_cons_chk_intr(clientp);
+ }
+
+ if (rv != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ }
+
+ continue;
+ }
+
+ rv = vntsd_ctrl_cmd(clientp, c);
+
+ switch (rv) {
+ case VNTSD_STATUS_CONTINUE:
+ continue;
+ break;
+ case VNTSD_STATUS_INTR:
+ rv = vntsd_cons_chk_intr(clientp);
+ if (rv != VNTSD_SUCCESS) {
+ return (rv);
+ }
+ break;
+ case VNTSD_SUCCESS:
+ break;
+ default:
+ return (rv);
+ }
+
+ /* write to vcc */
+ rv = write_vcc(clientp, c);
+ if (rv == VNTSD_STATUS_INTR) {
+ rv = vntsd_cons_chk_intr(clientp);
+ }
+ if (rv != VNTSD_SUCCESS) {
+ return (rv);
+ }
+
+ }
+
+ /*NOTREACHED*/
+ return (NULL);
+}
diff --git a/usr/src/cmd/vntsd/svc-vntsd b/usr/src/cmd/vntsd/svc-vntsd
new file mode 100644
index 0000000000..e573b4ecd5
--- /dev/null
+++ b/usr/src/cmd/vntsd/svc-vntsd
@@ -0,0 +1,64 @@
+#!/sbin/sh
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# Start script for vntsd
+#
+# For modifying parameters passed to vntsd, do not edit
+# this script. Instead use svccfg(1m) to modify the SMF
+# repository. For example:
+#
+# svccfg
+# svc:> select ldoms/vntsd
+# svc:/ldoms/vntsd> setprop vntsd/vcc_device = "virtual-console-concentrator@1"
+# svc:/ldoms/vntsd> setprop vntsd/listen_addr = "192.168.1.1"
+# svc:/ldoms/vntsd> exit
+
. /lib/svc/share/smf_include.sh

# Build the vntsd argument list from SMF properties.  Each property is
# optional; the vcc device instance falls back to instance 0.
vcc_device=`svcprop -p vntsd/vcc_device $SMF_FMRI 2>/dev/null`
if [ -z "$vcc_device" ]; then
	vcc_device="virtual-console-concentrator@0"
fi
args="-i $vcc_device"

listen_addr=`svcprop -p vntsd/listen_addr $SMF_FMRI 2>/dev/null`
if [ -n "$listen_addr" ]; then
	args="$args -p $listen_addr"
fi

timeout=`svcprop -p vntsd/timeout_minutes $SMF_FMRI 2>/dev/null`
if [ -n "$timeout" ]; then
	args="$args -t $timeout"
fi

if [ -x /usr/lib/ldoms/vntsd ]; then
	/usr/lib/ldoms/vntsd $args || exit $SMF_EXIT_ERR_CONFIG
else
	# redirect diagnostic to stderr; ">& 2" is a csh-ism and is not
	# reliable under /sbin/sh
	echo "WARNING: /usr/lib/ldoms/vntsd is missing or not executable" 1>&2
	exit $SMF_EXIT_ERR_CONFIG
fi

exit $SMF_EXIT_OK
diff --git a/usr/src/cmd/vntsd/vcc.h b/usr/src/cmd/vntsd/vcc.h
new file mode 100644
index 0000000000..bcf1b2902b
--- /dev/null
+++ b/usr/src/cmd/vntsd/vcc.h
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
#ifndef _VCC_H
#define _VCC_H

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef __cplusplus
extern "C" {
#endif


/* maximum length of a console, group or domain name string */
#define VCC_MAX_NAME 25

/* vcc control-port request codes */
#define VCC_NUM_CONSOLE 0x1 /* total number of groups */
#define VCC_PORT_TBL 0x2 /* download all port in a group */

#define VCC_INQUIRY 0x4 /* inquiry events */
#define VCC_PORT_CONFIG 0x8 /* download one port */
#define VCC_CLEAN_POLL 0x10 /* vntsd exits */
#define VCC_DEL_PORT_OK 0x20 /* vntsd delete port ok */
#define VCC_PORT_HELLO 0x1

/* messages exchanged when the vcc driver adds or removes a port */
typedef enum {
	VNTSD_MSG_ADD_PORT,
	VNTSD_MSG_DEL_PORT
} vntsd_msg_t;


/* port status flag: the port is active */
#define VCC_PORT_ON 0x40


/* per-console state */
typedef struct vntsd_console {
	int cons_no;				/* console (port) number */
	uint64_t status;			/* status flags, e.g. VCC_PORT_ON */
	char domain_name[VCC_MAX_NAME];		/* owning logical domain */
} vntsd_console_t;

/* console configuration that is downloaded to vntsd */
typedef struct vntsd_vcc_console {
	vntsd_console_t console;		/* console identity and status */
	char group_name[VCC_MAX_NAME];		/* console group name */
	uint64_t tcp_port;			/* TCP port the group listens on */
} vntsd_vcc_console_t;


#ifdef __cplusplus
}
#endif

#endif /* _VCC_H */
diff --git a/usr/src/cmd/vntsd/vntsd.c b/usr/src/cmd/vntsd/vntsd.c
new file mode 100644
index 0000000000..e1ded5dc3b
--- /dev/null
+++ b/usr/src/cmd/vntsd/vntsd.c
@@ -0,0 +1,582 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * VNTSD main
+ *
+ * VNTSD takes the following options:
+ * -i <device instance>
+ * VCC device instance to use, e.g. virtual-console-concentrator@0.
+ * Required option.
+ * -p <ip address>
+ * IP address VNTSD listens to.
+ * -d
+ * Do not daemonize. This is only available in a DEBUG build.
+ * -t timeout for inactivity 0 = indefinite
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <time.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <libintl.h>
+#include <locale.h>
+#include <syslog.h>
+#include "vntsd.h"
+#include "chars.h"
+
+#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't. */
+#endif
+
+/* global variables */
+
+#ifdef DEBUG
+int vntsddbg = 0x8;
+#endif
+
+#define MINUTE 60
+
+static vntsd_t *vntsdp;
+
+
+static void vntsd_exit(void);
+/* Signal handler for SIGINT, SIGKILL and SIGHUP */
+static void
+exit_sig_handler(int sig)
+{
+
+ char err_msg[VNTSD_LINE_LEN];
+
+ D1(stderr, "t@%d exit_sig_handler%d \n", thr_self(), sig);
+
+ (void) snprintf(err_msg, sizeof (err_msg), "exit_sig_handler() sig=%d",
+ sig);
+
+ vntsd_log(VNTSD_STATUS_EXIT_SIG, err_msg);
+
+ exit(0);
+}
+
+/*
+ * Before a thread reads in client's input, it attaches to vntsd timer so that
+ * it can be waken up if a client does not access the connection for
+ * VNTSD_INPUT_TIMEOUT(10) minutes.
+ */
+
+/* attach a thread to timer */
+int
+vntsd_attach_timer(vntsd_timeout_t *tmop)
+{
+ int rv;
+
+ if (vntsdp->timeout == 0) {
+ return (VNTSD_SUCCESS);
+ }
+
+ (void) mutex_lock(&vntsdp->tmo_lock);
+ rv = vntsd_que_append(&vntsdp->tmoq, (void *)tmop);
+ (void) mutex_unlock(&vntsdp->tmo_lock);
+ return (rv);
+}
+
+/* detach a thread from timer */
+int
+vntsd_detach_timer(vntsd_timeout_t *tmop)
+{
+ int rv;
+
+ if (vntsdp->timeout == 0) {
+ return (VNTSD_SUCCESS);
+ }
+
+ (void) mutex_lock(&vntsdp->tmo_lock);
+ rv = vntsd_que_rm(&vntsdp->tmoq, (void *)tmop);
+ (void) mutex_unlock(&vntsdp->tmo_lock);
+
+ return (rv);
+}
+
+/* check threadd's timeout */
+static boolean_t
+chk_timeout(vntsd_timeout_t *tmop)
+{
+ tmop->minutes++;
+
+ if (tmop->minutes == vntsdp->timeout) {
+ /* wake up the thread */
+ tmop->clientp->status |= VNTSD_CLIENT_TIMEOUT;
+ (void) thr_kill(tmop->tid, SIGALRM);
+ }
+
+ /* return false to walk the queue */
+ return (B_FALSE);
+}
+
+/* reset timer */
+static boolean_t
+reset_timeout(vntsd_timeout_t *tmop, thread_t tid)
+{
+ if (tmop->tid == tid) {
+ tmop->minutes = 0;
+ }
+ /* return false to walk the queue */
+ return (B_FALSE);
+}
+
+void
+vntsd_reset_timer(thread_t tid)
+{
+ if (vntsdp->timeout == 0) {
+ return;
+ }
+
+ (void) mutex_lock(&vntsdp->tmo_lock);
+ (void) vntsd_que_find(vntsdp->tmoq, (compare_func_t)reset_timeout,
+ (void*)tid);
+ (void) mutex_unlock(&vntsdp->tmo_lock);
+}
+
/*
 * vntsd_alarm_sig_handler() - SIGALRM handler.  The alarm fires once a
 * minute: the main thread walks the timeout queue, waking any client
 * thread that has been idle too long, and re-arms the alarm.  Client
 * threads also receive SIGALRM (via thr_kill() in chk_timeout()) and
 * simply return from here.
 */
static void
vntsd_alarm_sig_handler(int sig)
{
	/* id of the thread that services the periodic alarm */
	static thread_t main_thread = 0;

	D1(stderr, "t@%d alarm signal %d\n", thr_self(), sig);
	if (vntsdp->timeout == 0) {
		/* no timeout configured: the alarm should never be armed */
		DERR(stderr, "t@%d alarm signal should not recv %d\n",
		    thr_self(), sig);
		return;
	}


	if (main_thread == 0) {
		/*
		 * First delivery: remember this thread as the alarm servicer.
		 * NOTE(review): assumes the first SIGALRM is delivered to the
		 * thread that called alarm() in main - confirm.
		 */
		main_thread = thr_self();
	} else if (main_thread != thr_self()) {
		/* a worker thread woken by chk_timeout(); nothing to do */
		return;
	}

	/* in main thread */
	(void) mutex_lock(&vntsdp->tmo_lock);

	/* bump every attached thread's idle count; wake up expired ones */
	(void) vntsd_que_walk(vntsdp->tmoq, (el_func_t)chk_timeout);
	(void) mutex_unlock(&vntsdp->tmo_lock);

	/* re-arm the one-minute alarm */
	(void) alarm(MINUTE);
}
+
+/* got a SIGUSER1 siginal */
+static void
+vntsd_sig_handler(int sig)
+{
+ char err_msg[VNTSD_LINE_LEN];
+
+ (void) snprintf(err_msg, sizeof (err_msg), "sig_handler() sig=%d",
+ sig);
+
+ if (sig != SIGUSR1) {
+ vntsd_log(VNTSD_STATUS_SIG, err_msg);
+ }
+}
+
/*
 * vntsd_exit() - atexit() handler: tear down daemon state before the
 * process exits.  Cancels the inactivity alarm, frees all console groups,
 * closes the vcc control port and releases the global vntsd structure.
 */
static void
vntsd_exit(void)
{
	D1(stderr, "t@%d vntsd_exit\n", thr_self());

	(void) mutex_lock(&vntsdp->lock);

	if (vntsdp->timeout > 0) {
		/* cancel the timer */
		(void) alarm(0);
	}
	/* delete all groups; vntsd_clean_group() frees each group handle */
	vntsd_free_que(&vntsdp->grouppq, (clean_func_t)vntsd_clean_group);

	/* close control port */
	(void) close(vntsdp->ctrl_fd);

	/* every timeout registration must have been detached by now */
	assert(vntsdp->tmoq == NULL);
	(void) mutex_unlock(&vntsdp->lock);

	/* clean up vntsdp */
	(void) mutex_destroy(&vntsdp->tmo_lock);
	(void) mutex_destroy(&vntsdp->lock);
	free(vntsdp);
	closelog();
}
+
/* vntsd_help() - print the valid command line options to stderr */
static void
vntsd_help(void)
{
	(void) fprintf(stderr, gettext("Usage: vntsd -i <VCC device instance> "
	    "[-p <listen address>] [-t <timeout in minutes>]\n"));
}
+
+
+#ifdef DEBUG
+#define DEBUG_OPTIONS "d"
+#else
+#define DEBUG_OPTIONS ""
+#endif
+
+int
+main(int argc, char ** argv)
+{
+ char *path;
+ struct pollfd poll_drv[1];
+ struct sigaction act;
+ char *listen_addr = NULL;
+ pid_t pid;
+ int i;
+ int option;
+ int sz;
+ int fd;
+ int n;
+
+ /* internationalization */
+ (void) setlocale(LC_MESSAGES, "");
+ (void) textdomain(TEXT_DOMAIN);
+ vntsd_init_esctable_msgs();
+
+ /* initialization */
+ bzero(&act, sizeof (act));
+
+ vntsdp = calloc(sizeof (vntsd_t), 1);
+ if (vntsdp == NULL) {
+ vntsd_log(VNTSD_ERR_NO_MEM, "main:vntsdp");
+ exit(1);
+ }
+
+ vntsdp->ctrl_fd = -1;
+ vntsdp->devinst = NULL;
+
+ (void) mutex_init(&vntsdp->lock, USYNC_THREAD|LOCK_ERRORCHECK, NULL);
+ (void) mutex_init(&vntsdp->tmo_lock, USYNC_THREAD|LOCK_ERRORCHECK,
+ NULL);
+
+ /* get CLI options */
+ while ((option = getopt(argc, argv, "i:t:p:"DEBUG_OPTIONS)) != EOF) {
+ switch (option) {
+#ifdef DEBUG
+ case 'd':
+ vntsdp->options |= VNTSD_OPT_DAEMON_OFF;
+ break;
+#endif
+ case 'i':
+ vntsdp->devinst = optarg;
+ break;
+ case 'p':
+ listen_addr = optarg;
+ break;
+
+ case 't':
+ n = sscanf(optarg, "%d", &(vntsdp->timeout));
+ if (n != 1) {
+ vntsdp->timeout = -1;
+ }
+ break;
+
+ default:
+ vntsd_help();
+ exit(1);
+ }
+ }
+
+ if ((vntsdp->devinst == NULL) || (vntsdp->timeout == -1)) {
+ vntsd_help();
+ exit(1);
+ }
+
+ if (listen_addr == NULL || strcmp(listen_addr, "localhost") == 0) {
+ /* by default listen on loopback interface */
+ vntsdp->ip_addr.s_addr = htonl(INADDR_LOOPBACK);
+ } else if (strcmp(listen_addr, "any") == 0) {
+ vntsdp->ip_addr.s_addr = htonl(INADDR_ANY);
+ } else {
+ vntsdp->ip_addr.s_addr = inet_addr(listen_addr);
+ if (vntsdp->ip_addr.s_addr == (in_addr_t)(-1)) {
+ (void) fprintf(stderr,
+ gettext("Invalid listen address '%s'\n"),
+ listen_addr);
+ exit(1);
+ }
+ }
+
+ D3(stderr, "options = %llx, instance = %s, listen = %s\n",
+ vntsdp->options, vntsdp->devinst,
+ listen_addr ? listen_addr : "<null>");
+
+ /* open VCC driver control port */
+ sz = strlen(VCC_DEVICE_CTL_PATH) + strlen(vntsdp->devinst) + 1;
+ path = calloc(sz, 1);
+ if (path == NULL) {
+ vntsd_log(VNTSD_ERR_NO_MEM, "main(): alloc dev path");
+ exit(1);
+ }
+ (void) snprintf(path, sz-1, VCC_DEVICE_CTL_PATH, vntsdp->devinst,
+ sizeof (vntsdp->devinst));
+ vntsdp->ctrl_fd = open(path, O_RDWR);
+ free(path);
+
+ if (vntsdp->ctrl_fd == -1) {
+ /*
+ * do not print error if device is not present
+ * the daemon is probably being started incorrectly
+ */
+ if (errno != ENOENT) {
+ syslog(LOG_ERR,
+ "Error opening VCC device control port: %s",
+ strerror(errno));
+ }
+ exit(1);
+ }
+ if ((vntsdp->options & VNTSD_OPT_DAEMON_OFF) == 0) {
+ /* daemonize it */
+ pid = fork();
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid > 0) {
+ /* parent */
+ exit(0);
+ }
+
+ /*
+ * child process (daemon)
+ *
+ * Close all file descriptors other than 2 and the ctrl fd.
+ */
+ (void) close(0);
+ (void) close(1);
+ for (i = 3; i < vntsdp->ctrl_fd; i++) {
+ (void) close(i);
+ }
+ closefrom(vntsdp->ctrl_fd + 1);
+
+ /* obtain a new process group */
+ (void) setsid();
+ fd = open("/dev/null", O_RDWR);
+ if (fd < 0) {
+ syslog(LOG_ERR, "Can not open /dev/null");
+ exit(1);
+ }
+ /* handle standard I/O */
+ if (dup2(fd, 0) < 0) {
+ syslog(LOG_ERR, "Failed dup2()");
+ exit(1);
+ }
+
+ if (dup2(fd, 1) < 0) {
+ syslog(LOG_ERR, "Failed dup2()");
+ exit(1);
+ }
+
+ /* ignore terminal signals */
+ (void) signal(SIGTSTP, SIG_IGN);
+ (void) signal(SIGTTOU, SIG_IGN);
+ (void) signal(SIGTTIN, SIG_IGN);
+ }
+
+
+ /* set up signal handlers */
+
+ /* exit signals */
+ act.sa_handler = exit_sig_handler;
+
+ (void) sigemptyset(&act.sa_mask);
+ (void) sigaction(SIGINT, &act, NULL);
+ (void) sigaction(SIGTERM, &act, NULL);
+ (void) sigaction(SIGHUP, &act, NULL);
+
+ /* vntsd internal signals */
+ act.sa_handler = vntsd_sig_handler;
+ (void) sigemptyset(&act.sa_mask);
+ (void) sigaction(SIGUSR1, &act, NULL);
+
+
+ act.sa_handler = vntsd_alarm_sig_handler;
+ (void) sigemptyset(&act.sa_mask);
+ (void) sigaction(SIGALRM, &act, NULL);
+
+
+ /* setup exit */
+ (void) atexit(vntsd_exit);
+
+
+
+ /* initialization */
+ openlog("vntsd", LOG_CONS, LOG_DAEMON);
+
+
+ /* set alarm */
+ if (vntsdp->timeout > 0) {
+ (void) alarm(MINUTE);
+ }
+
+ vntsdp->tid = thr_self();
+
+ /* get exiting consoles from vcc */
+ vntsd_get_config(vntsdp);
+
+ for (; ; ) {
+ /* poll vcc for configuration change */
+ bzero(poll_drv, sizeof (poll_drv));
+
+ poll_drv[0].fd = vntsdp->ctrl_fd;
+ poll_drv[0].events = POLLIN;
+
+ if (poll(poll_drv, 1, -1) == -1) {
+ if (errno == EINTR) {
+ /* wake up because a consle was deleted */
+ vntsd_delete_cons(vntsdp);
+ continue;
+ }
+ vntsd_log(VNTSD_ERR_VCC_POLL,
+ "vcc control poll err! aborting..");
+ exit(1);
+ }
+
+ D1(stderr, "t@%d driver event %x\n", thr_self(),
+ poll_drv[0].revents);
+
+ vntsd_daemon_wakeup(vntsdp);
+
+ }
+
+ /*NOTREACHED*/
+ return (0);
+}
+
/* vntsd_ip_addr() - export the listen address the daemon was started with */
struct in_addr
vntsd_ip_addr(void)
{
	return (vntsdp->ip_addr);
}
+
+/*
+ * ioctl to vcc control port
+ * Supported ioctls interface are:
+ * ioctl code parameters return data
+ * VCC_NUM_CONSOLE none uint_t no consoles
+ * VCC_CONS_TBL none array of vcc_cons_t
+ * VCC_INQUIRY none vcc_response_t response
+ * VCC_CONS_INFO uint_t portno vcc_cons_t
+ * VCC_CONS_STATUS uint_t portno
+ * VCC_FORCE_CLOSE uint_t portno
+ */
+int
+vntsd_vcc_ioctl(int ioctl_code, uint_t portno, void *buf)
+{
+ D1(stderr, "t@%d vcc_ioctl@%d code=%x\n", thr_self(), portno,
+ ioctl_code);
+
+ if ((ioctl_code == (VCC_CONS_INFO)) ||
+ (ioctl_code == (VCC_FORCE_CLOSE))) {
+ /* construct vcc in buf */
+ *((uint_t *)buf) = portno;
+ }
+
+ if (ioctl(vntsdp->ctrl_fd, ioctl_code, (caddr_t)buf)) {
+ /* control port get error */
+ syslog(LOG_ERR, "vcc control port error! abort vntsd");
+ (void) thr_kill(vntsdp->tid, SIGINT);
+ return (VNTSD_STATUS_VCC_IO_ERR);
+ }
+
+ return (VNTSD_SUCCESS);
+}
+
/*
 * vntsd_vcc_err() - check whether a vcc i/o error was caused by removal of
 * the console.  If the console is gone, notify all clients connected to it
 * and wake the main thread (SIGUSR1) so it can clean the console up.
 *
 * Returns VNTSD_STATUS_CONTINUE when the console is still alive,
 * VNTSD_STATUS_VCC_IO_ERR otherwise.
 */
int
vntsd_vcc_err(vntsd_cons_t *consp)
{
	vntsd_group_t *groupp;

	assert(consp);
	groupp = consp->group;
	assert(groupp);

	if (consp->status & VNTSD_CONS_DELETED) {
		/* console was already marked deleted */
		return (VNTSD_STATUS_VCC_IO_ERR);
	}

	if (vntsd_vcc_cons_alive(consp)) {
		/* console is ok - the i/o error was transient */
		return (VNTSD_STATUS_CONTINUE);
	}

	/* console needs to be deleted */
	(void) mutex_lock(&consp->lock);
	consp->status |= VNTSD_CONS_DELETED;

	/* signal all clients to disconnect from console */
	(void) vntsd_que_walk(consp->clientpq,
	    (el_func_t)vntsd_notify_client_cons_del);
	(void) mutex_unlock(&consp->lock);

	/* mark the group so the main thread knows it needs cleanup */
	(void) mutex_lock(&groupp->lock);
	groupp->status |= VNTSD_GROUP_CLEAN_CONS;
	(void) mutex_unlock(&groupp->lock);

	/* signal main thread to delete the console */
	(void) thr_kill(vntsdp->tid, SIGUSR1);

	return (VNTSD_STATUS_VCC_IO_ERR);
}
diff --git a/usr/src/cmd/vntsd/vntsd.h b/usr/src/cmd/vntsd/vntsd.h
new file mode 100644
index 0000000000..16b1bbe90f
--- /dev/null
+++ b/usr/src/cmd/vntsd/vntsd.h
@@ -0,0 +1,476 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * vntsd uses configuration information provided by vcc to export access
+ * to Ldom console access over regular TCP sockets. When it starts, it opens
+ * the vcc driver control port and obtains the list of ports that have been
+ * created by the vcc driver as well as TCP port number and group associated
+ * with each port.
+ * vntsd consists of multiple components as the follows:
+ *
+ * vntsd.c
+ * This module initializes vnts daemon, process user options such as instance
+ * number, ip address and etc., and provides main thread to poll any console
+ * port change.
+ *
+ * vntsdvcc.c
+ * This module provides vcc driver interface. It opens vcc driver control
+ * ports, read initial configuration, and provides interface to read, write and
+ * ioctl virtual console ports. This module creates a listen thread for each
+ * console group. It further dynamically adds and removes virtual consoles
+ * and groups following instructions of the vcc driver. This module
+ * is executed in the same thread as vntsd.c which is blocked on vcc control
+ * poll interface.
+ *
+ * listen.c
+ * This is a group listen thread. Each group's tcp-port has a listen thread
+ * associated with it. The thread is created when a console is associated with
+ * a new group and is removed when all consoles in the group are removed.
+ *
+ * console.c
+ * This is a console selection thread. The thread is created when a client
+ * connects to a group TCP port and exited when client disconnects. If there is
+ * only one console in the group, the client is connected to that console. If
+ * there are multiple consoles in the group, the client is asked to select a
+ * console. After determining which console to connect to, this thread
+ * creates a write thread if the client is a writer, and itself reads in
+ * client input.
+ *
+ * read.c
+ * This is a read thread. It reads input from a TCP client, processes
+ * special daemon and telnet commands, and writes to the vcc driver if the
+ * client is a writer. The client is a writer if it is the first one to
+ * connect to the console. The read thread prints an error message if a
+ * reader attempts to input to vcc. The read thread exits if the console
+ * is deleted, the client disconnects, or there is a fatal error.
+ *
+ * write.c
+ * A write thread is created when the first client connects to a console.
+ * It reads from vcc and writes to all clients that connect to the same
+ * console. The write thread exits when all clients disconnect from the
+ * console.
+ *
+ * cmd.c
+ * This is a supporting module for handling special daemon and telnet commands.
+ *
+ * common.c
+ * supporting modules shared by threads modules.
+ *
+ * queue.c
+ * This is a module supporting queue operations. Vntsd organizes its data
+ * in multiple queues <see data structure below>.
+ *
+ * vntsd.xml
+ * This is a manifest to support SMF interfaces.
+ *
+ * Data structures
+ * each group has a vntsd_group_t structure, which contains a queue of
+ * all console in that group.
+ * each console has a vntsd_cons_t structure, which contains a queue of
+ * all clients that connected to the console.
+ *
+ * +----------+ +----------+ +----------+
+ * | group |-->| group |-->| group |-->....
+ * +----------+ +----------+ +----------+
+ * |
+ * |<-----------------------------------------+
+ * |<------------------------+ |
+ * |<--------+ | |
+ * | | | |
+ * | +----------+ +----------+ +----------+
+ * +----->| console |---->| console |---->| lconsole |---> ....
+ * +----------+ +----------+ +----------+
+ * | |
+ * | | +----------+ +----------+
+ * | +---->| client |----->| client |----->......
+ * | +----------+ +----------+
+ * | | |
+ * |<------------+ |
+ * |<------------------------------+
+ *
+ * Locks
+ * Each vntsd has one lock to protect the group queue
+ * Each group has one lock to protect the console queue, the queue for
+ * clients without a console connection and status.
+ * Each console has one lock to protect client queue and status.
+ * Each client has one lock to protect the state of the client. The client
+ * states are:
+ *
+ * VCC_CLIENT_READER
+ * A client is connected to a console as either a writer or a reader.
+ * if this client is the first one connects the console, the client is
+ * a writer, otherwise the client is a reader. A writer' write thread
+ * reads from vcc and send output to all readers connected to the
+ * same console. a reader's write thread is blocked until a reader becomes
+ * a writer.
+ *
+ * When a client selected a console, the client becomes a reader if
+ * there is another client connected to the console before the client.
+ * A client will be a writer if
+ * 1. client is the first one connected to the console or
+ * 2. client has entered a ~w daemon command or
+ * 3. all clients connected to the console before the client have
+ * disconnected from the console.
+ *
+ * VCC_CLIENT_MOVE_CONS_FORWARD
+ * VCC_CLIENT_MOVE_CONS_BACKWOARD
+ * A client is disconnecting from one console and move to the next or
+ * previous console in the group queue.
+ * A client is in one of these state if
+ * 1. the client has entered the daemon command and
+ * 2. the vntsd is in process of switching the client from one
+ * console to another.
+ *
+ * VCC_CLIENT_DISABLE_DAEMON_CMD
+ * vntsd is in processing of a client's daemon command or the client is
+ * in selecting console.
+ * A client is in this state if
+ * 1. the client has not selected a console or
+ * 2. the vntsd is processing a client's daemon command.
+ *
+ * VCC_CLIENT_ACQUIRE_WRITER
+ * A reader forces to become a writer via vntsd special command.
+ * A client is in this state if
+ * 1. the client is a reader and
+ * 2. client has entered a daemon command to become a writer.
+ *
+ * VCC_CLIENT_CONS_DELETED
+ * The console that the client is connected to is being deleted and
+ * waiting for the client to disconnect.
+ * A client is in this state if
+ * 1. the console a client is connected to is being removed and
+ * 2. the vntsd is in process of disconnecting the client from the console.
+ *
+ */
+
+#ifndef _VNTSD_H
+#define _VNTSD_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/shm.h>
+#include <strings.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stropts.h>
+#include <errno.h>
+#include <sys/param.h>
+#include "../../uts/sun4v/sys/vcc.h"
+
+#define DEBUG
+
+/* vntsd limits */
+#define VNTSD_MAX_BUF_SIZE 128
+#define VNTSD_LINE_LEN 100
+#define VNTSD_MAX_SOCKETS 5
+#define VNTSD_EOL_LEN 2
+
+/* seconds before re-sending the signal for cv_wait */
+#define VNTSD_CV_WAIT_DELTIME 10
+
+#define VCC_PATH_PREFIX \
+ "/devices/virtual-devices@100/channel-devices@200/"
+#define VCC_DEVICE_PATH "/devices%s"
+#define VCC_DEVICE_CTL_PATH VCC_PATH_PREFIX "%s:ctl"
+
+/* common messages */
+#define VNTSD_NO_WRITE_ACCESS_MSG "You do not have write access"
+
+/* vntsd options */
+#define VNTSD_OPT_DAEMON_OFF 0x1
+
+/* group states */
+
+#define VNTSD_GROUP_SIG_WAIT 0x1 /* waiting for signal */
+#define VNTSD_GROUP_CLEAN_CONS 0x2 /* cons needs to be clean */
+#define VNTSD_GROUP_CLEANUP 0x4 /* waiting for signal */
+
+
+
+
+
+/* console status */
+
+#define VNTSD_CONS_DELETED 0x1 /* deleted */
+#define VNTSD_CONS_SIG_WAIT 0x2 /* waiting fro signal */
+
+
+#define VNTSD_CLIENT_IO_ERR 0x1 /* reader */
+#define VNTSD_CLIENT_DISABLE_DAEMON_CMD 0x2 /* disable daemon cmd */
+#define VNTSD_CLIENT_TIMEOUT 0x4 /* timeout */
+#define VNTSD_CLIENT_CONS_DELETED 0x8 /* console deleted */
+
+/* generic que structure */
+typedef struct vntsd_que {
+ void *handle; /* element in queue */
+ struct vntsd_que *nextp; /* next queue element */
+ struct vntsd_que *prevp; /* previous queue element */
+} vntsd_que_t;
+
+struct vntsd_cons;
+struct vntsd_group;
+struct vntsd;
+
+/* client structure */
+typedef struct vntsd_client {
+ mutex_t lock; /* protect the client */
+ uint_t status; /* client's state */
+
+ int sockfd; /* connection socket */
+ thread_t cons_tid; /* console thread */
+
+ struct vntsd_cons *cons; /* back link to console configuration */
+
+} vntsd_client_t;
+
+/* console structure */
+typedef struct vntsd_cons {
+ mutex_t lock; /* protect console port */
+ cond_t cvp; /* sync between threads */
+
+ vntsd_que_t *clientpq; /* client que */
+ uint_t status; /* client's state */
+ int vcc_fd; /* vcc console port */
+ thread_t wr_tid; /* write thread */
+
+ uint_t cons_no; /* console port number */
+ char domain_name[MAXPATHLEN]; /* domain name */
+ char dev_name[MAXPATHLEN];
+
+ struct vntsd_group *group; /* back link to group */
+} vntsd_cons_t;
+
+/* group structure */
+typedef struct vntsd_group {
+ mutex_t lock; /* protect group */
+ cond_t cvp; /* sync remove group */
+
+ uint_t status; /* group status */
+ char group_name[MAXPATHLEN];
+ uint64_t tcp_port; /* telnet port */
+
+ thread_t listen_tid; /* listen thread */
+ int sockfd; /* listen socket */
+
+ vntsd_que_t *conspq; /* console queue */
+ uint_t num_cons; /* num console */
+
+ /* clients have no console connection */
+ vntsd_que_t *no_cons_clientpq;
+ struct vntsd *vntsd;
+
+} vntsd_group_t;
+
+/* daemon structure */
+typedef struct vntsd {
+
+ mutex_t lock; /* protect vntsd */
+ mutex_t tmo_lock; /* protect tmo queue */
+
+ int instance; /* vcc instance */
+ struct in_addr ip_addr; /* ip address to listen */
+ uint64_t options; /* daemon options */
+ int timeout; /* connection timeout */
+
+ char *devinst; /* device name */
+ int ctrl_fd; /* vcc ctrl port */
+
+ vntsd_que_t *grouppq; /* group queue */
+ uint_t num_grps; /* num groups */
+
+ vntsd_que_t *tmoq; /* timeout queue */
+ thread_t tid; /* main thread id */
+
+} vntsd_t;
+
+/* handle for creating thread */
+typedef struct vntsd_thr_arg {
+ void *handle;
+ void *arg;
+} vntsd_thr_arg_t;
+
+/* timeout structure */
+typedef struct vntsd_timeout {
+ thread_t tid; /* thread tid */
+ uint_t minutes; /* idle minutes */
+ vntsd_client_t *clientp; /* client */
+} vntsd_timeout_t;
+
+/* vntsd status and error definitions */
+typedef enum {
+
+ /* status */
+ VNTSD_SUCCESS = 0, /* success */
+ VNTSD_STATUS_CONTINUE, /* continue to execute */
+ VNTSD_STATUS_EXIT_SIG, /* exit siginal */
+ VNTSD_STATUS_SIG, /* known signal */
+ VNTSD_STATUS_NO_HOST_NAME, /* no host name set */
+ VNTSD_STATUS_CLIENT_QUIT, /* client disconnected from group */
+ VNTSD_STATUS_RESELECT_CONS, /* client re-selecting console */
+ VNTSD_STATUS_VCC_IO_ERR, /* a vcc io error occurs */
+ VNTSD_STATUS_MOV_CONS_FORWARD, /* down arrow */
+ VNTSD_STATUS_MOV_CONS_BACKWARD, /* up arrow */
+ VNTSD_STATUS_ACQUIRE_WRITER, /* force become the writer */
+ VNTSD_STATUS_INTR, /* thread receive a signal */
+ VNTSD_STATUS_DISCONN_CONS, /* disconnect a client from cons */
+ VNTSD_STATUS_NO_CONS, /* disconnect a client from cons */
+
+ /* resource errors */
+ VNTSD_ERR_NO_MEM, /* memory allocation error */
+ VNTSD_ERR_NO_DRV, /* cannot open vcc port */
+
+ /* vcc errors */
+ VNTSD_ERR_VCC_CTRL_DATA, /* vcc ctrl data error */
+ VNTSD_ERR_VCC_POLL, /* error poll vcc driver */
+ VNTSD_ERR_VCC_IOCTL, /* vcc ioctl call error */
+ VNTSD_ERR_VCC_GRP_NAME, /* group name differs from database */
+ VNTSD_ERR_ADD_CONS_FAILED, /* addition of a console failed */
+
+ /* create thread errors */
+ VNTSD_ERR_CREATE_LISTEN_THR, /* listen thread creation failed */
+ VNTSD_ERR_CREATE_CONS_THR, /* create console thread err */
+ VNTSD_ERR_CREATE_WR_THR, /* listen thread creation failed */
+
+ /* listen thread errors */
+ VNTSD_ERR_LISTEN_SOCKET, /* can not create tcp socket */
+ VNTSD_ERR_LISTEN_OPTS, /* can not set socket opt */
+ VNTSD_ERR_LISTEN_BIND, /* can not bind socket */
+ VNTSD_STATUS_ACCEPT_ERR, /* accept error */
+
+ /* tcp client read and write errors */
+ VNTSD_ERR_WRITE_CLIENT, /* writing tcp client err */
+
+ /* tcp client timeout */
+ VNTSD_ERR_CLIENT_TIMEOUT, /* client has no activity for timeout */
+
+ /* signal errors */
+ VNTSD_ERR_SIG, /* unknown signal */
+
+ /* user input error */
+ VNTSD_ERR_INVALID_INPUT, /* client typed in */
+
+ /* internal errors */
+ VNTSD_ERR_EL_NOT_FOUND, /* element not found */
+ VNTSD_ERR_UNKNOWN_CMD /* unknown error/cmd */
+
+} vntsd_status_t;
+
+/* function prototype defines */
+typedef int (*compare_func_t)(void *el, void *data);
+typedef int (*el_func_t)(void *el);
+typedef void (*clean_func_t)(void *el);
+typedef void (*sig_handler_t)(int sig);
+typedef void *(*thr_func_t)(void *);
+
+
+
+/* function prototype */
+void vntsd_log(vntsd_status_t err, char *msg);
+struct in_addr vntsd_ip_addr(void);
+
+void vntsd_get_config(vntsd_t *vntsdp);
+void vntsd_daemon_wakeup(vntsd_t *vntsdp);
+int vntsd_open_vcc(char *domain_name, uint_t cons_no);
+void vntsd_delete_cons(vntsd_t *vntsdp);
+void vntsd_clean_group(vntsd_group_t *groupp);
+
+
+void *vntsd_listen_thread(vntsd_group_t *groupp);
+void *vntsd_console_thread(vntsd_thr_arg_t *argp);
+int vntsd_read(vntsd_client_t *clientp);
+void *vntsd_write_thread(vntsd_cons_t *consp);
+
+boolean_t vntsd_cons_by_consno(vntsd_cons_t *consp, int *cons_id);
+
+int vntsd_que_append(vntsd_que_t **que_hd, void *handle);
+int vntsd_que_rm(vntsd_que_t **que_hd, void *handle);
+void *vntsd_que_find(vntsd_que_t *que_hd, compare_func_t
+ compare_func, void *data);
+void *vntsd_que_walk(vntsd_que_t *que_hd, el_func_t el_func);
+
+int vntsd_que_insert_after(vntsd_que_t *que, void *handle,
+ void *next);
+void *vntsd_que_pos(vntsd_que_t *que_hd, void *handle, int pos);
+void vntsd_free_que(vntsd_que_t **q, clean_func_t clean_func);
+
+int vntsd_read_char(vntsd_client_t *clientp, char *c);
+int vntsd_read_line(vntsd_client_t *clientp, char *buf, int *size);
+int vntsd_read_data(vntsd_client_t *clientp, char *c);
+int vntsd_get_yes_no(vntsd_client_t *clientp, char *msg,
+ int *yes_no);
+int vntsd_ctrl_cmd(vntsd_client_t *clientp, char c);
+int vntsd_process_daemon_cmd(vntsd_client_t *clientp, char c);
+int vntsd_telnet_cmd(vntsd_client_t *clientp, char c);
+
+int vntsd_set_telnet_options(int fd);
+int vntsd_write_client(vntsd_client_t *client, char *buffer,
+ size_t sz);
+int vntsd_write_fd(int fd, void *buffer, size_t sz);
+int vntsd_write_line(vntsd_client_t *clientp, char *line);
+int vntsd_write_lines(vntsd_client_t *clientp, char *lines);
+extern char vntsd_eol[];
+
+void vntsd_clean_group(vntsd_group_t *portp);
+void vntsd_free_client(vntsd_client_t *clientp);
+int vntsd_attach_timer(vntsd_timeout_t *tmop);
+int vntsd_detach_timer(vntsd_timeout_t *tmop);
+void vntsd_reset_timer(thread_t tid);
+void vntsd_init_esctable_msgs(void);
+int vntsd_vcc_ioctl(int ioctl_code, uint_t portno, void *buf);
+int vntsd_vcc_err(vntsd_cons_t *consp);
+int vntsd_cons_chk_intr(vntsd_client_t *clientp);
+boolean_t vntsd_vcc_cons_alive(vntsd_cons_t *consp);
+boolean_t vntsd_notify_client_cons_del(vntsd_client_t *clientp);
+int vntsd_chk_group_total_cons(vntsd_group_t *groupp);
+
+
+#ifdef DEBUG
+
+extern int vntsddbg;
+
+#define D1 if (vntsddbg & 0x01) (void) fprintf
+#define D2 if (vntsddbg & 0x02) (void) fprintf
+#define D3 if (vntsddbg & 0x04) (void) fprintf
+#define DERR if (vntsddbg & 0x08) (void) fprintf
+
+#else /* not DEBUG */
+
+#define D1
+#define D2
+#define D3
+#define DERR
+
+#endif /* not DEBUG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VNTSD_H */
diff --git a/usr/src/cmd/vntsd/vntsd.xml b/usr/src/cmd/vntsd/vntsd.xml
new file mode 100644
index 0000000000..f5474dd807
--- /dev/null
+++ b/usr/src/cmd/vntsd/vntsd.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ NOTE: This service manifest is not editable; its contents will
+ be overwritten by package or patch operations, including
+ operating system upgrade. Make customizations in a different
+ file.
+-->
+
+<service_bundle type='manifest' name='SUNWldomu:vntsd'>
+
+<service
+ name='ldoms/vntsd'
+ type='service'
+ version='1'>
+
+ <create_default_instance enabled='false' />
+
+ <dependency
+ name='network'
+ grouping='optional_all'
+ restart_on='error'
+ type='service'>
+ <service_fmri value='svc:/milestone/network' />
+ </dependency>
+
+ <dependency
+ name='syslog'
+ grouping='optional_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/system/system-log' />
+ </dependency>
+
+ <exec_method
+ type='method'
+ name='start'
+ exec='/lib/svc/method/svc-vntsd'
+ timeout_seconds='60' />
+
+ <exec_method
+ type='method'
+ name='stop'
+ exec=':kill'
+ timeout_seconds='30' />
+
+ <!-- these are passed to vntsd in the method script -->
+ <property_group name='vntsd' type='application'>
+ <propval name='vcc_device' type='astring'
+ value='virtual-console-concentrator@0' />
+ <propval name='listen_addr' type='astring' value='localhost' />
+ <propval name='timeout_minutes' type='integer' value='0' />
+ </property_group>
+
+ <stability value='Unstable' />
+
+ <template>
+ <common_name>
+ <loctext xml:lang='C'>
+ virtual network terminal server
+ </loctext>
+ </common_name>
+ <documentation>
+ <manpage title='vntsd' section='1M'
+ manpath='/usr/share/man' />
+ </documentation>
+ </template>
+</service>
+
+</service_bundle>
diff --git a/usr/src/cmd/vntsd/vntsdvcc.c b/usr/src/cmd/vntsd/vntsdvcc.c
new file mode 100644
index 0000000000..9facdf7c75
--- /dev/null
+++ b/usr/src/cmd/vntsd/vntsdvcc.c
@@ -0,0 +1,633 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Configuration and setup interface to vcc driver.
+ * At initialization time, vntsd opens the vcc ctrl port and reads the
+ * initial configuration. It manages console groups, creates the listen
+ * threads, and dynamically adds and removes virtual consoles within a group.
+ */
+
+
+#include <syslog.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/sem.h>
+#include <wait.h>
+#include <time.h>
+#include <synch.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <signal.h>
+#include "vntsd.h"
+
+/*
+ * Signal one client that its console has been deleted: mark the client
+ * and interrupt its console thread with SIGUSR1.  Used as a que_walk
+ * callback over a console's client queue.
+ */
+boolean_t
+vntsd_notify_client_cons_del(vntsd_client_t *clientp)
+{
+	(void) mutex_lock(&clientp->lock);
+	clientp->status |= VNTSD_CLIENT_CONS_DELETED;
+	(void) thr_kill(clientp->cons_tid, SIGUSR1);
+	(void) mutex_unlock(&clientp->lock);
+	/* B_FALSE: tell que_walk to continue with the next client */
+	return (B_FALSE);
+}
+
+/* free a console structure and its synchronization objects */
+static void
+free_cons(vntsd_cons_t *consp)
+{
+	assert(consp);
+	(void) mutex_destroy(&consp->lock);
+	(void) cond_destroy(&consp->cvp);
+	free(consp);
+}
+
+/*
+ * All clients connected to a console must disconnect before the console
+ * can be removed.  Repeatedly signal the clients (and the console's
+ * write thread) and wait, with a timeout, until the client queue drains;
+ * then free the console.
+ */
+static void
+cleanup_cons(vntsd_cons_t *consp)
+{
+	vntsd_group_t *groupp;
+	timestruc_t	to;
+
+	assert(consp);
+	D1(stderr, "t@%d vntsd_disconn_clients@%d\n", thr_self(),
+	    consp->cons_no);
+
+	groupp = consp->group;
+	assert(groupp);
+
+
+	(void) mutex_lock(&consp->lock);
+
+	/* wait for all clients disconnect from the console */
+	while (consp->clientpq != NULL) {
+		consp->status |= VNTSD_CONS_SIG_WAIT;
+
+		/* signal client to disconnect the console */
+		(void) vntsd_que_walk(consp->clientpq,
+		    (el_func_t)vntsd_notify_client_cons_del);
+
+		/* also interrupt the console's write thread */
+		(void) thr_kill(consp->wr_tid, SIGUSR1);
+		to.tv_sec = VNTSD_CV_WAIT_DELTIME;
+		to.tv_nsec = 0;
+
+		/*
+		 * Wait for clients to disconnect; the timeout bounds the
+		 * wait so the signals above are re-sent if a client missed
+		 * one.
+		 */
+		(void) cond_reltimedwait(&consp->cvp, &consp->lock, &to);
+	}
+
+	(void) mutex_unlock(&consp->lock);
+
+	free_cons(consp);
+}
+
+/*
+ * que_find/que_walk predicate: B_TRUE when the group is flagged as
+ * having a console that needs cleanup.
+ */
+static boolean_t
+find_clean_cons_group(vntsd_group_t *groupp)
+{
+	return ((groupp->status & VNTSD_GROUP_CLEAN_CONS) != 0);
+}
+
+/*
+ * que_find/que_walk predicate: B_TRUE when the console is marked
+ * deleted and is awaiting cleanup.
+ */
+static boolean_t
+find_clean_cons(vntsd_cons_t *consp)
+{
+	return ((consp->status & VNTSD_CONS_DELETED) != 0);
+}
+
+/*
+ * Delete every console marked VNTSD_CONS_DELETED, in every group marked
+ * VNTSD_GROUP_CLEAN_CONS.  Called from the main thread (woken by
+ * SIGUSR1 from vntsd_vcc_err/delete_cons_before_add).  A group that
+ * loses its last console is removed and cleaned up as well.
+ */
+void
+vntsd_delete_cons(vntsd_t *vntsdp)
+{
+	vntsd_group_t *groupp;
+	vntsd_cons_t *consp;
+
+	for (; ; ) {
+		/* get the group contains deleted console */
+		(void) mutex_lock(&vntsdp->lock);
+		groupp = vntsd_que_walk(vntsdp->grouppq,
+		    (el_func_t)find_clean_cons_group);
+		if (groupp == NULL) {
+			/* no more group has console deleted */
+			(void) mutex_unlock(&vntsdp->lock);
+			return;
+		}
+		groupp->status &= ~VNTSD_GROUP_CLEAN_CONS;
+		(void) mutex_unlock(&vntsdp->lock);
+
+		for (; ; ) {
+			/* get the console to be deleted */
+			(void) mutex_lock(&groupp->lock);
+			assert(groupp->conspq);
+			consp = vntsd_que_walk(groupp->conspq,
+			    (el_func_t)find_clean_cons);
+			if (consp == NULL) {
+				/* no more cons to delete */
+				(void) mutex_unlock(&groupp->lock);
+				break;
+			}
+
+			/* remove console from the group */
+			(void) vntsd_que_rm(&groupp->conspq, consp);
+			groupp->num_cons--;
+			(void) mutex_unlock(&groupp->lock);
+
+			/* clean up the console (blocks until clients gone) */
+			cleanup_cons(consp);
+
+			/*
+			 * NOTE(review): num_cons is read here without
+			 * groupp->lock held — presumably safe because only
+			 * the main thread deletes consoles; confirm.
+			 */
+			/* delete group? */
+			if (groupp->num_cons == 0) {
+				/* no more console delete it */
+				assert(groupp->vntsd);
+
+				(void) mutex_lock(&groupp->vntsd->lock);
+				(void) vntsd_que_rm(&groupp->vntsd->grouppq,
+				    groupp);
+				(void) mutex_unlock(&groupp->vntsd->lock);
+
+				/* clean up the group */
+				vntsd_clean_group(groupp);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Clean up a group: free its console queue, force any console-less
+ * clients to disconnect, stop the listen thread, and free the group.
+ * May be called from the listen thread itself (in which case it frees
+ * the group and returns without joining).
+ */
+void
+vntsd_clean_group(vntsd_group_t *groupp)
+{
+
+	timestruc_t	to;
+
+	D1(stderr, "t@%d clean_group() group=%s tcp=%lld\n", thr_self(),
+	    groupp->group_name, groupp->tcp_port);
+
+	(void) mutex_lock(&groupp->lock);
+
+	/* prevent from reentry */
+	if (groupp->status & VNTSD_GROUP_CLEANUP) {
+		(void) mutex_unlock(&groupp->lock);
+		return;
+	}
+	groupp->status |= VNTSD_GROUP_CLEANUP;
+	vntsd_free_que(&groupp->conspq, (clean_func_t)cleanup_cons);
+	(void) mutex_unlock(&groupp->lock);
+
+	/*
+	 * NOTE(review): the loop below calls cond_reltimedwait() on
+	 * groupp->lock, but the lock was released just above —
+	 * cond_reltimedwait requires the mutex to be held on entry.
+	 * It looks like the unlock should come after this loop; confirm
+	 * (the listen-thread branch below also unlocks again).
+	 */
+	/* walk through no cons client queue */
+	while (groupp->no_cons_clientpq != NULL) {
+		groupp->status |= VNTSD_GROUP_SIG_WAIT;
+		(void) vntsd_que_walk(groupp->no_cons_clientpq,
+		    (el_func_t)vntsd_notify_client_cons_del);
+		to.tv_sec = VNTSD_CV_WAIT_DELTIME;
+		to.tv_nsec = 0;
+		(void) cond_reltimedwait(&groupp->cvp, &groupp->lock, &to);
+	}
+
+	if (groupp->listen_tid == thr_self()) {
+		/* listen thread is exiting; free the group in place */
+		(void) mutex_lock(&(groupp->vntsd->lock));
+		(void) vntsd_que_rm(&groupp->vntsd->grouppq, groupp);
+		(void) mutex_unlock(&groupp->vntsd->lock);
+
+		(void) cond_destroy(&groupp->cvp);
+		(void) mutex_unlock(&groupp->lock);
+		(void) mutex_destroy(&groupp->lock);
+		free(groupp);
+		return;
+	}
+
+	/* signal listen thread to exit */
+	groupp->status |= VNTSD_GROUP_SIG_WAIT;
+
+	/* listen thread clears SIG_WAIT and signals cvp when it exits */
+	while (groupp->status & VNTSD_GROUP_SIG_WAIT) {
+		(void) thr_kill(groupp->listen_tid, SIGUSR1);
+		to.tv_sec = VNTSD_CV_WAIT_DELTIME;
+		to.tv_nsec = 0;
+		/* wait listen thread to exit */
+		(void) cond_reltimedwait(&groupp->cvp, &groupp->lock, &to);
+	}
+
+	(void) mutex_unlock(&groupp->lock);
+	(void) thr_join(groupp->listen_tid, NULL, NULL);
+	/* free group */
+	(void) cond_destroy(&groupp->cvp);
+	(void) mutex_destroy(&groupp->lock);
+	free(groupp);
+}
+
+/*
+ * Allocate and initialize a console structure from the vcc console
+ * record, and append it to the group's console queue.  Returns the new
+ * console, or NULL on failure (already logged).
+ */
+static vntsd_cons_t *
+alloc_cons(vntsd_group_t *groupp, vcc_console_t *consolep)
+{
+	vntsd_cons_t *consp;
+	int	rv;
+
+	/* allocate console */
+	consp = (vntsd_cons_t *)malloc(sizeof (vntsd_cons_t));
+	if (consp == NULL) {
+		vntsd_log(VNTSD_ERR_NO_MEM, "alloc_cons");
+		return (NULL);
+	}
+
+	/* intialize console */
+	bzero(consp, sizeof (vntsd_cons_t));
+
+	(void) mutex_init(&consp->lock, USYNC_THREAD|LOCK_ERRORCHECK, NULL);
+	(void) cond_init(&consp->cvp, USYNC_THREAD, NULL);
+
+	consp->cons_no = consolep->cons_no;
+	(void) strlcpy(consp->domain_name, consolep->domain_name, MAXPATHLEN);
+	(void) strlcpy(consp->dev_name, consolep->dev_name, MAXPATHLEN);
+	/* -1 marks "not yet created/opened" */
+	consp->wr_tid = (thread_t)-1;
+	/* NOTE(review): vcc_fd is an int fd; cast should be int, not thread_t */
+	consp->vcc_fd = (thread_t)-1;
+
+	/* join the group */
+	(void) mutex_lock(&groupp->lock);
+
+	if ((rv = vntsd_que_append(&groupp->conspq, consp)) !=
+	    VNTSD_SUCCESS) {
+		(void) mutex_unlock(&groupp->lock);
+		vntsd_log(rv, "alloc_cons");
+		free_cons(consp);
+		return (NULL);
+	}
+	groupp->num_cons++;
+	consp->group = groupp;
+
+	(void) mutex_unlock(&groupp->lock);
+
+	D1(stderr, "t@%d alloc_cons@%d %s %s\n", thr_self(),
+	    consp->cons_no, consp->domain_name, consp->dev_name);
+
+	return (consp);
+}
+
+/* que_find comparator: match a group by its TCP listen port */
+static boolean_t
+grp_by_tcp(vntsd_group_t *groupp, uint64_t *tcp_port)
+{
+	assert(groupp);
+	assert(tcp_port);
+	return (groupp->tcp_port == *tcp_port);
+}
+
+/*
+ * Allocate and initialize a group structure for the given name and TCP
+ * port.  The group is NOT queued to the daemon here; the caller does
+ * that.  Returns NULL on allocation failure (already logged).
+ */
+static vntsd_group_t *
+alloc_group(vntsd_t *vntsdp, char *group_name, uint64_t tcp_port)
+{
+	vntsd_group_t *groupp;
+
+	/* allocate group */
+	groupp = (vntsd_group_t *)malloc(sizeof (vntsd_group_t));
+	if (groupp == NULL) {
+		vntsd_log(VNTSD_ERR_NO_MEM, "alloc_group");
+		return (NULL);
+	}
+
+	/* initialize group */
+	bzero(groupp, sizeof (vntsd_group_t));
+
+	(void) mutex_init(&groupp->lock, USYNC_THREAD|LOCK_ERRORCHECK, NULL);
+	(void) cond_init(&groupp->cvp, USYNC_THREAD, NULL);
+
+	if (group_name != NULL) {
+		(void) memcpy(groupp->group_name, group_name, MAXPATHLEN);
+	}
+
+	groupp->tcp_port = tcp_port;
+	/* -1 marks "no listen thread / no socket yet" */
+	groupp->listen_tid = (thread_t)-1;
+	/* NOTE(review): sockfd is an int fd; cast should be int, not thread_t */
+	groupp->sockfd = (thread_t)-1;
+	groupp->vntsd = vntsdp;
+
+	D1(stderr, "t@%d alloc_group@%lld:%s\n", thr_self(), groupp->tcp_port,
+	    groupp->group_name);
+
+	return (groupp);
+}
+
+/*
+ * Initialize a console; if the console is associated with a new group,
+ * initialize the group as well.
+ *
+ * On success *new_groupp points to the newly created group (so the
+ * caller can start a listen thread for it), or is NULL if the console
+ * joined an existing group.  Returns VNTSD_SUCCESS or an error status.
+ *
+ * Bug fixes vs. original: the error path tested the out-parameter
+ * pointer (new_groupp, always non-NULL) instead of *new_groupp, so an
+ * alloc_cons() failure freed a pre-existing, shared group (use after
+ * free).  A newly created group was also freed while still linked on
+ * vntsdp->grouppq, and leaked when vntsd_que_append() failed.
+ */
+static int
+alloc_cons_with_group(vntsd_t *vntsdp, vcc_console_t *consp,
+    vntsd_group_t **new_groupp)
+{
+	vntsd_group_t	*groupp = NULL;
+	int		rv;
+
+	*new_groupp = NULL;
+
+	/* match group by tcp port */
+	(void) mutex_lock(&vntsdp->lock);
+	groupp = vntsd_que_find(vntsdp->grouppq,
+	    (compare_func_t)grp_by_tcp, (void *)&(consp->tcp_port));
+	(void) mutex_unlock(&vntsdp->lock);
+
+	if (groupp != NULL) {
+		/* group with same tcp port found */
+
+		if (strcmp(groupp->group_name, consp->group_name)) {
+			/* conflict group name */
+			vntsd_log(VNTSD_ERR_VCC_GRP_NAME,
+			    "group name is different from existing group");
+			return (VNTSD_ERR_VCC_CTRL_DATA);
+		}
+
+	} else {
+		/* new group */
+		groupp = alloc_group(vntsdp, consp->group_name,
+		    consp->tcp_port);
+		if (groupp == NULL) {
+			return (VNTSD_ERR_NO_MEM);
+		}
+
+		assert(groupp->conspq == NULL);
+		/* queue group to vntsdp */
+		(void) mutex_lock(&vntsdp->lock);
+		rv = vntsd_que_append(&vntsdp->grouppq, groupp);
+		(void) mutex_unlock(&vntsdp->lock);
+
+		if (rv != VNTSD_SUCCESS) {
+			/* not queued - free the group before returning */
+			(void) cond_destroy(&groupp->cvp);
+			(void) mutex_destroy(&groupp->lock);
+			free(groupp);
+			return (rv);
+		}
+
+		*new_groupp = groupp;
+	}
+
+	/* initialize console */
+	if (alloc_cons(groupp, consp) == NULL) {
+		/* no memory */
+		if (*new_groupp != NULL) {
+			/*
+			 * Clean up only a group we created in this call.
+			 * Unlink it from the daemon's group queue first so
+			 * the queue does not retain a dangling pointer.
+			 */
+			(void) mutex_lock(&vntsdp->lock);
+			(void) vntsd_que_rm(&vntsdp->grouppq, groupp);
+			(void) mutex_unlock(&vntsdp->lock);
+
+			*new_groupp = NULL;
+			(void) cond_destroy(&groupp->cvp);
+			(void) mutex_destroy(&groupp->lock);
+			free(groupp);
+		}
+
+		return (VNTSD_ERR_NO_MEM);
+	}
+
+	return (VNTSD_SUCCESS);
+
+}
+
+
+/*
+ * Create the listen thread for a group.  Returns B_FALSE on success,
+ * B_TRUE on failure (so it can double as a que_walk callback that stops
+ * at a failing group).  On failure the group's console queue is freed
+ * and listen_tid reset.
+ *
+ * Fixes vs. original: doubled "(void) (void)" cast, and the adjacent
+ * string literals "for""group" which produced the misleading message
+ * "...listen thread forgroup...".
+ */
+static boolean_t
+create_listen_thread(vntsd_group_t *groupp)
+{
+
+	char err_msg[VNTSD_LINE_LEN];
+	int rv;
+
+	assert(groupp);
+
+	(void) mutex_lock(&groupp->lock);
+	assert(groupp->num_cons);
+
+	D1(stderr, "t@%d create_listen:%lld\n", thr_self(), groupp->tcp_port);
+
+	if ((rv = thr_create(NULL, 0, (thr_func_t)vntsd_listen_thread,
+	    (void *)groupp, THR_DETACHED, &groupp->listen_tid))
+	    != 0) {
+		(void) snprintf(err_msg, sizeof (err_msg),
+		    "Can not create listen thread for "
+		    "group %s tcp %llx\n", groupp->group_name,
+		    groupp->tcp_port);
+		vntsd_log(VNTSD_ERR_CREATE_LISTEN_THR, err_msg);
+
+		/* clean up group queue */
+		vntsd_free_que(&groupp->conspq, (clean_func_t)free_cons);
+		groupp->listen_tid = (thread_t)-1;
+	}
+
+	(void) mutex_unlock(&groupp->lock);
+
+	return (rv != 0);
+}
+
+/*
+ * If a console with this port number already exists in the group
+ * listening on tcp_port, mark it deleted and run the deletion pass.
+ * Used to purge stale state when a console is deleted and re-added
+ * before the daemon noticed the deletion.  No-op if the group or
+ * console does not exist.
+ */
+static void
+delete_cons_before_add(vntsd_t *vntsdp, uint64_t tcp_port, uint_t cons_no)
+{
+	vntsd_group_t	*groupp;
+	vntsd_cons_t	*consp;
+
+	/* group exists? */
+	(void) mutex_lock(&vntsdp->lock);
+	groupp = vntsd_que_find(vntsdp->grouppq, (compare_func_t)grp_by_tcp,
+	    (void *)&(tcp_port));
+	(void) mutex_unlock(&vntsdp->lock);
+
+	if (groupp == NULL) {
+		/* no such group */
+		return;
+	}
+
+	/* group exists, if console exists? */
+	(void) mutex_lock(&groupp->lock);
+	consp = vntsd_que_find(groupp->conspq,
+	    (compare_func_t)vntsd_cons_by_consno, &cons_no);
+
+	if (consp == NULL) {
+		/* no such console */
+		(void) mutex_unlock(&groupp->lock);
+		return;
+	}
+	/* console exists - delete console */
+
+	(void) mutex_lock(&consp->lock);
+
+	/* flag console and group for vntsd_delete_cons() to pick up */
+	consp->status |= VNTSD_CONS_DELETED;
+	groupp->status |= VNTSD_GROUP_CLEAN_CONS;
+
+	(void) mutex_unlock(&consp->lock);
+
+	(void) mutex_unlock(&groupp->lock);
+
+	vntsd_delete_cons(vntsdp);
+}
+
+/*
+ * Add a console: query its configuration from vcc, purge any stale copy,
+ * then allocate the console (and its group, if new) and start a listen
+ * thread for a new group.
+ *
+ * Fix vs. original: on listen-thread creation failure the new group was
+ * freed while still linked on vntsdp->grouppq, leaving a dangling
+ * pointer in the daemon's group queue; it is now unlinked first.
+ */
+static void
+do_add_cons(vntsd_t *vntsdp, int cons_no)
+{
+	vcc_console_t	console;
+	vntsd_group_t	*groupp;
+	int		rv;
+	char		err_msg[VNTSD_LINE_LEN];
+
+
+	(void) snprintf(err_msg, sizeof (err_msg),
+	    "do_add_cons():Can not add console=%d", cons_no);
+
+	/* get console configuration from vcc */
+
+	if ((rv = vntsd_vcc_ioctl(VCC_CONS_INFO, cons_no, (void *)&console))
+	    != VNTSD_SUCCESS) {
+		vntsd_log(rv, err_msg);
+		return;
+	}
+
+	/* clean up the console if console was deleted and added again */
+	delete_cons_before_add(vntsdp, console.tcp_port, console.cons_no);
+
+	/* initialize console */
+
+	if ((rv = alloc_cons_with_group(vntsdp, &console, &groupp)) !=
+	    VNTSD_SUCCESS) {
+		/* no memory to add this new console */
+		vntsd_log(rv, err_msg);
+		return;
+	}
+
+	if (groupp != NULL) {
+		/* new group - create listen thread for this console */
+		if (create_listen_thread(groupp)) {
+			vntsd_log(VNTSD_ERR_CREATE_LISTEN_THR, err_msg);
+
+			/* unlink before freeing - avoid dangling queue entry */
+			(void) mutex_lock(&vntsdp->lock);
+			(void) vntsd_que_rm(&vntsdp->grouppq, groupp);
+			(void) mutex_unlock(&vntsdp->lock);
+
+			(void) cond_destroy(&groupp->cvp);
+			(void) mutex_destroy(&groupp->lock);
+			free(groupp);
+		}
+
+	}
+}
+
+/*
+ * Main-thread wakeup handler: ask vcc why we were woken (VCC_INQUIRY)
+ * and dispatch on the reason.  Currently only console addition is
+ * handled; anything else is logged as unknown.
+ */
+void
+vntsd_daemon_wakeup(vntsd_t *vntsdp)
+{
+
+	vcc_response_t	inq_data;
+
+	/* reason to wake up */
+	if (vntsd_vcc_ioctl(VCC_INQUIRY, 0, (void *)&inq_data) !=
+	    VNTSD_SUCCESS) {
+		vntsd_log(VNTSD_ERR_VCC_IOCTL, "vntsd_daemon_wakeup()");
+		return;
+	}
+
+	D1(stderr, "t@%d vntsd_daemon_wakup:msg %d port %x\n", thr_self(),
+	    inq_data.reason, inq_data.cons_no);
+
+	switch (inq_data.reason) {
+
+	case VCC_CONS_ADDED:
+		do_add_cons(vntsdp, inq_data.cons_no);
+		break;
+
+	default:
+		DERR(stderr, "t@%d daemon_wakeup:ioctl_unknown %d\n",
+		    thr_self(), inq_data.reason);
+		vntsd_log(VNTSD_ERR_UNKNOWN_CMD, "from vcc\n");
+		break;
+	}
+}
+
+/*
+ * Read the initial console configuration from vcc at daemon startup:
+ * fetch the console table, build groups and consoles from it, and
+ * start a listen thread per group.
+ *
+ * Fix vs. original: the malloc'ed console table (consp) was leaked
+ * both on the VCC_CONS_TBL error path and on the normal path; it is
+ * now freed once its contents have been copied.
+ */
+void
+vntsd_get_config(vntsd_t *vntsdp)
+{
+
+	int		i;
+	int		num_cons;
+	vcc_console_t	*consp;
+	vntsd_group_t	*groupp;
+
+	/* num of consoles */
+	num_cons = 0;
+
+	if (vntsd_vcc_ioctl(VCC_NUM_CONSOLE, 0, (void *)&num_cons) !=
+	    VNTSD_SUCCESS) {
+		vntsd_log(VNTSD_ERR_VCC_IOCTL, "VCC_NUM_CONSOLE failed\n");
+		return;
+	}
+
+	D3(stderr, "get_config:num_cons=%d", num_cons);
+
+	if (num_cons == 0) {
+		return;
+	}
+
+	/* allocate memory for all consoles */
+	consp = malloc(num_cons*sizeof (vcc_console_t));
+
+	if (consp == NULL) {
+		vntsd_log(VNTSD_ERR_NO_MEM, "for console table.");
+		return;
+	}
+
+	/* get console table */
+	if (vntsd_vcc_ioctl(VCC_CONS_TBL, 0, (void *)consp) != VNTSD_SUCCESS) {
+		vntsd_log(VNTSD_ERR_VCC_IOCTL, " VCC_CONS_TBL "
+		    "for console table\n");
+		free(consp);
+		return;
+	}
+
+	/* initialize groups and consoles */
+	for (i = 0; i < num_cons; i++) {
+		if (alloc_cons_with_group(vntsdp, &consp[i], &groupp)
+		    != VNTSD_SUCCESS) {
+			vntsd_log(VNTSD_ERR_ADD_CONS_FAILED, "get_config");
+		}
+	}
+
+	/*
+	 * alloc_cons() copied everything it needs out of the table
+	 * (strlcpy of names, scalar fields), so it can be released now.
+	 */
+	free(consp);
+
+	/* create listen thread for each group */
+	(void) mutex_lock(&vntsdp->lock);
+
+	for (; ; ) {
+		/*
+		 * que_walk stops at (and returns) the first group whose
+		 * create_listen_thread() callback returns B_TRUE, i.e. a
+		 * group whose listen thread failed to start.
+		 */
+		groupp = vntsd_que_walk(vntsdp->grouppq,
+		    (el_func_t)create_listen_thread);
+		if (groupp == NULL) {
+			break;
+		}
+		vntsd_log(VNTSD_ERR_CREATE_LISTEN_THR, "get config()");
+	}
+
+	(void) mutex_unlock(&vntsdp->lock);
+}
diff --git a/usr/src/cmd/vntsd/write.c b/usr/src/cmd/vntsd/write.c
new file mode 100644
index 0000000000..16f07029c5
--- /dev/null
+++ b/usr/src/cmd/vntsd/write.c
@@ -0,0 +1,251 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * write thread - reads from the vcc console and writes to tcp clients. There
+ * is one writer and multiple readers per console. The first client that
+ * connects to a console gets write access.
+ * Writer thread writes vcc data to all tcp clients that connected to
+ * the console.
+ */
+
+#include <stdio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <thread.h>
+#include <synch.h>
+#include <signal.h>
+#include <assert.h>
+#include <poll.h>
+#include <syslog.h>
+#include <libintl.h>
+#include "vntsd.h"
+#include "chars.h"
+
+/*
+ * check the state of the write thread. exit if no clients remain connected
+ * to the console.
+ */
+static void
+write_chk_status(vntsd_cons_t *consp, int status)
+{
+
+ if ((consp->status & VNTSD_CONS_DELETED) || (consp->clientpq == NULL)) {
+ thr_exit(0);
+ }
+
+ switch (status) {
+ case VNTSD_STATUS_VCC_IO_ERR:
+ assert(consp->group != NULL);
+ if (vntsd_vcc_err(consp) != VNTSD_STATUS_CONTINUE) {
+ thr_exit(0);
+ }
+ break;
+ case VNTSD_STATUS_INTR:
+ thr_exit(0);
+ default:
+ break;
+
+ }
+}
+
+/*
+ * skip_terminal_null()
+ * scan terminal null character sequence (0x5e 0x40)
+ * return number of characters in the buf after skipping terminal null
+ * sequence.
+ */
+static int
+skip_terminal_null(char *buf, int buf_sz, int sz)
+{
+ int i, j;
+ static int term_null_seq = 0;
+
+ assert(sz >= 0);
+
+ if (buf_sz < sz+1) {
+ return (-1);
+ }
+
+ if (term_null_seq) {
+ /* skip 0x5e previously */
+ term_null_seq = 0;
+
+ if (buf[0] != 0x40) {
+ /* not terminal null sequence put 0x5e back */
+ for (i = sz; i > 0; i--) {
+ buf[i] = buf[i-1];
+ }
+
+ buf[0] = 0x5e;
+
+ sz++;
+ } else {
+ /* skip terminal null sequence */
+ sz--;
+
+ if (sz == 0) {
+ return (sz);
+ }
+
+ for (i = 0; i < sz; i++) {
+ buf[i] = buf[i+1];
+ }
+ }
+ }
+
+ for (; ; ) {
+ for (i = 0; i < sz; i++) {
+ if (buf[i] == '\0') {
+ return (i);
+ }
+
+ if (buf[i] == 0x5e) {
+ /* possible terminal null sequence */
+ if (i == sz -1) {
+ /* last character in buffer */
+ term_null_seq = 1;
+ sz--;
+ buf[i] = 0;
+ return (sz);
+ }
+
+ if (buf[i+1] == 0x40) {
+ /* found terminal null sequence */
+ sz -= 2;
+ for (j = i; j < sz -i; j++) {
+ buf[j] = buf[j+2];
+ }
+ break;
+ }
+
+ if (buf[i+1] == '\0') {
+ buf[i] = 0;
+ term_null_seq = 1;
+ return (i);
+ }
+
+ }
+ }
+
+ if (i == sz) {
+ /* end of scan */
+ return (sz);
+ }
+ }
+}
+
+/* read data from vcc */
+static int
+read_vcc(vntsd_cons_t *consp, char *buf, ssize_t *sz)
+{
+ /* read from vcc */
+ *sz = read(consp->vcc_fd, buf, VNTSD_MAX_BUF_SIZE);
+
+ if (errno == EINTR) {
+ return (VNTSD_STATUS_INTR);
+ }
+
+ if ((*sz > 0)) {
+ return (VNTSD_SUCCESS);
+ }
+ return (VNTSD_STATUS_VCC_IO_ERR);
+}
+
+static int s_sz;
+/* write to a client */
+static boolean_t
+write_all_clients(vntsd_client_t *clientp, char *buf)
+{
+ int rv;
+
+ rv = vntsd_write_client(clientp, buf, s_sz);
+ if (rv != VNTSD_SUCCESS) {
+ (void) mutex_lock(&clientp->lock);
+ clientp->status |= VNTSD_CLIENT_IO_ERR;
+ assert(clientp->cons);
+ (void) thr_kill(clientp->cons_tid, NULL);
+ (void) mutex_unlock(&clientp->lock);
+ }
+ return (B_FALSE);
+
+}
+
+/* vntsd_write_thread() */
+void*
+vntsd_write_thread(vntsd_cons_t *consp)
+{
+ char buf[VNTSD_MAX_BUF_SIZE+1];
+ int sz;
+ int rv;
+
+ D1(stderr, "t@%d vntsd_write@%d\n", thr_self(), consp->vcc_fd);
+
+ assert(consp);
+ write_chk_status(consp, VNTSD_SUCCESS);
+
+ for (; ; ) {
+ bzero(buf, VNTSD_MAX_BUF_SIZE +1);
+
+ /* read data */
+ rv = read_vcc(consp, buf, &sz);
+
+ write_chk_status(consp, rv);
+
+ if (sz <= 0) {
+ continue;
+ }
+
+ /* has data */
+ if ((s_sz = skip_terminal_null(buf, sz+1, sz)) == 0) {
+ /* terminal null sequence */
+ continue;
+ }
+
+ assert(s_sz > 0);
+
+ /*
+ * output data to all clients connected
+ * to this console
+ */
+
+ (void) mutex_lock(&consp->lock);
+ (void) vntsd_que_find(consp->clientpq,
+ (compare_func_t)write_all_clients, buf);
+ (void) mutex_unlock(&consp->lock);
+
+ write_chk_status(consp, VNTSD_SUCCESS);
+
+ }
+
+ /*NOTREACHED*/
+ return (NULL);
+}
diff --git a/usr/src/common/mdesc/mdesc_diff.c b/usr/src/common/mdesc/mdesc_diff.c
new file mode 100644
index 0000000000..28f55abc92
--- /dev/null
+++ b/usr/src/common/mdesc/mdesc_diff.c
@@ -0,0 +1,602 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else /* _KERNEL */
+#include <string.h>
+#include <strings.h>
+#endif /* _KERNEL */
+#include <sys/note.h>
+#include <sys/mdesc.h>
+#include <sys/mdesc_impl.h>
+
+#define MDD_FREE_CHECK(mdp, ptr, sz) \
+ do { \
+ if (ptr) mdp->freep(ptr, sz); \
+ _NOTE(CONSTCOND) } while (0)
+
+#define MD_DIFF_MAGIC 0x4D445F4449464621ull /* 'MD_DIFF!' */
+#define MD_DIFF_NOMATCH (-1)
+#define MD_DIFF_MATCH (1)
+
+typedef struct {
+ mde_cookie_t *mdep;
+ uint_t nelem;
+} md_diff_t;
+
+typedef struct {
+ uint64_t mdd_magic;
+ md_diff_t added;
+ md_diff_t removed;
+ md_diff_t match1;
+ md_diff_t match2;
+ void *(*allocp)(size_t);
+ void (*freep)(void *, size_t);
+} md_diff_impl_t;
+
+/*
+ * Internal utility functions
+ */
+static int mdd_scan_for_nodes(md_t *mdp, mde_cookie_t start,
+ char *compnodep, int *countp, mde_cookie_t **nodespp);
+
+static boolean_t mdd_any_dup_nodes(md_impl_t *mdp, md_prop_match_t *pmp,
+ int count, mde_cookie_t *nodesp);
+
+static int mdd_node_list_match(md_impl_t *md1, md_impl_t *md2,
+ md_element_t *match_nodep, mde_cookie_t *match_listp,
+ uint8_t *match_seenp, int start, int end, md_prop_match_t *match_elemsp);
+
+static int mdd_node_compare(md_impl_t *mdap, md_impl_t *mdbp,
+ md_element_t *nodeap, md_element_t *nodebp, md_prop_match_t *match_elemsp);
+
+/*
+ * Given two DAGs and information about how to uniquely identify
+ * the nodes of interest, determine which nodes have been added
+ * to the second MD, removed from the first MD, or exist in both
+ * MDs. This information is recorded and can be accessed using the
+ * opaque cookie returned to the caller.
+ */
+md_diff_cookie_t
+md_diff_init(md_t *md1p, mde_cookie_t start1, md_t *md2p, mde_cookie_t start2,
+ char *compnodep, md_prop_match_t *match_fieldsp)
+{
+ int idx;
+ md_impl_t *md1 = (md_impl_t *)md1p;
+ md_impl_t *md2 = (md_impl_t *)md2p;
+ mde_cookie_t *md1nodesp = NULL;
+ mde_cookie_t *md2nodesp = NULL;
+ int md1count = 0;
+ int md2count = 0;
+ uint8_t *seenp = NULL;
+
+ /* variables used to gather results */
+ md_diff_impl_t *diff_res;
+ mde_cookie_t *mde_add_scr;
+ mde_cookie_t *mde_rem_scr;
+ mde_cookie_t *mde_match1_scr;
+ mde_cookie_t *mde_match2_scr;
+ int nadd = 0;
+ int nrem = 0;
+ int nmatch = 0;
+
+ /* sanity check params */
+ if ((md1p == NULL) || (md2p == NULL))
+ return (MD_INVAL_DIFF_COOKIE);
+
+ if ((start1 == MDE_INVAL_ELEM_COOKIE) ||
+ (start2 == MDE_INVAL_ELEM_COOKIE))
+ return (MD_INVAL_DIFF_COOKIE);
+
+ if ((compnodep == NULL) || (match_fieldsp == NULL))
+ return (MD_INVAL_DIFF_COOKIE);
+
+ /*
+ * Prepare an array of the matching nodes from the first MD.
+ */
+ if (mdd_scan_for_nodes(md1p,
+ start1, compnodep, &md1count, &md1nodesp) == -1)
+ return (MD_INVAL_DIFF_COOKIE);
+
+ /* sanity check that all nodes are unique */
+ if (md1nodesp &&
+ mdd_any_dup_nodes(md1, match_fieldsp, md1count, md1nodesp)) {
+ MDD_FREE_CHECK(md1, md1nodesp, sizeof (mde_cookie_t) *
+ md1count);
+ return (MD_INVAL_DIFF_COOKIE);
+ }
+
+
+ /*
+ * Prepare an array of the matching nodes from the second MD.
+ */
+ if (mdd_scan_for_nodes(md2p,
+ start2, compnodep, &md2count, &md2nodesp) == -1)
+ return (MD_INVAL_DIFF_COOKIE);
+
+ /* sanity check that all nodes are unique */
+ if (md2nodesp &&
+ mdd_any_dup_nodes(md2, match_fieldsp, md2count, md2nodesp)) {
+ MDD_FREE_CHECK(md1, md1nodesp, sizeof (mde_cookie_t) *
+ md1count);
+ MDD_FREE_CHECK(md2, md2nodesp, sizeof (mde_cookie_t) *
+ md2count);
+ return (MD_INVAL_DIFF_COOKIE);
+ }
+
+ /* setup our result structure */
+ diff_res = md1->allocp(sizeof (md_diff_impl_t));
+ bzero(diff_res, sizeof (md_diff_impl_t));
+ diff_res->allocp = md1->allocp;
+ diff_res->freep = md1->freep;
+ diff_res->mdd_magic = MD_DIFF_MAGIC;
+
+ /*
+ * Special cases for empty lists
+ */
+ if ((md1count == 0) && (md2count != 0)) {
+ /* all the nodes found were added */
+ diff_res->added.mdep = md2nodesp;
+ diff_res->added.nelem = md2count;
+ return ((mde_cookie_t)diff_res);
+ }
+
+ if ((md1count != 0) && (md2count == 0)) {
+ /* all the nodes found were removed */
+ diff_res->removed.mdep = md1nodesp;
+ diff_res->removed.nelem = md1count;
+ return ((mde_cookie_t)diff_res);
+ }
+
+ if ((md1count == 0) && (md2count == 0))
+ /* no nodes found */
+ return ((mde_cookie_t)diff_res);
+
+ /*
+ * Both lists have some elements. Allocate some scratch
+ * buffers to sort them into our three categories, added,
+ * removed, and matched pairs.
+ */
+ mde_add_scr = diff_res->allocp(sizeof (mde_cookie_t) * md2count);
+ mde_rem_scr = diff_res->allocp(sizeof (mde_cookie_t) * md1count);
+ mde_match1_scr = diff_res->allocp(sizeof (mde_cookie_t) * md1count);
+ mde_match2_scr = diff_res->allocp(sizeof (mde_cookie_t) * md2count);
+
+ /* array of seen flags only needed for md2 */
+ seenp = (uint8_t *)diff_res->allocp(sizeof (uint8_t) * md2count);
+ bzero(seenp, sizeof (uint8_t) * md2count);
+
+ /*
+ * Make a pass through the md1 node array. Make note of
+ * any nodes not in the md2 array, indicating that they
+ * have been removed. Also keep track of the nodes that
+ * are present in both arrays for the matched pair results.
+ */
+ for (idx = 0; idx < md1count; idx++) {
+
+ md_element_t *elem = &(md1->mdep[md1nodesp[idx]]);
+
+ int match = mdd_node_list_match(md1, md2, elem, md2nodesp,
+ seenp, 0, md2count - 1, match_fieldsp);
+
+ if (match == MD_DIFF_NOMATCH)
+ /* record deleted node */
+ mde_rem_scr[nrem++] = md1nodesp[idx];
+ else {
+ /* record matched node pair */
+ mde_match1_scr[nmatch] = md1nodesp[idx];
+ mde_match2_scr[nmatch] = md2nodesp[match];
+ nmatch++;
+
+ /* mark that this match has been recorded */
+ seenp[match] = 1;
+ }
+ }
+
+ /*
+ * Make a pass through the md2 array. Any nodes that have
+ * not been marked as seen have been added.
+ */
+ for (idx = 0; idx < md2count; idx++) {
+ if (!seenp[idx])
+ /* record added node */
+ mde_add_scr[nadd++] = md2nodesp[idx];
+ }
+
+ /* fill in the added node list */
+ if (nadd) {
+ int addsz = sizeof (mde_cookie_t) * nadd;
+ diff_res->added.mdep = (mde_cookie_t *)diff_res->allocp(addsz);
+
+ bcopy(mde_add_scr, diff_res->added.mdep, addsz);
+
+ diff_res->added.nelem = nadd;
+ }
+
+ /* fill in the removed node list */
+ if (nrem) {
+ int remsz = sizeof (mde_cookie_t) * nrem;
+ diff_res->removed.mdep =
+ (mde_cookie_t *)diff_res->allocp(remsz);
+
+ bcopy(mde_rem_scr, diff_res->removed.mdep, remsz);
+ diff_res->removed.nelem = nrem;
+ }
+
+ /* fill in the matching node lists */
+ if (nmatch) {
+ int matchsz = sizeof (mde_cookie_t) * nmatch;
+ diff_res->match1.mdep =
+ (mde_cookie_t *)diff_res->allocp(matchsz);
+ diff_res->match2.mdep =
+ (mde_cookie_t *)diff_res->allocp(matchsz);
+
+ bcopy(mde_match1_scr, diff_res->match1.mdep, matchsz);
+ bcopy(mde_match2_scr, diff_res->match2.mdep, matchsz);
+ diff_res->match1.nelem = nmatch;
+ diff_res->match2.nelem = nmatch;
+ }
+
+ /* clean up */
+ md1->freep(md1nodesp, sizeof (mde_cookie_t) * md1count);
+ md2->freep(md2nodesp, sizeof (mde_cookie_t) * md2count);
+
+ diff_res->freep(mde_add_scr, sizeof (mde_cookie_t) * md2count);
+ diff_res->freep(mde_rem_scr, sizeof (mde_cookie_t) * md1count);
+ diff_res->freep(mde_match1_scr, sizeof (mde_cookie_t) * md1count);
+ diff_res->freep(mde_match2_scr, sizeof (mde_cookie_t) * md2count);
+
+ diff_res->freep(seenp, sizeof (uint8_t) * md2count);
+
+ return ((md_diff_cookie_t)diff_res);
+}
+
+/*
+ * Returns an array of the nodes added to the second MD in a
+ * previous md_diff_init() call. Returns the number of elements
+ * in the returned array. If the value is zero, the pointer
+ * passed back will be NULL.
+ */
+int
+md_diff_added(md_diff_cookie_t mdd, mde_cookie_t **mde_addedp)
+{
+ md_diff_impl_t *mddp = (md_diff_impl_t *)mdd;
+
+ if ((mddp == NULL) || (mddp->mdd_magic != MD_DIFF_MAGIC))
+ return (-1);
+
+ *mde_addedp = mddp->added.mdep;
+
+ return (mddp->added.nelem);
+}
+
+/*
+ * Returns an array of the nodes removed from the first MD in a
+ * previous md_diff_init() call. Returns the number of elements
+ * in the returned array. If the value is zero, the pointer
+ * passed back will be NULL.
+ */
+int
+md_diff_removed(md_diff_cookie_t mdd, mde_cookie_t **mde_removedp)
+{
+ md_diff_impl_t *mddp = (md_diff_impl_t *)mdd;
+
+ if ((mddp == NULL) || (mddp->mdd_magic != MD_DIFF_MAGIC))
+ return (-1);
+
+ *mde_removedp = mddp->removed.mdep;
+
+ return (mddp->removed.nelem);
+}
+
+/*
+ * Returns a pair of parallel arrays that contain nodes that were
+ * considered matching based on the match criteria passed in to
+ * a previous md_diff_init() call. Returns the number of elements
+ * in the arrays. If the value is zero, both pointers passed back
+ * will be NULL.
+ */
+int
+md_diff_matched(md_diff_cookie_t mdd, mde_cookie_t **mde_match1p,
+ mde_cookie_t **mde_match2p)
+{
+ md_diff_impl_t *mddp = (md_diff_impl_t *)mdd;
+
+ if ((mddp == NULL) || (mddp->mdd_magic != MD_DIFF_MAGIC))
+ return (-1);
+
+ *mde_match1p = mddp->match1.mdep;
+ *mde_match2p = mddp->match2.mdep;
+
+ return (mddp->match1.nelem);
+}
+
+/*
+ * Deallocate any storage used to store results of a previous
+ * md_diff_init() call. Returns 0 on success and -1 on failure.
+ */
+int
+md_diff_fini(md_diff_cookie_t mdd)
+{
+ md_diff_impl_t *mddp = (md_diff_impl_t *)mdd;
+
+ if ((mddp == NULL) || (mddp->mdd_magic != MD_DIFF_MAGIC))
+ return (-1);
+
+ mddp->mdd_magic = 0;
+
+ MDD_FREE_CHECK(mddp, mddp->added.mdep, mddp->added.nelem *
+ sizeof (mde_cookie_t));
+
+ MDD_FREE_CHECK(mddp, mddp->removed.mdep, mddp->removed.nelem *
+ sizeof (mde_cookie_t));
+
+ MDD_FREE_CHECK(mddp, mddp->match1.mdep, mddp->match1.nelem *
+ sizeof (mde_cookie_t));
+
+ MDD_FREE_CHECK(mddp, mddp->match2.mdep, mddp->match2.nelem *
+ sizeof (mde_cookie_t));
+
+ mddp->freep(mddp, sizeof (md_diff_impl_t));
+
+ return (0);
+}
+
+/*
+ * Walk the "fwd" DAG in an MD and return an array of nodes that are
+ * of the specified type. The start param is used to start the walk
+ * from an arbitrary location in the DAG. Returns an array of nodes
+ * as well as a count of the number of nodes in the array. If the
+ * count is zero, the node pointer will be passed back as NULL.
+ *
+ * Returns: 0 success; -1 failure
+ */
+static int
+mdd_scan_for_nodes(md_t *mdp,
+ mde_cookie_t start, char *compnodep, int *countp, mde_cookie_t **nodespp)
+{
+ mde_str_cookie_t cname;
+ mde_str_cookie_t aname;
+ md_impl_t *mdip = (md_impl_t *)mdp;
+
+ if (mdip == NULL)
+ return (-1);
+
+ cname = md_find_name(mdp, compnodep);
+ aname = md_find_name(mdp, "fwd");
+
+ /* get the number of nodes of interest in the DAG */
+ *countp = md_scan_dag(mdp, start, cname, aname, NULL);
+ if (*countp == 0) {
+ *nodespp = NULL;
+ return (0);
+ }
+
+ /* allocate the storage */
+ *nodespp = mdip->allocp(sizeof (mde_cookie_t) * (*countp));
+
+ /* populate our array with the matching nodes */
+ (void) md_scan_dag(mdp, start, cname, aname, *nodespp);
+
+ return (0);
+}
+
+/*
+ * Walk an array of nodes and check if there are any duplicate
+ * nodes. A duplicate is determined based on the specified match
+ * criteria. Returns B_TRUE if there are any duplicates and B_FALSE
+ * otherwise.
+ */
+static boolean_t
+mdd_any_dup_nodes(md_impl_t *mdp, md_prop_match_t *pmp, int count,
+ mde_cookie_t *nodesp)
+{
+ int idx;
+ int match;
+ md_element_t *elem;
+
+ ASSERT(count > 0 || nodesp == NULL);
+
+ for (idx = 0; idx < count; idx++) {
+ elem = &(mdp->mdep[nodesp[idx]]);
+
+ match = mdd_node_list_match(mdp, mdp, elem, nodesp, NULL,
+ idx + 1, count - 1, pmp);
+
+ if (match != MD_DIFF_NOMATCH)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Given a node and an array of nodes, compare the node to all elements
+ * in the specified start-end range of the array. If the node matches
+ * one of the nodes in the array, return the index of that node. Otherwise
+ * return MD_DIFF_NOMATCH.
+ *
+ * The optional seen array parameter can be used to optimize repeated
+ * calls to this function. If the seen array indicates that an element
+ * has already been matched, the full comparison is not necessary.
+ */
+static int
+mdd_node_list_match(md_impl_t *md1, md_impl_t *md2, md_element_t *match_nodep,
+ mde_cookie_t *match_listp, uint8_t *match_seenp, int start, int end,
+ md_prop_match_t *match_elemsp)
+{
+ int match;
+ int idx;
+ md_element_t *elem;
+
+ for (idx = start; idx <= end; idx++) {
+
+ if ((match_seenp != NULL) && (match_seenp[idx]))
+ continue;
+
+ elem = &(md2->mdep[match_listp[idx]]);
+
+ match = mdd_node_compare(md1, md2, match_nodep, elem,
+ match_elemsp);
+ if (match == MD_DIFF_MATCH)
+ return (idx);
+ }
+
+ return (MD_DIFF_NOMATCH);
+}
+
+/*
+ * Given two nodes and a list of properties, compare the nodes.
+ * A match is concluded if both nodes have all of the specified
+ * properties and all the values of those properties are the
+ * same. Returns MD_DIFF_NOMATCH if the nodes do not match and
+ * MD_DIFF_MATCH otherwise.
+ */
+static int
+mdd_node_compare(md_impl_t *mdap, md_impl_t *mdbp, md_element_t *nodeap,
+ md_element_t *nodebp, md_prop_match_t *match_elemsp)
+{
+ md_element_t *ap;
+ md_element_t *bp;
+ boolean_t nodea_interest;
+ boolean_t nodeb_interest;
+ int idx;
+
+ /* make sure we are starting at the beginning of the nodes */
+ if ((MDE_TAG(nodeap) != MDET_NODE) || (MDE_TAG(nodebp) != MDET_NODE))
+ return (MD_DIFF_NOMATCH);
+
+ for (idx = 0; match_elemsp[idx].type != MDET_LIST_END; idx++) {
+
+ int type;
+
+ nodea_interest = B_FALSE;
+ nodeb_interest = B_FALSE;
+
+ type = match_elemsp[idx].type;
+
+ /*
+ * Check node A for the property of interest
+ */
+ for (ap = nodeap; MDE_TAG(ap) != MDET_NODE_END; ap++) {
+ char *elemname;
+
+ if (MDE_TAG(ap) != type)
+ continue;
+
+ elemname = mdap->namep + MDE_NAME(ap);
+
+ if (strcmp(elemname, match_elemsp[idx].namep) == 0) {
+ /* found the property of interest */
+ nodea_interest = B_TRUE;
+ break;
+ }
+ }
+
+ /* node A is not of interest */
+ if (!nodea_interest)
+ return (MD_DIFF_NOMATCH);
+
+ /*
+ * Check node B for the property of interest
+ */
+ for (bp = nodebp; MDE_TAG(bp) != MDET_NODE_END; bp++) {
+ char *elemname;
+
+ if (MDE_TAG(bp) != type)
+ continue;
+
+ elemname = mdbp->namep + MDE_NAME(bp);
+
+ if (strcmp(elemname, match_elemsp[idx].namep) == 0) {
+ nodeb_interest = B_TRUE;
+ break;
+ }
+ }
+
+ /* node B is not of interest */
+ if (!nodeb_interest)
+ return (MD_DIFF_NOMATCH);
+
+ /*
+ * Both nodes have the property of interest. The
+		 * nodes are not a match unless the values of that
+		 * property match.
+ */
+ switch (type) {
+ case MDET_PROP_VAL:
+ if (MDE_PROP_VALUE(ap) != MDE_PROP_VALUE(bp))
+ return (MD_DIFF_NOMATCH);
+ break;
+
+ case MDET_PROP_STR: {
+ char *stra = (char *)(mdap->datap +
+ MDE_PROP_DATA_OFFSET(ap));
+ char *strb = (char *)(mdbp->datap +
+ MDE_PROP_DATA_OFFSET(bp));
+
+ if (strcmp(stra, strb) != 0)
+ return (MD_DIFF_NOMATCH);
+ break;
+ }
+
+ case MDET_PROP_DAT: {
+
+ caddr_t dataa;
+ caddr_t datab;
+
+ if (MDE_PROP_DATA_LEN(ap) != MDE_PROP_DATA_LEN(bp))
+ return (MD_DIFF_NOMATCH);
+
+ dataa = (caddr_t)(mdap->datap +
+ MDE_PROP_DATA_OFFSET(ap));
+ datab = (caddr_t)(mdbp->datap +
+ MDE_PROP_DATA_OFFSET(bp));
+
+ if (memcmp(dataa, datab, MDE_PROP_DATA_LEN(ap)) != 0)
+ return (MD_DIFF_NOMATCH);
+
+ break;
+ }
+
+ default:
+ /* unsupported prop type */
+ return (MD_DIFF_NOMATCH);
+ }
+ }
+
+ /*
+ * All the specified properties exist in both
+ * nodes and have the same value. The two nodes
+ * match.
+ */
+
+ return (MD_DIFF_MATCH);
+}
diff --git a/usr/src/common/mdesc/mdesc_fini.c b/usr/src/common/mdesc/mdesc_fini.c
index f0b010b386..70340e0b8e 100644
--- a/usr/src/common/mdesc/mdesc_fini.c
+++ b/usr/src/common/mdesc/mdesc_fini.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -30,6 +30,10 @@
#include <sys/mdesc.h>
#include <sys/mdesc_impl.h>
+/*
+ * Clean up the internal MD structure. Does not
+ * deallocate the buffer holding the MD.
+ */
int
md_fini(md_t *ptr)
{
diff --git a/usr/src/common/mdesc/mdesc_getbinsize.c b/usr/src/common/mdesc/mdesc_getbinsize.c
new file mode 100644
index 0000000000..e672806b1f
--- /dev/null
+++ b/usr/src/common/mdesc/mdesc_getbinsize.c
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mdesc.h>
+#include <sys/mdesc_impl.h>
+
+size_t
+md_get_bin_size(md_t *ptr)
+{
+ md_impl_t *mdp;
+
+ mdp = (md_impl_t *)ptr;
+
+ if (mdp == NULL)
+ return (0);
+
+ return (mdp->size);
+}
diff --git a/usr/src/common/mdesc/mdesc_getgen.c b/usr/src/common/mdesc/mdesc_getgen.c
new file mode 100644
index 0000000000..691343d772
--- /dev/null
+++ b/usr/src/common/mdesc/mdesc_getgen.c
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mdesc.h>
+#include <sys/mdesc_impl.h>
+
+uint64_t
+md_get_gen(md_t *ptr)
+{
+ md_impl_t *mdp;
+
+ mdp = (md_impl_t *)ptr;
+
+ if (mdp == NULL)
+ return (MDESC_INVAL_GEN);
+
+ return (mdp->gen);
+}
diff --git a/usr/src/common/mdesc/mdesc_init_intern.c b/usr/src/common/mdesc/mdesc_init_intern.c
index d4a9226e9e..c2bad6def2 100644
--- a/usr/src/common/mdesc/mdesc_init_intern.c
+++ b/usr/src/common/mdesc/mdesc_init_intern.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -32,23 +32,25 @@
#include <sys/mdesc_impl.h>
md_t *
-md_init_intern(uint64_t *ptr, void *(*allocp)(size_t),
- void (*freep)(void *, size_t))
+md_init_intern(uint64_t *ptr, void *(*allocp)(size_t),
+ void (*freep)(void *, size_t))
{
md_impl_t *mdp;
int idx;
int count;
int done;
+ uint64_t gen;
mde_str_cookie_t root_name;
/*
* Very basic checkup for alignment to avoid
* bus error issues.
*/
- if ((((uintptr_t)ptr)&7) != 0)
+ if ((((uintptr_t)ptr) & 7) != 0)
return (NULL);
mdp = (md_impl_t *)allocp(sizeof (md_impl_t));
+
if (mdp == NULL)
return (NULL);
@@ -60,6 +62,7 @@ md_init_intern(uint64_t *ptr, void *(*allocp)(size_t),
/*
* setup internal structures
*/
+
mdp->headerp = (md_header_t *)mdp->caddr;
if (mdtoh32(mdp->headerp->transport_version) != MD_TRANSPORT_VERSION) {
@@ -70,13 +73,13 @@ md_init_intern(uint64_t *ptr, void *(*allocp)(size_t),
mdp->name_blk_size = mdtoh32(mdp->headerp->name_blk_sz);
mdp->data_blk_size = mdtoh32(mdp->headerp->data_blk_sz);
- mdp->size = MD_HEADER_SIZE+mdp->node_blk_size+
- mdp->name_blk_size+mdp->data_blk_size;
+ mdp->size = MD_HEADER_SIZE + mdp->node_blk_size +
+ mdp->name_blk_size + mdp->data_blk_size;
- mdp->mdep = (md_element_t *)(mdp->caddr+MD_HEADER_SIZE);
- mdp->namep = (char *)(mdp->caddr+MD_HEADER_SIZE+mdp->node_blk_size);
- mdp->datap = (uint8_t *)(mdp->caddr+MD_HEADER_SIZE+mdp->name_blk_size+
- mdp->node_blk_size);
+ mdp->mdep = (md_element_t *)(mdp->caddr + MD_HEADER_SIZE);
+ mdp->namep = (char *)(mdp->caddr + MD_HEADER_SIZE + mdp->node_blk_size);
+ mdp->datap = (uint8_t *)(mdp->caddr + MD_HEADER_SIZE +
+ mdp->name_blk_size + mdp->node_blk_size);
mdp->root_node = MDE_INVAL_ELEM_COOKIE;
@@ -123,7 +126,7 @@ md_init_intern(uint64_t *ptr, void *(*allocp)(size_t),
mdp->root_node = (mde_cookie_t)idx;
}
idx = MDE_PROP_INDEX(np);
- count ++;
+ count++;
break;
default:
@@ -142,25 +145,35 @@ md_init_intern(uint64_t *ptr, void *(*allocp)(size_t),
* Register the counts
*/
- mdp->element_count = idx+1; /* include LIST_END */
+ mdp->element_count = idx + 1; /* include LIST_END */
mdp->node_count = count;
/*
* Final sanity check that everything adds up
*/
- if (mdp->element_count != (mdp->node_blk_size/MD_ELEMENT_SIZE))
+ if (mdp->element_count != (mdp->node_blk_size / MD_ELEMENT_SIZE))
goto cleanup;
mdp->md_magic = LIBMD_MAGIC;
+ /*
+ * Setup MD generation
+ */
+ if (md_get_prop_val((md_t *)mdp, mdp->root_node,
+ "md-generation#", &gen) != 0)
+ mdp->gen = MDESC_INVAL_GEN;
+ else
+ mdp->gen = gen;
+
return ((md_t *)mdp);
-cleanup:;
+cleanup:
/*
* Clean up here - including a name hash if
* we build one.
*/
-cleanup_nohash:;
+
+cleanup_nohash:
mdp->freep(mdp, sizeof (md_impl_t));
return (NULL);
}
diff --git a/usr/src/common/mdesc/mdesc_rootnode.c b/usr/src/common/mdesc/mdesc_rootnode.c
index 551130fc72..364c5004e2 100644
--- a/usr/src/common/mdesc/mdesc_rootnode.c
+++ b/usr/src/common/mdesc/mdesc_rootnode.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,7 +38,7 @@ md_root_node(md_t *ptr)
mdp = (md_impl_t *)ptr;
if (mdp->md_magic != LIBMD_MAGIC)
- return (-1);
+ return (MDE_INVAL_ELEM_COOKIE);
return (mdp->root_node);
}
diff --git a/usr/src/common/mdesc/mdesc_scandag.c b/usr/src/common/mdesc/mdesc_scandag.c
index 11b4e24ab2..ad1c74c9c2 100644
--- a/usr/src/common/mdesc/mdesc_scandag.c
+++ b/usr/src/common/mdesc/mdesc_scandag.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -121,7 +121,8 @@ md_scan_dag(md_t *ptr,
-static int mdl_scan_dag(md_impl_t *mdp,
+static int
+mdl_scan_dag(md_impl_t *mdp,
int nodeidx,
mde_str_cookie_t node_name_cookie,
mde_str_cookie_t arc_name_cookie,
diff --git a/usr/src/lib/libpcp/common/libpcp.c b/usr/src/lib/libpcp/common/libpcp.c
index 9d32387ff0..6c9eb09263 100644
--- a/usr/src/lib/libpcp/common/libpcp.c
+++ b/usr/src/lib/libpcp/common/libpcp.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -48,6 +47,8 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/glvc.h>
+#include <sys/vldc.h>
+#include <sys/ldc.h>
#include <netinet/in.h>
#include "libpcp.h"
@@ -81,6 +82,11 @@ static int check_magic_byte_presence(int byte_cnt, uint8_t *byte_val,
static uint16_t checksum(uint16_t *addr, int32_t count);
static int pcp_cleanup(int channel_fd);
+static int vldc_read(int fd, uint8_t *bufp, int size);
+static int vldc_write(int fd, uint8_t *bufp, int size);
+static int pcp_update_read_area(int byte_cnt);
+static int pcp_vldc_frame_error_handle(void);
+
/*
* local channel (glvc) file descriptor set by pcp_send_recv()
*/
@@ -156,6 +162,19 @@ static struct sigaction glvc_act;
/* To restore old SIGALRM signal handler */
static struct sigaction old_act;
+/*
+ * Variables to support vldc based streaming transport
+ */
+typedef enum {
+ GLVC_NON_STREAM,
+ VLDC_STREAMING
+} xport_t;
+
+static int xport_type = GLVC_NON_STREAM;
+#define CHANNEL_DEV "channel-devices"
+
+#define VLDC_MTU_SIZE (2048)
+
static void
glvc_timeout_handler(void)
{
@@ -178,6 +197,7 @@ pcp_init(char *channel_name)
if (channel_name == NULL)
return (PCPL_INVALID_ARGS);
+
/*
* Open virtual channel name.
*/
@@ -186,12 +206,33 @@ pcp_init(char *channel_name)
}
/*
- * Get the Channel MTU size
+ * Check if the channel-name points to a vldc node
+ * or a glvc node
*/
+ if (strstr(channel_name, CHANNEL_DEV) != NULL) {
+ vldc_opt_op_t op;
+
+ xport_type = VLDC_STREAMING;
+ mtu_size = VLDC_MTU_SIZE;
+
+ op.op_sel = VLDC_OP_SET;
+ op.opt_sel = VLDC_OPT_MODE;
+ op.opt_val = LDC_MODE_STREAM;
+ if (ioctl(channel_fd, VLDC_IOCTL_OPT_OP, &op) != 0) {
+ (void) close(channel_fd);
+ return (PCPL_GLVC_ERROR);
+ }
+ } else {
+ xport_type = GLVC_NON_STREAM;
+ /*
+ * Get the Channel MTU size
+ */
- if (pcp_get_prop(channel_fd, GLVC_XPORT_OPT_MTU_SZ, &mtu_size) != 0) {
- (void) close(channel_fd);
- return (PCPL_GLVC_ERROR);
+ if (pcp_get_prop(channel_fd, GLVC_XPORT_OPT_MTU_SZ,
+ &mtu_size) != 0) {
+ (void) close(channel_fd);
+ return (PCPL_GLVC_ERROR);
+ }
}
/*
@@ -233,7 +274,8 @@ pcp_close(int channel_fd)
{
if (channel_fd >= 0) {
- (void) pcp_cleanup(channel_fd);
+ if (xport_type == GLVC_NON_STREAM)
+ (void) pcp_cleanup(channel_fd);
(void) close(channel_fd);
} else {
return (-1);
@@ -631,7 +673,6 @@ pcp_peek(uint8_t *buf, int bytes_cnt)
(void) memcpy(buf, peek_area, m);
return (m);
-
}
/*
@@ -648,13 +689,19 @@ pcp_write(uint8_t *buf, int byte_cnt)
return (PCPL_INVALID_ARGS);
}
- (void) alarm(glvc_timeout);
+ if (xport_type == GLVC_NON_STREAM) {
+ (void) alarm(glvc_timeout);
- if ((ret = write(chnl_fd, buf, byte_cnt)) < 0) {
+ if ((ret = write(chnl_fd, buf, byte_cnt)) < 0) {
+ (void) alarm(0);
+ return (ret);
+ }
(void) alarm(0);
- return (ret);
+ } else {
+ if ((ret = vldc_write(chnl_fd, buf, byte_cnt)) <= 0) {
+ return (ret);
+ }
}
- (void) alarm(0);
return (ret);
}
@@ -718,17 +765,28 @@ pcp_read(uint8_t *buf, int byte_cnt)
* do a peek to see how much data is available and read complete data.
*/
- if ((m = pcp_peek(read_tail, mtu_size)) < 0) {
- return (m);
- }
+ if (xport_type == GLVC_NON_STREAM) {
+ if ((m = pcp_peek(read_tail, mtu_size)) < 0) {
+ return (m);
+ }
+
+ (void) alarm(glvc_timeout);
+ if ((ret = read(chnl_fd, read_tail, m)) < 0) {
+ (void) alarm(0);
+ return (ret);
+ }
- (void) alarm(glvc_timeout);
- if ((ret = read(chnl_fd, read_tail, m)) < 0) {
(void) alarm(0);
- return (ret);
+ } else {
+ /*
+ * Read the extra number of bytes
+ */
+ m = byte_cnt - (read_tail - read_head);
+ if ((ret = vldc_read(chnl_fd,
+ read_tail, m)) <= 0) {
+ return (ret);
+ }
}
-
- (void) alarm(0);
read_tail += ret;
/*
@@ -743,6 +801,69 @@ pcp_read(uint8_t *buf, int byte_cnt)
}
/*
+ * Issue read from the driver until byte_cnt number
+ * of bytes are present in read buffer. Do not
+ * move the read head.
+ */
+static int
+pcp_update_read_area(int byte_cnt)
+{
+ int ret;
+ int n, i;
+
+ if (byte_cnt < 0 || byte_cnt > mtu_size) {
+ return (PCPL_INVALID_ARGS);
+ }
+
+ /*
+ * initialization of local read buffer
+ * from which the stream read requests are serviced.
+ */
+ if (read_area == NULL) {
+ read_area = (uint8_t *)umem_zalloc(READ_AREA_SIZE,
+ UMEM_DEFAULT);
+ if (read_area == NULL) {
+ return (PCPL_MALLOC_FAIL);
+ }
+ read_head = read_area;
+ read_tail = read_area;
+ }
+
+ /*
+ * if we already have sufficient data in the buffer,
+ * just return
+ */
+ if (byte_cnt <= (read_tail - read_head)) {
+ return (byte_cnt);
+ }
+
+ /*
+ * if the request is not satisfied from the buffered data, then move the
+ * remaining data to front of the buffer and read new data.
+ */
+ for (i = 0; i < (read_tail - read_head); ++i) {
+ read_area[i] = read_head[i];
+ }
+ read_head = read_area;
+ read_tail = read_head + i;
+
+ n = byte_cnt - (read_tail - read_head);
+
+ if ((ret = vldc_read(chnl_fd,
+ read_tail, n)) <= 0) {
+ return (ret);
+ }
+ read_tail += ret;
+
+ /*
+ * Return the number of bytes we could read
+ */
+ n = MIN(byte_cnt, (read_tail - read_head));
+
+ return (n);
+}
+
+/*
* This function is slight different from pcp_peek. The peek requests are first
* serviced from local read buffer, if data is available. If the peek request
* is not serviceble from local read buffer, then the data is peeked from
@@ -798,7 +919,6 @@ pcp_peek_read(uint8_t *buf, int byte_cnt)
if ((m = pcp_peek(peek_read_tail, mtu_size)) < 0) {
return (m);
}
-
peek_read_tail += m;
/*
@@ -874,7 +994,12 @@ pcp_recv_resp_msg_hdr(pcp_resp_msg_hdr_t *resp_hdr)
* (magic seq) or if an error happens while reading data from
* channel.
*/
- if ((ret = pcp_frame_error_handle()) != 0)
+ if (xport_type == GLVC_NON_STREAM)
+ ret = pcp_frame_error_handle();
+ else
+ ret = pcp_vldc_frame_error_handle();
+
+ if (ret != 0)
return (PCPL_FRAME_ERROR);
/* read magic number first */
@@ -1059,6 +1184,55 @@ pcp_frame_error_handle(void)
}
/*
+ * This function handles channel framing errors. It waits until proper
+ * frame with starting sequence as magic number (0xAFBCAFA0)
+ * is arrived. It removes unexpected data (before the magic number sequence)
+ * on the channel. It returns when proper magic number sequence is seen
+ * or when any failure happens while reading/peeking the channel.
+ */
+static int
+pcp_vldc_frame_error_handle(void)
+{
+ uint8_t magic_num_buf[4];
+ uint32_t net_magic_num; /* magic byte in network byte order */
+ uint32_t host_magic_num = PCP_MAGIC_NUM;
+ int found_magic = 0;
+
+ net_magic_num = htonl(host_magic_num);
+ (void) memcpy(magic_num_buf, (uint8_t *)&net_magic_num, 4);
+
+ /*
+ * For vldc, we need to read whatever data is available and
+ * advance the read pointer one byte at a time until we get
+ * the magic word. When this function is invoked, we do not
+ * have any byte in the read buffer.
+ */
+
+ /*
+ * Keep reading until we find the matching magic number
+ */
+ while (!found_magic) {
+ while ((read_tail - read_head) < sizeof (host_magic_num)) {
+ if (pcp_update_read_area(sizeof (host_magic_num)) < 0)
+ return (-1);
+ }
+
+ /*
+ * We should have at least 4 bytes in read buffer. Check
+ * if the magic number can be matched
+ */
+ if (memcmp(read_head, magic_num_buf,
+ sizeof (host_magic_num))) {
+ read_head += 1;
+ } else {
+ found_magic = 1;
+ }
+ }
+
+ return (0);
+}
+
+/*
* checks whether certain byte sequence is present in the data stream.
*/
static int
@@ -1188,3 +1362,81 @@ pcp_cleanup(int channel_fd)
umem_free(buf, mtu_size);
return (ret);
}
+
+static int
+vldc_write(int fd, uint8_t *bufp, int size)
+{
+ int res;
+ int left = size;
+ pollfd_t pollfd;
+
+ pollfd.events = POLLOUT;
+ pollfd.revents = 0;
+ pollfd.fd = fd;
+
+ /*
+ * Poll for the vldc channel to be ready
+ */
+ if (poll(&pollfd, 1, glvc_timeout * MILLISEC) <= 0) {
+ return (-1);
+ }
+
+ do {
+ if ((res = write(fd, bufp, left)) <= 0) {
+ if (errno != EWOULDBLOCK) {
+ return (res);
+ }
+ } else {
+ bufp += res;
+ left -= res;
+ }
+ } while (left > 0);
+
+ /*
+ * Return number of bytes actually written
+ */
+ return (size - left);
+}
+
+/*
+ * Keep reading until we get the specified number of bytes
+ */
+static int
+vldc_read(int fd, uint8_t *bufp, int size)
+{
+ int res;
+ int left = size;
+
+ struct pollfd fds[1];
+
+ fds[0].events = POLLIN | POLLPRI;
+ fds[0].revents = 0;
+ fds[0].fd = fd;
+
+ if (poll(fds, 1, glvc_timeout * MILLISEC) <= 0) {
+ return (-1);
+ }
+
+ while (left > 0) {
+ res = read(fd, bufp, left);
+ /* return on error or short read */
+ if ((res == 0) || ((res < 0) &&
+ (errno == EAGAIN))) {
+ /* poll until the read is unblocked */
+ if ((poll(fds, 1, glvc_timeout * MILLISEC)) < 0)
+ return (-1);
+
+ continue;
+ } else
+ if (res < 0) {
+ /* unrecoverable error */
+
+ return (-1);
+ } else {
+ bufp += res;
+ left -= res;
+ }
+ }
+
+ return (size - left);
+}
diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile
index a8d94762ad..868a18bdfd 100644
--- a/usr/src/pkgdefs/Makefile
+++ b/usr/src/pkgdefs/Makefile
@@ -69,6 +69,8 @@ sparc_SUBDIRS= \
SUNWkvm.u \
SUNWkvm.v \
SUNWkvmt200.v \
+ SUNWldomr.v \
+ SUNWldomu.v \
SUNWluxd.u \
SUNWluxl \
SUNWonmtst.u \
diff --git a/usr/src/pkgdefs/SUNWldomr.v/Makefile b/usr/src/pkgdefs/SUNWldomr.v/Makefile
new file mode 100644
index 0000000000..fbabdf4e9e
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+DATAFILES += depend
+
+.KEEP_STATE:
+
+all: $(FILES)
+
+install: all pkg
+
+include ../Makefile.targ
diff --git a/usr/src/pkgdefs/SUNWldomr.v/i.manifest b/usr/src/pkgdefs/SUNWldomr.v/i.manifest
new file mode 100644
index 0000000000..262b987697
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/i.manifest
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# i.manifest - smf(5) service manifest install class action script
+#
+
+repfile=$PKG_INSTALL_ROOT/etc/svc/repository.db
+export repfile
+
+#
+# If the repository does not yet exist, create it from the appropriate seed. If
+# for some reason the seeds do not exist, svccfg(1M) will create the repository
+# automatically.
+#
+if [ ! -f $repfile ]; then
+ if [ -n "$SUNW_PKG_INSTALL_ZONENAME" -a \
+ "$SUNW_PKG_INSTALL_ZONENAME" != "global" ]; then
+ [ -f $PKG_INSTALL_ROOT/lib/svc/seed/nonglobal.db ] && \
+ /usr/bin/cp $PKG_INSTALL_ROOT/lib/svc/seed/nonglobal.db \
+ $repfile
+ else
+ [ -f $PKG_INSTALL_ROOT/lib/svc/seed/global.db ] && \
+ /usr/bin/cp $PKG_INSTALL_ROOT/lib/svc/seed/global.db \
+ $repfile
+ fi
+ /usr/bin/chmod 0600 $repfile
+ /usr/bin/chown root:sys $repfile
+fi
+
+if [ ! -r $PKG_INSTALL_ROOT/etc/svc/volatile/repository_door ]; then
+ #
+ # smf(5) is not presently running for the destination environment.
+ # Since we presently cannot refresh without a running svc.startd(1M), we
+ # cannot consistently handle dependent placement. Defer to next boot.
+ #
+ while read src dst; do
+ /usr/bin/cp -p $src $dst
+ done
+else
+ #
+ # Local package install.
+ #
+ while read src dst; do
+ /usr/bin/cp -p $src $dst
+
+ [ "$PKG_INSTALL_ROOT" = "" -o "$PKG_INSTALL_ROOT" = "/" ] && \
+ SVCCFG_CHECKHASH=1 /usr/sbin/svccfg import $dst
+ done
+fi
+
+exit 0
diff --git a/usr/src/pkgdefs/SUNWldomr.v/pkginfo.tmpl b/usr/src/pkgdefs/SUNWldomr.v/pkginfo.tmpl
new file mode 100644
index 0000000000..c728c3e77e
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/pkginfo.tmpl
@@ -0,0 +1,55 @@
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWldomr"
+NAME="Solaris Logical Domains (Root)"
+ARCH="sparc.sun4v"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="root"
+MAXINST="1000"
+CATEGORY="system"
+DESC="Solaris Logical Domains Configuration Files"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="true"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWldomr.v/postinstall b/usr/src/pkgdefs/SUNWldomr.v/postinstall
new file mode 100644
index 0000000000..0c81ca9f6d
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/postinstall
@@ -0,0 +1,136 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Function: check_add_drv()
+#
+# This function will check if the module has an entry in etc/name_to_major
+# If not simply calls add_drv with the arguments given. If there is
+# such an entry in name_to_major file, it adds entries in driver_aliases
+# driver_classes and minor_perm if necessary.
+# The syntax of this function is the same as add_drv.
+
+check_add_drv()
+{
+ if [ "$BASEDIR" = "" ]
+ then
+ BASEDIR=/
+ fi
+ alias=""
+ class=""
+ ADD_ALIAS=0
+ ADD_CLASS=0
+ ADD_MINOR=0
+ OPTIND=1
+ IS_NET_DRIVER=0
+
+ cmd="add_drv"
+
+ NO_CMD=
+ while getopts i:b:m:c:N opt
+ do
+ case $opt in
+ N ) NO_CMD=1;;
+ i ) ADD_ALIAS=1
+ alias=$OPTARG
+ cmd=$cmd" -i '$alias'"
+ ;;
+ m ) ADD_MINOR=1
+ minor=$OPTARG
+ cmd=$cmd" -m '$minor'"
+ ;;
+ c) ADD_CLASS=1
+ class=$OPTARG
+ cmd=$cmd" -c $class"
+ ;;
+ b) BASEDIR=$OPTARG
+ cmd=$cmd" -b $BASEDIR"
+ ;;
+ \?) echo "check_add_drv can not handle this option"
+ return
+ ;;
+ esac
+ done
+ shift `/usr/bin/expr $OPTIND - 1`
+
+ drvname=$1
+
+ cmd=$cmd" "$drvname
+
+ drvname=`echo $drvname | /usr/bin/sed 's;.*/;;g'`
+
+ /usr/bin/grep "^$drvname[ ]" $BASEDIR/etc/name_to_major > /dev/null 2>&1
+
+ if [ "$NO_CMD" = "" -a $? -ne 0 ]
+ then
+ eval $cmd
+ else
+ # entry already in name_to_major, add alias, class, minorperm
+ # if necessary
+ if [ $ADD_ALIAS = 1 ]
+ then
+ for i in $alias
+ do
+ /usr/bin/egrep "^$drvname[ ]+$i" $BASEDIR/etc/driver_aliases>/dev/null 2>&1
+ if [ $? -ne 0 ]
+ then
+ echo "$drvname $i" >> $BASEDIR/etc/driver_aliases
+ fi
+ done
+ fi
+
+ if [ $ADD_CLASS = 1 ]
+ then
+ /usr/bin/egrep "^$drvname[ ]+$class( | |$)" $BASEDIR/etc/driver_classes > /dev/null 2>&1
+ if [ $? -ne 0 ]
+ then
+ echo "$drvname\t$class" >> $BASEDIR/etc/driver_classes
+ fi
+ fi
+
+ if [ $ADD_MINOR = 1 ]
+ then
+ /usr/bin/grep "^$drvname:" $BASEDIR/etc/minor_perm > /dev/null 2>&1
+ if [ $? -ne 0 ]
+ then
+ minorentry="$drvname:$minor"
+ echo $minorentry >> $BASEDIR/etc/minor_perm
+ fi
+ fi
+
+ fi
+
+
+}
+
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-channel-devices"' cnex
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-console-concentrator"' vcc
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-disk"' vdc
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-disk-server"' vds
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-channel"' vldc
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-network"' vnet
+check_add_drv -b "${BASEDIR}" -i '"SUNW,sun4v-network-switch"' vsw
diff --git a/usr/src/pkgdefs/SUNWldomr.v/preremove b/usr/src/pkgdefs/SUNWldomr.v/preremove
new file mode 100644
index 0000000000..b350bce8f1
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/preremove
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+PATH=/usr/bin:/usr/sbin:${PATH}
+export PATH
+
+EXIT=0
+
+not_installed()
+{
+ driver=$1
+
+ grep "^${driver} " ${BASEDIR}/etc/name_to_major > /dev/null 2>&1
+
+ if [ "$?" -eq 0 ]; then
+ return 1
+ else
+ return 0
+ fi
+}
+
+#
+# Unload and remove drivers
+#
+not_installed cnex || rem_drv -b "${BASEDIR}" cnex || EXIT=1
+not_installed vcc || rem_drv -b "${BASEDIR}" vcc || EXIT=1
+not_installed vdc || rem_drv -b "${BASEDIR}" vdc || EXIT=1
+not_installed vds || rem_drv -b "${BASEDIR}" vds || EXIT=1
+not_installed vldc || rem_drv -b "${BASEDIR}" vldc || EXIT=1
+not_installed vnet || rem_drv -b "${BASEDIR}" vnet || EXIT=1
+not_installed vsw || rem_drv -b "${BASEDIR}" vsw || EXIT=1
+
+exit ${EXIT}
diff --git a/usr/src/pkgdefs/SUNWldomr.v/prototype_com b/usr/src/pkgdefs/SUNWldomr.v/prototype_com
new file mode 100644
index 0000000000..ef552c2cd2
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/prototype_com
@@ -0,0 +1,52 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+#
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+i postinstall
+i preremove
+i i.manifest
+i r.manifest
+
+#
+# source locations relative to the prototype file
+#
+# SUNWldomr.v
+#
diff --git a/usr/src/pkgdefs/SUNWldomr.v/prototype_sparc b/usr/src/pkgdefs/SUNWldomr.v/prototype_sparc
new file mode 100644
index 0000000000..2ff55f802f
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/prototype_sparc
@@ -0,0 +1,79 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+#
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+
+#
+# List files which are SPARC specific here
+#
+# source locations relative to the prototype file
+#
+# SUNWldomr.v
+#
+d none lib 755 root bin
+d none lib/svc 0755 root bin
+d none lib/svc/method 0755 root bin
+f none lib/svc/method/svc-vntsd 0555 root bin
+d none platform 755 root sys
+d none platform/sun4v 755 root sys
+d none platform/sun4v/kernel 755 root sys
+d none platform/sun4v/kernel/drv 755 root sys
+d none platform/sun4v/kernel/drv/sparcv9 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/cnex 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/vcc 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/vdc 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/vds 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/vldc 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/vnet 755 root sys
+f none platform/sun4v/kernel/drv/sparcv9/vsw 755 root sys
+d none platform/sun4v/kernel/misc 755 root sys
+d none platform/sun4v/kernel/misc/sparcv9 755 root sys
+f none platform/sun4v/kernel/misc/sparcv9/dr_cpu 755 root sys
+f none platform/sun4v/kernel/misc/sparcv9/ds 755 root sys
+f none platform/sun4v/kernel/misc/sparcv9/fault_iso 755 root sys
+f none platform/sun4v/kernel/misc/sparcv9/ldc 755 root sys
+f none platform/sun4v/kernel/misc/sparcv9/platsvc 755 root sys
+d none var 755 root sys
+d none var/svc 755 root sys
+d none var/svc/manifest 755 root sys
+d none var/svc/manifest/platform 755 root sys
+d none var/svc/manifest/platform/sun4v 755 root sys
+f manifest var/svc/manifest/platform/sun4v/vntsd.xml 0444 root sys
diff --git a/usr/src/pkgdefs/SUNWldomr.v/r.manifest b/usr/src/pkgdefs/SUNWldomr.v/r.manifest
new file mode 100644
index 0000000000..e4690e7e5f
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomr.v/r.manifest
@@ -0,0 +1,83 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# r.manifest - smf(5) manifest remove class action script
+#
+
+if [ "$PKG_INSTALL_ROOT" != "" -a "$PKG_INSTALL_ROOT" != "/" ]; then
+ #
+ # We can't safely disable the service in this case.
+ #
+ smf_alive=no
+else
+ #
+ # We can verify if the service is disabled prior to
+ # removal.
+ #
+ if [ -r /etc/svc/volatile/repository_door ]; then
+ smf_alive=yes
+ fi
+fi
+
+MFSTSCAN=/lib/svc/bin/mfstscan
+SVCCFG=/usr/sbin/svccfg
+SVCPROP=/usr/bin/svcprop
+
+while read mfst; do
+ if [ "$smf_alive" = "yes" ]; then
+ ENTITIES=`$SVCCFG inventory $mfst`
+
+ for fmri in $ENTITIES; do
+ #
+ # Determine whether any of our instances are
+ # enabled.
+ #
+ en_p=`$SVCPROP -C -p general/enabled $fmri 2>/dev/null`
+ en_o=`$SVCPROP -C -p general_ovr/enabled $fmri 2>/dev/null`
+
+ if [ "$en_p" = "true" -o "$en_o" = "true" ]; then
+ echo "$fmri remains enabled; aborting"
+ exit 1
+ fi
+
+ $SVCCFG delete $fmri
+ done
+
+ #
+ # Delete the manifest hash value.
+ #
+ pg_name=`$MFSTSCAN -t $mfst`
+ if $SVCPROP -q -p $pg_name smf/manifest; then
+ $SVCCFG -s smf/manifest delpg $pg_name
+ fi
+ fi
+
+ /usr/bin/rm $mfst
+done
+
+exit 0
diff --git a/usr/src/pkgdefs/SUNWldomu.v/Makefile b/usr/src/pkgdefs/SUNWldomu.v/Makefile
new file mode 100644
index 0000000000..41d0832757
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomu.v/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+DATAFILES += i.manifest r.manifest
+
+.KEEP_STATE:
+
+all: $(FILES)
+
+install: all pkg
+
+include ../Makefile.targ
diff --git a/usr/src/pkgdefs/SUNWldomu.v/depend b/usr/src/pkgdefs/SUNWldomu.v/depend
new file mode 100644
index 0000000000..e7ae45116d
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomu.v/depend
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# This package information file defines software dependencies associated
+# with the pkg. You can define three types of pkg dependencies with this file:
+# P indicates a prerequisite for installation
+# I indicates an incompatible package
+# R indicates a reverse dependency
+# <pkg.abbr> see pkginfo(4), PKG parameter
+# <name> see pkginfo(4), NAME parameter
+# <version> see pkginfo(4), VERSION parameter
+# <arch> see pkginfo(4), ARCH parameter
+# <type> <pkg.abbr> <name>
+# (<arch>)<version>
+# (<arch>)<version>
+# ...
+# <type> <pkg.abbr> <name>
+# ...
+#
+
+P SUNWcar Core Architecture, (Root)
+P SUNWcakr Core Solaris Kernel Architecture (Root)
+P SUNWkvm Core Architecture, (Kvm)
+P SUNWcsr Core Solaris, (Root)
+P SUNWckr Core Solaris Kernel (Root)
+P SUNWcnetr Core Solaris Network Infrastructure (Root)
+P SUNWcsu Core Solaris, (Usr)
+P SUNWcsd Core Solaris Devices
+P SUNWcsl Core Solaris Libraries
+P SUNWldomr Solaris Logical Domains (Root)
diff --git a/usr/src/pkgdefs/SUNWldomu.v/pkginfo.tmpl b/usr/src/pkgdefs/SUNWldomu.v/pkginfo.tmpl
new file mode 100644
index 0000000000..20f2cac7dd
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomu.v/pkginfo.tmpl
@@ -0,0 +1,55 @@
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This required package information file describes characteristics of the
+# package, such as package abbreviation, full package name, package version,
+# and package architecture.
+#
+PKG="SUNWldomu"
+NAME="Solaris Logical Domains (Usr)"
+ARCH="sparc.sun4v"
+VERSION="ONVERS,REV=0.0.0"
+SUNW_PRODNAME="SunOS"
+SUNW_PRODVERS="RELEASE/VERSION"
+SUNW_PKGTYPE="usr"
+MAXINST="1000"
+CATEGORY="system"
+DESC="Solaris Logical Domains Configuration and Administration"
+VENDOR="Sun Microsystems, Inc."
+HOTLINE="Please contact your local service provider"
+EMAIL=""
+CLASSES="none"
+BASEDIR=/
+SUNW_PKGVERS="1.0"
+SUNW_PKG_ALLZONES="true"
+SUNW_PKG_HOLLOW="true"
+SUNW_PKG_THISZONE="false"
+#VSTOCK="<reserved by Release Engineering for package part #>"
+#ISTATES="<developer defined>"
+#RSTATES='<developer defined>'
+#ULIMIT="<developer defined>"
+#ORDER="<developer defined>"
+#PSTAMP="<developer defined>"
+#INTONLY="<developer defined>"
diff --git a/usr/src/pkgdefs/SUNWldomu.v/prototype_com b/usr/src/pkgdefs/SUNWldomu.v/prototype_com
new file mode 100644
index 0000000000..a493d36d3f
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomu.v/prototype_com
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+#
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+# packaging files
+i pkginfo
+i copyright
+i depend
+
+#
+# source locations relative to the prototype file
+#
+# SUNWldomu.v
+#
diff --git a/usr/src/pkgdefs/SUNWldomu.v/prototype_sparc b/usr/src/pkgdefs/SUNWldomu.v/prototype_sparc
new file mode 100644
index 0000000000..860533427d
--- /dev/null
+++ b/usr/src/pkgdefs/SUNWldomu.v/prototype_sparc
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This required package information file contains a list of package contents.
+# The 'pkgmk' command uses this file to identify the contents of a package
+# and their location on the development machine when building the package.
+# Can be created via a text editor or through use of the 'pkgproto' command.
+#
+
+#!search <pathname pathname ...> # where to find pkg objects
+#!include <filename> # include another 'prototype' file
+#!default <mode> <owner> <group> # default used if not specified on entry
+#!<param>=<value> # puts parameter in pkg environment
+
+#
+# Include ISA independent files (prototype_com)
+#
+!include prototype_com
+
+#
+# List files which are SPARC specific here
+#
+# source locations relative to the prototype file
+#
+# SUNWldomu.v
+#
+d none usr 755 root sys
+d none usr/lib 755 root bin
+d none usr/lib/ldoms 755 root bin
+f none usr/lib/ldoms/vntsd 555 root bin
diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_sparc b/usr/src/pkgdefs/SUNWmdb/prototype_sparc
index 0a24c3f4b3..a5b29115a6 100644
--- a/usr/src/pkgdefs/SUNWmdb/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWmdb/prototype_sparc
@@ -90,3 +90,4 @@ d none usr/platform/sun4v/lib/mdb 755 root sys
d none usr/platform/sun4v/lib/mdb/kvm 755 root sys
d none usr/platform/sun4v/lib/mdb/kvm/sparcv9 755 root sys
f none usr/platform/sun4v/lib/mdb/kvm/sparcv9/unix.so 555 root sys
+f none usr/platform/sun4v/lib/mdb/kvm/sparcv9/vdsk.so 555 root sys
diff --git a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
index d08979e697..154c47733f 100644
--- a/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
+++ b/usr/src/pkgdefs/SUNWmdbr/prototype_sparc
@@ -75,3 +75,4 @@ d none platform/sun4v/kernel 755 root sys
d none platform/sun4v/kernel/kmdb 755 root sys
d none platform/sun4v/kernel/kmdb/sparcv9 755 root sys
f none platform/sun4v/kernel/kmdb/sparcv9/unix 555 root sys
+f none platform/sun4v/kernel/kmdb/sparcv9/vdsk 555 root sys
diff --git a/usr/src/pkgdefs/etc/exception_list_i386 b/usr/src/pkgdefs/etc/exception_list_i386
index 532972b732..47b57dcb09 100644
--- a/usr/src/pkgdefs/etc/exception_list_i386
+++ b/usr/src/pkgdefs/etc/exception_list_i386
@@ -642,6 +642,7 @@ usr/include/librestart.h i386
usr/include/librestart_priv.h i386
usr/include/libcontract_priv.h i386
var/svc/manifest/platform/sun4u i386
+var/svc/manifest/platform/sun4v i386
var/svc/profile/platform_SUNW,SPARC-Enterprise.xml i386
var/svc/profile/platform_SUNW,Sun-Fire.xml i386
var/svc/profile/platform_SUNW,Sun-Fire-880.xml i386
diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh
index a1e6559bea..5056ba6dfb 100644
--- a/usr/src/tools/scripts/bfu.sh
+++ b/usr/src/tools/scripts/bfu.sh
@@ -325,6 +325,7 @@ superfluous_local_zone_files="
lib/svc/method/svc-scheduler
lib/svc/method/svc-sckmd
lib/svc/method/svc-syseventd
+ lib/svc/method/svc-vntsd
lib/svc/method/svc-zones
platform/*/kernel
platform/SUNW,Sun-Fire-15000/lib/cvcd
@@ -357,6 +358,7 @@ superfluous_local_zone_files="
usr/include/netinet/ipl.h
usr/include/sys/dcam
usr/lib/devfsadm/linkmod/SUNW_dcam1394_link.so
+ usr/lib/ldoms
usr/platform/SUNW,SPARC-Enterprise/lib/dscp.ppp.options
usr/platform/SUNW,SPARC-Enterprise/lib/libdscp.so
usr/platform/SUNW,SPARC-Enterprise/lib/libdscp.so.1
@@ -376,6 +378,7 @@ superfluous_local_zone_files="
var/svc/manifest/platform/sun4u/efdaemon.xml
var/svc/manifest/platform/sun4u/sckmd.xml
var/svc/manifest/platform/sun4u/sf880drd.xml
+ var/svc/manifest/platform/sun4v/vntsd.xml
var/svc/manifest/system/cvc.xml
var/svc/manifest/system/dumpadm.xml
var/svc/manifest/system/fmd.xml
diff --git a/usr/src/uts/common/sys/mdesc.h b/usr/src/uts/common/sys/mdesc.h
index e05374f60e..4bd335c38f 100644
--- a/usr/src/uts/common/sys/mdesc.h
+++ b/usr/src/uts/common/sys/mdesc.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -87,22 +87,39 @@ extern "C" {
#ifndef _ASM /* { */
-typedef uint64_t mde_cookie_t;
+/*
+ * Opaque handles for use in external interfaces
+ */
+
+typedef void *md_t;
+
+typedef uint64_t mde_cookie_t;
#define MDE_INVAL_ELEM_COOKIE ((mde_cookie_t)-1)
typedef uint32_t mde_str_cookie_t;
#define MDE_INVAL_STR_COOKIE ((mde_str_cookie_t)-1)
+typedef uint64_t md_diff_cookie_t;
+#define MD_INVAL_DIFF_COOKIE ((md_diff_cookie_t)-1)
- /* Opaque structure for handling in functions */
-typedef void * md_t;
+#define MDESC_INVAL_GEN (0)
+/*
+ * External structure for MD diff interface
+ */
+typedef struct {
+ uint8_t type; /* property type */
+ char *namep; /* property name */
+} md_prop_match_t;
+/*
+ * External Interface
+ */
-extern md_t *md_init(void *);
-extern md_t *md_init_intern(uint64_t *, void*(*)(size_t),
- void (*)(void*, size_t));
+extern md_t *md_init_intern(uint64_t *,
+ void *(*allocp)(size_t),
+ void (*freep)(void *, size_t));
extern int md_fini(md_t *);
@@ -112,6 +129,10 @@ extern mde_str_cookie_t md_find_name(md_t *, char *namep);
extern mde_cookie_t md_root_node(md_t *);
+extern uint64_t md_get_gen(md_t *);
+
+extern size_t md_get_bin_size(md_t *);
+
extern int md_scan_dag(md_t *,
mde_cookie_t,
mde_str_cookie_t,
@@ -134,6 +155,24 @@ extern int md_get_prop_data(md_t *,
uint8_t **,
int *);
+extern md_diff_cookie_t md_diff_init(md_t *,
+ mde_cookie_t,
+ md_t *,
+ mde_cookie_t,
+ char *,
+ md_prop_match_t *);
+
+extern int md_diff_added(md_diff_cookie_t,
+ mde_cookie_t **);
+
+extern int md_diff_removed(md_diff_cookie_t,
+ mde_cookie_t **);
+
+extern int md_diff_matched(md_diff_cookie_t,
+ mde_cookie_t **,
+ mde_cookie_t **);
+
+extern int md_diff_fini(md_diff_cookie_t);
#endif /* } _ASM */
@@ -150,7 +189,6 @@ extern int md_get_prop_data(md_t *,
#define MDESCIOCSSZ (MDESCIOC | 2) /* Set new quote buffer size */
#define MDESCIOCDISCARD (MDESCIOC | 3) /* Discard quotes and reset */
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/mdesc_impl.h b/usr/src/uts/common/sys/mdesc_impl.h
index e0f2ced499..391a646b45 100644
--- a/usr/src/uts/common/sys/mdesc_impl.h
+++ b/usr/src/uts/common/sys/mdesc_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -115,7 +115,7 @@ struct MACHINE_DESCRIPTION {
caddr_t caddr;
void *(*allocp)(size_t);
- void (*freep)(void*, size_t);
+ void (*freep)(void *, size_t);
md_header_t *headerp;
md_element_t *mdep;
@@ -132,6 +132,7 @@ struct MACHINE_DESCRIPTION {
mde_cookie_t root_node;
int size;
+ uint64_t gen;
uint64_t md_magic;
};
@@ -152,7 +153,6 @@ extern mde_cookie_t md_find_node_prop(md_impl_t *,
mde_cookie_t,
mde_str_cookie_t,
int);
-
#endif /* _ASM */
#ifdef __cplusplus
diff --git a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
index e968c1d90f..53b5f9f938 100644
--- a/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
+++ b/usr/src/uts/sfmmu/ml/sfmmu_kdi.s
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -303,8 +303,11 @@ kdi_trap_vatotte(void)
ldx [%g2], %g2 /* VA %g1, sfmmup %g2 */
mov 1, %g3 /* VA %g1, sfmmup %g2, idx %g3 */
-1: mov HBLK_RANGE_SHIFT, %g4
- mulx %g3, 3, %g4
+ mov HBLK_RANGE_SHIFT, %g4
+ ba 3f
+ nop
+
+1: mulx %g3, 3, %g4 /* 3: see TTE_BSZS_SHIFT */
add %g4, MMU_PAGESHIFT, %g4
3: KDI_HME_HASH_FUNCTION /* %g1, %g2, %g4 => hash in %g4 */
@@ -321,11 +324,9 @@ kdi_trap_vatotte(void)
4: ba,a 6f
5: add %g3, 1, %g3
-#ifdef sun4v
- cmp %g3, MAX_HASHCNT
-#else
- cmp %g3, DEFAULT_MAX_HASHCNT /* no 32/256M kernel pages */
-#endif
+ set mmu_hashcnt, %g4
+ lduw [%g4], %g4
+ cmp %g3, %g4
ble 1b
nop
diff --git a/usr/src/uts/sun4/io/trapstat.c b/usr/src/uts/sun4/io/trapstat.c
index bdaac735fe..9aa25eca4f 100644
--- a/usr/src/uts/sun4/io/trapstat.c
+++ b/usr/src/uts/sun4/io/trapstat.c
@@ -1712,8 +1712,25 @@ trapstat_cpu_setup(cpu_setup_t what, processorid_t cpu)
break;
case CPU_UNCONFIG:
- if (tcpu->tcpu_flags & TSTAT_CPU_ENABLED)
+ if (tcpu->tcpu_flags & TSTAT_CPU_ENABLED) {
tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED;
+#ifdef sun4v
+ /*
+ * A power-off, causes the cpu mondo queues to be
+ * unconfigured on sun4v. Since we can't teardown
+ * trapstat's mappings on the cpu that is going away,
+ * we simply mark it as not allocated. This will
+ * prevent a teardown on a cpu with the same cpu id
+ * that might have been added while trapstat is running.
+ */
+ if (tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED) {
+ tcpu->tcpu_pfn = NULL;
+ tcpu->tcpu_instr = NULL;
+ tcpu->tcpu_data = NULL;
+ tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED;
+ }
+#endif
+ }
break;
default:
diff --git a/usr/src/uts/sun4/os/ddi_impl.c b/usr/src/uts/sun4/os/ddi_impl.c
index 517f109fed..343d3391b5 100644
--- a/usr/src/uts/sun4/os/ddi_impl.c
+++ b/usr/src/uts/sun4/os/ddi_impl.c
@@ -53,6 +53,7 @@
#include <sys/fs/dv_node.h>
#include <sys/fs/snode.h>
#include <sys/ddi_isa.h>
+#include <sys/modhash.h>
dev_info_t *get_intr_parent(dev_info_t *, dev_info_t *,
ddi_intr_handle_impl_t *);
@@ -1968,3 +1969,831 @@ peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
return (err);
}
+
+/*
+ * Platform independent DR routines
+ */
+
+static int
+ndi2errno(int n)
+{
+ int err = 0;
+
+ switch (n) {
+ case NDI_NOMEM:
+ err = ENOMEM;
+ break;
+ case NDI_BUSY:
+ err = EBUSY;
+ break;
+ case NDI_FAULT:
+ err = EFAULT;
+ break;
+ case NDI_FAILURE:
+ err = EIO;
+ break;
+ case NDI_SUCCESS:
+ break;
+ case NDI_BADHANDLE:
+ default:
+ err = EINVAL;
+ break;
+ }
+ return (err);
+}
+
+/*
+ * Prom tree node list
+ */
+struct ptnode {
+ pnode_t nodeid;
+ struct ptnode *next;
+};
+
+/*
+ * Prom tree walk arg
+ */
+struct pta {
+ dev_info_t *pdip;
+ devi_branch_t *bp;
+ uint_t flags;
+ dev_info_t *fdip;
+ struct ptnode *head;
+};
+
+static void
+visit_node(pnode_t nodeid, struct pta *ap)
+{
+ struct ptnode **nextp;
+ int (*select)(pnode_t, void *, uint_t);
+
+ ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
+
+ select = ap->bp->create.prom_branch_select;
+
+ ASSERT(select);
+
+ if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
+
+ for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
+ ;
+
+ *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
+
+ (*nextp)->nodeid = nodeid;
+ }
+
+ if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
+ return;
+
+ nodeid = prom_childnode(nodeid);
+ while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
+ visit_node(nodeid, ap);
+ nodeid = prom_nextnode(nodeid);
+ }
+}
+
+/*ARGSUSED*/
+static int
+set_dip_offline(dev_info_t *dip, void *arg)
+{
+ ASSERT(dip);
+
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if (!DEVI_IS_DEVICE_OFFLINE(dip))
+ DEVI_SET_DEVICE_OFFLINE(dip);
+ mutex_exit(&(DEVI(dip)->devi_lock));
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*ARGSUSED*/
+static int
+create_prom_branch(void *arg, int has_changed)
+{
+ int circ, c;
+ int exists, rv;
+ pnode_t nodeid;
+ struct ptnode *tnp;
+ dev_info_t *dip;
+ struct pta *ap = arg;
+ devi_branch_t *bp;
+
+ ASSERT(ap);
+ ASSERT(ap->fdip == NULL);
+ ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
+
+ bp = ap->bp;
+
+ nodeid = ddi_get_nodeid(ap->pdip);
+ if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
+ cmn_err(CE_WARN, "create_prom_branch: invalid "
+ "nodeid: 0x%x", nodeid);
+ return (EINVAL);
+ }
+
+ ap->head = NULL;
+
+ nodeid = prom_childnode(nodeid);
+ while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
+ visit_node(nodeid, ap);
+ nodeid = prom_nextnode(nodeid);
+ }
+
+ if (ap->head == NULL)
+ return (ENODEV);
+
+ rv = 0;
+ while ((tnp = ap->head) != NULL) {
+ ap->head = tnp->next;
+
+ ndi_devi_enter(ap->pdip, &circ);
+
+ /*
+ * Check if the branch already exists.
+ */
+ exists = 0;
+ dip = e_ddi_nodeid_to_dip(tnp->nodeid);
+ if (dip != NULL) {
+ exists = 1;
+
+ /* Parent is held busy, so release hold */
+ ndi_rele_devi(dip);
+#ifdef DEBUG
+ cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
+ " for nodeid 0x%x", (void *)dip, tnp->nodeid);
+#endif
+ } else {
+ dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
+ }
+
+ kmem_free(tnp, sizeof (struct ptnode));
+
+ if (dip == NULL) {
+ ndi_devi_exit(ap->pdip, circ);
+ rv = EIO;
+ continue;
+ }
+
+ ASSERT(ddi_get_parent(dip) == ap->pdip);
+
+ /*
+ * Hold the branch if it is not already held
+ */
+ if (!exists)
+ e_ddi_branch_hold(dip);
+
+ ASSERT(e_ddi_branch_held(dip));
+
+ /*
+ * Set all dips in the branch offline so that
+ * only a "configure" operation can attach
+ * the branch
+ */
+ (void) set_dip_offline(dip, NULL);
+
+ ndi_devi_enter(dip, &c);
+ ddi_walk_devs(ddi_get_child(dip), set_dip_offline, NULL);
+ ndi_devi_exit(dip, c);
+
+ ndi_devi_exit(ap->pdip, circ);
+
+ if (ap->flags & DEVI_BRANCH_CONFIGURE) {
+ int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
+ if (error && rv == 0)
+ rv = error;
+ }
+
+ /*
+ * Invoke devi_branch_callback() (if it exists) only for
+ * newly created branches
+ */
+ if (bp->devi_branch_callback && !exists)
+ bp->devi_branch_callback(dip, bp->arg, 0);
+ }
+
+ return (rv);
+}
+
+static int
+sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
+{
+ int rv, circ, len;
+ int i, flags;
+ dev_info_t *dip;
+ char *nbuf;
+ static const char *noname = "<none>";
+
+ ASSERT(pdip);
+ ASSERT(DEVI_BUSY_OWNED(pdip));
+
+ flags = 0;
+
+ /*
+ * Creating the root of a branch ?
+ */
+ if (rdipp) {
+ *rdipp = NULL;
+ flags = DEVI_BRANCH_ROOT;
+ }
+
+ ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
+ rv = bp->create.sid_branch_create(dip, bp->arg, flags);
+
+ nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
+
+ if (rv == DDI_WALK_ERROR) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
+ " properties on devinfo node %p", (void *)dip);
+ goto fail;
+ }
+
+ len = OBP_MAXDRVNAME;
+ if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
+ != DDI_PROP_SUCCESS) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
+ "no name property", (void *)dip);
+ goto fail;
+ }
+
+ ASSERT(i_ddi_node_state(dip) == DS_PROTO);
+ if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
+ " for devinfo node %p", nbuf, (void *)dip);
+ goto fail;
+ }
+
+ kmem_free(nbuf, OBP_MAXDRVNAME);
+
+ /*
+ * Ignore bind failures just like boot does
+ */
+ (void) ndi_devi_bind_driver(dip, 0);
+
+ switch (rv) {
+ case DDI_WALK_CONTINUE:
+ case DDI_WALK_PRUNESIB:
+ ndi_devi_enter(dip, &circ);
+
+ i = DDI_WALK_CONTINUE;
+ for (; i == DDI_WALK_CONTINUE; ) {
+ i = sid_node_create(dip, bp, NULL);
+ }
+
+ ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
+ if (i == DDI_WALK_ERROR)
+ rv = i;
+ /*
+ * If PRUNESIB stop creating siblings
+ * of dip's child. Subsequent walk behavior
+ * is determined by rv returned by dip.
+ */
+
+ ndi_devi_exit(dip, circ);
+ break;
+ case DDI_WALK_TERMINATE:
+ /*
+ * Don't create children and ask our parent
+ * to not create siblings either.
+ */
+ rv = DDI_WALK_PRUNESIB;
+ break;
+ case DDI_WALK_PRUNECHILD:
+ /*
+ * Don't create children, but ask parent to continue
+ * with siblings.
+ */
+ rv = DDI_WALK_CONTINUE;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (rdipp)
+ *rdipp = dip;
+
+ /*
+ * Set device offline - only the "configure" op should cause an attach
+ */
+ (void) set_dip_offline(dip, NULL);
+
+ return (rv);
+fail:
+ (void) ndi_devi_free(dip);
+ kmem_free(nbuf, OBP_MAXDRVNAME);
+ return (DDI_WALK_ERROR);
+}
+
+static int
+create_sid_branch(
+ dev_info_t *pdip,
+ devi_branch_t *bp,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int rv = 0, state = DDI_WALK_CONTINUE;
+ dev_info_t *rdip;
+
+ while (state == DDI_WALK_CONTINUE) {
+ int circ;
+
+ ndi_devi_enter(pdip, &circ);
+
+ state = sid_node_create(pdip, bp, &rdip);
+ if (rdip == NULL) {
+ ndi_devi_exit(pdip, circ);
+ ASSERT(state == DDI_WALK_ERROR);
+ break;
+ }
+
+ e_ddi_branch_hold(rdip);
+
+ ndi_devi_exit(pdip, circ);
+
+ if (flags & DEVI_BRANCH_CONFIGURE) {
+ int error = e_ddi_branch_configure(rdip, dipp, 0);
+ if (error && rv == 0)
+ rv = error;
+ }
+
+ /*
+ * devi_branch_callback() is optional
+ */
+ if (bp->devi_branch_callback)
+ bp->devi_branch_callback(rdip, bp->arg, 0);
+ }
+
+ ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
+
+ return (state == DDI_WALK_ERROR ? EIO : rv);
+}
+
+int
+e_ddi_branch_create(
+ dev_info_t *pdip,
+ devi_branch_t *bp,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int prom_devi, sid_devi, error;
+
+ if (pdip == NULL || bp == NULL || bp->type == 0)
+ return (EINVAL);
+
+ prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
+ sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
+
+ if (prom_devi && bp->create.prom_branch_select == NULL)
+ return (EINVAL);
+ else if (sid_devi && bp->create.sid_branch_create == NULL)
+ return (EINVAL);
+ else if (!prom_devi && !sid_devi)
+ return (EINVAL);
+
+ if (flags & DEVI_BRANCH_EVENT)
+ return (EINVAL);
+
+ if (prom_devi) {
+ struct pta pta = {0};
+
+ pta.pdip = pdip;
+ pta.bp = bp;
+ pta.flags = flags;
+
+ error = prom_tree_access(create_prom_branch, &pta, NULL);
+
+ if (dipp)
+ *dipp = pta.fdip;
+ else if (pta.fdip)
+ ndi_rele_devi(pta.fdip);
+ } else {
+ error = create_sid_branch(pdip, bp, dipp, flags);
+ }
+
+ return (error);
+}
+
+int
+e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
+{
+ int circ, rv;
+ char *devnm;
+ dev_info_t *pdip;
+
+ if (dipp)
+ *dipp = NULL;
+
+ if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
+ return (EINVAL);
+
+ pdip = ddi_get_parent(rdip);
+
+ ndi_devi_enter(pdip, &circ);
+
+ if (!e_ddi_branch_held(rdip)) {
+ ndi_devi_exit(pdip, circ);
+ cmn_err(CE_WARN, "e_ddi_branch_configure: "
+ "dip(%p) not held", (void *)rdip);
+ return (EINVAL);
+ }
+
+ if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
+ /*
+ * First attempt to bind a driver. If we fail, return
+ * success (On some platforms, dips for some device
+ * types (CPUs) may not have a driver)
+ */
+ if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
+ ndi_devi_exit(pdip, circ);
+ return (0);
+ }
+
+ if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
+ rv = NDI_FAILURE;
+ goto out;
+ }
+ }
+
+ ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
+
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+
+ (void) ddi_deviname(rdip, devnm);
+
+ if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
+ NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
+ /* release hold from ndi_devi_config_one() */
+ ndi_rele_devi(rdip);
+ }
+
+ kmem_free(devnm, MAXNAMELEN + 1);
+out:
+ if (rv != NDI_SUCCESS && dipp) {
+ ndi_hold_devi(rdip);
+ *dipp = rdip;
+ }
+ ndi_devi_exit(pdip, circ);
+ return (ndi2errno(rv));
+}
+
+void
+e_ddi_branch_hold(dev_info_t *rdip)
+{
+ if (e_ddi_branch_held(rdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
+ return;
+ }
+
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
+ DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
+ DEVI(rdip)->devi_ref++;
+ }
+ ASSERT(DEVI(rdip)->devi_ref > 0);
+ mutex_exit(&DEVI(rdip)->devi_lock);
+}
+
+int
+e_ddi_branch_held(dev_info_t *rdip)
+{
+ int rv = 0;
+
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
+ DEVI(rdip)->devi_ref > 0) {
+ rv = 1;
+ }
+ mutex_exit(&DEVI(rdip)->devi_lock);
+
+ return (rv);
+}
+void
+e_ddi_branch_rele(dev_info_t *rdip)
+{
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
+ DEVI(rdip)->devi_ref--;
+ mutex_exit(&DEVI(rdip)->devi_lock);
+}
+
+int
+e_ddi_branch_unconfigure(
+ dev_info_t *rdip,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int circ, rv;
+ int destroy;
+ char *devnm;
+ uint_t nflags;
+ dev_info_t *pdip;
+
+ if (dipp)
+ *dipp = NULL;
+
+ if (rdip == NULL)
+ return (EINVAL);
+
+ pdip = ddi_get_parent(rdip);
+
+ ASSERT(pdip);
+
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks during
+ * devfs_clean()
+ */
+ if (DEVI_BUSY_OWNED(pdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
+ " devinfo node(%p) is busy held", (void *)pdip);
+ return (EINVAL);
+ }
+
+ destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
+
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+
+ ndi_devi_enter(pdip, &circ);
+ (void) ddi_deviname(rdip, devnm);
+ ndi_devi_exit(pdip, circ);
+
+ /*
+ * ddi_deviname() returns a component name with / prepended.
+ */
+ rv = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+ if (rv) {
+ kmem_free(devnm, MAXNAMELEN + 1);
+ return (rv);
+ }
+
+ ndi_devi_enter(pdip, &circ);
+
+ /*
+ * Recreate device name as it may have changed state (init/uninit)
+ * when parent busy lock was dropped for devfs_clean()
+ */
+ (void) ddi_deviname(rdip, devnm);
+
+ if (!e_ddi_branch_held(rdip)) {
+ kmem_free(devnm, MAXNAMELEN + 1);
+ ndi_devi_exit(pdip, circ);
+ cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
+ destroy ? "destroy" : "unconfigure", (void *)rdip);
+ return (EINVAL);
+ }
+
+ /*
+ * Release hold on the branch. This is ok since we are holding the
+ * parent busy. If rdip is not removed, we must do a hold on the
+ * branch before returning.
+ */
+ e_ddi_branch_rele(rdip);
+
+ nflags = NDI_DEVI_OFFLINE;
+ if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
+ nflags |= NDI_DEVI_REMOVE;
+ destroy = 1;
+ } else {
+ nflags |= NDI_UNCONFIG; /* uninit but don't remove */
+ }
+
+ if (flags & DEVI_BRANCH_EVENT)
+ nflags |= NDI_POST_EVENT;
+
+ if (i_ddi_devi_attached(pdip) &&
+ (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
+ rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
+ } else {
+ rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
+ if (rv == NDI_SUCCESS) {
+ ASSERT(!destroy || ddi_get_child(rdip) == NULL);
+ rv = ndi_devi_offline(rdip, nflags);
+ }
+ }
+
+ if (!destroy || rv != NDI_SUCCESS) {
+ /* The dip still exists, so do a hold */
+ e_ddi_branch_hold(rdip);
+ }
+out:
+ kmem_free(devnm, MAXNAMELEN + 1);
+ ndi_devi_exit(pdip, circ);
+ return (ndi2errno(rv));
+}
+
+int
+e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
+{
+ return (e_ddi_branch_unconfigure(rdip, dipp,
+ flag|DEVI_BRANCH_DESTROY));
+}
+
+/*
+ * Number of chains for hash table
+ */
+#define NUMCHAINS 17
+
+/*
+ * Devinfo busy arg
+ */
+struct devi_busy {
+ int dv_total;
+ int s_total;
+ mod_hash_t *dv_hash;
+ mod_hash_t *s_hash;
+ int (*callback)(dev_info_t *, void *, uint_t);
+ void *arg;
+};
+
+static int
+visit_dip(dev_info_t *dip, void *arg)
+{
+ uintptr_t sbusy, dvbusy, ref;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(bsp->callback);
+
+ /*
+ * A dip cannot be busy if its reference count is 0
+ */
+ if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
+ return (bsp->callback(dip, bsp->arg, 0));
+ }
+
+ if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
+ dvbusy = 0;
+
+ /*
+ * To catch device opens currently maintained on specfs common snodes.
+ */
+ if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
+ sbusy = 0;
+
+#ifdef DEBUG
+ if (ref < sbusy || ref < dvbusy) {
+ cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
+ "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
+ }
+#endif
+
+ dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
+
+ return (bsp->callback(dip, bsp->arg, dvbusy));
+}
+
+static int
+visit_snode(struct snode *sp, void *arg)
+{
+ uintptr_t sbusy;
+ dev_info_t *dip;
+ int count;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(sp);
+
+ /*
+ * The stable lock is held. This prevents
+ * the snode and its associated dip from
+ * going away.
+ */
+ dip = NULL;
+ count = spec_devi_open_count(sp, &dip);
+
+ if (count <= 0)
+ return (DDI_WALK_CONTINUE);
+
+ ASSERT(dip);
+
+ if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
+ sbusy = count;
+ else
+ sbusy += count;
+
+ if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
+ cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
+ "sbusy = %lu", "e_ddi_branch_referenced",
+ (void *)dip, sbusy);
+ }
+
+ bsp->s_total += count;
+
+ return (DDI_WALK_CONTINUE);
+}
+
+static void
+visit_dvnode(struct dv_node *dv, void *arg)
+{
+ uintptr_t dvbusy;
+ uint_t count;
+ struct vnode *vp;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(dv && dv->dv_devi);
+
+ vp = DVTOV(dv);
+
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+
+ if (!count)
+ return;
+
+ if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
+ (mod_hash_val_t *)&dvbusy))
+ dvbusy = count;
+ else
+ dvbusy += count;
+
+ if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
+ (mod_hash_val_t)dvbusy)) {
+ cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
+ "dvbusy=%lu", "e_ddi_branch_referenced",
+ (void *)dv->dv_devi, dvbusy);
+ }
+
+ bsp->dv_total += count;
+}
+
+/*
+ * Returns reference count on success or -1 on failure.
+ */
+int
+e_ddi_branch_referenced(
+ dev_info_t *rdip,
+ int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
+ void *arg)
+{
+ int circ;
+ char *path;
+ dev_info_t *pdip;
+ struct devi_busy bsa = {0};
+
+ ASSERT(rdip);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ ndi_hold_devi(rdip);
+
+ pdip = ddi_get_parent(rdip);
+
+ ASSERT(pdip);
+
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks during
+ * devfs_walk()
+ */
+ if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
+ "devinfo branch(%p) not held or parent busy held",
+ (void *)rdip);
+ ndi_rele_devi(rdip);
+ kmem_free(path, MAXPATHLEN);
+ return (-1);
+ }
+
+ ndi_devi_enter(pdip, &circ);
+ (void) ddi_pathname(rdip, path);
+ ndi_devi_exit(pdip, circ);
+
+ bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
+ mod_hash_null_valdtor, sizeof (struct dev_info));
+
+ bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
+ mod_hash_null_valdtor, sizeof (struct snode));
+
+ if (devfs_walk(path, visit_dvnode, &bsa)) {
+ cmn_err(CE_WARN, "e_ddi_branch_referenced: "
+ "devfs walk failed for: %s", path);
+ kmem_free(path, MAXPATHLEN);
+ bsa.s_total = bsa.dv_total = -1;
+ goto out;
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ /*
+ * Walk the snode table to detect device opens, which are currently
+ * maintained on specfs common snodes.
+ */
+ spec_snode_walk(visit_snode, &bsa);
+
+ if (callback == NULL)
+ goto out;
+
+ bsa.callback = callback;
+ bsa.arg = arg;
+
+ if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
+ ndi_devi_enter(rdip, &circ);
+ ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
+ ndi_devi_exit(rdip, circ);
+ }
+
+out:
+ ndi_rele_devi(rdip);
+ mod_hash_destroy_ptrhash(bsa.s_hash);
+ mod_hash_destroy_ptrhash(bsa.dv_hash);
+ return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
+}
diff --git a/usr/src/uts/sun4/os/mlsetup.c b/usr/src/uts/sun4/os/mlsetup.c
index 7095e6551d..53e4812dae 100644
--- a/usr/src/uts/sun4/os/mlsetup.c
+++ b/usr/src/uts/sun4/os/mlsetup.c
@@ -71,6 +71,7 @@
*/
extern void map_wellknown_devices(void);
extern void hsvc_setup(void);
+extern void mach_descrip_startup_init(void);
int dcache_size;
int dcache_linesize;
@@ -242,6 +243,13 @@ mlsetup(struct regs *rp, void *cif, kfpu_t *fp)
ctlp->d.limit = TRAP_TSIZE; /* XXX dynamic someday */
ctlp->d.paddr_base = va_to_pa(trap_tr0);
#endif /* TRAPTRACE */
+
+ /*
+ * Initialize the Machine Description kernel framework
+ */
+
+ mach_descrip_startup_init();
+
/*
* initialize HV trap trace buffer for the boot cpu
*/
diff --git a/usr/src/uts/sun4/os/mp_startup.c b/usr/src/uts/sun4/os/mp_startup.c
index ed3477597d..0139e6a5f0 100644
--- a/usr/src/uts/sun4/os/mp_startup.c
+++ b/usr/src/uts/sun4/os/mp_startup.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -45,6 +46,7 @@
#include <sys/cpu_sgnblk_defs.h>
extern void cpu_intrq_setup(struct cpu *);
+extern void cpu_intrq_cleanup(struct cpu *);
extern void cpu_intrq_register(struct cpu *);
struct cpu *cpus; /* pointer to other cpus; dynamically allocate */
@@ -469,6 +471,11 @@ cleanup_cpu_common(int cpuid)
cleanup_intr_pool(cp);
/*
+ * Clean any machine specific interrupt states.
+ */
+ cpu_intrq_cleanup(cp);
+
+ /*
* At this point, the only threads bound to this CPU should be
* special per-cpu threads: it's idle thread, it's pause thread,
* and it's interrupt threads. Clean these up.
diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c
index 8e2ce99fc8..3a0506263c 100644
--- a/usr/src/uts/sun4/os/startup.c
+++ b/usr/src/uts/sun4/os/startup.c
@@ -71,16 +71,21 @@ extern void cpu_intrq_register(struct cpu *);
extern void contig_mem_init(void);
extern void mach_dump_buffer_init(void);
extern void mach_descrip_init(void);
+extern void mach_descrip_startup_fini(void);
extern void mach_memscrub(void);
extern void mach_fpras(void);
extern void mach_cpu_halt_idle(void);
extern void mach_hw_copy_limit(void);
+extern void load_mach_drivers(void);
extern void load_tod_module(void);
#pragma weak load_tod_module
extern int ndata_alloc_mmfsa(struct memlist *ndata);
#pragma weak ndata_alloc_mmfsa
+extern void cif_init(void);
+#pragma weak cif_init
+
extern void parse_idprom(void);
extern void add_vx_handler(char *, int, void (*)(cell_t *));
extern void mem_config_init(void);
@@ -1748,6 +1753,13 @@ startup_bop_gone(void)
extern int bop_io_quiesced;
/*
+	 * Destroy the MD framework initialized at startup.
+	 * The startup code initialized the MD framework using
+	 * PROM and BOP allocations; free those resources now.
+ */
+ mach_descrip_startup_fini();
+
+ /*
* Call back into boot and release boots resources.
*/
BOP_QUIESCE_IO(bootops);
@@ -2198,6 +2210,10 @@ post_startup(void)
*/
(void) modload("fs", "procfs");
+ /* load machine class specific drivers */
+ load_mach_drivers();
+
+ /* load platform specific drivers */
if (&load_platform_drivers)
load_platform_drivers();
@@ -2214,6 +2230,9 @@ post_startup(void)
#ifdef PTL1_PANIC_DEBUG
init_ptl1_thread();
#endif /* PTL1_PANIC_DEBUG */
+
+ if (&cif_init)
+ cif_init();
}
#ifdef PTL1_PANIC_DEBUG
diff --git a/usr/src/uts/sun4u/os/mach_ddi_impl.c b/usr/src/uts/sun4u/os/mach_ddi_impl.c
index 195d86520b..6f01f3408e 100644
--- a/usr/src/uts/sun4u/os/mach_ddi_impl.c
+++ b/usr/src/uts/sun4u/os/mach_ddi_impl.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -35,7 +35,6 @@
#include <sys/ethernet.h>
#include <sys/idprom.h>
#include <sys/machsystm.h>
-#include <sys/modhash.h>
#include <sys/promif.h>
#include <sys/prom_plat.h>
#include <sys/sunndi.h>
@@ -397,831 +396,3 @@ dip_to_cpu_id(dev_info_t *dip, processorid_t *cpu_id)
}
return (DDI_FAILURE);
}
-
-/*
- * Platform independent DR routines
- */
-
-static int
-ndi2errno(int n)
-{
- int err = 0;
-
- switch (n) {
- case NDI_NOMEM:
- err = ENOMEM;
- break;
- case NDI_BUSY:
- err = EBUSY;
- break;
- case NDI_FAULT:
- err = EFAULT;
- break;
- case NDI_FAILURE:
- err = EIO;
- break;
- case NDI_SUCCESS:
- break;
- case NDI_BADHANDLE:
- default:
- err = EINVAL;
- break;
- }
- return (err);
-}
-
-/*
- * Prom tree node list
- */
-struct ptnode {
- pnode_t nodeid;
- struct ptnode *next;
-};
-
-/*
- * Prom tree walk arg
- */
-struct pta {
- dev_info_t *pdip;
- devi_branch_t *bp;
- uint_t flags;
- dev_info_t *fdip;
- struct ptnode *head;
-};
-
-static void
-visit_node(pnode_t nodeid, struct pta *ap)
-{
- struct ptnode **nextp;
- int (*select)(pnode_t, void *, uint_t);
-
- ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
-
- select = ap->bp->create.prom_branch_select;
-
- ASSERT(select);
-
- if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
-
- for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
- ;
-
- *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
-
- (*nextp)->nodeid = nodeid;
- }
-
- if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
- return;
-
- nodeid = prom_childnode(nodeid);
- while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
- visit_node(nodeid, ap);
- nodeid = prom_nextnode(nodeid);
- }
-}
-
-/*ARGSUSED*/
-static int
-set_dip_offline(dev_info_t *dip, void *arg)
-{
- ASSERT(dip);
-
- mutex_enter(&(DEVI(dip)->devi_lock));
- if (!DEVI_IS_DEVICE_OFFLINE(dip))
- DEVI_SET_DEVICE_OFFLINE(dip);
- mutex_exit(&(DEVI(dip)->devi_lock));
-
- return (DDI_WALK_CONTINUE);
-}
-
-/*ARGSUSED*/
-static int
-create_prom_branch(void *arg, int has_changed)
-{
- int circ, c;
- int exists, rv;
- pnode_t nodeid;
- struct ptnode *tnp;
- dev_info_t *dip;
- struct pta *ap = arg;
- devi_branch_t *bp;
-
- ASSERT(ap);
- ASSERT(ap->fdip == NULL);
- ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
-
- bp = ap->bp;
-
- nodeid = ddi_get_nodeid(ap->pdip);
- if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
- cmn_err(CE_WARN, "create_prom_branch: invalid "
- "nodeid: 0x%x", nodeid);
- return (EINVAL);
- }
-
- ap->head = NULL;
-
- nodeid = prom_childnode(nodeid);
- while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
- visit_node(nodeid, ap);
- nodeid = prom_nextnode(nodeid);
- }
-
- if (ap->head == NULL)
- return (ENODEV);
-
- rv = 0;
- while ((tnp = ap->head) != NULL) {
- ap->head = tnp->next;
-
- ndi_devi_enter(ap->pdip, &circ);
-
- /*
- * Check if the branch already exists.
- */
- exists = 0;
- dip = e_ddi_nodeid_to_dip(tnp->nodeid);
- if (dip != NULL) {
- exists = 1;
-
- /* Parent is held busy, so release hold */
- ndi_rele_devi(dip);
-#ifdef DEBUG
- cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
- " for nodeid 0x%x", (void *)dip, tnp->nodeid);
-#endif
- } else {
- dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
- }
-
- kmem_free(tnp, sizeof (struct ptnode));
-
- if (dip == NULL) {
- ndi_devi_exit(ap->pdip, circ);
- rv = EIO;
- continue;
- }
-
- ASSERT(ddi_get_parent(dip) == ap->pdip);
-
- /*
- * Hold the branch if it is not already held
- */
- if (!exists)
- e_ddi_branch_hold(dip);
-
- ASSERT(e_ddi_branch_held(dip));
-
- /*
- * Set all dips in the branch offline so that
- * only a "configure" operation can attach
- * the branch
- */
- (void) set_dip_offline(dip, NULL);
-
- ndi_devi_enter(dip, &c);
- ddi_walk_devs(ddi_get_child(dip), set_dip_offline, NULL);
- ndi_devi_exit(dip, c);
-
- ndi_devi_exit(ap->pdip, circ);
-
- if (ap->flags & DEVI_BRANCH_CONFIGURE) {
- int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
- if (error && rv == 0)
- rv = error;
- }
-
- /*
- * Invoke devi_branch_callback() (if it exists) only for
- * newly created branches
- */
- if (bp->devi_branch_callback && !exists)
- bp->devi_branch_callback(dip, bp->arg, 0);
- }
-
- return (rv);
-}
-
-static int
-sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
-{
- int rv, circ, len;
- int i, flags;
- dev_info_t *dip;
- char *nbuf;
- static const char *noname = "<none>";
-
- ASSERT(pdip);
- ASSERT(DEVI_BUSY_OWNED(pdip));
-
- flags = 0;
-
- /*
- * Creating the root of a branch ?
- */
- if (rdipp) {
- *rdipp = NULL;
- flags = DEVI_BRANCH_ROOT;
- }
-
- ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
- rv = bp->create.sid_branch_create(dip, bp->arg, flags);
-
- nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
-
- if (rv == DDI_WALK_ERROR) {
- cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
- " properties on devinfo node %p", (void *)dip);
- goto fail;
- }
-
- len = OBP_MAXDRVNAME;
- if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
- DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
- != DDI_PROP_SUCCESS) {
- cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
- "no name property", (void *)dip);
- goto fail;
- }
-
- ASSERT(i_ddi_node_state(dip) == DS_PROTO);
- if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
- cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
- " for devinfo node %p", nbuf, (void *)dip);
- goto fail;
- }
-
- kmem_free(nbuf, OBP_MAXDRVNAME);
-
- /*
- * Ignore bind failures just like boot does
- */
- (void) ndi_devi_bind_driver(dip, 0);
-
- switch (rv) {
- case DDI_WALK_CONTINUE:
- case DDI_WALK_PRUNESIB:
- ndi_devi_enter(dip, &circ);
-
- i = DDI_WALK_CONTINUE;
- for (; i == DDI_WALK_CONTINUE; ) {
- i = sid_node_create(dip, bp, NULL);
- }
-
- ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
- if (i == DDI_WALK_ERROR)
- rv = i;
- /*
- * If PRUNESIB stop creating siblings
- * of dip's child. Subsequent walk behavior
- * is determined by rv returned by dip.
- */
-
- ndi_devi_exit(dip, circ);
- break;
- case DDI_WALK_TERMINATE:
- /*
- * Don't create children and ask our parent
- * to not create siblings either.
- */
- rv = DDI_WALK_PRUNESIB;
- break;
- case DDI_WALK_PRUNECHILD:
- /*
- * Don't create children, but ask parent to continue
- * with siblings.
- */
- rv = DDI_WALK_CONTINUE;
- break;
- default:
- ASSERT(0);
- break;
- }
-
- if (rdipp)
- *rdipp = dip;
-
- /*
- * Set device offline - only the "configure" op should cause an attach
- */
- (void) set_dip_offline(dip, NULL);
-
- return (rv);
-fail:
- (void) ndi_devi_free(dip);
- kmem_free(nbuf, OBP_MAXDRVNAME);
- return (DDI_WALK_ERROR);
-}
-
-static int
-create_sid_branch(
- dev_info_t *pdip,
- devi_branch_t *bp,
- dev_info_t **dipp,
- uint_t flags)
-{
- int rv = 0, state = DDI_WALK_CONTINUE;
- dev_info_t *rdip;
-
- while (state == DDI_WALK_CONTINUE) {
- int circ;
-
- ndi_devi_enter(pdip, &circ);
-
- state = sid_node_create(pdip, bp, &rdip);
- if (rdip == NULL) {
- ndi_devi_exit(pdip, circ);
- ASSERT(state == DDI_WALK_ERROR);
- break;
- }
-
- e_ddi_branch_hold(rdip);
-
- ndi_devi_exit(pdip, circ);
-
- if (flags & DEVI_BRANCH_CONFIGURE) {
- int error = e_ddi_branch_configure(rdip, dipp, 0);
- if (error && rv == 0)
- rv = error;
- }
-
- /*
- * devi_branch_callback() is optional
- */
- if (bp->devi_branch_callback)
- bp->devi_branch_callback(rdip, bp->arg, 0);
- }
-
- ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
-
- return (state == DDI_WALK_ERROR ? EIO : rv);
-}
-
-int
-e_ddi_branch_create(
- dev_info_t *pdip,
- devi_branch_t *bp,
- dev_info_t **dipp,
- uint_t flags)
-{
- int prom_devi, sid_devi, error;
-
- if (pdip == NULL || bp == NULL || bp->type == 0)
- return (EINVAL);
-
- prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
- sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
-
- if (prom_devi && bp->create.prom_branch_select == NULL)
- return (EINVAL);
- else if (sid_devi && bp->create.sid_branch_create == NULL)
- return (EINVAL);
- else if (!prom_devi && !sid_devi)
- return (EINVAL);
-
- if (flags & DEVI_BRANCH_EVENT)
- return (EINVAL);
-
- if (prom_devi) {
- struct pta pta = {0};
-
- pta.pdip = pdip;
- pta.bp = bp;
- pta.flags = flags;
-
- error = prom_tree_access(create_prom_branch, &pta, NULL);
-
- if (dipp)
- *dipp = pta.fdip;
- else if (pta.fdip)
- ndi_rele_devi(pta.fdip);
- } else {
- error = create_sid_branch(pdip, bp, dipp, flags);
- }
-
- return (error);
-}
-
-int
-e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
-{
- int circ, rv;
- char *devnm;
- dev_info_t *pdip;
-
- if (dipp)
- *dipp = NULL;
-
- if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
- return (EINVAL);
-
- pdip = ddi_get_parent(rdip);
-
- ndi_devi_enter(pdip, &circ);
-
- if (!e_ddi_branch_held(rdip)) {
- ndi_devi_exit(pdip, circ);
- cmn_err(CE_WARN, "e_ddi_branch_configure: "
- "dip(%p) not held", (void *)rdip);
- return (EINVAL);
- }
-
- if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
- /*
- * First attempt to bind a driver. If we fail, return
- * success (On some platforms, dips for some device
- * types (CPUs) may not have a driver)
- */
- if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
- ndi_devi_exit(pdip, circ);
- return (0);
- }
-
- if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
- rv = NDI_FAILURE;
- goto out;
- }
- }
-
- ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
-
- devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
- (void) ddi_deviname(rdip, devnm);
-
- if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
- NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
- /* release hold from ndi_devi_config_one() */
- ndi_rele_devi(rdip);
- }
-
- kmem_free(devnm, MAXNAMELEN + 1);
-out:
- if (rv != NDI_SUCCESS && dipp) {
- ndi_hold_devi(rdip);
- *dipp = rdip;
- }
- ndi_devi_exit(pdip, circ);
- return (ndi2errno(rv));
-}
-
-void
-e_ddi_branch_hold(dev_info_t *rdip)
-{
- if (e_ddi_branch_held(rdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
- return;
- }
-
- mutex_enter(&DEVI(rdip)->devi_lock);
- if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
- DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
- DEVI(rdip)->devi_ref++;
- }
- ASSERT(DEVI(rdip)->devi_ref > 0);
- mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_held(dev_info_t *rdip)
-{
- int rv = 0;
-
- mutex_enter(&DEVI(rdip)->devi_lock);
- if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
- DEVI(rdip)->devi_ref > 0) {
- rv = 1;
- }
- mutex_exit(&DEVI(rdip)->devi_lock);
-
- return (rv);
-}
-void
-e_ddi_branch_rele(dev_info_t *rdip)
-{
- mutex_enter(&DEVI(rdip)->devi_lock);
- DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
- DEVI(rdip)->devi_ref--;
- mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_unconfigure(
- dev_info_t *rdip,
- dev_info_t **dipp,
- uint_t flags)
-{
- int circ, rv;
- int destroy;
- char *devnm;
- uint_t nflags;
- dev_info_t *pdip;
-
- if (dipp)
- *dipp = NULL;
-
- if (rdip == NULL)
- return (EINVAL);
-
- pdip = ddi_get_parent(rdip);
-
- ASSERT(pdip);
-
- /*
- * Check if caller holds pdip busy - can cause deadlocks during
- * devfs_clean()
- */
- if (DEVI_BUSY_OWNED(pdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
- " devinfo node(%p) is busy held", (void *)pdip);
- return (EINVAL);
- }
-
- destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
-
- devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
- ndi_devi_enter(pdip, &circ);
- (void) ddi_deviname(rdip, devnm);
- ndi_devi_exit(pdip, circ);
-
- /*
- * ddi_deviname() returns a component name with / prepended.
- */
- rv = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
- if (rv) {
- kmem_free(devnm, MAXNAMELEN + 1);
- return (rv);
- }
-
- ndi_devi_enter(pdip, &circ);
-
- /*
- * Recreate device name as it may have changed state (init/uninit)
- * when parent busy lock was dropped for devfs_clean()
- */
- (void) ddi_deviname(rdip, devnm);
-
- if (!e_ddi_branch_held(rdip)) {
- kmem_free(devnm, MAXNAMELEN + 1);
- ndi_devi_exit(pdip, circ);
- cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
- destroy ? "destroy" : "unconfigure", (void *)rdip);
- return (EINVAL);
- }
-
- /*
- * Release hold on the branch. This is ok since we are holding the
- * parent busy. If rdip is not removed, we must do a hold on the
- * branch before returning.
- */
- e_ddi_branch_rele(rdip);
-
- nflags = NDI_DEVI_OFFLINE;
- if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
- nflags |= NDI_DEVI_REMOVE;
- destroy = 1;
- } else {
- nflags |= NDI_UNCONFIG; /* uninit but don't remove */
- }
-
- if (flags & DEVI_BRANCH_EVENT)
- nflags |= NDI_POST_EVENT;
-
- if (i_ddi_devi_attached(pdip) &&
- (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
- rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
- } else {
- rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
- if (rv == NDI_SUCCESS) {
- ASSERT(!destroy || ddi_get_child(rdip) == NULL);
- rv = ndi_devi_offline(rdip, nflags);
- }
- }
-
- if (!destroy || rv != NDI_SUCCESS) {
- /* The dip still exists, so do a hold */
- e_ddi_branch_hold(rdip);
- }
-out:
- kmem_free(devnm, MAXNAMELEN + 1);
- ndi_devi_exit(pdip, circ);
- return (ndi2errno(rv));
-}
-
-int
-e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
-{
- return (e_ddi_branch_unconfigure(rdip, dipp,
- flag|DEVI_BRANCH_DESTROY));
-}
-
-/*
- * Number of chains for hash table
- */
-#define NUMCHAINS 17
-
-/*
- * Devinfo busy arg
- */
-struct devi_busy {
- int dv_total;
- int s_total;
- mod_hash_t *dv_hash;
- mod_hash_t *s_hash;
- int (*callback)(dev_info_t *, void *, uint_t);
- void *arg;
-};
-
-static int
-visit_dip(dev_info_t *dip, void *arg)
-{
- uintptr_t sbusy, dvbusy, ref;
- struct devi_busy *bsp = arg;
-
- ASSERT(bsp->callback);
-
- /*
- * A dip cannot be busy if its reference count is 0
- */
- if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
- return (bsp->callback(dip, bsp->arg, 0));
- }
-
- if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
- dvbusy = 0;
-
- /*
- * To catch device opens currently maintained on specfs common snodes.
- */
- if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
- sbusy = 0;
-
-#ifdef DEBUG
- if (ref < sbusy || ref < dvbusy) {
- cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
- "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
- }
-#endif
-
- dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
-
- return (bsp->callback(dip, bsp->arg, dvbusy));
-}
-
-static int
-visit_snode(struct snode *sp, void *arg)
-{
- uintptr_t sbusy;
- dev_info_t *dip;
- int count;
- struct devi_busy *bsp = arg;
-
- ASSERT(sp);
-
- /*
- * The stable lock is held. This prevents
- * the snode and its associated dip from
- * going away.
- */
- dip = NULL;
- count = spec_devi_open_count(sp, &dip);
-
- if (count <= 0)
- return (DDI_WALK_CONTINUE);
-
- ASSERT(dip);
-
- if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
- sbusy = count;
- else
- sbusy += count;
-
- if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
- cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
- "sbusy = %lu", "e_ddi_branch_referenced",
- (void *)dip, sbusy);
- }
-
- bsp->s_total += count;
-
- return (DDI_WALK_CONTINUE);
-}
-
-static void
-visit_dvnode(struct dv_node *dv, void *arg)
-{
- uintptr_t dvbusy;
- uint_t count;
- struct vnode *vp;
- struct devi_busy *bsp = arg;
-
- ASSERT(dv && dv->dv_devi);
-
- vp = DVTOV(dv);
-
- mutex_enter(&vp->v_lock);
- count = vp->v_count;
- mutex_exit(&vp->v_lock);
-
- if (!count)
- return;
-
- if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
- (mod_hash_val_t *)&dvbusy))
- dvbusy = count;
- else
- dvbusy += count;
-
- if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
- (mod_hash_val_t)dvbusy)) {
- cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
- "dvbusy=%lu", "e_ddi_branch_referenced",
- (void *)dv->dv_devi, dvbusy);
- }
-
- bsp->dv_total += count;
-}
-
-/*
- * Returns reference count on success or -1 on failure.
- */
-int
-e_ddi_branch_referenced(
- dev_info_t *rdip,
- int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
- void *arg)
-{
- int circ;
- char *path;
- dev_info_t *pdip;
- struct devi_busy bsa = {0};
-
- ASSERT(rdip);
-
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-
- ndi_hold_devi(rdip);
-
- pdip = ddi_get_parent(rdip);
-
- ASSERT(pdip);
-
- /*
- * Check if caller holds pdip busy - can cause deadlocks during
- * devfs_walk()
- */
- if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
- "devinfo branch(%p) not held or parent busy held",
- (void *)rdip);
- ndi_rele_devi(rdip);
- kmem_free(path, MAXPATHLEN);
- return (-1);
- }
-
- ndi_devi_enter(pdip, &circ);
- (void) ddi_pathname(rdip, path);
- ndi_devi_exit(pdip, circ);
-
- bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
- mod_hash_null_valdtor, sizeof (struct dev_info));
-
- bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
- mod_hash_null_valdtor, sizeof (struct snode));
-
- if (devfs_walk(path, visit_dvnode, &bsa)) {
- cmn_err(CE_WARN, "e_ddi_branch_referenced: "
- "devfs walk failed for: %s", path);
- kmem_free(path, MAXPATHLEN);
- bsa.s_total = bsa.dv_total = -1;
- goto out;
- }
-
- kmem_free(path, MAXPATHLEN);
-
- /*
- * Walk the snode table to detect device opens, which are currently
- * maintained on specfs common snodes.
- */
- spec_snode_walk(visit_snode, &bsa);
-
- if (callback == NULL)
- goto out;
-
- bsa.callback = callback;
- bsa.arg = arg;
-
- if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
- ndi_devi_enter(rdip, &circ);
- ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
- ndi_devi_exit(rdip, circ);
- }
-
-out:
- ndi_rele_devi(rdip);
- mod_hash_destroy_ptrhash(bsa.s_hash);
- mod_hash_destroy_ptrhash(bsa.dv_hash);
- return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
-}
diff --git a/usr/src/uts/sun4u/os/mach_startup.c b/usr/src/uts/sun4u/os/mach_startup.c
index 5a8366da64..78639b9a80 100644
--- a/usr/src/uts/sun4u/os/mach_startup.c
+++ b/usr/src/uts/sun4u/os/mach_startup.c
@@ -402,6 +402,13 @@ cpu_intrq_setup(struct cpu *cp)
/*ARGSUSED*/
void
+cpu_intrq_cleanup(struct cpu *cp)
+{
+ /* Interrupt mondo queues not applicable to sun4u */
+}
+
+/*ARGSUSED*/
+void
cpu_intrq_register(struct cpu *cp)
{
/* Interrupt/error queues not applicable to sun4u */
@@ -429,9 +436,29 @@ mach_htraptrace_cleanup(int cpuid)
}
void
+mach_descrip_startup_init(void)
+{
+ /*
+ * Only for sun4v.
+ * Initialize Machine description framework during startup.
+ */
+}
+void
+mach_descrip_startup_fini(void)
+{
+ /*
+ * Only for sun4v.
+ * Clean up Machine Description framework during startup.
+ */
+}
+
+void
mach_descrip_init(void)
{
- /* Obtain Machine description - only for sun4v */
+ /*
+ * Only for sun4v.
+ * Initialize Machine description framework.
+ */
}
void
@@ -440,6 +467,12 @@ hsvc_setup(void)
/* Setup hypervisor services, not applicable to sun4u */
}
+void
+load_mach_drivers(void)
+{
+ /* Currently no machine class (sun4u) specific drivers to load */
+}
+
/*
* Return true if the machine we're running on is a Positron.
* (Positron is an unsupported developers platform.)
diff --git a/usr/src/uts/sun4v/Makefile.files b/usr/src/uts/sun4v/Makefile.files
index 8fd0f3f1af..adfda1129f 100644
--- a/usr/src/uts/sun4v/Makefile.files
+++ b/usr/src/uts/sun4v/Makefile.files
@@ -34,6 +34,7 @@
# object lists
#
CORE_OBJS += bootops.o
+CORE_OBJS += prom_alloc.o
CORE_OBJS += cmp.o
CORE_OBJS += cpc_hwreg.o
CORE_OBJS += cpc_subr.o
@@ -44,11 +45,13 @@ CORE_OBJS += hardclk.o
CORE_OBJS += hat_sfmmu.o
CORE_OBJS += hat_kdi.o
CORE_OBJS += hsvc.o
+CORE_OBJS += lpad.o
CORE_OBJS += mach_cpu_states.o
CORE_OBJS += mach_ddi_impl.o
-CORE_OBJS += mach_descrip.o
+CORE_OBJS += mach_descrip.o
CORE_OBJS += mach_mp_startup.o
CORE_OBJS += mach_mp_states.o
+CORE_OBJS += mach_proc_init.o
CORE_OBJS += mach_sfmmu.o
CORE_OBJS += mach_startup.o
CORE_OBJS += mach_subr_asm.o
@@ -59,13 +62,30 @@ CORE_OBJS += mem_cage.o
CORE_OBJS += mem_config.o
CORE_OBJS += memlist_new.o
CORE_OBJS += ppage.o
+CORE_OBJS += promif_asr.o
+CORE_OBJS += promif_cpu.o
+CORE_OBJS += promif_emul.o
+CORE_OBJS += promif_mon.o
+CORE_OBJS += promif_io.o
+CORE_OBJS += promif_interp.o
+CORE_OBJS += promif_key.o
+CORE_OBJS += promif_power_off.o
+CORE_OBJS += promif_prop.o
+CORE_OBJS += promif_node.o
+CORE_OBJS += promif_reboot.o
+CORE_OBJS += promif_stree.o
+CORE_OBJS += promif_test.o
+CORE_OBJS += promif_version.o
CORE_OBJS += sfmmu_kdi.o
CORE_OBJS += swtch.o
CORE_OBJS += xhat_sfmmu.o
+CORE_OBJS += mdesc_diff.o
CORE_OBJS += mdesc_findname.o
CORE_OBJS += mdesc_findnodeprop.o
CORE_OBJS += mdesc_fini.o
+CORE_OBJS += mdesc_getbinsize.o
+CORE_OBJS += mdesc_getgen.o
CORE_OBJS += mdesc_getpropdata.o
CORE_OBJS += mdesc_getpropstr.o
CORE_OBJS += mdesc_getpropval.o
@@ -109,14 +129,26 @@ MEMTEST_OBJS += memtest.o memtest_asm.o \
#
QCN_OBJS = qcn.o
VNEX_OBJS = vnex.o
+CNEX_OBJS = cnex.o
GLVC_OBJS = glvc.o glvc_hcall.o
MDESC_OBJS = mdesc.o
+LDC_OBJS = ldc.o
+VLDC_OBJS = vldc.o
+VCC_OBJS = vcc.o
+VNET_OBJS = vnet.o vnet_gen.o
+VSW_OBJS = vsw.o
+VDC_OBJS = vdc.o
+VDS_OBJS = vds.o
#
# Misc modules
#
-OBPSYM_OBJS += obpsym.o obpsym_1275.o
BOOTDEV_OBJS += bootdev.o
+DR_CPU_OBJS += dr_cpu.o dr_util.o
+DS_OBJS = ds.o
+FAULT_ISO_OBJS = fault_iso.o
+OBPSYM_OBJS += obpsym.o obpsym_1275.o
+PLATSVC_OBJS = platsvc.o mdeg.o
#
# Performance Counter BackEnd (PCBE) Modules
@@ -163,4 +195,3 @@ ASSYM_DEPS += mach_sfmmu_asm.o sfmmu_asm.o
#
ARCFOUR_OBJS += arcfour.o arcfour_crypt.o
-
diff --git a/usr/src/uts/sun4v/Makefile.rules b/usr/src/uts/sun4v/Makefile.rules
index 8b49649880..6afc3c0da0 100644
--- a/usr/src/uts/sun4v/Makefile.rules
+++ b/usr/src/uts/sun4v/Makefile.rules
@@ -62,6 +62,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/sun4v/pcbe/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/sun4v/promif/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/sun4v/io/px/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -163,6 +167,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/sun4v/os/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/sun4v/pcbe/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/sun4v/promif/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/sun4v/vm/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/sun4v/Makefile.sun4v.shared b/usr/src/uts/sun4v/Makefile.sun4v.shared
index f5c55b3d47..1d6a5ad798 100644
--- a/usr/src/uts/sun4v/Makefile.sun4v.shared
+++ b/usr/src/uts/sun4v/Makefile.sun4v.shared
@@ -23,7 +23,7 @@
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
+#ident "%Z%%M% %I% %E% SMI"
#
# This makefile contains the common definitions for the sun4v unix
# and all sun4v implementation architecture dependent modules.
@@ -309,19 +309,26 @@ MACH_NOT_YET_KMODS = $(AUTOCONF_OBJS)
#
# Machine Specific Driver Modules (/kernel/drv):
#
-DRV_KMODS += vnex
-DRV_KMODS += qcn
-DRV_KMODS += dma
+DRV_KMODS += bge
+DRV_KMODS += cnex
DRV_KMODS += cpc
-DRV_KMODS += rootnex
-DRV_KMODS += trapstat
-DRV_KMODS += px
+DRV_KMODS += dma
+DRV_KMODS += ebus
DRV_KMODS += fpc
DRV_KMODS += glvc
-DRV_KMODS += bge
DRV_KMODS += mdesc
-DRV_KMODS += ebus
-DRV_KMODS += su
+DRV_KMODS += px
+DRV_KMODS += qcn
+DRV_KMODS += rootnex
+DRV_KMODS += su
+DRV_KMODS += trapstat
+DRV_KMODS += vcc
+DRV_KMODS += vdc
+DRV_KMODS += vds
+DRV_KMODS += vldc
+DRV_KMODS += vnet
+DRV_KMODS += vnex
+DRV_KMODS += vsw
$(CLOSED_BUILD)CLOSED_DRV_KMODS += memtest
$(CLOSED_BUILD)CLOSED_DRV_KMODS += ncp
@@ -354,8 +361,16 @@ SYS_KMODS +=
#
# 'User' Modules (/kernel/misc):
#
-MISC_KMODS += obpsym bootdev vis platmod
-
+MISC_KMODS += bootdev
+MISC_KMODS += dr_cpu
+MISC_KMODS += ds
+MISC_KMODS += fault_iso
+MISC_KMODS += ldc
+MISC_KMODS += obpsym
+MISC_KMODS += platmod
+MISC_KMODS += platsvc
+MISC_KMODS += vis
+
# md5 optimized for Niagara
#
MISC_KMODS += md5
diff --git a/usr/src/uts/sun4v/cnex/Makefile b/usr/src/uts/sun4v/cnex/Makefile
new file mode 100644
index 0000000000..8f520908a5
--- /dev/null
+++ b/usr/src/uts/sun4v/cnex/Makefile
@@ -0,0 +1,99 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/sun4v/cnex/Makefile
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the cnex driver kernel module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = cnex
+OBJECTS = $(CNEX_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(CNEX_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# Module dependencies
+#
+LDFLAGS += -dy -Nmisc/ldc
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/cpu/common_asm.s b/usr/src/uts/sun4v/cpu/common_asm.s
index 63ff1e77c6..8de96b3bed 100644
--- a/usr/src/uts/sun4v/cpu/common_asm.s
+++ b/usr/src/uts/sun4v/cpu/common_asm.s
@@ -1050,8 +1050,16 @@ flush_instr_mem(caddr_t vaddr, size_t len)
ta FAST_TRAP
brz,pt %o0, 1f
nop
- ba ptl1_panic
+
mov PTL1_BAD_HCALL, %g1
+
+ cmp %o0, H_ENOMAP
+ move %xcc, PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP, %g1
+
+ cmp %o0, H_EINVAL
+ move %xcc, PTL1_BAD_HCALL_UNMAP_PERM_EINVAL, %g1
+
+ ba,a ptl1_panic
1:
mov %g6, %o5
mov %g5, %o2
diff --git a/usr/src/uts/sun4v/cpu/generic.c b/usr/src/uts/sun4v/cpu/generic.c
index e753000a99..0a6d9394f1 100644
--- a/usr/src/uts/sun4v/cpu/generic.c
+++ b/usr/src/uts/sun4v/cpu/generic.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -54,140 +53,77 @@
#include <sys/panic.h>
#include <sys/dtrace.h>
#include <vm/seg_spt.h>
+#include <sys/simulate.h>
+#include <sys/fault.h>
-#define S_VAC_SIZE MMU_PAGESIZE /* XXXQ? */
-
-/*
- * Maximum number of contexts
- */
-#define MAX_NCTXS (1 << 13)
uint_t root_phys_addr_lo_mask = 0xffffffffU;
void
cpu_setup(void)
{
- extern int at_flags;
- extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int mmu_exported_pagesize_mask;
- extern int get_cpu_pagesizes(void);
-
- cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
-
- at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; /* XXXQ */
+ char *generic_isa_set[] = {
+ "sparcv9+vis",
+ "sparcv8plus+vis",
+ NULL
+ };
/*
- * Use the maximum number of contexts available for Spitfire unless
- * it has been tuned for debugging.
- * We are checking against 0 here since this value can be patched
- * while booting. It can not be patched via /etc/system since it
- * will be patched too late and thus cause the system to panic.
+ * The setup common to all CPU modules is done in cpu_setup_common
+ * routine.
*/
- if (nctxs == 0)
- nctxs = MAX_NCTXS;
+ cpu_setup_common(generic_isa_set);
- if (use_page_coloring) {
- do_pg_coloring = 1;
- if (use_virtual_coloring)
- do_virtual_coloring = 1;
- }
- /*
- * Initalize supported page sizes information before the PD.
- * If no information is available, then initialize the
- * mmu_exported_pagesize_mask to a reasonable value for that processor.
- */
- mmu_exported_pagesize_mask = get_cpu_pagesizes();
- if (mmu_exported_pagesize_mask <= 0) {
- mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) |
- (1 << TTE4M);
- }
-
- /*
- * Tune pp_slots to use up to 1/8th of the tlb entries.
- */
- pp_slots = MIN(8, MAXPP_SLOTS);
-
- /*
- * Block stores invalidate all pages of the d$ so pagecopy
- * et. al. do not need virtual translations with virtual
- * coloring taken into consideration.
- */
- pp_consistent_coloring = 0;
- isa_list =
- "sparcv9+vis sparcv9 "
- "sparcv8plus+vis sparcv8plus "
- "sparcv8 sparcv8-fsmuld sparcv7 sparc";
-
- /*
- * On Spitfire, there's a hole in the address space
- * that we must never map (the hardware only support 44-bits of
- * virtual address). Later CPUs are expected to have wider
- * supported address ranges.
- *
- * See address map on p23 of the UltraSPARC 1 user's manual.
- */
-/* XXXQ get from machine description */
- hole_start = (caddr_t)0x80000000000ull;
- hole_end = (caddr_t)0xfffff80000000000ull;
-
- /*
- * The kpm mapping window.
- * kpm_size:
- * The size of a single kpm range.
- * The overall size will be: kpm_size * vac_colors.
- * kpm_vbase:
- * The virtual start address of the kpm range within the kernel
- * virtual address space. kpm_vbase has to be kpm_size aligned.
- */
- kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
- kpm_size_shift = 41;
- kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
+ cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
- /*
- * The traptrace code uses either %tick or %stick for
- * timestamping. We have %stick so we can use it.
- */
- traptrace_use_stick = 1;
+ if (broken_md_flag) {
+ /*
+ * Turn on the missing bits supported by sun4v architecture in
+ * MMU pagesize mask returned by MD.
+ */
+ mmu_exported_pagesize_mask |= DEFAULT_SUN4V_MMU_PAGESIZE_MASK;
+ } else {
+ /*
+ * According to sun4v architecture each processor must
+ * support 8K, 64K and 4M page sizes. If any of the page
+ * size is missing from page size mask, then panic.
+ */
+ if ((mmu_exported_pagesize_mask &
+ DEFAULT_SUN4V_MMU_PAGESIZE_MASK) !=
+ DEFAULT_SUN4V_MMU_PAGESIZE_MASK)
+ cmn_err(CE_PANIC, "machine description"
+ " does not have required sun4v page sizes"
+ " 8K, 64K and 4M: MD mask is 0x%x",
+ mmu_exported_pagesize_mask);
+ }
/*
- * sun4v provides demap_all
+ * If processor supports the subset of full 64-bit virtual
+ * address space, then set VA hole accordingly.
*/
- if (!disable_delay_tlb_flush)
- delay_tlb_flush = 1;
+ if (va_bits < VA_ADDRESS_SPACE_BITS) {
+ hole_start = (caddr_t)(1ull << (va_bits - 1));
+ hole_end = (caddr_t)(0ull - (1ull << (va_bits - 1)));
+ } else {
+ hole_start = hole_end = 0;
+ }
}
-/*
- * Set the magic constants of the implementation.
- */
void
cpu_fiximp(struct cpu_node *cpunode)
{
- extern int vac_size, vac_shift;
- extern uint_t vac_mask;
- int i, a;
-
- /*
- * The assumption here is that fillsysinfo will eventually
- * have code to fill this info in from the PD.
- * We hard code this for now.
- * Once the PD access library is done this code
- * might need to be changed to get the info from the PD
- */
/*
- * Page Coloring defaults for sun4v
+ * The Cache node is optional in MD. Therefore in case "Cache"
+ * does not exists in MD, set the default L2 cache associativity,
+ * size, linesize for generic CPU module.
*/
- ecache_setsize = 0x100000;
- ecache_alignsize = 64;
- cpunode->ecache_setsize = 0x100000;
-
- vac_size = S_VAC_SIZE;
- vac_mask = MMU_PAGEMASK & (vac_size - 1);
- i = 0; a = vac_size;
- while (a >>= 1)
- ++i;
- vac_shift = i;
- shm_alignment = vac_size;
- vac = 0;
+ if (cpunode->ecache_size == 0)
+ cpunode->ecache_size = 0x100000;
+ if (cpunode->ecache_linesize == 0)
+ cpunode->ecache_linesize = 64;
+ if (cpunode->ecache_associativity == 0)
+ cpunode->ecache_associativity = 1;
}
void
@@ -220,7 +156,9 @@ cpu_init_private(struct cpu *cp)
* unit sharing information from the Machine Description table.
* It defaults to the CPU id in the absence of such information.
*/
- cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
+ cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
+ if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
+ cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
}
void
@@ -246,6 +184,96 @@ cpu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes)
}
/*
+ * Sun4v kernel must emulate code a generic sun4v processor may not support
+ * i.e. VIS1 and VIS2.
+ */
+#define IS_FLOAT(i) (((i) & 0x1000000) != 0)
+#define IS_IBIT_SET(x) (x & 0x2000)
+#define IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
+#define IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi) \
+ (op == 3 && (op3 == IOP_V8_LDDFA || \
+ op3 == IOP_V8_STDFA) && asi > ASI_SNFL)
+int
+vis1_partial_support(struct regs *rp, k_siginfo_t *siginfo, uint_t *fault)
+{
+ char *badaddr;
+ int instr;
+ uint_t optype, op3, asi;
+ uint_t rd, ignor;
+
+ if (!USERMODE(rp->r_tstate))
+ return (-1);
+
+ instr = fetch_user_instr((caddr_t)rp->r_pc);
+
+ rd = (instr >> 25) & 0x1f;
+ optype = (instr >> 30) & 0x3;
+ op3 = (instr >> 19) & 0x3f;
+ ignor = (instr >> 5) & 0xff;
+ if (IS_IBIT_SET(instr)) {
+ asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
+ TSTATE_ASI_MASK);
+ } else {
+ asi = ignor;
+ }
+
+ if (!IS_VIS1(optype, op3) &&
+ !IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi)) {
+ return (-1);
+ }
+ switch (simulate_unimp(rp, &badaddr)) {
+ case SIMU_RETRY:
+ break; /* regs are already set up */
+ /*NOTREACHED*/
+
+ case SIMU_SUCCESS:
+ /*
+ * skip the successfully
+ * simulated instruction
+ */
+ rp->r_pc = rp->r_npc;
+ rp->r_npc += 4;
+ break;
+ /*NOTREACHED*/
+
+ case SIMU_FAULT:
+ siginfo->si_signo = SIGSEGV;
+ siginfo->si_code = SEGV_MAPERR;
+ siginfo->si_addr = badaddr;
+ *fault = FLTBOUNDS;
+ break;
+
+ case SIMU_DZERO:
+ siginfo->si_signo = SIGFPE;
+ siginfo->si_code = FPE_INTDIV;
+ siginfo->si_addr = (caddr_t)rp->r_pc;
+ *fault = FLTIZDIV;
+ break;
+
+ case SIMU_UNALIGN:
+ siginfo->si_signo = SIGBUS;
+ siginfo->si_code = BUS_ADRALN;
+ siginfo->si_addr = badaddr;
+ *fault = FLTACCESS;
+ break;
+
+ case SIMU_ILLEGAL:
+ default:
+ siginfo->si_signo = SIGILL;
+ op3 = (instr >> 19) & 0x3F;
+ if ((IS_FLOAT(instr) && (op3 == IOP_V8_STQFA) ||
+ (op3 == IOP_V8_STDFA)))
+ siginfo->si_code = ILL_ILLADR;
+ else
+ siginfo->si_code = ILL_ILLOPC;
+ siginfo->si_addr = (caddr_t)rp->r_pc;
+ *fault = FLTILL;
+ break;
+ }
+ return (0);
+}
+
+/*
* Trapstat support for generic sun4v processor
*/
int
diff --git a/usr/src/uts/sun4v/cpu/niagara.c b/usr/src/uts/sun4v/cpu/niagara.c
index d2413f773e..125ca8e224 100644
--- a/usr/src/uts/sun4v/cpu/niagara.c
+++ b/usr/src/uts/sun4v/cpu/niagara.c
@@ -58,12 +58,8 @@
#include <sys/trapstat.h>
#include <sys/hsvc.h>
-#define S_VAC_SIZE MMU_PAGESIZE /* XXXQ? */
-
-/*
- * Maximum number of contexts
- */
-#define MAX_NCTXS (1 << 13)
+#define NI_MMU_PAGESIZE_MASK ((1 << TTE8K) | (1 << TTE64K) | (1 << TTE4M) \
+ | (1 << TTE256M))
uint_t root_phys_addr_lo_mask = 0xffffffffU;
static niagara_mmustat_t *cpu_tstat_va; /* VA of mmustat buffer */
@@ -82,12 +78,16 @@ static hsvc_info_t niagara_hsvc = {
void
cpu_setup(void)
{
- extern int at_flags;
- extern int disable_delay_tlb_flush, delay_tlb_flush;
extern int mmu_exported_pagesize_mask;
- extern int get_cpu_pagesizes(void);
extern int cpc_has_overflow_intr;
int status;
+ char *ni_isa_set[] = {
+ "sparcv9+vis",
+ "sparcv9+vis2",
+ "sparcv8plus+vis",
+ "sparcv8plus+vis2",
+ NULL
+ };
/*
* Negotiate the API version for Niagara specific hypervisor
@@ -102,49 +102,29 @@ cpu_setup(void)
niagara_hsvc_available = B_FALSE;
}
- cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
- at_flags = EF_SPARC_SUN_US3 | EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
-
/*
- * Use the maximum number of contexts available for Spitfire unless
- * it has been tuned for debugging.
- * We are checking against 0 here since this value can be patched
- * while booting. It can not be patched via /etc/system since it
- * will be patched too late and thus cause the system to panic.
+ * The setup common to all CPU modules is done in cpu_setup_common
+ * routine.
*/
- if (nctxs == 0)
- nctxs = MAX_NCTXS;
+ cpu_setup_common(ni_isa_set);
- if (use_page_coloring) {
- do_pg_coloring = 1;
- if (use_virtual_coloring)
- do_virtual_coloring = 1;
- }
- /*
- * Initalize supported page sizes information before the PD.
- * If no information is available, then initialize the
- * mmu_exported_pagesize_mask to a reasonable value for that processor.
- */
- mmu_exported_pagesize_mask = get_cpu_pagesizes();
- if (mmu_exported_pagesize_mask <= 0) {
- mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) |
- (1 << TTE4M) | (1 << TTE256M);
- }
-
- /*
- * Tune pp_slots to use up to 1/8th of the tlb entries.
- */
- pp_slots = MIN(8, MAXPP_SLOTS);
+ cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
- /*
- * Block stores invalidate all pages of the d$ so pagecopy
- * et. al. do not need virtual translations with virtual
- * coloring taken into consideration.
- */
- pp_consistent_coloring = 0;
- isa_list =
- "sparcv9 sparcv8plus sparcv8 sparcv8-fsmuld sparcv7 "
- "sparc sparcv9+vis sparcv9+vis2 sparcv8plus+vis sparcv8plus+vis2";
+ if (broken_md_flag) {
+ /*
+ * Turn on the missing bits supported by Niagara CPU in
+ * MMU pagesize mask returned by MD.
+ */
+ mmu_exported_pagesize_mask |= NI_MMU_PAGESIZE_MASK;
+ } else {
+ if ((mmu_exported_pagesize_mask &
+ DEFAULT_SUN4V_MMU_PAGESIZE_MASK) !=
+ DEFAULT_SUN4V_MMU_PAGESIZE_MASK)
+ cmn_err(CE_PANIC, "machine description"
+ " does not have required sun4v page sizes"
+ " 8K, 64K and 4M: MD mask is 0x%x",
+ mmu_exported_pagesize_mask);
+ }
cpu_hwcap_flags |= AV_SPARC_ASI_BLK_INIT;
@@ -155,84 +135,34 @@ cpu_setup(void)
* and must never be mapped. In addition, software must not use
* pages within 4GB of the VA hole as instruction pages to
* avoid problems with prefetching into the VA hole.
- *
- * VA hole information should be obtained from the machine
- * description.
- */
- hole_start = (caddr_t)(0x800000000000ul - (1ul << 32));
- hole_end = (caddr_t)(0xffff800000000000ul + (1ul << 32));
-
- /*
- * The kpm mapping window.
- * kpm_size:
- * The size of a single kpm range.
- * The overall size will be: kpm_size * vac_colors.
- * kpm_vbase:
- * The virtual start address of the kpm range within the kernel
- * virtual address space. kpm_vbase has to be kpm_size aligned.
*/
- kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
- kpm_size_shift = 41;
- kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
+ hole_start = (caddr_t)((1ull << (va_bits - 1)) - (1ull << 32));
+ hole_end = (caddr_t)((0ull - (1ull << (va_bits - 1))) + (1ull << 32));
/*
- * The traptrace code uses either %tick or %stick for
- * timestamping. We have %stick so we can use it.
- */
- traptrace_use_stick = 1;
-
- /*
- * sun4v provides demap_all
- */
- if (!disable_delay_tlb_flush)
- delay_tlb_flush = 1;
- /*
* Niagara has a performance counter overflow interrupt
*/
cpc_has_overflow_intr = 1;
}
-#define MB * 1024 * 1024
+#define MB(n) ((n) * 1024 * 1024)
/*
* Set the magic constants of the implementation.
*/
void
cpu_fiximp(struct cpu_node *cpunode)
{
- extern int vac_size, vac_shift;
- extern uint_t vac_mask;
- int i, a;
-
/*
- * The assumption here is that fillsysinfo will eventually
- * have code to fill this info in from the PD.
- * We hard code this for niagara now.
- * Once the PD access library is done this code
- * might need to be changed to get the info from the PD
+ * The Cache node is optional in MD. Therefore in case "Cache"
+ * node does not exists in MD, set the default L2 cache associativity,
+ * size, linesize.
*/
if (cpunode->ecache_size == 0)
- cpunode->ecache_size = 3 MB;
+ cpunode->ecache_size = MB(3);
if (cpunode->ecache_linesize == 0)
cpunode->ecache_linesize = 64;
if (cpunode->ecache_associativity == 0)
cpunode->ecache_associativity = 12;
-
- cpunode->ecache_setsize =
- cpunode->ecache_size / cpunode->ecache_associativity;
-
- if (ecache_setsize == 0)
- ecache_setsize = cpunode->ecache_setsize;
- if (ecache_alignsize == 0)
- ecache_alignsize = cpunode->ecache_linesize;
-
- vac_size = S_VAC_SIZE;
- vac_mask = MMU_PAGEMASK & (vac_size - 1);
- i = 0; a = vac_size;
- while (a >>= 1)
- ++i;
- vac_shift = i;
- shm_alignment = vac_size;
- vac = 0;
}
static int niagara_cpucnt;
@@ -243,13 +173,13 @@ cpu_init_private(struct cpu *cp)
extern int niagara_kstat_init(void);
/*
- * This code change assumes that the virtual cpu ids are identical
- * to the physical cpu ids which is true for ontario but not for
- * niagara in general.
- * This is a temporary fix which will later be modified to obtain
- * the execution unit sharing information from MD table.
+ * The cpu_ipipe field is initialized based on the execution
+ * unit sharing information from the MD. It defaults to the
+ * virtual CPU id in the absence of such information.
*/
- cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id / 4);
+ cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
+ if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
+ cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
ASSERT(MUTEX_HELD(&cpu_lock));
if (niagara_cpucnt++ == 0 && niagara_hsvc_available == B_TRUE) {
diff --git a/usr/src/uts/sun4v/dr_cpu/Makefile b/usr/src/uts/sun4v/dr_cpu/Makefile
new file mode 100644
index 0000000000..828679baa9
--- /dev/null
+++ b/usr/src/uts/sun4v/dr_cpu/Makefile
@@ -0,0 +1,93 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = dr_cpu
+OBJECTS = $(DR_CPU_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DR_CPU_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += -v
+
+#
+# Turn on doubleword alignment for 64 bit registers
+#
+CFLAGS += -dalign
+
+#
+# Module Dependencies
+#
+LDFLAGS += -dy -Nmisc/ds
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/ds/Makefile b/usr/src/uts/sun4v/ds/Makefile
new file mode 100644
index 0000000000..41c8351a4a
--- /dev/null
+++ b/usr/src/uts/sun4v/ds/Makefile
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the zp kernel module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ds
+OBJECTS = $(DS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += -v
+
+#
+# Turn on doubleword alignment for 64 bit registers
+#
+CFLAGS += -dalign
+
+#
+# Module Dependencies
+#
+LDFLAGS += -dy -Nmisc/ldc
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/fault_iso/Makefile b/usr/src/uts/sun4v/fault_iso/Makefile
new file mode 100644
index 0000000000..37188fcfff
--- /dev/null
+++ b/usr/src/uts/sun4v/fault_iso/Makefile
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the fault_iso kernel module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = fault_iso
+OBJECTS = $(FAULT_ISO_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(FAULT_ISO_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += -v
+
+#
+# Turn on doubleword alignment for 64 bit registers
+#
+CFLAGS += -dalign
+
+#
+# Module Dependencies
+#
+LDFLAGS += -dy -Nmisc/ds
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/io/cnex.c b/usr/src/uts/sun4v/io/cnex.c
new file mode 100644
index 0000000000..08a70cc810
--- /dev/null
+++ b/usr/src/uts/sun4v/io/cnex.c
@@ -0,0 +1,1133 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Logical domain channel devices are devices implemented entirely
+ * in software; cnex is the nexus for channel-devices. They use
+ * the HV channel interfaces via the LDC transport module to send
+ * and receive data and to register callbacks.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/devops.h>
+#include <sys/instance.h>
+#include <sys/modctl.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/systm.h>
+#include <sys/mkdev.h>
+#include <sys/machsystm.h>
+#include <sys/intr.h>
+#include <sys/ddi_intr_impl.h>
+#include <sys/ivintr.h>
+#include <sys/hypervisor_api.h>
+#include <sys/ldc.h>
+#include <sys/cnex.h>
+#include <sys/mach_descrip.h>
+
+/*
+ * Internal functions/information
+ */
+static struct cnex_pil_map cnex_class_to_pil[] = {
+ {LDC_DEV_GENERIC, PIL_3},
+ {LDC_DEV_BLK, PIL_4},
+ {LDC_DEV_BLK_SVC, PIL_3},
+ {LDC_DEV_NT, PIL_6},
+ {LDC_DEV_NT_SVC, PIL_4},
+ {LDC_DEV_SERIAL, PIL_6}
+};
+#define CNEX_MAX_DEVS (sizeof (cnex_class_to_pil) / \
+ sizeof (cnex_class_to_pil[0]))
+
+#define SUN4V_REG_SPEC2CFG_HDL(x) ((x >> 32) & ~(0xfull << 28))
+
+static hrtime_t cnex_pending_tmout = 2ull * NANOSEC; /* 2 secs in nsecs */
+static void *cnex_state;
+
+static void cnex_intr_redist(void *arg);
+static uint_t cnex_intr_wrapper(caddr_t arg);
+
+/*
+ * Debug info
+ */
+#ifdef DEBUG
+
+/*
+ * Print debug messages
+ *
+ * set cnexdbg to 0xf for enabling all msgs
+ * 0x8 - Errors
+ * 0x4 - Warnings
+ * 0x2 - All debug messages
+ * 0x1 - Minimal debug messages
+ */
+
+int cnexdbg = 0x8;
+
+static void
+cnexdebug(const char *fmt, ...)
+{
+ char buf[512];
+ va_list ap;
+
+ va_start(ap, fmt);
+ (void) vsprintf(buf, fmt, ap);
+ va_end(ap);
+
+ cmn_err(CE_CONT, "%s\n", buf);
+}
+
+#define D1 \
+if (cnexdbg & 0x01) \
+ cnexdebug
+
+#define D2 \
+if (cnexdbg & 0x02) \
+ cnexdebug
+
+#define DWARN \
+if (cnexdbg & 0x04) \
+ cnexdebug
+
+#define DERR \
+if (cnexdbg & 0x08) \
+ cnexdebug
+
+#else
+
+#define D1
+#define D2
+#define DWARN
+#define DERR
+
+#endif
+
+/*
+ * Config information
+ */
+static int cnex_attach(dev_info_t *, ddi_attach_cmd_t);
+static int cnex_detach(dev_info_t *, ddi_detach_cmd_t);
+static int cnex_open(dev_t *, int, int, cred_t *);
+static int cnex_close(dev_t, int, int, cred_t *);
+static int cnex_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+static int cnex_ctl(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
+ void *);
+
+static struct bus_ops cnex_bus_ops = {
+ BUSO_REV,
+ nullbusmap, /* bus_map */
+ NULL, /* bus_get_intrspec */
+ NULL, /* bus_add_intrspec */
+ NULL, /* bus_remove_intrspec */
+ i_ddi_map_fault, /* bus_map_fault */
+ ddi_no_dma_map, /* bus_dma_map */
+ ddi_no_dma_allochdl, /* bus_dma_allochdl */
+ NULL, /* bus_dma_freehdl */
+ NULL, /* bus_dma_bindhdl */
+ NULL, /* bus_dma_unbindhdl */
+ NULL, /* bus_dma_flush */
+ NULL, /* bus_dma_win */
+ NULL, /* bus_dma_ctl */
+ cnex_ctl, /* bus_ctl */
+ ddi_bus_prop_op, /* bus_prop_op */
+ 0, /* bus_get_eventcookie */
+ 0, /* bus_add_eventcall */
+ 0, /* bus_remove_eventcall */
+ 0, /* bus_post_event */
+ NULL, /* bus_intr_ctl */
+ NULL, /* bus_config */
+ NULL, /* bus_unconfig */
+ NULL, /* bus_fm_init */
+ NULL, /* bus_fm_fini */
+ NULL, /* bus_fm_access_enter */
+ NULL, /* bus_fm_access_exit */
+ NULL, /* bus_power */
+ NULL /* bus_intr_op */
+};
+
+static struct cb_ops cnex_cb_ops = {
+ cnex_open, /* open */
+ cnex_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ cnex_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_MP | D_NEW | D_HOTPLUG /* Driver compatibility flag */
+};
+
+static struct dev_ops cnex_ops = {
+ DEVO_REV, /* devo_rev, */
+ 0, /* refcnt */
+ ddi_getinfo_1to1, /* info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ cnex_attach, /* attach */
+ cnex_detach, /* detach */
+ nodev, /* reset */
+ &cnex_cb_ops, /* driver operations */
+ &cnex_bus_ops, /* bus operations */
+ nulldev /* power */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "sun4v channel-devices nexus driver v%I%",
+ &cnex_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modldrv, NULL
+};
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = ddi_soft_state_init(&cnex_state,
+ sizeof (cnex_soft_state_t), 0)) != 0) {
+ return (err);
+ }
+ if ((err = mod_install(&modlinkage)) != 0) {
+ ddi_soft_state_fini(&cnex_state);
+ return (err);
+ }
+ return (0);
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modlinkage)) != 0)
+ return (err);
+ ddi_soft_state_fini(&cnex_state);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Callback function invoked by the interrupt redistribution
+ * framework. This will redirect interrupts at CPUs that are
+ * currently available in the system.
+ */
+static void
+cnex_intr_redist(void *arg)
+{
+ cnex_ldc_t *cldcp;
+ cnex_soft_state_t *cnex_ssp = arg;
+ int intr_state;
+ hrtime_t start;
+ uint64_t cpuid;
+ int rv;
+
+ ASSERT(cnex_ssp != NULL);
+ mutex_enter(&cnex_ssp->clist_lock);
+
+ cldcp = cnex_ssp->clist;
+ while (cldcp != NULL) {
+
+ mutex_enter(&cldcp->lock);
+
+ if (cldcp->tx.hdlr) {
+ /*
+ * Don't do anything for disabled interrupts.
+ */
+ rv = hvldc_intr_getvalid(cnex_ssp->cfghdl,
+ cldcp->tx.ino, &intr_state);
+ if (rv) {
+ DWARN("cnex_intr_redist: tx ino=0x%llx, "
+ "can't get valid\n", cldcp->tx.ino);
+ mutex_exit(&cldcp->lock);
+ mutex_exit(&cnex_ssp->clist_lock);
+ return;
+ }
+ if (intr_state == HV_INTR_NOTVALID) {
+ cldcp = cldcp->next;
+ continue;
+ }
+
+ cpuid = intr_dist_cpuid();
+
+ /* disable interrupts */
+ rv = hvldc_intr_setvalid(cnex_ssp->cfghdl,
+ cldcp->tx.ino, HV_INTR_NOTVALID);
+ if (rv) {
+ DWARN("cnex_intr_redist: tx ino=0x%llx, "
+ "can't set valid\n", cldcp->tx.ino);
+ mutex_exit(&cldcp->lock);
+ mutex_exit(&cnex_ssp->clist_lock);
+ return;
+ }
+
+ /*
+ * Make a best effort to wait for pending interrupts
+ * to finish. There is not much we can do if we timeout.
+ */
+ start = gethrtime();
+
+ do {
+ rv = hvldc_intr_getstate(cnex_ssp->cfghdl,
+ cldcp->tx.ino, &intr_state);
+ if (rv) {
+ DWARN("cnex_intr_redist: tx ino=0x%llx,"
+ "can't get state\n", cldcp->tx.ino);
+ mutex_exit(&cldcp->lock);
+ mutex_exit(&cnex_ssp->clist_lock);
+ return;
+ }
+
+ if ((gethrtime() - start) > cnex_pending_tmout)
+ break;
+
+ } while (!panicstr &&
+ intr_state == HV_INTR_DELIVERED_STATE);
+
+ (void) hvldc_intr_settarget(cnex_ssp->cfghdl,
+ cldcp->tx.ino, cpuid);
+ (void) hvldc_intr_setvalid(cnex_ssp->cfghdl,
+ cldcp->tx.ino, HV_INTR_VALID);
+ }
+
+ if (cldcp->rx.hdlr) {
+ /*
+ * Don't do anything for disabled interrupts.
+ */
+ rv = hvldc_intr_getvalid(cnex_ssp->cfghdl,
+ cldcp->rx.ino, &intr_state);
+ if (rv) {
+ DWARN("cnex_intr_redist: rx ino=0x%llx, "
+ "can't get valid\n", cldcp->rx.ino);
+ mutex_exit(&cldcp->lock);
+ mutex_exit(&cnex_ssp->clist_lock);
+ return;
+ }
+ if (intr_state == HV_INTR_NOTVALID) {
+ cldcp = cldcp->next;
+ continue;
+ }
+
+ cpuid = intr_dist_cpuid();
+
+ /* disable interrupts */
+ rv = hvldc_intr_setvalid(cnex_ssp->cfghdl,
+ cldcp->rx.ino, HV_INTR_NOTVALID);
+ if (rv) {
+ DWARN("cnex_intr_redist: rx ino=0x%llx, "
+ "can't set valid\n", cldcp->rx.ino);
+ mutex_exit(&cldcp->lock);
+ mutex_exit(&cnex_ssp->clist_lock);
+ return;
+ }
+
+ /*
+ * Make a best effort to wait for pending interrupts
+ * to finish. There is not much we can do if we timeout.
+ */
+ start = gethrtime();
+
+ do {
+ rv = hvldc_intr_getstate(cnex_ssp->cfghdl,
+ cldcp->rx.ino, &intr_state);
+ if (rv) {
+ DWARN("cnex_intr_redist: rx ino=0x%llx,"
+ "can't set state\n", cldcp->rx.ino);
+ mutex_exit(&cldcp->lock);
+ mutex_exit(&cnex_ssp->clist_lock);
+ return;
+ }
+
+ if ((gethrtime() - start) > cnex_pending_tmout)
+ break;
+
+ } while (!panicstr &&
+ intr_state == HV_INTR_DELIVERED_STATE);
+
+ (void) hvldc_intr_settarget(cnex_ssp->cfghdl,
+ cldcp->rx.ino, cpuid);
+ (void) hvldc_intr_setvalid(cnex_ssp->cfghdl,
+ cldcp->rx.ino, HV_INTR_VALID);
+ }
+
+ mutex_exit(&cldcp->lock);
+
+ /* next channel */
+ cldcp = cldcp->next;
+ }
+
+ mutex_exit(&cnex_ssp->clist_lock);
+}
+
+/*
+ * Exported interface to register a LDC endpoint with
+ * the channel nexus
+ *
+ * Looks up the channel's Tx/Rx inos in the machine description (MD),
+ * allocates a cnex_ldc_t and links it onto the nexus channel list.
+ * Returns 0 on success, EINVAL if the channel is already registered
+ * or is not present in the MD, ENXIO on MD access failure.
+ */
+static int
+cnex_reg_chan(dev_info_t *dip, uint64_t id, ldc_dev_t devclass)
+{
+	int idx;
+	cnex_ldc_t *cldcp;
+	int listsz, num_nodes, num_channels;
+	md_t *mdp = NULL;
+	mde_cookie_t rootnode, *listp = NULL;
+	uint64_t tmp_id, rxino, txino;
+	boolean_t found = B_FALSE;	/* MD node for 'id' located */
+	cnex_soft_state_t *cnex_ssp;
+	int status, instance;
+
+	/* Get device instance and structure */
+	instance = ddi_get_instance(dip);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	/* Check to see if channel is already registered */
+	mutex_enter(&cnex_ssp->clist_lock);
+	cldcp = cnex_ssp->clist;
+	while (cldcp) {
+		if (cldcp->id == id) {
+			DWARN("cnex_reg_chan: channel 0x%llx exists\n", id);
+			mutex_exit(&cnex_ssp->clist_lock);
+			return (EINVAL);
+		}
+		cldcp = cldcp->next;
+	}
+
+	/* Get the Tx/Rx inos from the MD */
+	if ((mdp = md_get_handle()) == NULL) {
+		DWARN("cnex_reg_chan: cannot init MD\n");
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (ENXIO);
+	}
+	num_nodes = md_node_count(mdp);
+	ASSERT(num_nodes > 0);
+
+	listsz = num_nodes * sizeof (mde_cookie_t);
+	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
+
+	rootnode = md_root_node(mdp);
+
+	/* search for all channel_endpoint nodes */
+	num_channels = md_scan_dag(mdp, rootnode,
+	    md_find_name(mdp, "channel-endpoint"),
+	    md_find_name(mdp, "fwd"), listp);
+	if (num_channels <= 0) {
+		DWARN("cnex_reg_chan: invalid channel id\n");
+		kmem_free(listp, listsz);
+		(void) md_fini_handle(mdp);
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (EINVAL);
+	}
+
+	for (idx = 0; idx < num_channels; idx++) {
+
+		/* Get the channel ID */
+		status = md_get_prop_val(mdp, listp[idx], "id", &tmp_id);
+		if (status) {
+			DWARN("cnex_reg_chan: cannot read LDC ID\n");
+			kmem_free(listp, listsz);
+			(void) md_fini_handle(mdp);
+			mutex_exit(&cnex_ssp->clist_lock);
+			return (ENXIO);
+		}
+		if (tmp_id != id)
+			continue;
+
+		/* Get the Tx and Rx ino */
+		status = md_get_prop_val(mdp, listp[idx], "tx-ino", &txino);
+		if (status) {
+			DWARN("cnex_reg_chan: cannot read Tx ino\n");
+			kmem_free(listp, listsz);
+			(void) md_fini_handle(mdp);
+			mutex_exit(&cnex_ssp->clist_lock);
+			return (ENXIO);
+		}
+		status = md_get_prop_val(mdp, listp[idx], "rx-ino", &rxino);
+		if (status) {
+			DWARN("cnex_reg_chan: cannot read Rx ino\n");
+			kmem_free(listp, listsz);
+			(void) md_fini_handle(mdp);
+			mutex_exit(&cnex_ssp->clist_lock);
+			return (ENXIO);
+		}
+
+		/* matching endpoint found; txino/rxino are now valid */
+		found = B_TRUE;
+		break;
+	}
+	kmem_free(listp, listsz);
+	(void) md_fini_handle(mdp);
+
+	/*
+	 * Fail if the channel id was never matched in the MD scan.
+	 * Previously txino/rxino would have been used uninitialized
+	 * in that case.
+	 */
+	if (!found) {
+		DWARN("cnex_reg_chan: channel 0x%llx not found in MD\n", id);
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (EINVAL);
+	}
+
+	/* Allocate a new channel structure */
+	cldcp = kmem_zalloc(sizeof (*cldcp), KM_SLEEP);
+
+	/* Initialize the channel */
+	mutex_init(&cldcp->lock, NULL, MUTEX_DRIVER, NULL);
+
+	cldcp->id = id;
+	cldcp->tx.ino = txino;
+	cldcp->rx.ino = rxino;
+	cldcp->devclass = devclass;
+
+	/* add channel to nexus channel list */
+	cldcp->next = cnex_ssp->clist;
+	cnex_ssp->clist = cldcp;
+
+	mutex_exit(&cnex_ssp->clist_lock);
+
+	return (0);
+}
+
+/*
+ * Add Tx/Rx interrupt handler for the channel
+ *
+ * Registers 'hdlr' (with arg1/arg2) for the given interrupt direction,
+ * installs a wrapper in the system ivec table, and programs the HV
+ * cookie, target CPU, state and valid bits for the ino.
+ * Returns 0, EINVAL (bad channel/type, handler exists) or ENXIO
+ * (hypervisor call failed).
+ */
+static int
+cnex_add_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype,
+    uint_t (*hdlr)(), caddr_t arg1, caddr_t arg2)
+{
+	int rv, idx, pil;
+	cnex_ldc_t *cldcp;
+	cnex_intr_t *iinfo;
+	uint64_t cpuid;
+	cnex_soft_state_t *cnex_ssp;
+	int instance;
+
+	/* Get device instance and structure */
+	instance = ddi_get_instance(dip);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	/* get channel info */
+	mutex_enter(&cnex_ssp->clist_lock);
+	cldcp = cnex_ssp->clist;
+	while (cldcp) {
+		if (cldcp->id == id)
+			break;
+		cldcp = cldcp->next;
+	}
+	if (cldcp == NULL) {
+		DWARN("cnex_add_intr: channel 0x%llx does not exist\n", id);
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (EINVAL);
+	}
+	mutex_exit(&cnex_ssp->clist_lock);
+
+	/* get channel lock */
+	mutex_enter(&cldcp->lock);
+
+	/* get interrupt type */
+	if (itype == CNEX_TX_INTR) {
+		iinfo = &(cldcp->tx);
+	} else if (itype == CNEX_RX_INTR) {
+		iinfo = &(cldcp->rx);
+	} else {
+		/*
+		 * Fixed: the message took no format argument but 'id'
+		 * was being passed to DWARN.
+		 */
+		DWARN("cnex_add_intr: invalid interrupt type\n");
+		mutex_exit(&cldcp->lock);
+		return (EINVAL);
+	}
+
+	/* check if a handler is already added */
+	if (iinfo->hdlr != 0) {
+		DWARN("cnex_add_intr: interrupt handler exists\n");
+		mutex_exit(&cldcp->lock);
+		return (EINVAL);
+	}
+
+	/* save interrupt handler info */
+	iinfo->hdlr = hdlr;
+	iinfo->arg1 = arg1;
+	iinfo->arg2 = arg2;
+
+	iinfo->ssp = cnex_ssp;
+
+	/*
+	 * FIXME - generate the interrupt cookie
+	 * using the interrupt registry
+	 */
+	iinfo->icookie = cnex_ssp->cfghdl | iinfo->ino;
+
+	D1("cnex_add_intr: add hdlr, cfghdl=0x%llx, ino=0x%llx, "
+	    "cookie=0x%llx\n", cnex_ssp->cfghdl, iinfo->ino, iinfo->icookie);
+
+	/* Pick a PIL on the basis of the channel's devclass */
+	for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) {
+		if (cldcp->devclass == cnex_class_to_pil[idx].devclass) {
+			pil = cnex_class_to_pil[idx].pil;
+			break;
+		}
+	}
+
+	/* add interrupt to solaris ivec table */
+	VERIFY(add_ivintr(iinfo->icookie, pil, cnex_intr_wrapper,
+	    (caddr_t)iinfo, NULL) == 0);
+
+	/*
+	 * Set the cookie in the HV.  Fixed: the return value was
+	 * previously assigned but never checked.
+	 */
+	rv = hvldc_intr_setcookie(cnex_ssp->cfghdl, iinfo->ino, iinfo->icookie);
+	if (rv) {
+		DWARN("cnex_add_intr: ino=0x%llx, cannot set cookie\n",
+		    iinfo->ino);
+		goto hv_error;
+	}
+
+	/* pick next CPU in the domain for this channel */
+	cpuid = intr_dist_cpuid();
+
+	/* set the target CPU and then enable interrupts */
+	rv = hvldc_intr_settarget(cnex_ssp->cfghdl, iinfo->ino, cpuid);
+	if (rv) {
+		DWARN("cnex_add_intr: ino=0x%llx, cannot set target cpu\n",
+		    iinfo->ino);
+		goto hv_error;
+	}
+	rv = hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino,
+	    HV_INTR_IDLE_STATE);
+	if (rv) {
+		DWARN("cnex_add_intr: ino=0x%llx, cannot set state\n",
+		    iinfo->ino);
+		goto hv_error;
+	}
+	rv = hvldc_intr_setvalid(cnex_ssp->cfghdl, iinfo->ino, HV_INTR_VALID);
+	if (rv) {
+		DWARN("cnex_add_intr: ino=0x%llx, cannot set valid\n",
+		    iinfo->ino);
+		goto hv_error;
+	}
+
+	mutex_exit(&cldcp->lock);
+	return (0);
+
+hv_error:
+	(void) rem_ivintr(iinfo->icookie, NULL);
+	mutex_exit(&cldcp->lock);
+	return (ENXIO);
+}
+
+
+/*
+ * Exported interface to unregister a LDC endpoint with
+ * the channel nexus
+ *
+ * Unlinks the channel from the nexus list and frees it.  Fails with
+ * EINVAL if the channel is unknown and ENXIO if either interrupt
+ * handler is still installed (callers must remove them first).
+ */
+static int
+cnex_unreg_chan(dev_info_t *dip, uint64_t id)
+{
+	cnex_ldc_t *cldcp, *prev_cldcp;
+	cnex_soft_state_t *cnex_ssp;
+	int instance;
+
+	/* Get device instance and structure */
+	instance = ddi_get_instance(dip);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	/* find and remove channel from list */
+	mutex_enter(&cnex_ssp->clist_lock);
+	prev_cldcp = NULL;
+	cldcp = cnex_ssp->clist;
+	while (cldcp) {
+		if (cldcp->id == id)
+			break;
+		prev_cldcp = cldcp;
+		cldcp = cldcp->next;
+	}
+
+	if (cldcp == NULL) {
+		/* fixed: 'id' is a uint64_t; %d was the wrong conversion */
+		DWARN("cnex_unreg_chan: invalid channel 0x%llx\n", id);
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (EINVAL);
+	}
+
+	if (cldcp->tx.hdlr || cldcp->rx.hdlr) {
+		DWARN("cnex_unreg_chan: handlers still exist\n");
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (ENXIO);
+	}
+
+	/* unlink: head of list has no predecessor */
+	if (prev_cldcp)
+		prev_cldcp->next = cldcp->next;
+	else
+		cnex_ssp->clist = cldcp->next;
+
+	mutex_exit(&cnex_ssp->clist_lock);
+
+	/* destroy mutex */
+	mutex_destroy(&cldcp->lock);
+
+	/* free channel */
+	kmem_free(cldcp, sizeof (*cldcp));
+
+	return (0);
+}
+
+/*
+ * Remove Tx/Rx interrupt handler for the channel
+ *
+ * Marks the ino invalid in the HV, waits (bounded by
+ * cnex_pending_tmout) for a delivered interrupt to drain, then tears
+ * down the ivec entry and clears the per-direction interrupt info.
+ */
+static int
+cnex_rem_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype)
+{
+	int rv;
+	cnex_ldc_t *cldcp;
+	cnex_intr_t *iinfo;
+	cnex_soft_state_t *cnex_ssp;
+	hrtime_t start;
+	int instance;
+	/*
+	 * Fixed: initialize so the busy check below does not read an
+	 * uninitialized value when hvldc_intr_getstate fails on the
+	 * first loop iteration.
+	 */
+	int istate = HV_INTR_IDLE_STATE;
+
+	/* Get device instance and structure */
+	instance = ddi_get_instance(dip);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	/* get channel info */
+	mutex_enter(&cnex_ssp->clist_lock);
+	cldcp = cnex_ssp->clist;
+	while (cldcp) {
+		if (cldcp->id == id)
+			break;
+		cldcp = cldcp->next;
+	}
+	if (cldcp == NULL) {
+		DWARN("cnex_rem_intr: channel 0x%llx does not exist\n", id);
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (EINVAL);
+	}
+	mutex_exit(&cnex_ssp->clist_lock);
+
+	/* get rid of the channel intr handler */
+	mutex_enter(&cldcp->lock);
+
+	/* get interrupt type */
+	if (itype == CNEX_TX_INTR) {
+		iinfo = &(cldcp->tx);
+	} else if (itype == CNEX_RX_INTR) {
+		iinfo = &(cldcp->rx);
+	} else {
+		DWARN("cnex_rem_intr: invalid interrupt type\n");
+		mutex_exit(&cldcp->lock);
+		return (EINVAL);
+	}
+
+	D1("cnex_rem_intr: interrupt ino=0x%x\n", iinfo->ino);
+
+	/* check if a handler is already added */
+	if (iinfo->hdlr == 0) {
+		DWARN("cnex_rem_intr: interrupt handler does not exist\n");
+		mutex_exit(&cldcp->lock);
+		return (EINVAL);
+	}
+
+	D1("cnex_rem_intr: set intr to invalid ino=0x%x\n", iinfo->ino);
+	rv = hvldc_intr_setvalid(cnex_ssp->cfghdl,
+	    iinfo->ino, HV_INTR_NOTVALID);
+	if (rv) {
+		DWARN("cnex_rem_intr: cannot set valid ino=%x\n", iinfo->ino);
+		mutex_exit(&cldcp->lock);
+		return (ENXIO);
+	}
+
+	/*
+	 * Make a best effort to wait for pending interrupts
+	 * to finish. There is not much we can do if we timeout.
+	 */
+	start = gethrtime();
+	do {
+		rv = hvldc_intr_getstate(cnex_ssp->cfghdl, iinfo->ino, &istate);
+		if (rv) {
+			DWARN("cnex_rem_intr: ino=0x%llx, cannot get state\n",
+			    iinfo->ino);
+		}
+
+		if (rv || ((gethrtime() - start) > cnex_pending_tmout))
+			break;
+
+	} while (!panicstr && istate == HV_INTR_DELIVERED_STATE);
+
+	/* if interrupts are still pending print warning */
+	if (istate != HV_INTR_IDLE_STATE) {
+		DWARN("cnex_rem_intr: cannot remove intr busy ino=%x\n",
+		    iinfo->ino);
+		/* clear interrupt state */
+		(void) hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino,
+		    HV_INTR_IDLE_STATE);
+	}
+
+	/* remove interrupt */
+	rem_ivintr(iinfo->icookie, NULL);
+
+	/* clear interrupt info */
+	bzero(iinfo, sizeof (*iinfo));
+
+	mutex_exit(&cldcp->lock);
+
+	return (0);
+}
+
+
+/*
+ * Clear pending Tx/Rx interrupt
+ *
+ * Resets the HV interrupt state of the channel's Tx or Rx ino to
+ * idle.  A handler must already be installed for the direction.
+ */
+static int
+cnex_clr_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype)
+{
+	int rv;
+	cnex_ldc_t *cldcp;
+	cnex_intr_t *iinfo;
+	cnex_soft_state_t *cnex_ssp;
+	int instance;
+
+	/* Get device instance and structure */
+	instance = ddi_get_instance(dip);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	/* get channel info */
+	mutex_enter(&cnex_ssp->clist_lock);
+	cldcp = cnex_ssp->clist;
+	while (cldcp) {
+		if (cldcp->id == id)
+			break;
+		cldcp = cldcp->next;
+	}
+	if (cldcp == NULL) {
+		DWARN("cnex_clr_intr: channel 0x%llx does not exist\n", id);
+		mutex_exit(&cnex_ssp->clist_lock);
+		return (EINVAL);
+	}
+	mutex_exit(&cnex_ssp->clist_lock);
+
+	mutex_enter(&cldcp->lock);
+
+	/* get interrupt type */
+	if (itype == CNEX_TX_INTR) {
+		iinfo = &(cldcp->tx);
+	} else if (itype == CNEX_RX_INTR) {
+		iinfo = &(cldcp->rx);
+	} else {
+		/* fixed: message previously said "cnex_rem_intr" */
+		DWARN("cnex_clr_intr: invalid interrupt type\n");
+		mutex_exit(&cldcp->lock);
+		return (EINVAL);
+	}
+
+	/* fixed: message previously said "cnex_rem_intr" */
+	D1("cnex_clr_intr: interrupt ino=0x%x\n", iinfo->ino);
+
+	/* check if a handler is already added */
+	if (iinfo->hdlr == 0) {
+		DWARN("cnex_clr_intr: interrupt handler does not exist\n");
+		mutex_exit(&cldcp->lock);
+		return (EINVAL);
+	}
+
+	rv = hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino,
+	    HV_INTR_IDLE_STATE);
+	if (rv) {
+		/* fixed: message previously said "cnex_intr_wrapper" */
+		DWARN("cnex_clr_intr: cannot clear interrupt state\n");
+	}
+
+	mutex_exit(&cldcp->lock);
+
+	return (0);
+}
+
+/*
+ * Channel nexus interrupt handler wrapper
+ *
+ * Dispatches an incoming channel interrupt to the client handler
+ * registered via cnex_add_intr(), passing its two saved arguments.
+ */
+static uint_t
+cnex_intr_wrapper(caddr_t arg)
+{
+	cnex_intr_t *iinfo = (cnex_intr_t *)arg;
+	uint_t (*client_hdlr)();
+	caddr_t a1, a2;
+
+	ASSERT(iinfo != NULL);
+
+	/* snapshot handler and args before invoking */
+	client_hdlr = iinfo->hdlr;
+	a1 = iinfo->arg1;
+	a2 = iinfo->arg2;
+
+	D1("cnex_intr_wrapper: ino=0x%llx invoke client handler\n", iinfo->ino);
+	return ((*client_hdlr)(a1, a2));
+}
+
+/*ARGSUSED*/
+/*
+ * Attach entry point: allocate per-instance soft state, read the
+ * sun4v config handle from the "reg" property, register the nexus
+ * ops with the LDC framework, create the devctl minor node and hook
+ * into interrupt redistribution.
+ */
+static int
+cnex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+	int rv, instance, reglen;
+	cnex_regspec_t *reg_p;
+	ldc_cnex_t cinfo;
+	cnex_soft_state_t *cnex_ssp;
+
+	switch (cmd) {
+	case DDI_ATTACH:
+		break;
+	case DDI_RESUME:
+		return (DDI_SUCCESS);
+	default:
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Get the instance specific soft state structure.
+	 * Save the devi for this instance in the soft_state data.
+	 */
+	instance = ddi_get_instance(devi);
+	if (ddi_soft_state_zalloc(cnex_state, instance) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	cnex_ssp->devi = devi;
+	cnex_ssp->clist = NULL;
+
+	if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
+	    "reg", (caddr_t)&reg_p, &reglen) != DDI_SUCCESS) {
+		/* fixed: soft state was leaked on this path */
+		ddi_soft_state_free(cnex_state, instance);
+		return (DDI_FAILURE);
+	}
+
+	/* get the sun4v config handle for this device */
+	cnex_ssp->cfghdl = SUN4V_REG_SPEC2CFG_HDL(reg_p->physaddr);
+	kmem_free(reg_p, reglen);
+
+	D1("cnex_attach: cfghdl=0x%llx\n", cnex_ssp->cfghdl);
+
+	/* init channel list mutex */
+	mutex_init(&cnex_ssp->clist_lock, NULL, MUTEX_DRIVER, NULL);
+
+	/* Register with LDC module */
+	cinfo.dip = devi;
+	cinfo.reg_chan = cnex_reg_chan;
+	cinfo.unreg_chan = cnex_unreg_chan;
+	cinfo.add_intr = cnex_add_intr;
+	cinfo.rem_intr = cnex_rem_intr;
+	cinfo.clr_intr = cnex_clr_intr;
+
+	/*
+	 * LDC register will fail if an nexus instance had already
+	 * registered with the LDC framework
+	 */
+	rv = ldc_register(&cinfo);
+	if (rv) {
+		DWARN("cnex_attach: unable to register with LDC\n");
+		/*
+		 * Fixed: the mutex must be destroyed before the soft
+		 * state containing it is freed (was a use-after-free).
+		 */
+		mutex_destroy(&cnex_ssp->clist_lock);
+		ddi_soft_state_free(cnex_state, instance);
+		return (DDI_FAILURE);
+	}
+
+	if (ddi_create_minor_node(devi, "devctl", S_IFCHR, instance,
+	    DDI_NT_NEXUS, 0) != DDI_SUCCESS) {
+		ddi_remove_minor_node(devi, NULL);
+		/* fixed: undo the LDC registration done just above */
+		(void) ldc_unregister(&cinfo);
+		/* fixed: destroy mutex before freeing soft state */
+		mutex_destroy(&cnex_ssp->clist_lock);
+		ddi_soft_state_free(cnex_state, instance);
+		return (DDI_FAILURE);
+	}
+
+	/* Add interrupt redistribution callback. */
+	intr_dist_add(cnex_intr_redist, cnex_ssp);
+
+	ddi_report_dev(devi);
+	return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+/*
+ * Detach entry point: refuse to detach while channels are still
+ * registered; otherwise unregister from LDC, drop the interrupt
+ * redistribution callback and release the per-instance soft state.
+ */
+static int
+cnex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+	int instance;
+	ldc_cnex_t cinfo;
+	cnex_soft_state_t *cnex_ssp;
+
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	case DDI_SUSPEND:
+		return (DDI_SUCCESS);
+	default:
+		return (DDI_FAILURE);
+	}
+
+	instance = ddi_get_instance(devi);
+	cnex_ssp = ddi_get_soft_state(cnex_state, instance);
+
+	/* check if there are any channels still registered */
+	if (cnex_ssp->clist) {
+		/* fixed: message previously misspelled "cnex_dettach" */
+		cmn_err(CE_WARN, "?cnex_detach: channels registered %d\n",
+		    ddi_get_instance(devi));
+		return (DDI_FAILURE);
+	}
+
+	/* Unregister with LDC module (only cinfo.dip is consumed here) */
+	cinfo.dip = devi;
+	(void) ldc_unregister(&cinfo);
+
+	/* Remove interrupt redistribution callback. */
+	intr_dist_rem(cnex_intr_redist, cnex_ssp);
+
+	/* destroy mutex */
+	mutex_destroy(&cnex_ssp->clist_lock);
+
+	/* free soft state structure */
+	ddi_soft_state_free(cnex_state, instance);
+
+	return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+/*
+ * Character device open: accept only character opens of minors that
+ * map to an attached instance.
+ */
+static int
+cnex_open(dev_t *devp, int flags, int otyp, cred_t *credp)
+{
+	if (otyp != OTYP_CHR)
+		return (EINVAL);
+
+	/* the minor number is the instance number */
+	if (ddi_get_soft_state(cnex_state, getminor(*devp)) == NULL)
+		return (ENXIO);
+
+	return (0);
+}
+
+/*ARGSUSED*/
+/*
+ * Character device close: mirror of cnex_open(); only validates the
+ * open type and that the minor maps to a live instance.
+ */
+static int
+cnex_close(dev_t dev, int flags, int otyp, cred_t *credp)
+{
+	if (otyp != OTYP_CHR)
+		return (EINVAL);
+
+	/* the minor number is the instance number */
+	if (ddi_get_soft_state(cnex_state, getminor(dev)) == NULL)
+		return (ENXIO);
+
+	return (0);
+}
+
+/*ARGSUSED*/
+/*
+ * ioctl entry point: all devctl ioctls are delegated to the generic
+ * NDI devctl handler for this nexus instance.
+ */
+static int
+cnex_ioctl(dev_t dev,
+    int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_p)
+{
+	int instance;
+	cnex_soft_state_t *cnex_ssp;
+
+	/* minor number doubles as the instance number */
+	instance = getminor(dev);
+	if ((cnex_ssp = ddi_get_soft_state(cnex_state, instance)) == NULL)
+		return (ENXIO);
+	ASSERT(cnex_ssp->devi);
+	return (ndi_devctl_ioctl(cnex_ssp->devi, cmd, arg, mode, 0));
+}
+
+/*
+ * Bus ctl entry point for the channel nexus.
+ *
+ * Handles child initialization/teardown and reporting; ops that make
+ * no sense for channel devices are rejected with a warning, and
+ * everything else is passed up to the parent via ddi_ctlops().
+ */
+static int
+cnex_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
+    void *arg, void *result)
+{
+	char name[MAXNAMELEN];
+	uint32_t reglen;
+	int *cnex_regspec;
+
+	switch (ctlop) {
+	case DDI_CTLOPS_REPORTDEV:
+		if (rdip == NULL)
+			return (DDI_FAILURE);
+		cmn_err(CE_CONT, "?channel-device: %s%d\n",
+		    ddi_driver_name(rdip), ddi_get_instance(rdip));
+		return (DDI_SUCCESS);
+
+	case DDI_CTLOPS_INITCHILD:
+	{
+		dev_info_t *child = (dev_info_t *)arg;
+
+		/* the child's unit address is its first "reg" entry */
+		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, child,
+		    DDI_PROP_DONTPASS, "reg",
+		    &cnex_regspec, &reglen) != DDI_SUCCESS) {
+			return (DDI_FAILURE);
+		}
+
+		(void) snprintf(name, sizeof (name), "%x", *cnex_regspec);
+		ddi_set_name_addr(child, name);
+		ddi_set_parent_data(child, NULL);
+		ddi_prop_free(cnex_regspec);
+		return (DDI_SUCCESS);
+	}
+
+	case DDI_CTLOPS_UNINITCHILD:
+	{
+		dev_info_t *child = (dev_info_t *)arg;
+
+		NDI_CONFIG_DEBUG((CE_NOTE,
+		    "DDI_CTLOPS_UNINITCHILD(%s, instance=%d)",
+		    ddi_driver_name(child), DEVI(child)->devi_instance));
+
+		/* clear the unit address set in INITCHILD */
+		ddi_set_name_addr(child, NULL);
+
+		return (DDI_SUCCESS);
+	}
+
+	case DDI_CTLOPS_DMAPMAPC:
+	case DDI_CTLOPS_REPORTINT:
+	case DDI_CTLOPS_REGSIZE:
+	case DDI_CTLOPS_NREGS:
+	case DDI_CTLOPS_SIDDEV:
+	case DDI_CTLOPS_SLAVEONLY:
+	case DDI_CTLOPS_AFFINITY:
+	case DDI_CTLOPS_POKE:
+	case DDI_CTLOPS_PEEK:
+		/*
+		 * These ops correspond to functions that "shouldn't" be called
+		 * by a channel-device driver. So we whine when we're called.
+		 */
+		cmn_err(CE_WARN, "%s%d: invalid op (%d) from %s%d\n",
+		    ddi_driver_name(dip), ddi_get_instance(dip), ctlop,
+		    ddi_driver_name(rdip), ddi_get_instance(rdip));
+		return (DDI_FAILURE);
+
+	case DDI_CTLOPS_ATTACH:
+	case DDI_CTLOPS_BTOP:
+	case DDI_CTLOPS_BTOPR:
+	case DDI_CTLOPS_DETACH:
+	case DDI_CTLOPS_DVMAPAGESIZE:
+	case DDI_CTLOPS_IOMIN:
+	case DDI_CTLOPS_POWER:
+	case DDI_CTLOPS_PTOB:
+	default:
+		/*
+		 * Everything else (e.g. PTOB/BTOP/BTOPR requests) we pass up
+		 */
+		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
+	}
+}
+
+/* -------------------------------------------------------------------------- */
diff --git a/usr/src/uts/sun4v/io/dr_cpu.c b/usr/src/uts/sun4v/io/dr_cpu.c
new file mode 100644
index 0000000000..a66f6610bd
--- /dev/null
+++ b/usr/src/uts/sun4v/io/dr_cpu.c
@@ -0,0 +1,1151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v CPU DR Module
+ */
+
+#include <sys/modctl.h>
+#include <sys/processor.h>
+#include <sys/cpuvar.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/note.h>
+#include <sys/sysevent/dr.h>
+#include <sys/hypervisor_api.h>
+#include <sys/mach_descrip.h>
+#include <sys/mdesc.h>
+#include <sys/ds.h>
+#include <sys/dr_util.h>
+#include <sys/dr_cpu.h>
+#include <sys/promif.h>
+#include <sys/machsystm.h>
+
+
+/* Loadable-module linkage: this is a misc module (no dev ops) */
+static struct modlmisc modlmisc = {
+	&mod_miscops,
+	"sun4v CPU DR %I%"
+};
+
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	(void *)&modlmisc,
+	NULL
+};
+
+/*
+ * Global DS Handle
+ */
+/* set by the register callback, cleared by the unregister callback */
+static ds_svc_hdl_t ds_handle;
+
+/*
+ * Supported DS Capability Versions
+ */
+static ds_ver_t		dr_cpu_vers[] = { { 1, 0 } };
+#define	DR_CPU_NVERS	(sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))
+
+/*
+ * DS Capability Description
+ */
+static ds_capability_t dr_cpu_cap = {
+	DR_CPU_DS_ID,		/* svc_id */
+	dr_cpu_vers,		/* vers */
+	DR_CPU_NVERS		/* nvers */
+};
+
+/*
+ * DS Callbacks
+ */
+static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
+static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
+static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
+
+/*
+ * DS Client Ops Vector
+ */
+static ds_clnt_ops_t dr_cpu_ops = {
+	dr_cpu_reg_handler,	/* ds_reg_cb */
+	dr_cpu_unreg_handler,	/* ds_unreg_cb */
+	dr_cpu_data_handler,	/* ds_data_cb */
+	NULL			/* cb_arg */
+};
+
+/*
+ * Internal Functions
+ */
+static int dr_cpu_init(void);
+static int dr_cpu_fini(void);
+
+static int dr_cpu_list_configure(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
+static int dr_cpu_list_unconfigure(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
+static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
+
+static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
+static int dr_cpu_configure(processorid_t, int *status);
+static int dr_cpu_status(processorid_t, int *status);
+
+static int dr_cpu_probe(processorid_t newcpuid);
+static int dr_cpu_deprobe(processorid_t cpuid);
+
+static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
+static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);
+
+
+/*
+ * Module entry point: refuse to load when CPU DR is administratively
+ * disabled, register the DS capability, then install the module.
+ * dr_cpu_init() is undone if mod_install fails.
+ */
+int
+_init(void)
+{
+	int status;
+
+	/* check that CPU DR is enabled */
+	if (dr_is_disabled(DR_TYPE_CPU)) {
+		cmn_err(CE_CONT, "!CPU DR is disabled\n");
+		return (-1);
+	}
+
+	if ((status = dr_cpu_init()) != 0) {
+		cmn_err(CE_NOTE, "CPU DR initialization failed");
+		return (status);
+	}
+
+	if ((status = mod_install(&modlinkage)) != 0) {
+		(void) dr_cpu_fini();
+	}
+
+	return (status);
+}
+
+/* Module entry point: report module information. */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
+
+/* tunable: module unload is refused unless this is set non-zero */
+int dr_cpu_allow_unload;
+
+/*
+ * Module entry point: remove the module and tear down the DS
+ * capability, but only when unloading has been explicitly allowed.
+ */
+int
+_fini(void)
+{
+	int status;
+
+	if (dr_cpu_allow_unload == 0)
+		return (EBUSY);
+
+	if ((status = mod_remove(&modlinkage)) == 0) {
+		(void) dr_cpu_fini();
+	}
+
+	return (status);
+}
+
+/*
+ * Register the CPU DR capability with the Domain Services framework.
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+dr_cpu_init(void)
+{
+	int err = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops);
+
+	if (err != 0) {
+		cmn_err(CE_NOTE, "ds_cap_init failed: %d", err);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Unregister the CPU DR capability from the Domain Services
+ * framework.  Returns 0 on success, -1 on failure.
+ */
+static int
+dr_cpu_fini(void)
+{
+	int err = ds_cap_fini(&dr_cpu_cap);
+
+	if (err != 0) {
+		cmn_err(CE_NOTE, "ds_cap_fini failed: %d", err);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * DS register callback: the service is now available; stash the
+ * service handle used by ds_cap_send() in dr_cpu_data_handler().
+ */
+static void
+dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
+{
+	DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
+	    ver->major, ver->minor, hdl);
+
+	ds_handle = hdl;
+}
+
+/*
+ * DS unregister callback: the service went away; invalidate the
+ * cached service handle.
+ */
+static void
+dr_cpu_unreg_handler(ds_cb_arg_t arg)
+{
+	DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);
+
+	ds_handle = DS_INVALID_HDL;
+}
+
+/*
+ * DS data callback: validate an incoming DR CPU request, dispatch it
+ * to the configure/unconfigure/status list handler, and always send
+ * a response.  If validation or the handler fails, 'resp' still
+ * points at the local err_resp header and an error reply is sent;
+ * otherwise the handler allocated the response, which is freed after
+ * sending.
+ */
+static void
+dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+	_NOTE(ARGUNUSED(arg))
+
+	dr_cpu_hdr_t *req = buf;
+	dr_cpu_hdr_t err_resp;
+	dr_cpu_hdr_t *resp = &err_resp;	/* replaced on handler success */
+	int resp_len = 0;
+	int rv;
+
+	/*
+	 * Sanity check the message
+	 */
+	if (buflen < sizeof (dr_cpu_hdr_t)) {
+		DR_DBG_CPU("incoming message short: expected at least %ld "
+		    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
+		goto done;
+	}
+
+	if (req == NULL) {
+		DR_DBG_CPU("empty message: expected at least %ld bytes\n",
+		    sizeof (dr_cpu_hdr_t));
+		goto done;
+	}
+
+	DR_DBG_CPU("incoming request:\n");
+	DR_DBG_DUMP_MSG(buf, buflen);
+
+	/* a request cannot name more CPUs than the system supports */
+	if (req->num_records > NCPU) {
+		DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
+		    req->num_records, NCPU);
+		goto done;
+	}
+
+	if (req->num_records == 0) {
+		DR_DBG_CPU("No CPU specified for operation\n");
+		goto done;
+	}
+
+	/*
+	 * Process the command
+	 */
+	switch (req->msg_type) {
+	case DR_CPU_CONFIGURE:
+		if ((rv = dr_cpu_list_configure(req, &resp, &resp_len)) != 0)
+			DR_DBG_CPU("dr_cpu_list_configure failed (%d)\n", rv);
+		break;
+
+	case DR_CPU_UNCONFIGURE:
+	case DR_CPU_FORCE_UNCONFIG:
+		if ((rv = dr_cpu_list_unconfigure(req, &resp, &resp_len)) != 0)
+			DR_DBG_CPU("dr_cpu_list_unconfigure failed (%d)\n", rv);
+		break;
+
+	case DR_CPU_STATUS:
+		if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
+			DR_DBG_CPU("dr_cpu_list_status failed (%d)\n", rv);
+		break;
+
+	default:
+		cmn_err(CE_NOTE, "unsupported DR operation (%d)",
+		    req->msg_type);
+		break;
+	}
+
+done:
+	/* check if an error occurred */
+	if (resp == &err_resp) {
+		resp->req_num = (req) ? req->req_num : 0;
+		resp->msg_type = DR_CPU_ERROR;
+		resp->num_records = 0;
+		resp_len = sizeof (dr_cpu_hdr_t);
+	}
+
+	/* send back the response */
+	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
+		DR_DBG_CPU("ds_send failed\n");
+	}
+
+	/* free any allocated memory */
+	if (resp != &err_resp) {
+		kmem_free(resp, resp_len);
+	}
+}
+
+/*
+ * Do not modify result buffer or length on error.
+ *
+ * Configure every CPU named in 'req' (cpuid array follows the
+ * header), recording a per-CPU result/status triple in the allocated
+ * response.  The caller owns and must free *resp (*resp_len bytes).
+ * Always returns 0 in the current implementation.
+ */
+static int
+dr_cpu_list_configure(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
+{
+	int idx;
+	int result;
+	int status;
+	int rlen;
+	uint32_t *cpuids;
+	dr_cpu_hdr_t *rp;
+	dr_cpu_stat_t *stat;
+
+	/* the incoming array of cpuids to configure */
+	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
+
+	/* allocate a response message */
+	rlen = sizeof (dr_cpu_hdr_t);
+	rlen += req->num_records * sizeof (dr_cpu_stat_t);
+	rp = kmem_zalloc(rlen, KM_SLEEP);
+
+	/* fill in the known data */
+	rp->req_num = req->req_num;
+	rp->msg_type = DR_CPU_OK;
+	rp->num_records = req->num_records;
+
+	/* stat array for the response */
+	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
+
+	/* configure each of the CPUs */
+	for (idx = 0; idx < req->num_records; idx++) {
+
+		result = dr_cpu_configure(cpuids[idx], &status);
+
+		/* save off results of the configure */
+		stat[idx].cpuid = cpuids[idx];
+		stat[idx].result = result;
+		stat[idx].status = status;
+	}
+
+	*resp = rp;
+	*resp_len = rlen;
+
+	/* notify userland of the configuration change */
+	dr_generate_event(DR_TYPE_CPU, SE_HINT_INSERT);
+
+	return (0);
+}
+
+/*
+ * For each CPU in 'cpuids' that currently has a cpu structure, walk
+ * the active (non-system) processes looking for any thread bound to
+ * that CPU.  A bound thread marks the CPU's stat entry as
+ * DR_CPU_RES_BLOCKED / DR_CPU_STAT_CONFIGURED so the caller skips
+ * unconfiguring it.  'stat' is indexed in parallel with 'cpuids'.
+ */
+static void
+dr_cpu_check_cpus(uint32_t *cpuids, int ncpus, dr_cpu_stat_t *stat)
+{
+	int idx;
+	kthread_t *tp;
+	proc_t *pp;
+
+	DR_DBG_CPU("dr_cpu_check_cpus...\n");
+
+	mutex_enter(&cpu_lock);
+
+	/* process each cpu that is part of the request */
+	for (idx = 0; idx < ncpus; idx++) {
+
+		/* no cpu structure == nothing can be bound to it */
+		if (cpu_get(cpuids[idx]) == NULL)
+			continue;
+
+		mutex_enter(&pidlock);
+
+		/*
+		 * Walk the active processes, checking if each
+		 * thread belonging to the process is bound.
+		 */
+		for (pp = practive; pp != NULL; pp = pp->p_next) {
+			mutex_enter(&pp->p_lock);
+			tp = pp->p_tlist;
+
+			/* skip system processes and empty thread lists */
+			if (tp == NULL || (pp->p_flag & SSYS)) {
+				mutex_exit(&pp->p_lock);
+				continue;
+			}
+
+			do {
+				if (tp->t_bind_cpu != cpuids[idx])
+					continue;
+
+				DR_DBG_CPU("thread(s) bound to cpu %d\n",
+				    cpuids[idx]);
+
+				stat[idx].cpuid = cpuids[idx];
+				stat[idx].result = DR_CPU_RES_BLOCKED;
+				stat[idx].status = DR_CPU_STAT_CONFIGURED;
+				break;
+
+			} while ((tp = tp->t_forw) != pp->p_tlist);
+			mutex_exit(&pp->p_lock);
+		}
+
+		mutex_exit(&pidlock);
+	}
+
+	mutex_exit(&cpu_lock);
+}
+
+/*
+ * Do not modify result buffer or length on error.
+ *
+ * Unconfigure every CPU named in 'req'.  Unless the request is a
+ * forced unconfigure, dr_cpu_check_cpus() first marks CPUs with
+ * bound threads as blocked, and those entries are skipped.  The
+ * caller owns and must free *resp (*resp_len bytes).  Always
+ * returns 0 in the current implementation.
+ */
+static int
+dr_cpu_list_unconfigure(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
+{
+	int idx;
+	int result;
+	int status;
+	int rlen;
+	uint32_t *cpuids;
+	dr_cpu_hdr_t *rp;
+	dr_cpu_stat_t *stat;
+	boolean_t force;
+
+	/* the incoming array of cpuids to configure */
+	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
+
+	/* check if this is a forced unconfigured */
+	force = (req->msg_type == DR_CPU_FORCE_UNCONFIG) ? B_TRUE : B_FALSE;
+
+	/* allocate a response message */
+	rlen = sizeof (dr_cpu_hdr_t);
+	rlen += req->num_records * sizeof (dr_cpu_stat_t);
+	rp = kmem_zalloc(rlen, KM_SLEEP);
+
+	/* fill in the known data */
+	rp->req_num = req->req_num;
+	rp->msg_type = DR_CPU_OK;
+	rp->num_records = req->num_records;
+
+	/* stat array for the response */
+	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
+
+	/*
+	 * If the operation is not a forced unconfigure,
+	 * perform secondary checks for things that would
+	 * prevent an operation.
+	 */
+	if (!force)
+		dr_cpu_check_cpus(cpuids, req->num_records, stat);
+
+	/* unconfigure each of the CPUs */
+	for (idx = 0; idx < req->num_records; idx++) {
+
+		/* skip this cpu if it is already marked as blocked */
+		if (stat[idx].result == DR_CPU_RES_BLOCKED)
+			continue;
+
+		result = dr_cpu_unconfigure(cpuids[idx], &status, force);
+
+		/* save off results of the unconfigure */
+		stat[idx].cpuid = cpuids[idx];
+		stat[idx].result = result;
+		stat[idx].status = status;
+	}
+
+	*resp = rp;
+	*resp_len = rlen;
+
+	/* notify userland of the configuration change */
+	dr_generate_event(DR_TYPE_CPU, SE_HINT_REMOVE);
+
+	return (0);
+}
+
+/*
+ * Do not modify result buffer or length on error.
+ *
+ * Report the status of every CPU named in 'req'.  CPUs with no cpu
+ * structure come back from dr_cpu_status() as DR_CPU_RES_FAILURE; a
+ * single MD scan then distinguishes "present but unconfigured" from
+ * "not present".  The caller owns and must free *resp.  Always
+ * returns 0 in the current implementation.
+ */
+static int
+dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
+{
+	int idx;
+	int result;
+	int status;
+	int rlen;
+	uint32_t *cpuids;
+	dr_cpu_hdr_t *rp;
+	dr_cpu_stat_t *stat;
+	md_t *mdp = NULL;
+	int num_nodes;
+	int listsz;
+	mde_cookie_t *listp = NULL;
+	mde_cookie_t cpunode;
+	boolean_t walk_md = B_FALSE;	/* any CPU needs an MD lookup? */
+
+	/* the incoming array of cpuids to configure */
+	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
+
+	/* allocate a response message */
+	rlen = sizeof (dr_cpu_hdr_t);
+	rlen += req->num_records * sizeof (dr_cpu_stat_t);
+	rp = kmem_zalloc(rlen, KM_SLEEP);
+
+	/* fill in the known data */
+	rp->req_num = req->req_num;
+	rp->msg_type = DR_CPU_STATUS;
+	rp->num_records = req->num_records;
+
+	/* stat array for the response */
+	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
+
+	/* get the status for each of the CPUs */
+	for (idx = 0; idx < req->num_records; idx++) {
+
+		result = dr_cpu_status(cpuids[idx], &status);
+
+		if (result == DR_CPU_RES_FAILURE)
+			walk_md = B_TRUE;
+
+		/* save off results of the status */
+		stat[idx].cpuid = cpuids[idx];
+		stat[idx].result = result;
+		stat[idx].status = status;
+	}
+
+	if (walk_md == B_FALSE)
+		goto done;
+
+	/*
+	 * At least one of the cpus did not have a CPU
+	 * structure. So, consult the MD to determine if
+	 * they are present.
+	 */
+
+	if ((mdp = md_get_handle()) == NULL) {
+		DR_DBG_CPU("unable to initialize MD\n");
+		goto done;
+	}
+
+	num_nodes = md_node_count(mdp);
+	ASSERT(num_nodes > 0);
+
+	listsz = num_nodes * sizeof (mde_cookie_t);
+	listp = kmem_zalloc(listsz, KM_SLEEP);
+
+	for (idx = 0; idx < req->num_records; idx++) {
+
+		if (stat[idx].result != DR_CPU_RES_FAILURE)
+			continue;
+
+		/* check the MD for the current cpuid */
+		cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);
+
+		stat[idx].result = DR_CPU_RES_OK;
+
+		if (cpunode == MDE_INVAL_ELEM_COOKIE) {
+			stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
+		} else {
+			stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
+		}
+	}
+
+	kmem_free(listp, listsz);
+
+	(void) md_fini_handle(mdp);
+
+done:
+	*resp = rp;
+	*resp_len = rlen;
+
+	return (0);
+}
+
+/*
+ * Configure a single CPU: probe its device tree node, create the OS
+ * cpu structure if needed, power the CPU on and bring it online.
+ * Returns a DR_CPU_RES_* code and sets *status to the resulting
+ * DR_CPU_STAT_* state.
+ */
+static int
+dr_cpu_configure(processorid_t cpuid, int *status)
+{
+	struct cpu	*cp;
+	int		rv = 0;
+
+	DR_DBG_CPU("dr_cpu_configure...\n");
+
+	/*
+	 * Build device tree node for the CPU
+	 */
+	if ((rv = dr_cpu_probe(cpuid)) != 0) {
+		DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
+		/* EINVAL from probe means the CPU is not in the MD */
+		if (rv == EINVAL) {
+			*status = DR_CPU_STAT_NOT_PRESENT;
+			return (DR_CPU_RES_NOT_IN_MD);
+		}
+		*status = DR_CPU_STAT_UNCONFIGURED;
+		return (DR_CPU_RES_FAILURE);
+	}
+
+	mutex_enter(&cpu_lock);
+
+	/*
+	 * Configure the CPU
+	 */
+	if ((cp = cpu_get(cpuid)) == NULL) {
+
+		if ((rv = cpu_configure(cpuid)) != 0) {
+			DR_DBG_CPU("failed to configure CPU %d (%d)\n",
+			    cpuid, rv);
+			rv = DR_CPU_RES_FAILURE;
+			*status = DR_CPU_STAT_UNCONFIGURED;
+			goto done;
+		}
+
+		DR_DBG_CPU("CPU %d configured\n", cpuid);
+
+		/* CPU struct should exist now */
+		cp = cpu_get(cpuid);
+	}
+
+	ASSERT(cp);
+
+	/*
+	 * Power on the CPU. In sun4v, this brings the stopped
+	 * CPU into the guest from the Hypervisor.
+	 */
+	if (cpu_is_poweredoff(cp)) {
+
+		if ((rv = cpu_poweron(cp)) != 0) {
+			DR_DBG_CPU("failed to power on CPU %d (%d)\n",
+			    cpuid, rv);
+			rv = DR_CPU_RES_FAILURE;
+			*status = DR_CPU_STAT_UNCONFIGURED;
+			goto done;
+		}
+
+		DR_DBG_CPU("CPU %d powered on\n", cpuid);
+	}
+
+	/*
+	 * Online the CPU
+	 */
+	if (cpu_is_offline(cp)) {
+
+		if ((rv = cpu_online(cp)) != 0) {
+			DR_DBG_CPU("failed to online CPU %d (%d)\n",
+			    cpuid, rv);
+			rv = DR_CPU_RES_FAILURE;
+			/* offline is still configured */
+			*status = DR_CPU_STAT_CONFIGURED;
+			goto done;
+		}
+
+		DR_DBG_CPU("CPU %d online\n", cpuid);
+	}
+
+	rv = DR_CPU_RES_OK;
+	*status = DR_CPU_STAT_CONFIGURED;
+
+done:
+	mutex_exit(&cpu_lock);
+
+	return (rv);
+}
+
+/*
+ * Unconfigure a single CPU: offline it (forcibly if 'force'), power
+ * it off, remove the OS cpu structure and deprobe its device tree
+ * node.  If no cpu structure exists, only the deprobe is attempted.
+ * Returns a DR_CPU_RES_* code and sets *status to the resulting
+ * DR_CPU_STAT_* state.
+ */
+static int
+dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
+{
+	struct cpu	*cp;
+	int		rv = 0;
+	int		cpu_flags;
+
+	DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");
+
+	mutex_enter(&cpu_lock);
+
+	cp = cpu_get(cpuid);
+
+	if (cp == NULL) {
+
+		/*
+		 * The OS CPU structures are already torn down,
+		 * Attempt to deprobe the CPU to make sure the
+		 * device tree is up to date.
+		 */
+		if (dr_cpu_deprobe(cpuid) != 0) {
+			DR_DBG_CPU("failed to deprobe CPU %d\n", cpuid);
+			rv = DR_CPU_RES_FAILURE;
+			*status = DR_CPU_STAT_UNCONFIGURED;
+			goto done;
+		}
+
+		/* NB: rv is still 0 here, not DR_CPU_RES_OK; *status unset */
+		goto done;
+	}
+
+	ASSERT(cp->cpu_id == cpuid);
+
+	/*
+	 * Offline the CPU
+	 */
+	if (cpu_is_active(cp)) {
+
+		/* set the force flag correctly */
+		cpu_flags = (force) ? CPU_FORCED : 0;
+
+		if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
+			DR_DBG_CPU("failed to offline CPU %d (%d)\n",
+			    cpuid, rv);
+
+			rv = DR_CPU_RES_FAILURE;
+			*status = DR_CPU_STAT_CONFIGURED;
+			goto done;
+		}
+
+		DR_DBG_CPU("CPU %d offline\n", cpuid);
+	}
+
+	/*
+	 * Power off the CPU. In sun4v, this puts the running
+	 * CPU into the stopped state in the Hypervisor.
+	 */
+	if (!cpu_is_poweredoff(cp)) {
+
+		if ((rv = cpu_poweroff(cp)) != 0) {
+			DR_DBG_CPU("failed to power off CPU %d (%d)\n",
+			    cpuid, rv);
+			rv = DR_CPU_RES_FAILURE;
+			*status = DR_CPU_STAT_CONFIGURED;
+			goto done;
+		}
+
+		DR_DBG_CPU("CPU %d powered off\n", cpuid);
+	}
+
+	/*
+	 * Unconfigure the CPU
+	 */
+	if ((rv = cpu_unconfigure(cpuid)) != 0) {
+		DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
+		rv = DR_CPU_RES_FAILURE;
+		*status = DR_CPU_STAT_UNCONFIGURED;
+		goto done;
+	}
+
+	DR_DBG_CPU("CPU %d unconfigured\n", cpuid);
+
+	/*
+	 * Tear down device tree.
+	 */
+	if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
+		DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
+		rv = DR_CPU_RES_FAILURE;
+		*status = DR_CPU_STAT_UNCONFIGURED;
+		goto done;
+	}
+
+	rv = DR_CPU_RES_OK;
+	*status = DR_CPU_STAT_UNCONFIGURED;
+
+done:
+	mutex_exit(&cpu_lock);
+
+	return (rv);
+}
+
+/*
+ * Determine the state of a CPU. If the CPU structure is not present,
+ * it does not attempt to determine whether or not the CPU is in the
+ * MD. It is more efficient to do this at the higher level for all
+ * CPUs since it may not even be necessary to search the MD if all
+ * the CPUs are accounted for. Returns DR_CPU_RES_OK if the CPU
+ * structure is present, and DR_CPU_RES_FAILURE otherwise as a signal
+ * that an MD walk is necessary. On DR_CPU_RES_OK, *status is set to
+ * DR_CPU_STAT_{UN}CONFIGURED based solely on the power state.
+ */
+static int
+dr_cpu_status(processorid_t cpuid, int *status)
+{
+	int rv;
+	struct cpu *cp;
+
+	DR_DBG_CPU("dr_cpu_status...\n");
+
+	mutex_enter(&cpu_lock);
+
+	if ((cp = cpu_get(cpuid)) == NULL) {
+		/* need to check if cpu is in the MD */
+		rv = DR_CPU_RES_FAILURE;
+		goto done;
+	}
+
+	if (cpu_is_poweredoff(cp)) {
+		/*
+		 * The CPU is powered off, so it is considered
+		 * unconfigured from the service entity point of
+		 * view. The CPU is not available to the system
+		 * and intervention by the service entity would
+		 * be required to change that.
+		 */
+		*status = DR_CPU_STAT_UNCONFIGURED;
+	} else {
+		/*
+		 * The CPU is powered on, so it is considered
+		 * configured from the service entity point of
+		 * view. It is available for use by the system
+		 * and service entities are not concerned about
+		 * the operational status (offline, online, etc.)
+		 * of the CPU in terms of DR.
+		 */
+		*status = DR_CPU_STAT_CONFIGURED;
+	}
+
+	rv = DR_CPU_RES_OK;
+
+done:
+	mutex_exit(&cpu_lock);
+
+	return (rv);
+}
+
+/*
+ * Argument block passed to the new_cpu_node() branch-create callback.
+ */
+typedef struct {
+	md_t		*mdp;		/* MD handle to read properties from */
+	mde_cookie_t	cpunode;	/* MD cookie of the CPU node */
+	dev_info_t	*dip;		/* out: dip of the new device node */
+} cb_arg_t;
+
+/* max number of 'compatible' strings copied out of the MD */
+#define	STR_ARR_LEN	5
+
+/*
+ * Branch-create (SID) callback invoked via e_ddi_branch_create() from
+ * dr_cpu_probe(). Populates the properties of a new "cpu" device node
+ * ('name', 'compatible', 'device_type', 'clock-frequency', 'reg')
+ * using values read from the MD node in the cb_arg_t argument.
+ * Returns DDI_WALK_TERMINATE on success (storing the new dip in
+ * cba->dip), DDI_WALK_ERROR on any property failure.
+ */
+static int
+new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
+{
+	_NOTE(ARGUNUSED(flags))
+
+	char *compat;
+	uint64_t freq;
+	uint64_t cpuid = 0;
+	int regbuf[4];
+	int len = 0;
+	cb_arg_t *cba;
+	char *str_arr[STR_ARR_LEN];
+	char *curr;
+	int idx = 0;
+
+	DR_DBG_CPU("new_cpu_node...\n");
+
+	cba = (cb_arg_t *)arg;
+
+	/*
+	 * Add 'name' property
+	 */
+	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
+	    "name", "cpu") != DDI_SUCCESS) {
+		DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	/*
+	 * Add 'compatible' property
+	 */
+	if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
+	    (uint8_t **)(&compat), &len)) {
+		DR_DBG_CPU("new_cpu_node: failed to read 'compatible' property "
+		    "from MD\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	DR_DBG_CPU("'compatible' len is %d\n", len);
+
+	/*
+	 * Parse the MD string array: a sequence of NUL-terminated
+	 * strings packed into 'len' bytes. At most STR_ARR_LEN
+	 * entries are collected; any extras are silently dropped.
+	 */
+	curr = compat;
+	while (curr < (compat + len)) {
+
+		DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);
+
+		str_arr[idx++] = curr;
+		curr += strlen(curr) + 1;
+
+		if (idx == STR_ARR_LEN) {
+			DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
+			break;
+		}
+	}
+
+	if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
+	    "compatible", str_arr, idx) != DDI_SUCCESS) {
+		DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
+		    "property\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	/*
+	 * Add 'device_type' property
+	 */
+	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
+	    "device_type", "cpu") != DDI_SUCCESS) {
+		DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
+		    "property\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	/*
+	 * Add 'clock-frequency' property
+	 */
+	if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
+		DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
+		    "property from MD\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	/*
+	 * NOTE(review): freq is a uint64_t narrowed to int here --
+	 * presumably frequencies fit in 32 bits; confirm.
+	 */
+	if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
+	    "clock-frequency", freq) != DDI_SUCCESS) {
+		DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
+		    "property\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	/*
+	 * Add 'reg' (cpuid) property
+	 */
+	if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
+		DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
+		    "from MD\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);
+
+	/* reg[0] encodes the cpuid as a PROM config handle (0xc0000000 |) */
+	bzero(regbuf, 4 * sizeof (int));
+	regbuf[0] = 0xc0000000 | cpuid;
+
+	if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
+	    "reg", regbuf, 4) != DDI_SUCCESS) {
+		DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
+		return (DDI_WALK_ERROR);
+	}
+
+	/* hand the new dip back to the caller */
+	cba->dip = new_node;
+
+	return (DDI_WALK_TERMINATE);
+}
+
+/*
+ * Create the device tree branch for a CPU that is being added.
+ * If the node already exists this is a no-op. Otherwise the CPU
+ * is looked up in the MD and a new SID branch is created under
+ * the root node via e_ddi_branch_create(), with new_cpu_node()
+ * filling in the properties. Returns 0 on success, EINVAL if the
+ * CPU is not in the MD, and -1 on other failures.
+ */
+static int
+dr_cpu_probe(processorid_t cpuid)
+{
+	dev_info_t *pdip;
+	dev_info_t *dip;
+	devi_branch_t br;
+	md_t *mdp = NULL;
+	int num_nodes;
+	int rv = 0;
+	int listsz;
+	mde_cookie_t *listp = NULL;
+	cb_arg_t cba;
+	mde_cookie_t cpunode;
+
+	if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
+		/* nothing to do; drop the hold taken by the lookup */
+		e_ddi_branch_rele(dip);
+		return (0);
+	}
+
+	if ((mdp = md_get_handle()) == NULL) {
+		DR_DBG_CPU("unable to initialize machine description\n");
+		return (-1);
+	}
+
+	num_nodes = md_node_count(mdp);
+	ASSERT(num_nodes > 0);
+
+	/* scratch array sized for the worst case: every node in the MD */
+	listsz = num_nodes * sizeof (mde_cookie_t);
+	listp = kmem_zalloc(listsz, KM_SLEEP);
+
+	cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);
+
+	if (cpunode == MDE_INVAL_ELEM_COOKIE) {
+		rv = EINVAL;
+		goto done;
+	}
+
+	/* pass in MD cookie for CPU */
+	cba.mdp = mdp;
+	cba.cpunode = cpunode;
+
+	br.arg = (void *)&cba;
+	br.type = DEVI_BRANCH_SID;
+	br.create.sid_branch_create = new_cpu_node;
+	br.devi_branch_callback = NULL;
+	pdip = ddi_root_node();
+
+	if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
+		DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
+		rv = -1;
+		goto done;
+	}
+
+	DR_DBG_CPU("CPU %d probed\n", cpuid);
+
+	rv = 0;
+
+done:
+	if (listp)
+		kmem_free(listp, listsz);
+
+	if (mdp)
+		(void) md_fini_handle(mdp);
+
+	return (rv);
+}
+
+/*
+ * Tear down the device tree branch for a CPU that is being removed.
+ * A no-op if the node is already gone. Returns 0 on success or if
+ * already deprobed, -1 if the branch could not be destroyed (in
+ * which case the offending node's path is logged).
+ */
+static int
+dr_cpu_deprobe(processorid_t cpuid)
+{
+	dev_info_t *fdip = NULL;
+	dev_info_t *dip;
+
+	if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
+		DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
+		return (0);
+	}
+
+	/* dr_cpu_find_node() returns the branch held */
+	ASSERT(e_ddi_branch_held(dip));
+
+	if (e_ddi_branch_destroy(dip, &fdip, 0)) {
+		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+		/*
+		 * If non-NULL, fdip is held and must be released.
+		 */
+		if (fdip != NULL) {
+			(void) ddi_pathname(fdip, path);
+			ddi_release_devi(fdip);
+		} else {
+			(void) ddi_pathname(dip, path);
+		}
+		cmn_err(CE_NOTE, "node removal failed: %s (%p)",
+		    path, (fdip) ? (void *)fdip : (void *)dip);
+
+		kmem_free(path, MAXPATHLEN);
+
+		return (-1);
+	}
+
+	DR_DBG_CPU("CPU %d deprobed\n", cpuid);
+
+	return (0);
+}
+
+/* argument for the dr_cpu_check_node() device tree walk */
+typedef struct {
+	processorid_t	cpuid;	/* in: cpuid being searched for */
+	dev_info_t	*dip;	/* out: matching node, held, or NULL */
+} dr_search_arg_t;
+
+/*
+ * ddi_walk_devs() callback used by dr_cpu_find_node(). Compares
+ * each "cpu" node's 'reg'-derived cpuid against the target in the
+ * dr_search_arg_t; on a match, holds the branch, stores the dip
+ * and terminates the walk. Prunes children of non-cpu nodes.
+ */
+static int
+dr_cpu_check_node(dev_info_t *dip, void *arg)
+{
+	char 		*name;
+	processorid_t	cpuid;
+	dr_search_arg_t	*sarg = (dr_search_arg_t *)arg;
+
+	if (dip == ddi_root_node()) {
+		return (DDI_WALK_CONTINUE);
+	}
+
+	name = ddi_node_name(dip);
+
+	if (strcmp(name, "cpu") != 0) {
+		return (DDI_WALK_PRUNECHILD);
+	}
+
+	/*
+	 * NOTE(review): -1 is returned when 'reg' is absent and is
+	 * then also run through PROM_CFGHDL_TO_CPUID -- presumably
+	 * that can never equal a valid cpuid; confirm.
+	 */
+	cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+	    "reg", -1);
+
+	cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
+
+	DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);
+
+	if (cpuid == sarg->cpuid) {
+		DR_DBG_CPU("matching node\n");
+
+		/* matching node must be returned held */
+		if (!e_ddi_branch_held(dip))
+			e_ddi_branch_hold(dip);
+
+		sarg->dip = dip;
+		return (DDI_WALK_TERMINATE);
+	}
+
+	return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Walk the device tree to find the dip corresponding to the cpuid
+ * passed in. If present, the dip is returned held. The caller must
+ * release the hold on the dip once it is no longer required. If no
+ * matching node if found, NULL is returned.
+ */
+static dev_info_t *
+dr_cpu_find_node(processorid_t cpuid)
+{
+	dr_search_arg_t	arg;
+
+	DR_DBG_CPU("dr_cpu_find_node...\n");
+
+	arg.cpuid = cpuid;
+	arg.dip = NULL;
+
+	/* dr_cpu_check_node() fills in arg.dip on a match */
+	ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);
+
+	ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));
+
+	return ((arg.dip) ? arg.dip : NULL);
+}
+
+/*
+ * Look up a particular cpuid in the MD. Returns the mde_cookie_t
+ * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
+ * otherwise. It is assumed the scratch array has already been
+ * allocated so that it can accommodate the worst case scenario,
+ * every node in the MD.
+ */
+static mde_cookie_t
+dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
+{
+	int		idx;
+	int		nnodes;
+	mde_cookie_t	rootnode;
+	uint64_t	cpuid_prop;
+	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;
+
+	rootnode = md_root_node(mdp);
+	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+	/*
+	 * Scan the DAG for all the CPU nodes
+	 */
+	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
+	    md_find_name(mdp, "fwd"), listp);
+
+	if (nnodes < 0) {
+		DR_DBG_CPU("Scan for CPUs failed\n");
+		return (result);
+	}
+
+	DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);
+
+	/*
+	 * Find the CPU of interest
+	 */
+	for (idx = 0; idx < nnodes; idx++) {
+
+		/* a CPU node without an 'id' aborts the search */
+		if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
+			DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
+			    idx);
+			break;
+		}
+
+		if (cpuid_prop == cpuid) {
+			/* found a match */
+			DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
+			    "in MD\n", cpuid);
+			result = listp[idx];
+			break;
+		}
+	}
+
+	if (result == MDE_INVAL_ELEM_COOKIE) {
+		DR_DBG_CPU("CPU %d not in MD\n", cpuid);
+	}
+
+	return (result);
+}
diff --git a/usr/src/uts/sun4v/io/dr_util.c b/usr/src/uts/sun4v/io/dr_util.c
new file mode 100644
index 0000000000..58e7710a08
--- /dev/null
+++ b/usr/src/uts/sun4v/io/dr_util.c
@@ -0,0 +1,206 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v DR Utility functions
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/sunddi.h>
+#include <sys/note.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/ldoms.h>
+
+#include <sys/dr_util.h>
+
+/*
+ * Return B_TRUE if DR of the given resource type is disabled
+ * on this system, B_FALSE otherwise. Currently keyed entirely
+ * off the global 'domaining_enabled' flag.
+ */
+boolean_t
+dr_is_disabled(dr_type_t type)
+{
+	/*
+	 * The type argument is currently unused. However, it
+	 * keeps the interface flexible enough to allows for
+	 * only disabling certain types of DR.
+	 */
+	_NOTE(ARGUNUSED(type))
+
+	/*
+	 * DR requires that the kernel is using its own CIF
+	 * handler. If that is not the case, either because
+	 * domaining has been explicitly disabled, or because
+	 * the firmware does not support it, the system must
+	 * remain static and DR must be disabled.
+	 */
+	if (!domaining_enabled) {
+		cmn_err(CE_NOTE, "!Kernel CIF handler is not enabled, DR "
+		    "is not available\n");
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
+ * Generate a DR sysevent based on the type of resource and
+ * sysevent hint specified. The hint indicates whether the
+ * resource was added or removed. Failures are logged via the
+ * debug macros but otherwise ignored -- event generation is
+ * best effort.
+ */
+void
+dr_generate_event(dr_type_t type, int se_hint)
+{
+	int			rv;
+	sysevent_id_t		eid;
+	sysevent_t		*ev = NULL;
+	sysevent_attr_list_t	*evnt_attr_list = NULL;
+	sysevent_value_t	evnt_val;
+	static char		pubname[] = SUNW_KERN_PUB"dr";
+
+	DR_DBG_ALL("generate_event: type=%s, hint=%s\n", DR_TYPE2STR(type),
+	    SE_HINT2STR(se_hint));
+
+	/*
+	 * Add the attachment point attribute
+	 */
+	ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE, pubname, KM_SLEEP);
+	evnt_val.value_type = SE_DATA_TYPE_STRING;
+	evnt_val.value.sv_string = DR_TYPE2STR(type);
+
+	rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val, KM_SLEEP);
+	if (rv != 0) {
+		DR_DBG_ALL("generate_event: failed to add attr '%s' for "
+		    "'%s' event\n", DR_AP_ID, EC_DR);
+		goto done;
+	}
+
+	/*
+	 * Add the DR hint attribute
+	 */
+	evnt_val.value_type = SE_DATA_TYPE_STRING;
+	evnt_val.value.sv_string = SE_HINT2STR(se_hint);
+
+	rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val, KM_SLEEP);
+	if (rv != 0) {
+		DR_DBG_ALL("generate_event: failed to add attr '%s' for "
+		    "'%s' event\n", DR_HINT, EC_DR);
+		/* the attr list is not owned by the event yet; free it */
+		sysevent_free_attr(evnt_attr_list);
+		goto done;
+	}
+
+	/*
+	 * Attach the attribute list to the event
+	 */
+	rv = sysevent_attach_attributes(ev, evnt_attr_list);
+	if (rv != 0) {
+		DR_DBG_ALL("generate_event: failed to add attr list for "
+		    "'%s' event\n", EC_DR);
+		sysevent_free_attr(evnt_attr_list);
+		goto done;
+	}
+
+	/*
+	 * Log the event
+	 */
+	rv = log_sysevent(ev, KM_NOSLEEP, &eid);
+	if (rv != 0) {
+		DR_DBG_ALL("generate_event: failed to log event (%d)\n", rv);
+	}
+
+done:
+	if (ev != NULL)
+		sysevent_free(ev);
+}
+
+/*
+ * Debugging Features
+ */
+#ifdef DEBUG
+
+uint_t dr_debug = 0x0;
+
+#define BYTESPERLINE 8
+#define LINEWIDTH ((BYTESPERLINE * 3) + (BYTESPERLINE + 2) + 1)
+#define ASCIIOFFSET ((BYTESPERLINE * 3) + 2)
+#define ISPRINT(c) ((c >= ' ') && (c <= '~'))
+
+/*
+ * Output a buffer formatted with a set number of bytes on
+ * each line. Append each line with the ASCII equivalent of
+ * each byte if it falls within the printable ASCII range,
+ * and '.' otherwise. Only active when the DR_DBG_FLAG_TRANS
+ * debug flag is set; output goes through DR_DBG_TRANS.
+ */
+void
+dr_dbg_dump_msg(void *buf, size_t len)
+{
+	int	i, j;
+	char	*msg = buf;
+	char	*curr;
+	char	*aoff;
+	char	line[LINEWIDTH];
+
+	/* abort if not debugging transport */
+	if (!(dr_debug & DR_DBG_FLAG_TRANS)) {
+		return;
+	}
+
+	/* walk the buffer one line at a time */
+	for (i = 0; i < len; i += BYTESPERLINE) {
+
+		bzero(line, LINEWIDTH);
+
+		/* curr tracks the hex area, aoff the ASCII area */
+		curr = line;
+		aoff = line + ASCIIOFFSET;
+
+		/*
+		 * Walk the bytes in the current line, storing
+		 * the hex value for the byte as well as the
+		 * ASCII representation in a temporary buffer.
+		 * All ASCII values are placed at the end of
+		 * the line.
+		 */
+		for (j = 0; (j < BYTESPERLINE) && ((i + j) < len); j++) {
+			(void) sprintf(curr, " %02x", msg[i + j]);
+			*aoff = (ISPRINT(msg[i + j])) ? msg[i + j] : '.';
+			curr += 3;
+			aoff++;
+		}
+
+		/*
+		 * Fill in to the start of the ASCII translation
+		 * with spaces. This will only be necessary if
+		 * this is the last line and there are not enough
+		 * bytes to fill the whole line.
+		 */
+		while (curr != (line + ASCIIOFFSET))
+			*curr++ = ' ';
+
+		DR_DBG_TRANS("%s\n", line);
+	}
+}
+#endif /* DEBUG */
diff --git a/usr/src/uts/sun4v/io/ds.c b/usr/src/uts/sun4v/io/ds.c
new file mode 100644
index 0000000000..06961cef91
--- /dev/null
+++ b/usr/src/uts/sun4v/io/ds.c
@@ -0,0 +1,2728 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Domain Services Module
+ *
+ * The Domain Services (DS) module is responsible for communication
+ * with external service entities. It provides an API for clients to
+ * publish capabilities and handles the low level communication and
+ * version negotiation required to export those capabilites to any
+ * interested service entity. Once a capability has been successfully
+ * registered with a service entity, the DS module facilitates all
+ * data transfers between the service entity and the client providing
+ * that particular capability.
+ */
+
+#include <sys/modctl.h>
+#include <sys/ksynch.h>
+#include <sys/taskq.h>
+#include <sys/disp.h>
+#include <sys/cmn_err.h>
+#include <sys/note.h>
+
+#include <sys/mach_descrip.h>
+#include <sys/mdesc.h>
+#include <sys/ldc.h>
+
+#include <sys/ds.h>
+#include <sys/ds_impl.h>
+
+/*
+ * All DS ports in the system
+ *
+ * The list of DS ports is read in from the MD when the DS module is
+ * initialized and is never modified. This eliminates the need for
+ * locking to access the port array itself. Access to the individual
+ * ports are synchronized at the port level.
+ */
+static ds_port_t ds_ports[DS_MAX_PORTS];
+static ds_portset_t ds_allports; /* all DS ports in the system */
+
+/*
+ * Table of registered services
+ *
+ * Locking: Accesses to the table of services are sychronized using
+ * a RW lock. The reader lock must be held when looking up service
+ * information in the table. The writer lock must be held when any
+ * service information is being modified.
+ */
+static struct ds_svcs {
+ ds_svc_t **tbl; /* the table itself */
+ krwlock_t rwlock; /* table lock */
+ uint_t maxsvcs; /* size of the table */
+ uint_t nsvcs; /* current number of items */
+} ds_svcs;
+
+/* initial size of the table */
+#define DS_MAXSVCS_INIT 32
+
+/*
+ * Taskq for internal task processing
+ */
+static taskq_t *ds_taskq;
+static boolean_t ds_enabled; /* enable/disable taskq processing */
+
+/*
+ * The actual required number of parallel threads is not expected
+ * to be very large. Use the maximum number of CPUs in the system
+ * as a rough upper bound.
+ */
+#define DS_MAX_TASKQ_THR NCPU
+#define DS_DISPATCH(fn, arg) taskq_dispatch(ds_taskq, fn, arg, TQ_SLEEP)
+
+/*
+ * Supported versions of the DS message protocol
+ *
+ * The version array must be sorted in order from the highest
+ * supported version to the lowest. Support for a particular
+ * <major>.<minor> version implies all lower minor versions of
+ * that same major version are supported as well.
+ */
+static ds_ver_t ds_vers[] = { { 1, 0 } };
+
+#define DS_NUM_VER (sizeof (ds_vers) / sizeof (ds_vers[0]))
+
+/*
+ * Results of checking version array with ds_vers_isvalid()
+ */
+typedef enum {
+ DS_VERS_OK,
+ DS_VERS_INCREASING_MAJOR_ERR,
+ DS_VERS_INCREASING_MINOR_ERR
+} ds_vers_check_t;
+
+/* incoming message handling functions */
+typedef void (*ds_msg_handler_t)(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_init_req(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_init_ack(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_init_nack(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_reg_req(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_reg_ack(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_reg_nack(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_unreg_req(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_unreg_ack(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_unreg_nack(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_data(ds_port_t *port, caddr_t buf, size_t len);
+static void ds_handle_nack(ds_port_t *port, caddr_t buf, size_t len);
+
+/*
+ * DS Message Handler Dispatch Table
+ *
+ * A table used to dispatch all incoming messages. This table
+ * contains handlers for all the fixed message types, as well as
+ * the the messages defined in the 1.0 version of the DS protocol.
+ */
+static const ds_msg_handler_t ds_msg_handlers[] = {
+ ds_handle_init_req, /* DS_INIT_REQ */
+ ds_handle_init_ack, /* DS_INIT_ACK */
+ ds_handle_init_nack, /* DS_INIT_NACK */
+ ds_handle_reg_req, /* DS_REG_REQ */
+ ds_handle_reg_ack, /* DS_REG_ACK */
+ ds_handle_reg_nack, /* DS_REG_NACK */
+ ds_handle_unreg_req, /* DS_UNREG */
+ ds_handle_unreg_ack, /* DS_UNREG_ACK */
+ ds_handle_unreg_nack, /* DS_UNREG_NACK */
+ ds_handle_data, /* DS_DATA */
+ ds_handle_nack /* DS_NACK */
+};
+
+/*
+ * DS message log
+ *
+ * Locking: The message log is protected by a single mutex. This
+ * protects all fields in the log structure itself as well as
+ * everything in the entry structures on both the log and the
+ * free list.
+ */
+static struct log {
+ ds_log_entry_t *head; /* head of the log */
+ ds_log_entry_t *freelist; /* head of the free list */
+ size_t size; /* size of the log in bytes */
+ uint32_t nentry; /* number of entries */
+ kmutex_t lock; /* log lock */
+} ds_log;
+
+/* log soft limit */
+uint_t ds_log_sz = DS_LOG_DEFAULT_SZ;
+
+/* initial pool of log entry structures */
+static ds_log_entry_t ds_log_entry_pool[DS_LOG_NPOOL];
+
+/*
+ * Debugging Features
+ */
+#ifdef DEBUG
+
+#define DS_DBG_FLAG_LDC 0x1
+#define DS_DBG_FLAG_LOG 0x2
+#define DS_DBG_FLAG_ALL 0xf
+
+#define DS_DBG if (ds_debug) printf
+#define DS_DBG_LDC if (ds_debug & DS_DBG_FLAG_LDC) printf
+#define DS_DBG_LOG if (ds_debug & DS_DBG_FLAG_LOG) printf
+#define DS_DUMP_LDC_MSG(buf, len) ds_dump_ldc_msg(buf, len)
+
+uint_t ds_debug = 0;
+static void ds_dump_ldc_msg(void *buf, size_t len);
+
+#else /* DEBUG */
+
+#define DS_DBG _NOTE(CONSTCOND) if (0) printf
+#define DS_DBG_LDC DS_DBG
+#define DS_DBG_LOG DS_DBG
+#define DS_DUMP_LDC_MSG(buf, len)
+
+#endif /* DEBUG */
+
+
+/* initialization functions */
+static void ds_init(void);
+static void ds_fini(void);
+static int ds_ports_init(void);
+static int ds_ports_fini(void);
+static int ds_ldc_init(ds_port_t *port);
+static int ds_ldc_fini(ds_port_t *port);
+
+/* event processing functions */
+static uint_t ds_ldc_cb(uint64_t event, caddr_t arg);
+static void ds_dispatch_event(void *arg);
+static void ds_handle_ldc_event(ds_port_t *port, int newstate);
+static int ds_recv_msg(ldc_handle_t ldc_hdl, caddr_t msgp, size_t msglen);
+static void ds_handle_recv(void *arg);
+
+/* message sending functions */
+static int ds_send_msg(ds_port_t *port, caddr_t msg, size_t msglen);
+static void ds_send_init_req(ds_port_t *port);
+static int ds_send_reg_req(ds_svc_t *svc);
+static int ds_send_unreg_req(ds_svc_t *svc);
+static void ds_send_unreg_nack(ds_port_t *port, ds_svc_hdl_t bad_hdl);
+static void ds_send_data_nack(ds_port_t *port, ds_svc_hdl_t bad_hdl);
+
+/* walker functions */
+typedef int (*svc_cb_t)(ds_svc_t *svc, void *arg);
+static int ds_walk_svcs(svc_cb_t svc_cb, void *arg);
+static int ds_svc_isfree(ds_svc_t *svc, void *arg);
+static int ds_svc_ismatch(ds_svc_t *svc, void *arg);
+static int ds_svc_free(ds_svc_t *svc, void *arg);
+static int ds_svc_register(ds_svc_t *svc, void *arg);
+static int ds_svc_unregister(ds_svc_t *svc, void *arg);
+static int ds_svc_port_up(ds_svc_t *svc, void *arg);
+
+/* service utilities */
+static ds_svc_t *ds_alloc_svc(void);
+static void ds_reset_svc(ds_svc_t *svc, ds_port_t *port);
+static ds_svc_t *ds_get_svc(ds_svc_hdl_t hdl);
+
+/* port utilities */
+static int ds_port_add(md_t *mdp, mde_cookie_t port, mde_cookie_t chan);
+static void ds_port_reset(ds_port_t *port);
+
+/* misc utilities */
+static ds_vers_check_t ds_vers_isvalid(ds_ver_t *vers, int nvers);
+
+/* log functions */
+static void ds_log_init(void);
+static void ds_log_fini(void);
+static int ds_log_add_msg(int32_t dest, uint8_t *msg, size_t sz);
+static int ds_log_remove(void);
+static void ds_log_purge(void *arg);
+
+
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "Domain Services %I%"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modlmisc,
+ NULL
+};
+
+/*
+ * Module entry point. Sets up all internal DS state first so
+ * that LDC events can be processed as soon as a port comes up,
+ * then initializes the DS ports from the MD and installs the
+ * module. On any failure the partially-initialized state is
+ * torn down and the error is returned.
+ */
+int
+_init(void)
+{
+	int rv;
+
+	/*
+	 * Perform all internal setup before initializing
+	 * the DS ports. This ensures that events can be
+	 * processed as soon as the port comes up.
+	 */
+	ds_init();
+
+	if ((rv = ds_ports_init()) != 0) {
+		/* fixed typo: message previously read "Domin Services" */
+		cmn_err(CE_WARN, "Domain Services initialization failed");
+		ds_fini();
+		return (rv);
+	}
+
+	if ((rv = mod_install(&modlinkage)) != 0) {
+		(void) ds_ports_fini();
+		ds_fini();
+	}
+
+	return (rv);
+}
+
+/* Module entry point: report module information. */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Module exit point. Tears down the DS ports and internal
+ * state only if the module can actually be removed.
+ */
+int
+_fini(void)
+{
+	int rv;
+
+	if ((rv = mod_remove(&modlinkage)) == 0) {
+		(void) ds_ports_fini();
+		ds_fini();
+	}
+
+	return (rv);
+}
+
+/*
+ * One-time internal setup: allocate the service table, initialize
+ * the message log, create the processing taskq and enable event
+ * dispatch. Must be called before ds_ports_init().
+ */
+static void
+ds_init(void)
+{
+	int	tblsz;
+
+	/*
+	 * Initialize table of registered service classes
+	 */
+	ds_svcs.maxsvcs = DS_MAXSVCS_INIT;
+
+	tblsz = ds_svcs.maxsvcs * sizeof (ds_svc_t *);
+	ds_svcs.tbl = kmem_zalloc(tblsz, KM_SLEEP);
+
+	rw_init(&ds_svcs.rwlock, NULL, RW_DRIVER, NULL);
+
+	ds_svcs.nsvcs = 0;
+
+	/*
+	 * Initialize the message log.
+	 */
+	ds_log_init();
+
+	/*
+	 * Create taskq for internal processing threads. This
+	 * includes processing incoming request messages and
+	 * sending out of band registration messages.
+	 */
+	ds_taskq = taskq_create("ds_taskq", 1, minclsyspri, 1,
+	    DS_MAX_TASKQ_THR, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+
+	/* allow ds_ldc_cb() to start dispatching events */
+	ds_enabled = B_TRUE;
+
+	/* catch problems with the version array */
+	ASSERT(ds_vers_isvalid(ds_vers, DS_NUM_VER) == DS_VERS_OK);
+}
+
+/*
+ * Tear down all internal DS state: disable event dispatch,
+ * destroy the taskq and message log, free every registered
+ * service and release the service table. Inverse of ds_init().
+ */
+static void
+ds_fini(void)
+{
+	int	idx;
+
+	/*
+	 * Flip the enabled switch to make sure that no
+	 * incoming events get dispatched while things
+	 * are being torn down.
+	 */
+	ds_enabled = B_FALSE;
+
+	/*
+	 * Destroy the taskq.
+	 */
+	taskq_destroy(ds_taskq);
+
+	/*
+	 * Destroy the message log.
+	 */
+	ds_log_fini();
+
+	/*
+	 * Deallocate the table of registered services
+	 */
+
+	/* clear out all entries */
+	rw_enter(&ds_svcs.rwlock, RW_WRITER);
+	idx = ds_walk_svcs(ds_svc_free, NULL);
+	rw_exit(&ds_svcs.rwlock);
+
+	/* should have gone through the whole table */
+	ASSERT(idx == ds_svcs.maxsvcs);
+
+	/* destroy the table itself */
+	kmem_free(ds_svcs.tbl, ds_svcs.maxsvcs * sizeof (ds_svc_t *));
+	rw_destroy(&ds_svcs.rwlock);
+	bzero(&ds_svcs, sizeof (ds_svcs));
+}
+
+/*
+ * Initialize the list of ports based on the MD. Scans the MD for
+ * all DS port nodes, adds each one via ds_port_add(), then brings
+ * up the LDC channel for every port in ds_allports. Returns 0 on
+ * success, -1 on failure (after tearing down any partial state).
+ */
+static int
+ds_ports_init(void)
+{
+	int		idx;
+	int		rv;
+	md_t		*mdp;
+	int		num_nodes;
+	int		listsz;
+	mde_cookie_t	rootnode;
+	mde_cookie_t	*portp = NULL;
+	mde_cookie_t	*chanp = NULL;
+	int		nport;
+	int		nchan;
+	ds_port_t	*port;
+
+	if ((mdp = md_get_handle()) == NULL) {
+		cmn_err(CE_WARN, "unable to initialize machine description");
+		return (-1);
+	}
+
+	num_nodes = md_node_count(mdp);
+	ASSERT(num_nodes > 0);
+
+	listsz = num_nodes * sizeof (mde_cookie_t);
+
+	/* allocate temporary storage for MD scans */
+	portp = kmem_zalloc(listsz, KM_SLEEP);
+	chanp = kmem_zalloc(listsz, KM_SLEEP);
+
+	rootnode = md_root_node(mdp);
+	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+	/* find all the DS ports in the MD */
+	nport = md_scan_dag(mdp, rootnode, md_find_name(mdp, DS_MD_PORT_NAME),
+	    md_find_name(mdp, "fwd"), portp);
+
+	if (nport <= 0) {
+		cmn_err(CE_NOTE, "No '%s' nodes in MD", DS_MD_PORT_NAME);
+		rv = -1;
+		goto done;
+	}
+
+	/*
+	 * Initialize all the ports found in the MD.
+	 */
+	for (idx = 0; idx < nport; idx++) {
+
+		/* get the channels for this port */
+		nchan = md_scan_dag(mdp, portp[idx],
+		    md_find_name(mdp, DS_MD_CHAN_NAME),
+		    md_find_name(mdp, "fwd"), chanp);
+
+		if (nchan <= 0) {
+			cmn_err(CE_NOTE, "No '%s' node for DS port",
+			    DS_MD_CHAN_NAME);
+			rv = -1;
+			goto done;
+		}
+
+		/* expecting only one channel; extras are ignored below */
+		if (nchan != 1) {
+			DS_DBG("expected 1 '%s' node for DS port, found %d\n",
+			    DS_MD_CHAN_NAME, nchan);
+		}
+
+		/* only the first channel (chanp[0]) is used */
+		if (ds_port_add(mdp, portp[idx], chanp[0]) != 0) {
+			rv = -1;
+			goto done;
+		}
+	}
+
+	/*
+	 * Initialize the LDC channel for each port.
+	 */
+	for (idx = 0; idx < DS_MAX_PORTS; idx++) {
+
+		if (!DS_PORT_IN_SET(ds_allports, idx))
+			continue;
+
+		port = &ds_ports[idx];
+
+		mutex_enter(&port->lock);
+
+		/* an LDC init failure is logged but not fatal */
+		if (ds_ldc_init(port)) {
+			cmn_err(CE_WARN, "ds@%lx: ports_init: failed to "
+			    "initialize LDC %ld", port->id, port->ldc.id);
+		} else {
+			DS_DBG("ds@%lx: ports_init: initialization complete\n",
+			    port->id);
+		}
+
+		mutex_exit(&port->lock);
+	}
+
+	rv = 0;
+
+done:
+	if (rv != 0)
+		(void) ds_ports_fini();
+
+	kmem_free(portp, listsz);
+	kmem_free(chanp, listsz);
+
+	(void) md_fini_handle(mdp);
+
+	return (rv);
+}
+
+/*
+ * Tear down every initialized port: shut down its LDC channel
+ * (if it got that far), mark it free, destroy its lock and
+ * remove it from ds_allports. Always returns 0.
+ */
+static int
+ds_ports_fini(void)
+{
+	int		idx;
+	ds_port_t	*port;
+
+	/*
+	 * Tear down each initialized port.
+	 */
+	for (idx = 0; idx < DS_MAX_PORTS; idx++) {
+
+		if (!DS_PORT_IN_SET(ds_allports, idx))
+			continue;
+
+		port = &ds_ports[idx];
+
+		mutex_enter(&port->lock);
+
+		if (port->state >= DS_PORT_LDC_INIT) {
+			/* shut down the LDC for this port */
+			(void) ds_ldc_fini(port);
+		}
+
+		port->state = DS_PORT_FREE;
+
+		mutex_exit(&port->lock);
+
+		/* clean up the port structure */
+		mutex_destroy(&port->lock);
+		DS_PORTSET_DEL(ds_allports, idx);
+	}
+
+	return (0);
+}
+
+/*
+ * Initialize the LDC channel for a single port: init the channel,
+ * register ds_ldc_cb() as the event callback, open the channel and
+ * attempt to bring it up. On success the port moves to the
+ * DS_PORT_LDC_INIT state and, if the channel is already up, an
+ * init request is sent immediately. Returns 0 on success or the
+ * LDC error code on failure. Caller must hold port->lock.
+ */
+static int
+ds_ldc_init(ds_port_t *port)
+{
+	int		rv;
+	ldc_attr_t	ldc_attr;
+	caddr_t		cb_arg = (caddr_t)port;
+
+	ASSERT(MUTEX_HELD(&port->lock));
+
+	DS_DBG("ds@%lx: ldc_init: ldc_id=%ld\n", port->id, port->ldc.id);
+
+	ldc_attr.devclass = LDC_DEV_GENERIC;
+	ldc_attr.instance = 0;
+	ldc_attr.mode = LDC_MODE_STREAM;
+	ldc_attr.qlen = DS_QUEUE_LEN;
+
+	if ((rv = ldc_init(port->ldc.id, &ldc_attr, &port->ldc.hdl)) != 0) {
+		cmn_err(CE_WARN, "ds@%lx: ldc_init: ldc_init error (%d)",
+		    port->id, rv);
+		goto done;
+	}
+
+	/* register the LDC callback */
+	if ((rv = ldc_reg_callback(port->ldc.hdl, ds_ldc_cb, cb_arg)) != 0) {
+		/* fixed typo: message previously read "dc_init" */
+		cmn_err(CE_WARN, "ds@%lx: ldc_init: ldc_reg_callback error "
+		    "(%d)", port->id, rv);
+		goto done;
+	}
+
+	if ((rv = ldc_open(port->ldc.hdl)) != 0) {
+		cmn_err(CE_WARN, "ds@%lx: ldc_init: ldc_open error (%d)",
+		    port->id, rv);
+		goto done;
+	}
+
+	/* bring-up is best effort; the callback handles LDC_EVT_UP later */
+	(void) ldc_up(port->ldc.hdl);
+
+	(void) ldc_status(port->ldc.hdl, &port->ldc.state);
+
+	DS_DBG_LDC("ds@%lx: ldc_init: initial LDC state 0x%x\n",
+	    port->id, port->ldc.state);
+
+	port->state = DS_PORT_LDC_INIT;
+
+	/* if port is up, send init message */
+	if (port->ldc.state == LDC_UP) {
+		ds_send_init_req(port);
+	}
+
+done:
+	return (rv);
+}
+
+/*
+ * Shut down a port's LDC channel: close it, unregister the
+ * callback and finalize the handle. Stops and returns the LDC
+ * error code at the first failure; returns 0 on success.
+ */
+static int
+ds_ldc_fini(ds_port_t *port)
+{
+	int	rv;
+
+	ASSERT(port->state >= DS_PORT_LDC_INIT);
+
+	DS_DBG("ds@%lx: ldc_fini: ldc_id=%ld\n", port->id, port->ldc.id);
+
+	if ((rv = ldc_close(port->ldc.hdl)) != 0) {
+		cmn_err(CE_WARN, "ds@%lx: ldc_fini: ldc_close error (%d)",
+		    port->id, rv);
+		return (rv);
+	}
+
+	if ((rv = ldc_unreg_callback(port->ldc.hdl)) != 0) {
+		cmn_err(CE_WARN, "ds@%lx: ldc_fini: ldc_unreg_callback error "
+		    "(%d)", port->id, rv);
+		return (rv);
+	}
+
+	if ((rv = ldc_fini(port->ldc.hdl)) != 0) {
+		cmn_err(CE_WARN, "ds@%lx: ldc_fini: ldc_fini error (%d)",
+		    port->id, rv);
+		return (rv);
+	}
+
+	return (rv);
+}
+
+/*
+ * A DS event consists of a buffer on a port.
+ */
+typedef struct ds_event {
+	ds_port_t	*port;		/* port the data arrived on */
+	char		*buf;		/* the message data */
+	size_t		buflen;		/* size of buf */
+} ds_event_t;
+
+/*
+ * LDC event callback registered for every DS port. Read events
+ * are handed off to a taskq thread (ds_handle_recv); up/down
+ * events are handled inline under the port lock; reset/write
+ * events are only logged. Always returns LDC_SUCCESS. Dispatch
+ * is suppressed entirely while ds_enabled is false (teardown).
+ */
+static uint_t
+ds_ldc_cb(uint64_t event, caddr_t arg)
+{
+	ldc_status_t	ldc_state;
+	int		rv;
+	ds_port_t	*port = (ds_port_t *)arg;
+	ldc_handle_t	ldc_hdl;
+
+	DS_DBG("ds@%lx: ds_ldc_cb...\n", port->id);
+
+	if (!ds_enabled) {
+		DS_DBG("ds@%lx: callback handling is disabled\n", port->id);
+		return (LDC_SUCCESS);
+	}
+
+	ldc_hdl = port->ldc.hdl;
+
+	/*
+	 * Check the LDC event.
+	 */
+
+	if (event & LDC_EVT_READ) {
+		/* dispatch a thread to handle the read event */
+		if (DS_DISPATCH(ds_handle_recv, port) == NULL) {
+			cmn_err(CE_WARN, "error initiating event handler");
+		}
+		return (LDC_SUCCESS);
+	}
+
+	/* only check status if not a read event */
+	if ((rv = ldc_status(ldc_hdl, &ldc_state)) != 0) {
+		DS_DBG("ds@%lx: ldc_status error (%d)", port->id, rv);
+		return (LDC_SUCCESS);
+	}
+
+	if (event & LDC_EVT_DOWN || event & LDC_EVT_UP) {
+		mutex_enter(&port->lock);
+		ds_handle_ldc_event(port, ldc_state);
+		mutex_exit(&port->lock);
+		return (LDC_SUCCESS);
+	}
+
+	if (event & LDC_EVT_RESET || event & LDC_EVT_WRITE) {
+		DS_DBG("ds@%lx: LDC event (%lx) received", port->id, event);
+		return (LDC_SUCCESS);
+	}
+
+	cmn_err(CE_NOTE, "ds@%lx: Unexpected LDC event (%lx) received",
+	    port->id, event);
+
+	return (LDC_SUCCESS);
+}
+
+/*
+ * Read exactly msglen bytes from an LDC channel into msgp, looping
+ * because a single ldc_read() may return fewer bytes than requested.
+ * EAGAIN is retried up to 1000 times with a 10ms delay between
+ * attempts (up to ~10 seconds total); any other error, or retry
+ * exhaustion, is returned to the caller.
+ */
+static int
+ds_recv_msg(ldc_handle_t ldc_hdl, caddr_t msgp, size_t msglen)
+{
+	int rv = 0;
+	size_t amt_left = msglen;
+	int loopcnt = 0;
+
+	while (msglen > 0) {
+		if ((rv = ldc_read(ldc_hdl, msgp, &amt_left)) != 0) {
+			if ((rv == EAGAIN) && (loopcnt++ < 1000)) {
+				/*
+				 * Channel not ready; back off briefly
+				 * and retry.  If the channel stays
+				 * blocked past the retry limit,
+				 * something is wrong with it.
+				 */
+				delay(drv_usectohz(10000)); /* 1/100 sec */
+			} else {
+				/* fail */
+				return (rv);
+			}
+		} else {
+			/*
+			 * amt_left now holds the byte count actually
+			 * read (ldc_read in/out parameter); advance
+			 * past those bytes and ask for the remainder.
+			 */
+			msgp += amt_left;
+			msglen -= amt_left;
+			amt_left = msglen;
+		}
+	} /* while (msglen > 0) */
+
+	return (rv);
+}
+
+/*
+ * Taskq worker: drain all pending messages from a port's LDC
+ * receive queue.  Each message is read in two parts (fixed-size
+ * header, then payload_len bytes of body), logged, and handed to
+ * ds_dispatch_event() in a new thread.  A message whose read fails
+ * is dropped and the loop continues with the next one.
+ *
+ * NOTE(review): the port lock is held across KM_SLEEP allocations
+ * and the dispatch call -- confirm this is acceptable here.
+ */
+static void
+ds_handle_recv(void *arg)
+{
+	ds_port_t *port = (ds_port_t *)arg;
+	char *hbuf;
+	size_t len;
+	size_t read_size;
+	boolean_t isempty;
+	ds_hdr_t hdr;
+	uint8_t *msg;
+	char *currp;
+	int rv;
+	ldc_handle_t ldc_hdl;
+	ds_event_t *devent;
+
+	/* fixed: debug message previously said "ds_ldc_cb" */
+	DS_DBG("ds@%lx: ds_handle_recv...\n", port->id);
+
+	ldc_hdl = port->ldc.hdl;
+
+	/* hold the port lock so reads are not interleaved */
+	mutex_enter(&port->lock);
+	while ((ldc_chkq(ldc_hdl, &isempty) == 0) && (!isempty)) {
+
+		DS_DBG("ds@%lx: reading next message\n", port->id);
+
+		/*
+		 * Read in the next message.
+		 */
+		hbuf = (char *)&hdr;
+		bzero(hbuf, DS_HDR_SZ);
+		read_size = DS_HDR_SZ;
+		currp = hbuf;
+
+		/* read in the message header */
+		if ((rv = ds_recv_msg(ldc_hdl, currp, read_size)) != 0) {
+			/*
+			 * failed to read message drop it and see if there
+			 * are anymore messages
+			 */
+			cmn_err(CE_NOTE, "ldc_read returned %d", rv);
+			continue;
+		}
+
+		len = read_size;
+
+		/* get payload size and alloc a buffer */
+		read_size = ((ds_hdr_t *)hbuf)->payload_len;
+		msg = kmem_zalloc((DS_HDR_SZ + read_size), KM_SLEEP);
+
+		/* move message header into buffer */
+		bcopy(hbuf, msg, DS_HDR_SZ);
+		currp = (char *)(msg) + DS_HDR_SZ;
+
+		/* read in the message body */
+		if ((rv = ds_recv_msg(ldc_hdl, currp, read_size)) != 0) {
+			/*
+			 * failed to read message drop it and see if there
+			 * are anymore messages
+			 */
+			kmem_free(msg, (DS_HDR_SZ + read_size));
+			cmn_err(CE_NOTE, "ldc_read returned %d", rv);
+			continue;
+		}
+
+		len += read_size;
+		DS_DUMP_LDC_MSG(msg, len);
+
+		/*
+		 * Send the message for processing, and store it
+		 * in the log. The memory is deallocated only when
+		 * the message is removed from the log.
+		 */
+		devent = kmem_zalloc(sizeof (ds_event_t), KM_SLEEP);
+		devent->port = port;
+		devent->buf = (char *)msg;
+		devent->buflen = len;
+
+		/* log the message */
+		(void) ds_log_add_msg(DS_LOG_IN(port->id), msg, len);
+
+		/* send the message off to get processed in a new thread */
+		if (DS_DISPATCH(ds_dispatch_event, devent) == NULL) {
+			cmn_err(CE_WARN, "error initiating event handler");
+			/* fixed: free the event no thread will consume */
+			kmem_free(devent, sizeof (ds_event_t));
+			continue;
+		}
+	}
+	mutex_exit(&port->lock);
+}
+
+/*
+ * Taskq worker: validate and dispatch a single received message to
+ * the handler registered for its message type, then free the event
+ * wrapper.  The message buffer itself is owned by the message log
+ * (see ds_handle_recv) and is not freed here.
+ */
+static void
+ds_dispatch_event(void *arg)
+{
+	ds_event_t *event = (ds_event_t *)arg;
+	ds_hdr_t *hdr;
+	ds_port_t *port;
+
+	port = event->port;
+
+	hdr = (ds_hdr_t *)event->buf;
+
+	if (!DS_MSG_TYPE_VALID(hdr->msg_type)) {
+		cmn_err(CE_NOTE, "ds@%lx: dispatch_event: invalid msg "
+		    "type (%d)", port->id, hdr->msg_type);
+		/* fixed: the event was previously leaked on this path */
+		kmem_free(event, sizeof (ds_event_t));
+		return;
+	}
+
+	DS_DBG("ds@%lx: dispatch_event: msg_type=%d\n", port->id,
+	    hdr->msg_type);
+
+	/* indirect through the per-message-type handler table */
+	(*ds_msg_handlers[hdr->msg_type])(port, event->buf, event->buflen);
+
+	kmem_free(event, sizeof (ds_event_t));
+}
+
+/*
+ * Process an LDC state transition for a port.  Caller holds the
+ * port lock.  A transition into LDC_UP from OPEN/READY starts
+ * version negotiation; any transition out of LDC_UP resets the
+ * port.  The new state is recorded unconditionally.
+ */
+static void
+ds_handle_ldc_event(ds_port_t *port, int newstate)
+{
+	ldc_status_t oldstate = port->ldc.state;
+
+	ASSERT(MUTEX_HELD(&port->lock));
+
+	DS_DBG_LDC("ds@%lx: LDC state change: 0x%x -> 0x%x\n",
+	    port->id, oldstate, newstate);
+
+	switch (newstate) {
+	case LDC_UP:
+		if ((oldstate == LDC_OPEN) || (oldstate == LDC_READY)) {
+			/* start the version negotiation */
+			ds_send_init_req(port);
+		} else {
+			DS_DBG_LDC("unsupported LDC state change\n");
+		}
+		break;
+
+	case LDC_READY:
+	case LDC_OPEN:
+		if (oldstate != LDC_UP) {
+			/* not worried about this state change */
+			break;
+		}
+
+		/* dropping out of UP: fall into the reset path below */
+		_NOTE(FALLTHROUGH)
+
+	default:
+		if (oldstate == LDC_UP) {
+			ds_port_reset(port);
+		} else {
+			DS_DBG_LDC("unsupported LDC state change\n");
+		}
+		break;
+	}
+
+	port->ldc.state = newstate;
+}
+
+/*
+ * Version negotiation is always initiated by the guest. Any
+ * attempt by a remote party to initiate the handshake gets
+ * nack'd with a major number equal to zero. This indicates
+ * that no version is supported since an init request is not
+ * expected.
+ */
+static void
+ds_handle_init_req(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_hdr_t *hdr;
+	ds_init_nack_t *nack;
+	char *msg;
+	size_t msglen;
+	ds_init_req_t *req;
+	size_t explen = DS_MSG_LEN(ds_init_req_t);
+
+	req = (ds_init_req_t *)(buf + DS_HDR_SZ);
+
+	/*
+	 * sanity check the incoming message; note that a bad length
+	 * is only logged -- the nack below is sent either way
+	 */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <init_req: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+	} else {
+		DS_DBG("ds@%lx: <init_req: ver=%d.%d\n", port->id,
+		    req->major_vers, req->minor_vers);
+	}
+
+	DS_DBG("ds@%lx: init_nack>: major=0\n", port->id);
+
+	/* build a nack advertising no supported version (major = 0) */
+	msglen = DS_MSG_LEN(ds_init_nack_t);
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_INIT_NACK;
+	hdr->payload_len = sizeof (ds_init_nack_t);
+
+	nack = (ds_init_nack_t *)(msg + DS_HDR_SZ);
+	nack->major_vers = 0;
+
+	/* send message */
+	mutex_enter(&port->lock);
+	(void) ds_send_msg(port, msg, msglen);
+	mutex_exit(&port->lock);
+}
+
+/*
+ * Handle the ACK for a locally initiated init request: conclude
+ * version negotiation for the port.  On success the agreed version
+ * is recorded, the port moves to DS_PORT_READY, and every service
+ * is told the port is up and then given a chance to register.
+ */
+static void
+ds_handle_init_ack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_init_ack_t *ack;
+	ds_ver_t *ver;
+	size_t explen = DS_MSG_LEN(ds_init_ack_t);
+
+	/* sanity check the incoming message */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <init_ack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	ack = (ds_init_ack_t *)(buf + DS_HDR_SZ);
+
+	mutex_enter(&port->lock);
+
+	/* an ack is only valid while an init request is outstanding */
+	if (port->state != DS_PORT_INIT_REQ) {
+		cmn_err(CE_NOTE, "ds@%lx: <init_ack: invalid state for msg "
+		    "(%d)", port->id, port->state);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	/* the version we proposed in the outstanding request */
+	ver = &(ds_vers[port->ver_idx]);
+
+	DS_DBG("ds@%lx: <init_ack: req=v%d.%d, ack=v%d.%d\n", port->id,
+	    ver->major, ver->minor, ver->major, ack->minor_vers);
+
+	/* agreed upon a major version */
+	port->ver.major = ver->major;
+
+	/*
+	 * If the returned minor version is larger than
+	 * the requested minor version, use the lower of
+	 * the two, i.e. the requested version.
+	 */
+	if (ack->minor_vers >= ver->minor) {
+		/*
+		 * Use the minor version specified in the
+		 * original request.
+		 */
+		port->ver.minor = ver->minor;
+	} else {
+		/*
+		 * Use the lower minor version returned in
+		 * the ack. By defninition, all lower minor
+		 * versions must be supported.
+		 */
+		port->ver.minor = ack->minor_vers;
+	}
+
+	port->state = DS_PORT_READY;
+
+	DS_DBG("ds@%lx: <init_ack: port ready v%d.%d\n", port->id,
+	    port->ver.major, port->ver.minor);
+
+	mutex_exit(&port->lock);
+
+	/*
+	 * The port came up, so update all the services
+	 * with this information. Follow that up with an
+	 * attempt to register any service that is not
+	 * already registered.
+	 */
+	rw_enter(&ds_svcs.rwlock, RW_WRITER);
+
+	(void) ds_walk_svcs(ds_svc_port_up, port);
+	(void) ds_walk_svcs(ds_svc_register, NULL);
+
+	rw_exit(&ds_svcs.rwlock);
+}
+
+/*
+ * Handle a NACK for a locally initiated init request.  The nack
+ * carries the highest major version the other side supports; pick
+ * the closest major version we also support and restart the
+ * handshake, or give up if there is no overlap (major == 0 means
+ * DS is not supported at all).
+ */
+static void
+ds_handle_init_nack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	int idx;
+	ds_init_nack_t *nack;
+	ds_ver_t *ver;
+	size_t explen = DS_MSG_LEN(ds_init_nack_t);
+
+	/* sanity check the incoming message */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <init_nack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	nack = (ds_init_nack_t *)(buf + DS_HDR_SZ);
+
+	mutex_enter(&port->lock);
+
+	/* a nack is only valid while an init request is outstanding */
+	if (port->state != DS_PORT_INIT_REQ) {
+		cmn_err(CE_NOTE, "ds@%lx: <init_nack: invalid state for msg "
+		    "(%d)", port->id, port->state);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	ver = &(ds_vers[port->ver_idx]);
+
+	DS_DBG("ds@%lx: <init_nack: req=v%d.%d, nack=v%d.x\n", port->id,
+	    ver->major, ver->minor, nack->major_vers);
+
+	if (nack->major_vers == 0) {
+		/* no supported protocol version */
+		cmn_err(CE_NOTE, "ds@%lx: <init_nack: DS not supported",
+		    port->id);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	/*
+	 * Walk the version list, looking for a major version
+	 * that is as close to the requested major version as
+	 * possible.  (ds_vers is assumed sorted by descending
+	 * major version -- TODO confirm against its definition.)
+	 */
+	for (idx = port->ver_idx; idx < DS_NUM_VER; idx++) {
+		if (ds_vers[idx].major <= nack->major_vers) {
+			/* found a version to try */
+			goto done;
+		}
+	}
+
+	/*
+	 * Loop completed without a match, so idx == DS_NUM_VER here
+	 * and this check always takes the failure path.
+	 */
+	if (idx == DS_NUM_VER) {
+		/* no supported version */
+		cmn_err(CE_NOTE, "ds@%lx: <init_nack: DS v%d.x not supported",
+		    port->id, nack->major_vers);
+
+		mutex_exit(&port->lock);
+		return;
+	}
+
+done:
+	/* start the handshake again */
+	port->ver_idx = idx;
+	port->state = DS_PORT_LDC_INIT;
+
+	ds_send_init_req(port);
+
+	mutex_exit(&port->lock);
+}
+
+/*
+ * Handle a registration request from the remote side.  Like init
+ * requests, inbound registrations are always refused: a nack with
+ * major version 0 is sent back regardless of the request contents.
+ */
+static void
+ds_handle_reg_req(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_hdr_t *hdr;
+	ds_reg_req_t *req;
+	ds_reg_nack_t *nack;
+	char *msg;
+	size_t msglen;
+	size_t explen = DS_MSG_LEN(ds_reg_req_t);
+
+	/* the request information */
+	req = (ds_reg_req_t *)(buf + DS_HDR_SZ);
+
+	/*
+	 * sanity check the incoming message; a short message is only
+	 * logged, not rejected.
+	 * NOTE(review): req->svc_handle is still read below even when
+	 * len < explen -- confirm the buffer is always large enough
+	 * for that field in the short-message case.
+	 */
+	if (len < explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_req: invalid message length "
+		    "(%ld), expected at least %ld", port->id, len, explen);
+	} else {
+		DS_DBG("ds@%lx: <reg_req: id='%s', ver=%d.%d, hdl=0x%lx\n",
+		    port->id, req->svc_id, req->major_vers, req->minor_vers,
+		    req->svc_handle);
+	}
+
+	DS_DBG("ds@%lx: reg_nack>: major=0\n", port->id);
+
+	/* build a version nack echoing the requester's handle */
+	msglen = DS_MSG_LEN(ds_reg_nack_t);
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_REG_NACK;
+	hdr->payload_len = sizeof (ds_reg_nack_t);
+
+	nack = (ds_reg_nack_t *)(msg + DS_HDR_SZ);
+	nack->svc_handle = req->svc_handle;
+	nack->result = DS_REG_VER_NACK;
+	nack->major_vers = 0;
+
+	/* send message */
+	mutex_enter(&port->lock);
+	(void) ds_send_msg(port, msg, msglen);
+	mutex_exit(&port->lock);
+}
+
+/*
+ * Handle the ACK for a locally initiated service registration:
+ * conclude version negotiation for the service, mark it active,
+ * and invoke the client's registration callback (if any) with the
+ * agreed version.
+ */
+static void
+ds_handle_reg_ack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_reg_ack_t *ack;
+	ds_ver_t *ver;
+	ds_ver_t tmpver;
+	ds_svc_t *svc;
+	size_t explen = DS_MSG_LEN(ds_reg_ack_t);
+
+	/* sanity check the incoming message */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_ack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	ack = (ds_reg_ack_t *)(buf + DS_HDR_SZ);
+
+	rw_enter(&ds_svcs.rwlock, RW_READER);
+
+	/* lookup appropriate client */
+	if ((svc = ds_get_svc(ack->svc_handle)) == NULL) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_ack: invalid handle 0x%lx",
+		    port->id, ack->svc_handle);
+		goto done;
+	}
+
+	/* make sure the message makes sense */
+	if (svc->state != DS_SVC_REG_PENDING) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_ack: invalid state for message "
+		    "(%d)", port->id, svc->state);
+		goto done;
+	}
+
+	/* the version proposed in the outstanding request */
+	ver = &(svc->cap.vers[svc->ver_idx]);
+
+	DS_DBG("ds@%lx: <reg_ack: hdl=0x%lx, ack=v%d.%d\n", port->id,
+	    ack->svc_handle, ver->major, ack->minor_vers);
+
+	/* major version has been agreed upon */
+	svc->ver.major = ver->major;
+
+	/* take the lower of the requested and acked minor versions */
+	if (ack->minor_vers >= ver->minor) {
+		/*
+		 * Use the minor version specified in the
+		 * original request.
+		 */
+		svc->ver.minor = ver->minor;
+	} else {
+		/*
+		 * Use the lower minor version returned in
+		 * the ack. By defninition, all lower minor
+		 * versions must be supported.
+		 */
+		svc->ver.minor = ack->minor_vers;
+	}
+
+	svc->state = DS_SVC_ACTIVE;
+
+	DS_DBG("ds@%lx: <reg_ack: %s v%d.%d ready, hdl=0x%lx\n", port->id,
+	    svc->cap.svc_id, svc->ver.major, svc->ver.minor, svc->hdl);
+
+	/* notify the client that registration is complete */
+	if (svc->ops.ds_reg_cb) {
+		/*
+		 * Use a temporary version structure so that
+		 * the copy in the svc structure cannot be
+		 * modified by the client.
+		 */
+		tmpver.major = svc->ver.major;
+		tmpver.minor = svc->ver.minor;
+
+		(*svc->ops.ds_reg_cb)(svc->ops.cb_arg, &tmpver, svc->hdl);
+	}
+
+done:
+	rw_exit(&ds_svcs.rwlock);
+}
+
+/*
+ * Handle a NACK for a locally initiated service registration.
+ * Depending on the nack contents: a duplicate registration is only
+ * logged; a major version of 0 means the service is unsupported on
+ * this port and the service is reset; otherwise the closest
+ * supported major version is selected and registration retried.
+ */
+static void
+ds_handle_reg_nack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_reg_nack_t *nack;
+	ds_svc_t *svc;
+	int idx;
+	size_t explen = DS_MSG_LEN(ds_reg_nack_t);
+
+	/* sanity check the incoming message */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_nack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	nack = (ds_reg_nack_t *)(buf + DS_HDR_SZ);
+
+	rw_enter(&ds_svcs.rwlock, RW_READER);
+
+	/* lookup appropriate client */
+	if ((svc = ds_get_svc(nack->svc_handle)) == NULL) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_nack: invalid handle 0x%lx",
+		    port->id, nack->svc_handle);
+		goto done;
+	}
+
+	/* make sure the message makes sense */
+	if (svc->state != DS_SVC_REG_PENDING) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_nack: invalid state for message "
+		    "(%d)", port->id, svc->state);
+		goto done;
+	}
+
+	if (nack->result == DS_REG_DUP) {
+		cmn_err(CE_NOTE, "ds@%lx: <reg_nack: duplicate registration "
+		    "for %s", port->id, svc->cap.svc_id);
+		goto done;
+	}
+
+	/*
+	 * A major version of zero indicates that the
+	 * service is not supported at all.
+	 */
+	if (nack->major_vers == 0) {
+		DS_DBG("ds@%lx: <reg_nack: %s not supported\n", port->id,
+		    svc->cap.svc_id);
+		ds_reset_svc(svc, port);
+		goto done;
+	}
+
+	DS_DBG("ds@%lx: <reg_nack: hdl=0x%lx, nack=%d.x\n", port->id,
+	    nack->svc_handle, nack->major_vers);
+
+	/*
+	 * Walk the version list for the service, looking for
+	 * a major version that is as close to the requested
+	 * major version as possible.
+	 */
+	for (idx = svc->ver_idx; idx < svc->cap.nvers; idx++) {
+		if (svc->cap.vers[idx].major <= nack->major_vers) {
+			/* found a version to try */
+			break;
+		}
+	}
+
+	if (idx == svc->cap.nvers) {
+		/* no supported version */
+		DS_DBG("ds@%lx: <reg_nack: %s v%d.x not supported\n",
+		    port->id, svc->cap.svc_id, nack->major_vers);
+		svc->state = DS_SVC_INACTIVE;
+		goto done;
+	}
+
+	/* start the handshake again */
+	svc->state = DS_SVC_INACTIVE;
+	svc->ver_idx = idx;
+
+	(void) ds_svc_register(svc, NULL);
+
+done:
+	rw_exit(&ds_svcs.rwlock);
+}
+
+/*
+ * Handle an unregister request from the remote side.  An unknown
+ * handle is nack'd; otherwise the service is unregistered locally
+ * and an unregister ACK is sent back.  The sent message buffer is
+ * retained by the message log (via ds_send_msg), matching the
+ * other senders in this file.
+ */
+static void
+ds_handle_unreg_req(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_hdr_t *hdr;
+	ds_unreg_req_t *req;
+	ds_unreg_ack_t *ack;
+	ds_svc_t *svc;
+	char *msg;
+	size_t msglen;
+	size_t explen = DS_MSG_LEN(ds_unreg_req_t);
+
+	/* sanity check the incoming message */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <unreg_req: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	/* the request information */
+	req = (ds_unreg_req_t *)(buf + DS_HDR_SZ);
+
+	rw_enter(&ds_svcs.rwlock, RW_READER);
+
+	/* lookup appropriate client */
+	if ((svc = ds_get_svc(req->svc_handle)) == NULL) {
+		cmn_err(CE_NOTE, "ds@%lx: <unreg_req: invalid handle "
+		    "0x%lx", port->id, req->svc_handle);
+		ds_send_unreg_nack(port, req->svc_handle);
+		goto done;
+	}
+
+	/* unregister the service */
+	(void) ds_svc_unregister(svc, svc->port);
+
+	DS_DBG("ds@%lx: unreg_ack>: hdl=0x%lx\n", port->id, req->svc_handle);
+
+	msglen = DS_HDR_SZ + sizeof (ds_unreg_ack_t);
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_UNREG_ACK;
+	hdr->payload_len = sizeof (ds_unreg_ack_t);
+
+	ack = (ds_unreg_ack_t *)(msg + DS_HDR_SZ);
+	ack->svc_handle = req->svc_handle;
+
+	/* send message */
+	mutex_enter(&port->lock);
+	(void) ds_send_msg(port, msg, msglen);
+	/* fixed: this was a second mutex_enter(), deadlocking on port->lock */
+	mutex_exit(&port->lock);
+
+done:
+	rw_exit(&ds_svcs.rwlock);
+}
+
+/*
+ * Process the ACK for an unregister request that originated on
+ * this end.  The local service state was already torn down when
+ * the request was sent, so all that remains is a sanity check
+ * that the handle is truly gone from the service table.
+ */
+static void
+ds_handle_unreg_ack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_unreg_ack_t *ack;
+	size_t explen = DS_MSG_LEN(ds_unreg_ack_t);
+
+	/* reject any message that is not exactly the expected size */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <unreg_ack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	ack = (ds_unreg_ack_t *)(buf + DS_HDR_SZ);
+
+	DS_DBG("ds@%lx: <unreg_ack: hdl=0x%lx\n", port->id, ack->svc_handle);
+
+	rw_enter(&ds_svcs.rwlock, RW_READER);
+	if (ds_get_svc(ack->svc_handle) != NULL) {
+		/* the handle should no longer resolve to a service */
+		cmn_err(CE_NOTE, "ds@%lx: <unreg_ack: handle 0x%lx still "
+		    "in use", port->id, ack->svc_handle);
+	}
+	rw_exit(&ds_svcs.rwlock);
+}
+
+/*
+ * Process a NACK for an unregister request that originated on
+ * this end.  As with the ACK case, local teardown already
+ * happened at request time, so only a sanity check on the
+ * handle is performed.
+ */
+static void
+ds_handle_unreg_nack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_unreg_nack_t *nack;
+	size_t explen = DS_MSG_LEN(ds_unreg_nack_t);
+
+	/* reject any message that is not exactly the expected size */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <unreg_nack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	nack = (ds_unreg_nack_t *)(buf + DS_HDR_SZ);
+
+	DS_DBG("ds@%lx: <unreg_nack: hdl=0x%lx\n", port->id,
+	    nack->svc_handle);
+
+	rw_enter(&ds_svcs.rwlock, RW_READER);
+	if (ds_get_svc(nack->svc_handle) != NULL) {
+		/* the handle should no longer resolve to a service */
+		cmn_err(CE_NOTE, "ds@%lx: <unreg_nack: handle 0x%lx still "
+		    "in use", port->id, nack->svc_handle);
+	}
+	rw_exit(&ds_svcs.rwlock);
+}
+
+/*
+ * Handle an inbound data message: strip the DS header and hand the
+ * payload to the owning service's data callback.  An unrecognized
+ * handle is nack'd back to the sender.
+ *
+ * NOTE(review): svc is dereferenced after ds_svcs.rwlock is
+ * dropped (pre-existing behavior) -- confirm service lifetimes
+ * make that safe.
+ */
+static void
+ds_handle_data(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_data_handle_t *data;
+	ds_svc_t *svc;
+	char *msg;
+	int msgsz;
+	int hdrsz;
+	size_t explen = DS_MSG_LEN(ds_data_handle_t);
+
+	/* sanity check the incoming message */
+	if (len < explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <data: invalid message length "
+		    "(%ld), expected at least %ld", port->id, len, explen);
+		return;
+	}
+
+	data = (ds_data_handle_t *)(buf + DS_HDR_SZ);
+
+	hdrsz = DS_HDR_SZ + sizeof (ds_data_handle_t);
+	msgsz = len - hdrsz;
+
+	/* strip off the header for the client */
+	msg = (msgsz) ? (buf + hdrsz) : NULL;
+
+	rw_enter(&ds_svcs.rwlock, RW_READER);
+
+	/* lookup appropriate client */
+	if ((svc = ds_get_svc(data->svc_handle)) == NULL) {
+		/* fixed: the reader lock was previously leaked here */
+		rw_exit(&ds_svcs.rwlock);
+		cmn_err(CE_NOTE, "ds@%lx: <data: invalid handle 0x%lx",
+		    port->id, data->svc_handle);
+		ds_send_data_nack(port, data->svc_handle);
+		return;
+	}
+
+	rw_exit(&ds_svcs.rwlock);
+
+	DS_DBG("ds@%lx: <data: client=%s hdl=0x%lx\n", port->id,
+	    (svc->cap.svc_id) ? svc->cap.svc_id : "NULL", svc->hdl);
+
+	/* dispatch this message to the client */
+	(*svc->ops.ds_data_cb)(svc->ops.cb_arg, msg, msgsz);
+}
+
+/*
+ * Handle an inbound data NACK.  A DS_INV_HDL result means the
+ * remote side no longer recognizes our service handle, so the
+ * local service is unregistered (which also triggers an attempt
+ * to re-register it).  Any other result is only logged via DS_DBG.
+ */
+static void
+ds_handle_nack(ds_port_t *port, caddr_t buf, size_t len)
+{
+	ds_svc_t *svc;
+	ds_data_nack_t *nack;
+	size_t explen = DS_MSG_LEN(ds_data_nack_t);
+
+	/* sanity check the incoming message */
+	if (len != explen) {
+		cmn_err(CE_NOTE, "ds@%lx: <data_nack: invalid message length "
+		    "(%ld), expected %ld", port->id, len, explen);
+		return;
+	}
+
+	nack = (ds_data_nack_t *)(buf + DS_HDR_SZ);
+
+	DS_DBG("ds@%lx: data_nack: hdl=0x%lx, result=0x%lx\n", port->id,
+	    nack->svc_handle, nack->result);
+
+	if (nack->result == DS_INV_HDL) {
+
+		rw_enter(&ds_svcs.rwlock, RW_READER);
+
+		/* a stale handle is silently ignored */
+		if ((svc = ds_get_svc(nack->svc_handle)) == NULL) {
+			rw_exit(&ds_svcs.rwlock);
+			return;
+		}
+
+		cmn_err(CE_NOTE, "ds@%lx: <data_nack: handle 0x%lx reported "
+		    "as invalid", port->id, nack->svc_handle);
+
+		(void) ds_svc_unregister(svc, svc->port);
+
+		rw_exit(&ds_svcs.rwlock);
+	}
+}
+
+/*
+ * Write a complete message to a port's LDC channel, looping since
+ * a single ldc_write() may accept fewer bytes than requested.
+ * EWOULDBLOCK is retried up to 1000 times with a 10ms delay
+ * between attempts (up to ~10 seconds total).  Caller must hold
+ * the port lock so the message is not fragmented by concurrent
+ * writers.  The message is recorded in the outbound log before
+ * sending.
+ */
+static int
+ds_send_msg(ds_port_t *port, caddr_t msg, size_t msglen)
+{
+	int rv;
+	caddr_t currp = msg;
+	size_t amt_left = msglen;
+	int loopcnt = 0;
+
+	DS_DUMP_LDC_MSG(msg, msglen);
+	(void) ds_log_add_msg(DS_LOG_OUT(port->id), (uint8_t *)msg, msglen);
+
+	/*
+	 * ensure that no other messages can be sent on this port in case
+	 * the write doesn't get sent with one write to guarantee that the
+	 * message doesn't become fragmented.
+	 */
+	ASSERT(MUTEX_HELD(&port->lock));
+
+	/* send the message */
+	do {
+		if ((rv = ldc_write(port->ldc.hdl, currp, &msglen)) != 0) {
+			if ((rv == EWOULDBLOCK) && (loopcnt++ < 1000)) {
+				/*
+				 * Back off briefly and retry; if the
+				 * channel stays blocked past the retry
+				 * limit, something is wrong with it.
+				 */
+				delay(drv_usectohz(10000)); /* 1/100 sec */
+			} else {
+				cmn_err(CE_WARN,
+				    "ds@%lx: send_msg: ldc_write failed (%d)",
+				    port->id, rv);
+				return (rv);
+			}
+		} else {
+			/*
+			 * msglen now holds the byte count actually
+			 * written (ldc_write in/out parameter); advance
+			 * and retry with the remainder.
+			 */
+			amt_left -= msglen;
+			currp += msglen;
+			msglen = amt_left;
+			loopcnt = 0;
+		}
+	} while (amt_left > 0);
+
+	return (rv);
+}
+
+/*
+ * Send a version negotiation (init) request using the port's
+ * current candidate version.  Caller must hold the port lock, and
+ * the port must be in DS_PORT_LDC_INIT.  On a successful send the
+ * port advances to DS_PORT_INIT_REQ.
+ * NOTE(review): hdr is not freed here; presumably ownership passes
+ * to the message log via ds_send_msg -- confirm.
+ */
+static void
+ds_send_init_req(ds_port_t *port)
+{
+	ds_hdr_t *hdr;
+	ds_init_req_t *init_req;
+	size_t nbytes;
+	ds_ver_t *vers = &ds_vers[port->ver_idx];
+
+	ASSERT(MUTEX_HELD(&port->lock));
+
+	if (port->state != DS_PORT_LDC_INIT) {
+		cmn_err(CE_NOTE, "ds@%lx: init_req>: invalid port state (%d)",
+		    port->id, port->state);
+		return;
+	}
+
+	DS_DBG("ds@%lx: init_req>: req=v%d.%d\n", port->id, vers->major,
+	    vers->minor);
+
+	nbytes = DS_HDR_SZ + sizeof (ds_init_req_t);
+	hdr = kmem_zalloc(nbytes, KM_SLEEP);
+
+	hdr->msg_type = DS_INIT_REQ;
+	hdr->payload_len = sizeof (ds_init_req_t);
+
+	init_req = (ds_init_req_t *)((caddr_t)hdr + DS_HDR_SZ);
+	init_req->major_vers = vers->major;
+	init_req->minor_vers = vers->minor;
+
+	/* send the message; only advance state if the send succeeded */
+	if (ds_send_msg(port, (caddr_t)hdr, nbytes) == 0) {
+		port->state = DS_PORT_INIT_REQ;
+	}
+}
+
+/*
+ * Send a registration request for a service on its assigned port.
+ * Returns 0 on success (service moves to DS_SVC_REG_PENDING) or -1
+ * if the channel/port is not ready or the send fails.
+ * NOTE(review): the port state is checked and the lock dropped
+ * before the message is built and sent -- confirm that this
+ * check-then-send window is benign.
+ */
+static int
+ds_send_reg_req(ds_svc_t *svc)
+{
+	ds_port_t *port = svc->port;
+	ds_ver_t *ver;
+	ds_hdr_t *hdr;
+	caddr_t msg;
+	size_t msglen;
+	size_t nbytes;
+	ds_reg_req_t *req;
+	size_t idlen;
+
+	/* assumes some checking has already occurred */
+	ASSERT(svc->state == DS_SVC_INACTIVE);
+
+	mutex_enter(&port->lock);
+
+	/* check on the LDC to Zeus */
+	if (port->ldc.state != LDC_UP) {
+		/* can not send message */
+		DS_DBG("ds@%lx: reg_req>: channel %ld is not up\n", port->id,
+		    port->ldc.id);
+		mutex_exit(&port->lock);
+		return (-1);
+	}
+
+	/* make sure port is ready */
+	if (port->state != DS_PORT_READY) {
+		/* can not send message */
+		DS_DBG("ds@%lx: reg_req>: port is not ready\n", port->id);
+		mutex_exit(&port->lock);
+		return (-1);
+	}
+
+	mutex_exit(&port->lock);
+
+	/* allocate the message buffer, sized for the trailing svc id */
+	idlen = strlen(svc->cap.svc_id);
+	msglen = DS_HDR_SZ + sizeof (ds_reg_req_t) + idlen;
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	/* copy in the header data */
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_REG_REQ;
+	hdr->payload_len = sizeof (ds_reg_req_t) + idlen;
+
+	req = (ds_reg_req_t *)(msg + DS_HDR_SZ);
+	req->svc_handle = svc->hdl;
+	ver = &(svc->cap.vers[svc->ver_idx]);
+	req->major_vers = ver->major;
+	req->minor_vers = ver->minor;
+
+	/* copy in the service id, including its terminating NUL */
+	bcopy(svc->cap.svc_id, req->svc_id, idlen + 1);
+
+	/* send the message */
+	DS_DBG("ds@%lx: reg_req>: id='%s', ver=%d.%d, hdl=0x%lx\n", port->id,
+	    svc->cap.svc_id, ver->major, ver->minor, svc->hdl);
+
+	nbytes = msglen;
+	mutex_enter(&port->lock);
+	if (ds_send_msg(port, msg, nbytes) != 0) {
+		mutex_exit(&port->lock);
+		return (-1);
+	} else {
+		svc->state = DS_SVC_REG_PENDING;
+	}
+	mutex_exit(&port->lock);
+
+	return (0);
+}
+
+/*
+ * Send an unregister request for a service on its assigned port.
+ * Returns 0 on success, or -1 if the service has no port, the
+ * channel/port is not ready, or the send fails.
+ */
+static int
+ds_send_unreg_req(ds_svc_t *svc)
+{
+	caddr_t msg;
+	size_t msglen;
+	size_t nbytes;
+	ds_hdr_t *hdr;
+	ds_unreg_req_t *req;
+	ds_port_t *port = svc->port;
+
+	if (port == NULL) {
+		DS_DBG("send_unreg_req: service '%s' not associated with "
+		    "a port\n", svc->cap.svc_id);
+		return (-1);
+	}
+
+	mutex_enter(&port->lock);
+
+	/* check on the LDC to Zeus */
+	if (port->ldc.state != LDC_UP) {
+		/* can not send message */
+		cmn_err(CE_NOTE, "ds@%lx: unreg_req>: channel %ld is not up",
+		    port->id, port->ldc.id);
+		mutex_exit(&port->lock);
+		return (-1);
+	}
+
+	/* make sure port is ready */
+	if (port->state != DS_PORT_READY) {
+		/* can not send message */
+		cmn_err(CE_NOTE, "ds@%lx: unreg_req>: port is not ready",
+		    port->id);
+		mutex_exit(&port->lock);
+		return (-1);
+	}
+
+	mutex_exit(&port->lock);
+
+	msglen = DS_HDR_SZ + sizeof (ds_unreg_req_t);
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	/* copy in the header data */
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_UNREG;
+	hdr->payload_len = sizeof (ds_unreg_req_t);
+
+	req = (ds_unreg_req_t *)(msg + DS_HDR_SZ);
+	req->svc_handle = svc->hdl;
+
+	/* send the message */
+	DS_DBG("ds@%lx: unreg_req>: id='%s', hdl=0x%lx\n", port->id,
+	    (svc->cap.svc_id) ? svc->cap.svc_id : "NULL", svc->hdl);
+
+	nbytes = msglen;
+	mutex_enter(&port->lock);
+	if (ds_send_msg(port, msg, nbytes) != 0) {
+		mutex_exit(&port->lock);
+		return (-1);
+	}
+	mutex_exit(&port->lock);
+
+	return (0);
+}
+
+/*
+ * Send an unregister NACK for a handle that did not resolve to a
+ * local service.  Best effort: if the channel or port is not
+ * ready, or the send fails, the nack is silently dropped.
+ */
+static void
+ds_send_unreg_nack(ds_port_t *port, ds_svc_hdl_t bad_hdl)
+{
+	caddr_t msg;
+	size_t msglen;
+	size_t nbytes;
+	ds_hdr_t *hdr;
+	ds_unreg_nack_t *nack;
+
+	mutex_enter(&port->lock);
+
+	/* check on the LDC to Zeus */
+	if (port->ldc.state != LDC_UP) {
+		/* can not send message */
+		cmn_err(CE_NOTE, "ds@%lx: unreg_nack>: channel %ld is not up",
+		    port->id, port->ldc.id);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	/* make sure port is ready */
+	if (port->state != DS_PORT_READY) {
+		/* can not send message */
+		cmn_err(CE_NOTE, "ds@%lx: unreg_nack>: port is not ready",
+		    port->id);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	mutex_exit(&port->lock);
+
+	msglen = DS_HDR_SZ + sizeof (ds_unreg_nack_t);
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	/* copy in the header data */
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_UNREG_NACK;
+	hdr->payload_len = sizeof (ds_unreg_nack_t);
+
+	nack = (ds_unreg_nack_t *)(msg + DS_HDR_SZ);
+	nack->svc_handle = bad_hdl;
+
+	/* send the message */
+	DS_DBG("ds@%lx: unreg_nack>: hdl=0x%lx\n", port->id, bad_hdl);
+
+	nbytes = msglen;
+	mutex_enter(&port->lock);
+	(void) ds_send_msg(port, msg, nbytes);
+	mutex_exit(&port->lock);
+}
+
+/*
+ * Send a data NACK (result DS_INV_HDL) for a data message whose
+ * handle did not resolve to a local service.  Best effort: if the
+ * channel or port is not ready, or the send fails, the nack is
+ * silently dropped.
+ */
+static void
+ds_send_data_nack(ds_port_t *port, ds_svc_hdl_t bad_hdl)
+{
+	caddr_t msg;
+	size_t msglen;
+	size_t nbytes;
+	ds_hdr_t *hdr;
+	ds_data_nack_t *nack;
+
+	mutex_enter(&port->lock);
+
+	/* check on the LDC to Zeus */
+	if (port->ldc.state != LDC_UP) {
+		/* can not send message */
+		cmn_err(CE_NOTE, "ds@%lx: data_nack>: channel %ld is not up",
+		    port->id, port->ldc.id);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	/* make sure port is ready */
+	if (port->state != DS_PORT_READY) {
+		/* can not send message */
+		cmn_err(CE_NOTE, "ds@%lx: data_nack>: port is not ready",
+		    port->id);
+		mutex_exit(&port->lock);
+		return;
+	}
+
+	mutex_exit(&port->lock);
+
+	msglen = DS_HDR_SZ + sizeof (ds_data_nack_t);
+	msg = kmem_zalloc(msglen, KM_SLEEP);
+
+	/* copy in the header data */
+	hdr = (ds_hdr_t *)msg;
+	hdr->msg_type = DS_NACK;
+	hdr->payload_len = sizeof (ds_data_nack_t);
+
+	nack = (ds_data_nack_t *)(msg + DS_HDR_SZ);
+	nack->svc_handle = bad_hdl;
+	nack->result = DS_INV_HDL;
+
+	/* send the message */
+	DS_DBG("ds@%lx: data_nack>: hdl=0x%lx\n", port->id, bad_hdl);
+
+	nbytes = msglen;
+	mutex_enter(&port->lock);
+	(void) ds_send_msg(port, msg, nbytes);
+	mutex_exit(&port->lock);
+}
+
+#ifdef DEBUG
+
+/* formatting parameters for the hex/ASCII dump below */
+#define BYTESPERLINE 8
+#define LINEWIDTH ((BYTESPERLINE * 3) + (BYTESPERLINE + 2) + 1)
+#define ASCIIOFFSET ((BYTESPERLINE * 3) + 2)
+#define ISPRINT(c) ((c >= ' ') && (c <= '~'))
+
+/*
+ * Output a buffer formatted with a set number of bytes on
+ * each line. Append each line with the ASCII equivalent of
+ * each byte if it falls within the printable ASCII range,
+ * and '.' otherwise.  Only emits output when the DS_DBG_FLAG_LDC
+ * debug flag is enabled.
+ */
+static void
+ds_dump_ldc_msg(void *vbuf, size_t len)
+{
+	int i, j;
+	char *curr;
+	char *aoff;
+	char line[LINEWIDTH];
+	uint8_t *buf = vbuf;
+
+	/* abort if not debugging ldc */
+	if (!(ds_debug & DS_DBG_FLAG_LDC)) {
+		return;
+	}
+
+	/* walk the buffer one line at a time */
+	for (i = 0; i < len; i += BYTESPERLINE) {
+
+		bzero(line, LINEWIDTH);
+
+		curr = line;
+		aoff = line + ASCIIOFFSET;
+
+		/*
+		 * Walk the bytes in the current line, storing
+		 * the hex value for the byte as well as the
+		 * ASCII representation in a temporary buffer.
+		 * All ASCII values are placed at the end of
+		 * the line.
+		 */
+		for (j = 0; (j < BYTESPERLINE) && ((i + j) < len); j++) {
+			(void) sprintf(curr, " %02x", buf[i + j]);
+			*aoff = (ISPRINT(buf[i + j])) ? buf[i + j] : '.';
+			curr += 3;
+			aoff++;
+		}
+
+		/*
+		 * Fill in to the start of the ASCII translation
+		 * with spaces. This will only be necessary if
+		 * this is the last line and there are not enough
+		 * bytes to fill the whole line.
+		 */
+		while (curr != (line + ASCIIOFFSET))
+			*curr++ = ' ';
+
+		DS_DBG_LDC("%s\n", line);
+	}
+}
+#endif /* DEBUG */
+
+
+/*
+ * Visit every slot in the registered-service table in order,
+ * invoking the supplied callback on each.  A non-zero return
+ * value from the callback ends the walk early (it signals
+ * completion, not an error).  Returns the index of the last
+ * slot visited.  Caller must hold ds_svcs.rwlock.
+ */
+static int
+ds_walk_svcs(svc_cb_t svc_cb, void *arg)
+{
+	int i;
+
+	ASSERT(RW_LOCK_HELD(&ds_svcs.rwlock));
+
+	for (i = 0; i < ds_svcs.maxsvcs; i++) {
+		/* let the callback decide whether to keep going */
+		if ((*svc_cb)(ds_svcs.tbl[i], arg) != 0)
+			break;
+	}
+
+	return (i);
+}
+
+/*
+ * Walk callback: report whether a table slot is free.  A slot
+ * counts as free when it holds a NULL pointer or a structure
+ * that may be recycled (per DS_SVC_ISFREE).
+ */
+static int
+ds_svc_isfree(ds_svc_t *svc, void *arg)
+{
+	_NOTE(ARGUNUSED(arg))
+
+	return (DS_SVC_ISFREE(svc) ? 1 : 0);
+}
+
+/*
+ * Walk callback: report whether a service's id matches the
+ * string passed as arg.  Free slots never match.
+ */
+static int
+ds_svc_ismatch(ds_svc_t *svc, void *arg)
+{
+	if (DS_SVC_ISFREE(svc))
+		return (0);
+
+	/* strcmp returns zero when the ids are identical */
+	return ((strcmp(svc->cap.svc_id, arg) == 0) ? 1 : 0);
+}
+
+/*
+ * Walk callback: release all memory held by a service entry,
+ * including its id string and version array.  Safe to call on a
+ * NULL slot.  Always returns 0 so a walk visits every entry.
+ */
+static int
+ds_svc_free(ds_svc_t *svc, void *arg)
+{
+	_NOTE(ARGUNUSED(arg))
+
+	if (svc == NULL)
+		return (0);
+
+	if (svc->cap.svc_id != NULL) {
+		kmem_free(svc->cap.svc_id, strlen(svc->cap.svc_id) + 1);
+		svc->cap.svc_id = NULL;
+	}
+
+	if (svc->cap.vers != NULL) {
+		kmem_free(svc->cap.vers, svc->cap.nvers * sizeof (ds_ver_t));
+		svc->cap.vers = NULL;
+	}
+
+	kmem_free(svc, sizeof (ds_svc_t));
+
+	return (0);
+}
+
+/*
+ * Walk callback: attempt to register an inactive service, trying
+ * its available ports from the lowest-numbered upward until a
+ * registration request is sent successfully or no ports remain.
+ * Always returns 0 so a walk visits every service.
+ */
+static int
+ds_svc_register(ds_svc_t *svc, void *arg)
+{
+	_NOTE(ARGUNUSED(arg))
+
+	int idx;
+
+	/* check the state of the service */
+	if (DS_SVC_ISFREE(svc) || (svc->state != DS_SVC_INACTIVE))
+		return (0);
+
+	/* check if there are any ports to try */
+	if (DS_PORTSET_ISNULL(svc->avail))
+		return (0);
+
+	/*
+	 * Attempt to register the service. Start with the lowest
+	 * numbered port and continue until a registration message
+	 * is sent successfully, or there are no ports left to try.
+	 */
+	for (idx = 0; idx < DS_MAX_PORTS; idx++) {
+
+		/*
+		 * If the port is not in the available list,
+		 * it is not a candidate for registration.
+		 */
+		if (!DS_PORT_IN_SET(svc->avail, idx)) {
+			continue;
+		}
+
+		svc->port = &ds_ports[idx];
+		if (ds_send_reg_req(svc) == 0) {
+			/* register sent successfully */
+			break;
+		}
+
+		/* reset the service to try the next port */
+		ds_reset_svc(svc, svc->port);
+	}
+
+	return (0);
+}
+
+/*
+ * Walk callback: unregister a service if it is bound to the port
+ * passed as arg, notify the client, and then attempt to register
+ * the service again on another port.  Always returns 0 so a walk
+ * visits every service.
+ */
+static int
+ds_svc_unregister(ds_svc_t *svc, void *arg)
+{
+	ds_port_t *port = (ds_port_t *)arg;
+
+	if (DS_SVC_ISFREE(svc)) {
+		return (0);
+	}
+
+	/* make sure the service is using this port */
+	if (svc->port != port) {
+		return (0);
+	}
+
+	/* reset the service structure */
+	ds_reset_svc(svc, port);
+
+	/* increment the count in the handle to prevent reuse */
+	svc->hdl = DS_ALLOC_HDL(DS_HDL2IDX(svc->hdl), DS_HDL2COUNT(svc->hdl));
+
+	/* call the client unregister callback */
+	if (svc->ops.ds_unreg_cb)
+		(*svc->ops.ds_unreg_cb)(svc->ops.cb_arg);
+
+	/* try to initiate a new registration */
+	(void) ds_svc_register(svc, NULL);
+
+	return (0);
+}
+
+static int
+ds_svc_port_up(ds_svc_t *svc, void *arg)
+{
+ ds_port_t *port = (ds_port_t *)arg;
+
+ if (DS_SVC_ISFREE(svc)) {
+ /* nothing to do */
+ return (0);
+ }
+
+ DS_PORTSET_ADD(svc->avail, port->id);
+
+ return (0);
+}
+
+static ds_svc_t *
+ds_alloc_svc(void)
+{
+ int idx;
+ uint_t newmaxsvcs;
+ ds_svc_t **newtbl;
+ ds_svc_t *newsvc;
+
+ ASSERT(RW_WRITE_HELD(&ds_svcs.rwlock));
+
+ idx = ds_walk_svcs(ds_svc_isfree, NULL);
+
+ if (idx != ds_svcs.maxsvcs) {
+ goto found;
+ }
+
+ /*
+ * There was no free space in the table. Grow
+ * the table to double its current size.
+ */
+ newmaxsvcs = ds_svcs.maxsvcs * 2;
+ newtbl = kmem_zalloc(newmaxsvcs * sizeof (ds_svc_t *), KM_SLEEP);
+
+ /* copy old table data to the new table */
+ for (idx = 0; idx < ds_svcs.maxsvcs; idx++) {
+ newtbl[idx] = ds_svcs.tbl[idx];
+ }
+
+ /* clean up the old table */
+ kmem_free(ds_svcs.tbl, ds_svcs.maxsvcs * sizeof (ds_svc_t *));
+ ds_svcs.tbl = newtbl;
+ ds_svcs.maxsvcs = newmaxsvcs;
+
+ /* search for a free space again */
+ idx = ds_walk_svcs(ds_svc_isfree, NULL);
+
+ /* the table is locked so should find a free slot */
+ ASSERT(idx != ds_svcs.maxsvcs);
+
+found:
+ /* allocate a new svc structure if necessary */
+ if ((newsvc = ds_svcs.tbl[idx]) == NULL) {
+ /* allocate a new service */
+ newsvc = kmem_zalloc(sizeof (ds_svc_t), KM_SLEEP);
+ ds_svcs.tbl[idx] = newsvc;
+ }
+
+ /* fill in the handle */
+ newsvc->hdl = DS_ALLOC_HDL(idx, DS_HDL2COUNT(newsvc->hdl));
+
+ return (newsvc);
+}
+
+static void
+ds_reset_svc(ds_svc_t *svc, ds_port_t *port)
+{
+ svc->state = DS_SVC_INACTIVE;
+ svc->ver_idx = 0;
+ svc->ver.major = 0;
+ svc->ver.minor = 0;
+ svc->port = NULL;
+ DS_PORTSET_DEL(svc->avail, port->id);
+}
+
+static ds_svc_t *
+ds_get_svc(ds_svc_hdl_t hdl)
+{
+ int idx;
+ ds_svc_t *svc;
+
+ ASSERT(RW_LOCK_HELD(&ds_svcs.rwlock));
+
+ if (hdl == DS_INVALID_HDL)
+ return (NULL);
+
+ idx = DS_HDL2IDX(hdl);
+
+ /* check if index is out of bounds */
+ if ((idx < 0) || (idx >= ds_svcs.maxsvcs))
+ return (NULL);
+
+ svc = ds_svcs.tbl[idx];
+
+ /* check for a valid service */
+ if (DS_SVC_ISFREE(svc))
+ return (NULL);
+
+ /* make sure the handle is an exact match */
+ if (svc->hdl != hdl)
+ return (NULL);
+
+ return (svc);
+}
+
+static int
+ds_port_add(md_t *mdp, mde_cookie_t port, mde_cookie_t chan)
+{
+ ds_port_t *newport;
+ uint64_t port_id;
+ uint64_t ldc_id;
+
+ /* get the ID for this port */
+ if (md_get_prop_val(mdp, port, "id", &port_id) != 0) {
+ cmn_err(CE_NOTE, "ds_port_add: port 'id' property not found");
+ return (-1);
+ }
+
+ /* sanity check the port id */
+ if (port_id > DS_MAX_PORT_ID) {
+ cmn_err(CE_WARN, "ds_port_add: port ID %ld out of range",
+ port_id);
+ return (-1);
+ }
+
+ DS_DBG("ds_port_add: adding port ds@%ld\n", port_id);
+
+ /* get the channel ID for this port */
+ if (md_get_prop_val(mdp, chan, "id", &ldc_id) != 0) {
+ cmn_err(CE_NOTE, "ds@%lx: add_port: no channel 'id' property",
+ port_id);
+ return (-1);
+ }
+
+ /* get the port structure from the array of ports */
+ newport = &ds_ports[port_id];
+
+ /* check for a duplicate port in the MD */
+ if (newport->state != DS_PORT_FREE) {
+ cmn_err(CE_NOTE, "ds@%lx: add_port: port already exists",
+ port_id);
+ return (-1);
+ }
+
+ /* initialize the port lock */
+ mutex_init(&newport->lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* initialize the port */
+ newport->id = port_id;
+ newport->state = DS_PORT_INIT;
+ newport->ldc.id = ldc_id;
+
+ /* add the port to the set of all ports */
+ DS_PORTSET_ADD(ds_allports, port_id);
+
+ return (0);
+}
+
+static void
+ds_port_reset(ds_port_t *port)
+{
+ ASSERT(MUTEX_HELD(&port->lock));
+
+ /* connection went down, mark everything inactive */
+ rw_enter(&ds_svcs.rwlock, RW_WRITER);
+
+ (void) ds_walk_svcs(ds_svc_unregister, port);
+
+ rw_exit(&ds_svcs.rwlock);
+
+ port->ver_idx = 0;
+ port->ver.major = 0;
+ port->ver.minor = 0;
+ port->state = DS_PORT_LDC_INIT;
+}
+
+/*
+ * Verify that a version array is sorted as expected for the
+ * version negotiation to work correctly.
+ */
+static ds_vers_check_t
+ds_vers_isvalid(ds_ver_t *vers, int nvers)
+{
+ uint16_t curr_major;
+ uint16_t curr_minor;
+ int idx;
+
+ curr_major = vers[0].major;
+ curr_minor = vers[0].minor;
+
+ /*
+ * Walk the version array, verifying correct ordering.
+ * The array must be sorted from highest supported
+ * version to lowest supported version.
+ */
+ for (idx = 0; idx < nvers; idx++) {
+ if (vers[idx].major > curr_major) {
+ DS_DBG("vers_isvalid: version array has increasing "
+ "major versions\n");
+ return (DS_VERS_INCREASING_MAJOR_ERR);
+ }
+
+ if (vers[idx].major < curr_major) {
+ curr_major = vers[idx].major;
+ curr_minor = vers[idx].minor;
+ continue;
+ }
+
+ if (vers[idx].minor > curr_minor) {
+ DS_DBG("vers_isvalid: version array has increasing "
+ "minor versions\n");
+ return (DS_VERS_INCREASING_MINOR_ERR);
+ }
+
+ curr_minor = vers[idx].minor;
+ }
+
+ return (DS_VERS_OK);
+}
+
+/*
+ * Logging Support
+ */
+static void
+ds_log_init(void)
+{
+ ds_log_entry_t *new;
+
+ /* initialize global lock */
+ mutex_init(&ds_log.lock, NULL, MUTEX_DRIVER, NULL);
+
+ mutex_enter(&ds_log.lock);
+
+ /* initialize the log */
+ ds_log.head = NULL;
+ ds_log.size = 0;
+ ds_log.nentry = 0;
+
+ /* initialize the free list */
+ for (new = ds_log_entry_pool; new < DS_LOG_POOL_END; new++) {
+ new->next = ds_log.freelist;
+ ds_log.freelist = new;
+ }
+
+ mutex_exit(&ds_log.lock);
+
+ DS_DBG_LOG("ds_log initialized: size=%d bytes, limit=%d bytes, "
+ "ninit=%ld\n", ds_log_sz, DS_LOG_LIMIT, DS_LOG_NPOOL);
+}
+
+static void
+ds_log_fini(void)
+{
+ ds_log_entry_t *next;
+
+ mutex_enter(&ds_log.lock);
+
+ /* clear out the log */
+ while (ds_log.nentry > 0)
+ (void) ds_log_remove();
+
+ /*
+ * Now all the entries are on the free list.
+ * Clear out the free list, deallocating any
+ * entry that was dynamically allocated.
+ */
+ while (ds_log.freelist != NULL) {
+ next = ds_log.freelist->next;
+
+ if (!DS_IS_POOL_ENTRY(ds_log.freelist)) {
+ kmem_free(ds_log.freelist, sizeof (ds_log_entry_t));
+ }
+
+ ds_log.freelist = next;
+ }
+
+ mutex_exit(&ds_log.lock);
+
+ mutex_destroy(&ds_log.lock);
+}
+
+static ds_log_entry_t *
+ds_log_entry_alloc(void)
+{
+ ds_log_entry_t *new = NULL;
+
+ ASSERT(MUTEX_HELD(&ds_log.lock));
+
+ if (ds_log.freelist != NULL) {
+ new = ds_log.freelist;
+ ds_log.freelist = ds_log.freelist->next;
+ }
+
+ if (new == NULL) {
+ /* free list was empty */
+ new = kmem_zalloc(sizeof (ds_log_entry_t), KM_SLEEP);
+ }
+
+ ASSERT(new);
+
+ return (new);
+}
+
+static void
+ds_log_entry_free(ds_log_entry_t *entry)
+{
+ ASSERT(MUTEX_HELD(&ds_log.lock));
+
+ if (entry == NULL)
+ return;
+
+ if (entry->data != NULL) {
+ kmem_free(entry->data, entry->datasz);
+ entry->data = NULL;
+ }
+
+ /* place entry on the free list */
+ entry->next = ds_log.freelist;
+ ds_log.freelist = entry;
+}
+
+/*
+ * Add a message to the end of the log
+ */
+static int
+ds_log_add(ds_log_entry_t *new)
+{
+ ASSERT(MUTEX_HELD(&ds_log.lock));
+
+ if (ds_log.head == NULL) {
+
+ new->prev = new;
+ new->next = new;
+
+ ds_log.head = new;
+ } else {
+ ds_log_entry_t *head = ds_log.head;
+ ds_log_entry_t *tail = ds_log.head->prev;
+
+ new->next = head;
+ new->prev = tail;
+ tail->next = new;
+ head->prev = new;
+ }
+
+ /* increase the log size, including the metadata size */
+ ds_log.size += DS_LOG_ENTRY_SZ(new);
+ ds_log.nentry++;
+
+ DS_DBG_LOG("ds_log: added %ld data bytes, %ld total bytes\n",
+ new->datasz, DS_LOG_ENTRY_SZ(new));
+
+ return (0);
+}
+
+/*
+ * Remove an entry from the head of the log
+ */
+static int
+ds_log_remove(void)
+{
+ ds_log_entry_t *head;
+
+ ASSERT(MUTEX_HELD(&ds_log.lock));
+
+ head = ds_log.head;
+
+ /* empty list */
+ if (head == NULL)
+ return (0);
+
+ if (head->next == ds_log.head) {
+ /* one element list */
+ ds_log.head = NULL;
+ } else {
+ head->next->prev = head->prev;
+ head->prev->next = head->next;
+ ds_log.head = head->next;
+ }
+
+ DS_DBG_LOG("ds_log: removed %ld data bytes, %ld total bytes\n",
+ head->datasz, DS_LOG_ENTRY_SZ(head));
+
+ ds_log.size -= DS_LOG_ENTRY_SZ(head);
+ ds_log.nentry--;
+
+ ASSERT((ds_log.size >= 0) && (ds_log.nentry >= 0));
+
+ ds_log_entry_free(head);
+
+ return (0);
+}
+
+/*
+ * Replace the data in the entry at the front of the list with then
+ * new data. This has the effect of removing the oldest entry and
+ * adding the new entry.
+ */
+static int
+ds_log_replace(uint8_t *msg, size_t sz)
+{
+ ds_log_entry_t *head;
+
+ ASSERT(MUTEX_HELD(&ds_log.lock));
+
+ head = ds_log.head;
+
+ DS_DBG_LOG("ds_log: replaced %ld data bytes (%ld total) with %ld data "
+ "bytes (%ld total)\n", head->datasz, DS_LOG_ENTRY_SZ(head),
+ sz, sz + sizeof (ds_log_entry_t));
+
+ ds_log.size -= DS_LOG_ENTRY_SZ(head);
+
+ ASSERT((ds_log.size >= 0) && (ds_log.nentry >= 0));
+
+ kmem_free(head->data, head->datasz);
+ head->data = msg;
+ head->datasz = sz;
+
+ ds_log.size += DS_LOG_ENTRY_SZ(head);
+
+ ds_log.head = head->next;
+
+ return (0);
+}
+
+static void
+ds_log_purge(void *arg)
+{
+ _NOTE(ARGUNUSED(arg))
+
+ mutex_enter(&ds_log.lock);
+
+ DS_DBG_LOG("ds_log: purging oldest log entries\n");
+
+ while ((ds_log.nentry) && (ds_log.size >= ds_log_sz)) {
+ (void) ds_log_remove();
+ }
+
+ mutex_exit(&ds_log.lock);
+}
+
+static int
+ds_log_add_msg(int32_t dest, uint8_t *msg, size_t sz)
+{
+ int rv = 0;
+
+ mutex_enter(&ds_log.lock);
+
+ /* check if the log is larger than the soft limit */
+ if ((ds_log.nentry) && ((ds_log.size + sz) >= ds_log_sz)) {
+ /*
+ * The log is larger than the soft limit.
+ * Swap the oldest entry for the newest.
+ */
+ DS_DBG_LOG("ds_log: replacing oldest entry with new entry\n");
+ (void) ds_log_replace(msg, sz);
+ } else {
+ /*
+ * Still have headroom under the soft limit.
+ * Add the new entry to the log.
+ */
+ ds_log_entry_t *new;
+
+ new = ds_log_entry_alloc();
+
+ /* fill in message data */
+ new->data = msg;
+ new->datasz = sz;
+ new->timestamp = ddi_get_time();
+ new->dest = dest;
+
+ rv = ds_log_add(new);
+ }
+
+ /* check if the log is larger than the hard limit */
+ if ((ds_log.nentry > 1) && (ds_log.size >= DS_LOG_LIMIT)) {
+ /*
+ * Wakeup the thread to remove entries
+ * from the log until it is smaller than
+ * the soft limit.
+ */
+ DS_DBG_LOG("ds_log: log exceeded %d bytes, scheduling a "
+ "purge...\n", DS_LOG_LIMIT);
+
+ if (DS_DISPATCH(ds_log_purge, (void *)msg) == NULL) {
+ cmn_err(CE_NOTE, "ds_log: purge thread failed to "
+ "start");
+ }
+ }
+
+ mutex_exit(&ds_log.lock);
+
+ return (rv);
+}
+
+/*
+ * Client Interface
+ */
+
+int
+ds_cap_init(ds_capability_t *cap, ds_clnt_ops_t *ops)
+{
+ int idx;
+ ds_vers_check_t status;
+ ds_svc_t *svc;
+
+ /* sanity check the args */
+ if ((cap == NULL) || (ops == NULL)) {
+ cmn_err(CE_NOTE, "ds_cap_init: invalid arguments");
+ return (EINVAL);
+ }
+
+ /* sanity check the capability specifier */
+ if ((cap->svc_id == NULL) || (cap->vers == NULL) || (cap->nvers == 0)) {
+ cmn_err(CE_NOTE, "ds_cap_init: invalid capability specifier");
+ return (EINVAL);
+ }
+
+ /* sanity check the version array */
+ if ((status = ds_vers_isvalid(cap->vers, cap->nvers)) != DS_VERS_OK) {
+ cmn_err(CE_NOTE, "ds_cap_init: invalid capability "
+ "version array for %s service: %s", cap->svc_id,
+ (status == DS_VERS_INCREASING_MAJOR_ERR) ?
+ "increasing major versions" :
+ "increasing minor versions");
+ return (EINVAL);
+ }
+
+ /* data and register callbacks are required */
+ if ((ops->ds_data_cb == NULL) || (ops->ds_reg_cb == NULL)) {
+ cmn_err(CE_NOTE, "ds_cap_init: invalid ops specifier for "
+ "%s service", cap->svc_id);
+ return (EINVAL);
+ }
+
+ DS_DBG("ds_cap_init: svc_id='%s', data_cb=0x%lx, cb_arg=0x%lx\n",
+ cap->svc_id, (uint64_t)ops->ds_data_cb, (uint64_t)ops->cb_arg);
+
+ rw_enter(&ds_svcs.rwlock, RW_WRITER);
+
+ /* check if the service is already registered */
+ idx = ds_walk_svcs(ds_svc_ismatch, cap->svc_id);
+ if (idx != ds_svcs.maxsvcs) {
+ /* already registered */
+ cmn_err(CE_NOTE, "service '%s' already registered",
+ cap->svc_id);
+ rw_exit(&ds_svcs.rwlock);
+ return (EALREADY);
+ }
+
+ svc = ds_alloc_svc();
+
+ /* copy over all the client information */
+ bcopy(cap, &svc->cap, sizeof (ds_capability_t));
+
+ /* make a copy of the service name */
+ svc->cap.svc_id = kmem_zalloc(strlen(cap->svc_id) + 1, KM_SLEEP);
+ (void) strncpy(svc->cap.svc_id, cap->svc_id, strlen(cap->svc_id));
+
+ /* make a copy of the version array */
+ svc->cap.vers = kmem_zalloc(cap->nvers * sizeof (ds_ver_t), KM_SLEEP);
+ bcopy(cap->vers, svc->cap.vers, cap->nvers * sizeof (ds_ver_t));
+
+ /* copy the client ops vector */
+ bcopy(ops, &svc->ops, sizeof (ds_clnt_ops_t));
+
+ svc->state = DS_SVC_INACTIVE;
+ svc->ver_idx = 0;
+ DS_PORTSET_DUP(svc->avail, ds_allports);
+
+ ds_svcs.nsvcs++;
+
+ rw_exit(&ds_svcs.rwlock);
+
+ /* attempt to register the service */
+ (void) ds_svc_register(svc, NULL);
+
+ DS_DBG("ds_cap_init: service '%s' assigned handle 0x%lx\n",
+ svc->cap.svc_id, svc->hdl);
+
+ return (0);
+}
+
+int
+ds_cap_fini(ds_capability_t *cap)
+{
+ int idx;
+ ds_svc_t *svc;
+ ds_svc_hdl_t tmp_hdl;
+
+ rw_enter(&ds_svcs.rwlock, RW_WRITER);
+
+ /* make sure the service is registered */
+ idx = ds_walk_svcs(ds_svc_ismatch, cap->svc_id);
+ if (idx == ds_svcs.maxsvcs) {
+ /* service is not registered */
+ cmn_err(CE_NOTE, "ds_cap_fini: unknown service '%s'",
+ cap->svc_id);
+ rw_exit(&ds_svcs.rwlock);
+ return (EINVAL);
+ }
+
+ svc = ds_svcs.tbl[idx];
+
+ DS_DBG("ds_cap_fini: svcid='%s', hdl=0x%lx\n", svc->cap.svc_id,
+ svc->hdl);
+
+ /*
+ * Attempt to send an unregister notification. Even
+ * if sending the message fails, the local unregister
+ * request must be honored, since this indicates that
+ * the client will no longer handle incoming requests.
+ */
+ (void) ds_send_unreg_req(svc);
+
+ /*
+ * Clear out the structure, but do not deallocate the
+ * memory. It can be reused for the next registration.
+ */
+ kmem_free(svc->cap.svc_id, strlen(svc->cap.svc_id) + 1);
+ kmem_free(svc->cap.vers, svc->cap.nvers * sizeof (ds_ver_t));
+
+ /* save the handle to prevent reuse */
+ tmp_hdl = svc->hdl;
+ bzero(svc, sizeof (ds_svc_t));
+
+ /* initialize for next use */
+ svc->hdl = tmp_hdl;
+ svc->state = DS_SVC_FREE;
+
+ ds_svcs.nsvcs--;
+
+ rw_exit(&ds_svcs.rwlock);
+
+ return (0);
+}
+
+int
+ds_cap_send(ds_svc_hdl_t hdl, void *buf, size_t len)
+{
+ int rv;
+ ds_hdr_t *hdr;
+ caddr_t msg;
+ size_t msglen;
+ size_t hdrlen;
+ caddr_t payload;
+ ds_svc_t *svc;
+ ds_port_t *port;
+ ds_data_handle_t *data;
+
+ rw_enter(&ds_svcs.rwlock, RW_READER);
+
+ if ((hdl == DS_INVALID_HDL) || (svc = ds_get_svc(hdl)) == NULL) {
+ cmn_err(CE_NOTE, "ds_cap_send: invalid handle 0x%lx", hdl);
+ rw_exit(&ds_svcs.rwlock);
+ return (EINVAL);
+ }
+
+ if ((port = svc->port) == NULL) {
+ cmn_err(CE_NOTE, "ds_cap_send: service '%s' not associated "
+ "with a port", svc->cap.svc_id);
+ rw_exit(&ds_svcs.rwlock);
+ return (ECONNRESET);
+ }
+
+ mutex_enter(&port->lock);
+
+ /* check that the LDC channel is ready */
+ if (port->ldc.state != LDC_UP) {
+ cmn_err(CE_NOTE, "ds_cap_send: LDC channel is not up");
+ mutex_exit(&port->lock);
+ rw_exit(&ds_svcs.rwlock);
+ return (ECONNRESET);
+ }
+
+
+ if (svc->state != DS_SVC_ACTIVE) {
+ /* channel is up, but svc is not registered */
+ cmn_err(CE_NOTE, "ds_cap_send: invalid service state 0x%x",
+ svc->state);
+ mutex_exit(&port->lock);
+ rw_exit(&ds_svcs.rwlock);
+ return (EINVAL);
+ }
+
+ hdrlen = DS_HDR_SZ + sizeof (ds_data_handle_t);
+
+ msg = kmem_zalloc(len + hdrlen, KM_SLEEP);
+ hdr = (ds_hdr_t *)msg;
+ payload = msg + hdrlen;
+ msglen = len + hdrlen;
+
+ hdr->payload_len = len + sizeof (ds_data_handle_t);
+ hdr->msg_type = DS_DATA;
+
+ data = (ds_data_handle_t *)(msg + DS_HDR_SZ);
+ data->svc_handle = hdl;
+
+ if ((buf != NULL) && (len != 0)) {
+ bcopy(buf, payload, len);
+ }
+
+ DS_DBG("ds@%lx: data>: hdl=0x%lx, len=%ld, payload_len=%d\n",
+ port->id, svc->hdl, msglen, hdr->payload_len);
+
+ if ((rv = ds_send_msg(port, msg, msglen)) != 0) {
+ rv = (rv == EIO) ? ECONNRESET : rv;
+ }
+
+ mutex_exit(&port->lock);
+ rw_exit(&ds_svcs.rwlock);
+
+ return (rv);
+}
diff --git a/usr/src/uts/sun4v/io/fault_iso.c b/usr/src/uts/sun4v/io/fault_iso.c
new file mode 100644
index 0000000000..0123c19291
--- /dev/null
+++ b/usr/src/uts/sun4v/io/fault_iso.c
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v Fault Isolation Services Module
+ */
+
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/machsystm.h>
+#include <sys/processor.h>
+#include <sys/mem.h>
+#include <vm/page.h>
+#include <sys/note.h>
+#include <sys/ds.h>
+#include <sys/fault_iso.h>
+
+/*
+ * Debugging routines
+ */
+#ifdef DEBUG
+uint_t fi_debug = 0x0;
+#define FI_DBG if (fi_debug) cmn_err
+#else /* DEBUG */
+#define FI_DBG _NOTE(CONSTCOND) if (0) cmn_err
+#endif /* DEBUG */
+
+/*
+ * Domains Services interaction
+ */
+static ds_svc_hdl_t cpu_handle;
+static ds_svc_hdl_t mem_handle;
+
+static ds_ver_t fi_vers[] = { { 1, 0 } };
+#define FI_NVERS (sizeof (fi_vers) / sizeof (fi_vers[0]))
+
+static ds_capability_t cpu_cap = {
+ "fma-cpu-service", /* svc_id */
+ fi_vers, /* vers */
+ FI_NVERS /* nvers */
+};
+
+static ds_capability_t mem_cap = {
+ "fma-mem-service", /* svc_id */
+ fi_vers, /* vers */
+ FI_NVERS /* nvers */
+};
+
+static void fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl);
+static void fi_unreg_handler(ds_cb_arg_t arg);
+
+static void cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
+static void mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
+
+static ds_clnt_ops_t cpu_ops = {
+ fi_reg_handler, /* ds_reg_cb */
+ fi_unreg_handler, /* ds_unreg_cb */
+ cpu_data_handler, /* ds_data_cb */
+ &cpu_handle /* cb_arg */
+};
+
+static ds_clnt_ops_t mem_ops = {
+ fi_reg_handler, /* ds_reg_cb */
+ fi_unreg_handler, /* ds_unreg_cb */
+ mem_data_handler, /* ds_data_cb */
+ &mem_handle /* cb_arg */
+};
+
+static int fi_init(void);
+static void fi_fini(void);
+
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "sun4v Fault Isolation Services %I%"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modlmisc,
+ NULL
+};
+
+int
+_init(void)
+{
+ int rv;
+
+ if ((rv = fi_init()) != 0)
+ return (rv);
+
+ if ((rv = mod_install(&modlinkage)) != 0)
+ fi_fini();
+
+ return (rv);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int fi_allow_unload;
+
+int
+_fini(void)
+{
+ int status;
+
+ if (fi_allow_unload == 0)
+ return (EBUSY);
+
+ if ((status = mod_remove(&modlinkage)) == 0)
+ fi_fini();
+
+ return (status);
+}
+
+static int
+fi_init(void)
+{
+ int rv;
+
+ /* register CPU service with domain services framework */
+ rv = ds_cap_init(&cpu_cap, &cpu_ops);
+ if (rv != 0) {
+ FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv);
+ return (rv);
+ }
+
+ /* register MEM servicewith domain services framework */
+ rv = ds_cap_init(&mem_cap, &mem_ops);
+ if (rv != 0) {
+ FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv);
+ (void) ds_cap_fini(&cpu_cap);
+ return (rv);
+ }
+
+ return (rv);
+}
+
+static void
+fi_fini(void)
+{
+ /*
+ * Stop incoming requests from Zeus
+ */
+ (void) ds_cap_fini(&cpu_cap);
+ (void) ds_cap_fini(&mem_cap);
+}
+
+static void
+cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+ _NOTE(ARGUNUSED(arg))
+
+ fma_cpu_service_req_t *msg = buf;
+ fma_cpu_resp_t resp_msg;
+ int rv = 0;
+ int cpu_status;
+ int resp_back = 0;
+
+ /*
+ * If the buffer is the wrong size for CPU calls or is NULL then
+ * do not return any message. The call from the ldom mgr. will time out
+ * and the response will be NULL.
+ */
+ if (msg == NULL || buflen != sizeof (fma_cpu_service_req_t)) {
+ return;
+ }
+
+ FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, cpu_id = %d\n",
+ msg->req_num, msg->msg_type, msg->cpu_id);
+
+ resp_msg.req_num = msg->req_num;
+
+ switch (msg->msg_type) {
+ case FMA_CPU_REQ_STATUS:
+ rv = p_online_internal(msg->cpu_id, P_STATUS,
+ &cpu_status);
+ if (rv == EINVAL) {
+ FI_DBG(CE_CONT, "Failed p_online call failed."
+ "Invalid CPU\n");
+ resp_msg.result = FMA_CPU_RESP_FAILURE;
+ resp_msg.status = FMA_CPU_STAT_ILLEGAL;
+ resp_back = 1;
+ }
+ break;
+ case FMA_CPU_REQ_OFFLINE:
+ rv = p_online_internal(msg->cpu_id, P_OFFLINE,
+ &cpu_status);
+ if (rv == EINVAL) {
+ FI_DBG(CE_CONT, "Failed p_online call failed."
+ "Invalid CPU\n");
+ resp_msg.result = FMA_CPU_RESP_FAILURE;
+ resp_msg.status = FMA_CPU_STAT_ILLEGAL;
+ resp_back = 1;
+ } else if (rv == EBUSY) {
+ FI_DBG(CE_CONT, "Failed p_online call failed."
+ "Tried to offline while busy\n");
+ resp_msg.result = FMA_CPU_RESP_FAILURE;
+ resp_msg.status = FMA_CPU_STAT_ONLINE;
+ resp_back = 1;
+ }
+ break;
+ case FMA_CPU_REQ_ONLINE:
+ rv = p_online_internal(msg->cpu_id, P_ONLINE,
+ &cpu_status);
+ if (rv == EINVAL) {
+ FI_DBG(CE_CONT, "Failed p_online call failed."
+ "Invalid CPU\n");
+ resp_msg.result = FMA_CPU_RESP_FAILURE;
+ resp_msg.status = FMA_CPU_STAT_ILLEGAL;
+ resp_back = 1;
+ } else if (rv == ENOTSUP) {
+ FI_DBG(CE_CONT, "Failed p_online call failed."
+ "Online not supported for single CPU\n");
+ resp_msg.result = FMA_CPU_RESP_FAILURE;
+ resp_msg.status = FMA_CPU_STAT_OFFLINE;
+ resp_back = 1;
+ }
+ break;
+ default:
+ /*
+ * If the msg_type was of unknown type simply return and
+ * have the ldom mgr. time out with a NULL response.
+ */
+ return;
+ }
+
+ if (rv != 0) {
+ if (resp_back) {
+ if ((rv = ds_cap_send(cpu_handle, &resp_msg,
+ sizeof (resp_msg))) != 0) {
+ FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n",
+ rv);
+ }
+ return;
+ }
+ ASSERT((rv == EINVAL) || ((rv == EBUSY) &&
+ (msg->msg_type == FMA_CPU_REQ_OFFLINE)) ||
+ ((rv == ENOTSUP) &&
+ (msg->msg_type == FMA_CPU_REQ_ONLINE)));
+
+ cmn_err(CE_WARN, "p_online_internal error not handled "
+ "rv = %d\n", rv);
+ }
+
+ resp_msg.req_num = msg->req_num;
+ resp_msg.result = FMA_CPU_RESP_OK;
+
+ switch (cpu_status) {
+ case P_OFFLINE:
+ case P_FAULTED:
+ case P_POWEROFF:
+ case P_SPARE:
+ resp_msg.status = FMA_CPU_STAT_OFFLINE;
+ break;
+ case P_ONLINE:
+ case P_NOINTR:
+ resp_msg.status = FMA_CPU_STAT_ONLINE;
+ break;
+ default:
+ resp_msg.status = FMA_CPU_STAT_ILLEGAL;
+ }
+
+ if ((rv = ds_cap_send(cpu_handle, &resp_msg,
+ sizeof (resp_msg))) != 0) {
+ FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv);
+ }
+}
+
+static void
+mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+ _NOTE(ARGUNUSED(arg))
+
+ fma_mem_service_req_t *msg = buf;
+ fma_mem_resp_t resp_msg;
+ int rv = 0;
+
+ /*
+ * If the buffer is the wrong size for Mem calls or is NULL then
+ * do not return any message. The call from the ldom mgr. will time out
+ * and the response will be NULL.
+ */
+ if (msg == NULL || buflen != sizeof (fma_mem_service_req_t)) {
+ return;
+ }
+
+ FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, memory addr = 0x%lx"
+ "memory length = 0x%lx\n", msg->req_num, msg->msg_type,
+ msg->real_addr, msg->length);
+
+ resp_msg.req_num = msg->req_num;
+ resp_msg.res_addr = msg->real_addr;
+ resp_msg.res_length = msg->length;
+
+ /*
+ * Information about return values for page calls can be referenced
+ * in usr/src/uts/common/vm/page_retire.c
+ */
+ switch (msg->msg_type) {
+ case FMA_MEM_REQ_STATUS:
+ rv = page_retire_check(msg->real_addr, NULL);
+ switch (rv) {
+ /* Page is retired */
+ case 0:
+ resp_msg.result = FMA_MEM_RESP_OK;
+ resp_msg.status = FMA_MEM_STAT_RETIRED;
+ break;
+ /* Page is pending. Send back failure and not retired */
+ case EAGAIN:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
+ break;
+ /* Page is not retired. */
+ case EIO:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
+ break;
+ /* PA is not valid */
+ case EINVAL:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_ILLEGAL;
+ break;
+ default:
+ ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
+ (rv == EINVAL));
+ cmn_err(CE_WARN, "fault_iso: return value from "
+ "page_retire_check invalid: %d\n", rv);
+ }
+ break;
+ case FMA_MEM_REQ_RETIRE:
+ rv = page_retire(msg->real_addr, PR_FMA);
+ switch (rv) {
+ /* Page retired successfully */
+ case 0:
+ resp_msg.result = FMA_MEM_RESP_OK;
+ resp_msg.status = FMA_MEM_STAT_RETIRED;
+ break;
+ /* Tried to retire and now Pending retirement */
+ case EAGAIN:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
+ break;
+ /* Did not try to retire. Page already retired */
+ case EIO:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_RETIRED;
+ break;
+ /* PA is not valid */
+ case EINVAL:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_ILLEGAL;
+ break;
+ default:
+ ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
+ (rv == EINVAL));
+ cmn_err(CE_WARN, "fault_iso: return value from "
+ "page_retire invalid: %d\n", rv);
+ }
+ break;
+ case FMA_MEM_REQ_RESURRECT:
+ rv = page_unretire(msg->real_addr);
+ switch (rv) {
+ /* Page succesfullly unretired */
+ case 0:
+ resp_msg.result = FMA_MEM_RESP_OK;
+ resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
+ break;
+ /* Page could not be locked. Still retired */
+ case EAGAIN:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_RETIRED;
+ break;
+ /* Page was not retired already */
+ case EIO:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
+ break;
+ /* PA is not valid */
+ case EINVAL:
+ resp_msg.result = FMA_MEM_RESP_FAILURE;
+ resp_msg.status = FMA_MEM_STAT_ILLEGAL;
+ break;
+ default:
+ ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
+ (rv == EINVAL));
+ cmn_err(CE_WARN, "fault_iso: return value from "
+ "page_unretire invalid: %d\n", rv);
+ }
+ break;
+ default:
+ /*
+ * If the msg_type was of unknown type simply return and
+ * have the ldom mgr. time out with a NULL response.
+ */
+ return;
+ }
+
+ if ((rv = ds_cap_send(mem_handle, &resp_msg, sizeof (resp_msg))) != 0) {
+ FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv);
+ }
+}
+
+static void
+fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
+{
+ FI_DBG(CE_CONT, "fi_reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n",
+ arg, ver->major, ver->minor, hdl);
+
+ if ((ds_svc_hdl_t *)arg == &cpu_handle)
+ cpu_handle = hdl;
+ if ((ds_svc_hdl_t *)arg == &mem_handle)
+ mem_handle = hdl;
+}
+
+static void
+fi_unreg_handler(ds_cb_arg_t arg)
+{
+ FI_DBG(CE_CONT, "fi_unreg_handler: arg=0x%p\n", arg);
+
+ if ((ds_svc_hdl_t *)arg == &cpu_handle)
+ cpu_handle = DS_INVALID_HDL;
+ if ((ds_svc_hdl_t *)arg == &mem_handle)
+ mem_handle = DS_INVALID_HDL;
+}
diff --git a/usr/src/uts/sun4v/io/ldc.c b/usr/src/uts/sun4v/io/ldc.c
new file mode 100644
index 0000000000..87b02588ca
--- /dev/null
+++ b/usr/src/uts/sun4v/io/ldc.c
@@ -0,0 +1,5609 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v LDC Transport Layer
+ */
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/open.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/ksynch.h>
+#include <sys/modctl.h>
+#include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/cred.h>
+#include <sys/promif.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/cyclic.h>
+#include <sys/machsystm.h>
+#include <sys/vm.h>
+#include <sys/cpu.h>
+#include <sys/intreg.h>
+#include <sys/machcpuvar.h>
+#include <sys/note.h>
+#include <sys/ivintr.h>
+#include <sys/hypervisor_api.h>
+#include <sys/ldc.h>
+#include <sys/ldc_impl.h>
+#include <sys/cnex.h>
+#include <sys/hsvc.h>
+
+/* Core internal functions */
+static int i_ldc_h2v_error(int h_error);
+static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
+static int i_ldc_rxq_reconf(ldc_chan_t *ldcp);
+static void i_ldc_reset_state(ldc_chan_t *ldcp);
+static void i_ldc_reset(ldc_chan_t *ldcp);
+
+static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
+static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
+static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
+static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
+ uint8_t ctrlmsg);
+
+/* Interrupt handling functions */
+static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
+static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
+static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
+
+/* Read method functions */
+static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
+static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
+ size_t *sizep);
+static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
+ size_t *sizep);
+
+/* Write method functions */
+static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
+ size_t *sizep);
+static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
+ size_t *sizep);
+static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
+ size_t *sizep);
+
+/* Pkt processing internal functions */
+static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
+static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
+static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
+static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
+static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
+static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
+static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
+
+/* Memory synchronization internal functions */
+static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
+ uint8_t direction, uint64_t offset, size_t size);
+static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
+ uint8_t direction, uint64_t start, uint64_t end);
+
+/* LDC Version */
+static ldc_ver_t ldc_versions[] = { {1, 0} };
+
+/* number of supported versions */
+#define LDC_NUM_VERS (sizeof (ldc_versions) / sizeof (ldc_versions[0]))
+
+/* Module State Pointer */
+static ldc_soft_state_t *ldcssp;
+
+static struct modldrv md = {
+ &mod_miscops, /* This is a misc module */
+ "sun4v LDC module v%I%", /* Name of the module */
+};
+
+static struct modlinkage ml = {
+ MODREV_1,
+ &md,
+ NULL
+};
+
+static uint64_t ldc_sup_minor; /* Supported minor number */
+static hsvc_info_t ldc_hsvc = {
+ HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
+};
+
+static uint64_t intr_sup_minor; /* Supported minor number */
+static hsvc_info_t intr_hsvc = {
+ HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
+};
+
+#ifdef DEBUG
+
+/*
+ * Print debug messages
+ *
+ * set ldcdbg to 0x7 for enabling all msgs
+ * 0x4 - Warnings
+ * 0x2 - All debug messages
+ * 0x1 - Minimal debug messages
+ *
+ * set ldcdbgchan to the channel number you want to debug
+ * setting it to -1 prints debug messages for all channels
+ * NOTE: ldcdbgchan has no effect on error messages
+ */
+
+#define DBG_ALL_LDCS -1
+
+int ldcdbg = 0x0;
+int64_t ldcdbgchan = DBG_ALL_LDCS;
+
+static void
+ldcdebug(int64_t id, const char *fmt, ...)
+{
+ char buf[512];
+ va_list ap;
+
+ /*
+ * Do not return if,
+ * caller wants to print it anyway - (id == DBG_ALL_LDCS)
+ * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
+ * debug channel = caller specified channel
+ */
+ if ((id != DBG_ALL_LDCS) &&
+ (ldcdbgchan != DBG_ALL_LDCS) &&
+ (ldcdbgchan != id)) {
+ return;
+ }
+
+ va_start(ap, fmt);
+ (void) vsprintf(buf, fmt, ap);
+ va_end(ap);
+
+ cmn_err(CE_CONT, "?%s\n", buf);
+}
+
+#define D1 \
+if (ldcdbg & 0x01) \
+ ldcdebug
+
+#define D2 \
+if (ldcdbg & 0x02) \
+ ldcdebug
+
+#define DWARN \
+if (ldcdbg & 0x04) \
+ ldcdebug
+
+#define DUMP_PAYLOAD(id, addr) \
+{ \
+ char buf[65*3]; \
+ int i; \
+ uint8_t *src = (uint8_t *)addr; \
+ for (i = 0; i < 64; i++, src++) \
+ (void) sprintf(&buf[i * 3], "|%02x", *src); \
+ (void) sprintf(&buf[i * 3], "|\n"); \
+ D2((id), "payload: %s", buf); \
+}
+
+#define DUMP_LDC_PKT(c, s, addr) \
+{ \
+ ldc_msg_t *msg = (ldc_msg_t *)(addr); \
+ uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0; \
+ if (msg->type == LDC_DATA) { \
+ D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])", \
+ (s), mid, msg->type, msg->stype, msg->ctrl, \
+ (msg->env & LDC_FRAG_START) ? 'B' : ' ', \
+ (msg->env & LDC_FRAG_STOP) ? 'E' : ' ', \
+ (msg->env & LDC_LEN_MASK)); \
+ } else { \
+ D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s), \
+ mid, msg->type, msg->stype, msg->ctrl, msg->env); \
+ } \
+}
+
+#else
+
+#define DBG_ALL_LDCS -1
+
+#define D1
+#define D2
+#define DWARN
+
+#define DUMP_PAYLOAD(id, addr)
+#define DUMP_LDC_PKT(c, s, addr)
+
+#endif
+
+#define ZERO_PKT(p) \
+ bzero((p), sizeof (ldc_msg_t));
+
+#define IDX2COOKIE(idx, pg_szc, pg_shift) \
+ (((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
+
+
+int
+_init(void)
+{
+ int status;
+
+ status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
+ if (status != 0) {
+ cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
+ " group: 0x%lx major: %ld minor: %ld errno: %d",
+ ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
+ ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
+ return (-1);
+ }
+
+ status = hsvc_register(&intr_hsvc, &intr_sup_minor);
+ if (status != 0) {
+ cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
+ "services group: 0x%lx major: %ld minor: %ld errno: %d",
+ intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
+ intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
+ (void) hsvc_unregister(&ldc_hsvc);
+ return (-1);
+ }
+
+ /* allocate soft state structure */
+ ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
+
+ /* Link the module into the system */
+ status = mod_install(&ml);
+ if (status != 0) {
+ kmem_free(ldcssp, sizeof (ldc_soft_state_t));
+ return (status);
+ }
+
+ /* Initialize the LDC state structure */
+ mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
+
+ mutex_enter(&ldcssp->lock);
+
+ ldcssp->channel_count = 0;
+ ldcssp->channels_open = 0;
+ ldcssp->chan_list = NULL;
+ ldcssp->dring_list = NULL;
+
+ mutex_exit(&ldcssp->lock);
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ /* Report status of the dynamically loadable driver module */
+ return (mod_info(&ml, modinfop));
+}
+
+int
+_fini(void)
+{
+ int rv, status;
+ ldc_chan_t *ldcp;
+ ldc_dring_t *dringp;
+ ldc_mem_info_t minfo;
+
+ /* Unlink the driver module from the system */
+ status = mod_remove(&ml);
+ if (status) {
+ DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
+ return (EIO);
+ }
+
+ /* close and finalize channels */
+ ldcp = ldcssp->chan_list;
+ while (ldcp != NULL) {
+ (void) ldc_close((ldc_handle_t)ldcp);
+ (void) ldc_fini((ldc_handle_t)ldcp);
+
+ ldcp = ldcp->next;
+ }
+
+ /* Free descriptor rings */
+ dringp = ldcssp->dring_list;
+ while (dringp != NULL) {
+ dringp = dringp->next;
+
+ rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
+ if (rv == 0 && minfo.status != LDC_UNBOUND) {
+ if (minfo.status == LDC_BOUND) {
+ (void) ldc_mem_dring_unbind(
+ (ldc_dring_handle_t)dringp);
+ }
+ if (minfo.status == LDC_MAPPED) {
+ (void) ldc_mem_dring_unmap(
+ (ldc_dring_handle_t)dringp);
+ }
+ }
+
+ (void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
+ }
+ ldcssp->dring_list = NULL;
+
+ /*
+ * We have successfully "removed" the driver.
+ * Destroying soft states
+ */
+ mutex_destroy(&ldcssp->lock);
+ kmem_free(ldcssp, sizeof (ldc_soft_state_t));
+
+ (void) hsvc_unregister(&ldc_hsvc);
+ (void) hsvc_unregister(&intr_hsvc);
+
+ return (status);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * LDC Transport Internal Functions
+ */
+
+/*
+ * Translate HV Errors to sun4v error codes
+ */
+static int
+i_ldc_h2v_error(int h_error)
+{
+ switch (h_error) {
+
+ case H_EOK:
+ return (0);
+
+ case H_ENORADDR:
+ return (EFAULT);
+
+ case H_EBADPGSZ:
+ case H_EINVAL:
+ return (EINVAL);
+
+ case H_EWOULDBLOCK:
+ return (EWOULDBLOCK);
+
+ case H_ENOACCESS:
+ case H_ENOMAP:
+ return (EACCES);
+
+ case H_EIO:
+ case H_ECPUERROR:
+ return (EIO);
+
+ case H_ENOTSUPPORTED:
+ return (ENOTSUP);
+
+ case H_ETOOMANY:
+ return (ENOSPC);
+
+ case H_ECHANNEL:
+ return (ECHRNG);
+ default:
+ break;
+ }
+
+ return (EIO);
+}
+
+/*
+ * Reconfigure the transmit queue
+ */
+static int
+i_ldc_txq_reconf(ldc_chan_t *ldcp)
+{
+ int rv;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_tx_qconf: (0x%lx) cannot set qconf", ldcp->id);
+ return (EIO);
+ }
+ rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
+ &(ldcp->tx_tail), &(ldcp->link_state));
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_tx_get_state: (0x%lx) cannot get qptrs", ldcp->id);
+ return (EIO);
+ }
+ D1(ldcp->id, "ldc_tx_get_state: (0x%llx) h=0x%llx,t=0x%llx,"
+ "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
+ ldcp->link_state);
+
+ return (0);
+}
+
+/*
+ * Reconfigure the receive queue
+ */
+static int
+i_ldc_rxq_reconf(ldc_chan_t *ldcp)
+{
+ int rv;
+ uint64_t rx_head, rx_tail;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
+ &(ldcp->link_state));
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_rx_getstate: (0x%lx) cannot get state",
+ ldcp->id);
+ return (EIO);
+ }
+
+ if (rx_head != rx_tail || ldcp->tstate > TS_READY) {
+ rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
+ ldcp->rx_q_entries);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_rx_qconf: (0x%lx) cannot set qconf",
+ ldcp->id);
+ return (EIO);
+ }
+ D1(ldcp->id, "ldc_rx_qconf: (0x%llx) completed qconf",
+ ldcp->id);
+ }
+
+ return (0);
+}
+
+/*
+ * Reset LDC state structure and its contents
+ */
+static void
+i_ldc_reset_state(ldc_chan_t *ldcp)
+{
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ ldcp->last_msg_snt = LDC_INIT_SEQID;
+ ldcp->last_ack_rcd = 0;
+ ldcp->last_msg_rcd = 0;
+ ldcp->tx_ackd_head = ldcp->tx_head;
+ ldcp->next_vidx = 0;
+ ldcp->hstate = 0;
+ ldcp->tstate = TS_OPEN;
+ ldcp->status = LDC_OPEN;
+
+ if (ldcp->link_state == LDC_CHANNEL_UP ||
+ ldcp->link_state == LDC_CHANNEL_RESET) {
+
+ if (ldcp->mode == LDC_MODE_RAW) {
+ ldcp->status = LDC_UP;
+ ldcp->tstate = TS_UP;
+ } else {
+ ldcp->status = LDC_READY;
+ ldcp->tstate |= TS_LINK_READY;
+ }
+ }
+}
+
+/*
+ * Reset a LDC channel
+ */
+static void
+i_ldc_reset(ldc_chan_t *ldcp)
+{
+ D2(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
+
+ (void) i_ldc_txq_reconf(ldcp);
+ (void) i_ldc_rxq_reconf(ldcp);
+ i_ldc_reset_state(ldcp);
+}
+
+/*
+ * Clear pending interrupts
+ */
+static void
+i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
+{
+ ldc_cnex_t *cinfo = &ldcssp->cinfo;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ if (cinfo->dip && ldcp->intr_pending) {
+ ldcp->intr_pending = B_FALSE;
+ (void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
+ }
+}
+
+/*
+ * Set the receive queue head
+ * Returns an error if it fails
+ */
+static int
+i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
+{
+ int rv;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ rv = hv_ldc_rx_set_qhead(ldcp->id, head);
+ if (rv && rv != H_EWOULDBLOCK) {
+ cmn_err(CE_WARN,
+ "ldc_rx_set_qhead: (0x%lx) cannot set qhead", ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Returns the tx_tail to be used for transfer
+ * Re-reads the TX queue ptrs if and only if the
+ * the cached head and tail are equal (queue is full)
+ */
+static int
+i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
+{
+ int rv;
+ uint64_t current_head, new_tail;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ /* Read the head and tail ptrs from HV */
+ rv = hv_ldc_tx_get_state(ldcp->id,
+ &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
+ ldcp->id);
+ return (EIO);
+ }
+ if (ldcp->link_state == LDC_CHANNEL_DOWN) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
+ ldcp->id);
+ return (ECONNRESET);
+ }
+
+ /* In reliable mode, check against last ACKd msg */
+ current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
+ ldcp->mode == LDC_MODE_STREAM)
+ ? ldcp->tx_ackd_head : ldcp->tx_head;
+
+ /* increment the tail */
+ new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ if (new_tail == current_head) {
+ DWARN(ldcp->id,
+ "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
+ ldcp->id);
+ return (EWOULDBLOCK);
+ }
+
+ D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
+ ldcp->id, ldcp->tx_head, ldcp->tx_tail);
+
+ *tail = ldcp->tx_tail;
+ return (0);
+}
+
+/*
+ * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
+ * and retry LDC_CHK_CNT times before returning an error.
+ * Returns 0, EWOULDBLOCK or EIO
+ */
+static int
+i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
+{
+ int rv, retval = EWOULDBLOCK;
+ int loop_cnt, chk_cnt;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ for (chk_cnt = 0; chk_cnt < LDC_CHK_CNT; chk_cnt++) {
+
+ if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
+ retval = 0;
+ break;
+ }
+ if (rv != H_EWOULDBLOCK) {
+ DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
+ "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
+ retval = EIO;
+ break;
+ }
+
+ /* spin LDC_LOOP_CNT and then try again */
+ for (loop_cnt = 0; loop_cnt < LDC_LOOP_CNT; loop_cnt++);
+ }
+ return (retval);
+}
+
+/*
+ * Send a LDC message
+ */
+static int
+i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
+ uint8_t ctrlmsg)
+{
+ int rv;
+ ldc_msg_t *pkt;
+ uint64_t tx_tail;
+ uint32_t curr_seqid = ldcp->last_msg_snt;
+
+ ASSERT(MUTEX_HELD(&ldcp->lock));
+ /* get the current tail for the message */
+ rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_send_pkt: (0x%llx) error sending pkt, "
+ "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
+ ldcp->id, pkttype, subtype, ctrlmsg);
+ return (rv);
+ }
+
+ pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+ ZERO_PKT(pkt);
+
+ /* Initialize the packet */
+ pkt->type = pkttype;
+ pkt->stype = subtype;
+ pkt->ctrl = ctrlmsg;
+
+ /* Store ackid/seqid iff it is not RAW mode & not a RTS/RTR message */
+ if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
+ ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
+ curr_seqid++;
+ if (ldcp->mode != LDC_MODE_RAW) {
+ pkt->seqid = curr_seqid;
+ pkt->ackid = ldcp->last_msg_rcd;
+ }
+ }
+ DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
+
+ /* initiate the send by calling into HV and set the new tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_send_pkt:(0x%llx) error sending pkt, "
+ "type=0x%x,stype=0x%x,ctrl=0x%x\n",
+ ldcp->id, pkttype, subtype, ctrlmsg);
+ return (EIO);
+ }
+
+ ldcp->last_msg_snt = curr_seqid;
+ ldcp->tx_tail = tx_tail;
+
+ return (0);
+}
+
+/*
+ * Checks if packet was received in right order
+ * in the case of a reliable transport.
+ * Returns 0 if in order, else EIO
+ */
+static int
+i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ /* No seqid checking for RAW mode */
+ if (ldcp->mode == LDC_MODE_RAW)
+ return (0);
+
+ /* No seqid checking for version, RTS, RTR message */
+ if (msg->ctrl == LDC_VER ||
+ msg->ctrl == LDC_RTS ||
+ msg->ctrl == LDC_RTR)
+ return (0);
+
+ /* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
+ if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
+ DWARN(ldcp->id,
+ "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
+ "expecting 0x%x\n", ldcp->id, msg->seqid,
+ (ldcp->last_msg_rcd + 1));
+ return (EIO);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Process an incoming version ctrl message
+ */
+static int
+i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ int rv = 0, idx = ldcp->next_vidx;
+ ldc_msg_t *pkt;
+ uint64_t tx_tail;
+ ldc_ver_t *rcvd_ver;
+
+ /* get the received version */
+ rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
+
+ D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
+ ldcp->id, rcvd_ver->major, rcvd_ver->minor);
+
+ switch (msg->stype) {
+ case LDC_INFO:
+
+ /* get the current tail and pkt for the response */
+ rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
+ if (rv != 0) {
+ DWARN(ldcp->id,
+ "i_ldc_process_VER: (0x%llx) err sending "
+ "version ACK/NACK\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+ ZERO_PKT(pkt);
+
+ /* initialize the packet */
+ pkt->type = LDC_CTRL;
+ pkt->ctrl = LDC_VER;
+
+ for (;;) {
+
+ D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
+ rcvd_ver->major, rcvd_ver->minor,
+ ldc_versions[idx].major, ldc_versions[idx].minor);
+
+ if (rcvd_ver->major == ldc_versions[idx].major) {
+ /* major version match - ACK version */
+ pkt->stype = LDC_ACK;
+
+ /*
+ * lower minor version to the one this endpt
+ * supports, if necessary
+ */
+ if (rcvd_ver->minor > ldc_versions[idx].minor)
+ rcvd_ver->minor =
+ ldc_versions[idx].minor;
+ bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
+
+ break;
+ }
+
+ if (rcvd_ver->major > ldc_versions[idx].major) {
+
+ D1(ldcp->id, "i_ldc_process_VER: using next"
+ " lower idx=%d, v%u.%u\n", idx,
+ ldc_versions[idx].major,
+ ldc_versions[idx].minor);
+
+ /* nack with next lower version */
+ pkt->stype = LDC_NACK;
+ bcopy(&ldc_versions[idx], pkt->udata,
+ sizeof (ldc_versions[idx]));
+ ldcp->next_vidx = idx;
+ break;
+ }
+
+ /* next major version */
+ idx++;
+
+ D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
+
+ if (idx == LDC_NUM_VERS) {
+ /* no version match - send NACK */
+ pkt->stype = LDC_NACK;
+ bzero(pkt->udata, sizeof (ldc_ver_t));
+ ldcp->next_vidx = 0;
+ break;
+ }
+ }
+
+ /* initiate the send by calling into HV and set the new tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv == 0) {
+ ldcp->tx_tail = tx_tail;
+ if (pkt->stype == LDC_ACK) {
+ D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
+ " version ACK\n", ldcp->id);
+ /* Save the ACK'd version */
+ ldcp->version.major = rcvd_ver->major;
+ ldcp->version.minor = rcvd_ver->minor;
+ ldcp->tstate |= TS_VER_DONE;
+ DWARN(DBG_ALL_LDCS,
+ "(0x%llx) Agreed on version v%u.%u\n",
+ ldcp->id, rcvd_ver->major, rcvd_ver->minor);
+ }
+ } else {
+ DWARN(ldcp->id,
+ "i_ldc_process_VER: (0x%llx) error sending "
+ "ACK/NACK\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ break;
+
+ case LDC_ACK:
+ /* SUCCESS - we have agreed on a version */
+ ldcp->version.major = rcvd_ver->major;
+ ldcp->version.minor = rcvd_ver->minor;
+ ldcp->tstate |= TS_VER_DONE;
+
+ D1(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n",
+ ldcp->id, rcvd_ver->major, rcvd_ver->minor);
+
+ /* initiate RTS-RTR-RDX handshake */
+ rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+ ZERO_PKT(pkt);
+
+ pkt->type = LDC_CTRL;
+ pkt->stype = LDC_INFO;
+ pkt->ctrl = LDC_RTS;
+ pkt->env = ldcp->mode;
+ if (ldcp->mode != LDC_MODE_RAW)
+ pkt->seqid = LDC_INIT_SEQID;
+
+ ldcp->last_msg_rcd = LDC_INIT_SEQID;
+
+ DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
+
+ /* initiate the send by calling into HV and set the new tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv) {
+ D2(ldcp->id,
+ "i_ldc_process_VER: (0x%llx) no listener\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ ldcp->last_msg_snt++;
+ ldcp->tx_tail = tx_tail;
+ ldcp->hstate |= TS_SENT_RTS;
+
+ break;
+
+ case LDC_NACK:
+ /* check if version in NACK is zero */
+ if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
+ /* version handshake failure */
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_process_VER: (0x%llx) no version match\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ /* get the current tail and pkt for the response */
+ rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
+ if (rv != 0) {
+ cmn_err(CE_NOTE,
+ "i_ldc_process_VER: (0x%lx) err sending "
+ "version ACK/NACK\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+ ZERO_PKT(pkt);
+
+ /* initialize the packet */
+ pkt->type = LDC_CTRL;
+ pkt->ctrl = LDC_VER;
+ pkt->stype = LDC_INFO;
+
+ /* check ver in NACK msg has a match */
+ for (;;) {
+ if (rcvd_ver->major == ldc_versions[idx].major) {
+ /*
+ * major version match - resubmit request,
+ * lowering the minor version to the one this
+ * endpt supports, if necessary
+ */
+ if (rcvd_ver->minor > ldc_versions[idx].minor)
+ rcvd_ver->minor =
+ ldc_versions[idx].minor;
+ bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
+ break;
+
+ }
+
+ if (rcvd_ver->major > ldc_versions[idx].major) {
+
+ D1(ldcp->id, "i_ldc_process_VER: using next"
+ " lower idx=%d, v%u.%u\n", idx,
+ ldc_versions[idx].major,
+ ldc_versions[idx].minor);
+
+ /* send next lower version */
+ bcopy(&ldc_versions[idx], pkt->udata,
+ sizeof (ldc_versions[idx]));
+ ldcp->next_vidx = idx;
+ break;
+ }
+
+ /* next version */
+ idx++;
+
+ D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
+
+ if (idx == LDC_NUM_VERS) {
+ /* no version match - terminate */
+ ldcp->next_vidx = 0;
+ return (ECONNRESET);
+ }
+ }
+
+ /* initiate the send by calling into HV and set the new tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv == 0) {
+ D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
+ "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
+ ldc_versions[idx].minor);
+ ldcp->tx_tail = tx_tail;
+ } else {
+ cmn_err(CE_NOTE,
+ "i_ldc_process_VER: (0x%lx) error sending version"
+ "INFO\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ break;
+ }
+
+ return (rv);
+}
+
+
+/*
+ * Process an incoming RTS ctrl message
+ */
+static int
+i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ int rv = 0;
+ ldc_msg_t *pkt;
+ uint64_t tx_tail;
+ boolean_t sent_NACK = B_FALSE;
+
+ D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
+
+ switch (msg->stype) {
+ case LDC_NACK:
+ DWARN(ldcp->id,
+ "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
+ ldcp->id);
+
+ /* Reset the channel -- as we cannot continue */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+ break;
+
+ case LDC_INFO:
+
+ /* check mode */
+ if (ldcp->mode != (ldc_mode_t)msg->env) {
+ cmn_err(CE_NOTE,
+ "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
+ ldcp->id);
+ /*
+ * send NACK in response to MODE message
+ * get the current tail for the response
+ */
+ rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
+ if (rv) {
+ /* if cannot send NACK - reset channel */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+ break;
+ }
+ sent_NACK = B_TRUE;
+ }
+ break;
+ default:
+ DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+ break;
+ }
+
+ /*
+ * If either the connection was reset (when rv != 0) or
+ * a NACK was sent, we return. In the case of a NACK
+ * we don't want to consume the packet that came in but
+ * not record that we received the RTS
+ */
+ if (rv || sent_NACK)
+ return (rv);
+
+ /* record RTS received */
+ ldcp->hstate |= TS_RCVD_RTS;
+
+ /* store initial SEQID info */
+ ldcp->last_msg_snt = msg->seqid;
+
+ /* get the current tail for the response */
+ rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
+ if (rv != 0) {
+ cmn_err(CE_NOTE,
+ "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+ ZERO_PKT(pkt);
+
+ /* initialize the packet */
+ pkt->type = LDC_CTRL;
+ pkt->stype = LDC_INFO;
+ pkt->ctrl = LDC_RTR;
+ pkt->env = ldcp->mode;
+ if (ldcp->mode != LDC_MODE_RAW)
+ pkt->seqid = LDC_INIT_SEQID;
+
+ ldcp->last_msg_rcd = msg->seqid;
+
+ /* initiate the send by calling into HV and set the new tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv == 0) {
+ D2(ldcp->id,
+ "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
+ DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
+
+ ldcp->tx_tail = tx_tail;
+ ldcp->hstate |= TS_SENT_RTR;
+
+ } else {
+ cmn_err(CE_NOTE,
+ "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ return (0);
+}
+
+/*
+ * Process an incoming RTR ctrl message
+ */
+static int
+i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ int rv = 0;
+ boolean_t sent_NACK = B_FALSE;
+
+ D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
+
+ switch (msg->stype) {
+ case LDC_NACK:
+ /* RTR NACK received */
+ DWARN(ldcp->id,
+ "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
+ ldcp->id);
+
+ /* Reset the channel -- as we cannot continue */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+
+ break;
+
+ case LDC_INFO:
+
+ /* check mode */
+ if (ldcp->mode != (ldc_mode_t)msg->env) {
+ DWARN(ldcp->id,
+ "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
+ ldcp->id);
+ /*
+ * send NACK in response to MODE message
+ * get the current tail for the response
+ */
+ rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
+ if (rv) {
+ /* if cannot send NACK - reset channel */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+ break;
+ }
+ sent_NACK = B_TRUE;
+ }
+ break;
+
+ default:
+ DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
+ ldcp->id);
+
+ /* Reset the channel -- as we cannot continue */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+ break;
+ }
+
+ /*
+ * If either the connection was reset (when rv != 0) or
+ * a NACK was sent, we return. In the case of a NACK
+ * we don't want to consume the packet that came in but
+ * not record that we received the RTR
+ */
+ if (rv || sent_NACK)
+ return (rv);
+
+ ldcp->last_msg_snt = msg->seqid;
+ ldcp->hstate |= TS_RCVD_RTR;
+
+ rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
+ if (rv) {
+ cmn_err(CE_NOTE,
+ "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+ D2(ldcp->id,
+ "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
+
+ ldcp->hstate |= TS_SENT_RDX;
+ ldcp->tstate |= TS_HSHAKE_DONE;
+ ldcp->status = LDC_UP;
+
+ DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
+
+ return (0);
+}
+
+
+/*
+ * Process an incoming RDX ctrl message
+ */
+static int
+i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ int rv = 0;
+
+ D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
+
+ switch (msg->stype) {
+ case LDC_NACK:
+ /* RDX NACK received */
+ DWARN(ldcp->id,
+ "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
+ ldcp->id);
+
+ /* Reset the channel -- as we cannot continue */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+
+ break;
+
+ case LDC_INFO:
+
+ /*
+ * if channel is UP and a RDX received after data transmission
+ * has commenced it is an error
+ */
+ if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_process_RDX: (0x%llx) unexpected RDX"
+ " - LDC reset\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ ldcp->hstate |= TS_RCVD_RDX;
+ ldcp->tstate |= TS_HSHAKE_DONE;
+ ldcp->status = LDC_UP;
+
+ D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
+ break;
+
+ default:
+ DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
+ ldcp->id);
+
+ /* Reset the channel -- as we cannot continue */
+ i_ldc_reset(ldcp);
+ rv = ECONNRESET;
+ break;
+ }
+
+ return (rv);
+}
+
+/*
+ * Process an incoming ACK for a data packet
+ */
+static int
+i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ int rv;
+ uint64_t tx_head;
+ ldc_msg_t *pkt;
+
+ /*
+ * Read the current Tx head and tail
+ */
+ rv = hv_ldc_tx_get_state(ldcp->id,
+ &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
+ ldcp->id);
+ return (0);
+ }
+
+ /*
+ * loop from where the previous ACK location was to the
+ * current head location. This is how far the HV has
+ * actually sent pkts. Pkts between head and tail are
+ * yet to be sent by HV.
+ */
+ tx_head = ldcp->tx_ackd_head;
+ for (;;) {
+ pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
+ tx_head = (tx_head + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ if (pkt->seqid == msg->ackid) {
+ D2(ldcp->id,
+ "i_ldc_process_data_ACK: (0x%llx) found packet\n",
+ ldcp->id);
+ ldcp->last_ack_rcd = msg->ackid;
+ ldcp->tx_ackd_head = tx_head;
+ break;
+ }
+ if (tx_head == ldcp->tx_head) {
+ /* could not find packet */
+ DWARN(ldcp->id,
+ "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
+ ldcp->id);
+ break;
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Process incoming control message
+ * Return 0 - session can continue
+ * EAGAIN - reprocess packet - state was changed
+ * ECONNRESET - channel was reset
+ */
+static int
+i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
+{
+ int rv = 0;
+
+ switch (ldcp->tstate) {
+
+ case TS_OPEN:
+ case TS_READY:
+
+ switch (msg->ctrl & LDC_CTRL_MASK) {
+ case LDC_VER:
+ /* process version message */
+ rv = i_ldc_process_VER(ldcp, msg);
+ break;
+ default:
+ DWARN(ldcp->id,
+ "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
+ "tstate=0x%x\n", ldcp->id,
+ (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
+ break;
+ }
+
+ break;
+
+ case TS_VREADY:
+
+ switch (msg->ctrl & LDC_CTRL_MASK) {
+ case LDC_VER:
+ /* peer is redoing version negotiation */
+ (void) i_ldc_txq_reconf(ldcp);
+ i_ldc_reset_state(ldcp);
+ rv = EAGAIN;
+ break;
+ case LDC_RTS:
+ /* process RTS message */
+ rv = i_ldc_process_RTS(ldcp, msg);
+ break;
+ case LDC_RTR:
+ /* process RTR message */
+ rv = i_ldc_process_RTR(ldcp, msg);
+ break;
+ case LDC_RDX:
+ /* process RDX message */
+ rv = i_ldc_process_RDX(ldcp, msg);
+ break;
+ default:
+ DWARN(ldcp->id,
+ "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
+ "tstate=0x%x\n", ldcp->id,
+ (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
+ break;
+ }
+
+ break;
+
+ case TS_UP:
+
+ switch (msg->ctrl & LDC_CTRL_MASK) {
+ case LDC_VER:
+ DWARN(ldcp->id,
+ "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
+ "- LDC reset\n", ldcp->id);
+ /* peer is redoing version negotiation */
+ (void) i_ldc_txq_reconf(ldcp);
+ i_ldc_reset_state(ldcp);
+ rv = EAGAIN;
+ break;
+
+ case LDC_RDX:
+ /* process RDX message */
+ rv = i_ldc_process_RDX(ldcp, msg);
+ break;
+
+ default:
+ DWARN(ldcp->id,
+ "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
+ "tstate=0x%x\n", ldcp->id,
+ (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
+ break;
+ }
+ }
+
+ return (rv);
+}
+
+/*
+ * Register channel with the channel nexus
+ */
+static int
+i_ldc_register_channel(ldc_chan_t *ldcp)
+{
+ int rv = 0;
+ ldc_cnex_t *cinfo = &ldcssp->cinfo;
+
+ if (cinfo->dip == NULL) {
+ DWARN(ldcp->id,
+ "i_ldc_register_channel: cnex has not registered\n");
+ return (EAGAIN);
+ }
+
+ rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_register_channel: cannot register channel\n");
+ return (rv);
+ }
+
+ rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
+ i_ldc_tx_hdlr, ldcp, NULL);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_register_channel: cannot add Tx interrupt\n");
+ (void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
+ return (rv);
+ }
+
+ rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
+ i_ldc_rx_hdlr, ldcp, NULL);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_register_channel: cannot add Rx interrupt\n");
+ (void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
+ (void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
+ return (rv);
+ }
+
+ ldcp->tstate |= TS_CNEX_RDY;
+
+ return (0);
+}
+
+/*
+ * Unregister a channel with the channel nexus
+ */
+static int
+i_ldc_unregister_channel(ldc_chan_t *ldcp)
+{
+ int rv = 0;
+ ldc_cnex_t *cinfo = &ldcssp->cinfo;
+
+ if (cinfo->dip == NULL) {
+ DWARN(ldcp->id,
+ "i_ldc_unregister_channel: cnex has not registered\n");
+ return (EAGAIN);
+ }
+
+ if (ldcp->tstate & TS_CNEX_RDY) {
+
+ rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_unregister_channel: err removing Rx intr\n");
+ }
+ rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_unregister_channel: err removing Tx intr\n");
+ }
+ rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_unregister_channel: cannot unreg channel\n");
+ }
+
+ ldcp->tstate &= ~TS_CNEX_RDY;
+ }
+
+ return (0);
+}
+
+
+/*
+ * LDC transmit interrupt handler
+ * triggered for channel up/down/reset events
+ * and Tx queue content changes
+ */
+static uint_t
+i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
+{
+ _NOTE(ARGUNUSED(arg2))
+
+ int rv;
+ ldc_chan_t *ldcp;
+ boolean_t notify_client = B_FALSE;
+ uint64_t notify_event = 0;
+
+ /* Get the channel for which interrupt was received */
+ ASSERT(arg1 != NULL);
+ ldcp = (ldc_chan_t *)arg1;
+
+ D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
+ ldcp->id, ldcp);
+
+ /* Lock channel */
+ mutex_enter(&ldcp->lock);
+
+ rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
+ &ldcp->link_state);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
+ ldcp->id, rv);
+ mutex_exit(&ldcp->lock);
+ return (DDI_INTR_CLAIMED);
+ }
+
+ /*
+ * reset the channel state if the channel went down
+ * (other side unconfigured queue) or channel was reset
+ * (other side reconfigured its queue)
+ */
+ if (ldcp->link_state == LDC_CHANNEL_DOWN) {
+ D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_DOWN;
+ }
+
+ if (ldcp->link_state == LDC_CHANNEL_RESET) {
+ D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_RESET;
+ }
+
+ if (ldcp->tstate == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) {
+ D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_RESET;
+ ldcp->tstate |= TS_LINK_READY;
+ ldcp->status = LDC_READY;
+ }
+
+ /* if callbacks are disabled, do not notify */
+ if (!ldcp->cb_enabled)
+ notify_client = B_FALSE;
+
+ if (notify_client)
+ ldcp->cb_inprogress = B_TRUE;
+
+ /* Unlock channel */
+ mutex_exit(&ldcp->lock);
+
+ if (notify_client) {
+ rv = ldcp->cb(notify_event, ldcp->cb_arg);
+ if (rv) {
+ DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
+ "failure", ldcp->id);
+ }
+ mutex_enter(&ldcp->lock);
+ ldcp->cb_inprogress = B_FALSE;
+ mutex_exit(&ldcp->lock);
+ }
+
+ mutex_enter(&ldcp->lock);
+ i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
+ mutex_exit(&ldcp->lock);
+
+ D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
+
+ return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * LDC receive interrupt handler
+ * triggered for channel with data pending to read
+ * i.e. Rx queue content changes
+ */
+static uint_t
+i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
+{
+ _NOTE(ARGUNUSED(arg2))
+
+ int rv;
+ uint64_t rx_head, rx_tail;
+ ldc_msg_t *msg;
+ ldc_chan_t *ldcp;
+ boolean_t notify_client = B_FALSE;
+ uint64_t notify_event = 0;
+
+ /* Get the channel for which interrupt was received */
+ if (arg1 == NULL) {
+ cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ ldcp = (ldc_chan_t *)arg1;
+
+ D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
+ ldcp->id, ldcp);
+
+ /* Lock channel */
+ mutex_enter(&ldcp->lock);
+
+ /* mark interrupt as pending */
+ ldcp->intr_pending = B_TRUE;
+
+ /*
+ * Read packet(s) from the queue
+ */
+ for (;;) {
+
+ rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
+ &ldcp->link_state);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "i_ldc_rx_hdlr: (0x%lx) cannot read "
+ "queue ptrs, rv=0x%d\n", ldcp->id, rv);
+ i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
+ mutex_exit(&ldcp->lock);
+ return (DDI_INTR_CLAIMED);
+ }
+
+ /*
+ * reset the channel state if the channel went down
+ * (other side unconfigured queue) or channel was reset
+ * (other side reconfigured its queue)
+ */
+ if (ldcp->link_state == LDC_CHANNEL_DOWN) {
+ D1(ldcp->id, "i_ldc_rx_hdlr: channel link down\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_DOWN;
+ break;
+ }
+ if (ldcp->link_state == LDC_CHANNEL_RESET) {
+ D1(ldcp->id, "i_ldc_rx_hdlr: channel link reset\n",
+ ldcp->id);
+ i_ldc_reset(ldcp);
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_RESET;
+ }
+
+ if (ldcp->tstate == TS_OPEN &&
+ ldcp->link_state == LDC_CHANNEL_UP) {
+ D1(ldcp->id, "i_ldc_rx_hdlr: channel link up\n",
+ ldcp->id);
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_RESET;
+ ldcp->tstate |= TS_LINK_READY;
+ ldcp->status = LDC_READY;
+ }
+
+ if (rx_head == rx_tail) {
+ D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
+ ldcp->id);
+ break;
+ }
+ D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
+ rx_head, rx_tail);
+ DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
+ ldcp->rx_q_va + rx_head);
+
+ /* get the message */
+ msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
+
+ /* if channel is in RAW mode or data pkt, notify and return */
+ if (ldcp->mode == LDC_MODE_RAW) {
+ notify_client = B_TRUE;
+ notify_event |= LDC_EVT_READ;
+ break;
+ }
+
+ if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
+
+ /* discard packet if channel is not up */
+ if (ldcp->tstate != TS_UP) {
+
+ /* move the head one position */
+ rx_head = (rx_head + LDC_PACKET_SIZE) %
+ (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
+
+ if (rv = i_ldc_set_rx_head(ldcp, rx_head))
+ break;
+
+ continue;
+ } else {
+ notify_client = B_TRUE;
+ notify_event |= LDC_EVT_READ;
+ break;
+ }
+ }
+
+ /* Check the sequence ID for the message received */
+ if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
+
+ DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
+ "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
+
+ /* Reset last_msg_rcd to start of message */
+ if (ldcp->first_fragment != 0) {
+ ldcp->last_msg_rcd =
+ ldcp->first_fragment - 1;
+ ldcp->first_fragment = 0;
+ }
+ /*
+ * Send a NACK due to seqid mismatch
+ */
+ rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
+ (msg->ctrl & LDC_CTRL_MASK));
+
+ if (rv) {
+ cmn_err(CE_NOTE,
+ "i_ldc_rx_hdlr: (0x%lx) err sending "
+ "CTRL/NACK msg\n", ldcp->id);
+ }
+
+ /* purge receive queue */
+ (void) i_ldc_set_rx_head(ldcp, rx_tail);
+ break;
+ }
+
+ /* record the message ID */
+ ldcp->last_msg_rcd = msg->seqid;
+
+ /* process control messages */
+ if (msg->type & LDC_CTRL) {
+ /* save current internal state */
+ uint64_t tstate = ldcp->tstate;
+
+ rv = i_ldc_ctrlmsg(ldcp, msg);
+ if (rv == EAGAIN) {
+ /* re-process pkt - state was adjusted */
+ continue;
+ }
+ if (rv == ECONNRESET) {
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_RESET;
+ break;
+ }
+
+ /*
+ * control message processing was successful
+ * channel transitioned to ready for communication
+ */
+ if (rv == 0 && ldcp->tstate == TS_UP &&
+ tstate != ldcp->tstate) {
+ notify_client = B_TRUE;
+ notify_event = LDC_EVT_UP;
+ }
+ }
+
+ /* process data ACKs */
+ if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
+ (void) i_ldc_process_data_ACK(ldcp, msg);
+ }
+
+ /* move the head one position */
+ rx_head = (rx_head + LDC_PACKET_SIZE) %
+ (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
+ if (rv = i_ldc_set_rx_head(ldcp, rx_head))
+ break;
+
+ } /* for */
+
+ /* if callbacks are disabled, do not notify */
+ if (!ldcp->cb_enabled)
+ notify_client = B_FALSE;
+
+ if (notify_client)
+ ldcp->cb_inprogress = B_TRUE;
+
+ /* Unlock channel */
+ mutex_exit(&ldcp->lock);
+
+ if (notify_client) {
+ rv = ldcp->cb(notify_event, ldcp->cb_arg);
+ if (rv) {
+ DWARN(ldcp->id,
+ "i_ldc_rx_hdlr: (0x%llx) callback failure",
+ ldcp->id);
+ }
+ mutex_enter(&ldcp->lock);
+ ldcp->cb_inprogress = B_FALSE;
+ mutex_exit(&ldcp->lock);
+ }
+
+ mutex_enter(&ldcp->lock);
+
+ /*
+ * If there are data packets in the queue, ldc_read will clear the
+ * interrupt after draining the queue; otherwise clear it here
+ */
+ if ((notify_event & LDC_EVT_READ) == 0) {
+ i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
+ }
+
+ mutex_exit(&ldcp->lock);
+
+ D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
+ return (DDI_INTR_CLAIMED);
+}
+
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * LDC API functions
+ */
+
+/*
+ * Initialize the channel. Allocate internal structure and memory for
+ * TX/RX queues, and initialize locks.
+ */
+int
+ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
+{
+ ldc_chan_t *ldcp;
+ int rv, exit_val;
+ uint64_t ra_base, nentries;
+
+ exit_val = EINVAL; /* guarantee an error if exit on failure */
+
+ if (attr == NULL) {
+ DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
+ return (EINVAL);
+ }
+ if (handle == NULL) {
+ DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
+ return (EINVAL);
+ }
+
+ /* check if channel is valid */
+ rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
+ if (rv == H_ECHANNEL) {
+ DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
+ return (EINVAL);
+ }
+
+ /* check if the channel has already been initialized */
+ mutex_enter(&ldcssp->lock);
+ ldcp = ldcssp->chan_list;
+ while (ldcp != NULL) {
+ if (ldcp->id == id) {
+ DWARN(id, "ldc_init: (0x%llx) already initialized\n",
+ id);
+ mutex_exit(&ldcssp->lock);
+ return (EADDRINUSE);
+ }
+ ldcp = ldcp->next;
+ }
+ mutex_exit(&ldcssp->lock);
+
+ ASSERT(ldcp == NULL);
+
+ *handle = 0;
+
+ /* Allocate an ldcp structure */
+ ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
+
+ /* Initialize the channel lock */
+ mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* Channel specific processing */
+ mutex_enter(&ldcp->lock);
+
+ /* Initialize the channel */
+ ldcp->id = id;
+ ldcp->cb = NULL;
+ ldcp->cb_arg = NULL;
+ ldcp->cb_inprogress = B_FALSE;
+ ldcp->cb_enabled = B_FALSE;
+ ldcp->next = NULL;
+
+ /* Read attributes */
+ ldcp->mode = attr->mode;
+ ldcp->devclass = attr->devclass;
+ ldcp->devinst = attr->instance;
+
+ ldcp->rx_q_entries =
+ (attr->qlen > 0) ? attr->qlen : LDC_QUEUE_ENTRIES;
+ ldcp->tx_q_entries = ldcp->rx_q_entries;
+
+ D1(ldcp->id,
+ "ldc_init: (0x%llx) channel attributes, class=0x%x, "
+ "instance=0x%llx,mode=%d, qlen=%d\n",
+ ldcp->id, ldcp->devclass, ldcp->devinst,
+ ldcp->mode, ldcp->rx_q_entries);
+
+ ldcp->next_vidx = 0;
+ ldcp->tstate = 0;
+ ldcp->hstate = 0;
+ ldcp->last_msg_snt = LDC_INIT_SEQID;
+ ldcp->last_ack_rcd = 0;
+ ldcp->last_msg_rcd = 0;
+
+ ldcp->stream_bufferp = NULL;
+ ldcp->exp_dring_list = NULL;
+ ldcp->imp_dring_list = NULL;
+ ldcp->mhdl_list = NULL;
+
+ /* Initialize payload size depending on whether channel is reliable */
+ switch (ldcp->mode) {
+ case LDC_MODE_RAW:
+ ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
+ ldcp->read_p = i_ldc_read_raw;
+ ldcp->write_p = i_ldc_write_raw;
+ ldcp->mtu = 0;
+ break;
+ case LDC_MODE_UNRELIABLE:
+ ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
+ ldcp->read_p = i_ldc_read_packet;
+ ldcp->write_p = i_ldc_write_packet;
+ ldcp->mtu = 0;
+ break;
+ case LDC_MODE_RELIABLE:
+ ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
+ ldcp->read_p = i_ldc_read_packet;
+ ldcp->write_p = i_ldc_write_packet;
+ ldcp->mtu = 0;
+ break;
+ case LDC_MODE_STREAM:
+ ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
+
+ ldcp->stream_remains = 0;
+ ldcp->stream_offset = 0;
+ ldcp->mtu = LDC_STREAM_MTU;
+ ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
+ ldcp->read_p = i_ldc_read_stream;
+ ldcp->write_p = i_ldc_write_stream;
+ break;
+ default:
+ exit_val = EINVAL;
+ goto cleanup_on_exit;
+ }
+
+ /* Create a transmit queue */
+ ldcp->tx_q_va = (uint64_t)
+ contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+ if (ldcp->tx_q_va == NULL) {
+ cmn_err(CE_WARN,
+ "ldc_init: (0x%lx) TX queue allocation failed\n",
+ ldcp->id);
+ exit_val = ENOMEM;
+ goto cleanup_on_exit;
+ }
+ ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
+
+ D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
+ ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
+
+ ldcp->tstate |= TS_TXQ_RDY;
+
+ /* Create a receive queue */
+ ldcp->rx_q_va = (uint64_t)
+ contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
+ if (ldcp->rx_q_va == NULL) {
+ cmn_err(CE_WARN,
+ "ldc_init: (0x%lx) RX queue allocation failed\n",
+ ldcp->id);
+ exit_val = ENOMEM;
+ goto cleanup_on_exit;
+ }
+ ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
+
+ D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
+ ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
+
+ ldcp->tstate |= TS_RXQ_RDY;
+
+ /* Init descriptor ring and memory handle list lock */
+ mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* mark status as INITialized */
+ ldcp->status = LDC_INIT;
+
+ mutex_exit(&ldcp->lock);
+
+ /* Add to channel list */
+ mutex_enter(&ldcssp->lock);
+ ldcp->next = ldcssp->chan_list;
+ ldcssp->chan_list = ldcp;
+ ldcssp->channel_count++;
+ mutex_exit(&ldcssp->lock);
+
+ /* set the handle */
+ *handle = (ldc_handle_t)ldcp;
+
+ D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
+
+ return (0);
+
+cleanup_on_exit:
+
+ if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
+ kmem_free(ldcp->stream_bufferp, ldcp->mtu);
+
+ if (ldcp->tstate & TS_TXQ_RDY)
+ contig_mem_free((caddr_t)ldcp->tx_q_va,
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
+
+ if (ldcp->tstate & TS_RXQ_RDY)
+ contig_mem_free((caddr_t)ldcp->rx_q_va,
+ (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
+
+ mutex_exit(&ldcp->lock);
+ mutex_destroy(&ldcp->lock);
+
+ if (ldcp)
+ kmem_free(ldcp, sizeof (ldc_chan_t));
+
+ return (exit_val);
+}
+
+/*
+ * Finalizes the LDC connection. It will return EBUSY if the
+ * channel is open. A ldc_close() has to be done prior to
+ * a ldc_fini operation. It frees the TX/RX queues associated
+ * with the channel
+ */
+int
+ldc_fini(ldc_handle_t handle)
+{
+ ldc_chan_t *ldcp;
+ ldc_chan_t *tmp_ldcp;
+ uint64_t id;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+ id = ldcp->id;
+
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->tstate > TS_INIT) {
+ DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EBUSY);
+ }
+
+ /* Remove from the channel list */
+ mutex_enter(&ldcssp->lock);
+ tmp_ldcp = ldcssp->chan_list;
+ if (tmp_ldcp == ldcp) {
+ ldcssp->chan_list = ldcp->next;
+ ldcp->next = NULL;
+ } else {
+ while (tmp_ldcp != NULL) {
+ if (tmp_ldcp->next == ldcp) {
+ tmp_ldcp->next = ldcp->next;
+ ldcp->next = NULL;
+ break;
+ }
+ tmp_ldcp = tmp_ldcp->next;
+ }
+ if (tmp_ldcp == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
+ mutex_exit(&ldcssp->lock);
+ mutex_exit(&ldcp->lock);
+ return (EINVAL);
+ }
+ }
+
+ ldcssp->channel_count--;
+
+ mutex_exit(&ldcssp->lock);
+
+ /* Free the map table for this channel */
+ if (ldcp->mtbl) {
+ (void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
+ contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
+ mutex_destroy(&ldcp->mtbl->lock);
+ kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
+ }
+
+ /* Destroy descriptor ring and memory handle list lock */
+ mutex_destroy(&ldcp->exp_dlist_lock);
+ mutex_destroy(&ldcp->imp_dlist_lock);
+ mutex_destroy(&ldcp->mlist_lock);
+
+ /* Free the stream buffer for STREAM_MODE */
+ if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
+ kmem_free(ldcp->stream_bufferp, ldcp->mtu);
+
+ /* Free the RX queue */
+ contig_mem_free((caddr_t)ldcp->rx_q_va,
+ (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
+ ldcp->tstate &= ~TS_RXQ_RDY;
+
+ /* Free the TX queue */
+ contig_mem_free((caddr_t)ldcp->tx_q_va,
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
+ ldcp->tstate &= ~TS_TXQ_RDY;
+
+
+ mutex_exit(&ldcp->lock);
+
+ /* Destroy mutex */
+ mutex_destroy(&ldcp->lock);
+
+ /* free channel structure */
+ kmem_free(ldcp, sizeof (ldc_chan_t));
+
+ D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
+
+ return (0);
+}
+
+/*
+ * Open the LDC channel for use. It registers the TX/RX queues
+ * with the Hypervisor. It also specifies the interrupt number
+ * and target CPU for this channel
+ */
+int
+ldc_open(ldc_handle_t handle)
+{
+ ldc_chan_t *ldcp;
+ int rv;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
+ return (EINVAL);
+ }
+
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->tstate < TS_INIT) {
+ DWARN(ldcp->id,
+ "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EFAULT);
+ }
+ if (ldcp->tstate >= TS_OPEN) {
+ DWARN(ldcp->id,
+ "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EFAULT);
+ }
+
+ /*
+ * Unregister/Register the tx queue with the hypervisor
+ */
+ rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_open: (0x%lx) channel tx queue unconf failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_open: (0x%lx) channel tx queue conf failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
+ ldcp->id);
+
+ /*
+ * Unregister/Register the rx queue with the hypervisor
+ */
+ rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_open: (0x%lx) channel rx queue unconf failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_open: (0x%lx) channel rx queue conf failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
+ ldcp->id);
+
+ ldcp->tstate |= TS_QCONF_RDY;
+
+ /* Register the channel with the channel nexus */
+ rv = i_ldc_register_channel(ldcp);
+ if (rv && rv != EAGAIN) {
+ cmn_err(CE_WARN,
+ "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
+ (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
+ (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ /* mark channel in OPEN state */
+ ldcp->status = LDC_OPEN;
+
+ /* Read channel state */
+ rv = hv_ldc_tx_get_state(ldcp->id,
+ &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_open: (0x%lx) cannot read channel state\n",
+ ldcp->id);
+ (void) i_ldc_unregister_channel(ldcp);
+ (void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
+ (void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ /*
+ * set the ACKd head to current head location for reliable &
+ * streaming mode
+ */
+ ldcp->tx_ackd_head = ldcp->tx_head;
+
+ /* mark channel ready if HV reports link is UP (peer alloc'd Rx queue) */
+ if (ldcp->link_state == LDC_CHANNEL_UP ||
+ ldcp->link_state == LDC_CHANNEL_RESET) {
+ ldcp->tstate |= TS_LINK_READY;
+ ldcp->status = LDC_READY;
+ }
+
+ /*
+ * if channel is being opened in RAW mode - no handshake is needed
+ * switch the channel to the READY and UP states
+ */
+ if (ldcp->mode == LDC_MODE_RAW) {
+ ldcp->tstate = TS_UP; /* set bits associated with LDC UP */
+ ldcp->status = LDC_UP;
+ }
+
+ mutex_exit(&ldcp->lock);
+
+ /*
+ * Increment number of open channels
+ */
+ mutex_enter(&ldcssp->lock);
+ ldcssp->channels_open++;
+ mutex_exit(&ldcssp->lock);
+
+ D1(ldcp->id,
+ "ldc_open: (0x%llx) channel (0x%p) open for use (tstate=0x%x)\n",
+ ldcp->id, ldcp, ldcp->tstate);
+
+ return (0);
+}
+
+/*
+ * Close the LDC connection. It will return EBUSY if there
+ * are memory segments or descriptor rings either bound to or
+ * mapped over the channel
+ */
+int
+ldc_close(ldc_handle_t handle)
+{
+ ldc_chan_t *ldcp;
+ int rv = 0;
+ boolean_t chk_done = B_FALSE;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ /* return error if channel is not open */
+ if (ldcp->tstate < TS_OPEN) {
+ DWARN(ldcp->id,
+ "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EFAULT);
+ }
+
+ /* if any memory handles, drings, are bound or mapped cannot close */
+ if (ldcp->mhdl_list != NULL) {
+ DWARN(ldcp->id,
+ "ldc_close: (0x%llx) channel has bound memory handles\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EBUSY);
+ }
+ if (ldcp->exp_dring_list != NULL) {
+ DWARN(ldcp->id,
+ "ldc_close: (0x%llx) channel has bound descriptor rings\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EBUSY);
+ }
+ if (ldcp->imp_dring_list != NULL) {
+ DWARN(ldcp->id,
+ "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EBUSY);
+ }
+
+ /*
+ * Wait for pending transmits to complete i.e Tx queue to drain
+ * if there are pending pkts - wait 1 ms and retry again
+ */
+ for (;;) {
+
+ rv = hv_ldc_tx_get_state(ldcp->id,
+ &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ if (ldcp->tx_head == ldcp->tx_tail ||
+ ldcp->link_state != LDC_CHANNEL_UP) {
+ break;
+ }
+
+ if (chk_done) {
+ DWARN(ldcp->id,
+ "ldc_close: (0x%llx) Tx queue drain timeout\n",
+ ldcp->id);
+ break;
+ }
+
+ /* wait for one ms and try again */
+ delay(drv_usectohz(1000));
+ chk_done = B_TRUE;
+ }
+
+ /*
+ * Unregister the channel with the nexus
+ */
+ rv = i_ldc_unregister_channel(ldcp);
+ if (rv && rv != EAGAIN) {
+ cmn_err(CE_WARN,
+ "ldc_close: (0x%lx) channel unregister failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (rv);
+ }
+
+ /*
+ * Unregister queues
+ */
+ rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_close: (0x%lx) channel TX queue unconf failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+ rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
+ if (rv) {
+ cmn_err(CE_WARN,
+ "ldc_close: (0x%lx) channel RX queue unconf failed\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ ldcp->tstate &= ~TS_QCONF_RDY;
+
+ /* Reset channel state information */
+ i_ldc_reset_state(ldcp);
+
+ /* Mark channel as down and in initialized state */
+ ldcp->tx_ackd_head = 0;
+ ldcp->tx_head = 0;
+ ldcp->tstate = TS_INIT;
+ ldcp->status = LDC_INIT;
+
+ mutex_exit(&ldcp->lock);
+
+ /* Decrement number of open channels */
+ mutex_enter(&ldcssp->lock);
+ ldcssp->channels_open--;
+ mutex_exit(&ldcssp->lock);
+
+ D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
+
+ return (0);
+}
+
+/*
+ * Register channel callback
+ */
+int
+ldc_reg_callback(ldc_handle_t handle,
+ uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
+{
+ ldc_chan_t *ldcp;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_reg_callback: invalid channel handle\n");
+ return (EINVAL);
+ }
+ if (((uint64_t)cb) < KERNELBASE) {
+ DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->cb) {
+ DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+ if (ldcp->cb_inprogress) {
+ DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EWOULDBLOCK);
+ }
+
+ ldcp->cb = cb;
+ ldcp->cb_arg = arg;
+ ldcp->cb_enabled = B_TRUE;
+
+ D1(ldcp->id,
+ "ldc_reg_callback: (0x%llx) registered callback for channel\n",
+ ldcp->id);
+
+ mutex_exit(&ldcp->lock);
+
+ return (0);
+}
+
+/*
+ * Unregister channel callback
+ */
+int
+ldc_unreg_callback(ldc_handle_t handle)
+{
+ ldc_chan_t *ldcp;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_unreg_callback: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->cb == NULL) {
+ DWARN(ldcp->id,
+ "ldc_unreg_callback: (0x%llx) no callback exists\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+ if (ldcp->cb_inprogress) {
+ DWARN(ldcp->id,
+ "ldc_unreg_callback: (0x%llx) callback active\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EWOULDBLOCK);
+ }
+
+ ldcp->cb = NULL;
+ ldcp->cb_arg = NULL;
+ ldcp->cb_enabled = B_FALSE;
+
+ D1(ldcp->id,
+ "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
+ ldcp->id);
+
+ mutex_exit(&ldcp->lock);
+
+ return (0);
+}
+
+
+/*
+ * Bring a channel up by initiating a handshake with the peer
+ * This call is asynchronous. It will complete at a later point
+ * in time when the peer responds back with an RTR.
+ */
+int
+ldc_up(ldc_handle_t handle)
+{
+ int rv;
+ ldc_chan_t *ldcp;
+ ldc_msg_t *ldcmsg;
+ uint64_t tx_tail;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->tstate == TS_UP) {
+ D2(ldcp->id,
+ "ldc_up: (0x%llx) channel is already in UP state\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (0);
+ }
+
+ /* if the channel is in RAW mode - mark it as UP, if READY */
+ if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
+ ldcp->tstate = TS_UP;
+ mutex_exit(&ldcp->lock);
+ return (0);
+ }
+
+ /* Don't start another handshake if there is one in progress */
+ if (ldcp->hstate) {
+ D2(ldcp->id,
+ "ldc_up: (0x%llx) channel handshake in progress\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (0);
+ }
+
+ /* get the current tail for the LDC msg */
+ rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
+ if (rv) {
+ DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (ECONNREFUSED);
+ }
+
+ ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+ ZERO_PKT(ldcmsg);
+
+ ldcmsg->type = LDC_CTRL;
+ ldcmsg->stype = LDC_INFO;
+ ldcmsg->ctrl = LDC_VER;
+ ldcp->next_vidx = 0;
+ bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
+
+ DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
+
+ /* initiate the send by calling into HV and set the new tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv) {
+ DWARN(ldcp->id,
+ "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
+ ldcp->id, rv);
+ mutex_exit(&ldcp->lock);
+ return (rv);
+ }
+
+ ldcp->tx_tail = tx_tail;
+ D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
+
+ mutex_exit(&ldcp->lock);
+
+ return (rv);
+}
+
+
+/*
+ * Reset a channel by re-registering the Rx queues
+ */
+int
+ldc_reset(ldc_handle_t handle)
+{
+ ldc_chan_t *ldcp;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_reset: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+ i_ldc_reset(ldcp);
+ mutex_exit(&ldcp->lock);
+
+ return (0);
+}
+
+/*
+ * Get the current channel status
+ */
+int
+ldc_status(ldc_handle_t handle, ldc_status_t *status)
+{
+ ldc_chan_t *ldcp;
+
+ if (handle == NULL || status == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ *status = ((ldc_chan_t *)handle)->status;
+
+ D1(ldcp->id,
+ "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
+ return (0);
+}
+
+
+/*
+ * Set the channel's callback mode - enable/disable callbacks
+ */
+int
+ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
+{
+ ldc_chan_t *ldcp;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_set_intr_mode: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ /*
+ * Record no callbacks should be invoked
+ */
+ mutex_enter(&ldcp->lock);
+
+ switch (cmode) {
+ case LDC_CB_DISABLE:
+ if (!ldcp->cb_enabled) {
+ DWARN(ldcp->id,
+ "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
+ ldcp->id);
+ break;
+ }
+ ldcp->cb_enabled = B_FALSE;
+
+ D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
+ ldcp->id);
+ break;
+
+ case LDC_CB_ENABLE:
+ if (ldcp->cb_enabled) {
+ DWARN(ldcp->id,
+ "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
+ ldcp->id);
+ break;
+ }
+ ldcp->cb_enabled = B_TRUE;
+
+ D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
+ ldcp->id);
+ break;
+ }
+
+ mutex_exit(&ldcp->lock);
+
+ return (0);
+}
+
+/*
+ * Check to see if there are packets on the incoming queue
+ * Will return isempty = B_FALSE if there are packets
+ */
+int
+ldc_chkq(ldc_handle_t handle, boolean_t *isempty)
+{
+ int rv;
+ uint64_t rx_head, rx_tail;
+ ldc_chan_t *ldcp;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ *isempty = B_TRUE;
+
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->tstate != TS_UP) {
+ D1(ldcp->id,
+ "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (ECONNRESET);
+ }
+
+ /* Read packet(s) from the queue */
+ rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
+ &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+ /* reset the channel state if the channel went down */
+ if (ldcp->link_state == LDC_CHANNEL_DOWN ||
+ ldcp->link_state == LDC_CHANNEL_RESET) {
+ i_ldc_reset(ldcp);
+ mutex_exit(&ldcp->lock);
+ return (ECONNRESET);
+ }
+
+ if (rx_head != rx_tail) {
+ D1(ldcp->id, "ldc_chkq: (0x%llx) queue has pkt(s)\n", ldcp->id);
+ *isempty = B_FALSE;
+ }
+
+ mutex_exit(&ldcp->lock);
+
+ return (0);
+}
+
+
+/*
+ * Read 'size' amount of bytes or less. If incoming buffer
+ * is more than 'size', ENOBUFS is returned.
+ *
+ * On return, size contains the number of bytes read.
+ *
+ * Dispatches to the mode-specific read routine via ldcp->read_p
+ * (raw / packet / stream).  After the read, the RX interrupt is
+ * cleared only when the read succeeded AND the queue is now drained,
+ * so a partially-drained queue keeps the interrupt pending.
+ */
+int
+ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
+{
+ ldc_chan_t *ldcp;
+ uint64_t rx_head = 0, rx_tail = 0;
+ int rv = 0, exit_val;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
+ return (EINVAL);
+ }
+
+ ldcp = (ldc_chan_t *)handle;
+
+ /* channel lock */
+ mutex_enter(&ldcp->lock);
+
+ if (ldcp->tstate != TS_UP) {
+ DWARN(ldcp->id,
+ "ldc_read: (0x%llx) channel is not in UP state\n",
+ ldcp->id);
+ exit_val = ECONNRESET;
+ } else {
+ exit_val = ldcp->read_p(ldcp, bufp, sizep);
+ }
+
+ /*
+ * if queue has been drained - clear interrupt
+ */
+ rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
+ &ldcp->link_state);
+ if (exit_val == 0 && rv == 0 && rx_head == rx_tail) {
+ i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
+ }
+
+ mutex_exit(&ldcp->lock);
+ return (exit_val);
+}
+
+/*
+ * Basic raw mondo read -
+ * no interpretation of mondo contents at all.
+ *
+ * Enter and exit with ldcp->lock held by caller
+ *
+ * The caller's buffer must be able to hold one full raw payload
+ * (LDC_PAYLOAD_SIZE_RAW); otherwise ENOBUFS is returned.  Exactly one
+ * queue entry is consumed per call; *sizep is set to 0 when the queue
+ * is empty, LDC_PAYLOAD_SIZE_RAW otherwise.
+ *
+ * NOTE(review): only LDC_CHANNEL_DOWN is treated as a link failure
+ * here, whereas ldc_chkq() also resets on LDC_CHANNEL_RESET - confirm
+ * the asymmetry is intentional.
+ */
+static int
+i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
+{
+ uint64_t q_size_mask;
+ ldc_msg_t *msgp;
+ uint8_t *msgbufp;
+ int rv = 0, space;
+ uint64_t rx_head, rx_tail;
+
+ space = *sizep;
+
+ if (space < LDC_PAYLOAD_SIZE_RAW)
+ return (ENOBUFS);
+
+ ASSERT(mutex_owned(&ldcp->lock));
+
+ /* compute mask for increment */
+ q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
+
+ /*
+ * Read packet(s) from the queue
+ */
+ rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
+ &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "ldc_read_raw: (0x%lx) unable to read queue ptrs",
+ ldcp->id);
+ return (EIO);
+ }
+ D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
+ " rxt=0x%llx, st=0x%llx\n",
+ ldcp->id, rx_head, rx_tail, ldcp->link_state);
+
+ /* reset the channel state if the channel went down */
+ if (ldcp->link_state == LDC_CHANNEL_DOWN) {
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ /*
+ * Check for empty queue
+ */
+ if (rx_head == rx_tail) {
+ *sizep = 0;
+ return (0);
+ }
+
+ /* get the message */
+ msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
+
+ /* if channel is in RAW mode, copy data and return */
+ msgbufp = (uint8_t *)&(msgp->raw[0]);
+
+ bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
+
+ DUMP_PAYLOAD(ldcp->id, msgbufp);
+
+ *sizep = LDC_PAYLOAD_SIZE_RAW;
+
+ /* consume the entry: advance head past this packet (power-of-2 wrap) */
+ rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
+ (void) i_ldc_set_rx_head(ldcp, rx_head);
+
+ return (rv);
+}
+
+/*
+ * Process LDC mondos to build larger packets
+ * with either un-reliable or reliable delivery.
+ *
+ * Enter and exit with ldcp->lock held by caller
+ *
+ * Reassembles a (possibly fragmented) message into target_bufp.
+ * On input *sizep is the buffer capacity; on output it is the number
+ * of bytes actually delivered (0 if no complete message was read).
+ * Control messages encountered while draining the queue are handed to
+ * i_ldc_ctrlmsg(); data ACKs update the private protocol head.
+ *
+ * NOTE(review): only LDC_CHANNEL_DOWN is checked after the HV queue
+ * query (not LDC_CHANNEL_RESET, unlike ldc_chkq()) - confirm intended.
+ */
+static int
+i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
+{
+ int rv = 0;
+ uint64_t rx_head = 0, rx_tail = 0;
+ uint64_t curr_head = 0;
+ ldc_msg_t *msg;
+ caddr_t target;
+ size_t len = 0, bytes_read = 0;
+ int loop_cnt = 0, chk_cnt = 0;
+ uint64_t q_size_mask;
+
+ target = target_bufp;
+
+ ASSERT(mutex_owned(&ldcp->lock));
+
+ /* reset first frag to 0 */
+ ldcp->first_fragment = 0;
+
+ /* compute mask for increment */
+ q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
+
+ /*
+ * Read packet(s) from the queue
+ */
+ rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
+ &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "ldc_read: (0x%lx) unable to read queue ptrs",
+ ldcp->id);
+ return (EIO);
+ }
+ D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
+ ldcp->id, curr_head, rx_tail, ldcp->link_state);
+
+ /* reset the channel state if the channel went down */
+ if (ldcp->link_state == LDC_CHANNEL_DOWN) {
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ for (;;) {
+
+ /* caught up with the tail - re-query the HV for new arrivals */
+ if (curr_head == rx_tail) {
+ rv = hv_ldc_rx_get_state(ldcp->id,
+ &rx_head, &rx_tail, &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "ldc_read: (0x%lx) cannot read queue ptrs",
+ ldcp->id);
+ return (EIO);
+ }
+ /* reset the channel state if the channel went down */
+ if (ldcp->link_state == LDC_CHANNEL_DOWN) {
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+ }
+
+ if (curr_head == rx_tail) {
+
+ /*
+ * If in the middle of a fragmented xfer, spin-poll
+ * (bounded by LDC_LOOP_CNT * LDC_CHK_CNT iterations)
+ * for the remaining fragments before timing out.
+ */
+ if (ldcp->first_fragment != 0) {
+ if (++loop_cnt > LDC_LOOP_CNT) {
+ loop_cnt = 0;
+ ++chk_cnt;
+ }
+ if (chk_cnt < LDC_CHK_CNT) {
+ continue;
+ } else {
+ *sizep = 0;
+ /* rewind so the peer's msg can be re-read */
+ ldcp->last_msg_rcd =
+ ldcp->first_fragment - 1;
+ DWARN(DBG_ALL_LDCS,
+ "ldc_read: (0x%llx) read timeout",
+ ldcp->id);
+ return (ETIMEDOUT);
+ }
+ }
+ *sizep = 0;
+ break;
+ }
+ /* data available - reset the poll counters */
+ loop_cnt = 0;
+ chk_cnt = 0;
+
+ D2(ldcp->id,
+ "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
+ ldcp->id, curr_head, rx_head, rx_tail);
+
+ /* get the message */
+ msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
+
+ DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
+ ldcp->rx_q_va + curr_head);
+
+ /* Check the message ID for the message received */
+ if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
+
+ DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
+ "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
+
+ /* Reset last_msg_rcd to start of message */
+ if (ldcp->first_fragment != 0) {
+ ldcp->last_msg_rcd =
+ ldcp->first_fragment - 1;
+ ldcp->first_fragment = 0;
+ }
+ /*
+ * Send a NACK -- invalid seqid
+ * get the current tail for the response
+ */
+ rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
+ (msg->ctrl & LDC_CTRL_MASK));
+ if (rv) {
+ cmn_err(CE_NOTE,
+ "ldc_read: (0x%lx) err sending "
+ "NACK msg\n", ldcp->id);
+ }
+
+ /* purge receive queue */
+ (void) i_ldc_set_rx_head(ldcp, rx_tail);
+
+ break;
+ }
+
+ /*
+ * Process any messages of type CTRL messages
+ * Future implementations should try to pass these to
+ * LDC transport by resetting the intr state.
+ *
+ * NOTE: not done as a switch() as type can be both ctrl+data
+ */
+ if (msg->type & LDC_CTRL) {
+ if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
+ if (rv == EAGAIN)
+ continue;
+ (void) i_ldc_set_rx_head(ldcp, rx_tail);
+ *sizep = 0;
+ bytes_read = 0;
+ rv = ECONNRESET;
+ break;
+ }
+ }
+
+ /* process data ACKs */
+ if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
+ (void) i_ldc_process_data_ACK(ldcp, msg);
+ }
+
+ /* process data messages */
+ if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
+
+ /* reliable/stream modes carry payload in rdata */
+ uint8_t *msgbuf = (uint8_t *)(
+ (ldcp->mode == LDC_MODE_RELIABLE ||
+ ldcp->mode == LDC_MODE_STREAM)
+ ? msg->rdata : msg->udata);
+
+ D2(ldcp->id,
+ "ldc_read: (0x%llx) received data msg\n", ldcp->id);
+
+ /* get the packet length */
+ len = (msg->env & LDC_LEN_MASK);
+
+ /*
+ * FUTURE OPTIMIZATION:
+ * dont need to set q head for every
+ * packet we read just need to do this when
+ * we are done or need to wait for more
+ * mondos to make a full packet - this is
+ * currently expensive.
+ */
+
+ if (ldcp->first_fragment == 0) {
+
+ /*
+ * first packets should always have the start
+ * bit set (even for a single packet). If not
+ * throw away the packet
+ */
+ if (!(msg->env & LDC_FRAG_START)) {
+
+ DWARN(DBG_ALL_LDCS,
+ "ldc_read: (0x%llx) not start - "
+ "frag=%x\n", ldcp->id,
+ (msg->env) & LDC_FRAG_MASK);
+
+ /* toss pkt, inc head, cont reading */
+ bytes_read = 0;
+ target = target_bufp;
+ curr_head =
+ (curr_head + LDC_PACKET_SIZE)
+ & q_size_mask;
+ if (rv = i_ldc_set_rx_head(ldcp,
+ curr_head))
+ break;
+
+ continue;
+ }
+
+ ldcp->first_fragment = msg->seqid;
+ } else {
+ /* check to see if this is a pkt w/ START bit */
+ if (msg->env & LDC_FRAG_START) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_read:(0x%llx) unexpected pkt"
+ " env=0x%x discarding %d bytes,"
+ " lastmsg=%d, currentmsg=%d\n",
+ ldcp->id, msg->env&LDC_FRAG_MASK,
+ bytes_read, ldcp->last_msg_rcd,
+ msg->seqid);
+
+ /* throw data we have read so far */
+ bytes_read = 0;
+ target = target_bufp;
+ ldcp->first_fragment = msg->seqid;
+
+ if (rv = i_ldc_set_rx_head(ldcp,
+ curr_head))
+ break;
+ }
+ }
+
+ /* copy (next) pkt into buffer */
+ if (len <= (*sizep - bytes_read)) {
+ bcopy(msgbuf, target, len);
+ target += len;
+ bytes_read += len;
+ } else {
+ /*
+ * there is not enough space in the buffer to
+ * read this pkt. throw message away & continue
+ * reading data from queue
+ */
+ DWARN(DBG_ALL_LDCS,
+ "ldc_read: (0x%llx) buffer too small, "
+ "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
+ curr_head, *sizep, bytes_read+len);
+
+ ldcp->first_fragment = 0;
+ target = target_bufp;
+ bytes_read = 0;
+
+ /* throw away everything received so far */
+ if (rv = i_ldc_set_rx_head(ldcp, curr_head))
+ break;
+
+ /* continue reading remaining pkts */
+ continue;
+ }
+ }
+
+ /* set the message id */
+ ldcp->last_msg_rcd = msg->seqid;
+
+ /* move the head one position */
+ curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
+
+ if (msg->env & LDC_FRAG_STOP) {
+
+ /*
+ * All pkts that are part of this fragmented transfer
+ * have been read or this was a single pkt read
+ * or there was an error
+ */
+
+ /* set the queue head */
+ if (rv = i_ldc_set_rx_head(ldcp, curr_head))
+ bytes_read = 0;
+
+ *sizep = bytes_read;
+
+ break;
+ }
+
+ /* advance head if it is a DATA ACK */
+ if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
+
+ /* set the queue head */
+ if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
+ bytes_read = 0;
+ break;
+ }
+
+ D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
+ ldcp->id, curr_head);
+ }
+
+ } /* for (;;) */
+
+
+ /*
+ * If useful data was read - Send msg ACK
+ * OPTIMIZE: do not send ACK for all msgs - use some frequency
+ */
+ if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
+ ldcp->mode == LDC_MODE_STREAM)) {
+
+ rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
+ if (rv != 0) {
+ cmn_err(CE_NOTE,
+ "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
+ return (0);
+ }
+ }
+
+ D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
+
+ return (rv);
+}
+
+/*
+ * Use underlying reliable packet mechanism to fetch
+ * and buffer incoming packets so we can hand them back as
+ * a basic byte stream.
+ *
+ * Enter and exit with ldcp->lock held by caller
+ *
+ * When the staging buffer is empty, refill it with one packet-mode
+ * read of up to ldcp->mtu bytes into ldcp->stream_bufferp; then hand
+ * back up to *sizep bytes from the buffer, tracking leftovers via
+ * stream_offset / stream_remains for subsequent calls.
+ */
+static int
+i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
+{
+ int rv;
+ size_t size;
+
+ ASSERT(mutex_owned(&ldcp->lock));
+
+ D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
+ ldcp->id, *sizep);
+
+ /* staging buffer drained - pull in the next full message */
+ if (ldcp->stream_remains == 0) {
+ size = ldcp->mtu;
+ rv = i_ldc_read_packet(ldcp,
+ (caddr_t)ldcp->stream_bufferp, &size);
+ D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
+ ldcp->id, size);
+
+ if (rv != 0)
+ return (rv);
+
+ ldcp->stream_remains = size;
+ ldcp->stream_offset = 0;
+ }
+
+ /* satisfy the caller from the staging buffer */
+ size = MIN(ldcp->stream_remains, *sizep);
+
+ bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
+ ldcp->stream_offset += size;
+ ldcp->stream_remains -= size;
+
+ D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
+ ldcp->id, size);
+
+ *sizep = size;
+ return (0);
+}
+
+/*
+ * Write specified amount of bytes to the channel
+ * in multiple pkts of pkt_payload size. Each
+ * packet is tagged with an unique packet ID in
+ * the case of a reliable transport.
+ *
+ * On return, size contains the number of bytes written.
+ *
+ * Public entry point: validates the arguments, requires the channel
+ * to be UP, and dispatches to the mode-specific write routine via
+ * ldcp->write_p (raw / packet / stream).  A zero-length write is a
+ * successful no-op.
+ */
+int
+ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
+{
+ ldc_chan_t *ldcp;
+ int rv = 0;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ /* check if non-zero data to write */
+ if (buf == NULL || sizep == NULL) {
+ DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EINVAL);
+ }
+
+ if (*sizep == 0) {
+ DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (0);
+ }
+
+ /* Check if channel is UP for data exchange */
+ if (ldcp->tstate != TS_UP) {
+ DWARN(ldcp->id,
+ "ldc_write: (0x%llx) channel is not in UP state\n",
+ ldcp->id);
+ *sizep = 0;
+ rv = ECONNRESET;
+ } else {
+ rv = ldcp->write_p(ldcp, buf, sizep);
+ }
+
+ mutex_exit(&ldcp->lock);
+
+ return (rv);
+}
+
+/*
+ * Write a raw packet to the channel
+ * On return, size contains the number of bytes written.
+ *
+ * Caller must hold ldcp->lock; channel must be in RAW mode.
+ * The whole payload must fit into a single packet (pkt_payload),
+ * otherwise EMSGSIZE is returned.  Exactly one TX queue entry is
+ * consumed.  EWOULDBLOCK means the queue was full (or the HV tail
+ * update timed out); ECONNRESET means the link failed and the channel
+ * was reset.
+ */
+static int
+i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
+{
+ ldc_msg_t *ldcmsg;
+ uint64_t tx_head, tx_tail, new_tail;
+ int rv = 0;
+ size_t size;
+
+ ASSERT(mutex_owned(&ldcp->lock));
+ ASSERT(ldcp->mode == LDC_MODE_RAW);
+
+ size = *sizep;
+
+ /*
+ * Check to see if the packet size is less than or
+ * equal to packet size support in raw mode
+ */
+ if (size > ldcp->pkt_payload) {
+ DWARN(ldcp->id,
+ "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
+ ldcp->id, *sizep);
+ *sizep = 0;
+ return (EMSGSIZE);
+ }
+
+ /* get the qptrs for the tx queue */
+ rv = hv_ldc_tx_get_state(ldcp->id,
+ &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
+ *sizep = 0;
+ return (EIO);
+ }
+
+ if (ldcp->link_state == LDC_CHANNEL_DOWN ||
+ ldcp->link_state == LDC_CHANNEL_RESET) {
+ DWARN(ldcp->id,
+ "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
+ i_ldc_reset(ldcp);
+ *sizep = 0;
+ return (ECONNRESET);
+ }
+
+ tx_tail = ldcp->tx_tail;
+ tx_head = ldcp->tx_head;
+ /* next tail, with power-of-2 queue wrap-around */
+ new_tail = (tx_tail + LDC_PACKET_SIZE) &
+ ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
+
+ /* tail catching up to head means the queue is full */
+ if (new_tail == tx_head) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
+ *sizep = 0;
+ return (EWOULDBLOCK);
+ }
+
+ D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
+ ldcp->id, size);
+
+ /* Send the data now */
+ ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+
+ /* copy the data into pkt */
+ bcopy((uint8_t *)buf, ldcmsg, size);
+
+ /* increment tail */
+ tx_tail = new_tail;
+
+ /*
+ * All packets have been copied into the TX queue
+ * update the tail ptr in the HV
+ */
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv) {
+ if (rv == EWOULDBLOCK) {
+ DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
+ ldcp->id);
+ *sizep = 0;
+ return (EWOULDBLOCK);
+ }
+
+ /* cannot write data - reset channel */
+ i_ldc_reset(ldcp);
+ *sizep = 0;
+ return (ECONNRESET);
+ }
+
+ ldcp->tx_tail = tx_tail;
+ *sizep = size;
+
+ D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
+
+ return (rv);
+}
+
+
+/*
+ * Write specified amount of bytes to the channel
+ * in multiple pkts of pkt_payload size. Each
+ * packet is tagged with an unique packet ID in
+ * the case of a reliable transport.
+ *
+ * On return, size contains the number of bytes written.
+ * This function needs to ensure that the write size is < MTU size
+ *
+ * Caller must hold ldcp->lock.  The payload is split across TX queue
+ * entries, each stamped with an incrementing seqid; the first packet
+ * gets LDC_FRAG_START and the last LDC_FRAG_STOP.  The write is
+ * all-or-nothing: if the queue cannot hold the whole message,
+ * EWOULDBLOCK is returned and nothing is queued.
+ */
+static int
+i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
+{
+ ldc_msg_t *ldcmsg;
+ uint64_t tx_head, tx_tail, new_tail, start;
+ uint64_t txq_size_mask, numavail;
+ uint8_t *msgbuf, *source = (uint8_t *)buf;
+ size_t len, bytes_written = 0, remaining;
+ int rv;
+ uint32_t curr_seqid;
+
+ ASSERT(mutex_owned(&ldcp->lock));
+
+ ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
+ ldcp->mode == LDC_MODE_UNRELIABLE ||
+ ldcp->mode == LDC_MODE_STREAM);
+
+ /* compute mask for increment */
+ txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
+
+ /* get the qptrs for the tx queue */
+ rv = hv_ldc_tx_get_state(ldcp->id,
+ &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
+ *size = 0;
+ return (EIO);
+ }
+
+ if (ldcp->link_state == LDC_CHANNEL_DOWN ||
+ ldcp->link_state == LDC_CHANNEL_RESET) {
+ DWARN(ldcp->id,
+ "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
+ *size = 0;
+ i_ldc_reset(ldcp);
+ return (ECONNRESET);
+ }
+
+ tx_tail = ldcp->tx_tail;
+ /* modulo wrap; equivalent to the mask since entries is a power of 2 */
+ new_tail = (tx_tail + LDC_PACKET_SIZE) %
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
+
+ /*
+ * Transport mode determines whether we use HV Tx head or the
+ * private protocol head (corresponding to last ACKd pkt) for
+ * determining how much we can write
+ */
+ tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
+ ldcp->mode == LDC_MODE_STREAM)
+ ? ldcp->tx_ackd_head : ldcp->tx_head;
+ if (new_tail == tx_head) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
+ *size = 0;
+ return (EWOULDBLOCK);
+ }
+
+ /*
+ * Make sure that the LDC Tx queue has enough space
+ */
+ numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
+ + ldcp->tx_q_entries - 1;
+ numavail %= ldcp->tx_q_entries;
+
+ if (*size > (numavail * ldcp->pkt_payload)) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
+ return (EWOULDBLOCK);
+ }
+
+ D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
+ ldcp->id, *size);
+
+ /* Send the data now */
+ bytes_written = 0;
+ curr_seqid = ldcp->last_msg_snt;
+ start = tx_tail;
+
+ while (*size > bytes_written) {
+
+ ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
+
+ /* reliable/stream modes carry payload in rdata */
+ msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
+ ldcp->mode == LDC_MODE_STREAM)
+ ? ldcmsg->rdata : ldcmsg->udata);
+
+ ldcmsg->type = LDC_DATA;
+ ldcmsg->stype = LDC_INFO;
+ ldcmsg->ctrl = 0;
+
+ remaining = *size - bytes_written;
+ len = min(ldcp->pkt_payload, remaining);
+ ldcmsg->env = (uint8_t)len;
+
+ curr_seqid++;
+ ldcmsg->seqid = curr_seqid;
+
+ DUMP_LDC_PKT(ldcp, "ldc_write snd data", (uint64_t)ldcmsg);
+
+ /* copy the data into pkt */
+ bcopy(source, msgbuf, len);
+
+ source += len;
+ bytes_written += len;
+
+ /* increment tail */
+ tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
+
+ ASSERT(tx_tail != tx_head);
+ }
+
+ /* Set the start and stop bits */
+ ldcmsg->env |= LDC_FRAG_STOP;
+ ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
+ ldcmsg->env |= LDC_FRAG_START;
+
+ /*
+ * All packets have been copied into the TX queue
+ * update the tail ptr in the HV
+ */
+ rv = i_ldc_set_tx_tail(ldcp, tx_tail);
+ if (rv == 0) {
+ /* commit: remember new tail and last seqid used */
+ ldcp->tx_tail = tx_tail;
+ ldcp->last_msg_snt = curr_seqid;
+ *size = bytes_written;
+ } else {
+ int rv2;
+
+ if (rv != EWOULDBLOCK) {
+ /* cannot write data - reset channel */
+ i_ldc_reset(ldcp);
+ *size = 0;
+ return (ECONNRESET);
+ }
+
+ DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
+ "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
+ rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
+ (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
+
+ rv2 = hv_ldc_tx_get_state(ldcp->id,
+ &tx_head, &tx_tail, &ldcp->link_state);
+
+ DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
+ "(head 0x%x, tail 0x%x state 0x%x)\n",
+ rv2, tx_head, tx_tail, ldcp->link_state);
+
+ *size = 0;
+ }
+
+ D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
+
+ return (rv);
+}
+
+/*
+ * Stream-mode write entry point.
+ *
+ * Caps the requested transfer at the channel MTU and delegates the
+ * actual packetization (seqids, fragmentation, queue management) to
+ * i_ldc_write_packet().  On return *sizep holds the number of bytes
+ * actually written.
+ *
+ * Caller must hold ldcp->lock; channel must be in STREAM mode.
+ */
+static int
+i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
+{
+ ASSERT(mutex_owned(&ldcp->lock));
+ ASSERT(ldcp->mode == LDC_MODE_STREAM);
+
+ /* never push more than one MTU's worth in a single call */
+ *sizep = MIN(*sizep, ldcp->mtu);
+
+ return (i_ldc_write_packet(ldcp, buf, sizep));
+}
+
+
+/*
+ * Interfaces for channel nexus to register/unregister with LDC module
+ * The nexus will register functions to be used to register individual
+ * channels with the nexus and enable interrupts for the channels
+ */
+int
+ldc_register(ldc_cnex_t *cinfo)
+{
+ ldc_chan_t *chanp;
+ ldc_cnex_t *scinfo;
+
+ /* a nexus must supply its dip and every entry point */
+ if (cinfo == NULL || cinfo->dip == NULL ||
+ cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
+ cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
+ cinfo->clr_intr == NULL) {
+
+ DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
+ return (EINVAL);
+ }
+
+ mutex_enter(&ldcssp->lock);
+
+ /* record the nexus entry points in the soft state */
+ scinfo = &ldcssp->cinfo;
+ scinfo->dip = cinfo->dip;
+ scinfo->reg_chan = cinfo->reg_chan;
+ scinfo->unreg_chan = cinfo->unreg_chan;
+ scinfo->add_intr = cinfo->add_intr;
+ scinfo->rem_intr = cinfo->rem_intr;
+ scinfo->clr_intr = cinfo->clr_intr;
+
+ /*
+ * Channels whose queues were configured before the nexus showed
+ * up could not be registered then - register them now.
+ */
+ for (chanp = ldcssp->chan_list; chanp != NULL; chanp = chanp->next) {
+ if ((chanp->tstate & TS_QCONF_RDY) &&
+ !(chanp->tstate & TS_CNEX_RDY))
+ (void) i_ldc_register_channel(chanp);
+ }
+
+ mutex_exit(&ldcssp->lock);
+
+ return (0);
+}
+
+int
+ldc_unregister(ldc_cnex_t *cinfo)
+{
+ ldc_cnex_t *scinfo;
+
+ if (cinfo == NULL || cinfo->dip == NULL) {
+ DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
+ return (EINVAL);
+ }
+
+ mutex_enter(&ldcssp->lock);
+
+ scinfo = &ldcssp->cinfo;
+
+ /* only the nexus that registered may unregister */
+ if (cinfo->dip != scinfo->dip) {
+ DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
+ mutex_exit(&ldcssp->lock);
+ return (EINVAL);
+ }
+
+ /* drop every recorded nexus entry point */
+ scinfo->dip = NULL;
+ scinfo->reg_chan = NULL;
+ scinfo->unreg_chan = NULL;
+ scinfo->add_intr = NULL;
+ scinfo->rem_intr = NULL;
+ scinfo->clr_intr = NULL;
+
+ mutex_exit(&ldcssp->lock);
+
+ return (0);
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Allocate a memory handle for the channel and link it into the list
+ * Also choose which memory table to use if this is the first handle
+ * being assigned to this channel
+ *
+ * On first use the channel's export map table (ldc_mtbl_t) is created,
+ * zeroed, and registered with the hypervisor via
+ * hv_ldc_set_map_table().  The new handle is inserted at the head of
+ * ldcp->mhdl_list in the LDC_UNBOUND state.
+ */
+int
+ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
+{
+ ldc_chan_t *ldcp;
+ ldc_mhdl_t *mhdl;
+ int rv;
+
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_alloc_handle: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ mutex_enter(&ldcp->lock);
+
+ /* check to see if channel is initalized */
+ if (ldcp->tstate < TS_INIT) {
+ DWARN(ldcp->id,
+ "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
+ ldcp->id);
+ mutex_exit(&ldcp->lock);
+ return (EINVAL);
+ }
+
+ /*
+ * If this channel is allocating a mem handle for the
+ * first time allocate it a memory map table and initialize it
+ */
+ if (ldcp->mtbl == NULL) {
+
+ ldc_mtbl_t *mtbl;
+
+ /* Allocate and initialize the map table structure */
+ mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
+ mtbl->size = MTBL_MAX_SIZE;
+ mtbl->num_entries = mtbl->num_avail =
+ (MTBL_MAX_SIZE/sizeof (ldc_mte_slot_t));
+ mtbl->next_entry = NULL;
+
+ /* Allocate the table itself (physically contiguous for the HV) */
+ mtbl->table = (ldc_mte_slot_t *)
+ contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
+ if (mtbl->table == NULL) {
+ cmn_err(CE_WARN,
+ "ldc_mem_alloc_handle: (0x%lx) error allocating "
+ "table memory", ldcp->id);
+ kmem_free(mtbl, sizeof (ldc_mtbl_t));
+ mutex_exit(&ldcp->lock);
+ return (ENOMEM);
+ }
+
+ /* zero out the memory */
+ bzero(mtbl->table, mtbl->size);
+
+ /* initialize the lock */
+ mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* register table for this channel */
+ rv = hv_ldc_set_map_table(ldcp->id,
+ va_to_pa(mtbl->table), mtbl->num_entries);
+ if (rv != 0) {
+ /* unwind: free table memory, lock, and struct */
+ cmn_err(CE_WARN,
+ "ldc_mem_alloc_handle: (0x%lx) err %d mapping tbl",
+ ldcp->id, rv);
+ contig_mem_free(mtbl->table, mtbl->size);
+ mutex_destroy(&mtbl->lock);
+ kmem_free(mtbl, sizeof (ldc_mtbl_t));
+ mutex_exit(&ldcp->lock);
+ return (EIO);
+ }
+
+ ldcp->mtbl = mtbl;
+
+ D1(ldcp->id,
+ "ldc_mem_alloc_handle: (0x%llx) alloc'd map table 0x%llx\n",
+ ldcp->id, ldcp->mtbl->table);
+ }
+
+ /* allocate handle for channel */
+ mhdl = kmem_zalloc(sizeof (ldc_mhdl_t), KM_SLEEP);
+
+ /* initialize the lock */
+ mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
+
+ mhdl->status = LDC_UNBOUND;
+ mhdl->ldcp = ldcp;
+
+ /* insert memory handle (@ head) into list */
+ if (ldcp->mhdl_list == NULL) {
+ ldcp->mhdl_list = mhdl;
+ mhdl->next = NULL;
+ } else {
+ /* insert @ head */
+ mhdl->next = ldcp->mhdl_list;
+ ldcp->mhdl_list = mhdl;
+ }
+
+ /* return the handle */
+ *mhandle = (ldc_mem_handle_t)mhdl;
+
+ mutex_exit(&ldcp->lock);
+
+ D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
+ ldcp->id, mhdl);
+
+ return (0);
+}
+
+/*
+ * Free memory handle for the channel and unlink it from the list
+ *
+ * The handle must be unbound (not LDC_BOUND or LDC_MAPPED).  The
+ * handle's own lock is taken only for the state check and then
+ * dropped; the unlink itself is serialized by ldcp->mlist_lock.
+ * Returns EINVAL for a NULL/bound/unknown handle.
+ */
+int
+ldc_mem_free_handle(ldc_mem_handle_t mhandle)
+{
+ ldc_mhdl_t *mhdl, *phdl;
+ ldc_chan_t *ldcp;
+
+ if (mhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_free_handle: invalid memory handle\n");
+ return (EINVAL);
+ }
+ mhdl = (ldc_mhdl_t *)mhandle;
+
+ mutex_enter(&mhdl->lock);
+
+ ldcp = mhdl->ldcp;
+
+ /* refuse to free a handle that still has exported/mapped memory */
+ if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
+ DWARN(ldcp->id,
+ "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
+ mhdl);
+ mutex_exit(&mhdl->lock);
+ return (EINVAL);
+ }
+ mutex_exit(&mhdl->lock);
+
+ mutex_enter(&ldcp->mlist_lock);
+
+ phdl = ldcp->mhdl_list;
+
+ /* first handle */
+ if (phdl == mhdl) {
+ ldcp->mhdl_list = mhdl->next;
+ mutex_destroy(&mhdl->lock);
+ kmem_free(mhdl, sizeof (ldc_mhdl_t));
+ D1(ldcp->id,
+ "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
+ ldcp->id, mhdl);
+ } else {
+ /* walk the list - unlink and free */
+ while (phdl != NULL) {
+ if (phdl->next == mhdl) {
+ phdl->next = mhdl->next;
+ mutex_destroy(&mhdl->lock);
+ kmem_free(mhdl, sizeof (ldc_mhdl_t));
+ D1(ldcp->id,
+ "ldc_mem_free_handle: (0x%llx) freed "
+ "handle 0x%llx\n", ldcp->id, mhdl);
+ break;
+ }
+ phdl = phdl->next;
+ }
+ }
+
+ /* phdl == NULL here means the handle was not on the list */
+ if (phdl == NULL) {
+ DWARN(ldcp->id,
+ "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
+ mutex_exit(&ldcp->mlist_lock);
+ return (EINVAL);
+ }
+
+ mutex_exit(&ldcp->mlist_lock);
+
+ return (0);
+}
+
+/*
+ * Bind a memory handle to a virtual address.
+ * The virtual address is converted to the corresponding real addresses.
+ * Returns pointer to the first ldc_mem_cookie and the total number
+ * of cookies for this virtual address. Other cookies can be obtained
+ * using the ldc_mem_nextcookie() call. If the pages are stored in
+ * consecutive locations in the table, a single cookie corresponding to
+ * the first location is returned. The cookie size spans all the entries.
+ *
+ * If the VA corresponds to a page that is already being exported, reuse
+ * the page and do not export it again. Bump the page's use count.
+ *
+ * NOTE(review): ldcp->mtbl is dereferenced without a NULL check -
+ * this assumes ldc_mem_alloc_handle() created the map table for every
+ * handle passed in; confirm callers guarantee that ordering.
+ */
+int
+ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
+ uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
+{
+ ldc_mhdl_t *mhdl;
+ ldc_chan_t *ldcp;
+ ldc_mtbl_t *mtbl;
+ ldc_memseg_t *memseg;
+ ldc_mte_t tmp_mte;
+ uint64_t index, prev_index = 0;
+ int64_t cookie_idx;
+ uintptr_t raddr, ra_aligned;
+ uint64_t psize, poffset, v_offset;
+ uint64_t pg_shift, pg_size, pg_size_code, pg_mask;
+ pgcnt_t npages;
+ caddr_t v_align, addr;
+ int i;
+
+ if (mhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_bind_handle: invalid memory handle\n");
+ return (EINVAL);
+ }
+ mhdl = (ldc_mhdl_t *)mhandle;
+ ldcp = mhdl->ldcp;
+ mtbl = ldcp->mtbl;
+
+ /* clear count */
+ *ccount = 0;
+
+ mutex_enter(&mhdl->lock);
+
+ if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
+ DWARN(ldcp->id,
+ "ldc_mem_bind_handle: (0x%x) handle already bound\n",
+ mhandle);
+ mutex_exit(&mhdl->lock);
+ return (EINVAL);
+ }
+
+ /* Force address and size to be 8-byte aligned */
+ if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
+ DWARN(ldcp->id,
+ "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
+ mutex_exit(&mhdl->lock);
+ return (EINVAL);
+ }
+
+ /* FUTURE: get the page size, pgsz code, and shift */
+ pg_size = MMU_PAGESIZE;
+ pg_size_code = page_szc(pg_size);
+ pg_shift = page_get_shift(pg_size_code);
+ pg_mask = ~(pg_size - 1);
+
+ D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
+ "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
+ ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
+
+ /* aligned VA and its offset */
+ v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
+ v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
+
+ /* number of pages spanned, rounding up any partial last page */
+ npages = (len+v_offset)/pg_size;
+ npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
+
+ D1(ldcp->id, "ldc_mem_bind_handle: binding "
+ "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
+ ldcp->id, vaddr, v_align, v_offset, npages);
+
+ /* lock the memory table - exclusive access to channel */
+ mutex_enter(&mtbl->lock);
+
+ if (npages > mtbl->num_avail) {
+ DWARN(ldcp->id,
+ "ldc_mem_bind_handle: (0x%llx) no table entries\n",
+ ldcp->id);
+ mutex_exit(&mtbl->lock);
+ mutex_exit(&mhdl->lock);
+ return (ENOMEM);
+ }
+
+ /* Allocate a memseg structure */
+ memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
+
+ /* Allocate memory to store all pages and cookies */
+ memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
+ memseg->cookies =
+ kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
+
+ D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
+ ldcp->id, npages);
+
+ addr = v_align;
+
+ /*
+ * Table slots are used in a round-robin manner. The algorithm permits
+ * inserting duplicate entries. Slots allocated earlier will typically
+ * get freed before we get back to reusing the slot.Inserting duplicate
+ * entries should be OK as we only lookup entries using the cookie addr
+ * i.e. tbl index, during export, unexport and copy operation.
+ *
+ * One implementation what was tried was to search for a duplicate
+ * page entry first and reuse it. The search overhead is very high and
+ * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
+ * So it does make sense to avoid searching for duplicates.
+ *
+ * But during the process of searching for a free slot, if we find a
+ * duplicate entry we will go ahead and use it, and bump its use count.
+ */
+
+ /* index to start searching from */
+ index = mtbl->next_entry;
+ cookie_idx = -1;
+
+ tmp_mte.ll = 0; /* initialise fields to 0 */
+
+ /* permission bits depend on the requested mapping type(s) */
+ if (mtype & LDC_DIRECT_MAP) {
+ tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
+ tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
+ tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
+ }
+
+ if (mtype & LDC_SHADOW_MAP) {
+ tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
+ tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
+ }
+
+ if (mtype & LDC_IO_MAP) {
+ tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
+ tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
+ }
+
+ D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
+
+ tmp_mte.mte_pgszc = pg_size_code;
+
+ /* initialize each mem table entry */
+ for (i = 0; i < npages; i++) {
+
+ /* check if slot is available in the table */
+ while (mtbl->table[index].entry.ll != 0) {
+
+ index = (index + 1) % mtbl->num_entries;
+
+ if (index == mtbl->next_entry) {
+ /* we have looped around */
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_bind_handle: (0x%llx) cannot find "
+ "entry\n", ldcp->id);
+ *ccount = 0;
+
+ /* NOTE: free memory, remove previous entries */
+ /* this shouldnt happen as num_avail was ok */
+
+ mutex_exit(&mtbl->lock);
+ mutex_exit(&mhdl->lock);
+ return (ENOMEM);
+ }
+ }
+
+ /* get the real address */
+ raddr = va_to_pa((void *)addr);
+ ra_aligned = ((uintptr_t)raddr & pg_mask);
+
+ /* build the mte */
+ tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
+
+ D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
+
+ /* update entry in table */
+ mtbl->table[index].entry = tmp_mte;
+
+ D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
+ " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
+
+ /* calculate the size and offset for this export range */
+ if (i == 0) {
+ /* first page */
+ psize = min((pg_size - v_offset), len);
+ poffset = v_offset;
+
+ } else if (i == (npages - 1)) {
+ /* last page */
+ psize = (((uintptr_t)(vaddr + len)) &
+ ((uint64_t)(pg_size-1)));
+ if (psize == 0)
+ psize = pg_size;
+ poffset = 0;
+
+ } else {
+ /* middle pages */
+ psize = pg_size;
+ poffset = 0;
+ }
+
+ /* store entry for this page */
+ memseg->pages[i].index = index;
+ memseg->pages[i].raddr = raddr;
+ memseg->pages[i].offset = poffset;
+ memseg->pages[i].size = psize;
+ memseg->pages[i].mte = &(mtbl->table[index]);
+
+ /*
+ * create the cookie; consecutive table slots are coalesced
+ * into a single cookie whose size spans all of them
+ */
+ if (i == 0 || (index != prev_index + 1)) {
+ cookie_idx++;
+ memseg->cookies[cookie_idx].addr =
+ IDX2COOKIE(index, pg_size_code, pg_shift);
+ memseg->cookies[cookie_idx].addr |= poffset;
+ memseg->cookies[cookie_idx].size = psize;
+
+ } else {
+ memseg->cookies[cookie_idx].size += psize;
+ }
+
+ D1(ldcp->id, "ldc_mem_bind_handle: bound "
+ "(0x%llx) va=0x%llx, idx=0x%llx, "
+ "ra=0x%llx(sz=0x%x,off=0x%x)\n",
+ ldcp->id, addr, index, raddr, psize, poffset);
+
+ /* decrement number of available entries */
+ mtbl->num_avail--;
+
+ /* increment va by page size */
+ addr += pg_size;
+
+ /* increment index */
+ prev_index = index;
+ index = (index + 1) % mtbl->num_entries;
+
+ /* save the next slot */
+ mtbl->next_entry = index;
+ }
+
+ mutex_exit(&mtbl->lock);
+
+ /* memory handle = bound */
+ mhdl->mtype = mtype;
+ mhdl->perm = perm;
+ mhdl->status = LDC_BOUND;
+
+ /* update memseg_t */
+ memseg->vaddr = vaddr;
+ memseg->raddr = memseg->pages[0].raddr;
+ memseg->size = len;
+ memseg->npages = npages;
+ memseg->ncookies = cookie_idx + 1;
+ memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
+
+ /* return count and first cookie */
+ *ccount = memseg->ncookies;
+ cookie->addr = memseg->cookies[0].addr;
+ cookie->size = memseg->cookies[0].size;
+
+ D1(ldcp->id,
+ "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
+ "pgs=0x%llx cookies=0x%llx\n",
+ ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
+
+ mutex_exit(&mhdl->lock);
+ return (0);
+}
+
+/*
+ * Return the next cookie associated with the specified memory handle.
+ * Cookie 0 is returned by the bind call itself; this routine hands out
+ * the remaining cookies in order and wraps next_cookie back to 0 once
+ * the list is exhausted.
+ *
+ * Returns 0 on success, EINVAL on a bad handle/arg or when no more
+ * cookies remain (cookie->addr/size are zeroed in that case).
+ */
+int
+ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
+{
+	ldc_mhdl_t	*mhdl;
+	ldc_chan_t 	*ldcp;
+	ldc_memseg_t	*memseg;
+
+	if (mhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_nextcookie: invalid memory handle\n");
+		return (EINVAL);
+	}
+	mhdl = (ldc_mhdl_t *)mhandle;
+
+	mutex_enter(&mhdl->lock);
+
+	ldcp = mhdl->ldcp;
+	memseg = mhdl->memseg;
+
+	if (cookie == 0) {
+		DWARN(ldcp->id,
+		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
+		    ldcp->id);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	/* next_cookie == 0 indicates every cookie has been handed out */
+	if (memseg->next_cookie != 0) {
+		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
+		cookie->size = memseg->cookies[memseg->next_cookie].size;
+		memseg->next_cookie++;
+		if (memseg->next_cookie == memseg->ncookies)
+			memseg->next_cookie = 0;
+
+	} else {
+		DWARN(ldcp->id,
+		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
+		cookie->addr = 0;
+		cookie->size = 0;
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	D1(ldcp->id,
+	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
+	    ldcp->id, cookie->addr, cookie->size);
+
+	mutex_exit(&mhdl->lock);
+	return (0);
+}
+
+/*
+ * Unbind the virtual memory region associated with the specified
+ * memory handle. All associated cookies are freed and the corresponding
+ * RA space is no longer exported.
+ *
+ * Lock order: mhdl->lock is taken before the channel's map table lock,
+ * matching the order used at bind time.
+ */
+int
+ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
+{
+	ldc_mhdl_t	*mhdl;
+	ldc_chan_t 	*ldcp;
+	ldc_mtbl_t	*mtbl;
+	ldc_memseg_t	*memseg;
+	int		i;
+
+	if (mhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_unbind_handle: invalid memory handle\n");
+		return (EINVAL);
+	}
+	mhdl = (ldc_mhdl_t *)mhandle;
+
+	mutex_enter(&mhdl->lock);
+
+	if (mhdl->status == LDC_UNBOUND) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
+		    mhandle);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	ldcp = mhdl->ldcp;
+	mtbl = ldcp->mtbl;
+
+	memseg = mhdl->memseg;
+
+	/* lock the memory table - exclusive access to channel */
+	mutex_enter(&mtbl->lock);
+
+	/* undo the pages exported */
+	for (i = 0; i < memseg->npages; i++) {
+
+		/* FUTURE: check for mapped pages */
+		if (memseg->pages[i].mte->cookie) {
+			_NOTE(EMPTY)
+		}
+
+		/* clear the entry from the table */
+		memseg->pages[i].mte->entry.ll = 0;
+		mtbl->num_avail++;
+	}
+	mutex_exit(&mtbl->lock);
+
+	/*
+	 * free the allocated memseg and page structures
+	 * NOTE(review): the cookie array is freed with npages entries,
+	 * presumably matching a worst-case one-cookie-per-page allocation
+	 * at bind time - verify against ldc_mem_bind_handle.
+	 */
+	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
+	kmem_free(memseg->cookies,
+	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
+	kmem_free(memseg, sizeof (ldc_memseg_t));
+
+	/* uninitialize the memory handle */
+	mhdl->memseg = NULL;
+	mhdl->status = LDC_UNBOUND;
+
+	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
+	    ldcp->id, mhdl);
+
+	mutex_exit(&mhdl->lock);
+	return (0);
+}
+
+/*
+ * Get information about a memory handle. The status is always filled
+ * in; the base virtual/real address, map type and permissions are only
+ * valid (and only filled in) when the handle is bound or mapped.
+ */
+int
+ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
+{
+	ldc_mhdl_t	*mhdl;
+
+	if (mhandle == NULL) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
+		return (EINVAL);
+	}
+	mhdl = (ldc_mhdl_t *)mhandle;
+
+	if (minfo == NULL) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
+		return (EINVAL);
+	}
+
+	mutex_enter(&mhdl->lock);
+
+	minfo->status = mhdl->status;
+	/* vaddr/raddr/mtype/perm are undefined for an unbound handle */
+	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
+		minfo->vaddr = mhdl->memseg->vaddr;
+		minfo->raddr = mhdl->memseg->raddr;
+		minfo->mtype = mhdl->mtype;
+		minfo->perm = mhdl->perm;
+	}
+	mutex_exit(&mhdl->lock);
+
+	return (0);
+}
+
+/*
+ * Copy data either from or to the client specified virtual address
+ * space to or from the exported memory associated with the cookies.
+ * The direction argument determines whether the data is read from or
+ * written to exported memory.
+ *
+ * 'off' is the byte offset into the exported range at which to start;
+ * '*size' is the requested byte count and is updated to the amount
+ * actually copied if the HV copy fails midway. Both the local vaddr
+ * and the size must be 8-byte aligned.
+ */
+int
+ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
+    ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
+{
+	ldc_chan_t 	*ldcp;
+	uint64_t	local_voff, local_valign;
+	uint64_t	cookie_addr, cookie_size;
+	uint64_t	pg_shift, pg_size, pg_size_code;
+	uint64_t 	export_caddr, export_poff, export_psize, export_size;
+	uint64_t	local_ra, local_poff, local_psize;
+	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
+	pgcnt_t		npages;
+	size_t		len = *size;
+	int 		i, rv = 0;
+
+	if (handle == NULL) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
+		return (EINVAL);
+	}
+	ldcp = (ldc_chan_t *)handle;
+
+	mutex_enter(&ldcp->lock);
+
+	/* check to see if channel is UP */
+	if (ldcp->tstate != TS_UP) {
+		DWARN(ldcp->id, "ldc_mem_copy: (0x%llx) channel is not UP\n",
+		    ldcp->id);
+		mutex_exit(&ldcp->lock);
+		return (EINVAL);
+	}
+
+	/* Force address and size to be 8-byte aligned */
+	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
+		DWARN(ldcp->id,
+		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
+		mutex_exit(&ldcp->lock);
+		return (EINVAL);
+	}
+
+	/* Find the size of the exported memory */
+	export_size = 0;
+	for (i = 0; i < ccount; i++)
+		export_size += cookies[i].size;
+
+	/* check to see if offset is valid */
+	if (off > export_size) {
+		DWARN(ldcp->id,
+		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
+		    ldcp->id);
+		mutex_exit(&ldcp->lock);
+		return (EINVAL);
+	}
+
+	/*
+	 * Check to see if the export size is smaller than the size we
+	 * are requesting to copy - if so flag an error
+	 */
+	if ((export_size - off) < *size) {
+		DWARN(ldcp->id,
+		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
+		    ldcp->id);
+		mutex_exit(&ldcp->lock);
+		return (EINVAL);
+	}
+
+	/* total_bal tracks the number of bytes still left to copy */
+	total_bal = min(export_size, *size);
+
+	/* FUTURE: get the page size, pgsz code, and shift */
+	pg_size = MMU_PAGESIZE;
+	pg_size_code = page_szc(pg_size);
+	pg_shift = page_get_shift(pg_size_code);
+
+	D1(ldcp->id, "ldc_mem_copy: copying data "
+	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
+	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
+
+	/* aligned VA and its offset */
+	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
+	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
+
+	npages = (len+local_voff)/pg_size;
+	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
+
+	D1(ldcp->id,
+	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
+	    ldcp->id, vaddr, local_valign, local_voff, npages);
+
+	local_ra = va_to_pa((void *)local_valign);
+	local_poff = local_voff;
+	local_psize = min(len, (pg_size - local_voff));
+
+	len -= local_psize;
+
+	/*
+	 * find the first cookie in the list of cookies
+	 * if the offset passed in is not zero
+	 */
+	for (idx = 0; idx < ccount; idx++) {
+		cookie_size = cookies[idx].size;
+		if (off < cookie_size)
+			break;
+		off -= cookie_size;
+	}
+
+	cookie_addr = cookies[idx].addr + off;
+	cookie_size = cookies[idx].size - off;
+
+	export_caddr = cookie_addr & ~(pg_size - 1);
+	export_poff = cookie_addr & (pg_size - 1);
+	export_psize = min(cookie_size, (pg_size - export_poff));
+
+	/*
+	 * Copy loop: each pass copies the overlap between the current
+	 * exported page and the current local page via the hypervisor,
+	 * then advances whichever side was exhausted.
+	 */
+	for (;;) {
+
+		copy_size = min(export_psize, local_psize);
+
+		D1(ldcp->id,
+		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
+		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
+		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
+		    " total_bal=0x%llx\n",
+		    ldcp->id, direction, export_caddr, local_ra, export_poff,
+		    local_poff, export_psize, local_psize, copy_size,
+		    total_bal);
+
+		rv = hv_ldc_copy(ldcp->id, direction,
+		    (export_caddr + export_poff), (local_ra + local_poff),
+		    copy_size, &copied_len);
+
+		if (rv != 0) {
+			cmn_err(CE_WARN,
+			    "ldc_mem_copy: (0x%lx) err %d during copy\n",
+			    ldcp->id, rv);
+			DWARN(DBG_ALL_LDCS,
+			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%llx, "
+			    "loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
+			    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
+			    " copied_len=0x%llx, total_bal=0x%llx\n",
+			    ldcp->id, direction, export_caddr, local_ra,
+			    export_poff, local_poff, export_psize, local_psize,
+			    copy_size, copied_len, total_bal);
+
+			/* report back how much was copied before the error */
+			*size = *size - total_bal;
+			mutex_exit(&ldcp->lock);
+			return (EIO);
+		}
+
+		ASSERT(copied_len <= copy_size);
+
+		D2(ldcp->id, "ldc_mem_copy: copied=0x%llx\n", copied_len);
+		export_poff += copied_len;
+		local_poff += copied_len;
+		export_psize -= copied_len;
+		local_psize -= copied_len;
+		cookie_size -= copied_len;
+
+		total_bal -= copied_len;
+
+		/* HV copied less than requested - retry the remainder */
+		if (copy_size != copied_len)
+			continue;
+
+		/* advance to the next exported page (and cookie if needed) */
+		if (export_psize == 0 && total_bal != 0) {
+
+			if (cookie_size == 0) {
+				idx++;
+				cookie_addr = cookies[idx].addr;
+				cookie_size = cookies[idx].size;
+
+				export_caddr = cookie_addr & ~(pg_size - 1);
+				export_poff = cookie_addr & (pg_size - 1);
+				export_psize =
+					min(cookie_size, (pg_size-export_poff));
+			} else {
+				export_caddr += pg_size;
+				export_poff = 0;
+				export_psize = min(cookie_size, pg_size);
+			}
+		}
+
+		/* advance to the next local page */
+		if (local_psize == 0 && total_bal != 0) {
+			local_valign += pg_size;
+			local_ra = va_to_pa((void *)local_valign);
+			local_poff = 0;
+			local_psize = min(pg_size, len);
+			len -= local_psize;
+		}
+
+		/* check if we are all done */
+		if (total_bal == 0)
+			break;
+	}
+
+	mutex_exit(&ldcp->lock);
+
+	D1(ldcp->id,
+	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
+	    ldcp->id, *size);
+
+	return (0);
+}
+
+/*
+ * Copy data either from or to the client specified virtual address
+ * space to or from HV physical memory.
+ *
+ * The direction argument determines whether the data is read from or
+ * written to HV memory. direction values are LDC_COPY_IN/OUT similar
+ * to the ldc_mem_copy interface
+ *
+ * The local vaddr and *size must be 8-byte aligned. On an HV copy
+ * failure, *size is updated to the number of bytes actually copied
+ * and the translated HV error is returned.
+ */
+int
+ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
+    caddr_t paddr, uint8_t direction)
+{
+	ldc_chan_t 	*ldcp;
+	uint64_t	local_voff, local_valign;
+	uint64_t	pg_shift, pg_size, pg_size_code;
+	uint64_t 	target_pa, target_poff, target_psize, target_size;
+	uint64_t	local_ra, local_poff, local_psize;
+	uint64_t	copy_size, copied_len = 0;
+	pgcnt_t		npages;
+	size_t		len = *size;
+	int 		rv = 0;
+
+	if (handle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_rdwr_pa: invalid channel handle\n");
+		return (EINVAL);
+	}
+	ldcp = (ldc_chan_t *)handle;
+
+	mutex_enter(&ldcp->lock);
+
+	/* check to see if channel is UP */
+	if (ldcp->tstate != TS_UP) {
+		DWARN(ldcp->id,
+		    "ldc_mem_rdwr_pa: (0x%llx) channel is not UP\n",
+		    ldcp->id);
+		mutex_exit(&ldcp->lock);
+		return (EINVAL);
+	}
+
+	/* Force address and size to be 8-byte aligned */
+	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
+		DWARN(ldcp->id,
+		    "ldc_mem_rdwr_pa: addr/size is not 8-byte aligned\n");
+		mutex_exit(&ldcp->lock);
+		return (EINVAL);
+	}
+
+	/* target_size tracks the number of bytes still left to copy */
+	target_size = *size;
+
+	/* FUTURE: get the page size, pgsz code, and shift */
+	pg_size = MMU_PAGESIZE;
+	pg_size_code = page_szc(pg_size);
+	pg_shift = page_get_shift(pg_size_code);
+
+	D1(ldcp->id, "ldc_mem_rdwr_pa: copying data "
+	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
+	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
+
+	/* aligned VA and its offset */
+	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
+	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
+
+	/* npages is computed for the debug output below only */
+	npages = (len + local_voff) / pg_size;
+	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
+
+	D1(ldcp->id,
+	    "ldc_mem_rdwr_pa: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
+	    ldcp->id, vaddr, local_valign, local_voff, npages);
+
+	local_ra = va_to_pa((void *)local_valign);
+	local_poff = local_voff;
+	local_psize = min(len, (pg_size - local_voff));
+
+	len -= local_psize;
+
+	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
+	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
+	target_psize = pg_size - target_poff;
+
+	/*
+	 * Copy loop: each pass copies the overlap between the current
+	 * target page and the current local page via the hypervisor.
+	 */
+	for (;;) {
+
+		copy_size = min(target_psize, local_psize);
+
+		D1(ldcp->id,
+		    "ldc_mem_rdwr_pa: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
+		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
+		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
+		    " total_bal=0x%llx\n",
+		    ldcp->id, direction, target_pa, local_ra, target_poff,
+		    local_poff, target_psize, local_psize, copy_size,
+		    target_size);
+
+		rv = hv_ldc_copy(ldcp->id, direction,
+		    (target_pa + target_poff), (local_ra + local_poff),
+		    copy_size, &copied_len);
+
+		if (rv != 0) {
+			cmn_err(CE_WARN,
+			    "ldc_mem_rdwr_pa: (0x%lx) err %d during copy\n",
+			    ldcp->id, rv);
+			DWARN(DBG_ALL_LDCS,
+			    "ldc_mem_rdwr_pa: (0x%llx) dir=%lld,tar_pa=0x%llx, "
+			    "loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
+			    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
+			    " total_bal=0x%llx\n",
+			    ldcp->id, direction, target_pa, local_ra,
+			    target_poff, local_poff, target_psize, local_psize,
+			    copy_size, target_size);
+
+			/* report back how much was copied before the error */
+			*size = *size - target_size;
+			mutex_exit(&ldcp->lock);
+			return (i_ldc_h2v_error(rv));
+		}
+
+		D2(ldcp->id, "ldc_mem_rdwr_pa: copied=0x%llx\n", copied_len);
+		target_poff += copied_len;
+		local_poff += copied_len;
+		target_psize -= copied_len;
+		local_psize -= copied_len;
+
+		target_size -= copied_len;
+
+		/* HV copied less than requested - retry the remainder */
+		if (copy_size != copied_len)
+			continue;
+
+		/* advance to the next target page */
+		if (target_psize == 0 && target_size != 0) {
+			target_pa += pg_size;
+			target_poff = 0;
+			target_psize = min(pg_size, target_size);
+		}
+
+		/* advance to the next local page */
+		if (local_psize == 0 && target_size != 0) {
+			local_valign += pg_size;
+			local_ra = va_to_pa((void *)local_valign);
+			local_poff = 0;
+			local_psize = min(pg_size, len);
+			len -= local_psize;
+		}
+
+		/* check if we are all done */
+		if (target_size == 0)
+			break;
+	}
+
+	mutex_exit(&ldcp->lock);
+
+	D1(ldcp->id, "ldc_mem_rdwr_pa: (0x%llx) done copying sz=0x%llx\n",
+	    ldcp->id, *size);
+
+	return (0);
+}
+
+/*
+ * Map an exported memory segment into the local address space. If the
+ * memory range was exported for direct map access, a HV call is made
+ * to allocate a RA range. If the map is done via a shadow copy, local
+ * shadow memory is allocated and the base VA is returned in 'vaddr'. If
+ * the mapping is a direct map then the RA is returned in 'raddr'.
+ *
+ * For LDC_SHADOW_MAP the caller may supply its own page-aligned buffer
+ * in *vaddr (it must be at least as large as the exported range); if
+ * *vaddr is NULL, contiguous shadow memory is allocated here and freed
+ * again by ldc_mem_unmap. All cookies must be page aligned.
+ *
+ * Returns 0 on success, EINVAL on bad args/state, ENOMEM if shadow
+ * memory cannot be allocated.
+ */
+int
+ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
+    uint8_t mtype, caddr_t *vaddr, caddr_t *raddr)
+{
+	int		i, idx;
+	ldc_chan_t 	*ldcp;
+	ldc_mhdl_t	*mhdl;
+	ldc_memseg_t	*memseg;
+	caddr_t		shadow_base = NULL, tmpaddr;
+	uint64_t	pg_size, pg_shift, pg_size_code;
+	uint64_t	exp_size = 0, npages;
+
+	if (mhandle == NULL) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
+		return (EINVAL);
+	}
+	mhdl = (ldc_mhdl_t *)mhandle;
+
+	mutex_enter(&mhdl->lock);
+
+	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
+	    mhdl->memseg != NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	ldcp = mhdl->ldcp;
+
+	mutex_enter(&ldcp->lock);
+
+	if (ldcp->tstate != TS_UP) {
+		/* fixed: message previously said "ldc_mem_dring_map" */
+		DWARN(ldcp->id,
+		    "ldc_mem_map: (0x%llx) channel is not UP\n",
+		    ldcp->id);
+		mutex_exit(&ldcp->lock);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
+		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
+		mutex_exit(&ldcp->lock);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	if (mtype == LDC_SHADOW_MAP && vaddr == NULL) {
+		DWARN(ldcp->id,
+		    "ldc_mem_map: invalid vaddr arg 0x%llx\n", vaddr);
+		mutex_exit(&ldcp->lock);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	if (mtype == LDC_SHADOW_MAP &&
+	    (vaddr) && ((uintptr_t)(*vaddr) & MMU_PAGEOFFSET)) {
+		DWARN(ldcp->id,
+		    "ldc_mem_map: vaddr not page aligned, 0x%llx\n", *vaddr);
+		mutex_exit(&ldcp->lock);
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
+	    mhandle, cookie->addr, cookie->size);
+
+	/* FUTURE: get the page size, pgsz code, and shift */
+	pg_size = MMU_PAGESIZE;
+	pg_size_code = page_szc(pg_size);
+	pg_shift = page_get_shift(pg_size_code);
+
+	/* calculate the number of pages in the exported cookie */
+	for (idx = 0; idx < ccount; idx++) {
+		if (cookie[idx].addr & MMU_PAGEOFFSET ||
+			cookie[idx].size & MMU_PAGEOFFSET) {
+			DWARN(ldcp->id,
+			    "ldc_mem_map: cookie addr/size not page aligned, "
+			    "0x%llx\n", cookie[idx].addr);
+			mutex_exit(&ldcp->lock);
+			mutex_exit(&mhdl->lock);
+			return (EINVAL);
+		}
+		exp_size += cookie[idx].size;
+	}
+	npages = (exp_size >> pg_shift);
+
+	/* Allocate memseg structure */
+	memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
+
+	/* Allocate memory to store all pages and cookies */
+	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
+	memseg->cookies =
+		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
+
+	D2(ldcp->id, "ldc_mem_map: (0x%llx) processing 0x%llx pages\n",
+	    ldcp->id, npages);
+
+	/* Check to see if the client is requesting direct or shadow map */
+	if (mtype == LDC_SHADOW_MAP) {
+		if (*vaddr == NULL) {
+			shadow_base =
+				contig_mem_alloc_align(exp_size, PAGESIZE);
+			if (shadow_base == NULL) {
+				cmn_err(CE_WARN, "ldc_mem_map: shadow memory "
+				    "allocation failed\n");
+				kmem_free(memseg->cookies,
+				    (sizeof (ldc_mem_cookie_t) * ccount));
+				kmem_free(memseg->pages,
+				    (sizeof (ldc_page_t) * npages));
+				kmem_free(memseg, sizeof (ldc_memseg_t));
+				mutex_exit(&ldcp->lock);
+				mutex_exit(&mhdl->lock);
+				return (ENOMEM);
+			}
+
+			bzero(shadow_base, exp_size);
+			mhdl->myshadow = B_TRUE;
+
+			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
+			    "shadow page va=0x%llx\n", ldcp->id, shadow_base);
+		} else {
+			/*
+			 * Use client supplied memory for shadow_base
+			 * WARNING: assuming that client mem is >= exp_size
+			 */
+			shadow_base = *vaddr;
+		}
+	} else if (mtype == LDC_DIRECT_MAP) {
+		/* FUTURE: Do a direct map by calling into HV */
+		_NOTE(EMPTY)
+	}
+
+	/*
+	 * Save all page and cookie information
+	 * NOTE(review): for non-shadow maps shadow_base is still NULL
+	 * here, so pages[].raddr is derived from a NULL VA until the
+	 * direct-map support above is implemented.
+	 */
+	for (i = 0, tmpaddr = shadow_base; i < npages; i++) {
+		memseg->pages[i].raddr = va_to_pa(tmpaddr);
+		memseg->pages[i].size = pg_size;
+		memseg->pages[i].index = 0;
+		memseg->pages[i].offset = 0;
+		memseg->pages[i].mte = NULL;
+		tmpaddr += pg_size;
+	}
+	for (i = 0; i < ccount; i++) {
+		memseg->cookies[i].addr = cookie[i].addr;
+		memseg->cookies[i].size = cookie[i].size;
+	}
+
+	/* update memseg_t */
+	memseg->vaddr = shadow_base;
+	memseg->raddr = memseg->pages[0].raddr;
+	memseg->size = exp_size;
+	memseg->npages = npages;
+	memseg->ncookies = ccount;
+	memseg->next_cookie = 0;
+
+	/* memory handle = mapped */
+	mhdl->mtype = mtype;
+	mhdl->perm = 0;
+	mhdl->status = LDC_MAPPED;
+
+	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
+	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
+	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
+	    memseg->npages, memseg->ncookies);
+
+	if (raddr)
+		*raddr = (caddr_t)memseg->raddr;
+	if (vaddr)
+		*vaddr = memseg->vaddr;
+
+	mutex_exit(&ldcp->lock);
+	mutex_exit(&mhdl->lock);
+	return (0);
+}
+
+/*
+ * Unmap a memory segment. Free shadow memory (if any).
+ *
+ * Releases everything ldc_mem_map() set up: driver-allocated shadow
+ * memory (only when mhdl->myshadow is set - client-supplied buffers
+ * are left alone), the page/cookie arrays and the memseg itself. The
+ * handle reverts to LDC_UNBOUND and can be reused.
+ */
+int
+ldc_mem_unmap(ldc_mem_handle_t mhandle)
+{
+	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
+	ldc_chan_t 	*ldcp;
+	ldc_memseg_t	*memseg;
+
+	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
+		    mhandle);
+		return (EINVAL);
+	}
+
+	mutex_enter(&mhdl->lock);
+
+	ldcp = mhdl->ldcp;
+	memseg = mhdl->memseg;
+
+	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
+	    ldcp->id, mhdl);
+
+	/* if we allocated shadow memory - free it */
+	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
+		contig_mem_free(memseg->vaddr, memseg->size);
+	}
+
+	/* free the allocated memseg and page structures */
+	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
+	kmem_free(memseg->cookies,
+	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
+	kmem_free(memseg, sizeof (ldc_memseg_t));
+
+	/* uninitialize the memory handle */
+	mhdl->memseg = NULL;
+	mhdl->status = LDC_UNBOUND;
+
+	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
+	    ldcp->id, mhdl);
+
+	mutex_exit(&mhdl->lock);
+	return (0);
+}
+
+/*
+ * Internal entry point for LDC mapped memory entry consistency
+ * semantics. Acquire copies the contents of the remote memory
+ * into the local shadow copy. The release operation copies the local
+ * contents into the remote memory. The offset and size specify the
+ * bounds for the memory range being synchronized.
+ *
+ * Only LDC_SHADOW_MAP mappings require an explicit copy; for any
+ * other map type this routine validates the range and returns 0
+ * without doing any work.
+ */
+static int
+i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
+    uint64_t offset, size_t size)
+{
+	int 		err;
+	ldc_mhdl_t	*mhdl;
+	ldc_chan_t	*ldcp;
+	ldc_memseg_t	*memseg;
+	caddr_t		local_vaddr;
+	size_t		copy_size;
+
+	if (mhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "i_ldc_mem_acquire_release: invalid memory handle\n");
+		return (EINVAL);
+	}
+	mhdl = (ldc_mhdl_t *)mhandle;
+
+	mutex_enter(&mhdl->lock);
+
+	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "i_ldc_mem_acquire_release: not mapped memory\n");
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	/* the requested range must lie fully inside the mapped segment */
+	if (offset >= mhdl->memseg->size ||
+	    (offset + size) > mhdl->memseg->size) {
+		DWARN(DBG_ALL_LDCS,
+		    "i_ldc_mem_acquire_release: memory out of range\n");
+		mutex_exit(&mhdl->lock);
+		return (EINVAL);
+	}
+
+	/* get the channel handle and memory segment */
+	ldcp = mhdl->ldcp;
+	memseg = mhdl->memseg;
+
+	if (mhdl->mtype == LDC_SHADOW_MAP) {
+
+		local_vaddr = memseg->vaddr + offset;
+		copy_size = size;
+
+		/* copy to/from remote from/to local memory */
+		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
+		    &copy_size, memseg->cookies, memseg->ncookies,
+		    direction);
+		if (err || copy_size != size) {
+			cmn_err(CE_WARN,
+			    "i_ldc_mem_acquire_release: copy failed\n");
+			mutex_exit(&mhdl->lock);
+			return (err);
+		}
+	}
+
+	mutex_exit(&mhdl->lock);
+
+	return (0);
+}
+
+/*
+ * Ensure that the contents of the local memory seg are consistent
+ * with the contents of the remote segment, i.e. copy the remote
+ * (exported) memory into the local shadow copy.
+ */
+int
+ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
+{
+	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
+}
+
+
+/*
+ * Ensure that the contents of the remote memory seg are consistent
+ * with the contents of the local segment, i.e. copy the local shadow
+ * copy out to the remote (exported) memory.
+ */
+int
+ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
+{
+	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
+}
+
+/*
+ * Allocate a descriptor ring. The size of each descriptor
+ * must be 8-byte aligned and the entire ring should be a multiple
+ * of MMU_PAGESIZE.
+ *
+ * The ring memory is allocated contiguously, zeroed, and rounded up
+ * to a whole number of pages; the new ring starts out LDC_UNBOUND
+ * and is linked onto the driver-global dring list.
+ */
+int
+ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
+{
+	ldc_dring_t *dringp;
+	/*
+	 * NOTE(review): dsize * len is evaluated in 32-bit arithmetic
+	 * before widening to size_t - rings >= 4GB would overflow.
+	 */
+	size_t size = (dsize * len);
+
+	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
+	    len, dsize);
+
+	if (dhandle == NULL) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
+		return (EINVAL);
+	}
+
+	if (len == 0) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
+		return (EINVAL);
+	}
+
+	/* descriptor size should be 8-byte aligned */
+	if (dsize == 0 || (dsize & 0x7)) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
+		return (EINVAL);
+	}
+
+	*dhandle = 0;
+
+	/* Allocate a desc ring structure */
+	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
+
+	/* Initialize dring */
+	dringp->length = len;
+	dringp->dsize = dsize;
+
+	/* round off to multiple of pagesize */
+	dringp->size = (size & MMU_PAGEMASK);
+	if (size & MMU_PAGEOFFSET)
+		dringp->size += MMU_PAGESIZE;
+
+	dringp->status = LDC_UNBOUND;
+
+	/* allocate descriptor ring memory */
+	dringp->base = contig_mem_alloc_align(dringp->size, PAGESIZE);
+	if (dringp->base == NULL) {
+		cmn_err(CE_WARN,
+		    "ldc_mem_dring_create: unable to alloc desc\n");
+		kmem_free(dringp, sizeof (ldc_dring_t));
+		return (ENOMEM);
+	}
+
+	bzero(dringp->base, dringp->size);
+
+	/* initialize the desc ring lock */
+	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
+
+	/* Add descriptor ring to the head of global list */
+	mutex_enter(&ldcssp->lock);
+	dringp->next = ldcssp->dring_list;
+	ldcssp->dring_list = dringp;
+	mutex_exit(&ldcssp->lock);
+
+	*dhandle = (ldc_dring_handle_t)dringp;
+
+	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
+
+	return (0);
+}
+
+
+/*
+ * Destroy a descriptor ring.
+ *
+ * The ring must not be bound (EACCES otherwise). It is unlinked from
+ * the driver-global list, its contiguous memory freed, and the ring
+ * object itself destroyed.
+ *
+ * Lock order: dringp->lock is taken before the global ldcssp->lock.
+ */
+int
+ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
+{
+	ldc_dring_t *dringp;
+	ldc_dring_t *tmp_dringp;
+
+	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
+
+	if (dhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
+		return (EINVAL);
+	}
+	dringp = (ldc_dring_t *)dhandle;
+
+	if (dringp->status == LDC_BOUND) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_destroy: desc ring is bound\n");
+		return (EACCES);
+	}
+
+	mutex_enter(&dringp->lock);
+	mutex_enter(&ldcssp->lock);
+
+	/* remove from linked list - if not bound */
+	tmp_dringp = ldcssp->dring_list;
+	if (tmp_dringp == dringp) {
+		/* ring is at the head of the global list */
+		ldcssp->dring_list = dringp->next;
+		dringp->next = NULL;
+
+	} else {
+		/* walk the list looking for the predecessor */
+		while (tmp_dringp != NULL) {
+			if (tmp_dringp->next == dringp) {
+				tmp_dringp->next = dringp->next;
+				dringp->next = NULL;
+				break;
+			}
+			tmp_dringp = tmp_dringp->next;
+		}
+		if (tmp_dringp == NULL) {
+			DWARN(DBG_ALL_LDCS,
+			    "ldc_mem_dring_destroy: invalid descriptor\n");
+			mutex_exit(&ldcssp->lock);
+			mutex_exit(&dringp->lock);
+			return (EINVAL);
+		}
+	}
+
+	mutex_exit(&ldcssp->lock);
+
+	/* free the descriptor ring */
+	contig_mem_free((caddr_t)dringp->base, dringp->size);
+
+	mutex_exit(&dringp->lock);
+
+	/* destroy dring lock */
+	mutex_destroy(&dringp->lock);
+
+	/* free desc ring object */
+	kmem_free(dringp, sizeof (ldc_dring_t));
+
+	return (0);
+}
+
+/*
+ * Bind a previously allocated dring to a channel. The channel should
+ * be OPEN in order to bind the ring to the channel. Returns back a
+ * descriptor ring cookie. The descriptor ring is exported for remote
+ * access by the client at the other end of the channel. An entry for
+ * dring pages is stored in map table (via call to ldc_mem_bind_handle).
+ */
+int
+ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
+    uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
+{
+	int		err;
+	ldc_chan_t 	*ldcp;
+	ldc_dring_t	*dringp;
+	ldc_mem_handle_t mhandle;
+
+	/* check to see if channel is initalized */
+	if (handle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_bind: invalid channel handle\n");
+		return (EINVAL);
+	}
+	ldcp = (ldc_chan_t *)handle;
+
+	if (dhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_bind: invalid desc ring handle\n");
+		return (EINVAL);
+	}
+	dringp = (ldc_dring_t *)dhandle;
+
+	if (cookie == NULL) {
+		DWARN(ldcp->id,
+		    "ldc_mem_dring_bind: invalid cookie arg\n");
+		return (EINVAL);
+	}
+
+	mutex_enter(&dringp->lock);
+
+	if (dringp->status == LDC_BOUND) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
+		    ldcp->id);
+		mutex_exit(&dringp->lock);
+		return (EINVAL);
+	}
+
+	if ((perm & LDC_MEM_RW) == 0) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_bind: invalid permissions\n");
+		mutex_exit(&dringp->lock);
+		return (EINVAL);
+	}
+
+	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
+		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
+		mutex_exit(&dringp->lock);
+		return (EINVAL);
+	}
+
+	dringp->ldcp = ldcp;
+
+	/* create an memory handle */
+	err = ldc_mem_alloc_handle(handle, &mhandle);
+	if (err || mhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
+		    ldcp->id);
+		mutex_exit(&dringp->lock);
+		return (err);
+	}
+	dringp->mhdl = mhandle;
+
+	/* bind the descriptor ring to channel */
+	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
+	    mtype, perm, cookie, ccount);
+	if (err) {
+		/*
+		 * NOTE(review): the mhandle allocated above is not freed
+		 * on this error path and dringp->mhdl/ldcp remain set -
+		 * verify whether a ldc_mem_free_handle() is warranted here.
+		 */
+		DWARN(ldcp->id,
+		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
+		    ldcp->id);
+		mutex_exit(&dringp->lock);
+		return (err);
+	}
+
+	/*
+	 * For now return error if we get more than one cookie
+	 * FUTURE: Return multiple cookies ..
+	 */
+	if (*ccount > 1) {
+		(void) ldc_mem_unbind_handle(mhandle);
+		(void) ldc_mem_free_handle(mhandle);
+
+		dringp->ldcp = NULL;
+		dringp->mhdl = NULL;
+		*ccount = 0;
+
+		mutex_exit(&dringp->lock);
+		return (EAGAIN);
+	}
+
+	/* Add descriptor ring to channel's exported dring list */
+	mutex_enter(&ldcp->exp_dlist_lock);
+	dringp->ch_next = ldcp->exp_dring_list;
+	ldcp->exp_dring_list = dringp;
+	mutex_exit(&ldcp->exp_dlist_lock);
+
+	dringp->status = LDC_BOUND;
+
+	mutex_exit(&dringp->lock);
+
+	return (0);
+}
+
+/*
+ * Return the next cookie associated with the specified dring handle.
+ * Simply validates the ring state and delegates to ldc_mem_nextcookie()
+ * on the ring's backing memory handle.
+ */
+int
+ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
+{
+	int		rv = 0;
+	ldc_dring_t 	*dringp;
+	ldc_chan_t	*ldcp;
+
+	if (dhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
+		return (EINVAL);
+	}
+	dringp = (ldc_dring_t *)dhandle;
+	mutex_enter(&dringp->lock);
+
+	if (dringp->status != LDC_BOUND) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
+		    "is not bound\n", dringp);
+		mutex_exit(&dringp->lock);
+		return (EINVAL);
+	}
+
+	ldcp = dringp->ldcp;
+
+	if (cookie == NULL) {
+		DWARN(ldcp->id,
+		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
+		    ldcp->id);
+		mutex_exit(&dringp->lock);
+		return (EINVAL);
+	}
+
+	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
+	mutex_exit(&dringp->lock);
+
+	return (rv);
+}
+/*
+ * Unbind a previously bound dring from a channel.
+ *
+ * Removes the ring from the channel's exported dring list, unbinds
+ * and frees the backing memory handle, and returns the ring to the
+ * LDC_UNBOUND state so it can be rebound or destroyed.
+ *
+ * Returns 0 on success, EINVAL on a bad handle, an already-unbound
+ * ring, or a ring not found on the channel's exported list.
+ */
+int
+ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
+{
+	ldc_dring_t 	*dringp;
+	ldc_dring_t	*tmp_dringp;
+	ldc_chan_t	*ldcp;
+
+	if (dhandle == NULL) {
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
+		return (EINVAL);
+	}
+	dringp = (ldc_dring_t *)dhandle;
+
+	mutex_enter(&dringp->lock);
+
+	if (dringp->status == LDC_UNBOUND) {
+		/* fixed: message previously said "ldc_mem_dring_bind" */
+		DWARN(DBG_ALL_LDCS,
+		    "ldc_mem_dring_unbind: descriptor ring 0x%llx is unbound\n",
+		    dringp);
+		mutex_exit(&dringp->lock);
+		return (EINVAL);
+	}
+	ldcp = dringp->ldcp;
+
+	mutex_enter(&ldcp->exp_dlist_lock);
+
+	/* unlink the ring from the channel's exported dring list */
+	tmp_dringp = ldcp->exp_dring_list;
+	if (tmp_dringp == dringp) {
+		ldcp->exp_dring_list = dringp->ch_next;
+		dringp->ch_next = NULL;
+
+	} else {
+		while (tmp_dringp != NULL) {
+			if (tmp_dringp->ch_next == dringp) {
+				tmp_dringp->ch_next = dringp->ch_next;
+				dringp->ch_next = NULL;
+				break;
+			}
+			tmp_dringp = tmp_dringp->ch_next;
+		}
+		if (tmp_dringp == NULL) {
+			DWARN(DBG_ALL_LDCS,
+			    "ldc_mem_dring_unbind: invalid descriptor\n");
+			mutex_exit(&ldcp->exp_dlist_lock);
+			mutex_exit(&dringp->lock);
+			return (EINVAL);
+		}
+	}
+
+	mutex_exit(&ldcp->exp_dlist_lock);
+
+	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
+	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
+
+	dringp->ldcp = NULL;
+	dringp->mhdl = NULL;
+	dringp->status = LDC_UNBOUND;
+
+	mutex_exit(&dringp->lock);
+
+	return (0);
+}
+
+/*
+ * Get information about the dring. The base address of the descriptor
+ * ring along with the type and permission are returned back.
+ */
+int
+ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
+{
+ ldc_dring_t *dringp;
+ int rv;
+
+ if (dhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_info: invalid desc ring handle\n");
+ return (EINVAL);
+ }
+ dringp = (ldc_dring_t *)dhandle;
+
+ mutex_enter(&dringp->lock);
+
+ if (dringp->mhdl) {
+ rv = ldc_mem_info(dringp->mhdl, minfo);
+ if (rv) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_info: error reading mem info\n");
+ mutex_exit(&dringp->lock);
+ return (rv);
+ }
+ } else {
+ minfo->vaddr = dringp->base;
+ minfo->raddr = NULL;
+ minfo->status = dringp->status;
+ }
+
+ mutex_exit(&dringp->lock);
+
+ return (0);
+}
+
+/*
+ * Map an exported descriptor ring into the local address space. If the
+ * descriptor ring was exported for direct map access, a HV call is made
+ * to allocate a RA range. If the map is done via a shadow copy, local
+ * shadow memory is allocated.
+ */
+int
+ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
+ uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
+ ldc_dring_handle_t *dhandle)
+{
+ int err;
+ ldc_chan_t *ldcp = (ldc_chan_t *)handle;
+ ldc_mem_handle_t mhandle;
+ ldc_dring_t *dringp;
+ size_t dring_size;
+
+ if (dhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_map: invalid dhandle\n");
+ return (EINVAL);
+ }
+
+ /* check to see if the channel is initialized */
+ if (handle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_map: invalid channel handle\n");
+ return (EINVAL);
+ }
+ ldcp = (ldc_chan_t *)handle;
+
+ if (cookie == NULL) {
+ DWARN(ldcp->id,
+ "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
+ ldcp->id);
+ return (EINVAL);
+ }
+
+ /* FUTURE: For now we support only one cookie per dring */
+ ASSERT(ccount == 1);
+
+ if (cookie->size < (dsize * len)) {
+ DWARN(ldcp->id,
+ "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
+ ldcp->id);
+ return (EINVAL);
+ }
+
+ *dhandle = 0;
+
+ /* Allocate a dring structure */
+ dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
+
+ D1(ldcp->id,
+ "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
+ mtype, len, dsize, cookie->addr, cookie->size);
+
+ /* Initialize dring */
+ dringp->length = len;
+ dringp->dsize = dsize;
+
+ /* round up to a multiple of the page size */
+ dring_size = len * dsize;
+ dringp->size = (dring_size & MMU_PAGEMASK);
+ if (dring_size & MMU_PAGEOFFSET)
+ dringp->size += MMU_PAGESIZE;
+
+ dringp->ldcp = ldcp;
+
+ /* create a memory handle */
+ err = ldc_mem_alloc_handle(handle, &mhandle);
+ if (err || mhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
+ err);
+ kmem_free(dringp, sizeof (ldc_dring_t));
+ return (ENOMEM);
+ }
+
+ dringp->mhdl = mhandle;
+ dringp->base = NULL;
+
+ /* map the dring into local memory */
+ err = ldc_mem_map(mhandle, cookie, ccount, mtype,
+ &(dringp->base), NULL);
+ if (err || dringp->base == NULL) {
+ cmn_err(CE_WARN,
+ "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
+ (void) ldc_mem_free_handle(mhandle);
+ kmem_free(dringp, sizeof (ldc_dring_t));
+ return (ENOMEM);
+ }
+
+ /* initialize the desc ring lock */
+ mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* Add descriptor ring to channel's imported dring list */
+ mutex_enter(&ldcp->imp_dlist_lock);
+ dringp->ch_next = ldcp->imp_dring_list;
+ ldcp->imp_dring_list = dringp;
+ mutex_exit(&ldcp->imp_dlist_lock);
+
+ dringp->status = LDC_MAPPED;
+
+ *dhandle = (ldc_dring_handle_t)dringp;
+
+ return (0);
+}
+
+/*
+ * Unmap a descriptor ring. Free shadow memory (if any).
+ */
+int
+ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
+{
+ ldc_dring_t *dringp;
+ ldc_dring_t *tmp_dringp;
+ ldc_chan_t *ldcp;
+
+ if (dhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_unmap: invalid desc ring handle\n");
+ return (EINVAL);
+ }
+ dringp = (ldc_dring_t *)dhandle;
+
+ if (dringp->status != LDC_MAPPED) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_unmap: not a mapped desc ring\n");
+ return (EINVAL);
+ }
+
+ mutex_enter(&dringp->lock);
+
+ ldcp = dringp->ldcp;
+
+ mutex_enter(&ldcp->imp_dlist_lock);
+
+ /* find and unlink the desc ring from channel import list */
+ tmp_dringp = ldcp->imp_dring_list;
+ if (tmp_dringp == dringp) {
+ ldcp->imp_dring_list = dringp->ch_next;
+ dringp->ch_next = NULL;
+
+ } else {
+ while (tmp_dringp != NULL) {
+ if (tmp_dringp->ch_next == dringp) {
+ tmp_dringp->ch_next = dringp->ch_next;
+ dringp->ch_next = NULL;
+ break;
+ }
+ tmp_dringp = tmp_dringp->ch_next;
+ }
+ if (tmp_dringp == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "ldc_mem_dring_unmap: invalid descriptor\n");
+ mutex_exit(&ldcp->imp_dlist_lock);
+ mutex_exit(&dringp->lock);
+ return (EINVAL);
+ }
+ }
+
+ mutex_exit(&ldcp->imp_dlist_lock);
+
+ /* do a LDC memory handle unmap and free */
+ (void) ldc_mem_unmap(dringp->mhdl);
+ (void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
+
+ dringp->status = 0;
+ dringp->ldcp = NULL;
+
+ mutex_exit(&dringp->lock);
+
+ /* destroy dring lock */
+ mutex_destroy(&dringp->lock);
+
+ /* free desc ring object */
+ kmem_free(dringp, sizeof (ldc_dring_t));
+
+ return (0);
+}
+
+/*
+ * Internal entry point for descriptor ring access entry consistency
+ * semantics. Acquire copies the contents of the remote descriptor ring
+ * into the local shadow copy. The release operation copies the local
+ * contents into the remote dring. The start and end locations specify
+ * bounds for the entries being synchronized.
+ */
+static int
+i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
+ uint8_t direction, uint64_t start, uint64_t end)
+{
+ int err;
+ ldc_dring_t *dringp;
+ ldc_chan_t *ldcp;
+ uint64_t soff;
+ size_t copy_size;
+
+ if (dhandle == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_dring_acquire_release: invalid desc ring handle\n");
+ return (EINVAL);
+ }
+ dringp = (ldc_dring_t *)dhandle;
+ mutex_enter(&dringp->lock);
+
+ if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_dring_acquire_release: not a mapped desc ring\n");
+ mutex_exit(&dringp->lock);
+ return (EINVAL);
+ }
+
+ if (start >= dringp->length || end >= dringp->length) {
+ DWARN(DBG_ALL_LDCS,
+ "i_ldc_dring_acquire_release: index out of range\n");
+ mutex_exit(&dringp->lock);
+ return (EINVAL);
+ }
+
+ /* get the channel handle */
+ ldcp = dringp->ldcp;
+
+ copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
+ ((dringp->length - start) * dringp->dsize);
+
+ /* Calculate the relative offset for the first desc */
+ soff = (start * dringp->dsize);
+
+ /* copy to/from remote from/to local memory */
+ D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
+ soff, copy_size);
+ err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
+ direction, soff, copy_size);
+ if (err) {
+ DWARN(ldcp->id,
+ "i_ldc_dring_acquire_release: copy failed\n");
+ mutex_exit(&dringp->lock);
+ return (err);
+ }
+
+ /* do the balance */
+ if (start > end) {
+ copy_size = ((end + 1) * dringp->dsize);
+ soff = 0;
+
+ /* copy to/from remote from/to local memory */
+ D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
+ "off=0x%llx sz=0x%llx\n", soff, copy_size);
+ err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
+ direction, soff, copy_size);
+ if (err) {
+ DWARN(ldcp->id,
+ "i_ldc_dring_acquire_release: copy failed\n");
+ mutex_exit(&dringp->lock);
+ return (err);
+ }
+ }
+
+ mutex_exit(&dringp->lock);
+
+ return (0);
+}
+
+/*
+ * Ensure that the contents in the local dring are consistent
+ * with the contents of the remote dring
+ */
+int
+ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
+{
+ return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
+}
+
+/*
+ * Ensure that the contents in the remote dring are consistent
+ * with the contents of the local dring
+ */
+int
+ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
+{
+ return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
+}
+
+
+/* ------------------------------------------------------------------------- */
diff --git a/usr/src/uts/sun4v/io/mdeg.c b/usr/src/uts/sun4v/io/mdeg.c
new file mode 100644
index 0000000000..879f8b9725
--- /dev/null
+++ b/usr/src/uts/sun4v/io/mdeg.c
@@ -0,0 +1,914 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * MD Event Generator (MDEG) Module
+ */
+
+#include <sys/machsystm.h>
+#include <sys/taskq.h>
+#include <sys/disp.h>
+#include <sys/cmn_err.h>
+#include <sys/note.h>
+
+#include <sys/mdeg.h>
+#include <sys/mach_descrip.h>
+#include <sys/mdesc.h>
+
+/*
+ * A single client registration
+ */
+typedef struct mdeg_clnt {
+ boolean_t valid; /* structure is in active use */
+ mdeg_node_match_t *nmatch; /* node match filter */
+ mdeg_node_spec_t *pspec; /* parent match filter */
+ mdeg_cb_t cb; /* the client callback */
+ caddr_t cb_arg; /* argument to the callback */
+ uint64_t magic; /* sanity checking magic */
+ mdeg_handle_t hdl; /* handle assigned by MDEG */
+} mdeg_clnt_t;
+
+/*
+ * Global MDEG data
+ *
+ * Locking Strategy:
+ *
+ * mdeg.lock - lock used to synchronize system-wide MD updates. An
+ * MD update must be treated as an atomic event. The lock is
+ * taken when notification that a new MD is available and held
+ * until all clients have been notified.
+ *
+ * mdeg.rwlock - lock used to synchronize access to the table of
+ * registered clients. The reader lock must be held when looking
+ * up client information in the table. The writer lock must be
+ * held when modifying any client information.
+ */
+static struct mdeg {
+ taskq_t *taskq; /* for internal processing */
+ boolean_t enabled; /* enable/disable taskq processing */
+ kmutex_t lock; /* synchronize MD updates */
+ md_t *md_prev; /* previous MD */
+ md_t *md_curr; /* current MD */
+ mdeg_clnt_t *tbl; /* table of registered clients */
+ krwlock_t rwlock; /* client table lock */
+ uint_t maxclnts; /* client table size */
+ uint_t nclnts; /* current number of clients */
+} mdeg;
+
+/*
+ * Debugging routines
+ */
+#ifdef DEBUG
+uint_t mdeg_debug = 0x0;
+
+static void mdeg_dump_clnt(mdeg_clnt_t *clnt);
+static void mdeg_dump_table(void);
+
+#define MDEG_DBG if (mdeg_debug) printf
+#define MDEG_DUMP_CLNT mdeg_dump_clnt
+#define MDEG_DUMP_TABLE mdeg_dump_table
+
+#else /* DEBUG */
+
+#define MDEG_DBG _NOTE(CONSTCOND) if (0) printf
+#define MDEG_DUMP_CLNT
+#define MDEG_DUMP_TABLE()
+
+#endif /* DEBUG */
+
+/*
+ * Global constants
+ */
+#define MDEG_MAX_TASKQ_THR 512 /* maximum number of taskq threads */
+#define MDEG_MAX_CLNTS_INIT 64 /* initial client table size */
+
+#define MDEG_MAGIC 0x4D4445475F48444Cull /* 'MDEG_HDL' */
+
+/*
+ * A client handle is a 64 bit value with two pieces of
+ * information encoded in it. The upper 32 bits are the
+ * index into the table of a particular client structure.
+ * The lower 32 bits are a counter that is incremented
+ * each time a client structure is reused.
+ */
+#define MDEG_IDX_SHIFT 32
+#define MDEG_COUNT_MASK 0xfffffffful
+
+#define MDEG_ALLOC_HDL(_idx, _count) (((uint64_t)_idx << MDEG_IDX_SHIFT) | \
+ ((uint64_t)(_count + 1) & \
+ MDEG_COUNT_MASK))
+#define MDEG_HDL2IDX(hdl) (hdl >> MDEG_IDX_SHIFT)
+#define MDEG_HDL2COUNT(hdl) (hdl & MDEG_COUNT_MASK)
+
+static const char trunc_str[] = " ... }";
+
+/*
+ * Utility routines
+ */
+static mdeg_clnt_t *mdeg_alloc_clnt(void);
+static void mdeg_notify_client(void *);
+static mde_cookie_t mdeg_find_start_node(md_t *, mdeg_node_spec_t *);
+static boolean_t mdeg_node_spec_match(md_t *, mde_cookie_t, mdeg_node_spec_t *);
+static void mdeg_get_diff_results(md_diff_cookie_t, mdeg_result_t *);
+
+int
+mdeg_init(void)
+{
+ int tblsz;
+
+ /*
+ * Grab the current MD
+ */
+ if ((mdeg.md_curr = md_get_handle()) == NULL) {
+ cmn_err(CE_WARN, "unable to cache snapshot of MD");
+ return (-1);
+ }
+
+ /*
+ * Initialize table of registered clients
+ */
+ mdeg.maxclnts = MDEG_MAX_CLNTS_INIT;
+
+ tblsz = mdeg.maxclnts * sizeof (mdeg_clnt_t);
+ mdeg.tbl = kmem_zalloc(tblsz, KM_SLEEP);
+
+ rw_init(&mdeg.rwlock, NULL, RW_DRIVER, NULL);
+
+ mdeg.nclnts = 0;
+
+ /*
+ * Initialize global lock
+ */
+ mutex_init(&mdeg.lock, NULL, MUTEX_DRIVER, NULL);
+
+ /*
+ * Initialize the task queue
+ */
+ mdeg.taskq = taskq_create("mdeg_taskq", 1, minclsyspri, 1,
+ MDEG_MAX_TASKQ_THR, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+
+ /* ready to begin handling clients */
+ mdeg.enabled = B_TRUE;
+
+ return (0);
+}
+
+void
+mdeg_fini(void)
+{
+ /*
+ * Flip the enabled switch off to make sure that
+ * no events get dispatched while things are being
+ * torn down.
+ */
+ mdeg.enabled = B_FALSE;
+
+ /* destroy the task queue */
+ taskq_destroy(mdeg.taskq);
+
+ /*
+ * Deallocate the table of registered clients
+ */
+ kmem_free(mdeg.tbl, mdeg.maxclnts * sizeof (mdeg_clnt_t));
+ rw_destroy(&mdeg.rwlock);
+
+ /*
+ * Free up the cached MDs.
+ */
+ if (mdeg.md_curr)
+ (void) md_fini_handle(mdeg.md_curr);
+
+ if (mdeg.md_prev)
+ (void) md_fini_handle(mdeg.md_prev);
+
+ mutex_destroy(&mdeg.lock);
+}
+
+static mdeg_clnt_t *
+mdeg_alloc_clnt(void)
+{
+ mdeg_clnt_t *clnt;
+ int idx;
+ mdeg_clnt_t *newtbl;
+ uint_t newmaxclnts;
+ uint_t newtblsz;
+ uint_t oldtblsz;
+
+ ASSERT(RW_WRITE_HELD(&mdeg.rwlock));
+
+ /* search for an unused slot in the table */
+ for (idx = 0; idx < mdeg.maxclnts; idx++) {
+ clnt = &mdeg.tbl[idx];
+ if (!clnt->valid) {
+ break;
+ }
+ }
+
+ /* found an empty slot */
+ if (idx != mdeg.maxclnts) {
+ goto found;
+ }
+
+ /*
+ * There was no free space in the table. Grow
+ * the table to double its current size.
+ */
+
+ MDEG_DBG("client table full:\n");
+ MDEG_DUMP_TABLE();
+
+ newmaxclnts = mdeg.maxclnts * 2;
+ newtblsz = newmaxclnts * sizeof (mdeg_clnt_t);
+
+ newtbl = kmem_zalloc(newtblsz, KM_SLEEP);
+
+ /* copy old table data to the new table */
+ oldtblsz = mdeg.maxclnts * sizeof (mdeg_clnt_t);
+ bcopy(mdeg.tbl, newtbl, oldtblsz);
+
+ /*
+ * Since the old table was full, the first free entry
+ * is just past the end of the old data in the new table.
+ * Point into newtbl, not mdeg.tbl, which is freed below.
+ */
+ clnt = &newtbl[mdeg.maxclnts];
+
+ /* clean up the old table */
+ kmem_free(mdeg.tbl, oldtblsz);
+ mdeg.tbl = newtbl;
+ mdeg.maxclnts = newmaxclnts;
+
+found:
+ ASSERT(clnt->valid == 0);
+
+ clnt->hdl = MDEG_ALLOC_HDL(idx, MDEG_HDL2COUNT(clnt->hdl));
+
+ return (clnt);
+}
+
+static mdeg_clnt_t *
+mdeg_get_client(mdeg_handle_t hdl)
+{
+ int idx;
+ mdeg_clnt_t *clnt;
+
+ idx = MDEG_HDL2IDX(hdl);
+
+ /* check if index is out of bounds */
+ if ((idx < 0) || (idx >= mdeg.maxclnts)) {
+ MDEG_DBG("mdeg_get_client: index out of bounds\n");
+ return (NULL);
+ }
+
+ clnt = &mdeg.tbl[idx];
+
+ /* check for a valid client */
+ if (!clnt->valid) {
+ MDEG_DBG("mdeg_get_client: client is not valid\n");
+ return (NULL);
+ }
+
+ /* make sure the handle is an exact match */
+ if (clnt->hdl != hdl) {
+ MDEG_DBG("mdeg_get_client: bad handle\n");
+ return (NULL);
+ }
+
+ if (clnt->magic != MDEG_MAGIC) {
+ MDEG_DBG("mdeg_get_client: bad magic\n");
+ return (NULL);
+ }
+
+ return (clnt);
+}
+
+/*
+ * Send a notification to a client immediately after it registers.
+ * The result_t is a list of all the nodes that match their specified
+ * nodes of interest, all returned on the added list. This serves
+ * as a base of reference to the client. All future MD updates are
+ * relative to this list.
+ */
+static int
+mdeg_notify_client_reg(mdeg_clnt_t *clnt)
+{
+ md_t *mdp = NULL;
+ mde_str_cookie_t nname;
+ mde_str_cookie_t aname;
+ mde_cookie_t startnode;
+ int nnodes;
+ int nodechk;
+ mde_cookie_t *listp = NULL;
+ mdeg_result_t *mdeg_res = NULL;
+ int rv = MDEG_SUCCESS;
+
+ mutex_enter(&mdeg.lock);
+
+ /*
+ * Handle the special case where the node specification
+ * is NULL. In this case, call the client callback without
+ * any results. All processing is left to the client.
+ */
+ if (clnt->pspec == NULL) {
+ /* call the client callback */
+ (*clnt->cb)(clnt->cb_arg, NULL);
+ goto done;
+ }
+
+ if ((mdp = md_get_handle()) == NULL) {
+ cmn_err(CE_WARN, "unable to retrieve current MD");
+ rv = MDEG_FAILURE;
+ goto done;
+ }
+
+ startnode = mdeg_find_start_node(mdp, clnt->pspec);
+ if (startnode == MDE_INVAL_ELEM_COOKIE) {
+ /* not much we can do */
+ cmn_err(CE_WARN, "unable to match node specifier");
+ rv = MDEG_FAILURE;
+ goto done;
+ }
+
+ /*
+ * Use zalloc to provide correct default values for the
+ * unused removed, match_prev, and match_curr lists.
+ */
+ mdeg_res = kmem_zalloc(sizeof (mdeg_result_t), KM_SLEEP);
+
+ nname = md_find_name(mdp, clnt->nmatch->namep);
+ aname = md_find_name(mdp, "fwd");
+
+ nnodes = md_scan_dag(mdp, startnode, nname, aname, NULL);
+
+ if (nnodes == 0) {
+ MDEG_DBG("mdeg_notify_client_reg: no nodes of interest\n");
+ rv = MDEG_SUCCESS;
+ goto done;
+ } else if (nnodes == -1) {
+ MDEG_DBG("error scanning DAG\n");
+ rv = MDEG_FAILURE;
+ goto done;
+ }
+
+ MDEG_DBG("mdeg_notify_client_reg: %d node%s of interest\n",
+ nnodes, (nnodes == 1) ? "" : "s");
+
+ /* get the list of nodes of interest */
+ listp = kmem_alloc(sizeof (mde_cookie_t) * nnodes, KM_SLEEP);
+ nodechk = md_scan_dag(mdp, startnode, nname, aname, listp);
+
+ ASSERT(nodechk == nnodes);
+
+ mdeg_res->added.mdp = mdp;
+ mdeg_res->added.mdep = listp;
+ mdeg_res->added.nelem = nnodes;
+
+ /* call the client callback */
+ (*clnt->cb)(clnt->cb_arg, mdeg_res);
+
+done:
+ mutex_exit(&mdeg.lock);
+
+ if (mdp)
+ (void) md_fini_handle(mdp);
+
+ if (listp)
+ kmem_free(listp, sizeof (mde_cookie_t) * nnodes);
+
+ if (mdeg_res)
+ kmem_free(mdeg_res, sizeof (mdeg_result_t));
+
+ return (rv);
+}
+
+/*
+ * Register to receive an event notification when the system
+ * machine description is updated.
+ *
+ * Passing NULL for the node specification parameter is valid
+ * as long as the match specification is also NULL. In this
+ * case, the client will receive a notification when the MD
+ * has been updated, but the callback will not include any
+ * information. The client is then responsible for obtaining
+ * its own copy of the system MD and performing any processing
+ * manually.
+ */
+int
+mdeg_register(mdeg_node_spec_t *pspecp, mdeg_node_match_t *nmatchp,
+ mdeg_cb_t cb, void *cb_arg, mdeg_handle_t *hdlp)
+{
+ mdeg_clnt_t *clnt;
+
+ /*
+ * If the RW lock is held, a client is calling
+ * register from its own callback.
+ */
+ if (RW_LOCK_HELD(&mdeg.rwlock)) {
+ MDEG_DBG("mdeg_register: rwlock already held\n");
+ return (MDEG_FAILURE);
+ }
+
+ /* node spec and node match must both be valid, or both NULL */
+ if (((pspecp != NULL) && (nmatchp == NULL)) ||
+ ((pspecp == NULL) && (nmatchp != NULL))) {
+ MDEG_DBG("mdeg_register: invalid parameters\n");
+ return (MDEG_FAILURE);
+ }
+
+ rw_enter(&mdeg.rwlock, RW_WRITER);
+
+ clnt = mdeg_alloc_clnt();
+
+ ASSERT(clnt);
+
+ /*
+ * Fill in the rest of the data
+ */
+ clnt->nmatch = nmatchp;
+ clnt->pspec = pspecp;
+ clnt->cb = cb;
+ clnt->cb_arg = cb_arg;
+ clnt->magic = MDEG_MAGIC;
+
+ /* do this last */
+ clnt->valid = B_TRUE;
+
+ MDEG_DBG("client registered (0x%lx):\n", clnt->hdl);
+ MDEG_DUMP_CLNT(clnt);
+
+ mdeg.nclnts++;
+
+ if (mdeg_notify_client_reg(clnt) != MDEG_SUCCESS) {
+ bzero(clnt, sizeof (mdeg_clnt_t));
+ rw_exit(&mdeg.rwlock);
+ return (MDEG_FAILURE);
+ }
+
+ rw_exit(&mdeg.rwlock);
+
+ *hdlp = clnt->hdl;
+
+ return (MDEG_SUCCESS);
+}
+
+int
+mdeg_unregister(mdeg_handle_t hdl)
+{
+ mdeg_clnt_t *clnt;
+ mdeg_handle_t mdh;
+
+ /*
+ * If the RW lock is held, a client is calling
+ * unregister from its own callback.
+ */
+ if (RW_LOCK_HELD(&mdeg.rwlock)) {
+ MDEG_DBG("mdeg_unregister: rwlock already held\n");
+ return (MDEG_FAILURE);
+ }
+
+ /* lookup the client */
+ if ((clnt = mdeg_get_client(hdl)) == NULL) {
+ return (MDEG_FAILURE);
+ }
+
+ rw_enter(&mdeg.rwlock, RW_WRITER);
+
+ MDEG_DBG("client unregistered (0x%lx):\n", hdl);
+ MDEG_DUMP_CLNT(clnt);
+
+ /* save the handle to prevent reuse */
+ mdh = clnt->hdl;
+ bzero(clnt, sizeof (mdeg_clnt_t));
+
+ clnt->hdl = mdh;
+
+ mdeg.nclnts--;
+
+ rw_exit(&mdeg.rwlock);
+
+ return (MDEG_SUCCESS);
+}
+
+/*
+ * Simple algorithm for now, grab the global lock and let all
+ * the clients update themselves in parallel. There is a lot of
+ * room for improvement here. We could eliminate some scans of
+ * the DAG by incrementally scanning at lower levels of the DAG
+ * rather than having each client start its own scan from the root.
+ */
+void
+mdeg_notify_clients(void)
+{
+ md_t *md_new;
+ mdeg_clnt_t *clnt;
+ int idx;
+ int nclnt;
+
+ rw_enter(&mdeg.rwlock, RW_READER);
+ mutex_enter(&mdeg.lock);
+
+ /*
+ * Rotate the MDs
+ */
+ if ((md_new = md_get_handle()) == NULL) {
+ cmn_err(CE_WARN, "unable to retrieve new MD");
+ goto done;
+ }
+
+ if (mdeg.md_prev) {
+ (void) md_fini_handle(mdeg.md_prev);
+ }
+
+ mdeg.md_prev = mdeg.md_curr;
+ mdeg.md_curr = md_new;
+
+ if (mdeg.nclnts == 0) {
+ MDEG_DBG("mdeg_notify_clients: no clients registered\n");
+ goto done;
+ }
+
+ /* dispatch the update notification to all clients */
+ for (idx = 0, nclnt = 0; idx < mdeg.maxclnts; idx++) {
+ clnt = &mdeg.tbl[idx];
+
+ if (!clnt->valid)
+ continue;
+
+ MDEG_DBG("notifying client 0x%lx (%d/%d)\n", clnt->hdl,
+ ++nclnt, mdeg.nclnts);
+
+ (void) taskq_dispatch(mdeg.taskq, mdeg_notify_client,
+ (void *)clnt, TQ_SLEEP);
+ }
+
+ taskq_wait(mdeg.taskq);
+
+done:
+ mutex_exit(&mdeg.lock);
+ rw_exit(&mdeg.rwlock);
+}
+
+static void
+mdeg_notify_client(void *arg)
+{
+ mdeg_clnt_t *clnt = (mdeg_clnt_t *)arg;
+ md_diff_cookie_t mdd = MD_INVAL_DIFF_COOKIE;
+ mdeg_result_t mdeg_res;
+ mde_cookie_t md_prev_start;
+ mde_cookie_t md_curr_start;
+
+ rw_enter(&mdeg.rwlock, RW_READER);
+
+ if (!mdeg.enabled) {
+ /* trying to shutdown */
+ MDEG_DBG("mdeg_notify_client: mdeg disabled, aborting\n");
+ goto cleanup;
+ }
+
+ /*
+ * Handle the special case where the node specification
+ * is NULL. In this case, call the client callback without
+ * any results. All processing is left to the client.
+ */
+ if (clnt->pspec == NULL) {
+ /* call the client callback */
+ (*clnt->cb)(clnt->cb_arg, NULL);
+
+ MDEG_DBG("MDEG client callback done\n");
+ goto cleanup;
+ }
+
+ /* find our start nodes */
+ md_prev_start = mdeg_find_start_node(mdeg.md_prev, clnt->pspec);
+ if (md_prev_start == MDE_INVAL_ELEM_COOKIE) {
+ goto cleanup;
+ }
+
+ md_curr_start = mdeg_find_start_node(mdeg.md_curr, clnt->pspec);
+ if (md_curr_start == MDE_INVAL_ELEM_COOKIE) {
+ goto cleanup;
+ }
+
+ /* diff the MDs */
+ mdd = md_diff_init(mdeg.md_prev, md_prev_start, mdeg.md_curr,
+ md_curr_start, clnt->nmatch->namep, clnt->nmatch->matchp);
+
+ if (mdd == MD_INVAL_DIFF_COOKIE) {
+ MDEG_DBG("unable to diff MDs\n");
+ goto cleanup;
+ }
+
+ /*
+ * Cache the results of the diff
+ */
+ mdeg_get_diff_results(mdd, &mdeg_res);
+
+ /* call the client callback */
+ (*clnt->cb)(clnt->cb_arg, &mdeg_res);
+
+ MDEG_DBG("MDEG client callback done\n");
+
+cleanup:
+ rw_exit(&mdeg.rwlock);
+
+ if (mdd != MD_INVAL_DIFF_COOKIE)
+ (void) md_diff_fini(mdd);
+}
+
+static mde_cookie_t
+mdeg_find_start_node(md_t *md, mdeg_node_spec_t *nspec)
+{
+ mde_cookie_t *nodesp;
+ mde_str_cookie_t nname;
+ mde_str_cookie_t aname;
+ int nnodes;
+ int idx;
+
+ if ((md == NULL) || (nspec == NULL))
+ return (MDE_INVAL_ELEM_COOKIE);
+
+ nname = md_find_name(md, nspec->namep);
+ aname = md_find_name(md, "fwd");
+
+ nnodes = md_scan_dag(md, NULL, nname, aname, NULL);
+ if (nnodes == 0)
+ return (MDE_INVAL_ELEM_COOKIE);
+
+ nodesp = kmem_alloc(sizeof (mde_cookie_t) * nnodes, KM_SLEEP);
+
+ (void) md_scan_dag(md, NULL, nname, aname, nodesp);
+
+ for (idx = 0; idx < nnodes; idx++) {
+
+ if (mdeg_node_spec_match(md, nodesp[idx], nspec)) {
+ mde_cookie_t res = nodesp[idx];
+
+ kmem_free(nodesp, sizeof (mde_cookie_t) * nnodes);
+ return (res);
+ }
+ }
+
+ kmem_free(nodesp, sizeof (mde_cookie_t) * nnodes);
+ return (MDE_INVAL_ELEM_COOKIE);
+}
+
+static boolean_t
+mdeg_node_spec_match(md_t *md, mde_cookie_t node, mdeg_node_spec_t *nspec)
+{
+ mdeg_prop_spec_t *prop;
+
+ ASSERT(md && nspec);
+ ASSERT(node != MDE_INVAL_ELEM_COOKIE);
+
+ prop = nspec->specp;
+
+ while (prop->type != MDET_LIST_END) {
+
+ switch (prop->type) {
+ case MDET_PROP_VAL: {
+ uint64_t val;
+
+ if (md_get_prop_val(md, node, prop->namep, &val) != 0)
+ return (B_FALSE);
+
+ if (prop->ps_val != val)
+ return (B_FALSE);
+
+ break;
+ }
+ case MDET_PROP_STR: {
+ char *str;
+
+ if (md_get_prop_str(md, node, prop->namep, &str) != 0)
+ return (B_FALSE);
+
+ if (strcmp(prop->ps_str, str) != 0)
+ return (B_FALSE);
+
+ break;
+ }
+
+ default:
+ return (B_FALSE);
+ }
+
+ prop++;
+ }
+
+ return (B_TRUE);
+}
+
+static void
+mdeg_get_diff_results(md_diff_cookie_t mdd, mdeg_result_t *res)
+{
+ /*
+ * Cache added nodes.
+ */
+ res->added.mdp = mdeg.md_curr;
+ res->added.nelem = md_diff_added(mdd, &(res->added.mdep));
+
+ if (res->added.nelem == -1) {
+ bzero(&(res->added), sizeof (mdeg_diff_t));
+ }
+
+ /*
+ * Cache removed nodes.
+ */
+ res->removed.mdp = mdeg.md_prev;
+ res->removed.nelem = md_diff_removed(mdd, &(res->removed.mdep));
+
+ if (res->removed.nelem == -1) {
+ bzero(&(res->removed), sizeof (mdeg_diff_t));
+ }
+
+ /*
+ * Cache matching node pairs.
+ */
+ res->match_curr.mdp = mdeg.md_curr;
+ res->match_prev.mdp = mdeg.md_prev;
+ res->match_curr.nelem = md_diff_matched(mdd, &(res->match_prev.mdep),
+ &(res->match_curr.mdep));
+ res->match_prev.nelem = res->match_curr.nelem;
+
+ if (res->match_prev.nelem == -1) {
+ bzero(&(res->match_prev), sizeof (mdeg_diff_t));
+ bzero(&(res->match_curr), sizeof (mdeg_diff_t));
+ }
+}
+
+#ifdef DEBUG
+/*
+ * Generate a string that represents the node specifier
+ * structure. Clamp the string length if the specifier
+ * structure contains too much information.
+ *
+ * General form:
+ *
+ * <nodename>:{<propname>=<propval>,...}
+ * e.g.
+ * vdevice:{name=vsw,reg=0x0}
+ */
+static void
+mdeg_spec_str(mdeg_node_spec_t *spec, char *buf, int len)
+{
+ mdeg_prop_spec_t *prop;
+ int offset;
+ boolean_t first = B_TRUE;
+ char *end = buf + len;
+
+ offset = snprintf(buf, len, "%s:{", spec->namep);
+
+ buf += offset;
+ len -= offset;
+ if (len <= 0)
+ goto trunc;
+
+ prop = spec->specp;
+
+ while (prop->type != MDET_LIST_END) {
+
+ switch (prop->type) {
+ case MDET_PROP_VAL:
+ offset = snprintf(buf, len, "%s%s=0x%lx",
+ (first) ? "" : ",", prop->namep, prop->ps_val);
+ buf += offset;
+ len -= offset;
+ if (len <= 0)
+ goto trunc;
+ break;
+
+ case MDET_PROP_STR:
+ offset = snprintf(buf, len, "%s%s=%s",
+ (first) ? "" : ",", prop->namep, prop->ps_str);
+ buf += offset;
+ len -= offset;
+ if (len <= 0)
+ goto trunc;
+ break;
+
+ default:
+ (void) snprintf(buf, len, "}");
+ return;
+ }
+
+ if (first)
+ first = B_FALSE;
+ prop++;
+ }
+
+ (void) snprintf(buf, len, "}");
+ return;
+
+trunc:
+ /* string too long, truncate it */
+ buf = end - (strlen(trunc_str) + 1);
+ (void) sprintf(buf, trunc_str);
+}
+
+/*
+ * Generate a string that represents the match structure.
+ * Clamp the string length if the match structure contains
+ * too much information.
+ *
+ * General form:
+ *
+ * <nodename>:{<propname>,...}
+ * e.g.
+ * nmatch=vport:{reg}
+ */
+static void
+mdeg_match_str(mdeg_node_match_t *match, char *buf, int len)
+{
+ md_prop_match_t *prop;
+ int offset;
+ boolean_t first = B_TRUE;
+ char *end = buf + len;
+
+ offset = snprintf(buf, len, "%s:{", match->namep);
+
+ buf += offset;
+ len -= offset;
+ if (len <= 0)
+ goto trunc;
+
+ prop = match->matchp;
+
+ while (prop->type != MDET_LIST_END) {
+ offset = snprintf(buf, len, "%s%s", (first) ? "" : ",",
+ prop->namep);
+ buf += offset;
+ len -= offset;
+ if (len <= 0)
+ goto trunc;
+
+ if (first)
+ first = B_FALSE;
+ prop++;
+ }
+
+ (void) snprintf(buf, len, "}");
+ return;
+
+trunc:
+ /* string too long, truncate it */
+ buf = end - (strlen(trunc_str) + 1);
+ (void) sprintf(buf, trunc_str);
+}
+
+#define MAX_FIELD_STR 80
+
+static void
+mdeg_dump_clnt(mdeg_clnt_t *clnt)
+{
+ char str[MAX_FIELD_STR];
+
+ if (!clnt->valid) {
+ MDEG_DBG(" valid=B_FALSE\n");
+ return;
+ }
+
+ mdeg_spec_str(clnt->pspec, str, MAX_FIELD_STR);
+ MDEG_DBG(" pspecp=%s\n", str);
+
+ mdeg_match_str(clnt->nmatch, str, MAX_FIELD_STR);
+ MDEG_DBG(" nmatch=%s\n", str);
+}
+
+static void
+mdeg_dump_table(void)
+{
+ int idx;
+ mdeg_clnt_t *clnt;
+
+ for (idx = 0; idx < mdeg.maxclnts; idx++) {
+ clnt = &(mdeg.tbl[idx]);
+
+ MDEG_DBG("client %d (0x%lx):\n", idx, clnt->hdl);
+ mdeg_dump_clnt(clnt);
+ }
+}
+#endif /* DEBUG */
diff --git a/usr/src/uts/sun4v/io/mdesc.c b/usr/src/uts/sun4v/io/mdesc.c
index 84dc13fdc0..6aca5946fc 100644
--- a/usr/src/uts/sun4v/io/mdesc.c
+++ b/usr/src/uts/sun4v/io/mdesc.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -55,23 +54,29 @@
* Operational state flags
*/
-#define MDESC_DIDMINOR 0x2 /* Created minors */
-#define MDESC_DIDMUTEX 0x8 /* Created mutex */
-#define MDESC_DIDCV 0x10 /* Created cv */
-#define MDESC_BUSY 0x20 /* Device is busy */
+#define MDESC_GOT_HANDLE 0x10 /* Got mdesc handle */
+#define MDESC_BUSY 0x20 /* Device is busy */
-static void *mdesc_state_head;
+static void *mdesc_state_head;
+static vmem_t *mdesc_minor;
+static uint16_t mdesc_max_opens = 256;
+static uint16_t mdesc_opens = 0;
+static int mdesc_attached = 0;
+static dev_info_t *mdesc_devi;
+static kmutex_t mdesc_lock;
struct mdesc_state {
int instance;
- dev_info_t *devi;
+ dev_t dev;
kmutex_t lock;
kcondvar_t cv;
size_t mdesc_len;
- uint8_t *mdesc;
+ md_t *mdesc;
int flags;
};
+typedef struct mdesc_state mdesc_state_t;
+
static int mdesc_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int mdesc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mdesc_detach(dev_info_t *, ddi_detach_cmd_t);
@@ -129,19 +134,13 @@ static struct modlinkage modlinkage = {
};
-
-
-
-
-
-
int
_init(void)
{
int retval;
if ((retval = ddi_soft_state_init(&mdesc_state_head,
- sizeof (struct mdesc_state), 1)) != 0)
+ sizeof (struct mdesc_state), mdesc_max_opens)) != 0)
return (retval);
if ((retval = mod_install(&modlinkage)) != 0) {
ddi_soft_state_fini(&mdesc_state_head);
@@ -189,9 +188,10 @@ mdesc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
switch (cmd) {
case DDI_INFO_DEVT2DEVINFO:
- if ((mdsp = ddi_get_soft_state(mdesc_state_head,
- getminor((dev_t)arg))) != NULL) {
- *resultp = mdsp->devi;
+ mdsp = ddi_get_soft_state(mdesc_state_head,
+ getminor((dev_t)arg));
+ if (mdsp != NULL) {
+ *resultp = mdesc_devi;
retval = DDI_SUCCESS;
} else
*resultp = NULL;
@@ -212,47 +212,23 @@ static int
mdesc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
int instance = ddi_get_instance(dip);
- struct mdesc_state *mdsp;
switch (cmd) {
case DDI_ATTACH:
- if (ddi_soft_state_zalloc(mdesc_state_head, instance) !=
- DDI_SUCCESS) {
- cmn_err(CE_WARN, "%s@%d: Unable to allocate state",
- MDESC_NAME, instance);
- return (DDI_FAILURE);
- }
- if ((mdsp = ddi_get_soft_state(mdesc_state_head, instance)) ==
- NULL) {
- cmn_err(CE_WARN, "%s@%d: Unable to obtain state",
- MDESC_NAME, instance);
- ddi_soft_state_free(dip, instance);
- return (DDI_FAILURE);
- }
+
if (ddi_create_minor_node(dip, MDESC_NAME, S_IFCHR, instance,
DDI_PSEUDO, 0) != DDI_SUCCESS) {
cmn_err(CE_WARN, "%s@%d: Unable to create minor node",
MDESC_NAME, instance);
- (void) mdesc_detach(dip, DDI_DETACH);
return (DDI_FAILURE);
}
- mdsp->flags |= MDESC_DIDMINOR;
-
- mdsp->instance = instance;
- mdsp->devi = dip;
-
- mutex_init(&mdsp->lock, NULL, MUTEX_DRIVER, NULL);
- mdsp->flags |= MDESC_DIDMUTEX;
-
- cv_init(&mdsp->cv, NULL, CV_DRIVER, NULL);
- mdsp->flags |= MDESC_DIDCV;
-
- /* point the driver at the kernel's copy of the data */
- mdsp->mdesc = (uint8_t *)machine_descrip.va;
- mdsp->mdesc_len = (machine_descrip.va != NULL) ?
- machine_descrip.size : 0;
-
ddi_report_dev(dip);
+ mdesc_devi = dip;
+ mdesc_minor = vmem_create("mdesc_minor", (void *) 1,
+ mdesc_max_opens, 1, NULL, NULL, NULL, 0,
+ VM_SLEEP | VMC_IDENTIFIER);
+ mutex_init(&mdesc_lock, NULL, MUTEX_DRIVER, NULL);
+ mdesc_attached = 1;
return (DDI_SUCCESS);
case DDI_RESUME:
return (DDI_SUCCESS);
@@ -261,27 +237,16 @@ mdesc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
}
-
-
+/*ARGSUSED*/
static int
mdesc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
- int instance = ddi_get_instance(dip);
- struct mdesc_state *mdsp;
-
switch (cmd) {
case DDI_DETACH:
- mdsp = ddi_get_soft_state(mdesc_state_head, instance);
- if (mdsp != NULL) {
- ASSERT(!(mdsp->flags & MDESC_BUSY));
- if (mdsp->flags & MDESC_DIDCV)
- cv_destroy(&mdsp->cv);
- if (mdsp->flags & MDESC_DIDMUTEX)
- mutex_destroy(&mdsp->lock);
- if (mdsp->flags & MDESC_DIDMINOR)
- ddi_remove_minor_node(dip, NULL);
- }
- ddi_soft_state_free(mdesc_state_head, instance);
+ mutex_destroy(&mdesc_lock);
+ vmem_destroy(mdesc_minor);
+ ddi_remove_minor_node(mdesc_devi, NULL);
+ mdesc_attached = 0;
return (DDI_SUCCESS);
case DDI_SUSPEND:
@@ -292,28 +257,107 @@ mdesc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
}
}
+static void
+mdesc_destroy_state(mdesc_state_t *mdsp)
+{
+ minor_t minor = getminor(mdsp->dev);
+
+ if (mdsp->flags & MDESC_GOT_HANDLE)
+ (void) md_fini_handle(mdsp->mdesc);
+
+ cv_destroy(&mdsp->cv);
+ mutex_destroy(&mdsp->lock);
+ ddi_soft_state_free(mdesc_state_head, minor);
+ vmem_free(mdesc_minor, (void *)(uintptr_t)minor, 1);
+}
+
+static mdesc_state_t *
+mdesc_create_state(dev_t *devp)
+{
+ major_t major;
+ minor_t minor;
+ mdesc_state_t *mdsp;
+
+ minor = (minor_t)(uintptr_t)vmem_alloc(mdesc_minor, 1,
+ VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(mdesc_state_head, minor) !=
+ DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s@%d: Unable to allocate state",
+ MDESC_NAME, minor);
+ vmem_free(mdesc_minor, (void *)(uintptr_t)minor, 1);
+ return (NULL);
+ }
+
+ mdsp = ddi_get_soft_state(mdesc_state_head, minor);
+
+ if (devp != NULL) {
+ major = getemajor(*devp);
+ } else {
+ major = ddi_driver_major(mdesc_devi);
+ }
+
+ mdsp->dev = makedevice(major, minor);
+
+ if (devp != NULL)
+ *devp = mdsp->dev;
+
+ mdsp->instance = minor;
+
+ mutex_init(&mdsp->lock, NULL, MUTEX_DRIVER, NULL);
+
+ cv_init(&mdsp->cv, NULL, CV_DRIVER, NULL);
+
+ mdsp->mdesc = md_get_handle();
+
+ if (mdsp->mdesc == NULL) {
+ mdesc_destroy_state(mdsp);
+ return (NULL);
+ }
+ mdsp->flags |= MDESC_GOT_HANDLE;
+
+ mdsp->mdesc_len = md_get_bin_size(mdsp->mdesc);
+
+ if (mdsp->mdesc_len == 0) {
+ mdesc_destroy_state(mdsp);
+ mdsp = NULL;
+ }
+
+ return (mdsp);
+}
/*ARGSUSED*/
static int
mdesc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
- int instance = getminor(*devp);
struct mdesc_state *mdsp;
- if ((mdsp = ddi_get_soft_state(mdesc_state_head, instance)) == NULL)
+ if (otyp != OTYP_CHR)
+ return (EINVAL);
+ if (!mdesc_attached)
return (ENXIO);
- ASSERT(mdsp->instance == instance);
+ mutex_enter(&mdesc_lock);
- if (otyp != OTYP_CHR)
- return (EINVAL);
+ if (mdesc_opens >= mdesc_max_opens) {
+ mutex_exit(&mdesc_lock);
+ return (ENXIO);
+ }
- return (0);
-}
+ mdsp = mdesc_create_state(devp);
+
+ if (mdsp == NULL) {
+ mutex_exit(&mdesc_lock);
+ return (ENXIO);
+ }
+ mdesc_opens++;
+ mutex_exit(&mdesc_lock);
+ return (0);
+}
/*ARGSUSED*/
static int
@@ -322,13 +366,25 @@ mdesc_close(dev_t dev, int flag, int otyp, cred_t *credp)
struct mdesc_state *mdsp;
int instance = getminor(dev);
+ if (otyp != OTYP_CHR)
+ return (EINVAL);
+
+ mutex_enter(&mdesc_lock);
+ if (mdesc_opens == 0) {
+ mutex_exit(&mdesc_lock);
+ return (0);
+ }
+ mutex_exit(&mdesc_lock);
+
if ((mdsp = ddi_get_soft_state(mdesc_state_head, instance)) == NULL)
return (ENXIO);
ASSERT(mdsp->instance == instance);
- if (otyp != OTYP_CHR)
- return (EINVAL);
+ mdesc_destroy_state(mdsp);
+ mutex_enter(&mdesc_lock);
+ mdesc_opens--;
+ mutex_exit(&mdesc_lock);
return (0);
}
@@ -363,6 +419,7 @@ mdesc_rw(dev_t dev, struct uio *uiop, enum uio_rw rw)
int instance = getminor(dev);
size_t len;
int retval;
+ caddr_t buf;
len = uiop->uio_resid;
@@ -400,7 +457,11 @@ mdesc_rw(dev_t dev, struct uio *uiop, enum uio_rw rw)
mdsp->flags |= MDESC_BUSY;
mutex_exit(&mdsp->lock);
- retval = uiomove((void *)(mdsp->mdesc + uiop->uio_offset),
+ buf = md_get_md_raw(mdsp->mdesc);
+ if (buf == NULL)
+ return (ENXIO);
+
+ retval = uiomove((void *)(buf + uiop->uio_offset),
len, rw, uiop);
mutex_enter(&mdsp->lock);
diff --git a/usr/src/uts/sun4v/io/platsvc.c b/usr/src/uts/sun4v/io/platsvc.c
new file mode 100644
index 0000000000..5970a7252f
--- /dev/null
+++ b/usr/src/uts/sun4v/io/platsvc.c
@@ -0,0 +1,371 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v Platform Services Module
+ */
+
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/machsystm.h>
+#include <sys/note.h>
+#include <sys/uadmin.h>
+#include <sys/ds.h>
+#include <sys/platsvc.h>
+
+/*
+ * Debugging routines
+ */
+#ifdef DEBUG
+uint_t ps_debug = 0x0;
+#define DBG if (ps_debug) printf
+#else /* DEBUG */
+#define DBG _NOTE(CONSTCOND) if (0) printf
+#endif /* DEBUG */
+
+/*
+ * Time resolution conversions.
+ */
+#define MS2NANO(x) ((x) * MICROSEC)
+#define MS2SEC(x) ((x) / MILLISEC)
+#define MS2MIN(x) (MS2SEC(x) / 60)
+
+/*
+ * Domains Services interaction
+ */
+static ds_svc_hdl_t ds_md_handle;
+static ds_svc_hdl_t ds_shutdown_handle;
+static ds_svc_hdl_t ds_panic_handle;
+
+static ds_ver_t ps_vers[] = {{ 1, 0 }};
+#define PS_NVERS (sizeof (ps_vers) / sizeof (ps_vers[0]))
+
+static ds_capability_t ps_md_cap = {
+ "md-update", /* svc_id */
+ ps_vers, /* vers */
+ PS_NVERS /* nvers */
+};
+
+static ds_capability_t ps_shutdown_cap = {
+ "domain-shutdown", /* svc_id */
+ ps_vers, /* vers */
+ PS_NVERS /* nvers */
+};
+
+static ds_capability_t ps_panic_cap = {
+ "domain-panic", /* svc_id */
+ ps_vers, /* vers */
+ PS_NVERS /* nvers */
+};
+
+static void ps_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl);
+static void ps_unreg_handler(ds_cb_arg_t arg);
+
+static void ps_md_data_handler(ds_cb_arg_t arg, void * buf, size_t buflen);
+static void ps_shutdown_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
+static void ps_panic_data_handler(ds_cb_arg_t arg, void * buf, size_t buflen);
+
+static ds_clnt_ops_t ps_md_ops = {
+ ps_reg_handler, /* ds_reg_cb */
+ ps_unreg_handler, /* ds_unreg_cb */
+ ps_md_data_handler, /* ds_data_cb */
+ &ds_md_handle /* cb_arg */
+};
+
+static ds_clnt_ops_t ps_shutdown_ops = {
+ ps_reg_handler, /* ds_reg_cb */
+ ps_unreg_handler, /* ds_unreg_cb */
+ ps_shutdown_data_handler, /* ds_data_cb */
+ &ds_shutdown_handle /* cb_arg */
+};
+
+static ds_clnt_ops_t ps_panic_ops = {
+ ps_reg_handler, /* ds_reg_cb */
+ ps_unreg_handler, /* ds_unreg_cb */
+ ps_panic_data_handler, /* ds_data_cb */
+ &ds_panic_handle /* cb_arg */
+};
+
+static int ps_init(void);
+static void ps_fini(void);
+
+/*
+ * Powerdown timeout value of 5 minutes.
+ */
+#define PLATSVC_POWERDOWN_DELAY 1200
+
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "sun4v Platform Services %I%"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modlmisc,
+ NULL
+};
+
+int
+_init(void)
+{
+ int rv;
+
+ if ((rv = ps_init()) != 0)
+ return (rv);
+
+ if ((rv = mod_install(&modlinkage)) != 0)
+ ps_fini();
+
+ return (rv);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int platsvc_allow_unload;
+
+int
+_fini(void)
+{
+ int status;
+
+ if (platsvc_allow_unload == 0)
+ return (EBUSY);
+
+ if ((status = mod_remove(&modlinkage)) == 0)
+ ps_fini();
+
+ return (status);
+}
+
+static int
+ps_init(void)
+{
+ int rv;
+ extern int mdeg_init(void);
+
+ /* register with domain services framework */
+ rv = ds_cap_init(&ps_md_cap, &ps_md_ops);
+ if (rv != 0) {
+ cmn_err(CE_WARN, "ds_cap_init md-update failed: %d", rv);
+ return (rv);
+ }
+
+ rv = ds_cap_init(&ps_shutdown_cap, &ps_shutdown_ops);
+ if (rv != 0) {
+ cmn_err(CE_WARN, "ds_cap_init domain-shutdown failed: %d", rv);
+ (void) ds_cap_fini(&ps_md_cap);
+ return (rv);
+ }
+
+ rv = ds_cap_init(&ps_panic_cap, &ps_panic_ops);
+ if (rv != 0) {
+ cmn_err(CE_WARN, "ds_cap_init domain-panic failed: %d", rv);
+ (void) ds_cap_fini(&ps_md_cap);
+ (void) ds_cap_fini(&ps_shutdown_cap);
+ return (rv);
+ }
+
+ rv = mdeg_init();
+
+ return (rv);
+}
+
+static void
+ps_fini(void)
+{
+ extern void mdeg_fini(void);
+
+ /*
+ * Stop incoming requests from Zeus
+ */
+ (void) ds_cap_fini(&ps_md_cap);
+ (void) ds_cap_fini(&ps_shutdown_cap);
+ (void) ds_cap_fini(&ps_panic_cap);
+
+ mdeg_fini();
+}
+
+static void
+ps_md_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+ extern int mach_descrip_update(void);
+ extern void mdeg_notify_clients(void);
+
+ ds_svc_hdl_t ds_handle;
+ platsvc_md_update_req_t *msg = buf;
+ platsvc_md_update_resp_t resp_msg;
+ uint_t rv;
+
+ if (arg == NULL)
+ return;
+
+ ds_handle = ds_md_handle;
+
+ if (msg == NULL || buflen != sizeof (platsvc_md_update_req_t)) {
+ resp_msg.req_num = 0;
+ resp_msg.result = MD_UPDATE_INVALID_MSG;
+ if ((rv = ds_cap_send(ds_handle, &resp_msg,
+ sizeof (resp_msg))) != 0) {
+ cmn_err(CE_NOTE, "md ds_cap_send failed (%d)", rv);
+ }
+ return;
+ }
+
+ DBG("MD Reload...\n");
+ if (mach_descrip_update()) {
+ cmn_err(CE_WARN, "MD reload failed\n");
+ return;
+ }
+
+ /*
+ * notify registered clients that MD has
+ * been updated
+ */
+ mdeg_notify_clients();
+
+ resp_msg.req_num = msg->req_num;
+ resp_msg.result = MD_UPDATE_SUCCESS;
+ if ((rv = ds_cap_send(ds_handle, &resp_msg, sizeof (resp_msg))) != 0) {
+ cmn_err(CE_NOTE, "md ds_cap_send resp failed (%d)", rv);
+ }
+}
+
+static void
+ps_shutdown_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+ ds_svc_hdl_t ds_handle;
+ platsvc_shutdown_req_t *msg = buf;
+ platsvc_shutdown_resp_t resp_msg;
+ uint_t rv;
+ hrtime_t start;
+
+ if (arg == NULL)
+ return;
+
+ ds_handle = ds_shutdown_handle;
+
+ if (msg == NULL || buflen != sizeof (platsvc_shutdown_req_t)) {
+ resp_msg.req_num = 0;
+ resp_msg.result = DOMAIN_SHUTDOWN_INVALID_MSG;
+ resp_msg.reason[0] = '\0';
+ if ((rv = ds_cap_send(ds_handle, &resp_msg,
+ sizeof (resp_msg))) != 0) {
+ cmn_err(CE_NOTE, "shutdown ds_cap_send failed (%d)",
+ rv);
+ }
+ return;
+ }
+
+ resp_msg.req_num = msg->req_num;
+ resp_msg.result = DOMAIN_SHUTDOWN_SUCCESS;
+ resp_msg.reason[0] = '\0';
+
+ if ((rv = ds_cap_send(ds_handle, &resp_msg, sizeof (resp_msg))) != 0) {
+ cmn_err(CE_NOTE, "shutdown ds_cap_send resp failed (%d)", rv);
+ }
+
+ /*
+ * Honor the ldoms manager's shutdown delay requirement.
+ */
+ cmn_err(CE_NOTE, "shutdown requested by ldom manager, "
+ "system shutdown in %d minutes", MS2MIN(msg->delay));
+
+ start = gethrtime();
+ while (gethrtime() - start < MS2NANO(msg->delay))
+ ;
+
+ (void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
+}
+
+
+static void
+ps_panic_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+ ds_svc_hdl_t ds_handle;
+ platsvc_panic_req_t *msg = buf;
+ platsvc_panic_resp_t resp_msg;
+ uint_t rv;
+
+ if (arg == NULL)
+ return;
+
+ ds_handle = ds_panic_handle;
+
+ if (msg == NULL || buflen != sizeof (platsvc_panic_req_t)) {
+ resp_msg.req_num = 0;
+ resp_msg.result = DOMAIN_PANIC_INVALID_MSG;
+ resp_msg.reason[0] = '\0';
+ if ((rv = ds_cap_send(ds_handle, &resp_msg,
+ sizeof (resp_msg))) != 0) {
+ cmn_err(CE_NOTE, "panic ds_cap_send resp failed (%d)",
+ rv);
+ }
+ return;
+ }
+
+ resp_msg.req_num = msg->req_num;
+ resp_msg.result = DOMAIN_PANIC_SUCCESS;
+ resp_msg.reason[0] = '\0';
+ if ((rv = ds_cap_send(ds_handle, &resp_msg, sizeof (resp_msg))) != 0) {
+ cmn_err(CE_NOTE, "panic ds_cap_send resp failed (%d)", rv);
+ }
+
+ cmn_err(CE_PANIC, "Panic forced by ldom manager");
+ _NOTE(NOTREACHED)
+}
+
+static void
+ps_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
+{
+ DBG("ps_reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n",
+ arg, ver->major, ver->minor, hdl);
+
+ if ((ds_svc_hdl_t *)arg == &ds_md_handle)
+ ds_md_handle = hdl;
+ if ((ds_svc_hdl_t *)arg == &ds_shutdown_handle)
+ ds_shutdown_handle = hdl;
+ if ((ds_svc_hdl_t *)arg == &ds_panic_handle)
+ ds_panic_handle = hdl;
+}
+
+static void
+ps_unreg_handler(ds_cb_arg_t arg)
+{
+ DBG("ps_unreg_handler: arg=0x%p\n", arg);
+
+ if ((ds_svc_hdl_t *)arg == &ds_md_handle)
+ ds_md_handle = DS_INVALID_HDL;
+ if ((ds_svc_hdl_t *)arg == &ds_shutdown_handle)
+ ds_shutdown_handle = DS_INVALID_HDL;
+ if ((ds_svc_hdl_t *)arg == &ds_panic_handle)
+ ds_panic_handle = DS_INVALID_HDL;
+}
diff --git a/usr/src/uts/sun4v/io/qcn.c b/usr/src/uts/sun4v/io/qcn.c
index e68e1bde53..63b3c0b5fb 100644
--- a/usr/src/uts/sun4v/io/qcn.c
+++ b/usr/src/uts/sun4v/io/qcn.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -87,7 +87,8 @@ static cyc_handler_t qcn_poll_cychandler = {
};
static cyclic_id_t qcn_poll_cycid = CYCLIC_NONE;
static uint64_t qcn_poll_interval = 5; /* milli sec */
-static uint64_t sb_interval = 0;
+static uint64_t sb_interval = 0;
+uint_t qcn_force_polling = 0;
#endif
#define QCN_MI_IDNUM 0xABCE
@@ -338,7 +339,8 @@ qcn_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
* the console to work on older firmware releases.
*/
binding_name = ddi_binding_name(qcn_state->qcn_dip);
- if (strcmp(binding_name, "qcn") == 0)
+ if ((strcmp(binding_name, "qcn") == 0) ||
+ (qcn_force_polling))
qcn_state->qcn_polling = 1;
if (qcn_state->qcn_polling) {
@@ -802,7 +804,7 @@ qcn_transmit(queue_t *q, mblk_t *mp)
buf = (caddr_t)bp->b_rptr;
for (i = 0; i < len; i++) {
- if (hv_cnputchar(buf[i]) == -1)
+ if (hv_cnputchar(buf[i]) == H_EWOULDBLOCK)
break;
}
if (i != len) {
diff --git a/usr/src/uts/sun4v/io/vcc.c b/usr/src/uts/sun4v/io/vcc.c
new file mode 100644
index 0000000000..124db0d05b
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vcc.c
@@ -0,0 +1,2406 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/open.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/ksynch.h>
+#include <sys/modctl.h>
+#include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
+#include <sys/debug.h>
+#include <sys/promif.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/cyclic.h>
+#include <sys/termio.h>
+#include <sys/intr.h>
+#include <sys/ivintr.h>
+#include <sys/note.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/sysmacros.h>
+
+#include <sys/ldc.h>
+#include <sys/mdeg.h>
+#include <sys/vcc_impl.h>
+
+/*
+ * Function prototypes.
+ */
+
+/* DDI entrypoints */
+static int vcc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int vcc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int vcc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
+static int vcc_close(dev_t dev, int flag, int otyp, cred_t *cred);
+static int vcc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *credp, int *rvalp);
+static int vcc_read(dev_t dev, struct uio *uiop, cred_t *credp);
+static int vcc_write(dev_t dev, struct uio *uiop, cred_t *credp);
+static int vcc_chpoll(dev_t dev, short events, int anyyet,
+ short *reventsp, struct pollhead **phpp);
+static int vcc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
+ void *arg, void **resultp);
+
+/* callback functions */
+static uint_t vcc_ldc_cb(uint64_t event, caddr_t arg);
+static int vcc_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
+
+/* Internal functions */
+static int i_vcc_ldc_init(vcc_t *vccp, vcc_port_t *vport);
+static int i_vcc_add_port(vcc_t *vccp, char *group_name, uint64_t tcp_port,
+ uint_t portno, char *domain_name);
+static int i_vcc_config_port(vcc_t *vccp, uint_t portno, uint64_t ldc_id);
+static int i_vcc_reset_events(vcc_t *vccp);
+static int i_vcc_cons_tbl(vcc_t *vccp, uint_t num_ports,
+ caddr_t buf, int mode);
+static int i_vcc_del_cons_ok(vcc_t *vccp, caddr_t buf, int mode);
+static int i_vcc_close_port(vcc_port_t *vport);
+static int i_vcc_write_ldc(vcc_port_t *vport, vcc_msg_t *buf);
+
+static void *vcc_ssp;
+
+static struct cb_ops vcc_cb_ops = {
+ vcc_open, /* open */
+ vcc_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ vcc_read, /* read */
+ vcc_write, /* write */
+ vcc_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ ddi_segmap, /* segmap */
+ vcc_chpoll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* stream */
+ D_NEW | D_MP /* flags */
+};
+
+
+static struct dev_ops vcc_ops = {
+ DEVO_REV, /* rev */
+ 0, /* ref count */
+ vcc_getinfo, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ vcc_attach, /* attach */
+ vcc_detach, /* detach */
+ nodev, /* reset */
+ &vcc_cb_ops, /* cb_ops */
+ (struct bus_ops *)NULL /* bus_ops */
+};
+
+extern struct mod_ops mod_driverops;
+
+#define VCC_CHANNEL_ENDPOINT "channel-endpoint"
+#define VCC_ID_PROP "id"
+
+/*
+ * This is the string displayed by modinfo(1m).
+ */
+static char vcc_ident[] = "sun4v Virtual Console Concentrator Driver v%I%";
+
+static struct modldrv md = {
+ &mod_driverops, /* Type - it is a driver */
+ vcc_ident, /* Name of the module */
+	&vcc_ops,	/* driver specific opts */
+};
+
+static struct modlinkage ml = {
+ MODREV_1,
+ &md,
+ NULL
+};
+
+/*
+ * Matching criteria passed to the MDEG to register interest
+ * in changes to 'virtual-device-port' nodes identified by their
+ * 'id' property.
+ */
+static md_prop_match_t vcc_port_prop_match[] = {
+ { MDET_PROP_VAL, "id" },
+ { MDET_LIST_END, NULL }
+};
+
+static mdeg_node_match_t vcc_port_match = {"virtual-device-port",
+ vcc_port_prop_match};
+
+/*
+ * Specification of an MD node passed to the MDEG to filter any
+ * 'virtual-device-port' nodes that do not belong to the specified node.
+ * This template is copied for each vcc instance and filled in with
+ * the appropriate 'cfg-handle' value before being passed to the MDEG.
+ */
+static mdeg_prop_spec_t vcc_prop_template[] = {
+ { MDET_PROP_STR, "name", "virtual-console-concentrator" },
+ { MDET_PROP_VAL, "cfg-handle", NULL },
+ { MDET_LIST_END, NULL, NULL }
+};
+
+#define VCC_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val);
+
+
+#ifdef DEBUG
+
+/*
+ * Print debug messages
+ *
+ * set vccdbg to 0xf to enable all messages
+ *
+ * 0x8 - Errors
+ * 0x4 - Warnings
+ * 0x2 - All debug messages (most verbose)
+ * 0x1 - Minimal debug messages
+ */
+
+int vccdbg = 0x8;
+
+static void
+vccdebug(const char *fmt, ...)
+{
+ char buf[512];
+ va_list ap;
+
+ va_start(ap, fmt);
+ (void) vsprintf(buf, fmt, ap);
+ va_end(ap);
+
+ cmn_err(CE_CONT, "%s\n", buf);
+}
+
+#define D1 \
+if (vccdbg & 0x01) \
+ vccdebug
+
+#define D2 \
+if (vccdbg & 0x02) \
+ vccdebug
+
+#define DWARN \
+if (vccdbg & 0x04) \
+ vccdebug
+
+#else
+
+#define D1
+#define D2
+#define DWARN
+
+#endif
+
+/* _init(9E): initialize the loadable module */
+int
+_init(void)
+{
+ int error;
+
+ /* init the soft state structure */
+ error = ddi_soft_state_init(&vcc_ssp, sizeof (vcc_t), 1);
+ if (error != 0) {
+ return (error);
+ }
+
+ /* Link the driver into the system */
+ error = mod_install(&ml);
+
+ return (error);
+
+}
+
+/* _info(9E): return information about the loadable module */
+int
+_info(struct modinfo *modinfop)
+{
+ /* Report status of the dynamically loadable driver module */
+ return (mod_info(&ml, modinfop));
+}
+
+/* _fini(9E): prepare the module for unloading. */
+int
+_fini(void)
+{
+ int error;
+
+ /* Unlink the driver module from the system */
+ if ((error = mod_remove(&ml)) == 0) {
+ /*
+ * We have successfully "removed" the driver.
+ * destroy soft state
+ */
+ ddi_soft_state_fini(&vcc_ssp);
+ }
+
+ return (error);
+}
+
+/* getinfo(9E) */
+static int
+vcc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
+{
+ _NOTE(ARGUNUSED(dip))
+
+ int instance = VCCINST(getminor((dev_t)arg));
+ vcc_t *vccp = NULL;
+
+ switch (cmd) {
+
+ case DDI_INFO_DEVT2DEVINFO:
+ if ((vccp = ddi_get_soft_state(vcc_ssp, instance)) == NULL) {
+ *resultp = NULL;
+ return (DDI_FAILURE);
+ }
+ *resultp = vccp->dip;
+ return (DDI_SUCCESS);
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *resultp = (void *)(uintptr_t)instance;
+ return (DDI_SUCCESS);
+
+ default:
+ *resultp = NULL;
+ return (DDI_FAILURE);
+ }
+}
+
+/*
+ * There are two cases that need special blocking. One of them is to block
+ * a minor node without a port and another is to block application other
+ * than vntsd.
+ *
+ * A minor node can exist in the file system without associated with a port
+ * because when a port is deleted, ddi_remove_minor_node does not unlink it.
+ * Clients might try to open a minor node even after the corresponding port
+ * node has been removed. To identify and block these calls,
+ * we need to validate the association between a port and its minor node.
+ *
+ * An application other than vntsd can access a console port as long
+ * as vntsd is not using the port. A port opened by an application other
+ * than vntsd will be closed when vntsd wants to use the port.
+ * However, other application could use same file descriptor
+ * access vcc cb_ops. So we need to identify and block caller other
+ * than vntsd, when vntsd is using the port.
+ */
+static int
+i_vcc_can_use_port(vcc_minor_t *minorp, vcc_port_t *vport)
+{
+ if (vport->minorp != minorp) {
+ /* port config changed */
+ return (ENXIO);
+ }
+
+ if (vport->valid_pid == VCC_NO_PID_BLOCKING) {
+ /* no blocking needed */
+ return (0);
+ }
+
+ if (vport->valid_pid != ddi_get_pid()) {
+ return (EIO);
+ }
+
+ return (0);
+}
+
+
+/* Synchronization between threads using cv_wait */
+static int
+i_vcc_wait_port_status(vcc_port_t *vport, kcondvar_t *cv, uint32_t status)
+{
+
+ int rv;
+
+ ASSERT(mutex_owned(&vport->lock));
+
+ for (; ; ) {
+
+ if ((vport->status & VCC_PORT_AVAIL) == 0) {
+ /* port has been deleted */
+ D1("i_vcc_wait_port_status: port%d deleted\n",
+ vport->number);
+ return (EIO);
+ }
+
+ if ((vport->status & VCC_PORT_OPEN) == 0) {
+ D1("i_vcc_wait_port_status: port%d is closed \n",
+ vport->number);
+ return (EIO);
+ }
+
+ if (vport->status & VCC_PORT_LDC_LINK_DOWN) {
+ return (EIO);
+ }
+
+ if ((vport->valid_pid != VCC_NO_PID_BLOCKING) &&
+ (vport->valid_pid != ddi_get_pid())) {
+ return (EIO);
+ }
+
+ if ((vport->status & status) == status) {
+ return (0);
+ }
+
+ if (!ddi_can_receive_sig()) {
+ return (EIO);
+ }
+
+ rv = cv_wait_sig(cv, &vport->lock);
+ if (rv == 0) {
+ D1("i_vcc_wait_port_status: port%d get intr \n",
+ vport->number);
+ /* got signal */
+ return (EINTR);
+ }
+ }
+
+}
+
+/* Synchronization between threads, signal state change */
+static void
+i_vcc_set_port_status(vcc_port_t *vport, kcondvar_t *cv, uint32_t status)
+{
+
+ mutex_enter(&vport->lock);
+ vport->status |= status;
+ cv_broadcast(cv);
+ mutex_exit(&vport->lock);
+}
+
+/* initialize a ldc channel */
+static int
+i_vcc_ldc_init(vcc_t *vccp, vcc_port_t *vport)
+{
+ ldc_attr_t attr;
+ int rv = EIO;
+
+ ASSERT(mutex_owned(&vport->lock));
+ ASSERT(vport->ldc_id != VCC_INVALID_CHANNEL);
+
+ /* initialize the channel */
+ attr.devclass = LDC_DEV_SERIAL;
+ attr.instance = ddi_get_instance(vccp->dip);
+ attr.qlen = VCC_QUEUE_LEN;
+ attr.mode = LDC_MODE_RAW;
+
+ if ((rv = ldc_init(vport->ldc_id, &attr, &(vport->ldc_handle))) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_init: port %d inv channel 0x%lx\n",
+ vport->number, vport->ldc_id);
+ vport->ldc_id = VCC_INVALID_CHANNEL;
+ return (rv);
+ }
+
+ /* register it */
+ if ((rv = ldc_reg_callback(vport->ldc_handle, vcc_ldc_cb,
+ (caddr_t)vport)) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_init: port@%d ldc_register_cb"
+ "failed\n", vport->number);
+ (void) ldc_fini(vport->ldc_handle);
+ vport->ldc_id = VCC_INVALID_CHANNEL;
+ return (rv);
+ }
+
+ /* open and bring channel up */
+ if ((rv = ldc_open(vport->ldc_handle)) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_init: port@%d inv channel 0x%lx\n",
+ vport->number, vport->ldc_id);
+ (void) ldc_unreg_callback(vport->ldc_handle);
+ (void) ldc_fini(vport->ldc_handle);
+ vport->ldc_id = VCC_INVALID_CHANNEL;
+ return (rv);
+ }
+
+ /* init the channel status */
+ if ((rv = ldc_status(vport->ldc_handle, &vport->ldc_status)) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_init: port@%d ldc_status failed\n",
+ vport->number);
+ (void) ldc_close(vport->ldc_handle);
+ (void) ldc_unreg_callback(vport->ldc_handle);
+ (void) ldc_fini(vport->ldc_handle);
+ vport->ldc_id = VCC_INVALID_CHANNEL;
+ return (rv);
+ }
+
+ return (0);
+}
+
+/* release a ldc channel */
+static int
+i_vcc_ldc_fini(vcc_port_t *vport)
+{
+ int rv = EIO;
+ vcc_msg_t buf;
+
+ D1("i_vcc_ldc_fini: port@%lld, ldc_id%%llx\n", vport->number,
+ vport->ldc_id);
+
+ ASSERT(mutex_owned(&vport->lock));
+
+ /* wait for write available */
+ rv = i_vcc_wait_port_status(vport, &vport->write_cv,
+ VCC_PORT_USE_WRITE_LDC);
+ if (rv) {
+ return (rv);
+ }
+ vport->status &= ~VCC_PORT_USE_WRITE_LDC;
+ /* send a HUP message */
+ buf.type = LDC_CONSOLE_CTRL;
+ buf.ctrl_msg = LDC_CONSOLE_HUP;
+ buf.size = 0;
+
+ /* in case of error, we still want to clean up ldc channel */
+ (void) i_vcc_write_ldc(vport, &buf);
+
+ mutex_exit(&vport->lock);
+ i_vcc_set_port_status(vport, &vport->write_cv, VCC_PORT_USE_WRITE_LDC);
+ mutex_enter(&vport->lock);
+
+ (void) ldc_set_cb_mode(vport->ldc_handle, LDC_CB_DISABLE);
+ if ((rv = ldc_close(vport->ldc_handle)) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_fini: cannot close channel %ld\n",
+ vport->ldc_id);
+ return (rv);
+ }
+
+ if ((rv = ldc_unreg_callback(vport->ldc_handle)) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_fini: port@%d ldc_unreg_callback"
+ "failed\n", vport->number);
+ return (rv);
+ }
+
+ if ((rv = ldc_fini(vport->ldc_handle)) != 0) {
+ cmn_err(CE_CONT, "i_vcc_ldc_fini: cannot finilize channel"
+ "%ld\n", vport->ldc_id);
+ return (rv);
+ }
+
+ return (0);
+}
+
+/* read data from ldc channel */
+
+static int
+i_vcc_read_ldc(vcc_port_t *vport, char *data_buf, size_t *sz)
+{
+
+ int rv;
+ size_t size;
+ size_t space_left = *sz;
+ vcc_msg_t buf;
+ int i;
+
+
+
+
+ /* make sure holding read lock */
+ ASSERT((vport->status & VCC_PORT_USE_READ_LDC) == 0);
+ ASSERT(space_left >= VCC_MTU_SZ);
+
+ *sz = 0;
+ while (space_left >= VCC_MTU_SZ) {
+ size = sizeof (buf);
+
+ rv = ldc_read(vport->ldc_handle, (caddr_t)&buf, &size);
+
+ if (rv) {
+ return (rv);
+ }
+
+
+ /*
+	 * FIXME: ldc_read should not return 0 with
+ * either size == 0, buf.size == 0 or size < VCC_HDR_SZ
+ */
+ if (size == 0) {
+ if (*sz > 0) {
+ return (0);
+ }
+ return (EAGAIN);
+ }
+
+ if (size < VCC_HDR_SZ) {
+ return (EIO);
+ }
+
+ /*
+ * only data is expected from console - otherwise
+ * return error
+ */
+ if (buf.type != LDC_CONSOLE_DATA) {
+ return (EIO);
+ }
+
+ if (buf.size == 0) {
+ if (*sz > 0) {
+ return (0);
+ }
+ return (EAGAIN);
+ }
+
+ /* copy data */
+ for (i = 0; i < buf.size; i++, (*sz)++) {
+ data_buf[*sz] = buf.data[i];
+ }
+
+ space_left -= buf.size;
+ }
+
+ return (0);
+}
+
+/* callback from ldc */
+static uint_t
+vcc_ldc_cb(uint64_t event, caddr_t arg)
+{
+
+ vcc_port_t *vport = (vcc_port_t *)arg;
+ boolean_t isempty;
+
+ /*
+ * do not need to hold lock because if ldc calls back, the
+ * ldc_handle must be valid.
+ */
+ D2("vcc_ldc_cb: callback invoked port=%d events=%llx\n",
+ vport->number, event);
+
+ /* check event from ldc */
+ if (event & LDC_EVT_WRITE) {
+ /* channel has space for write */
+
+ i_vcc_set_port_status(vport, &vport->write_cv,
+ VCC_PORT_LDC_WRITE_READY);
+ return (LDC_SUCCESS);
+ }
+
+ if (event & LDC_EVT_READ) {
+
+ /* channel has data for read */
+ (void) ldc_chkq(vport->ldc_handle, &isempty);
+ if (isempty) {
+ /* data already read */
+ return (LDC_SUCCESS);
+ }
+
+ i_vcc_set_port_status(vport, &vport->read_cv,
+ VCC_PORT_LDC_DATA_READY);
+ return (LDC_SUCCESS);
+ }
+
+ if (event & LDC_EVT_DOWN) {
+ /* channel is down */
+ i_vcc_set_port_status(vport, &vport->write_cv,
+ VCC_PORT_LDC_LINK_DOWN);
+ cv_broadcast(&vport->read_cv);
+
+ }
+
+ return (LDC_SUCCESS);
+
+}
+
+
+/*
+ * Bind an LDC channel id to an existing console port.
+ * Validates the port number, that the port exists and that no channel
+ * is configured yet, then records `ldc_id'.  If the port was already
+ * opened by a client, the channel is brought up immediately and
+ * blocked readers/writers are woken.
+ * Returns 0 on success, EINVAL on bad arguments, or an errno from
+ * i_vcc_ldc_init().
+ *
+ * NOTE(review): the VCC_PORT_AVAIL and ldc_id checks are performed
+ * before taking vport->lock — confirm mdeg callbacks are serialized
+ * so this cannot race with port add/delete.
+ */
+static int
+i_vcc_config_port(vcc_t *vccp, uint_t portno, uint64_t ldc_id)
+{
+	int 		rv = EIO;
+	vcc_port_t 	*vport;
+
+	if ((portno >= VCC_MAX_PORTS) || (portno == VCC_CONTROL_PORT)) {
+		cmn_err(CE_CONT, "i_vcc_config_port: invalid port number %d\n",
+		    portno);
+		return (EINVAL);
+	}
+
+	vport = &(vccp->port[portno]);
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		cmn_err(CE_CONT, "i_vcc_config_port: port@%d does not exist\n",
+		    portno);
+		return (EINVAL);
+	}
+
+
+	if (vport->ldc_id != VCC_INVALID_CHANNEL) {
+		cmn_err(CE_CONT, "i_vcc_config_port: port@%d channel already"
+		    "configured\n", portno);
+		return (EINVAL);
+	}
+
+	mutex_enter(&vport->lock);
+
+	/* store the ldc ID */
+	vport->ldc_id = ldc_id;
+	/* check if someone has already opened this port */
+	if (vport->status & VCC_PORT_OPEN) {
+
+		if ((rv = i_vcc_ldc_init(vccp, vport)) != 0) {
+			mutex_exit(&vport->lock);
+			return (rv);
+		}
+
+		/* mark port as ready */
+		vport->status |= VCC_PORT_LDC_CHANNEL_READY;
+		cv_broadcast(&vport->read_cv);
+		cv_broadcast(&vport->write_cv);
+	}
+
+	mutex_exit(&vport->lock);
+
+	D1("i_vcc_config_port: port@%d ldc=%d, domain=%s",
+	    vport->number, vport->ldc_id, vport->minorp->domain_name);
+
+	return (0);
+}
+
+/*
+ * Add a console port discovered via the machine description.
+ * Validates arguments, assigns (or reuses) a minor-table slot keyed by
+ * domain name, creates the per-domain minor node and marks the port
+ * available with a pending "added" event for vntsd.
+ * Returns MDEG_SUCCESS or MDEG_FAILURE.
+ */
+static int
+i_vcc_add_port(vcc_t *vccp, char *group_name, uint64_t tcp_port,
+    uint_t portno, char *domain_name)
+{
+	int 		instance;
+	int		rv = MDEG_FAILURE;
+	minor_t 	minor;
+	vcc_port_t 	*vport;
+	uint_t		minor_idx;
+	char		name[MAXPATHLEN];
+
+	if ((portno >= VCC_MAX_PORTS) || (portno == VCC_CONTROL_PORT)) {
+		DWARN("i_vcc_add_port: invalid port number %d\n", portno);
+		return (MDEG_FAILURE);
+	}
+
+	vport = &(vccp->port[portno]);
+	if (vport->status & VCC_PORT_AVAIL) {
+		/* this port already exists */
+		cmn_err(CE_CONT, "i_vcc_add_port: invalid port - port@%d "
+		    "exists\n", portno);
+		return (MDEG_FAILURE);
+	}
+
+	vport->number = portno;
+	vport->ldc_id = VCC_INVALID_CHANNEL;
+
+	if (domain_name == NULL) {
+		cmn_err(CE_CONT, "i_vcc_add_port: invalid domain name\n");
+		return (MDEG_FAILURE);
+	}
+
+	if (group_name == NULL) {
+		cmn_err(CE_CONT, "i_vcc_add_port: invalid group name\n");
+		return (MDEG_FAILURE);
+	}
+
+	/*
+	 * Look up minor number: reuse the slot of a previously removed
+	 * port of the same domain so the minor stays stable.
+	 */
+	for (minor_idx = 0; minor_idx < vccp->minors_assigned; minor_idx++) {
+		if (strcmp(vccp->minor_tbl[minor_idx].domain_name,
+		    domain_name) == 0) {
+			/* found previous assigned minor number */
+			break;
+		}
+	}
+
+	if (minor_idx == vccp->minors_assigned) {
+		/* end of lookup - assign new minor number */
+		if (minor_idx == VCC_MAX_PORTS) {
+			cmn_err(CE_CONT, "i_vcc_add_port:"
+			    "too many minornodes (%d)\n",
+			    minor_idx);
+			return (MDEG_FAILURE);
+		}
+
+		(void) strlcpy(vccp->minor_tbl[minor_idx].domain_name,
+		    domain_name, MAXPATHLEN);
+
+		vccp->minors_assigned++;
+	}
+
+	vport->minorp = &vccp->minor_tbl[minor_idx];
+	vccp->minor_tbl[minor_idx].portno = portno;
+
+	(void) strlcpy(vport->group_name, group_name, MAXPATHLEN);
+
+	vport->tcp_port = tcp_port;
+	D1("i_vcc_add_port:@%d domain=%s, group=%s, tcp=%lld",
+	    vport->number, vport->minorp->domain_name,
+	    vport->group_name, vport->tcp_port);
+
+
+	/*
+	 * Create a minor node. The minor number is
+	 * (instance << VCC_INST_SHIFT) | minor_idx
+	 */
+	instance = ddi_get_instance(vccp->dip);
+
+	minor = (instance << VCC_INST_SHIFT) | (minor_idx);
+
+	(void) snprintf(name, MAXPATHLEN - 1, "%s%s", VCC_MINOR_NAME_PREFIX,
+	    domain_name);
+
+	rv = ddi_create_minor_node(vccp->dip, name, S_IFCHR, minor,
+	    DDI_NT_SERIAL, 0);
+
+	if (rv != DDI_SUCCESS) {
+		/*
+		 * NOTE(review): this decrement assumes the slot was newly
+		 * assigned above; when an existing slot was reused it
+		 * shrinks minors_assigned incorrectly — confirm.
+		 */
+		vccp->minors_assigned--;
+		return (MDEG_FAILURE);
+	}
+
+	mutex_enter(&vport->lock);
+	vport->status = VCC_PORT_AVAIL | VCC_PORT_ADDED;
+	mutex_exit(&vport->lock);
+
+
+	return (MDEG_SUCCESS);
+}
+
+/*
+ * Delete a console port: close it if open, remove its minor node and
+ * wake any blocked readers/writers.  Caller must hold vport->lock.
+ * Returns 0 on success, or the errno from closing the port.
+ */
+static int
+i_vcc_delete_port(vcc_t *vccp, vcc_port_t *vport)
+{
+
+	char	name[MAXPATHLEN];
+	/*
+	 * BUGFIX: rv was uninitialized; when the port was available but
+	 * not open, an indeterminate value was returned to the caller.
+	 */
+	int	rv = 0;
+
+
+	ASSERT(mutex_owned(&vport->lock));
+
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		D1("vcc_del_port port already deleted \n");
+		return (0);
+	}
+
+	if (vport->status & VCC_PORT_OPEN) {
+		/* do not block mdeg callback */
+		vport->valid_pid = VCC_NO_PID_BLOCKING;
+		rv = i_vcc_close_port(vport);
+	}
+
+	/* remove minor node */
+	(void) snprintf(name, MAXPATHLEN-1, "%s%s", VCC_MINOR_NAME_PREFIX,
+	    vport->minorp->domain_name);
+
+	ddi_remove_minor_node(vccp->dip, name);
+
+	/* let read and write thread know */
+	cv_broadcast(&vport->read_cv);
+	cv_broadcast(&vport->write_cv);
+	vport->status = 0;
+	return (rv);
+
+
+}
+
+/*
+ * Register with the machine-description event generator (MDEG) so this
+ * vcc instance is notified when console ports are added or removed.
+ * Builds a per-instance copy of the property template, registers
+ * vcc_mdeg_cb, and stashes the spec and handle for unregistration in
+ * detach.  Returns 0 on success or DDI_FAILURE.
+ */
+static int
+i_vcc_mdeg_register(vcc_t *vccp, int instance)
+{
+	mdeg_prop_spec_t *pspecp;
+	mdeg_node_spec_t *ispecp;
+	mdeg_handle_t	mdeg_hdl;
+	int		sz;
+	int		rv;
+
+	/*
+	 * Allocate and initialize a per-instance copy
+	 * of the global property spec array that will
+	 * uniquely identify this vcc instance.
+	 */
+	sz = sizeof (vcc_prop_template);
+	pspecp = kmem_alloc(sz, KM_SLEEP);
+
+	bcopy(vcc_prop_template, pspecp, sz);
+
+	VCC_SET_MDEG_PROP_INST(pspecp, instance);
+
+	/* initialize the complete prop spec structure */
+	ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
+	ispecp->namep = "virtual-device";
+	ispecp->specp = pspecp;
+
+	/* perform the registration */
+	rv = mdeg_register(ispecp, &vcc_port_match, vcc_mdeg_cb,
+	    vccp, &mdeg_hdl);
+
+	if (rv != MDEG_SUCCESS) {
+		cmn_err(CE_CONT, "i_vcc_mdeg_register:"
+		    "mdeg_register failed (%d)\n", rv);
+		/* free both allocations on failure */
+		kmem_free(ispecp, sizeof (mdeg_node_spec_t));
+		kmem_free(pspecp, sz);
+		return (DDI_FAILURE);
+	}
+
+	/* save off data that will be needed later (freed in detach) */
+	vccp->md_ispecp = (void *)ispecp;
+	vccp->mdeg_hdl = mdeg_hdl;
+
+	return (0);
+}
+
+/*
+ * Tear down the per-port synchronization objects (lock plus the read
+ * and write condition variables) for every slot in the port table.
+ * Used on attach failure and could be shared by other cleanup paths.
+ */
+static void
+i_vcc_cleanup_port_table(vcc_t *vccp)
+{
+	int	idx;
+
+	for (idx = 0; idx < VCC_MAX_PORTS; idx++) {
+		vcc_port_t *p = &(vccp->port[idx]);
+
+		mutex_destroy(&p->lock);
+		cv_destroy(&p->read_cv);
+		cv_destroy(&p->write_cv);
+	}
+}
+
+/*
+ * attach(9E): attach a device to the system.
+ * called once for each instance of the device on the system.
+ *
+ * Sets up soft state, per-port locks/cvs, the control minor node, and
+ * registers with MDEG.  Note mdeg registration may invoke the callback
+ * synchronously, so vccp->lock is dropped before registering.
+ */
+static int
+vcc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int 	i, instance, inst;
+	int 	rv = DDI_FAILURE;
+	vcc_t	*vccp;
+	minor_t minor;
+	vcc_port_t *vport;
+
+	switch (cmd) {
+
+	case DDI_ATTACH:
+
+		instance = ddi_get_instance(dip);
+		if (ddi_soft_state_zalloc(vcc_ssp, instance) != DDI_SUCCESS)
+			return (DDI_FAILURE);
+
+		vccp = ddi_get_soft_state(vcc_ssp, instance);
+		if (vccp == NULL) {
+			/*
+			 * BUGFIX: ddi_soft_state_free(9F) takes the soft
+			 * state handle, not the (here NULL) state pointer.
+			 */
+			ddi_soft_state_free(vcc_ssp, instance);
+			return (ENXIO);
+		}
+
+		D1("vcc_attach: DDI_ATTACH instance=%d\n", instance);
+
+		/* initialize the mutex */
+		mutex_init(&vccp->lock, NULL, MUTEX_DRIVER, NULL);
+
+		mutex_enter(&vccp->lock);
+
+		vccp->dip = dip;
+
+		/* initialize per-port locks and condition variables */
+		for (i = 0; i < VCC_MAX_PORTS; i++) {
+			vport = &(vccp->port[i]);
+			mutex_init(&vport->lock, NULL, MUTEX_DRIVER, NULL);
+			cv_init(&vport->read_cv, NULL, CV_DRIVER, NULL);
+			cv_init(&vport->write_cv, NULL, CV_DRIVER, NULL);
+			vport->valid_pid = VCC_NO_PID_BLOCKING;
+		}
+
+		vport = &vccp->port[VCC_CONTROL_PORT];
+		mutex_enter(&vport->lock);
+
+		vport->minorp = &vccp->minor_tbl[VCC_CONTROL_MINOR_IDX];
+		vport->status |= VCC_PORT_AVAIL;
+
+		/* create a minor node for vcc control */
+		minor = (instance << VCC_INST_SHIFT) | VCC_CONTROL_MINOR_IDX;
+
+		/*
+		 * NOTE(review): index/value look swapped relative to the
+		 * minorp assignment above (tbl[VCC_CONTROL_PORT].portno =
+		 * VCC_CONTROL_MINOR_IDX) — confirm intended.
+		 */
+		vccp->minor_tbl[VCC_CONTROL_PORT].portno =
+		    VCC_CONTROL_MINOR_IDX;
+
+
+		rv = ddi_create_minor_node(vccp->dip, "ctl", S_IFCHR, minor,
+		    DDI_NT_SERIAL, 0);
+
+		mutex_exit(&vport->lock);
+
+		if (rv != DDI_SUCCESS) {
+			cmn_err(CE_CONT, "vcc_attach: error"
+			    "creating control minor node\n");
+
+			i_vcc_cleanup_port_table(vccp);
+
+			mutex_exit(&vccp->lock);
+			/*
+			 * BUGFIX: free via the soft state handle vcc_ssp,
+			 * not the state pointer vccp.
+			 */
+			ddi_soft_state_free(vcc_ssp, instance);
+
+			return (DDI_FAILURE);
+		}
+
+		/* get the instance number by reading 'reg' property */
+		inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+		    "reg", -1);
+		if (inst == -1) {
+			cmn_err(CE_CONT, "vcc_attach: vcc%d has no "
+			    "'reg' property\n",
+			    ddi_get_instance(dip));
+
+			i_vcc_cleanup_port_table(vccp);
+
+			/* remove minor */
+			ddi_remove_minor_node(vccp->dip, NULL);
+
+			/* clean up soft state */
+			mutex_exit(&vccp->lock);
+			/* BUGFIX: handle, not pointer (see above) */
+			ddi_soft_state_free(vcc_ssp, instance);
+
+			return (DDI_FAILURE);
+		}
+
+		/*
+		 * Mdeg might invoke callback in the same call sequence
+		 * if there is a domain port at the time of registration.
+		 * Since the callback also grabs vcc->lock mutex, to avoid
+		 * mutex reentry error, release the lock before registration
+		 */
+		mutex_exit(&vccp->lock);
+
+		/* register for notifications from Zeus */
+		rv = i_vcc_mdeg_register(vccp, inst);
+		if (rv != MDEG_SUCCESS) {
+			cmn_err(CE_CONT, "vcc_attach: error register to MD\n");
+
+			i_vcc_cleanup_port_table(vccp);
+
+			/* remove minor */
+			ddi_remove_minor_node(vccp->dip, NULL);
+
+			/* clean up soft state */
+			/* BUGFIX: handle, not pointer (see above) */
+			ddi_soft_state_free(vcc_ssp, instance);
+
+			return (DDI_FAILURE);
+		}
+
+		return (DDI_SUCCESS);
+
+	case DDI_RESUME:
+
+		return (DDI_SUCCESS);
+
+	default:
+
+		return (DDI_FAILURE);
+	}
+}
+
+/*
+ * detach(9E): detach a device from the system.
+ *
+ * Unregisters from MDEG, frees the registration specs, removes all
+ * minor nodes, closes/deletes every port and finally destroys all
+ * synchronization objects and the soft state.
+ */
+static int
+vcc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	int 	i, instance;
+	vcc_t	*vccp;
+	mdeg_node_spec_t *ispecp;
+	vcc_port_t *vport;
+
+	switch (cmd) {
+
+	case DDI_DETACH:
+
+		instance = ddi_get_instance(dip);
+		vccp = ddi_get_soft_state(vcc_ssp, instance);
+		if (vccp == NULL)
+			return (ENXIO);
+
+		D1("vcc_detach: DDI_DETACH instance=%d\n", instance);
+
+		mutex_enter(&vccp->lock);
+
+		/* unregister from MD event generator */
+
+		ASSERT(vccp->mdeg_hdl);
+		(void) mdeg_unregister(vccp->mdeg_hdl);
+
+		ispecp = (mdeg_node_spec_t *)vccp->md_ispecp;
+		ASSERT(ispecp);
+
+		/* free the per-instance spec allocated in mdeg_register */
+		kmem_free(ispecp->specp, sizeof (vcc_prop_template));
+		kmem_free(ispecp, sizeof (mdeg_node_spec_t));
+
+		/* remove minor nodes */
+		ddi_remove_minor_node(vccp->dip, NULL);
+		mutex_exit(&vccp->lock);
+
+		/* close/delete each port, then destroy its sync objects */
+		for (i = 0; i < VCC_MAX_PORTS; i++) {
+
+			vport = &vccp->port[i];
+			mutex_enter(&vport->lock);
+			if (i == VCC_CONTROL_PORT) {
+				if (vport->status & VCC_PORT_OPEN) {
+					(void) i_vcc_close_port(vport);
+				}
+			}
+
+			if ((vccp->port[i].status & VCC_PORT_AVAIL) &&
+			    (i != VCC_CONTROL_PORT)) {
+				D1("vcc_detach: removing port port@%d\n", i);
+				(void) i_vcc_delete_port(vccp, vport);
+			}
+			mutex_exit(&vport->lock);
+			cv_destroy(&vport->read_cv);
+			cv_destroy(&vport->write_cv);
+			mutex_destroy(&vport->lock);
+		}
+
+
+
+		/* destroy mutex and free the soft state */
+		mutex_destroy(&vccp->lock);
+		ddi_soft_state_free(vcc_ssp, instance);
+
+		return (DDI_SUCCESS);
+
+	case DDI_SUSPEND:
+
+		return (DDI_SUCCESS);
+
+	default:
+
+		return (DDI_FAILURE);
+	}
+}
+
+/*
+ * cb_open: open a console (or the control) minor node.
+ * Enforces single-open per port, validates the caller via
+ * i_vcc_can_use_port(), lazily brings up the LDC channel for data
+ * ports, and records non-blocking mode from the open flags.
+ * Returns 0, EAGAIN if already open, or an errno.
+ */
+static int
+vcc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
+{
+	_NOTE(ARGUNUSED(otyp, cred))
+
+	int	    instance;
+	int	    rv = EIO;
+	minor_t	    minor;
+	uint_t	    portno;
+	vcc_t	    *vccp;
+	vcc_port_t  *vport;
+
+	minor = getminor(*devp);
+	instance = VCCINST(minor);
+
+	vccp = ddi_get_soft_state(vcc_ssp, instance);
+	if (vccp == NULL) {
+		return (ENXIO);
+	}
+
+	portno = VCCPORT(vccp, minor);
+
+	vport = &(vccp->port[portno]);
+
+	mutex_enter(&vport->lock);
+
+	if (vport->status & VCC_PORT_OPEN) {
+		/* only one open per port */
+		cmn_err(CE_CONT, "vcc_open: virtual-console-concentrator@%d:%d "
+		    "is already open\n", instance, portno);
+		mutex_exit(&vport->lock);
+		return (EAGAIN);
+	}
+
+	/* check minor no and pid */
+	if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+	    vport)) != 0) {
+		mutex_exit(&vport->lock);
+		return (rv);
+	}
+
+	/* the control port has no LDC channel; mark open and return */
+	if (portno == VCC_CONTROL_PORT) {
+		vport->status |= VCC_PORT_OPEN;
+		mutex_exit(&vport->lock);
+		return (0);
+	}
+
+
+	/* check if channel has been initialized */
+	if ((vport->status & VCC_PORT_LDC_CHANNEL_READY) == 0) {
+		rv = i_vcc_ldc_init(vccp, vport);
+		if (rv) {
+			mutex_exit(&vport->lock);
+			return (EIO);
+		}
+
+		/* mark port as ready */
+		vport->status |= VCC_PORT_LDC_CHANNEL_READY;
+	}
+
+	/* grant read/write "locks" and clear terminal suspends */
+	vport->status |= VCC_PORT_USE_READ_LDC | VCC_PORT_USE_WRITE_LDC|
+	    VCC_PORT_TERM_RD|VCC_PORT_TERM_WR|VCC_PORT_OPEN;
+
+	if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) {
+		vport->status |= VCC_PORT_NONBLOCK;
+	}
+
+	mutex_exit(&vport->lock);
+
+	return (0);
+}
+
+/*
+ * Close a console port: shut down its LDC channel if one is up, reset
+ * per-open state bits, and wake any blocked readers/writers.
+ * Caller must hold vport->lock.  Returns 0, or the errno from
+ * i_vcc_ldc_fini() (in which case the port remains open).
+ */
+static int
+i_vcc_close_port(vcc_port_t *vport)
+{
+	int rv = EIO;
+
+	if ((vport->status & VCC_PORT_OPEN) == 0) {
+		return (0);
+	}
+
+	ASSERT(mutex_owned(&vport->lock));
+
+	if (vport->status & VCC_PORT_LDC_CHANNEL_READY) {
+		/* clean up ldc channel */
+		if ((rv = i_vcc_ldc_fini(vport)) != 0) {
+			return (rv);
+		}
+		vport->status &= ~VCC_PORT_LDC_CHANNEL_READY;
+	}
+
+	/* reset rd/wr suspends */
+	vport->status |= VCC_PORT_TERM_RD | VCC_PORT_TERM_WR;
+	vport->status &= ~VCC_PORT_NONBLOCK;
+	vport->status &= ~VCC_PORT_OPEN;
+	vport->valid_pid = VCC_NO_PID_BLOCKING;
+
+	/* signal any blocked read and write thread */
+	cv_broadcast(&vport->read_cv);
+	cv_broadcast(&vport->write_cv);
+
+	return (0);
+}
+
+/*
+ * cb_close: close a console or the control minor node.
+ * For the control port, pending vntsd events are reset; for data
+ * ports, the caller is validated and the port closed under its lock.
+ * Returns 0 or an errno.
+ */
+static int
+vcc_close(dev_t dev, int flag, int otyp, cred_t *cred)
+{
+	_NOTE(ARGUNUSED(flag, otyp, cred))
+
+	int	    instance;
+	minor_t	    minor;
+	int	    rv = EIO;
+	uint_t	    portno;
+	vcc_t	    *vccp;
+	vcc_port_t  *vport;
+
+	minor = getminor(dev);
+
+	instance = VCCINST(minor);
+	vccp = ddi_get_soft_state(vcc_ssp, instance);
+	if (vccp == NULL) {
+		return (ENXIO);
+	}
+
+	portno = VCCPORT(vccp, minor);
+
+	D1("vcc_close: closing virtual-console-concentrator@%d:%d\n",
+	    instance, portno);
+
+	vport = &(vccp->port[portno]);
+
+
+	/* NOTE(review): status is read here without vport->lock — confirm */
+	if ((vport->status & VCC_PORT_OPEN) == 0) {
+		return (0);
+	}
+
+	if (portno == VCC_CONTROL_PORT) {
+		/*
+		 * vntsd closes control port before it exits. There
+		 * could be events still pending for vntsd.
+		 */
+		/* rv is assigned but deliberately not returned here */
+		rv = i_vcc_reset_events(vccp);
+		return (0);
+	}
+
+	mutex_enter(&vport->lock);
+
+	/* check minor no and pid */
+	if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+	    vport)) != 0) {
+		mutex_exit(&vport->lock);
+		return (rv);
+	}
+
+	rv = i_vcc_close_port(vport);
+	mutex_exit(&vport->lock);
+
+	return (rv);
+}
+
+/*
+ * ioctl VCC_CONS_TBL - vntsd allocates buffer according to return of
+ * VCC_NUM_PORTS. However, when vntsd requests for the console table, console
+ * ports could be deleted or added. parameter num_ports is number of structures
+ * that vntsd allocated for the table. If there are more ports than
+ * num_ports, set up to wakeup vntsd to add ports.
+ * If there less ports than num_ports, fill (-1) for cons_no to tell vntsd.
+ *
+ * Returns 0 on success or EFAULT if a copyout fails.
+ */
+static int
+i_vcc_cons_tbl(vcc_t *vccp, uint_t num_ports, caddr_t buf, int mode)
+{
+	vcc_console_t	cons;
+	int		i;
+	vcc_port_t	*vport;
+	boolean_t	notify_vntsd = B_FALSE;
+	char		pathname[MAXPATHLEN];
+
+
+	(void) ddi_pathname(vccp->dip, pathname);
+	for (i = 0; i < VCC_MAX_PORTS; i++) {
+
+		vport = &vccp->port[i];
+
+		if (i == VCC_CONTROL_PORT) {
+			continue;
+		}
+
+		if ((vport->status & VCC_PORT_AVAIL) == 0) {
+			continue;
+		}
+
+		/* a port exists before vntsd becomes online */
+		mutex_enter(&vport->lock);
+
+		if (num_ports == 0) {
+			/* more ports than vntsd's buffer can hold */
+			vport->status |= VCC_PORT_ADDED;
+			notify_vntsd = B_TRUE;
+			mutex_exit(&vport->lock);
+			continue;
+		}
+
+		bzero(&cons, sizeof (vcc_console_t));
+
+		/* construct console buffer */
+		cons.cons_no = vport->number;
+		cons.tcp_port = vport->tcp_port;
+		(void) memcpy(cons.domain_name,
+		    vport->minorp->domain_name, MAXPATHLEN);
+
+		(void) memcpy(cons.group_name, vport->group_name,
+		    MAXPATHLEN);
+		vport->status &= ~VCC_PORT_ADDED;
+		mutex_exit(&vport->lock);
+
+		(void) snprintf(cons.dev_name, MAXPATHLEN-1, "%s:%s%s",
+		    pathname, VCC_MINOR_NAME_PREFIX, cons.domain_name);
+
+		/* copy out data */
+		if (ddi_copyout(&cons, (void *)buf,
+		    sizeof (vcc_console_t), mode)) {
+			/*
+			 * BUGFIX: vport->lock was already dropped above;
+			 * the old code called mutex_exit() again here,
+			 * releasing a lock it did not hold.
+			 */
+			return (EFAULT);
+		}
+		buf += sizeof (vcc_console_t);
+
+		num_ports--;
+
+	}
+
+	if (num_ports == 0) {
+		/* vntsd's buffer is full */
+
+		if (notify_vntsd) {
+			/* more ports need to notify vntsd */
+			vport = &vccp->port[VCC_CONTROL_PORT];
+			mutex_enter(&vport->lock);
+			vport->pollevent |= VCC_POLL_ADD_PORT;
+			mutex_exit(&vport->lock);
+		}
+
+		return (0);
+	}
+
+	/* less ports than vntsd expected */
+	bzero(&cons, sizeof (vcc_console_t));
+	cons.cons_no = -1;
+
+	while (num_ports > 0) {
+		/* fill vntsd buffer with no console */
+		if (ddi_copyout(&cons, (void *)buf,
+		    sizeof (vcc_console_t), mode) != 0) {
+			/* BUGFIX: no lock is held in this loop; removed
+			 * the spurious mutex_exit() that was here. */
+			return (EFAULT);
+		}
+		D1("i_vcc_cons_tbl: a port is deleted\n");
+		/*
+		 * BUGFIX: advance by exactly one table entry; the old
+		 * "+MAXPATHLEN" stride did not match the entry size
+		 * used by the copyout loop above and could write past
+		 * vntsd's num_ports-entry buffer.
+		 */
+		buf += sizeof (vcc_console_t);
+		num_ports--;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Clear `event' from the control port's pending poll events, but only
+ * when no available port still has any of the `port_status' bits set
+ * (i.e. there is no remaining change for vntsd to pick up).
+ */
+static void
+i_vcc_turn_off_event(vcc_t *vccp, uint32_t port_status, uint32_t event)
+{
+
+	vcc_port_t	*ctrl_port;
+	int		idx;
+
+	for (idx = 0; idx < VCC_MAX_PORTS; idx++) {
+		vcc_port_t *p = &(vccp->port[idx]);
+
+		if (((p->status & VCC_PORT_AVAIL) != 0) &&
+		    ((p->status & port_status) != 0)) {
+			/* at least one port still has a pending change */
+			return;
+		}
+	}
+
+	/* nothing pending anywhere: drop the event bit on control port */
+	ctrl_port = &vccp->port[VCC_CONTROL_PORT];
+
+	mutex_enter(&ctrl_port->lock);
+	ctrl_port->pollevent &= ~event;
+	mutex_exit(&ctrl_port->lock);
+}
+
+/*
+ * ioctl VCC_CONS_INFO: return the configuration (console number, tcp
+ * port, domain/group names, device path) of one console port to vntsd.
+ * `buf' holds the requested port number on entry and receives a
+ * vcc_console_t on return.  Also clears the port's pending-add flag
+ * and retires the ADD_PORT poll event when no adds remain.
+ * Returns 0, EINVAL for a bad/unavailable port, or EFAULT.
+ */
+static int
+i_vcc_cons_info(vcc_t *vccp, caddr_t buf, int mode)
+{
+	vcc_console_t	cons;
+	uint_t		portno;
+	vcc_port_t	*vport;
+	char		pathname[MAXPATHLEN];
+
+	/* read in portno */
+	if (ddi_copyin((void*)buf, &portno, sizeof (uint_t), mode)) {
+		return (EFAULT);
+	}
+
+	D1("i_vcc_cons_info@%d:\n", portno);
+
+	if ((portno >= VCC_MAX_PORTS) || (portno == VCC_CONTROL_PORT)) {
+		return (EINVAL);
+	}
+
+	vport = &vccp->port[portno];
+
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		return (EINVAL);
+	}
+
+	mutex_enter(&vport->lock);
+	vport->status &= ~VCC_PORT_ADDED;
+
+	/* construct configruation data  */
+	bzero(&cons, sizeof (vcc_console_t));
+
+	cons.cons_no = vport->number;
+	cons.tcp_port = vport->tcp_port;
+
+	(void) memcpy(cons.domain_name, vport->minorp->domain_name, MAXPATHLEN);
+
+	(void) memcpy(cons.group_name, vport->group_name, MAXPATHLEN);
+
+	mutex_exit(&vport->lock);
+
+	/* BUGFIX: was terminated with a comma operator, chaining into the
+	 * snprintf below as one expression statement. */
+	(void) ddi_pathname(vccp->dip, pathname);
+
+	/* copy device name */
+	(void) snprintf(cons.dev_name, MAXPATHLEN-1, "%s:%s%s",
+	    pathname, VCC_MINOR_NAME_PREFIX, cons.domain_name);
+	/* copy data */
+	if (ddi_copyout(&cons, (void *)buf,
+	    sizeof (vcc_console_t), mode) != 0) {
+		/*
+		 * BUGFIX: vport->lock was already released above; the old
+		 * code called mutex_exit() again here on an unheld lock.
+		 */
+		return (EFAULT);
+	}
+
+	D1("i_vcc_cons_info@%d:domain:%s serv:%s tcp@%lld %s\n",
+	    cons.cons_no, cons.domain_name,
+	    cons.group_name, cons.tcp_port, cons.dev_name);
+
+	i_vcc_turn_off_event(vccp, VCC_PORT_ADDED, VCC_POLL_ADD_PORT);
+
+	return (0);
+}
+
+
+/* response to vntsd inquiry ioctl call */
+static int
+i_vcc_inquiry(vcc_t *vccp, caddr_t buf, int mode)
+{
+ vcc_port_t *vport;
+ uint_t i;
+ vcc_response_t msg;
+
+ vport = &(vccp->port[VCC_CONTROL_PORT]);
+
+ if ((vport->pollevent & VCC_POLL_ADD_PORT) == 0) {
+ return (EINVAL);
+ }
+
+ /* an added port */
+
+ D1("i_vcc_inquiry\n");
+
+ for (i = 0; i < VCC_MAX_PORTS; i++) {
+ if ((vccp->port[i].status & VCC_PORT_AVAIL) == 0) {
+ continue;
+ }
+
+ if (vccp->port[i].status & VCC_PORT_ADDED) {
+ /* port added */
+ msg.reason = VCC_CONS_ADDED;
+ msg.cons_no = i;
+
+ if (ddi_copyout((void *)&msg, (void *)buf,
+ sizeof (msg), mode) == -1) {
+ cmn_err(CE_CONT, "i_vcc_find_changed_port:"
+ "ddi_copyout"
+ " failed\n");
+ return (EFAULT);
+ }
+ return (0);
+ }
+ }
+
+ return (EINVAL);
+}
+
+/*
+ * Clean up per-port event state after vntsd exits: mark the control
+ * port closed and clear its poll state, and retire any pending
+ * "port added" flags on data ports.  Always returns 0.
+ */
+static int
+i_vcc_reset_events(vcc_t *vccp)
+{
+	uint_t	i;
+	vcc_port_t *vport;
+
+	for (i = 0; i < VCC_MAX_PORTS; i++) {
+		vport = &(vccp->port[i]);
+
+		if ((vport->status & VCC_PORT_AVAIL) == 0) {
+			continue;
+		}
+
+		ASSERT(!mutex_owned(&vport->lock));
+
+		if (i == VCC_CONTROL_PORT) {
+			/* close control port */
+			mutex_enter(&vport->lock);
+			vport->status &= ~VCC_PORT_OPEN;
+
+			/* clean up poll events */
+			vport->pollevent = 0;
+			vport->pollflag = 0;
+			mutex_exit(&vport->lock);
+			continue;
+		}
+		if (vport->status & VCC_PORT_ADDED) {
+			/* pending added port event to vntsd */
+			mutex_enter(&vport->lock);
+			vport->status &= ~VCC_PORT_ADDED;
+			mutex_exit(&vport->lock);
+		}
+
+	}
+
+	/*
+	 * BUGFIX(cleanup): removed a dead trailing assignment of vport
+	 * to the control port; the value was never used.
+	 */
+	return (0);
+}
+
+/*
+ * ioctl VCC_FORCE_CLOSE: forcibly close a console port on behalf of
+ * vntsd.  After the close, valid_pid is set to the caller's pid so
+ * other processes are blocked from using the port.
+ * Returns 0, EINVAL for a bad/unavailable port, EFAULT on copyin
+ * failure, or the errno from i_vcc_close_port().
+ */
+static int
+i_vcc_force_close(vcc_t *vccp, caddr_t buf, int mode)
+{
+	uint_t		portno;
+	vcc_port_t	*vport;
+	int		rv;
+
+	/* read in portno */
+	if (ddi_copyin((void*)buf, &portno, sizeof (uint_t), mode)) {
+		return (EFAULT);
+	}
+
+	D1("i_vcc_force_close@%d:\n", portno);
+
+	if ((portno >= VCC_MAX_PORTS) || (portno == VCC_CONTROL_PORT)) {
+		return (EINVAL);
+	}
+
+	vport = &vccp->port[portno];
+
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		return (EINVAL);
+	}
+
+	mutex_enter(&vport->lock);
+
+	rv = i_vcc_close_port(vport);
+
+	/* block callers other than vntsd */
+	vport->valid_pid = ddi_get_pid();
+
+	mutex_exit(&vport->lock);
+	return (rv);
+
+}
+
+/*
+ * ioctl VCC_CONS_STATUS: verify that the console description vntsd
+ * holds (console number, domain name, group name, tcp port) still
+ * matches the port.  On any mismatch or unavailable port, cons_no is
+ * set to -1 in the structure copied back out.
+ * Returns 0 or EFAULT.
+ */
+static int
+i_vcc_cons_status(vcc_t *vccp, caddr_t buf, int mode)
+{
+	vcc_console_t	console;
+	vcc_port_t	*vport;
+
+	/* read in portno */
+	if (ddi_copyin((void*)buf, &console, sizeof (console), mode)) {
+		return (EFAULT);
+	}
+
+	D1("i_vcc_cons_status@%d:\n", console.cons_no);
+
+	/*
+	 * BUGFIX: cons_no comes from an untrusted ioctl caller and is a
+	 * signed field (it is assigned -1 below); without the explicit
+	 * negative check a negative value passed both tests and indexed
+	 * vccp->port[] out of bounds.
+	 */
+	if ((console.cons_no < 0) ||
+	    (console.cons_no >= VCC_MAX_PORTS) ||
+	    (console.cons_no == VCC_CONTROL_PORT)) {
+		return (EINVAL);
+	}
+
+
+	vport = &vccp->port[console.cons_no];
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		console.cons_no = -1;
+	} else if (strncmp(console.domain_name, vport->minorp->domain_name,
+	    MAXPATHLEN)) {
+		console.cons_no = -1;
+	} else if (strncmp(console.group_name, vport->group_name,
+	    MAXPATHLEN)) {
+		console.cons_no = -1;
+	} else if (console.tcp_port != vport->tcp_port) {
+		console.cons_no = -1;
+	}
+
+	D1("i_vcc_cons_status@%d: %s %s %llx\n", console.cons_no,
+	    console.group_name, console.domain_name, console.tcp_port);
+	if (ddi_copyout(&console, (void *)buf, sizeof (console), mode) == -1) {
+		cmn_err(CE_CONT, "i_vcc_cons_status ddi_copyout failed\n");
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * cb_ioctl handler for the vcc control port: dispatch vntsd's control
+ * requests (console count/table/info/status, inquiry, force-close).
+ *
+ * NOTE(review): num_ports is a function-static so the value captured
+ * by VCC_NUM_CONSOLE is reused by a later VCC_CONS_TBL call; being
+ * static it is shared across ALL driver instances and concurrent
+ * callers — confirm vntsd is the single serialized user.
+ */
+static int
+i_vcc_ctrl_ioctl(vcc_t *vccp, int cmd, void* arg, int mode)
+{
+
+	static uint_t	num_ports;
+
+
+	switch (cmd) {
+
+	case VCC_NUM_CONSOLE:
+
+		mutex_enter(&vccp->lock);
+		num_ports = vccp->num_ports;
+		mutex_exit(&vccp->lock);
+		/* number of consoles */
+
+		return (ddi_copyout((void *)&num_ports, arg,
+		    sizeof (int), mode));
+	case VCC_CONS_TBL:
+
+		/* console config table */
+		return (i_vcc_cons_tbl(vccp, num_ports, (caddr_t)arg, mode));
+
+	case VCC_INQUIRY:
+
+		/* reason for wakeup */
+		return (i_vcc_inquiry(vccp, (caddr_t)arg, mode));
+
+	case VCC_CONS_INFO:
+		/* a console config */
+		return (i_vcc_cons_info(vccp, (caddr_t)arg, mode));
+
+	case VCC_FORCE_CLOSE:
+		/* force to close a console */
+		return (i_vcc_force_close(vccp, (caddr_t)arg, mode));
+
+	case VCC_CONS_STATUS:
+		/* console status */
+		return (i_vcc_cons_status(vccp, (caddr_t)arg, mode));
+
+	default:
+
+		/* unknown command */
+		return (ENODEV);
+	}
+
+
+}
+
+/*
+ * Write one vcc_msg_t to the port's LDC channel.  Retries on
+ * EWOULDBLOCK: non-blocking opens get EAGAIN, otherwise the caller
+ * sleeps (dropping vport->lock inside i_vcc_wait_port_status) until
+ * the LDC callback signals write space.
+ * Caller must hold vport->lock and must have already claimed the
+ * write path by clearing VCC_PORT_USE_WRITE_LDC (see the ASSERT).
+ * Returns 0, EAGAIN, EIO, or an errno from the wait.
+ */
+static int
+i_vcc_write_ldc(vcc_port_t *vport, vcc_msg_t *buf)
+{
+	int	rv = EIO;
+	size_t	size;
+
+	ASSERT(mutex_owned(&vport->lock));
+	ASSERT((vport->status & VCC_PORT_USE_WRITE_LDC) == 0);
+
+	for (; ; ) {
+
+		/* ldc_write consumes header + payload in one message */
+		size = VCC_HDR_SZ + buf->size;
+		rv = ldc_write(vport->ldc_handle, (caddr_t)buf, &size);
+
+		D1("i_vcc_write_ldc: port@%d: err=%d %d bytes\n",
+		    vport->number, rv, size);
+
+		if (rv == 0) {
+			return (rv);
+		}
+
+		if (rv != EWOULDBLOCK) {
+			return (EIO);
+		}
+
+		if (vport->status & VCC_PORT_NONBLOCK) {
+			return (EAGAIN);
+		}
+
+		/* block util ldc has more space */
+
+		rv = i_vcc_wait_port_status(vport, &vport->write_cv,
+		    VCC_PORT_LDC_WRITE_READY);
+
+		if (rv) {
+			return (rv);
+		}
+
+		/* consume the ready bit before retrying the write */
+		vport->status &= ~VCC_PORT_LDC_WRITE_READY;
+
+	}
+
+}
+
+
+
+/*
+ * cb_ioctl handler for data-port (console) ioctls: a minimal terminal
+ * emulation — get/set termios, send break (TCSBRK), suspend/resume
+ * read or write (TCXONC), and a no-op flush (TCFLSH).
+ * Every command validates the caller via i_vcc_can_use_port() under
+ * vport->lock.  Returns 0 or an errno; ENODEV for unknown commands.
+ */
+static int
+i_vcc_port_ioctl(vcc_t *vccp, minor_t minor, int portno, int cmd, void *arg,
+    int mode)
+{
+
+	vcc_port_t	*vport;
+	struct termios	term;
+	vcc_msg_t	buf;
+	int		rv;
+
+	D1("i_vcc_port_ioctl@%d cmd %d\n", portno, cmd);
+
+	vport = &(vccp->port[portno]);
+
+	if ((vport->status & VCC_PORT_AVAIL) == 0) {
+		return (EIO);
+	}
+
+
+	switch (cmd) {
+
+	/* terminal support */
+	case TCGETA:
+	case TCGETS:
+
+		mutex_enter(&vport->lock);
+
+		/* check minor no and pid */
+		if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+		    vport)) != 0) {
+			mutex_exit(&vport->lock);
+			return (rv);
+		}
+
+		/* snapshot under lock, copyout after dropping it */
+		(void) memcpy(&term, &vport->term, sizeof (term));
+		mutex_exit(&vport->lock);
+
+		return (ddi_copyout(&term, arg, sizeof (term), mode));
+
+	case TCSETS:
+	case TCSETA:
+	case TCSETAW:
+	case TCSETAF:
+
+		if (ddi_copyin(arg, &term, sizeof (term), mode) != 0) {
+			return (EFAULT);
+		}
+
+		mutex_enter(&vport->lock);
+
+		/* check minor no and pid */
+		if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+		    vport)) != 0) {
+			mutex_exit(&vport->lock);
+			return (rv);
+		}
+
+		(void) memcpy(&vport->term, &term, sizeof (term));
+		mutex_exit(&vport->lock);
+		return (0);
+
+
+	case TCSBRK:
+
+		/* send break to console */
+		mutex_enter(&vport->lock);
+
+		/* check minor no and pid */
+		if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+		    vport)) != 0) {
+			mutex_exit(&vport->lock);
+			return (rv);
+		}
+
+		/* wait for write available */
+		rv = i_vcc_wait_port_status(vport, &vport->write_cv,
+		    VCC_PORT_LDC_CHANNEL_READY| VCC_PORT_USE_WRITE_LDC);
+		if (rv) {
+			mutex_exit(&vport->lock);
+			return (rv);
+		}
+
+		/* claim the write path (released below) */
+		vport->status &= ~VCC_PORT_USE_WRITE_LDC;
+
+		buf.type = LDC_CONSOLE_CTRL;
+		buf.ctrl_msg = LDC_CONSOLE_BREAK;
+		buf.size = 0;
+
+		/* NOTE(review): rv of the write is discarded; TCSBRK
+		 * always reports success — confirm intended */
+		rv = i_vcc_write_ldc(vport, &buf);
+
+		mutex_exit(&vport->lock);
+
+		i_vcc_set_port_status(vport, &vport->write_cv,
+		    VCC_PORT_USE_WRITE_LDC);
+		return (0);
+
+	case TCXONC:
+		/* suspend read or write */
+		if (ddi_copyin(arg, &cmd, sizeof (int), mode) != 0) {
+			return (EFAULT);
+		}
+
+		mutex_enter(&vport->lock);
+
+		/* check minor no and pid */
+		if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+		    vport)) != 0) {
+			mutex_exit(&vport->lock);
+			return (rv);
+		}
+
+
+		switch (cmd) {
+
+		case 0:
+			/* suspend write (resume TERM_WR flag) */
+			vport->status |= VCC_PORT_TERM_WR;
+			cv_broadcast(&vport->write_cv);
+			break;
+		case 1:
+			/* get write lock */
+			rv = i_vcc_wait_port_status(vport, &vport->write_cv,
+			    VCC_PORT_USE_WRITE_LDC);
+			if (rv) {
+				mutex_exit(&vport->lock);
+				return (rv);
+			}
+			vport->status &= ~VCC_PORT_TERM_WR;
+			cv_broadcast(&vport->write_cv);
+			break;
+		case 2:
+			vport->status |= VCC_PORT_TERM_RD;
+			cv_broadcast(&vport->read_cv);
+			break;
+		case 3:
+			/* get read lock */
+			/* NOTE(review): waits on write_cv for the READ
+			 * flag — confirm this is the intended cv */
+			rv = i_vcc_wait_port_status(vport, &vport->write_cv,
+			    VCC_PORT_USE_READ_LDC);
+			if (rv) {
+				mutex_exit(&vport->lock);
+				return (rv);
+			}
+			vport->status &= ~VCC_PORT_TERM_RD;
+			cv_broadcast(&vport->read_cv);
+			break;
+
+		default:
+			break;
+		}
+
+		mutex_exit(&vport->lock);
+		return (0);
+
+	case TCFLSH:
+		return (0);
+
+	default:
+		return (ENODEV);
+	}
+
+}
+
+/*
+ * cb_ioctl entry point: validate the minor/port and route the request
+ * either to the control-port handler or the data-port handler.
+ */
+static int
+vcc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+    cred_t *credp, int *rvalp)
+{
+	_NOTE(ARGUNUSED(credp, rvalp))
+
+	minor_t		minor = getminor(dev);
+	int		instance = VCCINST(minor);
+	vcc_t		*vccp;
+	int		portno;
+
+	vccp = ddi_get_soft_state(vcc_ssp, instance);
+	if (vccp == NULL)
+		return (ENXIO);
+
+	portno = VCCPORT(vccp, minor);
+
+	D1("vcc_ioctl: virtual-console-concentrator@%d:%d\n", instance, portno);
+
+	if (portno >= VCC_MAX_PORTS) {
+		cmn_err(CE_CONT, "vcc_ioctl:virtual-console-concentrator@%d"
+		    " invalid portno\n", portno);
+		return (EINVAL);
+	}
+
+	D1("vcc_ioctl: virtual-console-concentrator@%d:%d ioctl cmd=%d\n",
+	    instance, portno, cmd);
+
+	/* control ioctl vs. data port ioctl */
+	if (portno == VCC_CONTROL_PORT)
+		return (i_vcc_ctrl_ioctl(vccp, cmd, (void *)arg, mode));
+
+	return (i_vcc_port_ioctl(vccp, minor, portno, cmd, (void *)arg, mode));
+}
+
+/*
+ * cb_read: read console data from the port's LDC channel into the
+ * caller's buffer.  Requires a buffer of at least VCC_MTU_SZ.  Claims
+ * the read path (clears VCC_PORT_USE_READ_LDC), loops pulling data
+ * from LDC, and — unless the open was non-blocking — sleeps waiting
+ * for the data-ready status set by the LDC callback.
+ * Returns 0 with data uiomove'd out, EAGAIN for an empty non-blocking
+ * read, or an errno.
+ */
+static int
+vcc_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	_NOTE(ARGUNUSED(credp))
+
+	int	    instance;
+	minor_t	    minor;
+	uint_t	    portno;
+	vcc_t	    *vccp;
+	vcc_port_t  *vport;
+	int	    rv = EIO;	/* by default fail ! */
+	char 	    *buf;
+	size_t	    uio_size;
+	size_t	    size;
+
+	minor = getminor(dev);
+
+	instance = VCCINST(minor);
+
+	vccp = ddi_get_soft_state(vcc_ssp, instance);
+	if (vccp == NULL) {
+		return (ENXIO);
+	}
+
+	portno = VCCPORT(vccp, minor);
+
+	/* no read for control port */
+	if (portno == VCC_CONTROL_PORT) {
+		return (EIO);
+	}
+
+	/* temp buf to hold ldc data */
+	uio_size = uiop->uio_resid;
+
+	if (uio_size < VCC_MTU_SZ) {
+		return (EINVAL);
+	}
+
+	vport = &(vccp->port[portno]);
+
+	mutex_enter(&vport->lock);
+
+	/* check minor no and pid */
+	if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+	    vport)) != 0) {
+		mutex_exit(&vport->lock);
+		return (rv);
+	}
+
+
+	/* wait until channel is up, read resumed and read path free */
+	rv = i_vcc_wait_port_status(vport, &vport->read_cv,
+	    VCC_PORT_TERM_RD|VCC_PORT_LDC_CHANNEL_READY|
+	    VCC_PORT_USE_READ_LDC);
+	if (rv) {
+		mutex_exit(&vport->lock);
+		return (rv);
+	}
+
+	buf = kmem_alloc(uio_size, KM_SLEEP);
+
+	/* claim the read path; restored via set_port_status below */
+	vport->status &= ~VCC_PORT_USE_READ_LDC;
+
+	for (; ; ) {
+
+		size = uio_size;
+		rv = i_vcc_read_ldc(vport, buf, &size);
+
+
+		if (rv == EAGAIN) {
+			/* should block? */
+			if (vport->status & VCC_PORT_NONBLOCK) {
+				break;
+			}
+
+		} else if (rv) {
+			/* error */
+			break;
+		}
+
+		if (size > 0) {
+			/* got data */
+			break;
+		}
+
+		/* wait for data from ldc */
+		vport->status &= ~VCC_PORT_LDC_DATA_READY;
+		rv = i_vcc_wait_port_status(vport, &vport->read_cv,
+		    VCC_PORT_LDC_DATA_READY);
+		if (rv) {
+			break;
+		}
+	}
+
+	mutex_exit(&vport->lock);
+
+	if ((rv == 0) && (size > 0)) {
+		/* data is in buf */
+		rv = uiomove(buf, size, UIO_READ, uiop);
+	}
+
+	kmem_free(buf, uio_size);
+	/* release the read path and wake waiting readers */
+	i_vcc_set_port_status(vport, &vport->read_cv, VCC_PORT_USE_READ_LDC);
+
+	return (rv);
+}
+
+
+/* cb_write */
+static int
+vcc_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ _NOTE(ARGUNUSED(credp))
+
+ int instance;
+ minor_t minor;
+ size_t size;
+ size_t bytes;
+ uint_t portno;
+ vcc_t *vccp;
+
+ vcc_port_t *vport;
+ int rv = EIO;
+
+ vcc_msg_t buf;
+
+ minor = getminor(dev);
+
+ instance = VCCINST(minor);
+
+ vccp = ddi_get_soft_state(vcc_ssp, instance);
+ if (vccp == NULL) {
+ return (ENXIO);
+ }
+
+ portno = VCCPORT(vccp, minor);
+
+ /* no write for control port */
+ if (portno == VCC_CONTROL_PORT) {
+ return (EIO);
+ }
+ vport = &(vccp->port[portno]);
+
+ /*
+ * check if the channel has been configured,
+ * if write has been suspend and grab write lock.
+ */
+ mutex_enter(&vport->lock);
+
+ /* check minor no and pid */
+ if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+ vport)) != 0) {
+ mutex_exit(&vport->lock);
+ return (rv);
+ }
+
+ rv = i_vcc_wait_port_status(vport, &vport->write_cv,
+ VCC_PORT_TERM_WR|VCC_PORT_LDC_CHANNEL_READY|
+ VCC_PORT_USE_WRITE_LDC);
+ if (rv) {
+ mutex_exit(&vport->lock);
+ return (rv);
+ }
+
+ vport->status &= ~VCC_PORT_USE_WRITE_LDC;
+ mutex_exit(&vport->lock);
+ size = uiop->uio_resid;
+
+ D2("vcc_write: virtual-console-concentrator@%d:%d writing %d bytes\n",
+ instance, portno, size);
+
+
+
+ buf.type = LDC_CONSOLE_DATA;
+
+ while (size) {
+
+ bytes = MIN(size, VCC_MTU_SZ);
+ /* move data */
+ rv = uiomove(&(buf.data), bytes, UIO_WRITE, uiop);
+
+ if (rv) {
+ break;
+ }
+
+ /* write to ldc */
+ buf.size = bytes;
+
+ mutex_enter(&vport->lock);
+
+ /* check minor no and pid */
+ if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
+ vport)) != 0) {
+ mutex_exit(&vport->lock);
+ return (rv);
+ }
+
+ rv = i_vcc_write_ldc(vport, &buf);
+
+ mutex_exit(&vport->lock);
+
+ if (rv) {
+ break;
+ }
+
+ size -= bytes;
+
+ }
+
+ i_vcc_set_port_status(vport, &vport->write_cv, VCC_PORT_USE_WRITE_LDC);
+ return (rv);
+}
+
+/*
+ * MDEG callback for a removed console port: look up the port by its
+ * machine-description "id" property, delete it and decrement the
+ * instance port count.  Returns MDEG_SUCCESS or MDEG_FAILURE.
+ */
+static int
+i_vcc_md_remove_port(md_t *mdp, mde_cookie_t mdep, vcc_t *vccp)
+{
+	uint64_t  portno;	/* md requires 64bit for port number */
+	int	  rv = MDEG_FAILURE;
+	vcc_port_t *vport;
+
+	if (md_get_prop_val(mdp, mdep, "id", &portno)) {
+		cmn_err(CE_CONT, "vcc_mdeg_cb: port has no 'id' property\n");
+		return (MDEG_FAILURE);
+	}
+
+	/*
+	 * BUGFIX(cleanup): dropped the dead "(portno < 0)" test — portno
+	 * is unsigned, so the comparison was always false.
+	 */
+	if (portno >= VCC_MAX_PORTS) {
+		cmn_err(CE_CONT, "i_vcc_md_remove_port@%ld invalid port no\n",
+		    portno);
+		return (MDEG_FAILURE);
+	}
+
+	if (portno == VCC_CONTROL_PORT) {
+		cmn_err(CE_CONT, "i_vcc_md_remove_port@%ld can not remove"
+		    "control port\n",
+		    portno);
+		return (MDEG_FAILURE);
+	}
+
+	vport = &(vccp->port[portno]);
+
+	/* delete the port */
+	mutex_enter(&vport->lock);
+	rv = i_vcc_delete_port(vccp, vport);
+	mutex_exit(&vport->lock);
+
+	mutex_enter(&vccp->lock);
+	vccp->num_ports--;
+	mutex_exit(&vccp->lock);
+
+	return (rv ? MDEG_FAILURE : MDEG_SUCCESS);
+}
+
+static int
+i_vcc_get_ldc_id(md_t *md, mde_cookie_t mdep, uint64_t *ldc_id)
+{
+ int num_nodes;
+ size_t size;
+ mde_cookie_t *channel;
+ int num_channels;
+
+
+ if ((num_nodes = md_node_count(md)) <= 0) {
+ cmn_err(CE_CONT, "i_vcc_get_ldc_channel_id:"
+ " Invalid node count in Machine Description subtree");
+ return (-1);
+ }
+ size = num_nodes*(sizeof (*channel));
+ channel = kmem_zalloc(size, KM_SLEEP);
+ ASSERT(channel != NULL); /* because KM_SLEEP */
+
+
+ /* Look for channel endpoint child(ren) of the vdisk MD node */
+ if ((num_channels = md_scan_dag(md, mdep,
+ md_find_name(md, "channel-endpoint"),
+ md_find_name(md, "fwd"), channel)) <= 0) {
+ cmn_err(CE_CONT, "i_vcc_get_ldc_id: No 'channel-endpoint'"
+ " found for vcc");
+ kmem_free(channel, size);
+ return (-1);
+ }
+
+ /* Get the "id" value for the first channel endpoint node */
+ if (md_get_prop_val(md, channel[0], "id", ldc_id) != 0) {
+ cmn_err(CE_CONT, "i_vcc_get_ldc: No id property found "
+ "for channel-endpoint of vcc");
+ kmem_free(channel, size);
+ return (-1);
+ }
+
+ if (num_channels > 1) {
+ cmn_err(CE_CONT, "i_vcc_get_ldc: Warning: Using ID of first"
+ " of multiple channels for this vcc");
+ }
+
+ kmem_free(channel, size);
+ return (0);
+}
+/* mdeg callback for an added port */
+static int
+i_vcc_md_add_port(md_t *mdp, mde_cookie_t mdep, vcc_t *vccp)
+{
+ uint64_t portno; /* md requires 64 bit */
+ char *domain_name;
+ char *group_name;
+ uint64_t ldc_id;
+ uint64_t tcp_port;
+ vcc_port_t *vport;
+
+ /* read in the port's reg property */
+ if (md_get_prop_val(mdp, mdep, "id", &portno)) {
+ cmn_err(CE_CONT, "i_vcc_md_add_port_: port has no 'id' "
+ "property\n");
+ return (MDEG_FAILURE);
+ }
+
+ /* read in the port's "vcc-doman-name" property */
+ if (md_get_prop_str(mdp, mdep, "vcc-domain-name", &domain_name)) {
+ cmn_err(CE_CONT, "i_vcc_md_add_port: port%ld has "
+ "no 'vcc-domain-name' property\n", portno);
+ return (MDEG_FAILURE);
+ }
+
+
+ /* read in the port's "vcc-group-name" property */
+ if (md_get_prop_str(mdp, mdep, "vcc-group-name", &group_name)) {
+ cmn_err(CE_CONT, "i_vcc_md_add_port: port%ld has no "
+ "'vcc-group-name'property\n", portno);
+ return (MDEG_FAILURE);
+ }
+
+
+ /* read in the port's "vcc-tcp-port" property */
+ if (md_get_prop_val(mdp, mdep, "vcc-tcp-port", &tcp_port)) {
+ cmn_err(CE_CONT, "i_vcc_md_add_port: port%ld has no"
+ "'vcc-tcp-port' property\n", portno);
+ return (MDEG_FAILURE);
+ }
+
+ D1("i_vcc_md_add_port: port@%d domain-name=%s group-name=%s"
+ " tcp-port=%lld\n", portno, domain_name, group_name, tcp_port);
+
+ /* add the port */
+ if (i_vcc_add_port(vccp, group_name, tcp_port, portno, domain_name)) {
+ return (MDEG_FAILURE);
+ }
+
+ vport = &vccp->port[portno];
+ if (i_vcc_get_ldc_id(mdp, mdep, &ldc_id)) {
+ mutex_enter(&vport->lock);
+ (void) i_vcc_delete_port(vccp, vport);
+ mutex_exit(&vport->lock);
+ return (MDEG_FAILURE);
+ }
+
+ /* configure the port */
+ if (i_vcc_config_port(vccp, portno, ldc_id)) {
+ mutex_enter(&vport->lock);
+ (void) i_vcc_delete_port(vccp, vport);
+ mutex_exit(&vport->lock);
+ return (MDEG_FAILURE);
+ }
+
+ mutex_enter(&vccp->lock);
+ vccp->num_ports++;
+ mutex_exit(&vccp->lock);
+
+ vport = &vccp->port[VCC_CONTROL_PORT];
+
+ if (vport->pollflag & VCC_POLL_CONFIG) {
+ /* wakeup vntsd */
+ mutex_enter(&vport->lock);
+ vport->pollevent |= VCC_POLL_ADD_PORT;
+ mutex_exit(&vport->lock);
+ pollwakeup(&vport->poll, POLLIN);
+ }
+
+ return (MDEG_SUCCESS);
+}
+
+/* mdeg callback */
+static int
+vcc_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
+{
+ int idx;
+ vcc_t *vccp;
+ int rv;
+
+ vccp = (vcc_t *)cb_argp;
+ ASSERT(vccp);
+
+ if (resp == NULL) {
+ return (MDEG_FAILURE);
+ }
+
+ /* added port */
+ D1("vcc_mdeg_cb: added %d port(s)\n", resp->added.nelem);
+
+ for (idx = 0; idx < resp->added.nelem; idx++) {
+ rv = i_vcc_md_add_port(resp->added.mdp,
+ resp->added.mdep[idx], vccp);
+
+ if (rv != MDEG_SUCCESS) {
+ return (rv);
+ }
+ }
+
+ /* removed port */
+ D1("vcc_mdeg_cb: removed %d port(s)\n", resp->removed.nelem);
+
+ for (idx = 0; idx < resp->removed.nelem; idx++) {
+ rv = i_vcc_md_remove_port(resp->removed.mdp,
+ resp->removed.mdep[idx], vccp);
+
+ if (rv != MDEG_SUCCESS) {
+ return (rv);
+ }
+ }
+
+ /*
+ * XXX - Currently no support for updating already active
+ * ports. So, ignore the match_curr and match_prev arrays
+ * for now.
+ */
+
+
+ return (MDEG_SUCCESS);
+}
+
+
+/* cb_chpoll */
+static int
+vcc_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ int instance;
+ minor_t minor;
+ uint_t portno;
+ vcc_t *vccp;
+ vcc_port_t *vport;
+
+ minor = getminor(dev);
+
+ instance = VCCINST(minor);
+
+ vccp = ddi_get_soft_state(vcc_ssp, instance);
+ if (vccp == NULL) {
+ return (ENXIO);
+ }
+
+ portno = VCCPORT(vccp, minor);
+
+ vport = &(vccp->port[portno]);
+
+ D1("vcc_chpoll: virtual-console-concentrator@%d events 0x%x\n",
+ portno, events);
+
+ *reventsp = 0;
+
+ if (portno != VCC_CONTROL_PORT) {
+ return (ENXIO);
+ }
+
+ /* poll for config change */
+ if (vport->pollevent) {
+ *reventsp |= (events & POLLIN);
+ }
+
+ if (((*reventsp) == 0) && (!anyyet)) {
+ *phpp = &vport->poll;
+ if (events & POLLIN) {
+ mutex_enter(&vport->lock);
+ vport->pollflag |= VCC_POLL_CONFIG;
+ mutex_exit(&vport->lock);
+ } else {
+ return (ENXIO);
+ }
+ }
+
+ D1("vcc_chpoll: virtual-console-concentrator@%d:%d ev=0x%x, "
+ "rev=0x%x pev=0x%x, flag=0x%x\n",
+ instance, portno, events, (*reventsp),
+ vport->pollevent, vport->pollflag);
+
+
+ return (0);
+}
diff --git a/usr/src/uts/sun4v/io/vdc.c b/usr/src/uts/sun4v/io/vdc.c
new file mode 100644
index 0000000000..8a3d5c3444
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vdc.c
@@ -0,0 +1,3560 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * LDoms virtual disk client (vdc) device driver
+ *
+ * This driver runs on a guest logical domain and communicates with the virtual
+ * disk server (vds) driver running on the service domain which is exporting
+ * virtualized "disks" to the guest logical domain.
+ *
+ * The driver can be divided into four sections:
+ *
+ * 1) generic device driver housekeeping
+ * _init, _fini, attach, detach, ops structures, etc.
+ *
+ * 2) communication channel setup
+ * Setup the communications link over the LDC channel that vdc uses to
+ * talk to the vDisk server. Initialise the descriptor ring which
+ * allows the LDC clients to transfer data via memory mappings.
+ *
+ * 3) Support exported to upper layers (filesystems, etc)
+ * The upper layers call into vdc via strategy(9E) and DKIO(7I)
+ * ioctl calls. vdc will copy the data to be written to the descriptor
+ * ring or maps the buffer to store the data read by the vDisk
+ * server into the descriptor ring. It then sends a message to the
+ * vDisk server requesting it to complete the operation.
+ *
+ * 4) Handling responses from vDisk server.
+ * The vDisk server will ACK some or all of the messages vdc sends to it
+ * (this is configured during the handshake). Upon receipt of an ACK
+ * vdc will check the descriptor ring and signal to the upper layer
+ * code waiting on the IO.
+ */
+
+#include <sys/conf.h>
+#include <sys/disp.h>
+#include <sys/ddi.h>
+#include <sys/dkio.h>
+#include <sys/efi_partition.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/mach_descrip.h>
+#include <sys/modctl.h>
+#include <sys/mdeg.h>
+#include <sys/note.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/types.h>
+#include <sys/promif.h>
+#include <sys/vtoc.h>
+#include <sys/archsystm.h>
+#include <sys/sysmacros.h>
+
+#include <sys/cdio.h>
+#include <sys/dktp/cm.h>
+#include <sys/dktp/fdisk.h>
+#include <sys/scsi/generic/sense.h>
+#include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */
+#include <sys/scsi/targets/sddef.h>
+
+#include <sys/ldoms.h>
+#include <sys/ldc.h>
+#include <sys/vio_common.h>
+#include <sys/vio_mailbox.h>
+#include <sys/vdsk_common.h>
+#include <sys/vdsk_mailbox.h>
+#include <sys/vdc.h>
+
+/*
+ * function prototypes
+ */
+
+/* standard driver functions */
+static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
+static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
+static int vdc_strategy(struct buf *buf);
+static int vdc_print(dev_t dev, char *str);
+static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
+static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
+static int vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
+static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *credp, int *rvalp);
+static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
+static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);
+
+static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
+ void *arg, void **resultp);
+static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+
+/* setup */
+static int vdc_send(ldc_handle_t ldc_handle, caddr_t pkt, size_t *msglen);
+static int vdc_do_ldc_init(vdc_t *vdc);
+static int vdc_start_ldc_connection(vdc_t *vdc);
+static int vdc_create_device_nodes(vdc_t *vdc);
+static int vdc_create_device_nodes_props(vdc_t *vdc);
+static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
+static void vdc_terminate_ldc(vdc_t *vdc);
+static int vdc_init_descriptor_ring(vdc_t *vdc);
+static void vdc_destroy_descriptor_ring(vdc_t *vdc);
+
+/* handshake with vds */
+static void vdc_init_handshake_negotiation(void *arg);
+static int vdc_init_ver_negotiation(vdc_t *vdc);
+static int vdc_init_attr_negotiation(vdc_t *vdc);
+static int vdc_init_dring_negotiate(vdc_t *vdc);
+static int vdc_handle_ver_negotiate();
+static int vdc_handle_attr_negotiate();
+static void vdc_reset_connection(vdc_t *vdc, boolean_t resetldc);
+static boolean_t vdc_is_able_to_tx_data(vdc_t *vdc, int flag);
+
+/* processing */
+static void vdc_process_msg_thread(vdc_t *vdc);
+static uint_t vdc_handle_cb(uint64_t event, caddr_t arg);
+static void vdc_process_msg(void *arg);
+static int vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg);
+static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg);
+static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
+static void vdc_do_process_msg(vdc_t *vdc);
+static int vdc_get_next_dring_entry_id(vdc_t *vdc, uint_t needed);
+static int vdc_populate_descriptor(vdc_t *vdc, caddr_t addr,
+ size_t nbytes, int op, uint64_t arg, uint64_t slice);
+static int vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx,
+ vio_dring_msg_t dmsg);
+static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
+static int vdc_get_response(vdc_t *vdc, int start, int end);
+static int vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx,
+ caddr_t addr, size_t nbytes, int operation);
+static boolean_t vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int
+ num_msgs);
+
+/* dkio */
+static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
+static int vdc_create_fake_geometry(vdc_t *vdc);
+
+/*
+ * Module variables
+ */
+uint64_t vdc_hz_timeout;
+uint64_t vdc_usec_timeout = VDC_USEC_TIMEOUT_MIN;
+uint64_t vdc_dump_usec_timeout = VDC_USEC_TIMEOUT_MIN / 300;
+static int vdc_retries = VDC_RETRIES;
+static int vdc_dump_retries = VDC_RETRIES * 10;
+
+/* Soft state pointer */
+static void *vdc_state;
+
+/* variable level controlling the verbosity of the error/debug messages */
+int vdc_msglevel = 0;
+
+
+static void
+vdc_msg(const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ vcmn_err(CE_CONT, format, args);
+ va_end(args);
+}
+
+static struct cb_ops vdc_cb_ops = {
+ vdc_open, /* cb_open */
+ vdc_close, /* cb_close */
+ vdc_strategy, /* cb_strategy */
+ vdc_print, /* cb_print */
+ vdc_dump, /* cb_dump */
+ vdc_read, /* cb_read */
+ vdc_write, /* cb_write */
+ vdc_ioctl, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* cb_str */
+ D_MP | D_64BIT, /* cb_flag */
+ CB_REV, /* cb_rev */
+ vdc_aread, /* cb_aread */
+ vdc_awrite /* cb_awrite */
+};
+
+static struct dev_ops vdc_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ vdc_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ vdc_attach, /* devo_attach */
+ vdc_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &vdc_cb_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ nulldev /* devo_power */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "virtual disk client %I%",
+ &vdc_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Device Driver housekeeping and setup
+ */
+
+int
+_init(void)
+{
+ int status;
+
+ if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
+ return (status);
+ if ((status = mod_install(&modlinkage)) != 0)
+ ddi_soft_state_fini(&vdc_state);
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int status;
+
+ if ((status = mod_remove(&modlinkage)) != 0)
+ return (status);
+ ddi_soft_state_fini(&vdc_state);
+ return (0);
+}
+
+static int
+vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
+{
+ _NOTE(ARGUNUSED(dip))
+
+ int instance = SDUNIT(getminor((dev_t)arg));
+ vdc_t *vdc = NULL;
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ *resultp = NULL;
+ return (DDI_FAILURE);
+ }
+ *resultp = vdc->dip;
+ return (DDI_SUCCESS);
+ case DDI_INFO_DEVT2INSTANCE:
+ *resultp = (void *)(uintptr_t)instance;
+ return (DDI_SUCCESS);
+ default:
+ *resultp = NULL;
+ return (DDI_FAILURE);
+ }
+}
+
+static int
+vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance;
+ int rv;
+ uint_t retries = 0;
+ vdc_t *vdc = NULL;
+
+ switch (cmd) {
+ case DDI_DETACH:
+ /* the real work happens below */
+ break;
+ case DDI_SUSPEND:
+ /* nothing to do for this non-device */
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ ASSERT(cmd == DDI_DETACH);
+ instance = ddi_get_instance(dip);
+ PR1("%s[%d] Entered\n", __func__, instance);
+
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ vdc_msg("%s[%d]: Could not get state structure.",
+ __func__, instance);
+ return (DDI_FAILURE);
+ }
+
+ if (vdc->open) {
+ PR0("%s[%d]: Cannot detach: device is open",
+ __func__, instance);
+ return (DDI_FAILURE);
+ }
+
+ PR0("%s[%d] proceeding...\n", __func__, instance);
+
+ /*
+ * try and disable callbacks to prevent another handshake
+ */
+ rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
+ PR0("%s[%d] callback disabled (rv=%d)\n", __func__, instance, rv);
+
+ /*
+ * Prevent any more attempts to start a handshake with the vdisk
+ * server and tear down the existing connection.
+ */
+ mutex_enter(&vdc->lock);
+ vdc->initialized |= VDC_HANDSHAKE_STOP;
+ vdc_reset_connection(vdc, B_TRUE);
+ mutex_exit(&vdc->lock);
+
+ if (vdc->initialized & VDC_THREAD) {
+ mutex_enter(&vdc->msg_proc_lock);
+ vdc->msg_proc_thr_state = VDC_THR_STOP;
+ vdc->msg_pending = B_TRUE;
+ cv_signal(&vdc->msg_proc_cv);
+
+ while (vdc->msg_proc_thr_state != VDC_THR_DONE) {
+ PR0("%s[%d]: Waiting for thread to exit\n",
+ __func__, instance);
+ rv = cv_timedwait(&vdc->msg_proc_cv,
+ &vdc->msg_proc_lock, VD_GET_TIMEOUT_HZ(1));
+ if ((rv == -1) && (retries++ > vdc_retries))
+ break;
+ }
+ mutex_exit(&vdc->msg_proc_lock);
+ }
+
+ mutex_enter(&vdc->lock);
+
+ if (vdc->initialized & VDC_DRING)
+ vdc_destroy_descriptor_ring(vdc);
+
+ if (vdc->initialized & VDC_LDC)
+ vdc_terminate_ldc(vdc);
+
+ mutex_exit(&vdc->lock);
+
+ if (vdc->initialized & VDC_MINOR) {
+ ddi_prop_remove_all(dip);
+ ddi_remove_minor_node(dip, NULL);
+ }
+
+ if (vdc->initialized & VDC_LOCKS) {
+ mutex_destroy(&vdc->lock);
+ mutex_destroy(&vdc->attach_lock);
+ mutex_destroy(&vdc->msg_proc_lock);
+ mutex_destroy(&vdc->dring_lock);
+ cv_destroy(&vdc->cv);
+ cv_destroy(&vdc->attach_cv);
+ cv_destroy(&vdc->msg_proc_cv);
+ }
+
+ if (vdc->minfo)
+ kmem_free(vdc->minfo, sizeof (struct dk_minfo));
+
+ if (vdc->cinfo)
+ kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));
+
+ if (vdc->vtoc)
+ kmem_free(vdc->vtoc, sizeof (struct vtoc));
+
+ if (vdc->initialized & VDC_SOFT_STATE)
+ ddi_soft_state_free(vdc_state, instance);
+
+ PR0("%s[%d] End %p\n", __func__, instance, vdc);
+
+ return (DDI_SUCCESS);
+}
+
+
+static int
+vdc_do_attach(dev_info_t *dip)
+{
+ int instance;
+ vdc_t *vdc = NULL;
+ int status;
+ uint_t retries = 0;
+
+ ASSERT(dip != NULL);
+
+ instance = ddi_get_instance(dip);
+ if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
+ vdc_msg("%s:(%d): Couldn't alloc state structure",
+ __func__, instance);
+ return (DDI_FAILURE);
+ }
+
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ vdc_msg("%s:(%d): Could not get state structure.",
+ __func__, instance);
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * We assign the value to initialized in this case to zero out the
+ * variable and then set bits in it to indicate what has been done
+ */
+ vdc->initialized = VDC_SOFT_STATE;
+
+ vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);
+
+ vdc->dip = dip;
+ vdc->instance = instance;
+ vdc->open = 0;
+ vdc->vdisk_type = VD_DISK_TYPE_UNK;
+ vdc->state = VD_STATE_INIT;
+ vdc->ldc_state = 0;
+ vdc->session_id = 0;
+ vdc->block_size = DEV_BSIZE;
+ vdc->max_xfer_sz = VD_MAX_BLOCK_SIZE / DEV_BSIZE;
+
+ vdc->vtoc = NULL;
+ vdc->cinfo = NULL;
+ vdc->minfo = NULL;
+
+ mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&vdc->cv, NULL, CV_DRIVER, NULL);
+ cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL);
+ cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL);
+ vdc->initialized |= VDC_LOCKS;
+
+ vdc->msg_pending = B_FALSE;
+ vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread,
+ vdc, 0, &p0, TS_RUN, minclsyspri);
+ if (vdc->msg_proc_thr_id == NULL) {
+ cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
+ instance);
+ return (DDI_FAILURE);
+ }
+ vdc->initialized |= VDC_THREAD;
+
+ /* initialise LDC channel which will be used to communicate with vds */
+ if (vdc_do_ldc_init(vdc) != 0) {
+ cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
+ return (DDI_FAILURE);
+ }
+
+ /* Bring up connection with vds via LDC */
+ status = vdc_start_ldc_connection(vdc);
+ if (status != 0) {
+ vdc_msg("%s[%d] Could not start LDC", __func__, instance);
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * We need to wait until the handshake has completed before leaving
+ * the attach(). This is to allow the device node(s) to be created
+ * and the first usage of the filesystem to succeed.
+ */
+ mutex_enter(&vdc->attach_lock);
+ while ((vdc->ldc_state != LDC_UP) ||
+ (vdc->state != VD_STATE_DATA)) {
+
+ PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n",
+ __func__, instance, vdc->state, vdc->ldc_state);
+
+ status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock,
+ VD_GET_TIMEOUT_HZ(1));
+ if (status == -1) {
+ if (retries >= vdc_retries) {
+ PR0("%s[%d] Give up handshake wait.\n",
+ __func__, instance);
+ mutex_exit(&vdc->attach_lock);
+ return (DDI_FAILURE);
+ } else {
+ PR0("%s[%d] Retry #%d for handshake.\n",
+ __func__, instance, retries);
+ retries++;
+ }
+ }
+ }
+ mutex_exit(&vdc->attach_lock);
+
+ if (vdc->vtoc == NULL)
+ vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);
+
+ status = vdc_populate_descriptor(vdc, (caddr_t)vdc->vtoc,
+ P2ROUNDUP(sizeof (struct vtoc), sizeof (uint64_t)),
+ VD_OP_GET_VTOC, FKIOCTL, 0);
+ if (status) {
+ cmn_err(CE_NOTE, "[%d] Failed to get VTOC", instance);
+ return (status);
+ }
+
+ /*
+ * Now that we have the device info we can create the
+ * device nodes and properties
+ */
+ status = vdc_create_device_nodes(vdc);
+ if (status) {
+ cmn_err(CE_NOTE, "[%d] Failed to create device nodes",
+ instance);
+ return (status);
+ }
+ status = vdc_create_device_nodes_props(vdc);
+ if (status) {
+ cmn_err(CE_NOTE, "[%d] Failed to create device nodes"
+ " properties", instance);
+ return (status);
+ }
+
+ ddi_report_dev(dip);
+
+ PR0("%s[%d] Attach completed\n", __func__, instance);
+ return (status);
+}
+
+static int
+vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int status;
+
+ PR0("%s[%d] Entered. Built %s %s\n", __func__, ddi_get_instance(dip),
+ __DATE__, __TIME__);
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ if ((status = vdc_do_attach(dip)) != 0)
+ (void) vdc_detach(dip, DDI_DETACH);
+ return (status);
+ case DDI_RESUME:
+ /* nothing to do for this non-device */
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+}
+
+static int
+vdc_do_ldc_init(vdc_t *vdc)
+{
+ int status = 0;
+ ldc_status_t ldc_state;
+ ldc_attr_t ldc_attr;
+ uint64_t ldc_id = 0;
+ dev_info_t *dip = NULL;
+
+ ASSERT(vdc != NULL);
+
+ dip = vdc->dip;
+ vdc->initialized |= VDC_LDC;
+
+ if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) {
+ vdc_msg("%s: Failed to get <ldc_id> property\n", __func__);
+ return (EIO);
+ }
+ vdc->ldc_id = ldc_id;
+
+ ldc_attr.devclass = LDC_DEV_BLK;
+ ldc_attr.instance = vdc->instance;
+ ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */
+ ldc_attr.qlen = VD_LDC_QLEN;
+
+ if ((vdc->initialized & VDC_LDC_INIT) == 0) {
+ status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d",
+ vdc->instance, ldc_id, status);
+ return (status);
+ }
+ vdc->initialized |= VDC_LDC_INIT;
+ }
+ status = ldc_status(vdc->ldc_handle, &ldc_state);
+ if (status != 0) {
+ vdc_msg("Cannot discover LDC status [err=%d].", status);
+ return (status);
+ }
+ vdc->ldc_state = ldc_state;
+
+ if ((vdc->initialized & VDC_LDC_CB) == 0) {
+ status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
+ (caddr_t)vdc);
+ if (status != 0) {
+ vdc_msg("%s: ldc_reg_callback()=%d", __func__, status);
+ return (status);
+ }
+ vdc->initialized |= VDC_LDC_CB;
+ }
+
+ vdc->initialized |= VDC_LDC;
+
+ /*
+ * At this stage we have initialised LDC, we will now try and open
+ * the connection.
+ */
+ if (vdc->ldc_state == LDC_INIT) {
+ status = ldc_open(vdc->ldc_handle);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d",
+ vdc->instance, vdc->ldc_id, status);
+ return (status);
+ }
+ vdc->initialized |= VDC_LDC_OPEN;
+ }
+
+ return (status);
+}
+
+static int
+vdc_start_ldc_connection(vdc_t *vdc)
+{
+ int status = 0;
+
+ ASSERT(vdc != NULL);
+
+ mutex_enter(&vdc->lock);
+
+ if (vdc->ldc_state == LDC_UP) {
+ PR0("%s: LDC is already UP ..\n", __func__);
+ mutex_exit(&vdc->lock);
+ return (0);
+ }
+
+ if ((status = ldc_up(vdc->ldc_handle)) != 0) {
+ switch (status) {
+ case ECONNREFUSED: /* listener not ready at other end */
+ PR0("%s: ldc_up(%d,...) return %d\n",
+ __func__, vdc->ldc_id, status);
+ status = 0;
+ break;
+ default:
+ cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: "
+ "channel=%ld, err=%d",
+ vdc->instance, vdc->ldc_id, status);
+ }
+ }
+
+ PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance);
+
+ mutex_exit(&vdc->lock);
+
+ return (status);
+}
+
+
+/*
+ * Function:
+ * vdc_create_device_nodes
+ *
+ * Description:
+ * This function creates the block and character device nodes under
+ * /devices along with the node properties. It is called as part of
+ * the attach(9E) of the instance during the handshake with vds after
+ * vds has sent the attributes to vdc.
+ *
+ * If the device is of type VD_DISK_TYPE_SLICE then the minor node
+ * of 2 is used in keeping with the Solaris convention that slice 2
+ * refers to a whole disk. Slices start at 'a'
+ *
+ * Parameters:
+ * vdc - soft state pointer
+ *
+ * Return Values
+ * 0 - Success
+ * EIO - Failed to create node
+ * EINVAL - Unknown type of disk exported
+ */
+static int
+vdc_create_device_nodes(vdc_t *vdc)
+{
+ /* uses NNNN which is OK as long as # of disks <= 10000 */
+ char name[sizeof ("disk@NNNN:s,raw")];
+ dev_info_t *dip = NULL;
+ int instance;
+ int num_slices = 1;
+ int i;
+
+ ASSERT(vdc != NULL);
+
+ instance = vdc->instance;
+ dip = vdc->dip;
+
+ switch (vdc->vdisk_type) {
+ case VD_DISK_TYPE_DISK:
+ num_slices = V_NUMPAR;
+ break;
+ case VD_DISK_TYPE_SLICE:
+ num_slices = 1;
+ break;
+ case VD_DISK_TYPE_UNK:
+ default:
+ return (EINVAL);
+ }
+
+ for (i = 0; i < num_slices; i++) {
+ (void) snprintf(name, sizeof (name), "%c", 'a' + i);
+ if (ddi_create_minor_node(dip, name, S_IFBLK,
+ VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
+ vdc_msg("%s[%d]: Couldn't add block node %s.",
+ __func__, instance, name);
+ return (EIO);
+ }
+
+ /* if any device node is created we set this flag */
+ vdc->initialized |= VDC_MINOR;
+
+ (void) snprintf(name, sizeof (name), "%c%s",
+ 'a' + i, ",raw");
+ if (ddi_create_minor_node(dip, name, S_IFCHR,
+ VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
+ vdc_msg("%s[%d]: Could not add raw node %s.",
+ __func__, instance, name);
+ return (EIO);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Function:
+ *	vdc_create_device_nodes_props
+ *
+ * Description:
+ *	This function creates the size (VDC_SIZE_PROP_NAME) and block
+ *	count (VDC_NBLOCKS_PROP_NAME) properties on the device nodes
+ *	previously created by vdc_create_device_nodes(). It is called as
+ *	part of the attach(9E) of the instance during the handshake with
+ *	vds after vds has sent the attributes to vdc.
+ *
+ * Parameters:
+ *	vdc	- soft state pointer
+ *
+ * Return Values
+ *	0	- Success
+ *	ENXIO	- No valid VTOC available to size the slices
+ *	EIO	- Failed to create device node property
+ *	EINVAL	- Unknown type of disk exported
+ */
+static int
+vdc_create_device_nodes_props(vdc_t *vdc)
+{
+ dev_info_t *dip = NULL;
+ int instance;
+ int num_slices = 1;
+ int64_t size = 0;
+ dev_t dev;
+ int rv;
+ int i;
+
+ ASSERT(vdc != NULL);
+
+ instance = vdc->instance;
+ dip = vdc->dip;
+
+ if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) {
+ cmn_err(CE_NOTE, "![%d] Could not create device node property."
+ " No VTOC available", instance);
+ return (ENXIO);
+ }
+
+ switch (vdc->vdisk_type) {
+ case VD_DISK_TYPE_DISK:
+ num_slices = V_NUMPAR;
+ break;
+ case VD_DISK_TYPE_SLICE:
+ num_slices = 1;
+ break;
+ case VD_DISK_TYPE_UNK:
+ default:
+ return (EINVAL);
+ }
+
+ for (i = 0; i < num_slices; i++) {
+ dev = makedevice(ddi_driver_major(dip),
+ VD_MAKE_DEV(instance, i));
+
+ size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz;
+ PR0("%s[%d] sz %ld (%ld Mb) p_size %lx\n",
+ __func__, instance, size, size / (1024 * 1024),
+ vdc->vtoc->v_part[i].p_size);
+
+ rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size);
+ if (rv != DDI_PROP_SUCCESS) {
+ vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n",
+ __func__, instance, VDC_SIZE_PROP_NAME, size);
+ return (EIO);
+ }
+
+ rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME,
+ lbtodb(size));
+ if (rv != DDI_PROP_SUCCESS) {
+ vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", __func__,
+ instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size));
+ return (EIO);
+ }
+ }
+
+ return (0);
+}
+
+static int
+vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(cred))
+
+ int instance;
+ int status = 0;
+ vdc_t *vdc;
+
+ ASSERT(dev != NULL);
+ instance = SDUNIT(getminor(*dev));
+
+ PR0("%s[%d] minor = %d flag = %x, otyp = %x\n", __func__, instance,
+ getminor(*dev), flag, otyp);
+
+ if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
+ return (EINVAL);
+
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ vdc_msg("%s[%d] Could not get state.", __func__, instance);
+ return (ENXIO);
+ }
+
+ /*
+ * Check to see if we can communicate with vds
+ */
+ status = vdc_is_able_to_tx_data(vdc, flag);
+ if (status == B_FALSE) {
+ PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
+ return (ENOLINK);
+ }
+
+ mutex_enter(&vdc->lock);
+ vdc->open++;
+ mutex_exit(&vdc->lock);
+
+ return (0);
+}
+
+static int
+vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(cred))
+
+ int instance;
+ vdc_t *vdc;
+
+ instance = SDUNIT(getminor(dev));
+
+ PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp);
+
+ if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
+ return (EINVAL);
+
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ vdc_msg("%s[%d] Could not get state.", __func__, instance);
+ return (ENXIO);
+ }
+
+ /*
+ * Check to see if we can communicate with vds
+ */
+ if (vdc_is_able_to_tx_data(vdc, 0) == B_FALSE) {
+ PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
+ return (ETIMEDOUT);
+ }
+
+ if (vdc->dkio_flush_pending) {
+ PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes",
+ __func__, instance, vdc->dkio_flush_pending);
+ return (EBUSY);
+ }
+
+ /*
+ * Should not need the mutex here, since the framework should protect
+ * against more opens on this device, but just in case.
+ */
+ mutex_enter(&vdc->lock);
+ vdc->open--;
+ mutex_exit(&vdc->lock);
+
+ return (0);
+}
+
+static int
+vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
+{
+ _NOTE(ARGUNUSED(credp))
+ _NOTE(ARGUNUSED(rvalp))
+
+ return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
+}
+
+static int
+vdc_print(dev_t dev, char *str)
+{
+ cmn_err(CE_NOTE, "vdc%d: %s", SDUNIT(getminor(dev)), str);
+ return (0);
+}
+
+static int
+vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
+{
+ int rv = 0;
+ size_t nbytes = (nblk * DEV_BSIZE);
+ int instance = SDUNIT(getminor(dev));
+ vdc_t *vdc;
+
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ vdc_msg("%s (%d): Could not get state.", __func__, instance);
+ return (ENXIO);
+ }
+
+ rv = vdc_populate_descriptor(vdc, addr, nbytes, VD_OP_BWRITE,
+ blkno, SDPART(getminor(dev)));
+
+ PR1("%s: status=%d\n", __func__, rv);
+
+ return (rv);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Disk access routines
+ *
+ */
+
+/*
+ * vdc_strategy()
+ *
+ * strategy(9E) entry point: translate the buf(9S) request into a
+ * descriptor-ring operation and send it to the vDisk server
+ * (vdc_populate_descriptor() waits for the response before returning).
+ *
+ * Return Value:
+ *	0: As per strategy(9E), the strategy() function must return 0
+ *	[ bioerror(9f) sets b_flags to the proper error code ]
+ */
+static int
+vdc_strategy(struct buf *buf)
+{
+ int rv = -1;
+ vdc_t *vdc = NULL;
+ int instance = SDUNIT(getminor(buf->b_edev));
+ /* transfer direction selects the vDisk server operation */
+ int op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;
+
+ PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p",
+ __func__, (buf->b_flags & B_READ) ? "Read" : "Write",
+ buf->b_bcount, buf->b_lblkno, buf->b_un.b_addr);
+
+ if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
+ vdc_msg("%s[%d]: Could not get state.", __func__, instance);
+ bioerror(buf, ENXIO);
+ biodone(buf);
+ return (0);
+ }
+
+ /*
+  * NOTE(review): oversized requests are only caught on DEBUG kernels
+  * by this ASSERT - assumes callers honor the negotiated maximum
+  * transfer size.
+  */
+ ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size));
+
+ /* O_NONBLOCK: strategy(9E) must not block waiting for the handshake */
+ if (vdc_is_able_to_tx_data(vdc, O_NONBLOCK) == B_FALSE) {
+ vdc_msg("%s: Not ready to transmit data", __func__);
+ bioerror(buf, ENXIO);
+ biodone(buf);
+ return (0);
+ }
+ /* make sure the buffer is mapped into kernel virtual address space */
+ bp_mapin(buf);
+
+ rv = vdc_populate_descriptor(vdc, buf->b_un.b_addr, buf->b_bcount, op,
+ buf->b_lblkno, SDPART(getminor(buf->b_edev)));
+
+ PR1("%s: status=%d", __func__, rv);
+ /* bioerror(buf, 0) on success simply clears any error indication */
+ bioerror(buf, rv);
+ biodone(buf);
+ return (0);
+}
+
+
+/*
+ * vdc_read()
+ *
+ * read(9E) entry point: physio(9F) breaks the request into
+ * minphys-limited chunks and hands each to vdc_strategy().
+ */
+static int
+vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(cred))
+
+ PR1("vdc_read(): Entered");
+ return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio));
+}
+
+/*
+ * vdc_write()
+ *
+ * write(9E) entry point: physio(9F) breaks the request into
+ * minphys-limited chunks and hands each to vdc_strategy().
+ */
+static int
+vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(cred))
+
+ PR1("vdc_write(): Entered");
+ return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio));
+}
+
+/*
+ * vdc_aread()
+ *
+ * aread(9E) entry point: asynchronous read via aphysio(9F); requests
+ * are not cancellable (anocancel).
+ */
+static int
+vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(cred))
+
+ PR1("vdc_aread(): Entered");
+ return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio));
+}
+
+/*
+ * vdc_awrite()
+ *
+ * awrite(9E) entry point: asynchronous write via aphysio(9F); requests
+ * are not cancellable (anocancel).
+ */
+static int
+vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(cred))
+
+ PR1("vdc_awrite(): Entered");
+ return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio));
+}
+
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Handshake support
+ */
+
+/*
+ * vdc_init_handshake_negotiation
+ *
+ * Description:
+ *	This function is called to trigger the handshake negotiations between
+ *	the client (vdc) and the server (vds). It may be called multiple times;
+ *	each call advances the state machine by one phase (version -> attr ->
+ *	dring), so the LDC callback re-invokes it as responses arrive.
+ *
+ * Parameters:
+ *	vdc - soft state pointer
+ */
+static void
+vdc_init_handshake_negotiation(void *arg)
+{
+ vdc_t *vdc = (vdc_t *)(void *)arg;
+ vd_state_t state;
+
+ ASSERT(vdc != NULL);
+ ASSERT(vdc->ldc_state == LDC_UP);
+
+ mutex_enter(&vdc->lock);
+
+ /*
+ * Do not continue if another thread has triggered a handshake which
+ * is in progress or detach() has stopped further handshakes.
+ */
+ if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) {
+ PR0("%s[%d] Negotiation not triggered. [init=%x]\n",
+ __func__, vdc->instance, vdc->initialized);
+ mutex_exit(&vdc->lock);
+ return;
+ }
+
+ PR0("Initializing vdc<->vds handshake\n");
+
+ /* mark a handshake in progress; cleared by vdc_reset_connection() */
+ vdc->initialized |= VDC_HANDSHAKE;
+
+ state = vdc->state;
+
+ if (state == VD_STATE_INIT) {
+ (void) vdc_init_ver_negotiation(vdc);
+ } else if (state == VD_STATE_VER) {
+ (void) vdc_init_attr_negotiation(vdc);
+ } else if (state == VD_STATE_ATTR) {
+ (void) vdc_init_dring_negotiate(vdc);
+ } else if (state == VD_STATE_DATA) {
+ /*
+ * nothing to do - we have already completed the negotiation
+ * and we can transmit data when ready.
+ */
+ PR0("%s[%d] Negotiation triggered after handshake completed",
+ __func__, vdc->instance);
+ }
+ /*
+ * NOTE(review): any other vd_state_t value falls through silently
+ * with VDC_HANDSHAKE left set; it is only cleared again by
+ * vdc_reset_connection() - confirm no intermediate states exist.
+ */
+
+ mutex_exit(&vdc->lock);
+}
+
+/*
+ * vdc_init_ver_negotiation()
+ *
+ * Send the version-info message (VIO_VER_INFO) to vds to start the
+ * handshake, stamping a fresh session ID first.  Caller must hold
+ * vdc->lock.
+ *
+ * Return Codes:
+ *	0 on success, ENOMSG on a short write, otherwise the vdc_send()
+ *	error.
+ */
+static int
+vdc_init_ver_negotiation(vdc_t *vdc)
+{
+ vio_ver_msg_t pkt;
+ size_t msglen = sizeof (pkt);
+ int status = -1;
+
+ PR0("%s: Entered.\n", __func__);
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ /*
+ * set the Session ID to a unique value
+ * (the lower 32 bits of the clock tick)
+ */
+ vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
+
+ pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
+ pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
+ pkt.tag.vio_subtype_env = VIO_VER_INFO;
+ pkt.tag.vio_sid = vdc->session_id;
+ pkt.dev_class = VDEV_DISK;
+ pkt.ver_major = VD_VER_MAJOR;
+ pkt.ver_minor = VD_VER_MINOR;
+
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen);
+ PR0("%s: vdc_send(status = %d)\n", __func__, status);
+
+ /* a short write means vds never saw the complete message */
+ if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
+ PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%d)\n",
+ __func__, vdc->instance, vdc->ldc_handle,
+ status, msglen);
+ if (msglen != sizeof (vio_ver_msg_t))
+ status = ENOMSG;
+ }
+
+ return (status);
+}
+
+/*
+ * vdc_init_attr_negotiation()
+ *
+ * Send the attribute-info message (VIO_ATTR_INFO) to vds to start the
+ * attribute phase of the handshake.  Caller must hold vdc->lock.
+ *
+ * Return Codes:
+ *	0 on success, ENOMSG on a short write, otherwise the vdc_send()
+ *	error.
+ */
+static int
+vdc_init_attr_negotiation(vdc_t *vdc)
+{
+ vd_attr_msg_t pkt;
+ size_t msglen = sizeof (pkt);
+ int status;
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ PR0("%s[%d] entered\n", __func__, vdc->instance);
+
+ /* fill in tag */
+ pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
+ pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
+ pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
+ pkt.tag.vio_sid = vdc->session_id;
+ /* fill in payload */
+ pkt.max_xfer_sz = vdc->max_xfer_sz;
+ pkt.vdisk_block_size = vdc->block_size;
+ pkt.xfer_mode = VIO_DRING_MODE;
+ pkt.operations = 0; /* server will set bits of valid operations */
+ pkt.vdisk_type = 0; /* server will set to valid device type */
+ pkt.vdisk_size = 0; /* server will set to valid size */
+
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen);
+ PR0("%s: vdc_send(status = %d)\n", __func__, status);
+
+ /*
+ * Verify the full attribute message was written.  The original code
+ * compared against sizeof (vio_ver_msg_t) - a copy/paste from the
+ * version negotiation - but the message sent here is a vd_attr_msg_t.
+ */
+ if ((status != 0) || (msglen != sizeof (vd_attr_msg_t))) {
+ PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size (%d)\n",
+ __func__, vdc->instance, vdc->ldc_handle,
+ status, msglen);
+ if (msglen != sizeof (vd_attr_msg_t))
+ status = ENOMSG;
+ }
+
+ return (status);
+}
+
+/*
+ * vdc_init_dring_negotiate()
+ *
+ * Create and bind the descriptor ring, then send the ring registration
+ * message (VIO_DRING_REG) to vds.  On failure the connection state is
+ * reset (without resetting the LDC channel).  Caller must hold
+ * vdc->lock.
+ *
+ * Return Codes:
+ *	0 on success, otherwise the ring-init or vdc_send() error.
+ */
+static int
+vdc_init_dring_negotiate(vdc_t *vdc)
+{
+ vio_dring_reg_msg_t pkt;
+ size_t msglen = sizeof (pkt);
+ int status = -1;
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ status = vdc_init_descriptor_ring(vdc);
+ PR0("%s[%d] Init of descriptor ring completed (status = %d)\n",
+ __func__, vdc->instance, status);
+ if (status != 0) {
+ cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n",
+ vdc->instance, status);
+ vdc_reset_connection(vdc, B_FALSE);
+ return (status);
+ }
+
+ /* fill in tag */
+ pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
+ pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
+ pkt.tag.vio_subtype_env = VIO_DRING_REG;
+ pkt.tag.vio_sid = vdc->session_id;
+ /* fill in payload */
+ pkt.dring_ident = 0;
+ pkt.num_descriptors = VD_DRING_LEN;
+ pkt.descriptor_size = VD_DRING_ENTRY_SZ;
+ pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
+ pkt.ncookies = vdc->dring_cookie_count;
+ pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */
+
+ /*
+ * NOTE(review): unlike the ver/attr init routines, no short-write
+ * (msglen) check is made here - confirm that is intentional.
+ */
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&pkt, &msglen);
+ if (status != 0) {
+ PR0("%s[%d] Failed to register DRing (status = %d)\n",
+ __func__, vdc->instance, status);
+ vdc_reset_connection(vdc, B_FALSE);
+ }
+
+ return (status);
+}
+
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * LDC helper routines
+ */
+
+/*
+ * Function:
+ *	vdc_send()
+ *
+ * Description:
+ *	The function encapsulates the call to write a message using LDC.
+ *	If LDC indicates that the call failed due to the queue being full,
+ *	we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise
+ *	we return the error returned by LDC.
+ *
+ * Arguments:
+ *	ldc_handle	- LDC handle for the channel this instance of vdc uses
+ *	pkt		- address of LDC message to be sent
+ *	msglen		- the size of the message being sent. When the function
+ *			  returns, this contains the number of bytes written.
+ *
+ * Return Code:
+ *	0		- Success.
+ *	ECONNRESET	- The connection was not up.
+ *	EWOULDBLOCK	- LDC queue still full after vdc_retries attempts
+ *	xxx		- other error codes returned by ldc_write
+ *	(NULL pkt/msglen is only caught by ASSERT on DEBUG kernels;
+ *	EINVAL is never actually returned from here)
+ */
+static int
+vdc_send(ldc_handle_t ldc_handle, caddr_t pkt, size_t *msglen)
+{
+ size_t size = 0;
+ int retries = 0;
+ int status = 0;
+
+ ASSERT(msglen != NULL);
+ ASSERT(*msglen != 0);
+
+ do {
+ /* ldc_write() updates 'size' with the byte count written */
+ size = *msglen;
+ status = ldc_write(ldc_handle, pkt, &size);
+ } while (status == EWOULDBLOCK && retries++ < vdc_retries);
+
+ /* return the last size written */
+ *msglen = size;
+
+ return (status);
+}
+
+/*
+ * Function:
+ *	vdc_get_ldc_id()
+ *
+ * Description:
+ *	This function gets the 'ldc-id' for this particular instance of vdc.
+ *	The id returned is the guest domain channel endpoint LDC uses for
+ *	communication with vds.
+ *
+ * Arguments:
+ *	dip	- dev info pointer for this instance of the device driver.
+ *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
+ *
+ * Return Code:
+ *	0	- Success.
+ *	ENOENT	- Expected node or property did not exist.
+ *	ENXIO	- Unexpected error communicating with MD framework
+ */
+static int
+vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
+{
+ int status = ENOENT;
+ char *node_name = NULL;
+ md_t *mdp = NULL;
+ int num_nodes;
+ int num_vdevs;
+ int num_chans;
+ mde_cookie_t rootnode;
+ mde_cookie_t *listp = NULL;
+ mde_cookie_t *chanp = NULL;
+ boolean_t found_inst = B_FALSE;
+ int listsz;
+ int idx;
+ uint64_t md_inst;
+ int obp_inst;
+ int instance = ddi_get_instance(dip);
+
+ ASSERT(ldc_id != NULL);
+ *ldc_id = 0;
+
+ /*
+ * Get the OBP instance number for comparison with the MD instance
+ *
+ * The "cfg-handle" property of a vdc node in an MD contains the MD's
+ * notion of "instance", or unique identifier, for that node; OBP
+ * stores the value of the "cfg-handle" MD property as the value of
+ * the "reg" property on the node in the device tree it builds from
+ * the MD and passes to Solaris. Thus, we look up the devinfo node's
+ * "reg" property value to uniquely identify this device instance.
+ * If the "reg" property cannot be found, the device tree state is
+ * presumably so broken that there is no point in continuing.
+ */
+ if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
+ cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
+ return (ENOENT);
+ }
+ obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ OBP_REG, -1);
+ PR1("%s[%d]: OBP inst=%d\n", __func__, instance, obp_inst);
+
+ /*
+ * We now walk the MD nodes and if an instance of a vdc node matches
+ * the instance got from OBP we get the ldc-id property.
+ */
+ if ((mdp = md_get_handle()) == NULL) {
+ cmn_err(CE_WARN, "unable to init machine description");
+ return (ENXIO);
+ }
+
+ num_nodes = md_node_count(mdp);
+ ASSERT(num_nodes > 0);
+
+ /* worst case: a scan returns one cookie per MD node */
+ listsz = num_nodes * sizeof (mde_cookie_t);
+
+ /* allocate memory for nodes */
+ listp = kmem_zalloc(listsz, KM_SLEEP);
+ chanp = kmem_zalloc(listsz, KM_SLEEP);
+
+ rootnode = md_root_node(mdp);
+ ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+ /*
+ * Search for all the virtual devices, we will then check to see which
+ * ones are disk nodes.
+ */
+ num_vdevs = md_scan_dag(mdp, rootnode,
+ md_find_name(mdp, VDC_MD_VDEV_NAME),
+ md_find_name(mdp, "fwd"), listp);
+
+ if (num_vdevs <= 0) {
+ cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
+ status = ENOENT;
+ goto done;
+ }
+
+ PR1("%s[%d] num_vdevs=%d\n", __func__, instance, num_vdevs);
+ for (idx = 0; idx < num_vdevs; idx++) {
+ status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
+ if ((status != 0) || (node_name == NULL)) {
+ cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
+ ": err %d", VDC_MD_VDEV_NAME, status);
+ continue;
+ }
+
+ PR1("%s[%d] Found node %s\n", __func__, instance, node_name);
+ if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
+ /* disk node: see if its cfg-handle matches OBP's */
+ status = md_get_prop_val(mdp, listp[idx],
+ VDC_MD_CFG_HDL, &md_inst);
+ /*
+ * NOTE(review): md_inst is a uint64_t printed with
+ * "%d" here and below - debug output truncates.
+ */
+ PR1("%s[%d] vdc inst# in MD=%d\n",
+ __func__, instance, md_inst);
+ if ((status == 0) && (md_inst == obp_inst)) {
+ found_inst = B_TRUE;
+ break;
+ }
+ }
+ }
+
+ if (found_inst == B_FALSE) {
+ cmn_err(CE_NOTE, "Unable to find correct '%s' node",
+ VDC_MD_DISK_NAME);
+ status = ENOENT;
+ goto done;
+ }
+ PR0("%s[%d] MD inst=%d\n", __func__, instance, md_inst);
+
+ /* get the channels for this node */
+ num_chans = md_scan_dag(mdp, listp[idx],
+ md_find_name(mdp, VDC_MD_CHAN_NAME),
+ md_find_name(mdp, "fwd"), chanp);
+
+ /* expecting at least one channel */
+ if (num_chans <= 0) {
+ cmn_err(CE_NOTE, "No '%s' node for '%s' port",
+ VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
+ status = ENOENT;
+ goto done;
+
+ } else if (num_chans != 1) {
+ PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n",
+ __func__, instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
+ num_chans);
+ }
+
+ /*
+ * We use the first channel found (index 0), irrespective of how
+ * many are there in total.
+ */
+ if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
+ cmn_err(CE_NOTE, "Channel '%s' property not found",
+ VDC_ID_PROP);
+ status = ENOENT;
+ }
+
+ PR0("%s[%d] LDC id is 0x%lx\n", __func__, instance, *ldc_id);
+
+done:
+ /* KM_SLEEP allocations above cannot fail; checks kept for safety */
+ if (chanp)
+ kmem_free(chanp, listsz);
+ if (listp)
+ kmem_free(listp, listsz);
+
+ (void) md_fini_handle(mdp);
+
+ return (status);
+}
+
+
+/*
+ * vdc_is_able_to_tx_data()
+ *
+ * Description:
+ *	This function checks if we are able to send data to the
+ *	vDisk server (vds). The LDC connection needs to be up and
+ *	vdc & vds need to have completed the handshake negotiation.
+ *
+ * Parameters:
+ *	vdc - soft state pointer
+ *	flag - flag to indicate if we can block or not
+ *	       [ If O_NONBLOCK or O_NDELAY (which are defined in
+ *	       open(2)) are set then do not block)
+ *
+ * Return Values
+ *	B_TRUE - can talk to vds
+ *	B_FALSE - unable to talk to vds
+ */
+static boolean_t
+vdc_is_able_to_tx_data(vdc_t *vdc, int flag)
+{
+ vd_state_t state;
+ uint32_t ldc_state;
+ uint_t retries = 0;
+ int rv = -1;
+
+ ASSERT(vdc != NULL);
+
+ /* snapshot the state under the lock for a consistent fast-path check */
+ mutex_enter(&vdc->lock);
+ state = vdc->state;
+ ldc_state = vdc->ldc_state;
+ mutex_exit(&vdc->lock);
+
+ if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP))
+ return (B_TRUE);
+
+ /* non-blocking callers get an immediate answer */
+ if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) {
+ PR0("%s[%d] Not ready to tx - state %d LDC state %d\n",
+ __func__, vdc->instance, state, ldc_state);
+ return (B_FALSE);
+ }
+
+ /*
+ * We want to check and see if any negotiations triggered earlier
+ * have succeeded. We are prepared to wait a little while in case
+ * they are still in progress.
+ */
+ mutex_enter(&vdc->lock);
+ while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) {
+ PR0("%s: Waiting for connection at state %d (LDC state %d)\n",
+ __func__, vdc->state, vdc->ldc_state);
+
+ /* woken by the LDC callback (cv_signal) or by timeout */
+ rv = cv_timedwait(&vdc->cv, &vdc->lock,
+ VD_GET_TIMEOUT_HZ(retries));
+
+ /*
+ * An rv of -1 indicates that we timed out without the LDC
+ * state changing so it looks like the other side (vdc) is
+ * not yet ready/responding.
+ *
+ * Any other value of rv indicates that the LDC triggered an
+ * interrupt so we just loop again, check the handshake state
+ * and keep waiting if necessary.
+ */
+ if (rv == -1) {
+ if (retries >= vdc_retries) {
+ PR0("%s[%d] handshake wait timed out.\n",
+ __func__, vdc->instance);
+ mutex_exit(&vdc->lock);
+ return (B_FALSE);
+ } else {
+ PR1("%s[%d] Retry #%d for handshake timedout\n",
+ __func__, vdc->instance, retries);
+ retries++;
+ }
+ }
+ }
+
+ ASSERT(vdc->ldc_state == LDC_UP);
+ ASSERT(vdc->state == VD_STATE_DATA);
+
+ mutex_exit(&vdc->lock);
+
+ return (B_TRUE);
+}
+
+
+/*
+ * vdc_terminate_ldc()
+ *
+ * Tear down whatever LDC state was set up for this instance (close the
+ * channel, unregister the callback, finalize the handle), guided by the
+ * bits recorded in vdc->initialized.  Caller must hold vdc->lock.
+ */
+static void
+vdc_terminate_ldc(vdc_t *vdc)
+{
+ int instance = ddi_get_instance(vdc->dip);
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ PR0("%s[%d] initialized=%x\n", __func__, instance, vdc->initialized);
+
+ if (vdc->initialized & VDC_LDC_OPEN) {
+ PR0("%s[%d]: ldc_close()\n", __func__, instance);
+ (void) ldc_close(vdc->ldc_handle);
+ }
+ if (vdc->initialized & VDC_LDC_CB) {
+ PR0("%s[%d]: ldc_unreg_callback()\n", __func__, instance);
+ (void) ldc_unreg_callback(vdc->ldc_handle);
+ }
+ if (vdc->initialized & VDC_LDC) {
+ PR0("%s[%d]: ldc_fini()\n", __func__, instance);
+ (void) ldc_fini(vdc->ldc_handle);
+ vdc->ldc_handle = NULL;
+ }
+
+ /* all LDC-related state is now gone */
+ vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
+}
+
+/*
+ * vdc_reset_connection()
+ *
+ * Move the handshake state machine back to VD_STATE_INIT so the next
+ * negotiation starts from scratch, optionally resetting the underlying
+ * LDC channel as well.  Clears VDC_HANDSHAKE so a new handshake may be
+ * triggered.  Caller must hold vdc->lock.
+ */
+static void
+vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc)
+{
+ int status;
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ PR0("%s[%d] Entered\n", __func__, vdc->instance);
+
+ vdc->state = VD_STATE_INIT;
+
+ if (reset_ldc == B_TRUE) {
+ /* failure is only logged; there is no recovery action here */
+ status = ldc_reset(vdc->ldc_handle);
+ PR0("%s[%d] ldc_reset() = %d\n",
+ __func__, vdc->instance, status);
+ }
+
+ vdc->initialized &= ~VDC_HANDSHAKE;
+ PR0("%s[%d] init=%x\n", __func__, vdc->instance, vdc->initialized);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Descriptor Ring helper routines
+ */
+
+/*
+ * vdc_init_descriptor_ring()
+ *
+ * Create the descriptor ring, bind it to the LDC channel, and set up
+ * the local shadow ring (one lock/CV/mem-handle per entry).  Progress
+ * is recorded in vdc->initialized so a partial setup can be unwound by
+ * vdc_destroy_descriptor_ring().  Caller must hold vdc->lock.
+ *
+ * Return Codes:
+ *	0 on success, otherwise the failing ldc_mem_* status (the caller
+ *	is expected to trigger teardown of any partially-built state).
+ */
+static int
+vdc_init_descriptor_ring(vdc_t *vdc)
+{
+ vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */
+ int status = -1;
+ int i;
+
+ PR0("%s\n", __func__);
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+ ASSERT(vdc->ldc_handle != NULL);
+
+ status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ,
+ &vdc->ldc_dring_hdl);
+ if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
+ PR0("%s: Failed to create a descriptor ring", __func__);
+ return (status);
+ }
+ /*
+ * Record only the "ring created" step.  The original code set
+ * VDC_DRING here, but vdc_destroy_descriptor_ring() keys the
+ * ldc_mem_dring_destroy() call off VDC_DRING_INIT, which was
+ * otherwise never set - so the ring was never destroyed.
+ */
+ vdc->initialized |= VDC_DRING_INIT;
+ vdc->dring_entry_size = VD_DRING_ENTRY_SZ;
+ vdc->dring_len = VD_DRING_LEN;
+
+ vdc->dring_cookie = kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);
+
+ status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
+ LDC_SHADOW_MAP, LDC_MEM_RW, &vdc->dring_cookie[0],
+ &vdc->dring_cookie_count);
+ if (status != 0) {
+ PR0("%s: Failed to bind descriptor ring (%p) to channel (%p)\n",
+ __func__, vdc->ldc_dring_hdl, vdc->ldc_handle);
+ return (status);
+ }
+ ASSERT(vdc->dring_cookie_count == 1);
+ vdc->initialized |= VDC_DRING_BOUND;
+
+ status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
+ if (status != 0) {
+ PR0("%s: Failed to get info for descriptor ring (%p)\n",
+ __func__, vdc->ldc_dring_hdl);
+ return (status);
+ }
+
+ /* Allocate the local copy of this dring */
+ vdc->local_dring = kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t),
+ KM_SLEEP);
+ vdc->initialized |= VDC_DRING_LOCAL;
+
+ /*
+ * Mark all DRing entries as free and init priv desc memory handles
+ * If any entry is initialized, we need to free it later so we set
+ * the bit in 'initialized' at the start.
+ */
+ vdc->initialized |= VDC_DRING_ENTRY;
+ for (i = 0; i < VD_DRING_LEN; i++) {
+ dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
+ dep->hdr.dstate = VIO_DESC_FREE;
+
+ status = ldc_mem_alloc_handle(vdc->ldc_handle,
+ &vdc->local_dring[i].desc_mhdl);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for"
+ " descriptor %d", vdc->instance, i);
+ return (status);
+ }
+ vdc->local_dring[i].flags = VIO_DESC_FREE;
+ vdc->local_dring[i].flags |= VDC_ALLOC_HANDLE;
+ vdc->local_dring[i].dep = dep;
+
+ mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL);
+ }
+
+ /*
+ * We init the index of the last DRing entry used. Since the code to
+ * get the next available entry increments it before selecting one,
+ * we set it to the last DRing entry so that it wraps around to zero
+ * for the 1st entry to be used.
+ */
+ vdc->dring_curr_idx = VD_DRING_LEN - 1;
+
+ return (status);
+}
+
+/*
+ * vdc_destroy_descriptor_ring()
+ *
+ * Undo vdc_init_descriptor_ring() step by step, guided by the bits in
+ * vdc->initialized.  Caller must hold vdc->lock and the connection must
+ * already be back in VD_STATE_INIT.
+ */
+static void
+vdc_destroy_descriptor_ring(vdc_t *vdc)
+{
+ ldc_mem_handle_t mhdl = NULL;
+ int status = -1;
+ int i; /* loop */
+
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+ ASSERT(vdc->state == VD_STATE_INIT);
+
+ PR0("%s: Entered\n", __func__);
+
+ if (vdc->initialized & VDC_DRING_ENTRY) {
+ for (i = 0; i < VD_DRING_LEN; i++) {
+ mhdl = vdc->local_dring[i].desc_mhdl;
+
+ /*
+ * Only free handles that were actually allocated.
+ * The original used '|' here, which is always true
+ * and would also "free" never-allocated handles.
+ */
+ if (vdc->local_dring[i].flags & VDC_ALLOC_HANDLE)
+ (void) ldc_mem_free_handle(mhdl);
+
+ mutex_destroy(&vdc->local_dring[i].lock);
+ cv_destroy(&vdc->local_dring[i].cv);
+
+ bzero(&vdc->local_dring[i].desc_mhdl,
+ sizeof (ldc_mem_handle_t));
+ }
+ vdc->initialized &= ~VDC_DRING_ENTRY;
+ }
+
+ if (vdc->initialized & VDC_DRING_LOCAL) {
+ kmem_free(vdc->local_dring,
+ VD_DRING_LEN * sizeof (vdc_local_desc_t));
+ vdc->initialized &= ~VDC_DRING_LOCAL;
+ }
+
+ if (vdc->initialized & VDC_DRING_BOUND) {
+ status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
+ if (status == 0) {
+ vdc->initialized &= ~VDC_DRING_BOUND;
+ } else {
+ /* format begins with "%s": __func__ was missing */
+ vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n",
+ __func__, vdc->ldc_dring_hdl);
+ }
+ }
+
+ if (vdc->initialized & VDC_DRING_INIT) {
+ status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
+ if (status == 0) {
+ vdc->ldc_dring_hdl = NULL;
+ bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
+ vdc->initialized &= ~VDC_DRING_INIT;
+ } else {
+ /* format begins with "%s": __func__ was missing */
+ vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n",
+ __func__, vdc->ldc_dring_hdl);
+ }
+ }
+}
+
+/*
+ * vdc_get_next_dring_entry_idx()
+ *
+ * Description:
+ *	This function gets the index of the next Descriptor Ring entry available
+ *	and records it in vdc->dring_curr_idx.  Caller must hold
+ *	vdc->dring_lock.
+ *
+ * Return Value:
+ *	0 <= rv < VD_DRING_LEN		Next available slot
+ *	-1				DRing is full
+ */
+static int
+vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed)
+{
+ _NOTE(ARGUNUSED(num_slots_needed))
+
+ vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */
+ int idx = -1;
+ int start_idx = 0;
+
+ ASSERT(vdc != NULL);
+ ASSERT(vdc->dring_len == VD_DRING_LEN);
+ ASSERT(vdc->dring_curr_idx >= 0);
+ ASSERT(vdc->dring_curr_idx < VD_DRING_LEN);
+ ASSERT(mutex_owned(&vdc->dring_lock));
+
+ /* Start at the last entry used */
+ idx = start_idx = vdc->dring_curr_idx;
+
+ /*
+ * Loop through Descriptor Ring checking for a free entry until we reach
+ * the entry we started at. We should never come close to filling the
+ * Ring at any stage, instead this is just to prevent an entry which
+ * gets into an inconsistent state (e.g. due to a request timing out)
+ * from blocking progress.
+ */
+ do {
+ /* Get the next entry after the last known index tried */
+ idx = (idx + 1) % VD_DRING_LEN;
+
+ dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
+ ASSERT(dep != NULL);
+
+ if (dep->hdr.dstate == VIO_DESC_FREE) {
+ ASSERT(idx >= 0);
+ ASSERT(idx < VD_DRING_LEN);
+ /* remember where we stopped for the next caller */
+ vdc->dring_curr_idx = idx;
+ return (idx);
+
+ } else if (dep->hdr.dstate == VIO_DESC_READY) {
+ PR0("%s: Entry %d waiting to be accepted\n",
+ __func__, idx);
+ continue;
+
+ } else if (dep->hdr.dstate == VIO_DESC_ACCEPTED) {
+ PR0("%s: Entry %d waiting to be processed\n",
+ __func__, idx);
+ continue;
+
+ } else if (dep->hdr.dstate == VIO_DESC_DONE) {
+ PR0("%s: Entry %d done but not marked free\n",
+ __func__, idx);
+
+ /*
+ * If we are currently panicking, interrupts are
+ * disabled and we will not be getting ACKs from the
+ * vDisk server so we mark the descriptor ring entries
+ * as FREE here instead of in the ACK handler.
+ */
+ if (panicstr) {
+ (void) vdc_depopulate_descriptor(vdc, idx);
+ dep->hdr.dstate = VIO_DESC_FREE;
+ vdc->local_dring[idx].flags = VIO_DESC_FREE;
+ }
+ continue;
+
+ } else {
+ /* unknown dstate: the shared ring is corrupt */
+ vdc_msg("Public Descriptor Ring entry corrupted");
+ mutex_enter(&vdc->lock);
+ vdc_reset_connection(vdc, B_TRUE);
+ mutex_exit(&vdc->lock);
+ return (-1);
+ }
+
+ } while (idx != start_idx);
+
+ return (-1);
+}
+
+/*
+ * Function:
+ * vdc_populate_descriptor
+ *
+ * Description:
+ * This routine writes the data to be transmitted to vds into the
+ * descriptor, notifies vds that the ring has been updated and
+ * then waits for the request to be processed.
+ *
+ * Arguments:
+ * vdc - the soft state pointer
+ * addr - start address of memory region.
+ * nbytes - number of bytes to read/write
+ * operation - operation we want vds to perform (VD_OP_XXX)
+ * arg - parameter to be sent to server (depends on VD_OP_XXX type)
+ * . mode for ioctl(9e)
+ * . LP64 diskaddr_t (block I/O)
+ * slice - the disk slice this request is for
+ *
+ * Return Codes:
+ * 0
+ * EAGAIN
+ * EFAULT
+ * ENXIO
+ * EIO
+ */
+static int
+vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation,
+ uint64_t arg, uint64_t slice)
+{
+ vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */
+ vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */
+ int idx = 0; /* Index of DRing entry used */
+ vio_dring_msg_t dmsg;
+ size_t msglen = sizeof (dmsg);
+ int status = 0;
+ /*
+ * rv must be initialized: the FLUSH/VTOC/GEOM/SCSICMD arm below
+ * leaves it unset when nbytes == 0, yet it is tested after the
+ * switch (the original declared it uninitialized).
+ */
+ int rv = 0;
+ int retries = 0;
+
+ ASSERT(vdc != NULL);
+ ASSERT(slice < V_NUMPAR);
+
+ /*
+ * Get next available DRing entry.
+ */
+ mutex_enter(&vdc->dring_lock);
+ idx = vdc_get_next_dring_entry_idx(vdc, 1);
+ if (idx == -1) {
+ mutex_exit(&vdc->dring_lock);
+ vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n",
+ __func__, vdc->instance, vdc->seq_num);
+
+ /*
+ * Since strategy should not block we don't wait for the DRing
+ * to empty and instead return
+ */
+ return (EAGAIN);
+ }
+
+ ASSERT(idx < VD_DRING_LEN);
+ local_dep = &vdc->local_dring[idx];
+ dep = local_dep->dep;
+ ASSERT(dep != NULL);
+
+ /*
+ * Wait for anybody still using the DRing entry to finish.
+ * (e.g. still waiting for vds to respond to a request)
+ */
+ mutex_enter(&local_dep->lock);
+
+ switch (operation) {
+ case VD_OP_BREAD:
+ case VD_OP_BWRITE:
+ PR1("buf=%p, block=%lx, nbytes=%lx\n", addr, arg, nbytes);
+ dep->payload.addr = (diskaddr_t)arg;
+ rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, operation);
+ break;
+
+ case VD_OP_FLUSH:
+ case VD_OP_GET_VTOC:
+ case VD_OP_SET_VTOC:
+ case VD_OP_GET_DISKGEOM:
+ case VD_OP_SET_DISKGEOM:
+ case VD_OP_SCSICMD:
+ /* nbytes == 0 means there is no data buffer to bind */
+ if (nbytes > 0) {
+ rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes,
+ operation);
+ }
+ break;
+ default:
+ cmn_err(CE_NOTE, "[%d] Unsupported vDisk operation [%d]\n",
+ vdc->instance, operation);
+ rv = EINVAL;
+ }
+
+ if (rv != 0) {
+ mutex_exit(&local_dep->lock);
+ mutex_exit(&vdc->dring_lock);
+ return (rv);
+ }
+
+ /*
+ * fill in the data details into the DRing
+ */
+ dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc);
+ dep->payload.operation = operation;
+ dep->payload.nbytes = nbytes;
+ dep->payload.status = EINPROGRESS; /* vds will set valid value */
+ dep->payload.slice = slice;
+ dep->hdr.dstate = VIO_DESC_READY;
+ dep->hdr.ack = 1; /* request an ACK for every message */
+
+ local_dep->flags = VIO_DESC_READY;
+ local_dep->addr = addr;
+
+ /*
+ * Send a msg with the DRing details to vds
+ */
+ VIO_INIT_DRING_DATA_TAG(dmsg);
+ VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc);
+ dmsg.dring_ident = vdc->dring_ident;
+ dmsg.start_idx = idx;
+ dmsg.end_idx = idx;
+
+ PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n",
+ vdc->dring_ident, dmsg.start_idx, dmsg.end_idx,
+ dmsg.seq_num, dep->payload.req_id, dep);
+
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&dmsg, &msglen);
+ PR1("%s[%d]: ldc_write() status=%d\n", __func__, vdc->instance, status);
+ if (status != 0) {
+ mutex_exit(&local_dep->lock);
+ mutex_exit(&vdc->dring_lock);
+ vdc_msg("%s: ldc_write(%d)\n", __func__, status);
+ return (EAGAIN);
+ }
+
+ /*
+ * XXX - potential performance enhancement (Investigate at a later date)
+ *
+ * for calls from strategy(9E), instead of waiting for a response from
+ * vds, we could return at this stage and let the ACK handling code
+ * trigger the biodone(9F)
+ */
+
+ /*
+ * When a guest is panicking, the completion of requests needs to be
+ * handled differently because interrupts are disabled and vdc
+ * will not get messages. We have to poll for the messages instead.
+ */
+ if (ddi_in_panic()) {
+ int start = 0;
+ retries = 0;
+ for (;;) {
+ msglen = sizeof (dmsg);
+ status = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg,
+ &msglen);
+ if (status) {
+ status = EINVAL;
+ break;
+ }
+
+ /*
+ * if there are no packets wait and check again
+ */
+ if ((status == 0) && (msglen == 0)) {
+ if (retries++ > vdc_dump_retries) {
+ PR0("[%d] Giving up waiting, idx %d\n",
+ vdc->instance, idx);
+ status = EAGAIN;
+ break;
+ }
+
+ PR1("Waiting for next packet @ %d\n", idx);
+ delay(drv_usectohz(vdc_dump_usec_timeout));
+ continue;
+ }
+
+ /*
+ * Ignore all messages that are not ACKs/NACKs to
+ * DRing requests.
+ */
+ if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) ||
+ (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) {
+ PR0("discarding pkt: type=%d sub=%d env=%d\n",
+ dmsg.tag.vio_msgtype,
+ dmsg.tag.vio_subtype,
+ dmsg.tag.vio_subtype_env);
+ continue;
+ }
+
+ /*
+ * set the appropriate return value for the
+ * current request.
+ */
+ switch (dmsg.tag.vio_subtype) {
+ case VIO_SUBTYPE_ACK:
+ status = 0;
+ break;
+ case VIO_SUBTYPE_NACK:
+ status = EAGAIN;
+ break;
+ default:
+ continue;
+ }
+
+ start = dmsg.start_idx;
+ if (start >= VD_DRING_LEN) {
+ PR0("[%d] Bogus ack data : start %d\n",
+ vdc->instance, start);
+ continue;
+ }
+
+ dep = VDC_GET_DRING_ENTRY_PTR(vdc, start);
+
+ PR1("[%d] Dumping start=%d idx=%d state=%d\n",
+ vdc->instance, start, idx, dep->hdr.dstate);
+
+ if (dep->hdr.dstate != VIO_DESC_DONE) {
+ PR0("[%d] Entry @ %d - state !DONE %d\n",
+ vdc->instance, start, dep->hdr.dstate);
+ continue;
+ }
+
+ (void) vdc_depopulate_descriptor(vdc, start);
+
+ /*
+ * We want to process all Dring entries up to
+ * the current one so that we can return an
+ * error with the correct request.
+ */
+ if (idx > start) {
+ PR0("[%d] Looping: start %d, idx %d\n",
+ vdc->instance, idx, start);
+ continue;
+ }
+
+ /* exit - all outstanding requests are completed */
+ break;
+ }
+
+ mutex_exit(&local_dep->lock);
+ mutex_exit(&vdc->dring_lock);
+
+ return (status);
+ }
+
+ /*
+ * Now watch the DRing entries we modified to get the response
+ * from vds.
+ */
+ status = vdc_wait_for_descriptor_update(vdc, idx, dmsg);
+ if (status == ETIMEDOUT) {
+ /* debug info when dumping state on vds side */
+ dep->payload.status = ECANCELED;
+ }
+
+ status = vdc_depopulate_descriptor(vdc, idx);
+ PR1("%s[%d] Status=%d\n", __func__, vdc->instance, status);
+
+ mutex_exit(&local_dep->lock);
+ mutex_exit(&vdc->dring_lock);
+
+ return (status);
+}
+
+/*
+ * vdc_wait_for_descriptor_update()
+ *
+ * Wait on the entry's CV for vds to mark descriptor 'idx' DONE,
+ * resending the original DRing message (with refreshed sequence IDs)
+ * when a timeout suggests it was dropped.  Caller must hold the
+ * entry's lock (local_dring[idx].lock).
+ *
+ * Return Codes:
+ *	0 - descriptor reached VIO_DESC_DONE
+ *	ETIMEDOUT - vds did not respond after vdc_retries timeouts
+ */
+static int
+vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg)
+{
+ vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */
+ vdc_local_desc_t *local_dep = NULL; /* Local Dring Entry Pointer */
+ size_t msglen = sizeof (dmsg);
+ int retries = 0;
+ /*
+ * Start out with success.  The original initialized this to ENXIO
+ * and never set it to 0 on the normal completion path, so success
+ * returned ENXIO (masked because callers only test for ETIMEDOUT).
+ */
+ int status = 0;
+ int rv = 0;
+
+ ASSERT(vdc != NULL);
+ ASSERT(idx < VD_DRING_LEN);
+ local_dep = &vdc->local_dring[idx];
+ ASSERT(local_dep != NULL);
+ dep = local_dep->dep;
+ ASSERT(dep != NULL);
+
+ while (dep->hdr.dstate != VIO_DESC_DONE) {
+ rv = cv_timedwait(&local_dep->cv, &local_dep->lock,
+ VD_GET_TIMEOUT_HZ(retries));
+ if (rv == -1) {
+ /*
+ * If they persist in ignoring us we'll storm off in a
+ * huff and return ETIMEDOUT to the upper layers.
+ */
+ if (retries >= vdc_retries) {
+ PR0("%s: Finished waiting on entry %d\n",
+ __func__, idx);
+ status = ETIMEDOUT;
+ break;
+ } else {
+ retries++;
+ PR0("%s[%d]: Timeout #%d on entry %d "
+ "[seq %d][req %d]\n", __func__,
+ vdc->instance,
+ retries, idx, dmsg.seq_num,
+ dep->payload.req_id);
+ }
+
+ /*
+ * NOTE(review): bitwise test against what looks like
+ * an enumerated state - confirm VIO_DESC_ACCEPTED is
+ * a distinct bit; otherwise '==' is wanted here.
+ */
+ if (dep->hdr.dstate & VIO_DESC_ACCEPTED) {
+ PR0("%s[%d]: vds has accessed entry %d [seq %d]"
+ "[req %d] but not ack'ed it yet\n",
+ __func__, vdc->instance, idx, dmsg.seq_num,
+ dep->payload.req_id);
+ continue;
+ }
+
+ /*
+ * we resend the message as it may have been dropped
+ * and have never made it to the other side (vds).
+ * (We reuse the original message but update seq ID)
+ * NOTE(review): resetting 'retries' after each resend
+ * means a persistently silent vds never trips the
+ * ETIMEDOUT path above - confirm this is intended.
+ */
+ VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc);
+ retries = 0;
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&dmsg,
+ &msglen);
+ if (status != 0) {
+ vdc_msg("%s: Error (%d) while resending after "
+ "timeout\n", __func__, status);
+ status = ETIMEDOUT;
+ break;
+ }
+ }
+ }
+
+ return (status);
+}
+
+/*
+ * vdc_get_response()
+ *
+ * Walk the local descriptor ring from 'start', reclaiming every entry
+ * vds has marked DONE; stop after 'end' (or at the first not-DONE
+ * entry, or the end of the ring, when 'end' is -1).
+ *
+ * Return Codes:
+ *	0 - stopped because 'end' was passed
+ *	otherwise the status of the last vdc_depopulate_descriptor()
+ *	(ENXIO if no entry was DONE)
+ */
+static int
+vdc_get_response(vdc_t *vdc, int start, int end)
+{
+ vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */
+ vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */
+ int status = ENXIO;
+ int idx = -1;
+
+ ASSERT(vdc != NULL);
+ ASSERT(start >= 0);
+ ASSERT(start <= VD_DRING_LEN);
+ /*
+ * 'end' may be -1, meaning "process every completed entry".  The
+ * original duplicated the 'start' checks here by mistake.
+ */
+ ASSERT(end >= -1);
+ ASSERT(end <= VD_DRING_LEN);
+
+ idx = start;
+ ldep = &vdc->local_dring[idx];
+ ASSERT(ldep != NULL);
+ dep = ldep->dep;
+ ASSERT(dep != NULL);
+
+ PR0("%s[%d] DRING entry=%d status=%d\n", __func__, vdc->instance,
+ idx, VIO_GET_DESC_STATE(dep->hdr.dstate));
+ while (VIO_GET_DESC_STATE(dep->hdr.dstate) == VIO_DESC_DONE) {
+ if ((end != -1) && (idx > end))
+ return (0);
+
+ switch (ldep->operation) {
+ case VD_OP_BREAD:
+ case VD_OP_BWRITE:
+ /* call bioxxx */
+ break;
+ default:
+ /* signal waiter */
+ break;
+ }
+
+ /* Clear the DRing entry */
+ status = vdc_depopulate_descriptor(vdc, idx);
+ PR0("%s[%d] Status=%d\n", __func__, vdc->instance, status);
+
+ /* loop accounting to get next DRing entry */
+ idx++;
+ /* stop at the end of the ring: the original walked past it */
+ if (idx >= VD_DRING_LEN)
+ break;
+ ldep = &vdc->local_dring[idx];
+ dep = ldep->dep;
+ }
+
+ return (status);
+}
+
+/*
+ * vdc_depopulate_descriptor()
+ *
+ * Reclaim DRing entry 'idx' after vds has processed it: capture the
+ * payload status, mark the public and local entries FREE, copy any
+ * bounce-buffer data back to the caller's buffer, and unbind the LDC
+ * memory handle.
+ *
+ * Return Codes:
+ *	the ldc_mem_unbind_handle() status.  NOTE(review): the payload
+ *	status is captured into 'status' but then overwritten by the
+ *	unbind result, so callers never see the operation's status here.
+ */
+static int
+vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
+{
+ vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */
+ vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */
+ int status = ENXIO;
+
+ ASSERT(vdc != NULL);
+ ASSERT(idx < VD_DRING_LEN);
+ ldep = &vdc->local_dring[idx];
+ ASSERT(ldep != NULL);
+ dep = ldep->dep;
+ ASSERT(dep != NULL);
+
+ status = dep->payload.status;
+ VDC_MARK_DRING_ENTRY_FREE(vdc, idx);
+ /* redundant: ldep already points at this entry */
+ ldep = &vdc->local_dring[idx];
+ VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE);
+
+ /*
+ * If the upper layer passed in a misaligned address we copied the
+ * data into an aligned buffer before sending it to LDC - we now
+ * copy it back to the original buffer.
+ */
+ if (ldep->align_addr) {
+ ASSERT(ldep->addr != NULL);
+ ASSERT(dep->payload.nbytes > 0);
+
+ bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes);
+ /*
+ * NOTE(review): 'sizeof (caddr_t) * nbytes' matches the
+ * over-allocation in vdc_populate_mem_hdl() - the pair is
+ * consistent but looks 8x larger than needed; confirm and
+ * fix both sites together.
+ */
+ kmem_free(ldep->align_addr,
+ sizeof (caddr_t) * dep->payload.nbytes);
+ ldep->align_addr = NULL;
+ }
+
+ status = ldc_mem_unbind_handle(ldep->desc_mhdl);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d failed:%d",
+ vdc->instance, ldep->desc_mhdl, idx, status);
+ }
+
+ return (status);
+}
+
+static int
+vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes,
+ int operation)
+{
+ vd_dring_entry_t *dep = NULL;
+ vdc_local_desc_t *ldep = NULL;
+ ldc_mem_handle_t mhdl;
+ caddr_t vaddr;
+ int perm = LDC_MEM_RW;
+ int rv = 0;
+ int i;
+
+ ASSERT(vdc != NULL);
+ ASSERT(idx < VD_DRING_LEN);
+
+ dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
+ ldep = &vdc->local_dring[idx];
+ mhdl = ldep->desc_mhdl;
+
+ switch (operation) {
+ case VD_OP_BREAD:
+ perm = LDC_MEM_W;
+ break;
+
+ case VD_OP_BWRITE:
+ perm = LDC_MEM_R;
+ break;
+
+ case VD_OP_FLUSH:
+ case VD_OP_GET_VTOC:
+ case VD_OP_SET_VTOC:
+ case VD_OP_GET_DISKGEOM:
+ case VD_OP_SET_DISKGEOM:
+ case VD_OP_SCSICMD:
+ perm = LDC_MEM_RW;
+ break;
+
+ default:
+ ASSERT(0); /* catch bad programming in vdc */
+ }
+
+ /*
+ * LDC expects any addresses passed in to be 8-byte aligned. We need
+ * to copy the contents of any misaligned buffers to a newly allocated
+ * buffer and bind it instead (and copy the contents back to the
+ * original buffer passed in when depopulating the descriptor)
+ */
+ vaddr = addr;
+ if (((uint64_t)addr & 0x7) != 0) {
+ ldep->align_addr =
+ kmem_zalloc(sizeof (caddr_t) * nbytes, KM_SLEEP);
+ PR0("%s[%d] Misaligned address %lx reallocating "
+ "(buf=%lx entry=%d)\n",
+ __func__, vdc->instance, addr, ldep->align_addr, idx);
+ bcopy(addr, ldep->align_addr, nbytes);
+ vaddr = ldep->align_addr;
+ }
+
+ rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8),
+ vdc->dring_mem_info.mtype, perm, &dep->payload.cookie[0],
+ &dep->payload.ncookies);
+ PR1("%s[%d] bound mem handle; ncookies=%d\n",
+ __func__, vdc->instance, dep->payload.ncookies);
+ if (rv != 0) {
+ vdc_msg("%s[%d] failed to ldc_mem_bind_handle "
+ "(mhdl=%lx, buf=%lx entry=%d err=%d)\n",
+ __func__, vdc->instance, mhdl, addr, idx, rv);
+ if (ldep->align_addr) {
+ kmem_free(ldep->align_addr,
+ sizeof (caddr_t) * dep->payload.nbytes);
+ ldep->align_addr = NULL;
+ }
+ return (EAGAIN);
+ }
+
+ /*
+ * Get the other cookies (if any).
+ */
+ for (i = 1; i < dep->payload.ncookies; i++) {
+ rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]);
+ if (rv != 0) {
+ (void) ldc_mem_unbind_handle(mhdl);
+ vdc_msg("%s: failed to get next cookie(mhdl=%lx "
+ "cnum=%d), err=%d", __func__, mhdl, i, rv);
+ if (ldep->align_addr) {
+ kmem_free(ldep->align_addr,
+ sizeof (caddr_t) * dep->payload.nbytes);
+ ldep->align_addr = NULL;
+ }
+ return (EAGAIN);
+ }
+ }
+
+ return (rv);
+}
+
+/*
+ * Interrupt handlers for messages from LDC
+ */
+
+static uint_t
+vdc_handle_cb(uint64_t event, caddr_t arg)
+{
+ ldc_status_t ldc_state;
+ int rv = 0;
+
+ vdc_t *vdc = (vdc_t *)(void *)arg;
+
+ ASSERT(vdc != NULL);
+
+ PR1("%s[%d] event=%x seqID=%d\n",
+ __func__, vdc->instance, event, vdc->seq_num);
+
+ /*
+ * Depending on the type of event that triggered this callback,
+ * we modify the handshake state or read the data.
+ *
+ * NOTE: not done as a switch() as event could be triggered by
+ * a state change and a read request. Also the ordering of the
+ * check for the event types is deliberate.
+ */
+ if (event & LDC_EVT_UP) {
+ PR0("%s[%d] Received LDC_EVT_UP\n", __func__, vdc->instance);
+
+ /* get LDC state */
+ rv = ldc_status(vdc->ldc_handle, &ldc_state);
+ if (rv != 0) {
+ cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
+ vdc->instance, rv);
+ vdc_reset_connection(vdc, B_TRUE);
+ return (LDC_SUCCESS);
+ }
+
+ /*
+ * Reset the transaction sequence numbers when LDC comes up.
+ * We then kick off the handshake negotiation with the vDisk
+ * server.
+ */
+ mutex_enter(&vdc->lock);
+ vdc->seq_num = 0;
+ vdc->seq_num_reply = 0;
+ vdc->ldc_state = ldc_state;
+ ASSERT(ldc_state == LDC_UP);
+ mutex_exit(&vdc->lock);
+
+ vdc_init_handshake_negotiation(vdc);
+
+ ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
+ }
+
+ if (event & LDC_EVT_READ) {
+ /*
+ * Wake up the worker thread to process the message
+ */
+ mutex_enter(&vdc->msg_proc_lock);
+ vdc->msg_pending = B_TRUE;
+ cv_signal(&vdc->msg_proc_cv);
+ mutex_exit(&vdc->msg_proc_lock);
+
+ ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
+
+ /* that's all we have to do - no need to handle DOWN/RESET */
+ return (LDC_SUCCESS);
+ }
+
+ if (event & LDC_EVT_RESET) {
+ PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance);
+ }
+
+ if (event & LDC_EVT_DOWN) {
+ PR0("%s[%d] Recvd LDC DOWN event\n", __func__, vdc->instance);
+
+ /* get LDC state */
+ rv = ldc_status(vdc->ldc_handle, &ldc_state);
+ if (rv != 0) {
+ cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
+ vdc->instance, rv);
+ ldc_state = LDC_OPEN;
+ }
+ mutex_enter(&vdc->lock);
+ vdc->ldc_state = ldc_state;
+ mutex_exit(&vdc->lock);
+
+ vdc_reset_connection(vdc, B_TRUE);
+ }
+
+ if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ))
+ cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received",
+ vdc->instance, event);
+
+ return (LDC_SUCCESS);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * The following functions process the incoming messages from vds
+ */
+
+
+static void
+vdc_process_msg_thread(vdc_t *vdc)
+{
+ int status = 0;
+ boolean_t q_is_empty = B_TRUE;
+
+ ASSERT(vdc != NULL);
+
+ mutex_enter(&vdc->msg_proc_lock);
+ PR0("%s[%d]: Starting\n", __func__, vdc->instance);
+
+ vdc->msg_proc_thr_state = VDC_THR_RUNNING;
+
+ while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) {
+
+ PR1("%s[%d] Waiting\n", __func__, vdc->instance);
+ while (vdc->msg_pending == B_FALSE)
+ cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock);
+
+ PR1("%s[%d] Message Received\n", __func__, vdc->instance);
+
+ /* check if there is data */
+ status = ldc_chkq(vdc->ldc_handle, &q_is_empty);
+ if ((status != 0) &&
+ (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) {
+ cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk"
+ " server. Cannot check LDC queue: %d",
+ vdc->instance, status);
+ mutex_enter(&vdc->lock);
+ vdc_reset_connection(vdc, B_TRUE);
+ mutex_exit(&vdc->lock);
+ vdc->msg_proc_thr_state = VDC_THR_STOP;
+ continue;
+ }
+
+ if (q_is_empty == B_FALSE) {
+ PR1("%s: new pkt(s) available\n", __func__);
+ vdc_process_msg(vdc);
+ }
+
+ vdc->msg_pending = B_FALSE;
+ }
+
+ PR0("Message processing thread stopped\n");
+ vdc->msg_pending = B_FALSE;
+ vdc->msg_proc_thr_state = VDC_THR_DONE;
+ cv_signal(&vdc->msg_proc_cv);
+ mutex_exit(&vdc->msg_proc_lock);
+ thread_exit();
+}
+
+
+/*
+ * Function:
+ * vdc_process_msg()
+ *
+ * Description:
+ * This function is called by the message processing thread each time it
+ * is triggered when LDC sends an interrupt to indicate that there are
+ * more packets on the queue. When it is called it will continue to loop
+ * and read the messages until there are no more left on the queue. If it
+ * encounters an invalid sized message it will drop it and check the next
+ * message.
+ *
+ * Arguments:
+ * arg - soft state pointer for this instance of the device driver.
+ *
+ * Return Code:
+ * None.
+ */
+static void
+vdc_process_msg(void *arg)
+{
+ vdc_t *vdc = (vdc_t *)(void *)arg;
+ vio_msg_t vio_msg;
+ size_t nbytes = sizeof (vio_msg);
+ int status;
+
+ ASSERT(vdc != NULL);
+
+ mutex_enter(&vdc->lock);
+
+ PR1("%s\n", __func__);
+
+ for (;;) {
+
+ /* read all messages - until no more left */
+ status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes);
+
+ if (status) {
+ vdc_msg("%s: ldc_read() failed = %d", __func__, status);
+
+ /* if status is ECONNRESET --- reset vdc state */
+ if (status == EIO || status == ECONNRESET) {
+ vdc_reset_connection(vdc, B_FALSE);
+ }
+
+ mutex_exit(&vdc->lock);
+ return;
+ }
+
+ if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) {
+ cmn_err(CE_CONT, "![%d] Expect %lu bytes; recv'd %lu\n",
+ vdc->instance, sizeof (vio_msg_tag_t), nbytes);
+ mutex_exit(&vdc->lock);
+ return;
+ }
+
+ if (nbytes == 0) {
+ PR2("%s[%d]: ldc_read() done..\n",
+ __func__, vdc->instance);
+ mutex_exit(&vdc->lock);
+ return;
+ }
+
+ PR1("%s[%d] (%x/%x/%x)\n", __func__, vdc->instance,
+ vio_msg.tag.vio_msgtype,
+ vio_msg.tag.vio_subtype,
+ vio_msg.tag.vio_subtype_env);
+
+ /*
+ * Verify the Session ID of the message
+ *
+ * Every message after the Version has been negotiated should
+ * have the correct session ID set.
+ */
+ if ((vio_msg.tag.vio_sid != vdc->session_id) &&
+ (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) {
+ PR0("%s: Incorrect SID 0x%x msg 0x%lx, expected 0x%x\n",
+ __func__, vio_msg.tag.vio_sid, &vio_msg,
+ vdc->session_id);
+ vdc_reset_connection(vdc, B_FALSE);
+ mutex_exit(&vdc->lock);
+ return;
+ }
+
+ switch (vio_msg.tag.vio_msgtype) {
+ case VIO_TYPE_CTRL:
+ status = vdc_process_ctrl_msg(vdc, vio_msg);
+ break;
+ case VIO_TYPE_DATA:
+ status = vdc_process_data_msg(vdc, vio_msg);
+ break;
+ case VIO_TYPE_ERR:
+ status = vdc_process_err_msg(vdc, vio_msg);
+ break;
+ default:
+ PR1("%s", __func__);
+ status = EINVAL;
+ break;
+ }
+
+ if (status != 0) {
+ PR0("%s[%d] Error (%d) occcurred processing msg\n",
+ __func__, vdc->instance, status);
+ vdc_reset_connection(vdc, B_FALSE);
+ }
+ }
+ _NOTE(NOTREACHED)
+}
+
+/*
+ * Function:
+ * vdc_process_ctrl_msg()
+ *
+ * Description:
+ * This function is called by the message processing thread each time
+ * an LDC message with a msgtype of VIO_TYPE_CTRL is received.
+ *
+ * Arguments:
+ * vdc - soft state pointer for this instance of the device driver.
+ * msg - the LDC message sent by vds
+ *
+ * Return Codes:
+ * 0 - Success.
+ * EPROTO - A message was received which shouldn't have happened according
+ * to the protocol
+ * ENOTSUP - An action which is allowed according to the protocol but which
+ * isn't (or doesn't need to be) implemented yet.
+ * EINVAL - An invalid value was returned as part of a message.
+ */
+static int
+vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg)
+{
+ size_t msglen = sizeof (msg);
+ vd_attr_msg_t *attr_msg = NULL;
+ vio_dring_reg_msg_t *dring_msg = NULL;
+ int status = -1;
+
+ ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL);
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ /* Depending on which state we are in; process the message */
+ switch (vdc->state) {
+ case VD_STATE_INIT:
+ if (msg.tag.vio_subtype_env != VIO_VER_INFO) {
+ status = EPROTO;
+ break;
+ }
+
+ switch (msg.tag.vio_subtype) {
+ case VIO_SUBTYPE_ACK:
+ vdc->state = VD_STATE_VER;
+ status = vdc_init_attr_negotiation(vdc);
+ break;
+ case VIO_SUBTYPE_NACK:
+ /*
+ * For now there is only one version number so we
+ * cannot step back to an earlier version but in the
+ * future we may need to add further logic here
+ * to try negotiating an earlier version as the VIO
+ * design allows for it.
+ */
+
+ /*
+ * vds could not handle the version we sent so we just
+ * stop negotiating.
+ */
+ status = EPROTO;
+ break;
+
+ case VIO_SUBTYPE_INFO:
+ /*
+ * Handle the case where vds starts handshake
+ * (for now only vdc is the instigator)
+ */
+ status = ENOTSUP;
+ break;
+
+ default:
+ status = ENOTSUP;
+ break;
+ }
+ break;
+
+ case VD_STATE_VER:
+ if (msg.tag.vio_subtype_env != VIO_ATTR_INFO) {
+ status = EPROTO;
+ break;
+ }
+
+ switch (msg.tag.vio_subtype) {
+ case VIO_SUBTYPE_ACK:
+ /*
+ * We now verify the attributes sent by vds.
+ */
+ attr_msg = (vd_attr_msg_t *)&msg;
+ vdc->vdisk_size = attr_msg->vdisk_size;
+ vdc->vdisk_type = attr_msg->vdisk_type;
+
+ if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) ||
+ (attr_msg->vdisk_block_size != vdc->block_size)) {
+ /*
+ * Future support: step down to the block size
+ * and max transfer size suggested by the
+ * server. (If this value is less than 128K
+ * then multiple Dring entries per request
+ * would need to be implemented)
+ */
+ cmn_err(CE_NOTE, "[%d] Couldn't process block "
+ "attrs from vds", vdc->instance);
+ status = EINVAL;
+ break;
+ }
+
+ if ((attr_msg->xfer_mode != VIO_DRING_MODE) ||
+ (attr_msg->vdisk_size > INT64_MAX) ||
+ (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
+ vdc_msg("%s[%d] Couldn't process attrs "
+ "from vds", __func__, vdc->instance);
+ status = EINVAL;
+ break;
+ }
+
+ vdc->state = VD_STATE_ATTR;
+ status = vdc_init_dring_negotiate(vdc);
+ break;
+
+ case VIO_SUBTYPE_NACK:
+ /*
+ * vds could not handle the attributes we sent so we
+ * stop negotiating.
+ */
+ status = EPROTO;
+ break;
+
+ case VIO_SUBTYPE_INFO:
+ /*
+ * Handle the case where vds starts the handshake
+ * (for now; vdc is the only supported instigator)
+ */
+ status = ENOTSUP;
+ break;
+
+ default:
+ status = ENOTSUP;
+ break;
+ }
+ break;
+
+
+ case VD_STATE_ATTR:
+ if (msg.tag.vio_subtype_env != VIO_DRING_REG) {
+ status = EPROTO;
+ break;
+ }
+
+ switch (msg.tag.vio_subtype) {
+ case VIO_SUBTYPE_ACK:
+ /* Verify that we have sent all the descr. ring info */
+ /* nop for now as we have just 1 dring */
+ dring_msg = (vio_dring_reg_msg_t *)&msg;
+
+ /* save the received dring_ident */
+ vdc->dring_ident = dring_msg->dring_ident;
+ PR0("%s[%d] Received dring ident=0x%lx\n",
+ __func__, vdc->instance, vdc->dring_ident);
+
+ /*
+ * Send an RDX message to vds to indicate we are ready
+ * to send data
+ */
+ msg.tag.vio_msgtype = VIO_TYPE_CTRL;
+ msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
+ msg.tag.vio_subtype_env = VIO_RDX;
+ msg.tag.vio_sid = vdc->session_id;
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&msg,
+ &msglen);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "[%d] Failed to send RDX"
+ " message (%d)", vdc->instance, status);
+ break;
+ }
+
+ status = vdc_create_fake_geometry(vdc);
+ if (status != 0) {
+ cmn_err(CE_NOTE, "[%d] Failed to create disk "
+ "geometery(%d)", vdc->instance, status);
+ break;
+ }
+
+ vdc->state = VD_STATE_RDX;
+ break;
+
+ case VIO_SUBTYPE_NACK:
+ /*
+ * vds could not handle the DRing info we sent so we
+ * stop negotiating.
+ */
+ cmn_err(CE_CONT, "server could not register DRing\n");
+ vdc_reset_connection(vdc, B_FALSE);
+ vdc_destroy_descriptor_ring(vdc);
+ status = EPROTO;
+ break;
+
+ case VIO_SUBTYPE_INFO:
+ /*
+ * Handle the case where vds starts handshake
+ * (for now only vdc is the instigator)
+ */
+ status = ENOTSUP;
+ break;
+ default:
+ status = ENOTSUP;
+ }
+ break;
+
+ case VD_STATE_RDX:
+ if (msg.tag.vio_subtype_env != VIO_RDX) {
+ status = EPROTO;
+ break;
+ }
+
+ PR0("%s: Received RDX - handshake successful\n", __func__);
+
+ status = 0;
+ vdc->state = VD_STATE_DATA;
+
+ cv_broadcast(&vdc->attach_cv);
+ break;
+
+ default:
+ cmn_err(CE_NOTE, "[%d] unknown handshake negotiation state %d",
+ vdc->instance, vdc->state);
+ break;
+ }
+
+ return (status);
+}
+
+
+/*
+ * Function:
+ * vdc_process_data_msg()
+ *
+ * Description:
+ * This function is called by the message processing thread each time
+ * a message with a msgtype of VIO_TYPE_DATA is received. It will either
+ * be an ACK or NACK from vds[1] which vdc handles as follows.
+ * ACK - wake up the waiting thread
+ * NACK - resend any messages necessary
+ *
+ * [1] Although the message format allows it, vds should not send a
+ * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
+ * some bizarre reason it does, vdc will reset the connection.
+ *
+ * Arguments:
+ * vdc - soft state pointer for this instance of the device driver.
+ * msg - the LDC message sent by vds
+ *
+ * Return Code:
+ * 0 - Success.
+ * > 0 - error value returned by LDC
+ */
+static int
+vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg)
+{
+ int status = 0;
+ vdc_local_desc_t *local_dep = NULL;
+ vio_dring_msg_t *dring_msg = NULL;
+ size_t msglen = sizeof (*dring_msg);
+ uint_t num_msgs;
+ uint_t start;
+ uint_t end;
+ uint_t i;
+
+ ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA);
+ ASSERT(vdc != NULL);
+ ASSERT(mutex_owned(&vdc->lock));
+
+ dring_msg = (vio_dring_msg_t *)&msg;
+
+ /*
+ * Check to see if the message has bogus data
+ */
+ start = dring_msg->start_idx;
+ end = dring_msg->end_idx;
+ if ((start >= VD_DRING_LEN) || (end >= VD_DRING_LEN)) {
+ vdc_msg("%s: Bogus ACK data : start %d, end %d\n",
+ __func__, start, end);
+ return (EPROTO);
+ }
+
+ /*
+ * calculate the number of messages that vds ACK'ed
+ *
+ * Assumes, (like the rest of vdc) that there is a 1:1 mapping
+ * between requests and Dring entries.
+ */
+ num_msgs = (end >= start) ?
+ (end - start + 1) :
+ (VD_DRING_LEN - start + end + 1);
+
+ /*
+ * Verify that the sequence number is what vdc expects.
+ */
+ if (vdc_verify_seq_num(vdc, dring_msg, num_msgs) == B_FALSE) {
+ return (ENXIO);
+ }
+
+ switch (msg.tag.vio_subtype) {
+ case VIO_SUBTYPE_ACK:
+ PR2("%s: DATA ACK\n", __func__);
+
+ /*
+ * Wake the thread waiting for each DRing entry ACK'ed
+ */
+ for (i = 0; i < num_msgs; i++) {
+ int idx = (start + i) % VD_DRING_LEN;
+
+ local_dep = &vdc->local_dring[idx];
+ mutex_enter(&local_dep->lock);
+ cv_signal(&local_dep->cv);
+ mutex_exit(&local_dep->lock);
+ }
+ break;
+
+ case VIO_SUBTYPE_NACK:
+ PR0("%s: DATA NACK\n", __func__);
+ dring_msg = (vio_dring_msg_t *)&msg;
+ VDC_DUMP_DRING_MSG(dring_msg);
+
+ /* Resend necessary messages */
+ for (i = 0; i < num_msgs; i++) {
+ int idx = (start + i) % VD_DRING_LEN;
+
+ local_dep = &vdc->local_dring[idx];
+ ASSERT(local_dep != NULL);
+ mutex_enter(&local_dep->lock);
+
+ if (local_dep->dep->hdr.dstate != VIO_DESC_READY) {
+ PR0("%s[%d]: Won't resend entry %d [flag=%d]\n",
+ __func__, vdc->instance, idx,
+ local_dep->dep->hdr.dstate);
+ mutex_exit(&local_dep->lock);
+ break;
+ }
+
+ /* we'll reuse the message passed in */
+ VIO_INIT_DRING_DATA_TAG(msg);
+ dring_msg->tag.vio_sid = vdc->session_id;
+ dring_msg->seq_num = ++(vdc->seq_num);
+ VDC_DUMP_DRING_MSG(dring_msg);
+
+ status = vdc_send(vdc->ldc_handle, (caddr_t)&dring_msg,
+ &msglen);
+ PR1("%s: ldc_write() status=%d\n", __func__, status);
+ if (status != 0) {
+ vdc_msg("%s ldc_write(%d)\n", __func__, status);
+ mutex_exit(&local_dep->lock);
+ break;
+ }
+
+ mutex_exit(&local_dep->lock);
+ }
+ break;
+
+ case VIO_SUBTYPE_INFO:
+ default:
+ cmn_err(CE_NOTE, "[%d] Got an unexpected DATA msg [subtype %d]",
+ vdc->instance, msg.tag.vio_subtype);
+ break;
+ }
+
+ return (status);
+}
+
+/*
+ * Function:
+ * vdc_process_err_msg()
+ *
+ * NOTE: No error messages are used as part of the vDisk protocol
+ */
+static int
+vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg)
+{
+ _NOTE(ARGUNUSED(vdc))
+ _NOTE(ARGUNUSED(msg))
+
+ int status = ENOTSUP;
+
+ ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR);
+ cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance);
+
+ return (status);
+}
+
+/*
+ * Function:
+ * vdc_verify_seq_num()
+ *
+ * Description:
+ * This functions verifies that the sequence number sent back by vds with
+ * the latest message correctly follows the last request processed.
+ *
+ * Arguments:
+ * vdc - soft state pointer for this instance of the driver.
+ * dring_msg - pointer to the LDC message sent by vds
+ * num_msgs - the number of requests being acknowledged
+ *
+ * Return Code:
+ * B_TRUE - Success.
+ * B_FALSE - The seq numbers are so out of sync, vdc cannot deal with them
+ */
+static boolean_t
+vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int num_msgs)
+{
+ ASSERT(vdc != NULL);
+ ASSERT(dring_msg != NULL);
+
+ /*
+ * Check to see if the messages were responded to in the correct
+ * order by vds. There are 3 possible scenarios:
+ * - the seq_num we expected is returned (everything is OK)
+ * - a seq_num earlier than the last one acknowledged is returned,
+ * if so something is seriously wrong so we reset the connection
+ * - a seq_num greater than what we expected is returned.
+ */
+ if (dring_msg->seq_num != (vdc->seq_num_reply + num_msgs)) {
+ vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n",
+ __func__, vdc->instance, dring_msg->seq_num,
+ vdc->seq_num_reply + num_msgs);
+ if (dring_msg->seq_num < (vdc->seq_num_reply + num_msgs)) {
+ return (B_FALSE);
+ } else {
+ /*
+ * vds has responded with a seq_num greater than what we
+ * expected
+ */
+ return (B_FALSE);
+ }
+ }
+ vdc->seq_num_reply += num_msgs;
+
+ return (B_TRUE);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * DKIO(7) support
+ *
+ * XXX FIXME - needs to be converted to use the structures defined in the
+ * latest VIO spec to communicate with the vDisk server.
+ */
+
+typedef struct vdc_dk_arg {
+ struct dk_callback dkc;
+ int mode;
+ dev_t dev;
+ vdc_t *vdc;
+} vdc_dk_arg_t;
+
+/*
+ * Function:
+ * vdc_dkio_flush_cb()
+ *
+ * Description:
+ * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
+ * by kernel code.
+ *
+ * Arguments:
+ * arg - a pointer to a vdc_dk_arg_t structure.
+ */
+void
+vdc_dkio_flush_cb(void *arg)
+{
+ struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg;
+ struct dk_callback *dkc = NULL;
+ vdc_t *vdc = NULL;
+ int rv;
+
+ if (dk_arg == NULL) {
+ vdc_msg("%s[?] DKIOCFLUSHWRITECACHE arg is NULL\n", __func__);
+ return;
+ }
+ dkc = &dk_arg->dkc;
+ vdc = dk_arg->vdc;
+ ASSERT(vdc != NULL);
+
+ rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH,
+ dk_arg->mode, SDPART(getminor(dk_arg->dev)));
+ if (rv != 0) {
+ PR0("%s[%d] DKIOCFLUSHWRITECACHE failed : model %x\n",
+ __func__, vdc->instance,
+ ddi_model_convert_from(dk_arg->mode & FMODELS));
+ return;
+ }
+
+ /*
+ * Trigger the call back to notify the caller that the ioctl call has
+ * been completed.
+ */
+ if ((dk_arg->mode & FKIOCTL) &&
+ (dkc != NULL) &&
+ (dkc->dkc_callback != NULL)) {
+ ASSERT(dkc->dkc_cookie != NULL);
+ (*dkc->dkc_callback)(dkc->dkc_cookie, ENOTSUP);
+ }
+
+ /* Indicate that one less DKIO write flush is outstanding */
+ mutex_enter(&vdc->lock);
+ vdc->dkio_flush_pending--;
+ ASSERT(vdc->dkio_flush_pending >= 0);
+ mutex_exit(&vdc->lock);
+}
+
+
+/*
+ * This structure is used in the DKIO(7I) array below.
+ */
+typedef struct vdc_dk_ioctl {
+ uint8_t op; /* VD_OP_XXX value */
+ int cmd; /* Solaris ioctl operation number */
+ uint8_t copy; /* copyin and/or copyout needed ? */
+ size_t nbytes; /* size of structure to be copied */
+ size_t nbytes32; /* size of 32bit struct if different */
+ /* to 64bit struct (zero otherwise) */
+} vdc_dk_ioctl_t;
+
+/*
+ * Subset of DKIO(7I) operations currently supported
+ */
+static vdc_dk_ioctl_t dk_ioctl[] = {
+ {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0,
+ 0, 0},
+ {VD_OP_GET_WCE, DKIOCGETWCE, 0,
+ 0, 0},
+ {VD_OP_SET_WCE, DKIOCSETWCE, 0,
+ 0, 0},
+ {VD_OP_GET_VTOC, DKIOCGVTOC, VD_COPYOUT,
+ sizeof (struct vtoc), sizeof (struct vtoc32)},
+ {VD_OP_SET_VTOC, DKIOCSVTOC, VD_COPYIN,
+ sizeof (struct vtoc), sizeof (struct vtoc32)},
+ {VD_OP_SET_DISKGEOM, DKIOCSGEOM, VD_COPYIN,
+ sizeof (struct dk_geom), 0},
+ {VD_OP_GET_DISKGEOM, DKIOCGGEOM, VD_COPYOUT,
+ sizeof (struct dk_geom), 0},
+ {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, VD_COPYOUT,
+ sizeof (struct dk_geom), 0},
+ {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, VD_COPYOUT,
+ sizeof (struct dk_geom), 0},
+ {VD_OP_SET_DISKGEOM, DKIOCSGEOM, VD_COPYOUT,
+ sizeof (struct dk_geom), 0},
+ {VD_OP_SCSICMD, USCSICMD, VD_COPYIN|VD_COPYOUT,
+ sizeof (struct uscsi_cmd), sizeof (struct uscsi_cmd32)},
+ {0, DKIOCINFO, VD_COPYOUT,
+ sizeof (struct dk_cinfo), 0},
+ {0, DKIOCGMEDIAINFO, VD_COPYOUT,
+ sizeof (struct dk_minfo), 0},
+ {0, DKIOCREMOVABLE, 0,
+ 0, 0},
+ {0, CDROMREADOFFSET, 0,
+ 0, 0}
+};
+
+/*
+ * Function:
+ * vd_process_ioctl()
+ *
+ * Description:
+ * This routine is the driver entry point for handling user
+ * requests to get the device geometry.
+ *
+ * Arguments:
+ * dev - the device number
+ * cmd - the operation [dkio(7I)] to be processed
+ * arg - pointer to user provided structure
+ * (contains data to be set or reference parameter for get)
+ * mode - bit flag, indicating open settings, 32/64 bit type, etc
+ * rvalp - calling process return value, used in some ioctl calls
+ * (passed through to vds who fills in the value)
+ *
+ * Assumptions:
+ * vds will make the ioctl calls in the 64 bit address space so vdc
+ * will convert the data to/from 32 bit as necessary before doing
+ * the copyin or copyout.
+ *
+ * Return Code:
+ * 0
+ * EFAULT
+ * ENXIO
+ * EIO
+ * ENOTSUP
+ */
+static int
+vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode)
+{
+ int instance = SDUNIT(getminor(dev));
+ vdc_t *vdc = NULL;
+ int op = -1; /* VD_OP_XXX value */
+ int rv = -1;
+ int idx = 0; /* index into dk_ioctl[] */
+ size_t len = 0; /* #bytes to send to vds */
+ size_t alloc_len = 0; /* #bytes to allocate mem for */
+ size_t copy_len = 0; /* #bytes to copy in/out */
+ caddr_t mem_p = NULL;
+ boolean_t do_convert_32to64 = B_FALSE;
+ size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0]));
+
+ PR0("%s: Processing ioctl(%x) for dev %x : model %x\n",
+ __func__, cmd, dev, ddi_model_convert_from(mode & FMODELS));
+
+ vdc = ddi_get_soft_state(vdc_state, instance);
+ if (vdc == NULL) {
+ cmn_err(CE_NOTE, "![%d] Could not get soft state structure",
+ instance);
+ return (ENXIO);
+ }
+
+ /*
+ * Check to see if we can communicate with the vDisk server
+ */
+ rv = vdc_is_able_to_tx_data(vdc, O_NONBLOCK);
+ if (rv == B_FALSE) {
+ PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
+ return (ENOLINK);
+ }
+
+ /*
+ * Validate the ioctl operation to be performed.
+ *
+ * If we have looped through the array without finding a match then we
+ * don't support this ioctl.
+ */
+ for (idx = 0; idx < nioctls; idx++) {
+ if (cmd == dk_ioctl[idx].cmd)
+ break;
+ }
+
+ if (idx >= nioctls) {
+ PR0("%s[%d] Unsupported ioctl(%x)\n",
+ __func__, vdc->instance, cmd);
+ return (ENOTSUP);
+ }
+
+ copy_len = len = dk_ioctl[idx].nbytes;
+ op = dk_ioctl[idx].op;
+
+ /*
+ * Some ioctl operations have different sized structures for 32 bit
+ * and 64 bit. If the userland caller is 32 bit, we need to check
+ * to see if the operation is one of those special cases and
+ * flag that we need to convert to and/or from 32 bit since vds
+ * will make the call as 64 bit.
+ */
+ if ((ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) &&
+ (dk_ioctl[idx].nbytes != 0) &&
+ (dk_ioctl[idx].nbytes32 != 0)) {
+ do_convert_32to64 = B_TRUE;
+ copy_len = dk_ioctl[idx].nbytes32;
+ }
+
+ /*
+ * Deal with the ioctls which the server does not provide.
+ */
+ switch (cmd) {
+ case CDROMREADOFFSET:
+ case DKIOCREMOVABLE:
+ return (ENOTTY);
+
+ case DKIOCINFO:
+ {
+ struct dk_cinfo cinfo;
+ if (vdc->cinfo == NULL)
+ return (ENXIO);
+
+ bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo));
+ cinfo.dki_partition = SDPART(getminor(dev));
+
+ rv = ddi_copyout(&cinfo, (void *)arg,
+ sizeof (struct dk_cinfo), mode);
+ if (rv != 0)
+ return (EFAULT);
+
+ return (0);
+ }
+
+ case DKIOCGMEDIAINFO:
+ if (vdc->minfo == NULL)
+ return (ENXIO);
+
+ rv = ddi_copyout(vdc->minfo, (void *)arg,
+ sizeof (struct dk_minfo), mode);
+ if (rv != 0)
+ return (EFAULT);
+
+ return (0);
+ }
+
+ /* catch programming error in vdc - should be a VD_OP_XXX ioctl */
+ ASSERT(op != 0);
+
+ /* LDC requires that the memory being mapped is 8-byte aligned */
+ alloc_len = P2ROUNDUP(len, sizeof (uint64_t));
+ PR1("%s[%d]: struct size %d alloc %d\n",
+ __func__, instance, len, alloc_len);
+
+ if (alloc_len != 0)
+ mem_p = kmem_zalloc(alloc_len, KM_SLEEP);
+
+ if (dk_ioctl[idx].copy & VD_COPYIN) {
+ if (arg == NULL) {
+ if (mem_p != NULL)
+ kmem_free(mem_p, alloc_len);
+ return (EINVAL);
+ }
+
+ ASSERT(copy_len != 0);
+
+ rv = ddi_copyin((void *)arg, mem_p, copy_len, mode);
+ if (rv != 0) {
+ if (mem_p != NULL)
+ kmem_free(mem_p, alloc_len);
+ return (EFAULT);
+ }
+
+ /*
+ * some operations need the data to be converted from 32 bit
+ * to 64 bit structures so that vds can process them on the
+ * other side.
+ */
+ if (do_convert_32to64) {
+ switch (cmd) {
+ case DKIOCSVTOC:
+ {
+ struct vtoc vt;
+ struct vtoc32 vt32;
+
+ ASSERT(mem_p != NULL);
+ vt32 = *((struct vtoc32 *)(mem_p));
+
+ vtoc32tovtoc(vt32, vt);
+ bcopy(&vt, mem_p, len);
+ break;
+ }
+
+ case USCSICMD:
+ {
+ struct uscsi_cmd scmd;
+ struct uscsi_cmd *uscmd = &scmd;
+ struct uscsi_cmd32 *uscmd32;
+
+ ASSERT(mem_p != NULL);
+ uscmd32 = (struct uscsi_cmd32 *)mem_p;
+
+ /*
+ * Convert the ILP32 uscsi data from the
+ * application to LP64 for internal use.
+ */
+ uscsi_cmd32touscsi_cmd(uscmd32, uscmd);
+ bcopy(uscmd, mem_p, len);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ /*
+ * handle the special case of DKIOCFLUSHWRITECACHE
+ */
+ if (cmd == DKIOCFLUSHWRITECACHE) {
+ struct dk_callback *dkc = (struct dk_callback *)arg;
+
+ PR0("%s[%d]: DKIOCFLUSHWRITECACHE\n", __func__, instance);
+
+ /* no mem should have been allocated hence no need to free it */
+ ASSERT(mem_p == NULL);
+
+ /*
+ * If arg is NULL, we break here and the call operates
+ * synchronously; waiting for vds to return.
+ *
+ * i.e. after the request to vds returns successfully,
+ * all writes completed prior to the ioctl will have been
+ * flushed from the disk write cache to persistent media.
+ */
+ if (dkc != NULL) {
+ vdc_dk_arg_t arg;
+ arg.mode = mode;
+ arg.dev = dev;
+ bcopy(dkc, &arg.dkc, sizeof (*dkc));
+
+ mutex_enter(&vdc->lock);
+ vdc->dkio_flush_pending++;
+ arg.vdc = vdc;
+ mutex_exit(&vdc->lock);
+
+ /* put the request on a task queue */
+ rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb,
+ (void *)&arg, DDI_SLEEP);
+
+ return (rv == NULL ? ENOMEM : 0);
+ }
+ }
+
+ /*
+ * send request to vds to service the ioctl.
+ */
+ rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, op, mode,
+ SDPART((getminor(dev))));
+ if (rv != 0) {
+ /*
+ * This is not necessarily an error. The ioctl could
+ * be returning a value such as ENOTTY to indicate
+ * that the ioctl is not applicable.
+ */
+ PR0("%s[%d]: vds returned %d for ioctl 0x%x\n",
+ __func__, instance, rv, cmd);
+ if (mem_p != NULL)
+ kmem_free(mem_p, alloc_len);
+ return (rv);
+ }
+
+ /*
+ * If the VTOC has been changed, then vdc needs to update the copy
+ * it saved in the soft state structure and try and update the device
+ * node properties. Failing to set the properties should not cause
+ * an error to be return the caller though.
+ */
+ if (cmd == DKIOCSVTOC) {
+ bcopy(mem_p, vdc->vtoc, sizeof (struct vtoc));
+ if (vdc_create_device_nodes_props(vdc)) {
+ cmn_err(CE_NOTE, "![%d] Failed to update device nodes"
+ " properties", instance);
+ }
+ }
+
+ /*
+ * if we don't have to do a copyout, we have nothing left to do
+ * so we just return.
+ */
+ if ((dk_ioctl[idx].copy & VD_COPYOUT) == 0) {
+ if (mem_p != NULL)
+ kmem_free(mem_p, alloc_len);
+ return (0);
+ }
+
+ /* sanity check */
+ if (mem_p == NULL)
+ return (EFAULT);
+
+
+ /*
+ * some operations need the data to be converted from 64 bit
+ * back to 32 bit structures after vds has processed them.
+ */
+ if (do_convert_32to64) {
+ switch (cmd) {
+ case DKIOCGVTOC:
+ {
+ struct vtoc vt;
+ struct vtoc32 vt32;
+
+ ASSERT(mem_p != NULL);
+ vt = *((struct vtoc *)(mem_p));
+
+ vtoctovtoc32(vt, vt32);
+ bcopy(&vt32, mem_p, copy_len);
+ break;
+ }
+
+ case USCSICMD:
+ {
+ struct uscsi_cmd32 *uc32;
+ struct uscsi_cmd *uc;
+
+ len = sizeof (struct uscsi_cmd32);
+
+ ASSERT(mem_p != NULL);
+ uc = (struct uscsi_cmd *)mem_p;
+ uc32 = kmem_zalloc(len, KM_SLEEP);
+
+ uscsi_cmdtouscsi_cmd32(uc, uc32);
+ bcopy(uc32, mem_p, copy_len);
+ PR0("%s[%d]: uscsi_cmd32:%x\n", __func__, instance,
+ ((struct uscsi_cmd32 *)mem_p)->uscsi_cdblen);
+ kmem_free(uc32, len);
+ break;
+ }
+ default:
+ PR1("%s[%d]: This mode (%x) should just work for(%x)\n",
+ __func__, instance, mode, cmd);
+ break;
+ }
+ }
+
+ ASSERT(len != 0);
+ ASSERT(mem_p != NULL);
+
+ rv = ddi_copyout(mem_p, (void *)arg, copy_len, mode);
+ if (rv != 0) {
+ vdc_msg("%s[%d]: Could not do copy out for ioctl (%x)\n",
+ __func__, instance, cmd);
+ rv = EFAULT;
+ }
+
+ if (mem_p != NULL)
+ kmem_free(mem_p, alloc_len);
+
+ return (rv);
+}
+
+/*
+ * Function:
+ * vdc_create_fake_geometry()
+ *
+ * Description:
+ * This routine fakes up the disk info needed for some DKIO ioctls.
+ * - DKIOCINFO
+ * - DKIOCGMEDIAINFO
+ *
+ * [ just like lofi(7D) and ramdisk(7D) ]
+ *
+ * Arguments:
+ * vdc - soft state pointer for this instance of the device driver.
+ *
+ * Return Code:
+ * 0 - Success
+ */
+static int
+vdc_create_fake_geometry(vdc_t *vdc)
+{
+ ASSERT(vdc != NULL);
+
+ /*
+ * DKIOCINFO support
+ */
+ vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
+
+ (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
+ (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
+ vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz / vdc->block_size;
+ vdc->cinfo->dki_ctype = DKC_SCSI_CCS;
+ vdc->cinfo->dki_flags = DKI_FMTVOL;
+ vdc->cinfo->dki_cnum = 0;
+ vdc->cinfo->dki_addr = 0;
+ vdc->cinfo->dki_space = 0;
+ vdc->cinfo->dki_prio = 0;
+ vdc->cinfo->dki_vec = 0;
+ vdc->cinfo->dki_unit = vdc->instance;
+ vdc->cinfo->dki_slave = 0;
+ /*
+ * The partition number will be created on the fly depending on the
+ * actual slice (i.e. minor node) that is used to request the data.
+ */
+ vdc->cinfo->dki_partition = 0;
+
+ /*
+ * DKIOCGMEDIAINFO support
+ */
+ vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);
+ vdc->minfo->dki_media_type = DK_FIXED_DISK;
+ vdc->minfo->dki_capacity = 1;
+ vdc->minfo->dki_lbsize = DEV_BSIZE;
+
+ return (0);
+}
diff --git a/usr/src/uts/sun4v/io/vds.c b/usr/src/uts/sun4v/io/vds.c
new file mode 100644
index 0000000000..0495ef2de3
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vds.c
@@ -0,0 +1,2013 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Virtual disk server
+ */
+
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/dkio.h>
+#include <sys/file.h>
+#include <sys/mdeg.h>
+#include <sys/modhash.h>
+#include <sys/note.h>
+#include <sys/pathname.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/vio_common.h>
+#include <sys/vdsk_mailbox.h>
+#include <sys/vdsk_common.h>
+#include <sys/vtoc.h>
+#include <sys/scsi/impl/uscsi.h>
+
+
+/* Virtual disk server initialization flags */
+#define VDS_LOCKING 0x01
+#define VDS_LDI 0x02
+#define VDS_MDEG 0x04
+
+/* Virtual disk server tunable parameters */
+#define VDS_LDC_RETRIES 3
+#define VDS_NCHAINS 32
+
+/* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
+#define VDS_NAME "virtual-disk-server"
+
+#define VD_NAME "vd"
+#define VD_VOLUME_NAME "vdisk"
+#define VD_ASCIILABEL "Virtual Disk"
+
+/* Machine-description property names used to find exported disks */
+#define VD_CHANNEL_ENDPOINT "channel-endpoint"
+#define VD_ID_PROP "id"
+#define VD_BLOCK_DEVICE_PROP "vds-block-device"
+
+/* Virtual disk initialization flags */
+#define VD_LOCKING 0x01
+#define VD_TASKQ 0x02
+#define VD_LDC 0x04
+#define VD_DRING 0x08
+#define VD_SID 0x10
+#define VD_SEQ_NUM 0x20
+
+/* Flags for opening/closing backing devices via LDI */
+#define VD_OPEN_FLAGS (FEXCL | FREAD | FWRITE)
+
+/*
+ * By Solaris convention, slice/partition 2 represents the entire disk;
+ * unfortunately, this convention does not appear to be codified.
+ */
+#define VD_ENTIRE_DISK_SLICE 2
+
+/* Return a cpp token as a string */
+#define STRINGIZE(token) #token
+
+/*
+ * Print a message prefixed with the current function name to the message log
+ * (and optionally to the console for verbose boots); these macros use cpp's
+ * concatenation of string literals and C99 variable-length-argument-list
+ * macros.  The trailing "" argument lets PRN() be used with no varargs,
+ * and the "?" prefix is cmn_err(9F)'s "console only when verbose" marker.
+ */
+#define PRN(...) _PRN("?%s(): "__VA_ARGS__, "")
+#define _PRN(format, ...) \
+ cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__)
+
+/* Return a pointer to the "i"th vdisk dring element */
+#define VD_DRING_ELEM(i) ((vd_dring_entry_t *)(void *) \
+ (vd->dring + (i)*vd->descriptor_size))
+
+/* Return the virtual disk client's type as a string (for use in messages) */
+#define VD_CLIENT(vd) \
+ (((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" : \
+ (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" : \
+ (((vd)->xfer_mode == 0) ? "null client" : \
+ "unsupported client")))
+
+/* Debugging macros (compiled out entirely unless DEBUG is defined) */
+#ifdef DEBUG
+#define PR0 if (vd_msglevel > 0) PRN
+#define PR1 if (vd_msglevel > 1) PRN
+#define PR2 if (vd_msglevel > 2) PRN
+
+/* NOTE(review): trailing ";" in this macro yields a double semicolon at use */
+#define VD_DUMP_DRING_ELEM(elem) \
+ PRN("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n", \
+ elem->hdr.dstate, \
+ elem->payload.operation, \
+ elem->payload.status, \
+ elem->payload.nbytes, \
+ elem->payload.addr, \
+ elem->payload.ncookies);
+
+#else /* !DEBUG */
+#define PR0(...)
+#define PR1(...)
+#define PR2(...)
+
+#define VD_DUMP_DRING_ELEM(elem)
+
+#endif /* DEBUG */
+
+
+/* Per-driver-instance soft state for the virtual disk server */
+typedef struct vds {
+ uint_t initialized; /* driver inst initialization flags */
+ dev_info_t *dip; /* driver inst devinfo pointer */
+ kmutex_t lock; /* lock for this structure */
+ ldi_ident_t ldi_ident; /* driver's identifier for LDI */
+ mod_hash_t *vd_table; /* table of virtual disks served */
+ mdeg_handle_t mdeg; /* handle for MDEG operations */
+} vds_t;
+
+/* Per-virtual-disk soft state (one per disk exported to a client) */
+typedef struct vd {
+ uint_t initialized; /* vdisk initialization flags */
+ kmutex_t lock; /* lock for this structure */
+ vds_t *vds; /* server for this vdisk */
+ ddi_taskq_t *taskq; /* taskq for this vdisk */
+ ldi_handle_t ldi_handle[V_NUMPAR]; /* LDI slice handles */
+ dev_t dev[V_NUMPAR]; /* dev numbers for slices */
+ uint_t nslices; /* number of slices */
+ size_t vdisk_size; /* number of blocks in vdisk */
+ vd_disk_type_t vdisk_type; /* slice or entire disk */
+ boolean_t pseudo; /* underlying pseudo dev */
+ struct dk_geom dk_geom; /* synthetic for slice type */
+ struct vtoc vtoc; /* synthetic for slice type */
+ ldc_status_t ldc_state; /* LDC connection state */
+ ldc_handle_t ldc_handle; /* handle for LDC comm */
+ size_t max_msglen; /* largest LDC message len */
+ boolean_t enabled; /* whether vdisk is enabled */
+ vd_state_t state; /* client handshake state */
+ uint8_t xfer_mode; /* transfer mode with client */
+ uint32_t sid; /* client's session ID */
+ uint64_t seq_num; /* message sequence number */
+ uint64_t dring_ident; /* identifier of dring */
+ ldc_dring_handle_t dring_handle; /* handle for dring ops */
+ uint32_t descriptor_size; /* num bytes in desc */
+ uint32_t dring_len; /* number of dring elements */
+ caddr_t dring; /* address of dring */
+} vd_t;
+
+/* Maps a vdisk operation code to the function that services it */
+typedef struct vds_operation {
+ uint8_t operation;
+ int (*function)(vd_t *vd, vd_dring_payload_t *request);
+} vds_operation_t;
+
+/* Describes how a vdisk operation translates to a dkio/uscsi ioctl */
+typedef struct ioctl {
+ uint8_t operation; /* vdisk operation code (VD_OP_*) */
+ const char *operation_name; /* stringified operation, for messages */
+ int cmd; /* ioctl command passed to the driver */
+ const char *cmd_name; /* stringified ioctl, for messages */
+ uint_t copy; /* VD_COPYIN/VD_COPYOUT direction flags */
+ size_t nbytes; /* max size of ioctl argument data */
+} ioctl_t;
+
+
+/* Retry bound for ldc_read()/ldc_write() attempts (see send_msg/recv_msg) */
+static int vds_ldc_retries = VDS_LDC_RETRIES;
+/* Soft-state anchor for all vds driver instances */
+static void *vds_state;
+static uint64_t vds_operations; /* see vds_operation[] definition below */
+
+/* Flags used when opening backing devices via LDI */
+static int vd_open_flags = VD_OPEN_FLAGS;
+
+#ifdef DEBUG
+static int vd_msglevel; /* debug verbosity; gates PR0/PR1/PR2 */
+#endif /* DEBUG */
+
+
+/*
+ * VD_OP_BREAD handler: read request->nbytes at block request->addr from
+ * the backing device slice via ldi_strategy(), then copy the data out to
+ * the client through its LDC memory cookies.  Returns 0 or an errno value.
+ */
+static int
+vd_bread(vd_t *vd, vd_dring_payload_t *request)
+{
+ int status;
+ struct buf buf;
+
+ PR1("Read %lu bytes at block %lu", request->nbytes, request->addr);
+ if (request->nbytes == 0)
+ return (EINVAL); /* no service for trivial requests */
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(request->slice < vd->nslices);
+
+ bioinit(&buf);
+ buf.b_flags = B_BUSY | B_READ;
+ buf.b_bcount = request->nbytes;
+ buf.b_un.b_addr = kmem_alloc(buf.b_bcount, KM_SLEEP);
+ buf.b_lblkno = request->addr;
+ buf.b_edev = vd->dev[request->slice];
+
+ if ((status = ldi_strategy(vd->ldi_handle[request->slice], &buf)) == 0)
+ status = biowait(&buf);
+ biofini(&buf);
+ if ((status == 0) &&
+ ((status = ldc_mem_copy(vd->ldc_handle, buf.b_un.b_addr, 0,
+ &request->nbytes, request->cookie, request->ncookies,
+ LDC_COPY_OUT)) != 0)) {
+ PRN("ldc_mem_copy() returned errno %d copying to client",
+ status);
+ }
+ /* free by b_bcount: ldc_mem_copy() may update request->nbytes */
+ kmem_free(buf.b_un.b_addr, buf.b_bcount); /* nbytes can change */
+ return (status);
+}
+
+/*
+ * Copy "nbytes" of data in from the client's LDC cookies into "data",
+ * then write it to "block" of the given backing-device slice via
+ * ldi_strategy().  The caller supplies (and frees) the "data" buffer.
+ * Returns 0 or an errno value.
+ */
+static int
+vd_do_bwrite(vd_t *vd, uint_t slice, diskaddr_t block, size_t nbytes,
+ ldc_mem_cookie_t *cookie, uint64_t ncookies, caddr_t data)
+{
+ int status;
+ struct buf buf;
+
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(slice < vd->nslices);
+ ASSERT(nbytes != 0);
+ ASSERT(data != NULL);
+
+ /* Get data from client */
+ if ((status = ldc_mem_copy(vd->ldc_handle, data, 0, &nbytes,
+ cookie, ncookies, LDC_COPY_IN)) != 0) {
+ PRN("ldc_mem_copy() returned errno %d copying from client",
+ status);
+ return (status);
+ }
+
+ bioinit(&buf);
+ buf.b_flags = B_BUSY | B_WRITE;
+ buf.b_bcount = nbytes;
+ buf.b_un.b_addr = data;
+ buf.b_lblkno = block;
+ buf.b_edev = vd->dev[slice];
+
+ if ((status = ldi_strategy(vd->ldi_handle[slice], &buf)) == 0)
+ status = biowait(&buf);
+ biofini(&buf);
+ return (status);
+}
+
+/*
+ * VD_OP_BWRITE handler: allocate a staging buffer for the client's data
+ * and delegate to vd_do_bwrite().  Returns 0 or an errno value.
+ */
+static int
+vd_bwrite(vd_t *vd, vd_dring_payload_t *request)
+{
+ int status;
+ caddr_t data;
+
+
+ PR1("Write %ld bytes at block %lu", request->nbytes, request->addr);
+ if (request->nbytes == 0)
+ return (EINVAL); /* no service for trivial requests */
+ data = kmem_alloc(request->nbytes, KM_SLEEP);
+ status = vd_do_bwrite(vd, request->slice, request->addr,
+ request->nbytes, request->cookie, request->ncookies, data);
+ kmem_free(data, request->nbytes);
+ return (status);
+}
+
+/*
+ * Service DKIOCGGEOM/DKIOCGVTOC internally for single-slice disks by
+ * copying the synthetic geometry/vtoc kept in the vd structure into
+ * "buf"; any other ioctl is unsupported on this path (ENOTSUP).
+ */
+static int
+vd_do_slice_ioctl(vd_t *vd, int cmd, void *buf)
+{
+ switch (cmd) {
+ case DKIOCGGEOM:
+ ASSERT(buf != NULL);
+ bcopy(&vd->dk_geom, buf, sizeof (vd->dk_geom));
+ return (0);
+ case DKIOCGVTOC:
+ ASSERT(buf != NULL);
+ bcopy(&vd->vtoc, buf, sizeof (vd->vtoc));
+ return (0);
+ default:
+ return (ENOTSUP);
+ }
+}
+
+/*
+ * Perform the ioctl described by "ioctl" on behalf of the client:
+ * optionally copy argument data in from the client, execute the ioctl
+ * (internally for non-pseudo single-slice disks, via ldi_ioctl()
+ * otherwise), then optionally copy result data back out to the client.
+ * Returns 0 or an errno value.
+ */
+static int
+vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, ioctl_t *ioctl)
+{
+ int rval = 0, status;
+ size_t nbytes = request->nbytes; /* modifiable copy */
+
+
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(request->slice < vd->nslices);
+ PR0("Performing %s", ioctl->operation_name);
+
+ /* Get data from client, if necessary */
+ if (ioctl->copy & VD_COPYIN) {
+ ASSERT(nbytes != 0 && buf != NULL);
+ PR1("Getting \"arg\" data from client");
+ if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
+ request->cookie, request->ncookies,
+ LDC_COPY_IN)) != 0) {
+ PRN("ldc_mem_copy() returned errno %d "
+ "copying from client", status);
+ return (status);
+ }
+ }
+
+ /*
+ * Handle single-slice block devices internally; otherwise, have the
+ * real driver perform the ioctl()
+ */
+ if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) {
+ if ((status = vd_do_slice_ioctl(vd, ioctl->cmd, buf)) != 0)
+ return (status);
+ } else if ((status = ldi_ioctl(vd->ldi_handle[request->slice],
+ ioctl->cmd, (intptr_t)buf, FKIOCTL, kcred, &rval)) != 0) {
+ PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status);
+ return (status);
+ }
+#ifdef DEBUG
+ /* the vdisk protocol carries no channel for the driver's rval */
+ if (rval != 0) {
+ PRN("%s set rval = %d, which is not being returned to client",
+ ioctl->cmd_name, rval);
+ }
+#endif /* DEBUG */
+
+ /* Send data to client, if necessary */
+ if (ioctl->copy & VD_COPYOUT) {
+ ASSERT(nbytes != 0 && buf != NULL);
+ PR1("Sending \"arg\" data to client");
+ if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
+ request->cookie, request->ncookies,
+ LDC_COPY_OUT)) != 0) {
+ PRN("ldc_mem_copy() returned errno %d "
+ "copying to client", status);
+ return (status);
+ }
+ }
+
+ return (status);
+}
+
+/* Round a size up to a multiple of 8 bytes (uint64_t alignment) */
+#define RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t))
+/*
+ * Common handler for all ioctl-based vdisk operations: find the table
+ * entry matching the client's "operation", validate the client's
+ * "nbytes" against the ioctl's argument size, and hand off to
+ * vd_do_ioctl() with a scratch buffer for the argument data.
+ * Returns 0 or an errno value.
+ */
+static int
+vd_ioctl(vd_t *vd, vd_dring_payload_t *request)
+{
+ static ioctl_t ioctl[] = {
+ /* Command (no-copy) operations */
+ {VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), DKIOCFLUSHWRITECACHE,
+ STRINGIZE(DKIOCFLUSHWRITECACHE), 0, 0},
+
+ /* "Get" (copy-out) operations */
+ {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), DKIOCGETWCE,
+ STRINGIZE(DKIOCGETWCE), VD_COPYOUT, RNDSIZE(int)},
+ {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), DKIOCGGEOM,
+ STRINGIZE(DKIOCGGEOM), VD_COPYOUT, RNDSIZE(struct dk_geom)},
+ {VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), DKIOCGVTOC,
+ STRINGIZE(DKIOCGVTOC), VD_COPYOUT, RNDSIZE(struct vtoc)},
+
+ /* "Set" (copy-in) operations */
+ /* VD_OP_SET_WCE is a "set": its int argument is copied in */
+ {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), DKIOCSETWCE,
+ STRINGIZE(DKIOCSETWCE), VD_COPYIN, RNDSIZE(int)},
+ {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), DKIOCSGEOM,
+ STRINGIZE(DKIOCSGEOM), VD_COPYIN, RNDSIZE(struct dk_geom)},
+ {VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), DKIOCSVTOC,
+ STRINGIZE(DKIOCSVTOC), VD_COPYIN, RNDSIZE(struct vtoc)},
+
+ /* "Get/set" (copy-in/copy-out) operations */
+ {VD_OP_SCSICMD, STRINGIZE(VD_OP_SCSICMD), USCSICMD,
+ STRINGIZE(USCSICMD), VD_COPYIN|VD_COPYOUT,
+ RNDSIZE(struct uscsi_cmd)}
+
+ };
+ int i, status;
+ void *buf = NULL;
+ size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0]));
+
+
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(request->slice < vd->nslices);
+
+ /*
+ * Determine ioctl corresponding to caller's "operation" and
+ * validate caller's "nbytes"
+ */
+ for (i = 0; i < nioctls; i++) {
+ if (request->operation == ioctl[i].operation) {
+ if (request->nbytes > ioctl[i].nbytes) {
+ PRN("%s: Expected <= %lu \"nbytes\", "
+ "got %lu", ioctl[i].operation_name,
+ ioctl[i].nbytes, request->nbytes);
+ return (EINVAL);
+ } else if ((request->nbytes % sizeof (uint64_t)) != 0) {
+ PRN("%s: nbytes = %lu not a multiple of %lu",
+ ioctl[i].operation_name, request->nbytes,
+ sizeof (uint64_t));
+ return (EINVAL);
+ }
+
+ break;
+ }
+ }
+ ASSERT(i < nioctls); /* because "operation" already validated */
+
+ if (request->nbytes)
+ buf = kmem_zalloc(request->nbytes, KM_SLEEP);
+ status = vd_do_ioctl(vd, request, buf, &ioctl[i]);
+ if (request->nbytes)
+ kmem_free(buf, request->nbytes);
+ return (status);
+}
+
+/*
+ * Define the supported operations once the functions for performing them have
+ * been defined.  NOTE(review): the vds_operations bitmask advertised to
+ * clients is presumably derived from this table at init time -- not
+ * visible in this file section; confirm against the _init path.
+ */
+static const vds_operation_t vds_operation[] = {
+ {VD_OP_BREAD, vd_bread},
+ {VD_OP_BWRITE, vd_bwrite},
+ {VD_OP_FLUSH, vd_ioctl},
+ {VD_OP_GET_WCE, vd_ioctl},
+ {VD_OP_SET_WCE, vd_ioctl},
+ {VD_OP_GET_VTOC, vd_ioctl},
+ {VD_OP_SET_VTOC, vd_ioctl},
+ {VD_OP_GET_DISKGEOM, vd_ioctl},
+ {VD_OP_SET_DISKGEOM, vd_ioctl},
+ {VD_OP_SCSICMD, vd_ioctl}
+};
+
+/* Number of entries in vds_operation[] */
+static const size_t vds_noperations =
+ (sizeof (vds_operation))/(sizeof (vds_operation[0]));
+
+/*
+ * Process a request using a defined operation: range-check the slice,
+ * then dispatch through vds_operation[] to the handler for the request's
+ * operation code.  Returns the handler's status, EINVAL for a bad slice,
+ * or ENOTSUP for an unknown operation.
+ */
+static int
+vd_process_request(vd_t *vd, vd_dring_payload_t *request)
+{
+ int i;
+
+
+ PR1("Entered");
+ ASSERT(mutex_owned(&vd->lock));
+
+ /* Range-check slice */
+ if (request->slice >= vd->nslices) {
+ PRN("Invalid \"slice\" %u (max %u) for virtual disk",
+ request->slice, (vd->nslices - 1));
+ return (EINVAL);
+ }
+
+ /* Perform the requested operation */
+ for (i = 0; i < vds_noperations; i++)
+ if (request->operation == vds_operation[i].operation)
+ return (vds_operation[i].function(vd, request));
+
+ /* No matching operation found */
+ PRN("Unsupported operation %u", request->operation);
+ return (ENOTSUP);
+}
+
+/*
+ * Write "msglen" bytes of "msg" to the LDC channel, retrying up to
+ * vds_ldc_retries times while the channel would block (EWOULDBLOCK).
+ * Returns 0, the ldc_write() errno, or EIO on a partial write.
+ */
+static int
+send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen)
+{
+ int retry, status;
+ size_t nbytes;
+
+
+ for (retry = 0, status = EWOULDBLOCK;
+ retry < vds_ldc_retries && status == EWOULDBLOCK;
+ retry++) {
+ PR1("ldc_write() attempt %d", (retry + 1));
+ nbytes = msglen; /* ldc_write() updates nbytes; reset per try */
+ status = ldc_write(ldc_handle, msg, &nbytes);
+ }
+
+ if (status != 0) {
+ PRN("ldc_write() returned errno %d", status);
+ return (status);
+ } else if (nbytes != msglen) {
+ PRN("ldc_write() performed only partial write");
+ return (EIO);
+ }
+
+ PR1("SENT %lu bytes", msglen);
+ return (0);
+}
+
+/*
+ * Return 1 if the "type", "subtype", and "env" fields of the "tag" first
+ * argument match the corresponding remaining arguments; otherwise, return 0.
+ * Used by the message handlers below to dispatch on VIO message tags.
+ */
+int
+vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env)
+{
+ return ((tag->vio_msgtype == type) &&
+ (tag->vio_subtype == subtype) &&
+ (tag->vio_subtype_env == env)) ? 1 : 0;
+}
+
+/*
+ * Validate a version-negotiation message.  Returns ENOMSG if the tag is
+ * not a version request, EBADMSG on malformed or unsupported content
+ * (after rewriting the message with the supported version numbers so the
+ * caller's nack informs the client), or 0 when the version is accepted
+ * (with dev_class rewritten to identify this end as the disk server).
+ */
+static int
+process_ver_msg(vio_msg_t *msg, size_t msglen)
+{
+ vio_ver_msg_t *ver_msg = (vio_ver_msg_t *)msg;
+
+
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
+ VIO_VER_INFO)) {
+ return (ENOMSG); /* not a version message */
+ }
+
+ if (msglen != sizeof (*ver_msg)) {
+ PRN("Expected %lu-byte version message; "
+ "received %lu bytes", sizeof (*ver_msg), msglen);
+ return (EBADMSG);
+ }
+
+ if (ver_msg->dev_class != VDEV_DISK) {
+ PRN("Expected device class %u (disk); received %u",
+ VDEV_DISK, ver_msg->dev_class);
+ return (EBADMSG);
+ }
+
+ if ((ver_msg->ver_major != VD_VER_MAJOR) ||
+ (ver_msg->ver_minor != VD_VER_MINOR)) {
+ /* Unsupported version; send back supported version */
+ ver_msg->ver_major = VD_VER_MAJOR;
+ ver_msg->ver_minor = VD_VER_MINOR;
+ return (EBADMSG);
+ }
+
+ /* Valid message, version accepted */
+ ver_msg->dev_class = VDEV_DISK_SERVER;
+ return (0);
+}
+
+/*
+ * Validate an attribute-exchange message and record the client's chosen
+ * transfer mode.  For in-band (descriptor) mode, grow vd->max_msglen so
+ * descriptor messages carrying all their cookies can be received.  On
+ * success the message is filled in with the vdisk size, type, and the
+ * supported-operations mask for the reply.  Returns 0, ENOMSG, or EBADMSG.
+ */
+static int
+vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ vd_attr_msg_t *attr_msg = (vd_attr_msg_t *)msg;
+
+
+ PR0("Entered");
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
+ VIO_ATTR_INFO)) {
+ return (ENOMSG); /* not an attribute message */
+ }
+
+ if (msglen != sizeof (*attr_msg)) {
+ PRN("Expected %lu-byte attribute message; "
+ "received %lu bytes", sizeof (*attr_msg), msglen);
+ return (EBADMSG);
+ }
+
+ if (attr_msg->max_xfer_sz == 0) {
+ PRN("Received maximum transfer size of 0 from client");
+ return (EBADMSG);
+ }
+
+ if ((attr_msg->xfer_mode != VIO_DESC_MODE) &&
+ (attr_msg->xfer_mode != VIO_DRING_MODE)) {
+ PRN("Client requested unsupported transfer mode");
+ return (EBADMSG);
+ }
+
+
+ /* Success: valid message and transfer mode */
+ vd->xfer_mode = attr_msg->xfer_mode;
+ if (vd->xfer_mode == VIO_DESC_MODE) {
+ /*
+ * The vd_dring_inband_msg_t contains one cookie; need room
+ * for up to n-1 more cookies, where "n" is the number of full
+ * pages plus possibly one partial page required to cover
+ * "max_xfer_sz". Add room for one more cookie if
+ * "max_xfer_sz" isn't an integral multiple of the page size.
+ * Must first get the maximum transfer size in bytes.
+ */
+#if 1 /* NEWOBP */
+ /* newer clients express max_xfer_sz in blocks, older in bytes */
+ size_t max_xfer_bytes = attr_msg->vdisk_block_size ?
+ attr_msg->vdisk_block_size*attr_msg->max_xfer_sz :
+ attr_msg->max_xfer_sz;
+ size_t max_inband_msglen =
+ sizeof (vd_dring_inband_msg_t) +
+ ((max_xfer_bytes/PAGESIZE +
+ ((max_xfer_bytes % PAGESIZE) ? 1 : 0))*
+ (sizeof (ldc_mem_cookie_t)));
+#else /* NEWOBP */
+ size_t max_inband_msglen =
+ sizeof (vd_dring_inband_msg_t) +
+ ((attr_msg->max_xfer_sz/PAGESIZE
+ + (attr_msg->max_xfer_sz % PAGESIZE ? 1 : 0))*
+ (sizeof (ldc_mem_cookie_t)));
+#endif /* NEWOBP */
+
+ /*
+ * Set the maximum expected message length to
+ * accommodate in-band-descriptor messages with all
+ * their cookies
+ */
+ vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen);
+ }
+
+ attr_msg->vdisk_size = vd->vdisk_size;
+ attr_msg->vdisk_type = vd->vdisk_type;
+ attr_msg->operations = vds_operations;
+ PR0("%s", VD_CLIENT(vd));
+ return (0);
+}
+
+/*
+ * Validate a register-dring message and map the client's descriptor ring
+ * into this server's address space.  Only a single dring with a single
+ * cookie is supported.  On success, records the dring geometry in the
+ * vd structure and fills in the dring ident for the reply.
+ * Returns 0, ENOMSG, EBADMSG, or an LDC errno.
+ */
+static int
+vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ int status;
+ size_t expected;
+ ldc_mem_info_t dring_minfo;
+ vio_dring_reg_msg_t *reg_msg = (vio_dring_reg_msg_t *)msg;
+
+
+ PR0("Entered");
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
+ VIO_DRING_REG)) {
+ return (ENOMSG); /* not a register-dring message */
+ }
+
+ if (msglen < sizeof (*reg_msg)) {
+ PRN("Expected at least %lu-byte register-dring message; "
+ "received %lu bytes", sizeof (*reg_msg), msglen);
+ return (EBADMSG);
+ }
+
+ /* the message struct already contains room for one cookie */
+ expected = sizeof (*reg_msg) +
+ (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0]));
+ if (msglen != expected) {
+ PRN("Expected %lu-byte register-dring message; "
+ "received %lu bytes", expected, msglen);
+ return (EBADMSG);
+ }
+
+ if (vd->initialized & VD_DRING) {
+ PRN("A dring was previously registered; only support one");
+ return (EBADMSG);
+ }
+
+ if (reg_msg->ncookies != 1) {
+ /*
+ * In addition to fixing the assertion in the success case
+ * below, supporting drings which require more than one
+ * "cookie" requires increasing the value of vd->max_msglen
+ * somewhere in the code path prior to receiving the message
+ * which results in calling this function. Note that without
+ * making this change, the larger message size required to
+ * accommodate multiple cookies cannot be successfully
+ * received, so this function will not even get called.
+ * Gracefully accommodating more dring cookies might
+ * reasonably demand exchanging an additional attribute or
+ * making a minor protocol adjustment
+ */
+ PRN("reg_msg->ncookies = %u != 1", reg_msg->ncookies);
+ return (EBADMSG);
+ }
+
+ status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie,
+ reg_msg->ncookies, reg_msg->num_descriptors,
+ reg_msg->descriptor_size, LDC_SHADOW_MAP, &vd->dring_handle);
+ if (status != 0) {
+ PRN("ldc_mem_dring_map() returned errno %d", status);
+ return (status);
+ }
+
+ /*
+ * To remove the need for this assertion, must call
+ * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a
+ * successful call to ldc_mem_dring_map()
+ */
+ ASSERT(reg_msg->ncookies == 1);
+
+ if ((status =
+ ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) {
+ PRN("ldc_mem_dring_info() returned errno %d", status);
+ if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)
+ PRN("ldc_mem_dring_unmap() returned errno %d", status);
+ return (status);
+ }
+
+ if (dring_minfo.vaddr == NULL) {
+ PRN("Descriptor ring virtual address is NULL");
+ /* NOTE(review): returning here leaks the dring mapping */
+ return (EBADMSG); /* FIXME appropriate status? */
+ }
+
+
+ /* Valid message and dring mapped */
+ /* NOTE(review): this prints stale values; the fields are set below */
+ PR1("descriptor size = %u, dring length = %u",
+ vd->descriptor_size, vd->dring_len);
+ vd->initialized |= VD_DRING;
+ vd->dring_ident = 1; /* "There Can Be Only One" */
+ vd->dring = dring_minfo.vaddr;
+ vd->descriptor_size = reg_msg->descriptor_size;
+ vd->dring_len = reg_msg->num_descriptors;
+ reg_msg->dring_ident = vd->dring_ident;
+ return (0);
+}
+
+/*
+ * Validate an unregister-dring message against the registered dring
+ * ident.  Returns 0 when valid (the caller decides how to respond),
+ * ENOMSG for a non-matching tag, or EBADMSG on malformed content.
+ */
+static int
+vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ vio_dring_unreg_msg_t *unreg_msg = (vio_dring_unreg_msg_t *)msg;
+
+
+ PR0("Entered");
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
+ VIO_DRING_UNREG)) {
+ return (ENOMSG); /* not an unregister-dring message */
+ }
+
+ if (msglen != sizeof (*unreg_msg)) {
+ PRN("Expected %lu-byte unregister-dring message; "
+ "received %lu bytes", sizeof (*unreg_msg), msglen);
+ return (EBADMSG);
+ }
+
+ if (unreg_msg->dring_ident != vd->dring_ident) {
+ PRN("Expected dring ident %lu; received %lu",
+ vd->dring_ident, unreg_msg->dring_ident);
+ return (EBADMSG);
+ }
+
+ /* FIXME set ack in unreg_msg? */
+ return (0);
+}
+
+/*
+ * Validate an RDX (ready-to-exchange-data) message.  Returns 0 when
+ * valid, ENOMSG for a non-matching tag, or EBADMSG on a bad length.
+ */
+static int
+process_rdx_msg(vio_msg_t *msg, size_t msglen)
+{
+ PR0("Entered");
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX))
+ return (ENOMSG); /* not an RDX message */
+
+ if (msglen != sizeof (vio_rdx_msg_t)) {
+ PRN("Expected %lu-byte RDX message; received %lu bytes",
+ sizeof (vio_rdx_msg_t), msglen);
+ return (EBADMSG);
+ }
+
+ return (0);
+}
+
+/*
+ * Tear down per-connection state: unmap a registered dring, optionally
+ * reset the LDC channel itself, clear the session/sequence/dring init
+ * flags, and return the handshake to VD_STATE_INIT with the baseline
+ * message size.
+ */
+static void
+vd_reset_connection(vd_t *vd, boolean_t reset_ldc)
+{
+ int status = 0;
+
+
+ ASSERT(mutex_owned(&vd->lock));
+ PR0("Resetting connection with %s", VD_CLIENT(vd));
+ if ((vd->initialized & VD_DRING) &&
+ ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0))
+ PRN("ldc_mem_dring_unmap() returned errno %d", status);
+ if ((reset_ldc == B_TRUE) &&
+ ((status = ldc_reset(vd->ldc_handle)) != 0))
+ PRN("ldc_reset() returned errno %d", status);
+ vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING);
+ vd->state = VD_STATE_INIT;
+ vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */
+}
+
+/*
+ * Enforce in-order sequence numbers once the first one has been seen:
+ * a gap resets the connection (without an LDC reset) and returns 1.
+ * Otherwise records the new sequence number and returns 0.
+ */
+static int
+vd_check_seq_num(vd_t *vd, uint64_t seq_num)
+{
+ ASSERT(mutex_owned(&vd->lock));
+ if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) {
+ PRN("Received seq_num %lu; expected %lu",
+ seq_num, (vd->seq_num + 1));
+ vd_reset_connection(vd, B_FALSE);
+ return (1);
+ }
+
+ vd->seq_num = seq_num;
+ vd->initialized |= VD_SEQ_NUM; /* superfluous after first time... */
+ return (0);
+}
+
+/*
+ * Return the expected size of an inband-descriptor message with all the
+ * cookies it claims to include (the message struct itself already holds
+ * room for one cookie, hence the "ncookies - 1").
+ */
+static size_t
+expected_inband_size(vd_dring_inband_msg_t *msg)
+{
+ return ((sizeof (*msg)) +
+ (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0])));
+}
+
+/*
+ * Process an in-band descriptor message: used with clients like OBP, with
+ * which vds exchanges descriptors within VIO message payloads, rather than
+ * operating on them within a descriptor ring.  The request's completion
+ * status is posted into the message payload for the caller's ack.
+ * Returns 0, ENOMSG, or EBADMSG.
+ */
+static int
+vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ size_t expected;
+ vd_dring_inband_msg_t *desc_msg = (vd_dring_inband_msg_t *)msg;
+
+
+ PR1("Entered");
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
+ VIO_DESC_DATA))
+ return (ENOMSG); /* not an in-band-descriptor message */
+
+ if (msglen < sizeof (*desc_msg)) {
+ PRN("Expected at least %lu-byte descriptor message; "
+ "received %lu bytes", sizeof (*desc_msg), msglen);
+ return (EBADMSG);
+ }
+
+ if (msglen != (expected = expected_inband_size(desc_msg))) {
+ PRN("Expected %lu-byte descriptor message; "
+ "received %lu bytes", expected, msglen);
+ return (EBADMSG);
+ }
+
+ if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0) {
+ return (EBADMSG);
+ }
+
+ /* Valid message; process the request */
+ desc_msg->payload.status = vd_process_request(vd, &desc_msg->payload);
+ return (0);
+}
+
+/*
+ * Verify that the "ndesc" descriptors starting at "start" (wrapping
+ * around the ring) are all VIO_DESC_READY, then mark them all
+ * VIO_DESC_ACCEPTED.  Returns B_FALSE without accepting anything if
+ * any descriptor is not ready.
+ */
+static boolean_t
+vd_accept_dring_elems(vd_t *vd, uint32_t start, uint32_t ndesc)
+{
+ uint32_t i, n;
+
+
+ /* Check descriptor states */
+ for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) {
+ if (VD_DRING_ELEM(i)->hdr.dstate != VIO_DESC_READY) {
+ PRN("descriptor %u not ready", i);
+ VD_DUMP_DRING_ELEM(VD_DRING_ELEM(i));
+ return (B_FALSE);
+ }
+ }
+
+ /* Descriptors are valid; accept them */
+ for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len)
+ VD_DRING_ELEM(i)->hdr.dstate = VIO_DESC_ACCEPTED;
+
+ return (B_TRUE);
+}
+
+/*
+ * Process the dring descriptors in [start, end] (inclusive, possibly
+ * wrapping around the ring): accept them all under one LDC
+ * acquire/release bracket, perform each I/O outside the bracket, then
+ * re-acquire each element individually to post its status and mark it
+ * VIO_DESC_DONE.  Returns 0, EINVAL on a bad range or descriptor
+ * state, or an LDC errno.
+ */
+static int
+vd_process_dring(vd_t *vd, uint32_t start, uint32_t end)
+{
+ int status;
+ boolean_t accepted;
+ uint32_t i, io_status, n, ndesc;
+
+
+ ASSERT(mutex_owned(&vd->lock));
+ PR1("start = %u, end = %u", start, end);
+
+ /* Validate descriptor range */
+ if ((start >= vd->dring_len) || (end >= vd->dring_len)) {
+ PRN("\"start\" = %u, \"end\" = %u; both must be less than %u",
+ start, end, vd->dring_len);
+ return (EINVAL);
+ }
+
+ /* Acquire updated dring elements */
+ if ((status = ldc_mem_dring_acquire(vd->dring_handle,
+ start, end)) != 0) {
+ PRN("ldc_mem_dring_acquire() returned errno %d", status);
+ return (status);
+ }
+ /* Accept updated dring elements */
+ ndesc = ((end < start) ? end + vd->dring_len : end) - start + 1;
+ PR1("ndesc = %u", ndesc);
+ accepted = vd_accept_dring_elems(vd, start, ndesc);
+ /* Release dring elements */
+ if ((status = ldc_mem_dring_release(vd->dring_handle,
+ start, end)) != 0) {
+ PRN("ldc_mem_dring_release() returned errno %d", status);
+ return (status);
+ }
+ /* If a descriptor was in the wrong state, return an error */
+ if (!accepted)
+ return (EINVAL);
+
+
+ /* Process accepted dring elements */
+ for (n = ndesc, i = start; n > 0; n--, i = (i + 1) % vd->dring_len) {
+ vd_dring_entry_t *elem = VD_DRING_ELEM(i);
+
+ /* Process descriptor outside acquire/release bracket */
+ PR1("Processing dring element %u", i);
+ io_status = vd_process_request(vd, &elem->payload);
+
+ /* Re-acquire client's dring element */
+ if ((status = ldc_mem_dring_acquire(vd->dring_handle,
+ i, i)) != 0) {
+ PRN("ldc_mem_dring_acquire() returned errno %d",
+ status);
+ return (status);
+ }
+ /* Update processed element */
+ if (elem->hdr.dstate == VIO_DESC_ACCEPTED) {
+ elem->payload.status = io_status;
+ elem->hdr.dstate = VIO_DESC_DONE;
+ } else {
+ /* Perhaps client timed out waiting for I/O... */
+ accepted = B_FALSE;
+ PRN("element %u no longer \"accepted\"", i);
+ VD_DUMP_DRING_ELEM(elem);
+ }
+ /* Release updated processed element */
+ if ((status = ldc_mem_dring_release(vd->dring_handle,
+ i, i)) != 0) {
+ PRN("ldc_mem_dring_release() returned errno %d",
+ status);
+ return (status);
+ }
+ /* If the descriptor was in the wrong state, return an error */
+ if (!accepted)
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * Validate a dring-data message (tag, length, sequence number, dring
+ * ident), mark it for ack, and process the referenced descriptor range.
+ * Returns 0 or the result of vd_process_dring(); ENOMSG/EBADMSG on a
+ * non-matching or malformed message.
+ */
+static int
+vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ vio_dring_msg_t *dring_msg = (vio_dring_msg_t *)msg;
+
+
+ PR1("Entered");
+ ASSERT(mutex_owned(&vd->lock));
+ ASSERT(msglen >= sizeof (msg->tag));
+
+ if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
+ VIO_DRING_DATA)) {
+ return (ENOMSG); /* not a dring-data message */
+ }
+
+ if (msglen != sizeof (*dring_msg)) {
+ PRN("Expected %lu-byte dring message; received %lu bytes",
+ sizeof (*dring_msg), msglen);
+ return (EBADMSG);
+ }
+
+ if (vd_check_seq_num(vd, dring_msg->seq_num) != 0) {
+ return (EBADMSG);
+ }
+
+ if (dring_msg->dring_ident != vd->dring_ident) {
+ PRN("Expected dring ident %lu; received ident %lu",
+ vd->dring_ident, dring_msg->dring_ident);
+ return (EBADMSG);
+ }
+
+
+ /* Valid message; process dring */
+ dring_msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
+ return (vd_process_dring(vd, dring_msg->start_idx, dring_msg->end_idx));
+}
+
+/*
+ * Read one message from the LDC channel into "msg" (buffer capacity in
+ * *nbytes), retrying up to vds_ldc_retries times on ETIMEDOUT; on
+ * success *nbytes holds the received length.  Returns ENOMSG when the
+ * queue is empty or nothing was read, or the ldc_read() errno.
+ */
+static int
+recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes)
+{
+ int retry, status;
+ size_t size = *nbytes;
+ boolean_t isempty = B_FALSE;
+
+
+ /* FIXME work around interrupt problem */
+ if ((ldc_chkq(ldc_handle, &isempty) != 0) || isempty)
+ return (ENOMSG);
+
+ for (retry = 0, status = ETIMEDOUT;
+ retry < vds_ldc_retries && status == ETIMEDOUT;
+ retry++) {
+ PR1("ldc_read() attempt %d", (retry + 1));
+ *nbytes = size; /* ldc_read() updates *nbytes; reset per try */
+ status = ldc_read(ldc_handle, msg, nbytes);
+ }
+
+ if (status != 0) {
+ PRN("ldc_read() returned errno %d", status);
+ return (status);
+ } else if (*nbytes == 0) {
+ PR1("ldc_read() returned 0 and no message read");
+ return (ENOMSG);
+ }
+
+ PR1("RCVD %lu-byte message", *nbytes);
+ return (0);
+}
+
+/*
+ * Dispatch one received message according to the connection state
+ * machine (version -> attributes -> dring registration -> data),
+ * after validating the session ID.  Returns 0 to ack the message,
+ * ENOMSG/EBADMSG/ENOTSUP to nack it, or another errno.
+ */
+static int
+vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ int status;
+
+
+ PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype,
+ msg->tag.vio_subtype, msg->tag.vio_subtype_env);
+ ASSERT(mutex_owned(&vd->lock));
+
+ /*
+ * Validate session ID up front, since it applies to all messages
+ * once set
+ */
+ if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) {
+ PRN("Expected SID %u, received %u", vd->sid,
+ msg->tag.vio_sid);
+ return (EBADMSG);
+ }
+
+
+ /*
+ * Process the received message based on connection state
+ */
+ switch (vd->state) {
+ case VD_STATE_INIT: /* expect version message */
+ if ((status = process_ver_msg(msg, msglen)) != 0)
+ return (status);
+
+ /* The first version message sets the SID */
+ ASSERT(!(vd->initialized & VD_SID));
+ vd->sid = msg->tag.vio_sid;
+ vd->initialized |= VD_SID;
+
+ /* Version negotiated, move to that state */
+ vd->state = VD_STATE_VER;
+ return (0);
+
+ case VD_STATE_VER: /* expect attribute message */
+ if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0)
+ return (status);
+
+ /* Attributes exchanged, move to that state */
+ vd->state = VD_STATE_ATTR;
+ return (0);
+
+ case VD_STATE_ATTR:
+ switch (vd->xfer_mode) {
+ case VIO_DESC_MODE: /* expect RDX message */
+ if ((status = process_rdx_msg(msg, msglen)) != 0)
+ return (status);
+
+ /* Ready to receive in-band descriptors */
+ vd->state = VD_STATE_DATA;
+ return (0);
+
+ case VIO_DRING_MODE: /* expect register-dring message */
+ if ((status =
+ vd_process_dring_reg_msg(vd, msg, msglen)) != 0)
+ return (status);
+
+ /* One dring negotiated, move to that state */
+ vd->state = VD_STATE_DRING;
+ return (0);
+
+ default:
+ /*
+ * NOTE(review): asserting a string literal is always
+ * true (here and in the two cases below); ASSERT(0)
+ * or a comparison was probably intended.
+ */
+ ASSERT("Unsupported transfer mode");
+ PRN("Unsupported transfer mode");
+ return (ENOTSUP);
+ }
+
+ case VD_STATE_DRING: /* expect RDX, register-dring, or unreg-dring */
+ if ((status = process_rdx_msg(msg, msglen)) == 0) {
+ /* Ready to receive data */
+ vd->state = VD_STATE_DATA;
+ return (0);
+ } else if (status != ENOMSG) {
+ return (status);
+ }
+
+
+ /*
+ * If another register-dring message is received, stay in
+ * dring state in case the client sends RDX; although the
+ * protocol allows multiple drings, this server does not
+ * support using more than one
+ */
+ if ((status =
+ vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG)
+ return (status);
+
+ /*
+ * Acknowledge an unregister-dring message, but reset the
+ * connection anyway: Although the protocol allows
+ * unregistering drings, this server cannot serve a vdisk
+ * without its only dring
+ */
+ status = vd_process_dring_unreg_msg(vd, msg, msglen);
+ return ((status == 0) ? ENOTSUP : status);
+
+ case VD_STATE_DATA:
+ switch (vd->xfer_mode) {
+ case VIO_DESC_MODE: /* expect in-band-descriptor message */
+ return (vd_process_desc_msg(vd, msg, msglen));
+
+ case VIO_DRING_MODE: /* expect dring-data or unreg-dring */
+ /*
+ * Typically expect dring-data messages, so handle
+ * them first
+ */
+ if ((status = vd_process_dring_msg(vd, msg,
+ msglen)) != ENOMSG)
+ return (status);
+
+ /*
+ * Acknowledge an unregister-dring message, but reset
+ * the connection anyway: Although the protocol
+ * allows unregistering drings, this server cannot
+ * serve a vdisk without its only dring
+ */
+ status = vd_process_dring_unreg_msg(vd, msg, msglen);
+ return ((status == 0) ? ENOTSUP : status);
+
+ default:
+ ASSERT("Unsupported transfer mode");
+ PRN("Unsupported transfer mode");
+ return (ENOTSUP);
+ }
+
+ default:
+ ASSERT("Invalid client connection state");
+ PRN("Invalid client connection state");
+ return (ENOTSUP);
+ }
+}
+
+static void
+vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
+{
+ int status;
+ boolean_t reset_ldc = B_FALSE;
+
+
+ ASSERT(mutex_owned(&vd->lock));
+
+ /*
+ * Check that the message is at least big enough for a "tag", so that
+ * message processing can proceed based on tag-specified message type
+ */
+ if (msglen < sizeof (vio_msg_tag_t)) {
+ PRN("Received short (%lu-byte) message", msglen);
+ /* Can't "nack" short message, so drop the big hammer */
+ vd_reset_connection(vd, B_TRUE);
+ return;
+ }
+
+ /*
+ * Process the message
+ */
+ switch (status = vd_do_process_msg(vd, msg, msglen)) {
+ case 0:
+ /* "ack" valid, successfully-processed messages */
+ msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
+ break;
+
+ case ENOMSG:
+ PRN("Received unexpected message");
+ _NOTE(FALLTHROUGH);
+ case EBADMSG:
+ case ENOTSUP:
+ /* "nack" invalid messages */
+ msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
+ break;
+
+ default:
+ /* "nack" failed messages */
+ msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
+ /* An LDC error probably occurred, so try resetting it */
+ reset_ldc = B_TRUE;
+ break;
+ }
+
+ /* "ack" or "nack" the message */
+ PR1("Sending %s",
+ (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK");
+ if (send_msg(vd->ldc_handle, msg, msglen) != 0)
+ reset_ldc = B_TRUE;
+
+ /* Reset the connection for nack'ed or failed messages */
+ if ((status != 0) || reset_ldc)
+ vd_reset_connection(vd, reset_ldc);
+}
+
+static void
+vd_process_queue(void *arg)
+{
+ vd_t *vd = (vd_t *)arg;
+ size_t max_msglen, nbytes;
+ vio_msg_t *vio_msg;
+
+
+ PR2("Entered");
+ ASSERT(vd != NULL);
+ mutex_enter(&vd->lock);
+	max_msglen = vd->max_msglen;	/* vd->max_msglen can change */
+ vio_msg = kmem_alloc(max_msglen, KM_SLEEP);
+ for (nbytes = vd->max_msglen;
+ vd->enabled && recv_msg(vd->ldc_handle, vio_msg, &nbytes) == 0;
+ nbytes = vd->max_msglen)
+ vd_process_msg(vd, vio_msg, nbytes);
+ kmem_free(vio_msg, max_msglen);
+ mutex_exit(&vd->lock);
+ PR2("Returning");
+}
+
+static uint_t
+vd_handle_ldc_events(uint64_t event, caddr_t arg)
+{
+ uint_t status;
+ vd_t *vd = (vd_t *)(void *)arg;
+
+
+ ASSERT(vd != NULL);
+ mutex_enter(&vd->lock);
+ if (event & LDC_EVT_READ) {
+ PR1("New packet(s) available");
+ /* Queue a task to process the new data */
+ if (ddi_taskq_dispatch(vd->taskq, vd_process_queue, vd, 0) !=
+ DDI_SUCCESS)
+ PRN("Unable to dispatch vd_process_queue()");
+ } else if (event & LDC_EVT_RESET) {
+ PR0("Attempting to bring up reset channel");
+ if (((status = ldc_up(vd->ldc_handle)) != 0) &&
+ (status != ECONNREFUSED)) {
+ PRN("ldc_up() returned errno %d", status);
+ }
+ } else if (event & LDC_EVT_UP) {
+ /* Reset the connection state when channel comes (back) up */
+ vd_reset_connection(vd, B_FALSE);
+ }
+ mutex_exit(&vd->lock);
+ return (LDC_SUCCESS);
+}
+
+static uint_t
+vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+ _NOTE(ARGUNUSED(key, val))
+ (*((uint_t *)arg))++;
+ return (MH_WALK_TERMINATE);
+}
+
+
+static int
+vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ uint_t vd_present = 0;
+ minor_t instance;
+ vds_t *vds;
+
+
+ PR0("Entered");
+ switch (cmd) {
+ case DDI_DETACH:
+ /* the real work happens below */
+ break;
+ case DDI_SUSPEND:
+ /* nothing to do for this non-device */
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ ASSERT(cmd == DDI_DETACH);
+ instance = ddi_get_instance(dip);
+ if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
+ PRN("Could not get state for instance %u", instance);
+ ddi_soft_state_free(vds_state, instance);
+ return (DDI_FAILURE);
+ }
+
+	/* Do not detach when serving any vdisks */
+ mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present);
+ if (vd_present) {
+ PR0("Not detaching because serving vdisks");
+ return (DDI_FAILURE);
+ }
+
+ PR0("Detaching");
+ if (vds->initialized & VDS_MDEG)
+ (void) mdeg_unregister(vds->mdeg);
+ if (vds->initialized & VDS_LDI)
+ (void) ldi_ident_release(vds->ldi_ident);
+ mod_hash_destroy_hash(vds->vd_table);
+ if (vds->initialized & VDS_LOCKING)
+ mutex_destroy(&vds->lock);
+ ddi_soft_state_free(vds_state, instance);
+ return (DDI_SUCCESS);
+}
+
+static boolean_t
+is_pseudo_device(dev_info_t *dip)
+{
+ dev_info_t *parent, *root = ddi_root_node();
+
+
+ for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root);
+ parent = ddi_get_parent(parent)) {
+ if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static int
+vd_get_params(ldi_handle_t lh, char *block_device, vd_t *vd)
+{
+ int otyp, rval, status;
+ dev_info_t *dip;
+ struct dk_cinfo dk_cinfo;
+
+
+ /* Get block device's device number, otyp, and size */
+ if ((status = ldi_get_dev(lh, &vd->dev[0])) != 0) {
+ PRN("ldi_get_dev() returned errno %d for %s",
+ status, block_device);
+ return (status);
+ }
+ if ((status = ldi_get_otyp(lh, &otyp)) != 0) {
+ PRN("ldi_get_otyp() returned errno %d for %s",
+ status, block_device);
+ return (status);
+ }
+ if (otyp != OTYP_BLK) {
+ PRN("Cannot serve non-block device %s", block_device);
+ return (ENOTBLK);
+ }
+ if (ldi_get_size(lh, &vd->vdisk_size) != DDI_SUCCESS) {
+ PRN("ldi_get_size() failed for %s", block_device);
+ return (EIO);
+ }
+
+ /* Determine if backing block device is a pseudo device */
+ if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]),
+ dev_to_instance(vd->dev[0]), 0)) == NULL) {
+ PRN("%s is no longer accessible", block_device);
+ return (EIO);
+ }
+ vd->pseudo = is_pseudo_device(dip);
+ ddi_release_devi(dip);
+ if (vd->pseudo) {
+ vd->vdisk_type = VD_DISK_TYPE_SLICE;
+ vd->nslices = 1;
+ return (0); /* ...and we're done */
+ }
+
+ /* Get dk_cinfo to determine slice of backing block device */
+ if ((status = ldi_ioctl(lh, DKIOCINFO, (intptr_t)&dk_cinfo,
+ FKIOCTL, kcred, &rval)) != 0) {
+ PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s",
+ status, block_device);
+ return (status);
+ }
+
+ if (dk_cinfo.dki_partition >= V_NUMPAR) {
+ PRN("slice %u >= maximum slice %u for %s",
+ dk_cinfo.dki_partition, V_NUMPAR, block_device);
+ return (EIO);
+ }
+
+ /* If block device slice is entire disk, fill in all slice devices */
+ if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE) {
+ uint_t slice;
+ major_t major = getmajor(vd->dev[0]);
+ minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE;
+
+ vd->vdisk_type = VD_DISK_TYPE_DISK;
+ vd->nslices = V_NUMPAR;
+ for (slice = 0; slice < vd->nslices; slice++)
+ vd->dev[slice] = makedevice(major, (minor + slice));
+ return (0); /* ...and we're done */
+ }
+
+ /* Otherwise, we have a (partial) slice of a block device */
+ vd->vdisk_type = VD_DISK_TYPE_SLICE;
+ vd->nslices = 1;
+
+
+ /* Initialize dk_geom structure for single-slice block device */
+ if ((status = ldi_ioctl(lh, DKIOCGGEOM, (intptr_t)&vd->dk_geom,
+ FKIOCTL, kcred, &rval)) != 0) {
+ PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s",
+ status, block_device);
+ return (status);
+ }
+ if (vd->dk_geom.dkg_nsect == 0) {
+ PRN("%s geometry claims 0 sectors per track", block_device);
+ return (EIO);
+ }
+ if (vd->dk_geom.dkg_nhead == 0) {
+ PRN("%s geometry claims 0 heads", block_device);
+ return (EIO);
+ }
+ vd->dk_geom.dkg_ncyl =
+ lbtodb(vd->vdisk_size)/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead;
+ vd->dk_geom.dkg_acyl = 0;
+ vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl;
+
+
+ /* Initialize vtoc structure for single-slice block device */
+ if ((status = ldi_ioctl(lh, DKIOCGVTOC, (intptr_t)&vd->vtoc,
+ FKIOCTL, kcred, &rval)) != 0) {
+ PRN("ldi_ioctl(DKIOCGVTOC) returned errno %d for %s",
+ status, block_device);
+ return (status);
+ }
+ bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume,
+ MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume)));
+ bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part));
+ vd->vtoc.v_nparts = 1;
+ vd->vtoc.v_part[0].p_tag = V_UNASSIGNED;
+ vd->vtoc.v_part[0].p_flag = 0;
+ vd->vtoc.v_part[0].p_start = 0;
+ vd->vtoc.v_part[0].p_size = lbtodb(vd->vdisk_size);
+ bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel,
+ MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel)));
+
+
+ return (0);
+}
+
+static int
+vds_do_init_vd(vds_t *vds, uint64_t id, char *block_device, uint64_t ldc_id,
+ vd_t **vdp)
+{
+ char tq_name[TASKQ_NAMELEN];
+ int param_status, status;
+ uint_t slice;
+ ddi_iblock_cookie_t iblock = NULL;
+ ldc_attr_t ldc_attr;
+ ldi_handle_t lh = NULL;
+ vd_t *vd;
+
+
+ ASSERT(vds != NULL);
+ ASSERT(block_device != NULL);
+ ASSERT(vdp != NULL);
+ PR0("Adding vdisk for %s", block_device);
+
+ if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) {
+ PRN("No memory for virtual disk");
+ return (EAGAIN);
+ }
+ *vdp = vd; /* assign here so vds_destroy_vd() can cleanup later */
+ vd->vds = vds;
+
+
+ /* Get device parameters */
+ if ((status = ldi_open_by_name(block_device, FREAD, kcred, &lh,
+ vds->ldi_ident)) != 0) {
+ PRN("ldi_open_by_name(%s) = errno %d", block_device, status);
+ return (status);
+ }
+ param_status = vd_get_params(lh, block_device, vd);
+ if ((status = ldi_close(lh, FREAD, kcred)) != 0) {
+ PRN("ldi_close(%s) = errno %d", block_device, status);
+ return (status);
+ }
+ if (param_status != 0)
+ return (param_status);
+ ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR);
+ PR0("vdisk_type = %s, pseudo = %s, nslices = %u",
+ ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"),
+ (vd->pseudo ? "yes" : "no"), vd->nslices);
+
+
+ /* Initialize locking */
+ if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED,
+ &iblock) != DDI_SUCCESS) {
+ PRN("Could not get iblock cookie.");
+ return (EIO);
+ }
+
+ mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock);
+ vd->initialized |= VD_LOCKING;
+
+
+ /* Open the backing-device slices */
+ for (slice = 0; slice < vd->nslices; slice++) {
+ ASSERT(vd->ldi_handle[slice] == NULL);
+ PR0("Opening device %u, minor %u = slice %u",
+ getmajor(vd->dev[slice]), getminor(vd->dev[slice]), slice);
+ if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK,
+ vd_open_flags, kcred, &vd->ldi_handle[slice],
+ vds->ldi_ident)) != 0) {
+ PRN("ldi_open_by_dev() returned errno %d for slice %u",
+ status, slice);
+ /* vds_destroy_vd() will close any open slices */
+#if 0 /* FIXME */
+ return (status);
+#endif
+ }
+ }
+
+
+ /* Create the task queue for the vdisk */
+ (void) snprintf(tq_name, sizeof (tq_name), "vd%lu", id);
+ PR1("tq_name = %s", tq_name);
+ if ((vd->taskq = ddi_taskq_create(vds->dip, tq_name, 1,
+ TASKQ_DEFAULTPRI, 0)) == NULL) {
+ PRN("Could not create task queue");
+ return (EIO);
+ }
+ vd->initialized |= VD_TASKQ;
+ vd->enabled = 1; /* before callback can dispatch to taskq */
+
+
+ /* Bring up LDC */
+ ldc_attr.devclass = LDC_DEV_BLK_SVC;
+ ldc_attr.instance = ddi_get_instance(vds->dip);
+ ldc_attr.mode = LDC_MODE_UNRELIABLE;
+ ldc_attr.qlen = VD_LDC_QLEN;
+ if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) {
+ PRN("ldc_init(%lu) = errno %d", ldc_id, status);
+ return (status);
+ }
+ vd->initialized |= VD_LDC;
+
+ if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events,
+ (caddr_t)vd)) != 0) {
+ PRN("ldc_reg_callback() returned errno %d", status);
+ return (status);
+ }
+
+ if ((status = ldc_open(vd->ldc_handle)) != 0) {
+ PRN("ldc_open() returned errno %d", status);
+ return (status);
+ }
+
+ if (((status = ldc_up(vd->ldc_handle)) != 0) &&
+ (status != ECONNREFUSED)) {
+ PRN("ldc_up() returned errno %d", status);
+ return (status);
+ }
+
+
+ /* Add the successfully-initialized vdisk to the server's table */
+ if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) {
+ PRN("Error adding vdisk ID %lu to table", id);
+ return (EIO);
+ }
+
+ return (0);
+}
+
+/*
+ * Destroy the state associated with a virtual disk
+ */
+static void
+vds_destroy_vd(void *arg)
+{
+ vd_t *vd = (vd_t *)arg;
+
+
+ PR0("Entered");
+ if (vd == NULL)
+ return;
+
+ /* Disable queuing requests for the vdisk */
+ if (vd->initialized & VD_LOCKING) {
+ mutex_enter(&vd->lock);
+ vd->enabled = 0;
+ mutex_exit(&vd->lock);
+ }
+
+ /* Drain and destroy the task queue (*before* shutting down LDC) */
+ if (vd->initialized & VD_TASKQ)
+ ddi_taskq_destroy(vd->taskq); /* waits for queued tasks */
+
+ /* Shut down LDC */
+ if (vd->initialized & VD_LDC) {
+ if (vd->initialized & VD_DRING)
+ (void) ldc_mem_dring_unmap(vd->dring_handle);
+ (void) ldc_unreg_callback(vd->ldc_handle);
+ (void) ldc_close(vd->ldc_handle);
+ (void) ldc_fini(vd->ldc_handle);
+ }
+
+ /* Close any open backing-device slices */
+ for (uint_t slice = 0; slice < vd->nslices; slice++) {
+ if (vd->ldi_handle[slice] != NULL) {
+ PR0("Closing slice %u", slice);
+ (void) ldi_close(vd->ldi_handle[slice],
+ vd_open_flags, kcred);
+ }
+ }
+
+ /* Free lock */
+ if (vd->initialized & VD_LOCKING)
+ mutex_destroy(&vd->lock);
+
+ /* Finally, free the vdisk structure itself */
+ kmem_free(vd, sizeof (*vd));
+}
+
+static int
+vds_init_vd(vds_t *vds, uint64_t id, char *block_device, uint64_t ldc_id)
+{
+ int status;
+ vd_t *vd = NULL;
+
+
+#ifdef lint
+ (void) vd;
+#endif /* lint */
+
+ if ((status = vds_do_init_vd(vds, id, block_device, ldc_id, &vd)) != 0)
+ vds_destroy_vd(vd);
+
+ return (status);
+}
+
+static int
+vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel,
+ uint64_t *ldc_id)
+{
+ int num_channels;
+
+
+ /* Look for channel endpoint child(ren) of the vdisk MD node */
+ if ((num_channels = md_scan_dag(md, vd_node,
+ md_find_name(md, VD_CHANNEL_ENDPOINT),
+ md_find_name(md, "fwd"), channel)) <= 0) {
+ PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT);
+ return (-1);
+ }
+
+ /* Get the "id" value for the first channel endpoint node */
+ if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) {
+ PRN("No \"%s\" property found for \"%s\" of vdisk",
+ VD_ID_PROP, VD_CHANNEL_ENDPOINT);
+ return (-1);
+ }
+
+ if (num_channels > 1) {
+ PRN("Using ID of first of multiple channels for this vdisk");
+ }
+
+ return (0);
+}
+
+static int
+vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id)
+{
+ int num_nodes, status;
+ size_t size;
+ mde_cookie_t *channel;
+
+
+ if ((num_nodes = md_node_count(md)) <= 0) {
+ PRN("Invalid node count in Machine Description subtree");
+ return (-1);
+ }
+ size = num_nodes*(sizeof (*channel));
+ channel = kmem_zalloc(size, KM_SLEEP);
+ status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id);
+ kmem_free(channel, size);
+
+ return (status);
+}
+
+static void
+vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
+{
+ char *block_device = NULL;
+ uint64_t id = 0, ldc_id = 0;
+
+
+ if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
+ PRN("Error getting vdisk \"%s\"", VD_ID_PROP);
+ return;
+ }
+ PR0("Adding vdisk ID %lu", id);
+ if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP,
+ &block_device) != 0) {
+ PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
+ return;
+ }
+
+ if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) {
+ PRN("Error getting LDC ID for vdisk %lu", id);
+ return;
+ }
+
+ if (vds_init_vd(vds, id, block_device, ldc_id) != 0) {
+ PRN("Failed to add vdisk ID %lu", id);
+ return;
+ }
+}
+
+static void
+vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
+{
+ uint64_t id = 0;
+
+
+ if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
+ PRN("Unable to get \"%s\" property from vdisk's MD node",
+ VD_ID_PROP);
+ return;
+ }
+ PR0("Removing vdisk ID %lu", id);
+ if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0)
+ PRN("No vdisk entry found for vdisk ID %lu", id);
+}
+
+static void
+vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node,
+ md_t *curr_md, mde_cookie_t curr_vd_node)
+{
+ char *curr_dev, *prev_dev;
+ uint64_t curr_id = 0, curr_ldc_id = 0;
+ uint64_t prev_id = 0, prev_ldc_id = 0;
+ size_t len;
+
+
+ /* Validate that vdisk ID has not changed */
+ if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) {
+ PRN("Error getting previous vdisk \"%s\" property",
+ VD_ID_PROP);
+ return;
+ }
+ if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) {
+ PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP);
+ return;
+ }
+ if (curr_id != prev_id) {
+ PRN("Not changing vdisk: ID changed from %lu to %lu",
+ prev_id, curr_id);
+ return;
+ }
+
+ /* Validate that LDC ID has not changed */
+ if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) {
+ PRN("Error getting LDC ID for vdisk %lu", prev_id);
+ return;
+ }
+
+ if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) {
+ PRN("Error getting LDC ID for vdisk %lu", curr_id);
+ return;
+ }
+ if (curr_ldc_id != prev_ldc_id) {
+ _NOTE(NOTREACHED); /* FIXME is there a better way? */
+ PRN("Not changing vdisk: "
+ "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id);
+ return;
+ }
+
+ /* Determine whether device path has changed */
+ if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP,
+ &prev_dev) != 0) {
+ PRN("Error getting previous vdisk \"%s\"",
+ VD_BLOCK_DEVICE_PROP);
+ return;
+ }
+ if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP,
+ &curr_dev) != 0) {
+ PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
+ return;
+ }
+ if (((len = strlen(curr_dev)) == strlen(prev_dev)) &&
+ (strncmp(curr_dev, prev_dev, len) == 0))
+ return; /* no relevant (supported) change */
+
+ PR0("Changing vdisk ID %lu", prev_id);
+ /* Remove old state, which will close vdisk and reset */
+ if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0)
+ PRN("No entry found for vdisk ID %lu", prev_id);
+ /* Re-initialize vdisk with new state */
+ if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) {
+ PRN("Failed to change vdisk ID %lu", curr_id);
+ return;
+ }
+}
+
+static int
+vds_process_md(void *arg, mdeg_result_t *md)
+{
+ int i;
+ vds_t *vds = arg;
+
+
+ if (md == NULL)
+ return (MDEG_FAILURE);
+ ASSERT(vds != NULL);
+
+ for (i = 0; i < md->removed.nelem; i++)
+ vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]);
+ for (i = 0; i < md->match_curr.nelem; i++)
+ vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i],
+ md->match_curr.mdp, md->match_curr.mdep[i]);
+ for (i = 0; i < md->added.nelem; i++)
+ vds_add_vd(vds, md->added.mdp, md->added.mdep[i]);
+
+ return (MDEG_SUCCESS);
+}
+
+static int
+vds_do_attach(dev_info_t *dip)
+{
+ static char reg_prop[] = "reg"; /* devinfo ID prop */
+
+ /* MDEG specification for a (particular) vds node */
+ static mdeg_prop_spec_t vds_prop_spec[] = {
+ {MDET_PROP_STR, "name", {VDS_NAME}},
+ {MDET_PROP_VAL, "cfg-handle", {0}},
+ {MDET_LIST_END, NULL, {0}}};
+ static mdeg_node_spec_t vds_spec = {"virtual-device", vds_prop_spec};
+
+ /* MDEG specification for matching a vd node */
+ static md_prop_match_t vd_prop_spec[] = {
+ {MDET_PROP_VAL, VD_ID_PROP},
+ {MDET_LIST_END, NULL}};
+ static mdeg_node_match_t vd_spec = {"virtual-device-port",
+ vd_prop_spec};
+
+ int status;
+ uint64_t cfg_handle;
+ minor_t instance = ddi_get_instance(dip);
+ vds_t *vds;
+
+
+ /*
+ * The "cfg-handle" property of a vds node in an MD contains the MD's
+ * notion of "instance", or unique identifier, for that node; OBP
+ * stores the value of the "cfg-handle" MD property as the value of
+ * the "reg" property on the node in the device tree it builds from
+ * the MD and passes to Solaris. Thus, we look up the devinfo node's
+ * "reg" property value to uniquely identify this device instance when
+ * registering with the MD event-generation framework. If the "reg"
+ * property cannot be found, the device tree state is presumably so
+ * broken that there is no point in continuing.
+ */
+ if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, reg_prop)) {
+ PRN("vds \"%s\" property does not exist", reg_prop);
+ return (DDI_FAILURE);
+ }
+
+ /* Get the MD instance for later MDEG registration */
+ cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ reg_prop, -1);
+
+ if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) {
+ PRN("Could not allocate state for instance %u", instance);
+ return (DDI_FAILURE);
+ }
+
+ if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
+ PRN("Could not get state for instance %u", instance);
+ ddi_soft_state_free(vds_state, instance);
+ return (DDI_FAILURE);
+ }
+
+
+ vds->dip = dip;
+ vds->vd_table = mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS,
+ vds_destroy_vd,
+ sizeof (void *));
+ ASSERT(vds->vd_table != NULL);
+
+ mutex_init(&vds->lock, NULL, MUTEX_DRIVER, NULL);
+ vds->initialized |= VDS_LOCKING;
+
+ if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) {
+ PRN("ldi_ident_from_dip() returned errno %d", status);
+ return (DDI_FAILURE);
+ }
+ vds->initialized |= VDS_LDI;
+
+ /* Register for MD updates */
+ vds_prop_spec[1].ps_val = cfg_handle;
+ if (mdeg_register(&vds_spec, &vd_spec, vds_process_md, vds,
+ &vds->mdeg) != MDEG_SUCCESS) {
+ PRN("Unable to register for MD updates");
+ return (DDI_FAILURE);
+ }
+ vds->initialized |= VDS_MDEG;
+
+ ddi_report_dev(dip);
+ return (DDI_SUCCESS);
+}
+
+static int
+vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int status;
+
+ PR0("Entered");
+ switch (cmd) {
+ case DDI_ATTACH:
+ if ((status = vds_do_attach(dip)) != DDI_SUCCESS)
+ (void) vds_detach(dip, DDI_DETACH);
+ return (status);
+ case DDI_RESUME:
+ /* nothing to do for this non-device */
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+}
+
+static struct dev_ops vds_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ ddi_no_info, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ vds_attach, /* devo_attach */
+ vds_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ NULL, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ nulldev /* devo_power */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "virtual disk server v%I%",
+ &vds_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+
+int
+_init(void)
+{
+ int i, status;
+
+
+ PR0("Built %s %s", __DATE__, __TIME__);
+ if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0)
+ return (status);
+ if ((status = mod_install(&modlinkage)) != 0) {
+ ddi_soft_state_fini(&vds_state);
+ return (status);
+ }
+
+ /* Fill in the bit-mask of server-supported operations */
+ for (i = 0; i < vds_noperations; i++)
+ vds_operations |= 1 << (vds_operation[i].operation - 1);
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int status;
+
+
+ PR0("Entered");
+ if ((status = mod_remove(&modlinkage)) != 0)
+ return (status);
+ ddi_soft_state_fini(&vds_state);
+ return (0);
+}
diff --git a/usr/src/uts/sun4v/io/vldc.c b/usr/src/uts/sun4v/io/vldc.c
new file mode 100644
index 0000000000..6c366c5c59
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vldc.c
@@ -0,0 +1,1581 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/open.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/ksynch.h>
+#include <sys/modctl.h>
+#include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/cred.h>
+#include <sys/promif.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/cyclic.h>
+#include <sys/note.h>
+#include <sys/mach_descrip.h>
+#include <sys/mdeg.h>
+#include <sys/ldc.h>
+#include <sys/vldc_impl.h>
+
+/*
+ * Function prototypes.
+ */
+
+/* DDI entrypoints */
+static int vldc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int vldc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int vldc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
+static int vldc_close(dev_t dev, int flag, int otyp, cred_t *cred);
+static int vldc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *credp, int *rvalp);
+static int vldc_read(dev_t dev, struct uio *uiop, cred_t *credp);
+static int vldc_write(dev_t dev, struct uio *uiop, cred_t *credp);
+static int vldc_chpoll(dev_t dev, short events, int anyyet,
+ short *reventsp, struct pollhead **phpp);
+
+/* Internal functions */
+static uint_t i_vldc_cb(uint64_t event, caddr_t arg);
+static int i_vldc_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
+static int i_vldc_mdeg_register(vldc_t *vldcp);
+static int i_vldc_mdeg_unregister(vldc_t *vldcp);
+static int i_vldc_add_port(vldc_t *vldcp, md_t *mdp, mde_cookie_t node);
+static int i_vldc_remove_port(vldc_t *vldcp, uint_t portno);
+static int i_vldc_close_port(vldc_t *vldcp, uint_t portno);
+
+/* soft state structure */
+static void *vldc_ssp;
+
+/*
+ * Matching criteria passed to the MDEG to register interest
+ * in changes to 'virtual-device-port' nodes identified by their
+ * 'id' property.
+ */
+static md_prop_match_t vport_prop_match[] = {
+ { MDET_PROP_VAL, "id" },
+ { MDET_LIST_END, NULL }
+};
+
+static mdeg_node_match_t vport_match = { "virtual-device-port",
+ vport_prop_match };
+
+/*
+ * Specification of an MD node passed to the MDEG to filter any
+ * 'virtual-device-port' nodes that do not belong to the specified
+ * node. This template is copied for each vldc instance and filled
+ * in with the appropriate 'name' and 'cfg-handle' values before
+ * being passed to the MDEG.
+ */
+static mdeg_prop_spec_t vldc_prop_template[] = {
+ { MDET_PROP_STR, "name", NULL },
+ { MDET_PROP_VAL, "cfg-handle", NULL },
+ { MDET_LIST_END, NULL, NULL }
+};
+
+#define VLDC_MDEG_PROP_NAME(specp) ((specp)[0].ps_str)
+#define VLDC_SET_MDEG_PROP_NAME(specp, name) ((specp)[0].ps_str = (name))
+#define VLDC_SET_MDEG_PROP_INST(specp, inst) ((specp)[1].ps_val = (inst))
+
+
+static struct cb_ops vldc_cb_ops = {
+ vldc_open, /* open */
+ vldc_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ vldc_read, /* read */
+ vldc_write, /* write */
+ vldc_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ ddi_segmap, /* segmap */
+ vldc_chpoll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* stream */
+ D_NEW | D_MP /* flag */
+};
+
+static struct dev_ops vldc_ops = {
+ DEVO_REV, /* rev */
+ 0, /* ref count */
+ ddi_getinfo_1to1, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ vldc_attach, /* attach */
+ vldc_detach, /* detach */
+ nodev, /* reset */
+ &vldc_cb_ops, /* cb_ops */
+ (struct bus_ops *)NULL /* bus_ops */
+};
+
+extern struct mod_ops mod_driverops;
+
+static struct modldrv md = {
+ &mod_driverops, /* Type - it is a driver */
+ "sun4v Virtual LDC Driver %I%", /* Name of the module */
+ &vldc_ops, /* driver specific ops */
+};
+
+static struct modlinkage ml = {
+ MODREV_1,
+ &md,
+ NULL
+};
+
+/* maximum MTU and cookie size tunables */
+uint32_t vldc_max_mtu = VLDC_MAX_MTU;
+uint64_t vldc_max_cookie = VLDC_MAX_COOKIE;
+
+
+#ifdef DEBUG
+
+/*
+ * Print debug messages
+ *
+ * set vldcdbg to 0x7 to enable all messages
+ *
+ * 0x4 - Warnings
+ * 0x2 - All debug messages (most verbose)
+ * 0x1 - Minimal debug messages
+ */
+
+int vldcdbg = 0x0;
+
+static void
+vldcdebug(const char *fmt, ...)
+{
+ char buf[512];
+ va_list ap;
+
+ va_start(ap, fmt);
+ (void) vsnprintf(buf, sizeof (buf), fmt, ap);
+ va_end(ap);
+
+ cmn_err(CE_CONT, "?%s", buf);
+}
+
+#define D1 if (vldcdbg & 0x01) vldcdebug
+#define D2 if (vldcdbg & 0x02) vldcdebug
+#define DWARN if (vldcdbg & 0x04) vldcdebug
+
+#else /* not DEBUG */
+
+#define D1 if (0) printf
+#define D2 if (0) printf
+#define DWARN if (0) printf
+
+#endif /* not DEBUG */
+
+
+/* _init(9E): initialize the loadable module */
+int
+_init(void)
+{
+ int error;
+
+ /* init the soft state structure */
+ error = ddi_soft_state_init(&vldc_ssp, sizeof (vldc_t), 1);
+ if (error != 0) {
+ return (error);
+ }
+
+ /* Link the driver into the system */
+ error = mod_install(&ml);
+
+ return (error);
+}
+
+/* _info(9E): return information about the loadable module */
+int
+_info(struct modinfo *modinfop)
+{
+ /* Report status of the dynamically loadable driver module */
+ return (mod_info(&ml, modinfop));
+}
+
+/* _fini(9E): prepare the module for unloading. */
+int
+_fini(void)
+{
+ int error;
+
+ /* Unlink the driver module from the system */
+ if ((error = mod_remove(&ml)) == 0) {
+		/*
+		 * We have successfully "removed" the driver;
+		 * destroy the soft state.
+		 */
+ ddi_soft_state_fini(&vldc_ssp);
+ }
+
+ return (error);
+}
+
+/* ldc callback */
+static uint_t
+i_vldc_cb(uint64_t event, caddr_t arg)
+{
+ vldc_port_t *vport = (vldc_port_t *)arg;
+ short pollevents = 0;
+ int rv;
+
+ D1("i_vldc_cb: callback invoked port=%d, event=0x%lx\n",
+ vport->number, event);
+
+ if (event & LDC_EVT_UP) {
+ pollevents |= POLLOUT;
+ vport->hanged_up = B_FALSE;
+
+ } else if (event & LDC_EVT_DOWN) {
+ pollevents |= POLLHUP;
+ vport->hanged_up = B_TRUE;
+
+ } else if (event & LDC_EVT_RESET) {
+ /* do an ldc_up because we can't be sure the other side will */
+ if ((rv = ldc_up(vport->ldc_handle)) != 0)
+ if (rv != ECONNREFUSED)
+ DWARN("i_vldc_cb: port@%d failed to"
+ " bring up LDC channel=%ld, err=%d\n",
+ vport->number, vport->ldc_id, rv);
+ }
+
+ if (event & LDC_EVT_READ)
+ pollevents |= POLLIN;
+
+ if (pollevents != 0) {
+ D1("i_vldc_cb: port@%d pollwakeup=0x%x\n",
+ vport->number, pollevents);
+ pollwakeup(&vport->poll, pollevents);
+ }
+
+ return (LDC_SUCCESS);
+}
+
+/*
+ * MDEG callback, invoked when the machine description (MD) nodes that
+ * matched this instance's registration change.  Adds a port for every
+ * node on the "added" list and removes the port for every node on the
+ * "removed" list.  Updates to already-active ports (match_curr /
+ * match_prev) are not supported and are ignored.  All list processing
+ * runs with vldcp->lock held.  Returns MDEG_SUCCESS or MDEG_FAILURE.
+ */
+static int
+i_vldc_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
+{
+ vldc_t *vldcp;
+ int idx;
+ uint64_t portno;
+ int rv;
+ md_t *mdp;
+ mde_cookie_t node;
+
+ if (resp == NULL) {
+ D1("i_vldc_mdeg_cb: no result returned\n");
+ return (MDEG_FAILURE);
+ }
+
+ vldcp = (vldc_t *)cb_argp;
+
+ mutex_enter(&vldcp->lock);
+ /* bail out if detach has started; it will unregister this callback */
+ if (vldcp->detaching == B_TRUE) {
+ D1("i_vldc_mdeg_cb: detach in progress\n");
+ mutex_exit(&vldcp->lock);
+ return (MDEG_FAILURE);
+ }
+
+ D1("i_vldc_mdeg_cb: added=%d, removed=%d, matched=%d\n",
+ resp->added.nelem, resp->removed.nelem, resp->match_prev.nelem);
+
+ /* process added ports */
+ for (idx = 0; idx < resp->added.nelem; idx++) {
+ mdp = resp->added.mdp;
+ node = resp->added.mdep[idx];
+
+ D1("i_vldc_mdeg_cb: processing added node 0x%lx\n", node);
+
+ /* attempt to add a port; failures are logged but not fatal */
+ if ((rv = i_vldc_add_port(vldcp, mdp, node)) != MDEG_SUCCESS) {
+ cmn_err(CE_NOTE, "?i_vldc_mdeg_cb: unable to add port, "
+ "err = %d", rv);
+ }
+ }
+
+ /* process removed ports */
+ for (idx = 0; idx < resp->removed.nelem; idx++) {
+ mdp = resp->removed.mdp;
+ node = resp->removed.mdep[idx];
+
+ D1("i_vldc_mdeg_cb: processing removed node 0x%lx\n", node);
+
+ /* read in the port's id property */
+ if (md_get_prop_val(mdp, node, "id", &portno)) {
+ cmn_err(CE_NOTE, "?i_vldc_mdeg_cb: node 0x%lx of "
+ "removed list has no 'id' property", node);
+ continue;
+ }
+
+ /* attempt to remove a port; failures are logged but not fatal */
+ if ((rv = i_vldc_remove_port(vldcp, portno)) != 0) {
+ cmn_err(CE_NOTE, "?i_vldc_mdeg_cb: unable to remove "
+ "port %lu, err %d", portno, rv);
+ }
+ }
+
+ /*
+ * Currently no support for updating already active ports. So, ignore
+ * the match_curr and match_prev arrays for now.
+ */
+
+ mutex_exit(&vldcp->lock);
+
+ return (MDEG_SUCCESS);
+}
+
+/*
+ * Register this vldc instance with the MDEG framework so that
+ * i_vldc_mdeg_cb() is invoked on machine-description updates.  The
+ * instance is uniquely identified by a per-instance copy of the global
+ * property-spec template, filled in with this device's "name" and
+ * "reg" (instance) properties.  On success the node spec and mdeg
+ * handle are cached in vldcp for i_vldc_mdeg_unregister().  Returns
+ * DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+i_vldc_mdeg_register(vldc_t *vldcp)
+{
+ mdeg_prop_spec_t *pspecp;
+ mdeg_node_spec_t *inst_specp;
+ mdeg_handle_t mdeg_hdl;
+ size_t templatesz;
+ int inst;
+ char *name;
+ size_t namesz;
+ char *nameprop;
+ int rv;
+
+ /* get the unique vldc instance assigned by the LDom manager */
+ inst = ddi_prop_get_int(DDI_DEV_T_ANY, vldcp->dip,
+ DDI_PROP_DONTPASS, "reg", -1);
+ if (inst == -1) {
+ cmn_err(CE_NOTE, "?vldc%d has no 'reg' property",
+ ddi_get_instance(vldcp->dip));
+ return (DDI_FAILURE);
+ }
+
+ /* get the name of the vldc instance */
+ rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, vldcp->dip,
+ DDI_PROP_DONTPASS, "name", &nameprop);
+ if (rv != DDI_PROP_SUCCESS) {
+ cmn_err(CE_NOTE, "?vldc%d has no 'name' property",
+ ddi_get_instance(vldcp->dip));
+ return (DDI_FAILURE);
+ }
+
+ D1("i_vldc_mdeg_register: name=%s, instance=%d\n", nameprop, inst);
+
+ /*
+ * Allocate and initialize a per-instance copy
+ * of the global property spec array that will
+ * uniquely identify this vldc instance.
+ */
+ templatesz = sizeof (vldc_prop_template);
+ pspecp = kmem_alloc(templatesz, KM_SLEEP);
+
+ bcopy(vldc_prop_template, pspecp, templatesz);
+
+ /* copy in the name property */
+ namesz = strlen(nameprop) + 1;
+ name = kmem_alloc(namesz, KM_SLEEP);
+
+ bcopy(nameprop, name, namesz);
+ VLDC_SET_MDEG_PROP_NAME(pspecp, name);
+
+ /*
+ * The property value has been copied into 'name'; release the
+ * buffer returned by ddi_prop_lookup_string().  Every successful
+ * lookup must be paired with ddi_prop_free() or the property
+ * buffer is leaked.
+ */
+ ddi_prop_free(nameprop);
+
+ /* copy in the instance property */
+ VLDC_SET_MDEG_PROP_INST(pspecp, inst);
+
+ /* initialize the complete prop spec structure */
+ inst_specp = kmem_alloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
+ inst_specp->namep = "virtual-device";
+ inst_specp->specp = pspecp;
+
+ /* perform the registration */
+ rv = mdeg_register(inst_specp, &vport_match, i_vldc_mdeg_cb,
+ vldcp, &mdeg_hdl);
+
+ if (rv != MDEG_SUCCESS) {
+ cmn_err(CE_NOTE, "?i_vldc_mdeg_register: mdeg_register "
+ "failed, err = %d", rv);
+ /* unwind all allocations made above */
+ kmem_free(name, namesz);
+ kmem_free(pspecp, templatesz);
+ kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
+ return (DDI_FAILURE);
+ }
+
+ /* save off data that will be needed later */
+ vldcp->inst_spec = inst_specp;
+ vldcp->mdeg_hdl = mdeg_hdl;
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Unregister the MDEG callback and release the node/property spec
+ * allocated by i_vldc_mdeg_register().  If mdeg_unregister() fails the
+ * cached data is left intact so the operation can be retried.  Returns
+ * MDEG_SUCCESS, or the mdeg_unregister() error code on failure.
+ */
+static int
+i_vldc_mdeg_unregister(vldc_t *vldcp)
+{
+ char *name;
+ int rv;
+
+ D1("i_vldc_mdeg_unregister: hdl=0x%lx\n", vldcp->mdeg_hdl);
+
+ rv = mdeg_unregister(vldcp->mdeg_hdl);
+ if (rv != MDEG_SUCCESS) {
+ return (rv);
+ }
+
+ /*
+ * Clean up cached MDEG data
+ */
+ name = VLDC_MDEG_PROP_NAME(vldcp->inst_spec->specp);
+ if (name != NULL) {
+ /* strlen(name) + 1 matches the namesz allocation at register */
+ kmem_free(name, strlen(name) + 1);
+ }
+ kmem_free(vldcp->inst_spec->specp, sizeof (vldc_prop_template));
+ vldcp->inst_spec->specp = NULL;
+
+ kmem_free(vldcp->inst_spec, sizeof (mdeg_node_spec_t));
+ vldcp->inst_spec = NULL;
+
+ return (MDEG_SUCCESS);
+}
+
+/*
+ * Look up the LDC channel id for a port MD node.  Scans the MD for the
+ * "channel-endpoint" node(s) beneath 'node' and returns the "id"
+ * property of the first endpoint found through ldc_id.  Returns 0 on
+ * success, -1 on failure.
+ */
+static int
+i_vldc_get_port_channel(md_t *mdp, mde_cookie_t node, uint64_t *ldc_id)
+{
+ mde_cookie_t *chanlist;
+ size_t chanlistsz;
+ int nnodes;
+ int nchan;
+ int rv = -1;
+
+ /*
+ * Find the channel-endpoint node(s) (which should be under this
+ * port node) which contain the channel id(s).
+ */
+ nnodes = md_node_count(mdp);
+ if (nnodes <= 0) {
+ cmn_err(CE_NOTE, "?i_vldc_get_port_channel: invalid number of "
+ "channel-endpoint nodes found (%d)", nnodes);
+ return (-1);
+ }
+
+ /* allocate space for node list */
+ chanlistsz = nnodes * sizeof (mde_cookie_t);
+ chanlist = kmem_alloc(chanlistsz, KM_SLEEP);
+
+ nchan = md_scan_dag(mdp, node, md_find_name(mdp, "channel-endpoint"),
+ md_find_name(mdp, "fwd"), chanlist);
+
+ if (nchan <= 0) {
+ cmn_err(CE_NOTE, "?i_vldc_get_port_channel: no channel-endpoint"
+ " nodes found");
+ } else {
+ D2("i_vldc_get_port_channel: %d channel-endpoint nodes found",
+ nchan);
+
+ /* use property from first node found */
+ if (md_get_prop_val(mdp, chanlist[0], "id", ldc_id)) {
+ cmn_err(CE_NOTE, "?i_vldc_get_port_channel: "
+ "channel-endpoint has no 'id' property");
+ } else {
+ rv = 0;
+ }
+ }
+
+ /* single exit point: the node list is always released */
+ kmem_free(chanlist, chanlistsz);
+
+ return (rv);
+}
+
+/*
+ * Add a vldc port for the MD node 'node'.  Reads the node's "id" and
+ * "vldc-svc-name" properties, binds the port to the minor-table slot
+ * for that service name (reusing a previously assigned slot or taking
+ * a new one), and creates the corresponding minor node.  The port is
+ * left in the VLDC_PORT_CLOSED state; the LDC channel itself is only
+ * set up later via the VLDC_OPT_MODE ioctl (vldc_set_ldc_mode()).
+ * Called from i_vldc_mdeg_cb() with vldcp->lock held.  Returns
+ * MDEG_SUCCESS or MDEG_FAILURE.
+ */
+static int
+i_vldc_add_port(vldc_t *vldcp, md_t *mdp, mde_cookie_t node)
+{
+ vldc_port_t *vport;
+ char *sname;
+ uint64_t portno;
+ int vldc_inst;
+ minor_t minor;
+ int minor_idx;
+ boolean_t new_minor;
+ int rv;
+
+ /* read in the port's id property */
+ if (md_get_prop_val(mdp, node, "id", &portno)) {
+ cmn_err(CE_NOTE, "?i_vldc_add_port: node 0x%lx of added "
+ "list has no 'id' property", node);
+ return (MDEG_FAILURE);
+ }
+
+ if (portno >= VLDC_MAX_PORTS) {
+ cmn_err(CE_NOTE, "?i_vldc_add_port: found port number (%lu) "
+ "larger than maximum supported number of ports", portno);
+ return (MDEG_FAILURE);
+ }
+
+ vport = &(vldcp->port[portno]);
+
+ /* a non-NULL minorp means the port slot is already in use */
+ if (vport->minorp != NULL) {
+ cmn_err(CE_NOTE, "?i_vldc_add_port: trying to add a port (%lu)"
+ " which is already bound", portno);
+ return (MDEG_FAILURE);
+ }
+
+ vport->number = portno;
+
+ /* get all channels for this device (currently only one) */
+ if (i_vldc_get_port_channel(mdp, node, &vport->ldc_id) == -1) {
+ return (MDEG_FAILURE);
+ }
+
+ /* set the default MTU */
+ vport->mtu = VLDC_DEFAULT_MTU;
+
+ /* get the service being exported by this port */
+ if (md_get_prop_str(mdp, node, "vldc-svc-name", &sname)) {
+ cmn_err(CE_NOTE, "?i_vldc_add_port: vdevice has no "
+ "'vldc-svc-name' property");
+ return (MDEG_FAILURE);
+ }
+
+ /* minor number look up - reuse the slot if the name was seen before */
+ for (minor_idx = 0; minor_idx < vldcp->minors_assigned;
+ minor_idx++) {
+ if (strcmp(vldcp->minor_tbl[minor_idx].sname, sname) == 0) {
+ /* found previously assigned minor number */
+ break;
+ }
+ }
+
+ new_minor = B_FALSE;
+ if (minor_idx == vldcp->minors_assigned) {
+ /* end of lookup - assign new minor number */
+ if (vldcp->minors_assigned == VLDC_MAX_MINORS) {
+ cmn_err(CE_NOTE, "?i_vldc_add_port: too many minor "
+ "nodes (%d)", minor_idx);
+ return (MDEG_FAILURE);
+ }
+
+ (void) strlcpy(vldcp->minor_tbl[minor_idx].sname,
+ sname, MAXPATHLEN);
+
+ vldcp->minors_assigned++;
+ /* remember so the assignment can be undone on failure below */
+ new_minor = B_TRUE;
+ }
+
+ ASSERT(vldcp->minor_tbl[minor_idx].portno == VLDC_INVALID_PORTNO);
+
+ /* cross-link port and minor-table entry */
+ vport->minorp = &vldcp->minor_tbl[minor_idx];
+ vldcp->minor_tbl[minor_idx].portno = portno;
+ vldcp->minor_tbl[minor_idx].in_use = 0;
+
+ D1("i_vldc_add_port: port@%d mtu=%d, ldc=%ld, service=%s\n",
+ vport->number, vport->mtu, vport->ldc_id, sname);
+
+ /*
+ * Create a minor node. The minor number is
+ * (vldc_inst << VLDC_INST_SHIFT) | minor_idx
+ */
+ vldc_inst = ddi_get_instance(vldcp->dip);
+
+ minor = (vldc_inst << VLDC_INST_SHIFT) | (minor_idx);
+
+ rv = ddi_create_minor_node(vldcp->dip, sname, S_IFCHR,
+ minor, DDI_NT_SERIAL, 0);
+
+ if (rv != DDI_SUCCESS) {
+ cmn_err(CE_NOTE, "?i_vldc_add_port: failed to create minor"
+ "node (%u), err = %d", minor, rv);
+ /* undo the minor-table binding made above */
+ vldcp->minor_tbl[minor_idx].portno = VLDC_INVALID_PORTNO;
+ if (new_minor) {
+ vldcp->minors_assigned--;
+ }
+ return (MDEG_FAILURE);
+ }
+
+ /*
+ * The port is now bound to a minor node and is initially in the
+ * closed state.
+ */
+ vport->status = VLDC_PORT_CLOSED;
+
+ D1("i_vldc_add_port: port %lu initialized\n", portno);
+
+ return (MDEG_SUCCESS);
+}
+
+/*
+ * Remove a vldc port.  Invalidates the minor-table entry so new opens
+ * and ioctls fail, wakes any pollers with POLLHUP, waits for in-flight
+ * users of the minor node to drain, closes the port if it is open or
+ * ready, and finally removes the minor node.  Called from the MDEG
+ * callback and from detach.  Returns MDEG_SUCCESS or MDEG_FAILURE.
+ */
+static int
+i_vldc_remove_port(vldc_t *vldcp, uint_t portno)
+{
+ vldc_port_t *vport;
+ vldc_minor_t *vminor;
+
+ vport = &(vldcp->port[portno]);
+ vminor = vport->minorp;
+ if (vminor == NULL) {
+ cmn_err(CE_NOTE, "?i_vldc_remove_port: trying to remove a "
+ "port (%u) which is not bound", portno);
+ return (MDEG_FAILURE);
+ }
+
+ /*
+ * Make sure that all new attempts to open or use the minor node
+ * associated with the port will fail.
+ */
+ mutex_enter(&vminor->lock);
+ vminor->portno = VLDC_INVALID_PORTNO;
+ mutex_exit(&vminor->lock);
+
+ /* send hangup to anyone polling */
+ pollwakeup(&vport->poll, POLLHUP);
+
+ /* Now wait for all current users of the minor node to finish. */
+ mutex_enter(&vminor->lock);
+ while (vminor->in_use > 0) {
+ /* vldc_ioctl() signals this cv when in_use drops to zero */
+ cv_wait(&vminor->cv, &vminor->lock);
+ }
+
+ if ((vport->status == VLDC_PORT_READY) ||
+ (vport->status == VLDC_PORT_OPEN)) {
+ /* close the port before it is torn down */
+ (void) i_vldc_close_port(vldcp, portno);
+ }
+
+ /* remove minor node */
+ ddi_remove_minor_node(vldcp->dip, vport->minorp->sname);
+ vport->minorp = NULL;
+
+ mutex_exit(&vminor->lock);
+
+ D1("i_vldc_remove_port: removed vldc port %u\n", portno);
+
+ return (MDEG_SUCCESS);
+}
+
+/*
+ * Close an LDC channel: close, unregister the callback, and finalize
+ * the handle.  All three steps are always attempted so the channel is
+ * torn down as far as possible, and the FIRST error encountered is
+ * returned (0 if every step succeeded).
+ *
+ * Fix: the original kept later errors only when an earlier step had
+ * already failed ("rv != 0"), which silently discarded failures from
+ * ldc_unreg_callback()/ldc_fini() whenever ldc_close() succeeded.
+ */
+static int
+i_vldc_ldc_close(vldc_port_t *vport)
+{
+ int rv = 0;
+ int err;
+
+ err = ldc_close(vport->ldc_handle);
+ if (err != 0)
+ rv = err;
+ err = ldc_unreg_callback(vport->ldc_handle);
+ if ((err != 0) && (rv == 0))
+ rv = err;
+ err = ldc_fini(vport->ldc_handle);
+ if ((err != 0) && (rv == 0))
+ rv = err;
+
+ return (rv);
+}
+
+/*
+ * Close a vldc port.  Tears down the LDC channel if it was brought up
+ * (VLDC_PORT_READY), releases the send/receive buffers allocated at
+ * open time, and marks the port VLDC_PORT_CLOSED.  Caller must hold
+ * the port's minor lock (asserted below).  Returns DDI_SUCCESS, or
+ * the error from i_vldc_ldc_close().
+ */
+static int
+i_vldc_close_port(vldc_t *vldcp, uint_t portno)
+{
+ vldc_port_t *vport;
+ int rv;
+
+ vport = &(vldcp->port[portno]);
+
+ ASSERT(MUTEX_HELD(&vport->minorp->lock));
+
+ if (vport->status == VLDC_PORT_CLOSED) {
+ /* nothing to do */
+ DWARN("i_vldc_close_port: port %d in an unexpected "
+ "state (%d)\n", portno, vport->status);
+ return (DDI_SUCCESS);
+ }
+
+ rv = DDI_SUCCESS;
+ if (vport->status == VLDC_PORT_READY) {
+ /* channel was initialized; tear it down */
+ rv = i_vldc_ldc_close(vport);
+ } else {
+ ASSERT(vport->status == VLDC_PORT_OPEN);
+ }
+
+ /* free memory - buffers were allocated in vldc_open()/opt_op */
+ kmem_free(vport->send_buf, vport->mtu);
+ kmem_free(vport->recv_buf, vport->mtu);
+
+ vport->status = VLDC_PORT_CLOSED;
+
+ return (rv);
+}
+
+/*
+ * attach(9E): attach a device to the system.
+ * called once for each instance of the device on the system.
+ * Allocates the per-instance soft state, initializes the instance and
+ * minor-table locks, and registers with MDEG for machine-description
+ * updates.  Returns DDI_SUCCESS or DDI_FAILURE (attach(9E) must not
+ * return an errno value).
+ */
+static int
+vldc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i, instance;
+ vldc_t *vldcp;
+
+ switch (cmd) {
+
+ case DDI_ATTACH:
+
+ instance = ddi_get_instance(dip);
+
+ if (ddi_soft_state_zalloc(vldc_ssp, instance) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+
+ vldcp = ddi_get_soft_state(vldc_ssp, instance);
+ if (vldcp == NULL) {
+ ddi_soft_state_free(vldc_ssp, instance);
+ /* fix: was ENXIO; attach(9E) must return DDI_FAILURE */
+ return (DDI_FAILURE);
+ }
+
+ D1("vldc_attach: DDI_ATTACH instance=%d\n", instance);
+
+ mutex_init(&vldcp->lock, NULL, MUTEX_DRIVER, NULL);
+ vldcp->dip = dip;
+ vldcp->detaching = B_FALSE;
+
+ for (i = 0; i < VLDC_MAX_PORTS; i++) {
+ /* No minor node association to start with */
+ vldcp->port[i].minorp = NULL;
+ }
+
+ for (i = 0; i < VLDC_MAX_MINORS; i++) {
+ mutex_init(&(vldcp->minor_tbl[i].lock), NULL,
+ MUTEX_DRIVER, NULL);
+ cv_init(&(vldcp->minor_tbl[i].cv), NULL,
+ CV_DRIVER, NULL);
+ /* No port association to start with */
+ vldcp->minor_tbl[i].portno = VLDC_INVALID_PORTNO;
+ }
+
+ /* Register for MD update notification */
+ if (i_vldc_mdeg_register(vldcp) != DDI_SUCCESS) {
+ /*
+ * fix: destroy the synchronization objects created
+ * above before freeing the soft state, instead of
+ * leaking them.
+ */
+ for (i = 0; i < VLDC_MAX_MINORS; i++) {
+ mutex_destroy(&(vldcp->minor_tbl[i].lock));
+ cv_destroy(&(vldcp->minor_tbl[i].cv));
+ }
+ mutex_destroy(&vldcp->lock);
+ ddi_soft_state_free(vldc_ssp, instance);
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+
+ case DDI_RESUME:
+
+ return (DDI_SUCCESS);
+
+ default:
+
+ return (DDI_FAILURE);
+ }
+}
+
+/*
+ * detach(9E): detach a device from the system.
+ * Fails if any port is still bound.  Otherwise unregisters the MDEG
+ * callback, tears down any remaining bound ports, destroys the
+ * per-minor synchronization objects, and frees the soft state.
+ */
+static int
+vldc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int i, instance;
+ vldc_t *vldcp;
+
+ switch (cmd) {
+
+ case DDI_DETACH:
+
+ instance = ddi_get_instance(dip);
+
+ vldcp = ddi_get_soft_state(vldc_ssp, instance);
+ if (vldcp == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ D1("vldc_detach: DDI_DETACH instance=%d\n", instance);
+
+ mutex_enter(&vldcp->lock);
+
+ /* Fail the detach if all ports have not been removed. */
+ for (i = 0; i < VLDC_MAX_MINORS; i++) {
+ if (vldcp->minor_tbl[i].portno != VLDC_INVALID_PORTNO) {
+ D1("vldc_detach: vldc@%d:%d is bound, "
+ "detach failed\n",
+ instance, vldcp->minor_tbl[i].portno);
+ mutex_exit(&vldcp->lock);
+ return (DDI_FAILURE);
+ }
+ }
+
+ /*
+ * Prevent MDEG from adding new ports before the callback can
+ * be unregistered. The lock can't be held accross the
+ * unregistration call because a callback may be in progress
+ * and blocked on the lock.
+ */
+ vldcp->detaching = B_TRUE;
+
+ mutex_exit(&vldcp->lock);
+
+ if (i_vldc_mdeg_unregister(vldcp) != MDEG_SUCCESS) {
+ /*
+ * NOTE(review): 'detaching' is reset here without
+ * vldcp->lock held - presumably benign since it only
+ * re-enables the callback; confirm.
+ */
+ vldcp->detaching = B_FALSE;
+ return (DDI_FAILURE);
+ }
+
+ /* Tear down all bound ports and free resources. */
+ for (i = 0; i < VLDC_MAX_MINORS; i++) {
+ if (vldcp->minor_tbl[i].portno != VLDC_INVALID_PORTNO) {
+ (void) i_vldc_remove_port(vldcp, i);
+ }
+ mutex_destroy(&(vldcp->minor_tbl[i].lock));
+ cv_destroy(&(vldcp->minor_tbl[i].cv));
+ }
+
+ mutex_destroy(&vldcp->lock);
+ ddi_soft_state_free(vldc_ssp, instance);
+
+ return (DDI_SUCCESS);
+
+ case DDI_SUSPEND:
+
+ return (DDI_SUCCESS);
+
+ default:
+
+ return (DDI_FAILURE);
+ }
+}
+
+/*
+ * cb_open: open the port bound to this minor node.  Only one open of
+ * a port is allowed at a time (EBUSY otherwise).  Allocates the
+ * send/receive buffers and moves the port to VLDC_PORT_OPEN; the LDC
+ * channel is configured later via the VLDC_OPT_MODE ioctl.
+ */
+static int
+vldc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(flag, otyp, cred))
+
+ minor_t mnum;
+ int inst;
+ uint64_t portno;
+ vldc_t *vldcp;
+ vldc_port_t *vport;
+ vldc_minor_t *vminor;
+
+ mnum = getminor(*devp);
+ inst = VLDCINST(mnum);
+ if ((vldcp = ddi_get_soft_state(vldc_ssp, inst)) == NULL)
+ return (ENXIO);
+
+ vminor = VLDCMINOR(vldcp, mnum);
+ mutex_enter(&vminor->lock);
+ portno = vminor->portno;
+ if (portno == VLDC_INVALID_PORTNO) {
+ /* minor node is not bound to any port */
+ mutex_exit(&vminor->lock);
+ return (ENXIO);
+ }
+
+ vport = &(vldcp->port[portno]);
+
+ D1("vldc_open: opening vldc@%d:%lu\n", inst, portno);
+
+ if (vport->status != VLDC_PORT_CLOSED) {
+ /* already open */
+ mutex_exit(&vminor->lock);
+ return (EBUSY);
+ }
+
+ /* session buffers, sized to the current MTU */
+ vport->recv_buf = kmem_alloc(vport->mtu, KM_SLEEP);
+ vport->send_buf = kmem_alloc(vport->mtu, KM_SLEEP);
+
+ vport->is_stream = B_FALSE; /* assume not a stream */
+ vport->hanged_up = B_FALSE;
+
+ vport->status = VLDC_PORT_OPEN;
+
+ mutex_exit(&vminor->lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * cb_close: close the port bound to this minor node.  Delegates the
+ * actual channel/buffer teardown to i_vldc_close_port(), which runs
+ * under the minor lock.
+ */
+static int
+vldc_close(dev_t dev, int flag, int otyp, cred_t *cred)
+{
+ _NOTE(ARGUNUSED(flag, otyp, cred))
+
+ minor_t mnum;
+ int inst;
+ uint64_t portno;
+ vldc_t *vldcp;
+ vldc_minor_t *vminor;
+ int rv;
+
+ mnum = getminor(dev);
+ inst = VLDCINST(mnum);
+ if ((vldcp = ddi_get_soft_state(vldc_ssp, inst)) == NULL) {
+ return (ENXIO);
+ }
+
+ vminor = VLDCMINOR(vldcp, mnum);
+ mutex_enter(&vminor->lock);
+ portno = vminor->portno;
+ if (portno == VLDC_INVALID_PORTNO) {
+ /* port was unbound while the device was open */
+ mutex_exit(&vminor->lock);
+ return (ENOLINK);
+ }
+
+ D1("vldc_close: closing vldc@%d:%lu\n", inst, portno);
+
+ rv = i_vldc_close_port(vldcp, portno);
+
+ mutex_exit(&vminor->lock);
+
+ return (rv);
+}
+
+/*
+ * (Re)configure the port's LDC channel for the given mode.  Validates
+ * channel_mode, tears down any previously initialized channel, then
+ * performs ldc_init / ldc_reg_callback / ldc_open and finally attempts
+ * ldc_up.  ECONNREFUSED from ldc_up is not treated as an error - the
+ * remote endpoint may simply not be up yet.  On any other failure the
+ * partially set-up channel is unwound via the error labels below and
+ * the port is left in VLDC_PORT_OPEN.  On success the port becomes
+ * VLDC_PORT_READY.  Caller must hold the port's minor lock (asserted).
+ * Returns 0 or an errno value.
+ */
+static int
+vldc_set_ldc_mode(vldc_port_t *vport, vldc_t *vldcp, int channel_mode)
+{
+ ldc_attr_t attr;
+ int rv;
+
+ ASSERT(MUTEX_HELD(&vport->minorp->lock));
+
+ /* validate mode */
+ switch (channel_mode) {
+ case LDC_MODE_STREAM:
+ vport->is_stream = B_TRUE;
+ break;
+ case LDC_MODE_RAW:
+ case LDC_MODE_UNRELIABLE:
+ case LDC_MODE_RELIABLE:
+ vport->is_stream = B_FALSE;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if (vport->status == VLDC_PORT_READY) {
+ /* channel already set up - tear it down before re-init */
+ rv = i_vldc_ldc_close(vport);
+ vport->status = VLDC_PORT_OPEN;
+ if (rv != 0) {
+ DWARN("vldc_set_ldc_mode: i_vldc_ldc_close "
+ "failed, rv=%d\n", rv);
+ return (rv);
+ }
+ }
+
+ D1("vldc_set_ldc_mode: vport status %d, mode %d\n",
+ vport->status, channel_mode);
+
+ vport->ldc_mode = channel_mode;
+
+ /* initialize the channel */
+ attr.devclass = LDC_DEV_SERIAL;
+ attr.instance = ddi_get_instance(vldcp->dip);
+ attr.qlen = VLDC_QUEUE_LEN;
+ attr.mode = vport->ldc_mode;
+
+ if ((rv = ldc_init(vport->ldc_id, &attr,
+ &vport->ldc_handle)) != 0) {
+ DWARN("vldc_ioctl_opt_op: ldc_init failed, rv=%d\n", rv);
+ goto error_init;
+ }
+
+ /* register it */
+ if ((rv = ldc_reg_callback(vport->ldc_handle,
+ i_vldc_cb, (caddr_t)vport)) != 0) {
+ DWARN("vldc_ioctl_opt_op: ldc_reg_callback failed, rv=%d\n",
+ rv);
+ goto error_reg;
+ }
+
+ /* open the channel */
+ if ((rv = ldc_open(vport->ldc_handle)) != 0) {
+ DWARN("vldc_ioctl_opt_op: ldc_open failed, rv=%d\n", rv);
+ goto error_open;
+ }
+
+ vport->status = VLDC_PORT_READY;
+
+ /*
+ * Attempt to bring the channel up, but do not
+ * fail if the other end is not up yet.
+ */
+ rv = ldc_up(vport->ldc_handle);
+
+ if (rv == ECONNREFUSED) {
+ D1("vldc_ioctl_opt_op: remote endpoint not up yet\n");
+ } else if (rv != 0) {
+ DWARN("vldc_ioctl_opt_op: ldc_up failed, rv=%d\n", rv);
+ goto error_up;
+ }
+
+ D1("vldc_ioctl_opt_op: ldc %ld initialized successfully\n",
+ vport->ldc_id);
+
+ return (0);
+
+/* unwind in reverse order of the setup steps above */
+error_up:
+ vport->status = VLDC_PORT_OPEN;
+ (void) ldc_close(vport->ldc_handle);
+error_open:
+ (void) ldc_unreg_callback(vport->ldc_handle);
+error_reg:
+ (void) ldc_fini(vport->ldc_handle);
+error_init:
+ return (rv);
+}
+
+/*
+ * VLDC_IOCTL_READ_COOKIE handler.  Copies in a vldc_data_t describing
+ * the request, reads 'length' bytes from the HV address in dst_addr
+ * into a temporary kernel buffer via ldc_mem_rdwr_pa(), copies the
+ * data out to the user buffer at src_addr, and copies the descriptor
+ * back with 'length' updated to the bytes actually read.  Note the
+ * field naming: for a read, dst_addr is the HV source and src_addr is
+ * the user-space destination.  Returns 0, EINVAL or EFAULT.
+ */
+static int
+i_vldc_ioctl_read_cookie(vldc_port_t *vport, int vldc_instance, void *arg,
+ int mode)
+{
+ vldc_data_t copy_info;
+ caddr_t buf;
+ uint64_t len;
+ int rv;
+
+ /* NOTE(review): '== -1' differs from the '!= 0' check in the
+ * write path; ddi_copyin's failure value - confirm and unify. */
+ if (ddi_copyin(arg, &copy_info, sizeof (copy_info), mode) == -1) {
+ return (EFAULT);
+ }
+
+ len = copy_info.length;
+ if (len > vldc_max_cookie) {
+ return (EINVAL);
+ }
+
+ /* allocate a temporary buffer */
+ /* NOTE(review): len == 0 reaches kmem_alloc(0) - confirm callers
+ * cannot request a zero-length transfer. */
+ buf = kmem_alloc(len, KM_SLEEP);
+
+ mutex_enter(&vport->minorp->lock);
+
+ D2("i_vldc_ioctl_read_cookie: vldc@%d:%d reading from 0x%lx "
+ "size 0x%lx to 0x%lx\n", vldc_instance, vport->number,
+ copy_info.dst_addr, copy_info.length, copy_info.src_addr);
+
+ /* read from the HV into the temporary buffer */
+ rv = ldc_mem_rdwr_pa(vport->ldc_handle, buf, &len,
+ (caddr_t)copy_info.dst_addr, LDC_COPY_IN);
+ if (rv != 0) {
+ DWARN("i_vldc_ioctl_read_cookie: vldc@%d:%d cannot read "
+ "address 0x%lx, rv=%d\n", vldc_instance, vport->number,
+ copy_info.dst_addr, rv);
+ mutex_exit(&vport->minorp->lock);
+ kmem_free(buf, copy_info.length);
+ return (EFAULT);
+ }
+
+ D2("i_vldc_ioctl_read_cookie: vldc@%d:%d read succeeded\n",
+ vldc_instance, vport->number);
+
+ mutex_exit(&vport->minorp->lock);
+
+ /* copy data from temporary buffer out to the caller and free buffer */
+ rv = ddi_copyout(buf, (caddr_t)copy_info.src_addr, len, mode);
+ kmem_free(buf, copy_info.length);
+ if (rv != 0) {
+ return (EFAULT);
+ }
+
+ /* set the structure to reflect outcome */
+ copy_info.length = len;
+ if (ddi_copyout(&copy_info, arg, sizeof (copy_info), mode) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+/*
+ * VLDC_IOCTL_WRITE_COOKIE handler.  Copies in a vldc_data_t describing
+ * the request, copies 'length' bytes of user data from src_addr into a
+ * temporary kernel buffer, writes them to the HV address in dst_addr
+ * via ldc_mem_rdwr_pa(), and copies the descriptor back with 'length'
+ * updated to the bytes actually written.  Returns 0, EINVAL or EFAULT.
+ */
+static int
+i_vldc_ioctl_write_cookie(vldc_port_t *vport, int vldc_instance, void *arg,
+ int mode)
+{
+ vldc_data_t copy_info;
+ caddr_t buf;
+ uint64_t len;
+ int rv;
+
+ if (ddi_copyin((caddr_t)arg, &copy_info,
+ sizeof (copy_info), mode) != 0) {
+ return (EFAULT);
+ }
+
+ len = copy_info.length;
+ if (len > vldc_max_cookie) {
+ return (EINVAL);
+ }
+
+ D2("i_vldc_ioctl_write_cookie: vldc@%d:%d writing 0x%lx size 0x%lx "
+ "to 0x%lx\n", vldc_instance, vport->number, copy_info.src_addr,
+ copy_info.length, copy_info.dst_addr);
+
+ /* allocate a temporary buffer */
+ /* NOTE(review): len == 0 reaches kmem_alloc(0) - confirm callers
+ * cannot request a zero-length transfer. */
+ buf = kmem_alloc(len, KM_SLEEP);
+
+ /* copy into the temporary buffer the data to be written to the HV */
+ if (ddi_copyin((caddr_t)copy_info.src_addr, buf,
+ copy_info.length, mode) != 0) {
+ kmem_free(buf, copy_info.length);
+ return (EFAULT);
+ }
+
+ mutex_enter(&vport->minorp->lock);
+
+ /* write the data from the temporary buffer to the HV */
+ rv = ldc_mem_rdwr_pa(vport->ldc_handle, buf, &len,
+ (caddr_t)copy_info.dst_addr, LDC_COPY_OUT);
+ if (rv != 0) {
+ DWARN("i_vldc_ioctl_write_cookie: vldc@%d:%d failed to write at"
+ " address 0x%lx\n, rv=%d", vldc_instance, vport->number,
+ copy_info.dst_addr, rv);
+ mutex_exit(&vport->minorp->lock);
+ kmem_free(buf, copy_info.length);
+ return (EFAULT);
+ }
+
+ D2("i_vldc_ioctl_write_cookie: vldc@%d:%d write succeeded\n",
+ vldc_instance, vport->number);
+
+ mutex_exit(&vport->minorp->lock);
+
+ kmem_free(buf, copy_info.length);
+
+ /* set the structure to reflect outcome */
+ copy_info.length = len;
+ if (ddi_copyout(&copy_info, (caddr_t)arg,
+ sizeof (copy_info), mode) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+/*
+ * VLDC_IOCTL_OPT_OP handler: get or set one port option per call -
+ * the MTU size (a set reallocates the data buffers when the port is
+ * not closed), the port status (get only), or the LDC channel mode
+ * (a set delegates to vldc_set_ldc_mode()).  Returns 0, an errno, or
+ * ENOTSUP for unsupported selectors/operations.
+ */
+static int
+i_vldc_ioctl_opt_op(vldc_port_t *vport, vldc_t *vldcp, void *arg, int mode)
+{
+ vldc_opt_op_t vldc_cmd;
+ uint32_t new_mtu;
+ int rv = 0;
+
+ if (ddi_copyin(arg, &vldc_cmd, sizeof (vldc_cmd), mode) != 0) {
+ return (EFAULT);
+ }
+
+ D1("vldc_ioctl_opt_op: op %d\n", vldc_cmd.opt_sel);
+
+ switch (vldc_cmd.opt_sel) {
+
+ case VLDC_OPT_MTU_SZ:
+
+ if (vldc_cmd.op_sel == VLDC_OP_GET) {
+ /* NOTE(review): GET reads vport->mtu without the
+ * minor lock - confirm this is acceptable. */
+ vldc_cmd.opt_val = vport->mtu;
+ if (ddi_copyout(&vldc_cmd, arg,
+ sizeof (vldc_cmd), mode) == -1) {
+ return (EFAULT);
+ }
+ } else {
+ new_mtu = vldc_cmd.opt_val;
+
+ /* MTU must hold at least one LDC packet */
+ if ((new_mtu < LDC_PACKET_SIZE) ||
+ (new_mtu > vldc_max_mtu)) {
+ return (EINVAL);
+ }
+
+ mutex_enter(&vport->minorp->lock);
+
+ if ((vport->status != VLDC_PORT_CLOSED) &&
+ (new_mtu != vport->mtu)) {
+ /*
+ * The port has buffers allocated since it is
+ * not closed plus the MTU size has changed.
+ * Reallocate the buffers to the new MTU size.
+ */
+ kmem_free(vport->recv_buf, vport->mtu);
+ vport->recv_buf = kmem_alloc(new_mtu, KM_SLEEP);
+
+ kmem_free(vport->send_buf, vport->mtu);
+ vport->send_buf = kmem_alloc(new_mtu, KM_SLEEP);
+
+ vport->mtu = new_mtu;
+ }
+
+ mutex_exit(&vport->minorp->lock);
+ }
+
+ break;
+
+ case VLDC_OPT_STATUS:
+
+ if (vldc_cmd.op_sel == VLDC_OP_GET) {
+ vldc_cmd.opt_val = vport->status;
+ if (ddi_copyout(&vldc_cmd, arg,
+ sizeof (vldc_cmd), mode) == -1) {
+ return (EFAULT);
+ }
+ } else {
+ /* status is read-only */
+ return (ENOTSUP);
+ }
+
+ break;
+
+ case VLDC_OPT_MODE:
+
+ if (vldc_cmd.op_sel == VLDC_OP_GET) {
+ vldc_cmd.opt_val = vport->ldc_mode;
+ if (ddi_copyout(&vldc_cmd, arg,
+ sizeof (vldc_cmd), mode) == -1) {
+ return (EFAULT);
+ }
+ } else {
+ mutex_enter(&vport->minorp->lock);
+ rv = vldc_set_ldc_mode(vport, vldcp, vldc_cmd.opt_val);
+ mutex_exit(&vport->minorp->lock);
+ }
+
+ break;
+
+ default:
+
+ D1("vldc_ioctl_opt_op: unsupported op %d\n", vldc_cmd.opt_sel);
+ return (ENOTSUP);
+ }
+
+ return (rv);
+}
+
+/*
+ * cb_ioctl: dispatch the vldc ioctls.  The minor's in_use count is
+ * raised for the duration of the call so i_vldc_remove_port() can wait
+ * for in-flight ioctls to drain before tearing the port down; the cv
+ * is signalled when the count drops back to zero.
+ */
+static int
+vldc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ _NOTE(ARGUNUSED(credp, rvalp))
+
+ int rv = EINVAL;
+ int instance;
+ minor_t minor;
+ uint64_t portno;
+ vldc_t *vldcp;
+ vldc_port_t *vport;
+ vldc_minor_t *vminor;
+
+ minor = getminor(dev);
+ instance = VLDCINST(minor);
+ vldcp = ddi_get_soft_state(vldc_ssp, instance);
+ if (vldcp == NULL) {
+ return (ENXIO);
+ }
+
+ vminor = VLDCMINOR(vldcp, minor);
+ mutex_enter(&vminor->lock);
+ portno = vminor->portno;
+ if (portno == VLDC_INVALID_PORTNO) {
+ mutex_exit(&vminor->lock);
+ return (ENOLINK);
+ }
+ /* hold off port removal while this ioctl is in flight */
+ vminor->in_use += 1;
+ mutex_exit(&vminor->lock);
+
+ vport = &(vldcp->port[portno]);
+
+ D1("vldc_ioctl: vldc@%d:%lu cmd=0x%x\n", instance, portno, cmd);
+
+ switch (cmd) {
+
+ case VLDC_IOCTL_OPT_OP:
+
+ rv = i_vldc_ioctl_opt_op(vport, vldcp, (void *)arg, mode);
+ break;
+
+ case VLDC_IOCTL_READ_COOKIE:
+
+ rv = i_vldc_ioctl_read_cookie(vport, instance,
+ (void *)arg, mode);
+ break;
+
+ case VLDC_IOCTL_WRITE_COOKIE:
+
+ rv = i_vldc_ioctl_write_cookie(vport, instance,
+ (void *)arg, mode);
+ break;
+
+ default:
+
+ DWARN("vldc_ioctl: vldc@%d:%lu unknown cmd=0x%x\n",
+ instance, portno, cmd);
+ rv = EINVAL;
+ break;
+ }
+
+ /* drop the in-flight hold; wake any waiter in i_vldc_remove_port() */
+ mutex_enter(&vminor->lock);
+ vminor->in_use -= 1;
+ if (vminor->in_use == 0) {
+ cv_signal(&vminor->cv);
+ }
+ mutex_exit(&vminor->lock);
+
+ D1("vldc_ioctl: rv=%d\n", rv);
+
+ return (rv);
+}
+
+/*
+ * cb_read: read up to MTU bytes from the LDC channel into the port's
+ * receive buffer and move them to the caller with uiomove().  The port
+ * must be VLDC_PORT_READY (channel configured).  A successful ldc_read
+ * that returns no data maps to EWOULDBLOCK; channel errors other than
+ * ENOBUFS/ETIMEDOUT/EWOULDBLOCK are reported as ECONNRESET.
+ */
+static int
+vldc_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ _NOTE(ARGUNUSED(credp))
+
+ int instance;
+ minor_t minor;
+ size_t size = 0;
+ uint64_t portno;
+ vldc_t *vldcp;
+ vldc_port_t *vport;
+ vldc_minor_t *vminor;
+ int rv = 0;
+
+ minor = getminor(dev);
+ instance = VLDCINST(minor);
+ vldcp = ddi_get_soft_state(vldc_ssp, instance);
+ if (vldcp == NULL) {
+ return (ENXIO);
+ }
+
+ vminor = VLDCMINOR(vldcp, minor);
+ mutex_enter(&vminor->lock);
+ portno = vminor->portno;
+ if (portno == VLDC_INVALID_PORTNO) {
+ mutex_exit(&vminor->lock);
+ return (ENOLINK);
+ }
+
+ D2("vldc_read: vldc@%d:%lu reading data\n", instance, portno);
+
+ vport = &(vldcp->port[portno]);
+
+ /* check the port status */
+ if (vport->status != VLDC_PORT_READY) {
+ DWARN("vldc_read: vldc@%d:%lu not in the ready state\n",
+ instance, portno);
+ mutex_exit(&vminor->lock);
+ return (ENOTACTIVE);
+ }
+
+ /* read data - at most one MTU's worth per call */
+ size = MIN(vport->mtu, uiop->uio_resid);
+ rv = ldc_read(vport->ldc_handle, vport->recv_buf, &size);
+
+ D2("vldc_read: vldc@%d:%lu ldc_read size=%ld, rv=%d\n",
+ instance, portno, size, rv);
+
+ if (rv == 0) {
+ if (size != 0) {
+ rv = uiomove(vport->recv_buf, size, UIO_READ, uiop);
+ } else {
+ /* no data available right now */
+ rv = EWOULDBLOCK;
+ }
+ } else {
+ /* map channel errors onto errnos the caller expects */
+ switch (rv) {
+ case ENOBUFS:
+ break;
+ case ETIMEDOUT:
+ case EWOULDBLOCK:
+ rv = EWOULDBLOCK;
+ break;
+ default:
+ rv = ECONNRESET;
+ break;
+ }
+ }
+
+ mutex_exit(&vminor->lock);
+
+ return (rv);
+}
+
+/*
+ * cb_write: copy caller data into the port's send buffer and write it
+ * to the LDC channel.  The port must be VLDC_PORT_READY.  A request
+ * larger than the MTU is truncated to one MTU for stream-mode ports
+ * and rejected with EMSGSIZE otherwise.  uio_resid is set to the bytes
+ * NOT sent.
+ */
+static int
+vldc_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ _NOTE(ARGUNUSED(credp))
+
+ int instance;
+ minor_t minor;
+ size_t size;
+ size_t orig_size;
+ uint64_t portno;
+ vldc_t *vldcp;
+ vldc_port_t *vport;
+ vldc_minor_t *vminor;
+ int rv = EINVAL;
+
+ minor = getminor(dev);
+ instance = VLDCINST(minor);
+ vldcp = ddi_get_soft_state(vldc_ssp, instance);
+ if (vldcp == NULL) {
+ return (ENXIO);
+ }
+
+ vminor = VLDCMINOR(vldcp, minor);
+ mutex_enter(&vminor->lock);
+ portno = vminor->portno;
+ if (portno == VLDC_INVALID_PORTNO) {
+ mutex_exit(&vminor->lock);
+ return (ENOLINK);
+ }
+
+ vport = &(vldcp->port[portno]);
+
+ /* check the port status */
+ if (vport->status != VLDC_PORT_READY) {
+ DWARN("vldc_write: vldc@%d:%lu not in the ready state\n",
+ instance, portno);
+ mutex_exit(&vminor->lock);
+ return (ENOTACTIVE);
+ }
+
+ orig_size = uiop->uio_resid;
+ size = orig_size;
+
+ if (size > vport->mtu) {
+ if (vport->is_stream) {
+ /* can only send MTU size at a time */
+ size = vport->mtu;
+ } else {
+ mutex_exit(&vminor->lock);
+ return (EMSGSIZE);
+ }
+ }
+
+ D2("vldc_write: vldc@%d:%lu writing %lu bytes\n", instance, portno,
+ size);
+
+ rv = uiomove(vport->send_buf, size, UIO_WRITE, uiop);
+ if (rv == 0) {
+ /* NOTE(review): on ldc_write failure 'size' keeps whatever
+ * ldc_write left in it, so resid below may under-report the
+ * unsent bytes - confirm ldc_write's contract. */
+ rv = ldc_write(vport->ldc_handle, (caddr_t)vport->send_buf,
+ &size);
+ if (rv != 0) {
+ DWARN("vldc_write: vldc@%d:%lu failed writing %lu "
+ "bytes rv=%d\n", instance, portno, size, rv);
+ }
+ } else {
+ /* uiomove failed - nothing was consumed or sent */
+ size = 0;
+ }
+
+ mutex_exit(&vminor->lock);
+
+ /* resid is total number of bytes *not* sent */
+ uiop->uio_resid = orig_size - size;
+
+ return (rv);
+}
+
+/*
+ * cb_chpoll: report poll events for the port.  When the channel is up,
+ * POLLIN is signalled if the receive queue is non-empty and POLLOUT is
+ * always signalled; when the channel is down and a hangup was recorded
+ * by i_vldc_cb(), a one-shot POLLHUP is reported.  The port must be
+ * VLDC_PORT_READY.
+ */
+static int
+vldc_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ int instance;
+ minor_t minor;
+ uint64_t portno;
+ vldc_t *vldcp;
+ vldc_port_t *vport;
+ vldc_minor_t *vminor;
+ ldc_status_t ldc_state;
+ boolean_t isempty;
+ int rv;
+
+ minor = getminor(dev);
+ instance = VLDCINST(minor);
+ vldcp = ddi_get_soft_state(vldc_ssp, instance);
+ if (vldcp == NULL) {
+ return (ENXIO);
+ }
+
+ vminor = VLDCMINOR(vldcp, minor);
+ mutex_enter(&vminor->lock);
+ portno = vminor->portno;
+ if (portno == VLDC_INVALID_PORTNO) {
+ mutex_exit(&vminor->lock);
+ return (ENOLINK);
+ }
+
+ vport = &(vldcp->port[portno]);
+
+ /* check the port status */
+ if (vport->status != VLDC_PORT_READY) {
+ mutex_exit(&vminor->lock);
+ return (ENOTACTIVE);
+ }
+
+ D2("vldc_chpoll: vldc@%d:%lu polling events 0x%x\n",
+ instance, portno, events);
+
+ rv = ldc_status(vport->ldc_handle, &ldc_state);
+ if (rv != 0) {
+ DWARN("vldc_chpoll: vldc@%d:%lu could not get ldc status, "
+ "rv=%d\n", instance, portno, rv);
+ mutex_exit(&vminor->lock);
+ return (EBADFD);
+ }
+
+ *reventsp = 0;
+
+ if (ldc_state == LDC_UP) {
+ /*
+ * Check if the receive queue is empty and if not, signal that
+ * there is data ready to read.
+ */
+ if (events & POLLIN) {
+ if ((ldc_chkq(vport->ldc_handle, &isempty) == 0) &&
+ (isempty == B_FALSE)) {
+ *reventsp |= POLLIN;
+ }
+ }
+
+ if (events & POLLOUT)
+ *reventsp |= POLLOUT;
+
+ } else if (vport->hanged_up) {
+ /* report the hangup once, then clear it */
+ *reventsp |= POLLHUP;
+ vport->hanged_up = B_FALSE;
+ }
+
+ mutex_exit(&vminor->lock);
+
+ /* NOTE(review): vport->poll is published after the lock is dropped -
+ * presumably safe because the pollhead outlives the open; confirm. */
+ if (((*reventsp) == 0) && (!anyyet)) {
+ *phpp = &vport->poll;
+ }
+
+ D2("vldc_chpoll: vldc@%d:%lu ev=0x%x, rev=0x%x\n",
+ instance, portno, events, *reventsp);
+
+ return (0);
+}
diff --git a/usr/src/uts/sun4v/io/vnet.c b/usr/src/uts/sun4v/io/vnet.c
new file mode 100644
index 0000000000..ad625953e7
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vnet.c
@@ -0,0 +1,1049 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/stream.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/ksynch.h>
+#include <sys/stat.h>
+#include <sys/modctl.h>
+#include <sys/debug.h>
+#include <sys/ethernet.h>
+#include <sys/dlpi.h>
+#include <net/if.h>
+#include <sys/mac.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/strsun.h>
+#include <sys/note.h>
+#include <sys/vnet.h>
+
+/*
+ * Function prototypes.
+ */
+
+/* DDI entrypoints */
+static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
+static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
+
+/* MAC entrypoints */
+static uint64_t vnet_m_stat(void *arg, enum mac_stat stat);
+static int vnet_m_start(void *);
+static void vnet_m_stop(void *);
+static int vnet_m_promisc(void *, boolean_t);
+static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
+static int vnet_m_unicst(void *, const uint8_t *);
+static void vnet_m_resources(void *);
+static void vnet_m_ioctl(void *, queue_t *, mblk_t *);
+/* non-static: presumably also referenced outside this file — see fdb m_tx use */
+mblk_t *vnet_m_tx(void *, mblk_t *);
+
+/* vnet internal functions */
+static int vnet_mac_register(vnet_t *);
+static int vnet_read_mac_address(vnet_t *vnetp);
+static void vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
+static void vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp);
+static vp_tl_t *vnet_get_vptl(vnet_t *vnetp, const char *devname);
+static fdb_t *vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr);
+
+/* exported functions - called by the generic transport (vnet_gen.c) */
+void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
+void vnet_del_fdb(void *arg, uint8_t *macaddr);
+void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
+void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
+void vnet_del_def_rte(void *arg);
+
+/* externs - implemented by the generic LDC transport (vnet_gen.c) */
+extern int vgen_init(void *vnetp, dev_info_t *vnetdip, void *vnetmacp,
+    const uint8_t *macaddr, mac_t **vgenmacp);
+extern void vgen_uninit(void *arg);
+
+/*
+ * Linked list of "vnet_t" structures - one per instance.
+ * vnet_rw protects insertions/removals on this list (see
+ * vnetattach()/vnetdetach()).
+ */
+static vnet_t *vnet_headp = NULL;
+static krwlock_t vnet_rw;
+
+/* Tunables - patchable via /etc/system; validated where they are consumed */
+uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
+uint32_t vnet_reclaim_lowat = VNET_RECLAIM_LOWAT;	/* tx recl low watermark */
+uint32_t vnet_reclaim_hiwat = VNET_RECLAIM_HIWAT;	/* tx recl high watermark */
+uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL;	/* watchdog freq in msec */
+uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;	/* tx timeout in msec */
+uint32_t vnet_ldc_qlen = VNET_LDC_QLEN;		/* ldc qlen */
+uint32_t vnet_nfdb_hash = VNET_NFDB_HASH;	/* size of fdb hash table */
+
+/*
+ * Property names
+ */
+/* OBP/MD property holding the instance's factory MAC address */
+static char macaddr_propname[] = "local-mac-address";
+
+/* all-ones ethernet broadcast address */
+static struct ether_addr etherbroadcastaddr = {
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/*
+ * MIB II broadcast/multicast packets
+ * IS_BROADCAST: destination equals the all-ones address.
+ * IS_MULTICAST: group bit (low-order bit of the first octet) is set;
+ * note a broadcast address also satisfies IS_MULTICAST.
+ */
+#define	IS_BROADCAST(ehp) \
+		(ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
+#define	IS_MULTICAST(ehp) \
+		((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
+
+/*
+ * This is the string displayed by modinfo(1m).
+ */
+static char vnet_ident[] = "vnet driver v1.0";
+extern struct mod_ops mod_driverops;
+
+/*
+ * Character/block entry points are all stubbed out: vnet is accessed
+ * through the MAC/DLPI framework (mac_init_ops() in _init()), not
+ * through /devices nodes.
+ */
+static struct cb_ops cb_vnetops = {
+	nulldev,		/* cb_open */
+	nulldev,		/* cb_close */
+	nodev,			/* cb_strategy */
+	nodev,			/* cb_print */
+	nodev,			/* cb_dump */
+	nodev,			/* cb_read */
+	nodev,			/* cb_write */
+	nodev,			/* cb_ioctl */
+	nodev,			/* cb_devmap */
+	nodev,			/* cb_mmap */
+	nodev,			/* cb_segmap */
+	nochpoll,		/* cb_chpoll */
+	ddi_prop_op,		/* cb_prop_op */
+	NULL,			/* cb_stream */
+	(int)(D_MP)		/* cb_flag */
+};
+
+static struct dev_ops vnetops = {
+	DEVO_REV,		/* devo_rev */
+	0,			/* devo_refcnt */
+	NULL,			/* devo_getinfo */
+	nulldev,		/* devo_identify */
+	nulldev,		/* devo_probe */
+	vnetattach,		/* devo_attach */
+	vnetdetach,		/* devo_detach */
+	nodev,			/* devo_reset */
+	&cb_vnetops,		/* devo_cb_ops */
+	(struct bus_ops *)NULL	/* devo_bus_ops */
+};
+
+static struct modldrv modldrv = {
+	&mod_driverops,		/* Type of module.  This one is a driver */
+	vnet_ident,		/* ID string */
+	&vnetops		/* driver specific ops */
+};
+
+/* module linkage consumed by mod_install()/mod_remove()/mod_info() */
+static struct modlinkage modlinkage = {
+	MODREV_1, (void *)&modldrv, NULL
+};
+
+
+/*
+ * Print debug messages - set to 0xf to enable all msgs
+ */
+int _vnet_dbglevel = 0x8;
+
+/*
+ * Backend for the DBG1/DBG2/DWARN/DERR macros: format the message and
+ * emit it via cmn_err(), prefixed with the instance number when a
+ * vnet_t is supplied (arg may be NULL for instance-independent msgs).
+ * NOTE(review): message is formatted into a fixed 512-byte stack
+ * buffer with vsprintf() - callers must keep messages short.
+ */
+void
+_vnetdebug_printf(void *arg, const char *fmt, ...)
+{
+	char    buf[512];
+	va_list ap;
+	vnet_t *vnetp = (vnet_t *)arg;
+
+	va_start(ap, fmt);
+	(void) vsprintf(buf, fmt, ap);
+	va_end(ap);
+
+	if (vnetp == NULL)
+		cmn_err(CE_CONT, "%s\n", buf);
+	else
+		cmn_err(CE_CONT, "vnet%d: %s\n", vnetp->instance, buf);
+}
+
+#ifdef DEBUG
+
+/*
+ * XXX: any changes to the definitions below need corresponding changes in
+ * vnet_gen.c
+ */
+
+/*
+ * debug levels:
+ * DBG_LEVEL1: Function entry/exit tracing
+ * DBG_LEVEL2: Info messages
+ * DBG_LEVEL3: Warning messages
+ * DBG_LEVEL4: Error messages
+ */
+
+enum	{ DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
+	    DBG_LEVEL4 = 0x08 };
+
+/*
+ * Each macro takes a parenthesized argument list _s that is forwarded
+ * verbatim to _vnetdebug_printf(), e.g. DBG1((vnetp, "fmt", ...)).
+ * The message is emitted only when the matching bit is set in
+ * _vnet_dbglevel.
+ */
+#define	DBG1(_s)	do {					\
+		    if ((_vnet_dbglevel & DBG_LEVEL1) != 0) {	\
+			    _vnetdebug_printf _s;		\
+		    }						\
+		_NOTE(CONSTCOND) } while (0)
+
+#define	DBG2(_s)	do {					\
+		    if ((_vnet_dbglevel & DBG_LEVEL2) != 0) {	\
+			    _vnetdebug_printf _s;		\
+		    }						\
+		_NOTE(CONSTCOND) } while (0)
+
+#define	DWARN(_s)	do {					\
+		    if ((_vnet_dbglevel & DBG_LEVEL3) != 0) {	\
+			    _vnetdebug_printf _s;		\
+		    }						\
+		_NOTE(CONSTCOND) } while (0)
+
+#define	DERR(_s)	do {					\
+		    if ((_vnet_dbglevel & DBG_LEVEL4) != 0) {	\
+			    _vnetdebug_printf _s;		\
+		    }						\
+		_NOTE(CONSTCOND) } while (0)
+
+#else
+
+/* non-DEBUG build: compile the messages away (arguments still typecheck) */
+#define	DBG1(_s)	if (0)	_vnetdebug_printf _s
+#define	DBG2(_s)	if (0)	_vnetdebug_printf _s
+#define	DWARN(_s)	if (0)	_vnetdebug_printf _s
+#define	DERR(_s)	if (0)	_vnetdebug_printf _s
+
+#endif
+
+/*
+ * _init(9E): initialize the loadable module.
+ * Registers the MAC-provider dev_ops via mac_init_ops() before
+ * mod_install(), and undoes that registration if the install fails.
+ */
+int
+_init(void)
+{
+	int status;
+
+	DBG1((NULL, "_init: enter\n"));
+
+	mac_init_ops(&vnetops, "vnet");
+	status = mod_install(&modlinkage);
+	if (status != 0) {
+		/* install failed: unwind the MAC registration */
+		mac_fini_ops(&vnetops);
+	}
+
+	DBG1((NULL, "_init: exit\n"));
+	return (status);
+}
+
+/*
+ * _fini(9E): prepare the module for unloading.
+ * mac_fini_ops() is only called once mod_remove() has succeeded, so a
+ * busy module is left fully registered.
+ */
+int
+_fini(void)
+{
+	int status;
+
+	DBG1((NULL, "_fini: enter\n"));
+
+	status = mod_remove(&modlinkage);
+	if (status != 0)
+		return (status);
+	mac_fini_ops(&vnetops);
+
+	DBG1((NULL, "_fini: exit\n"));
+	return (status);
+}
+
+/* _info(9E): return information about the loadable module */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * attach(9E): attach a device to the system.
+ * called once for each instance of the device on the system.
+ *
+ * Allocates the per-instance vnet_t/mac_t, reads the MAC address from
+ * the "local-mac-address" property, brings up the generic LDC
+ * transport (vgen), allocates the forwarding-database hash table, and
+ * registers with the MAC layer.  attach_state accumulates a bit per
+ * completed step so the failure path can unwind exactly what was done.
+ */
+static int
+vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	mac_t *macp;
+	vnet_t *vnetp;
+	vp_tl_t *vp_tlp;
+	int instance;
+	int status;
+	enum	{ AST_init = 0x0, AST_vnet_alloc = 0x1,
+		AST_mac_alloc = 0x2, AST_read_macaddr = 0x4,
+		AST_vgen_init = 0x8, AST_vptl_alloc = 0x10,
+		AST_fdbh_alloc = 0x20 }
+		attach_state;
+	mac_t *vgenmacp = NULL;
+	uint32_t nfdbh = 0;
+
+	attach_state = AST_init;
+
+	/* only a fresh attach is supported; resume is not implemented */
+	switch (cmd) {
+	case DDI_ATTACH:
+		break;
+	case DDI_RESUME:
+	case DDI_PM_RESUME:
+	default:
+		goto vnet_attach_fail;
+	}
+
+	instance = ddi_get_instance(dip);
+	DBG1((NULL, "vnetattach: instance(%d) enter\n", instance));
+
+	/* allocate vnet_t and mac_t structures */
+	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
+	attach_state |= AST_vnet_alloc;
+
+	macp = kmem_zalloc(sizeof (mac_t), KM_SLEEP);
+	attach_state |= AST_mac_alloc;
+
+	/* setup links to vnet_t from both devinfo and mac_t */
+	ddi_set_driver_private(dip, (caddr_t)vnetp);
+	macp->m_driver = vnetp;
+	vnetp->dip = dip;
+	vnetp->macp = macp;
+	vnetp->instance = instance;
+
+	/* read the mac address */
+	status = vnet_read_mac_address(vnetp);
+	if (status != DDI_SUCCESS) {
+		goto vnet_attach_fail;
+	}
+	attach_state |= AST_read_macaddr;
+
+	/*
+	 * Initialize the generic vnet proxy transport. This is the first
+	 * and default transport used by vnet. The generic transport
+	 * is provided by using sun4v LDC (logical domain channel). On success,
+	 * vgen_init() provides a pointer to mac_t of generic transport.
+	 * Currently, this generic layer provides network connectivity to other
+	 * vnets within ldoms and also to remote hosts outside ldoms through
+	 * the virtual switch (vsw) device on domain0. In the future, when
+	 * physical adapters that are able to share their resources (such as
+	 * dma channels) with guest domains become available, the vnet device
+	 * will use hardware specific driver to communicate directly over the
+	 * physical device to reach remote hosts without going through vswitch.
+	 */
+	status = vgen_init(vnetp, vnetp->dip, vnetp->macp,
+	    (uint8_t *)vnetp->curr_macaddr, &vgenmacp);
+	if (status != DDI_SUCCESS) {
+		DERR((vnetp, "vgen_init() failed\n"));
+		goto vnet_attach_fail;
+	}
+	attach_state |= AST_vgen_init;
+
+	/* record the generic transport under the name "vgen<instance>" */
+	vp_tlp = kmem_zalloc(sizeof (vp_tl_t), KM_SLEEP);
+	vp_tlp->macp = vgenmacp;
+	(void) snprintf(vp_tlp->name, MAXNAMELEN, "%s%u", "vgen", instance);
+	(void) strcpy(vnetp->vgen_name, vp_tlp->name);
+
+	/* add generic transport to the list of vnet proxy transports */
+	vnet_add_vptl(vnetp, vp_tlp);
+	attach_state |= AST_vptl_alloc;
+
+	/* clamp the tunable hash size back to the default if out of range */
+	nfdbh = vnet_nfdb_hash;
+	if ((nfdbh < VNET_NFDB_HASH) || (nfdbh > VNET_NFDB_HASH_MAX)) {
+		vnetp->nfdb_hash = VNET_NFDB_HASH;
+	}
+	else
+		vnetp->nfdb_hash = nfdbh;
+
+	/* allocate fdb hash table, with an extra slot for default route */
+	vnetp->fdbhp = kmem_zalloc(sizeof (fdb_fanout_t) *
+	    (vnetp->nfdb_hash + 1), KM_SLEEP);
+	attach_state |= AST_fdbh_alloc;
+
+	/* register with MAC layer */
+	status = vnet_mac_register(vnetp);
+	if (status != DDI_SUCCESS) {
+		goto vnet_attach_fail;
+	}
+
+	/* add to the list of vnet devices */
+	WRITE_ENTER(&vnet_rw);
+	vnetp->nextp = vnet_headp;
+	vnet_headp = vnetp;
+	RW_EXIT(&vnet_rw);
+
+	DBG1((NULL, "vnetattach: instance(%d) exit\n", instance));
+	return (DDI_SUCCESS);
+
+vnet_attach_fail:
+	/* unwind, in reverse order, only the steps recorded in attach_state */
+	if (attach_state & AST_fdbh_alloc) {
+		kmem_free(vnetp->fdbhp,
+		    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
+	}
+	if (attach_state & AST_vptl_alloc) {
+		WRITE_ENTER(&vnetp->trwlock);
+		vnet_del_vptl(vnetp, vp_tlp);
+		RW_EXIT(&vnetp->trwlock);
+	}
+	if (attach_state & AST_vgen_init) {
+		vgen_uninit(vgenmacp->m_driver);
+	}
+	if (attach_state & AST_mac_alloc) {
+		KMEM_FREE(macp);
+	}
+	if (attach_state & AST_vnet_alloc) {
+		KMEM_FREE(vnetp);
+	}
+	return (DDI_FAILURE);
+}
+
+/*
+ * detach(9E): detach a device from the system.
+ * Fails (returns DDI_FAILURE) if the MAC layer still has users of this
+ * instance; otherwise unlinks the instance, tears down all proxy
+ * transports (uninitializing the generic one), and frees the state.
+ */
+static int
+vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	vnet_t *vnetp;
+	vnet_t **vnetpp;
+	vp_tl_t *vp_tlp;
+	int instance;
+
+	instance = ddi_get_instance(dip);
+	DBG1((NULL, "vnetdetach: instance(%d) enter\n", instance));
+
+	vnetp = ddi_get_driver_private(dip);
+	if (vnetp == NULL) {
+		goto vnet_detach_fail;
+	}
+
+	/* only a full detach is supported; suspend is not implemented */
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	case DDI_SUSPEND:
+	case DDI_PM_SUSPEND:
+	default:
+		goto vnet_detach_fail;
+	}
+
+	/*
+	 * Unregister from the MAC subsystem.  This can fail, in
+	 * particular if there are DLPI style-2 streams still open -
+	 * in which case we just return failure.
+	 */
+	if (mac_unregister(vnetp->macp) != 0)
+		goto vnet_detach_fail;
+
+	/* unlink from instance(vnet_t) list */
+	WRITE_ENTER(&vnet_rw);
+	for (vnetpp = &vnet_headp; *vnetpp; vnetpp = &(*vnetpp)->nextp) {
+		if (*vnetpp == vnetp) {
+			*vnetpp = vnetp->nextp;
+			break;
+		}
+	}
+	RW_EXIT(&vnet_rw);
+
+	/* uninit and free vnet proxy transports */
+	WRITE_ENTER(&vnetp->trwlock);
+	while ((vp_tlp = vnetp->tlp) != NULL) {
+		/* the generic transport is identified by its saved name */
+		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
+			/* uninitialize generic transport */
+			vgen_uninit(vp_tlp->macp->m_driver);
+		}
+		vnet_del_vptl(vnetp, vp_tlp);
+	}
+	RW_EXIT(&vnetp->trwlock);
+
+	KMEM_FREE(vnetp->macp);
+	KMEM_FREE(vnetp);
+
+	return (DDI_SUCCESS);
+
+vnet_detach_fail:
+	return (DDI_FAILURE);
+}
+
+/*
+ * MAC m_start entry point: enable the device for transmit/receive by
+ * starting every registered proxy transport.  Always returns
+ * VNET_SUCCESS (per-transport m_start return values are not checked).
+ */
+static int
+vnet_m_start(void *arg)
+{
+	vnet_t *vnetp = arg;
+	vp_tl_t	*vp_tlp;
+	mac_t *vp_macp;
+
+	DBG1((vnetp, "vnet_m_start: enter\n"));
+
+	/*
+	 * XXX
+	 * Currently, we only have generic transport. m_start() invokes
+	 * vgen_start() which enables ports/channels in vgen and
+	 * initiates handshake with peer vnets and vsw. In the future when we
+	 * have support for hardware specific transports, this information
+	 * needs to be propagated back to vnet from vgen and we need to revisit
+	 * this code (see comments in vnet_attach()).
+	 *
+	 */
+	WRITE_ENTER(&vnetp->trwlock);
+	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
+		vp_macp = vp_tlp->macp;
+		vp_macp->m_start(vp_macp->m_driver);
+	}
+	RW_EXIT(&vnetp->trwlock);
+
+	DBG1((vnetp, "vnet_m_start: exit\n"));
+	return (VNET_SUCCESS);
+
+}
+
+/*
+ * MAC m_stop entry point: stop transmit/receive for the device by
+ * stopping every registered proxy transport (mirror of vnet_m_start).
+ */
+static void
+vnet_m_stop(void *arg)
+{
+	vnet_t *vnetp = arg;
+	vp_tl_t	*vp_tlp;
+	mac_t *vp_macp;
+
+	DBG1((vnetp, "vnet_m_stop: enter\n"));
+
+	WRITE_ENTER(&vnetp->trwlock);
+	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
+		vp_macp = vp_tlp->macp;
+		vp_macp->m_stop(vp_macp->m_driver);
+	}
+	RW_EXIT(&vnetp->trwlock);
+
+	DBG1((vnetp, "vnet_m_stop: exit\n"));
+}
+
+/*
+ * MAC m_unicst entry point: set the unicast mac address of the device.
+ * Dynamic MAC address changes are not supported - the request is
+ * silently accepted and the address is left unchanged.
+ */
+static int
+vnet_m_unicst(void *arg, const uint8_t *macaddr)
+{
+	_NOTE(ARGUNUSED(macaddr))
+
+	vnet_t *vnetp = arg;
+
+	DBG1((vnetp, "vnet_m_unicst: enter\n"));
+	/*
+	 * XXX: setting mac address dynamically is not supported.
+	 */
+#if 0
+	bcopy(macaddr, vnetp->curr_macaddr, ETHERADDRL);
+#endif
+	DBG1((vnetp, "vnet_m_unicst: exit\n"));
+
+	return (VNET_SUCCESS);
+}
+
+/*
+ * MAC m_multicst entry point: enable/disable a multicast address.
+ * The request is forwarded only to the generic (vgen) transport,
+ * matched by name; other transports are skipped.
+ */
+static int
+vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
+{
+	_NOTE(ARGUNUSED(add, mca))
+
+	vnet_t *vnetp = arg;
+	vp_tl_t	*vp_tlp;
+	mac_t *vp_macp;
+	int rv = VNET_SUCCESS;
+
+	DBG1((vnetp, "vnet_m_multicst: enter\n"));
+	READ_ENTER(&vnetp->trwlock);
+	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
+		if (strcmp(vnetp->vgen_name, vp_tlp->name) == 0) {
+			vp_macp = vp_tlp->macp;
+			rv = vp_macp->m_multicst(vp_macp->m_driver, add, mca);
+			break;
+		}
+	}
+	RW_EXIT(&vnetp->trwlock);
+	DBG1((vnetp, "vnet_m_multicst: exit\n"));
+	return (rv);
+}
+
+/*
+ * MAC m_promisc entry point: set or clear promiscuous mode.
+ * Not supported - the request is accepted as a no-op.
+ */
+static int
+vnet_m_promisc(void *arg, boolean_t on)
+{
+	_NOTE(ARGUNUSED(on))
+
+	vnet_t *vnetp = arg;
+	DBG1((vnetp, "vnet_m_promisc: enter\n"));
+	/*
+	 * XXX: setting promiscuous mode is not supported, just return success.
+	 */
+	DBG1((vnetp, "vnet_m_promisc: exit\n"));
+	return (VNET_SUCCESS);
+}
+
+/*
+ * Transmit a chain of packets. This function provides switching functionality
+ * based on the destination mac address to reach other guests (within ldoms) or
+ * external hosts.
+ *
+ * Each packet is looked up in the forwarding database: a hit sends it
+ * directly to the peer's registered m_tx; a miss (broadcast, multicast
+ * or unknown unicast) sends it via the default route (vsw), or drops
+ * it if no default route exists.  Returns NULL when the whole chain
+ * was consumed, otherwise the remainder of the chain starting at the
+ * packet whose transmit failed (so the MAC layer can retry later).
+ */
+mblk_t *
+vnet_m_tx(void *arg, mblk_t *mp)
+{
+	vnet_t *vnetp;
+	mblk_t *next;
+	uint32_t fdbhash;
+	fdb_t *fdbp;
+	fdb_fanout_t *fdbhp;
+	struct ether_header *ehp;
+	uint8_t *macaddr;
+	mblk_t *resid_mp;
+
+	vnetp = (vnet_t *)arg;
+	DBG1((vnetp, "vnet_m_tx: enter\n"));
+	ASSERT(mp != NULL);
+
+	while (mp != NULL) {
+		/* detach the head packet from the chain before sending it */
+		next = mp->b_next;
+		mp->b_next = NULL;
+
+		/* get the destination mac address in the eth header */
+		ehp = (struct ether_header *)mp->b_rptr;
+		macaddr = (uint8_t *)&ehp->ether_dhost;
+
+		/* Calculate hash value and fdb fanout */
+		fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
+		fdbhp = &(vnetp->fdbhp[fdbhash]);
+
+		READ_ENTER(&fdbhp->rwlock);
+		fdbp = vnet_lookup_fdb(fdbhp, macaddr);
+		if (fdbp) {
+			/*
+			 * If the destination is in FDB, the destination is
+			 * a vnet device within ldoms and directly reachable,
+			 * invoke the tx function in the fdb entry.
+			 */
+			resid_mp = fdbp->m_tx(fdbp->txarg, mp);
+			if (resid_mp != NULL) {
+				/* m_tx failed: re-link and return remainder */
+				mp->b_next = next;
+				RW_EXIT(&fdbhp->rwlock);
+				break;
+			}
+			RW_EXIT(&fdbhp->rwlock);
+		} else {
+			/* destination is not in FDB */
+			RW_EXIT(&fdbhp->rwlock);
+			/*
+			 * If the destination is broadcast/multicast
+			 * or an unknown unicast address, forward the
+			 * packet to vsw, using the last slot in fdb which is
+			 * reserved for default route.
+			 */
+			fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
+			READ_ENTER(&fdbhp->rwlock);
+			fdbp = fdbhp->headp;
+			if (fdbp) {
+				resid_mp = fdbp->m_tx(fdbp->txarg, mp);
+				if (resid_mp != NULL) {
+					/* m_tx failed */
+					mp->b_next = next;
+					RW_EXIT(&fdbhp->rwlock);
+					break;
+				}
+			} else {
+				/* no default route registered: drop */
+				freemsg(mp);
+			}
+			RW_EXIT(&fdbhp->rwlock);
+		}
+
+		mp = next;
+	}
+
+	DBG1((vnetp, "vnet_m_tx: exit\n"));
+	return (mp);
+}
+
+/*
+ * MAC m_resources entry point: have every registered proxy transport
+ * register its rx resources with the mac layer.
+ */
+static void
+vnet_m_resources(void *arg)
+{
+	vnet_t *vnetp = arg;
+	vp_tl_t	*vp_tlp;
+	mac_t *vp_macp;
+
+	DBG1((vnetp, "vnet_m_resources: enter\n"));
+
+	WRITE_ENTER(&vnetp->trwlock);
+	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
+		vp_macp = vp_tlp->macp;
+		vp_macp->m_resources(vp_macp->m_driver);
+	}
+	RW_EXIT(&vnetp->trwlock);
+
+	DBG1((vnetp, "vnet_m_resources: exit\n"));
+}
+
+/*
+ * vnet specific ioctls.
+ * No commands are currently implemented: every request is nak'd with
+ * EINVAL via miocnak().
+ */
+static void
+vnet_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
+{
+	vnet_t *vnetp = (vnet_t *)arg;
+	struct iocblk *iocp;
+	int cmd;
+
+	DBG1((vnetp, "vnet_m_ioctl: enter\n"));
+
+	iocp = (struct iocblk *)mp->b_rptr;
+	iocp->ioc_error = 0;
+	cmd = iocp->ioc_cmd;
+	switch (cmd) {
+	default:
+		miocnak(wq, mp, 0, EINVAL);
+		break;
+	}
+	DBG1((vnetp, "vnet_m_ioctl: exit\n"));
+}
+
+/*
+ * MAC m_stat entry point: get statistics from the device.
+ * (Declared static in the prototype above; the later declaration here
+ * without a storage class keeps internal linkage.)
+ */
+uint64_t
+vnet_m_stat(void *arg, enum mac_stat stat)
+{
+	vnet_t *vnetp = arg;
+	vp_tl_t	*vp_tlp;
+	mac_t	*vp_macp;
+	uint64_t val = 0;
+
+	DBG1((vnetp, "vnet_m_stat: enter\n"));
+
+	/*
+	 * get the specified statistic from each transport
+	 * and return the aggregate val
+	 */
+	READ_ENTER(&vnetp->trwlock);
+	for (vp_tlp = vnetp->tlp; vp_tlp != NULL; vp_tlp = vp_tlp->nextp) {
+		vp_macp = vp_tlp->macp;
+		val += vp_macp->m_stat(vp_macp->m_driver, stat);
+	}
+	RW_EXIT(&vnetp->trwlock);
+
+	DBG1((vnetp, "vnet_m_stat: exit\n"));
+	return (val);
+}
+
+/*
+ * Wrapper for mac_register(): fill in the mac_info_t (media type,
+ * SDU limits, address length, broadcast/unicast addresses, supported
+ * stats) and the m_* entry points, then register with the MAC layer.
+ * On failure the mac_t is freed and DDI_FAILURE is returned.
+ */
+static int
+vnet_mac_register(vnet_t *vnetp)
+{
+	mac_info_t *mip;
+	mac_t *macp;
+
+	macp = vnetp->macp;
+
+	mip = &(macp->m_info);
+	mip->mi_media = DL_ETHER;
+	mip->mi_sdu_min = 0;
+	mip->mi_sdu_max = ETHERMTU;
+	mip->mi_cksum = 0;
+	mip->mi_poll = 0;	/* DL_CAPAB_POLL ? */
+	mip->mi_addr_length = ETHERADDRL;
+	bcopy(&etherbroadcastaddr, mip->mi_brdcst_addr, ETHERADDRL);
+	bcopy(vnetp->curr_macaddr, mip->mi_unicst_addr, ETHERADDRL);
+
+	/* advertise MIB-II/ethernet stats, minus those vnet can't supply */
+	MAC_STAT_MIB(mip->mi_stat);
+	mip->mi_stat[MAC_STAT_UNKNOWNS] = B_FALSE;
+	MAC_STAT_ETHER(mip->mi_stat);
+	mip->mi_stat[MAC_STAT_SQE_ERRORS] = B_FALSE;
+	mip->mi_stat[MAC_STAT_MACRCV_ERRORS] = B_FALSE;
+
+	macp->m_stat = vnet_m_stat;
+	macp->m_start = vnet_m_start;
+	macp->m_stop = vnet_m_stop;
+	macp->m_promisc = vnet_m_promisc;
+	macp->m_multicst = vnet_m_multicst;
+	macp->m_unicst = vnet_m_unicst;
+	macp->m_resources = vnet_m_resources;
+	macp->m_ioctl = vnet_m_ioctl;
+	macp->m_tx = vnet_m_tx;
+
+	macp->m_dip = vnetp->dip;
+	macp->m_ident = MAC_IDENT;
+
+	/*
+	 * Finally, we're ready to register ourselves with the MAC layer
+	 * interface; if this succeeds, we're all ready to start()
+	 */
+	if (mac_register(macp) != 0) {
+		KMEM_FREE(macp);
+		return (DDI_FAILURE);
+	}
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Append vp_tlp to the instance's proxy-transport list.
+ * Takes trwlock itself (callers must not already hold it).
+ */
+static void
+vnet_add_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
+{
+	vp_tl_t *ttlp;
+
+	WRITE_ENTER(&vnetp->trwlock);
+	if (vnetp->tlp == NULL) {
+		vnetp->tlp = vp_tlp;
+	} else {
+		/* walk to the tail and link the new transport there */
+		ttlp = vnetp->tlp;
+		while (ttlp->nextp)
+			ttlp = ttlp->nextp;
+		ttlp->nextp = vp_tlp;
+	}
+	RW_EXIT(&vnetp->trwlock);
+}
+
+/*
+ * Unlink vp_tlp from the proxy-transport list and free it if found.
+ * Unlike vnet_add_vptl(), the caller is expected to hold trwlock
+ * (see vnetdetach() and the vnetattach() failure path).
+ */
+static void
+vnet_del_vptl(vnet_t *vnetp, vp_tl_t *vp_tlp)
+{
+	vp_tl_t *ttlp, **pretlp;
+	boolean_t found = B_FALSE;
+
+	pretlp = &vnetp->tlp;
+	ttlp = *pretlp;
+	while (ttlp) {
+		if (ttlp == vp_tlp) {
+			found = B_TRUE;
+			/* splice the entry out of the list */
+			(*pretlp) = ttlp->nextp;
+			ttlp->nextp = NULL;
+			break;
+		}
+		pretlp = &(ttlp->nextp);
+		ttlp = *pretlp;
+	}
+
+	if (found) {
+		KMEM_FREE(vp_tlp);
+	}
+}
+
+/*
+ * Look up a proxy transport by name; returns NULL (after logging a
+ * warning) if no entry matches.  Caller is responsible for any
+ * locking of the transport list.
+ */
+static vp_tl_t *
+vnet_get_vptl(vnet_t *vnetp, const char *name)
+{
+	vp_tl_t *tlp;
+
+	tlp = vnetp->tlp;
+	while (tlp) {
+		if (strcmp(tlp->name, name) == 0) {
+			return (tlp);
+		}
+		tlp = tlp->nextp;
+	}
+	DWARN((vnetp,
+	    "vnet_get_vptl: can't find vp_tl with name (%s)\n", name));
+	return (NULL);
+}
+
+/*
+ * Read the instance's MAC address from the "local-mac-address"
+ * property and store it as both the vendor (factory) and current
+ * address.  Returns DDI_SUCCESS, or DDI_FAILURE if the property is
+ * missing or not exactly ETHERADDRL bytes.
+ */
+static int
+vnet_read_mac_address(vnet_t *vnetp)
+{
+	uchar_t 	*macaddr;
+	uint32_t 	size;
+	int 		rv;
+
+	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
+	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
+	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
+		DWARN((vnetp,
+		    "vnet_read_mac_address: prop_lookup failed (%s) err (%d)\n",
+		    macaddr_propname, rv));
+		return (DDI_FAILURE);
+	}
+	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
+	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
+	/* release the DDI-allocated property buffer */
+	ddi_prop_free(macaddr);
+
+	return (DDI_SUCCESS);
+}
+
+
+/*
+ * Functions below are called only by generic transport to add/remove/modify
+ * entries in forwarding database. See comments in vgen_port_init(vnet_gen.c).
+ */
+
+/*
+ * Add an entry into the forwarding database, mapping macaddr to the
+ * transmit routine (m_tx, txarg) of a directly reachable peer.  The
+ * entry is prepended to its hash chain.  Allocation is KM_NOSLEEP;
+ * on allocation failure the add is silently dropped.
+ */
+void
+vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
+{
+	vnet_t *vnetp = (vnet_t *)arg;
+	uint32_t fdbhash;
+	fdb_t *fdbp;
+	fdb_fanout_t *fdbhp;
+
+	/* Calculate hash value and fdb fanout */
+	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
+	fdbhp = &(vnetp->fdbhp[fdbhash]);
+
+	WRITE_ENTER(&fdbhp->rwlock);
+
+	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
+	if (fdbp == NULL) {
+		RW_EXIT(&fdbhp->rwlock);
+		return;
+	}
+	bcopy(macaddr, (caddr_t)fdbp->macaddr, ETHERADDRL);
+	fdbp->m_tx = m_tx;
+	fdbp->txarg = txarg;
+	/* insert at the head of the hash chain */
+	fdbp->nextp = fdbhp->headp;
+	fdbhp->headp = fdbp;
+
+	RW_EXIT(&fdbhp->rwlock);
+}
+
+/*
+ * Delete the entry for macaddr from the forwarding database.
+ * A no-op if the address is not present.
+ */
+void
+vnet_del_fdb(void *arg, uint8_t *macaddr)
+{
+	vnet_t *vnetp = (vnet_t *)arg;
+	uint32_t fdbhash;
+	fdb_t *fdbp;
+	fdb_t **pfdbp;
+	fdb_fanout_t *fdbhp;
+
+	/* Calculate hash value and fdb fanout */
+	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
+	fdbhp = &(vnetp->fdbhp[fdbhash]);
+
+	WRITE_ENTER(&fdbhp->rwlock);
+
+	/* walk the chain via a pointer-to-link so unlinking is uniform */
+	for (pfdbp = &fdbhp->headp; (fdbp = *pfdbp) != NULL;
+	    pfdbp = &fdbp->nextp) {
+		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
+			/* Unlink it from the list */
+			*pfdbp = fdbp->nextp;
+			KMEM_FREE(fdbp);
+			break;
+		}
+	}
+
+	RW_EXIT(&fdbhp->rwlock);
+}
+
+/*
+ * Modify an existing forwarding-database entry in place, replacing
+ * its transmit routine and argument.  A no-op if macaddr has no entry.
+ */
+void
+vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg)
+{
+	vnet_t *vnetp = (vnet_t *)arg;
+	uint32_t fdbhash;
+	fdb_t *fdbp;
+	fdb_fanout_t *fdbhp;
+
+	/* Calculate hash value and fdb fanout */
+	fdbhash = MACHASH(macaddr, vnetp->nfdb_hash);
+	fdbhp = &(vnetp->fdbhp[fdbhash]);
+
+	WRITE_ENTER(&fdbhp->rwlock);
+
+	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
+		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
+			/* change the entry to have new tx params */
+			fdbp->m_tx = m_tx;
+			fdbp->txarg = txarg;
+			break;
+		}
+	}
+
+	RW_EXIT(&fdbhp->rwlock);
+}
+
+/*
+ * Look up an fdb entry in the given fanout by mac address; returns the
+ * entry or NULL.  Caller holds the fanout's rwlock (at least as
+ * reader) for the duration of the returned pointer's use.
+ */
+static fdb_t *
+vnet_lookup_fdb(fdb_fanout_t *fdbhp, uint8_t *macaddr)
+{
+	fdb_t *fdbp = NULL;
+
+	for (fdbp = fdbhp->headp; fdbp != NULL; fdbp = fdbp->nextp) {
+		if (bcmp(fdbp->macaddr, macaddr, ETHERADDRL) == 0) {
+			break;
+		}
+	}
+
+	return (fdbp);
+}
+
+/*
+ * Add the default route entry (used for broadcast/multicast and
+ * unknown unicast destinations - see vnet_m_tx()) into the reserved
+ * last slot of the fdb.  Only one default route may exist; a second
+ * add warns and returns.  Allocation is KM_NOSLEEP and failure is
+ * silently dropped.  The entry's mac address is all-zeros.
+ */
+void
+vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg)
+{
+	vnet_t *vnetp = (vnet_t *)arg;
+	fdb_t *fdbp;
+	fdb_fanout_t *fdbhp;
+
+	/*
+	 * The last hash list is reserved for default route entry,
+	 * and for now, we have only one entry in this list.
+	 */
+	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
+
+	WRITE_ENTER(&fdbhp->rwlock);
+
+	if (fdbhp->headp) {
+		DWARN((vnetp,
+		    "vnet_add_def_rte: default rte already exists\n"));
+		RW_EXIT(&fdbhp->rwlock);
+		return;
+	}
+	fdbp = kmem_zalloc(sizeof (fdb_t), KM_NOSLEEP);
+	if (fdbp == NULL) {
+		RW_EXIT(&fdbhp->rwlock);
+		return;
+	}
+	bzero(fdbp->macaddr, ETHERADDRL);
+	fdbp->m_tx = m_tx;
+	fdbp->txarg = txarg;
+	fdbp->nextp = NULL;
+	fdbhp->headp = fdbp;
+
+	RW_EXIT(&fdbhp->rwlock);
+}
+
+/*
+ * Delete the default route entry from the forwarding database.
+ * A no-op if no default route is registered.
+ */
+void
+vnet_del_def_rte(void *arg)
+{
+	vnet_t *vnetp = (vnet_t *)arg;
+	fdb_t *fdbp;
+	fdb_fanout_t *fdbhp;
+
+	/*
+	 * The last hash list is reserved for default route entry,
+	 * and for now, we have only one entry in this list.
+	 */
+	fdbhp = &(vnetp->fdbhp[vnetp->nfdb_hash]);
+
+	WRITE_ENTER(&fdbhp->rwlock);
+
+	if (fdbhp->headp == NULL) {
+		RW_EXIT(&fdbhp->rwlock);
+		return;
+	}
+	fdbp = fdbhp->headp;
+	KMEM_FREE(fdbp);
+	fdbhp->headp = NULL;
+
+	RW_EXIT(&fdbhp->rwlock);
+}
diff --git a/usr/src/uts/sun4v/io/vnet_gen.c b/usr/src/uts/sun4v/io/vnet_gen.c
new file mode 100644
index 0000000000..56f753e5e7
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vnet_gen.c
@@ -0,0 +1,4899 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/stream.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/ksynch.h>
+#include <sys/stat.h>
+#include <sys/modctl.h>
+#include <sys/debug.h>
+#include <sys/ethernet.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/strsun.h>
+#include <sys/note.h>
+#include <sys/mac.h>
+#include <sys/ldc.h>
+#include <sys/mach_descrip.h>
+#include <sys/mdeg.h>
+#include <sys/vio_mailbox.h>
+#include <sys/vio_common.h>
+#include <sys/vnet_common.h>
+#include <sys/vnet_gen.h>
+#include <sys/vnet_mailbox.h>
+
+/*
+ * Implementation of the mac functionality for vnet using the
+ * generic (default) transport layer of sun4v Logical Domain Channels (LDC).
+ */
+
+/*
+ * Function prototypes.
+ */
+/* vgen proxy entry points */
+int vgen_init(void *vnetp, dev_info_t *vnetdip, void *vnetmacp,
+ const uint8_t *macaddr, mac_t **vgenmacp);
+void vgen_uninit(void *arg);
+static int vgen_start(void *arg);
+static void vgen_stop(void *arg);
+static mblk_t *vgen_tx(void *arg, mblk_t *mp);
+static void vgen_resources(void *arg);
+static int vgen_multicst(void *arg, boolean_t add,
+ const uint8_t *mca);
+static int vgen_promisc(void *arg, boolean_t on);
+static int vgen_unicst(void *arg, const uint8_t *mca);
+static uint64_t vgen_stat(void *arg, enum mac_stat stat);
+static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp);
+
+/* externs - functions provided by vnet to add/remove/modify entries in fdb */
+void vnet_add_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
+void vnet_del_fdb(void *arg, uint8_t *macaddr);
+void vnet_modify_fdb(void *arg, uint8_t *macaddr, mac_tx_t m_tx, void *txarg);
+void vnet_add_def_rte(void *arg, mac_tx_t m_tx, void *txarg);
+void vnet_del_def_rte(void *arg);
+
+/* vgen internal functions */
+static void vgen_detach_ports(vgen_t *vgenp);
+static void vgen_port_detach(vgen_port_t *portp);
+static void vgen_port_list_insert(vgen_port_t *portp);
+static void vgen_port_list_remove(vgen_port_t *portp);
+static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
+ int port_num);
+static int vgen_mdeg_reg(vgen_t *vgenp);
+static void vgen_mdeg_unreg(vgen_t *vgenp);
+static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
+static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
+static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
+static int vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
+ int num_ids, struct ether_addr *macaddr, boolean_t vsw_port);
+static void vgen_port_detach_mdeg(vgen_port_t *portp);
+static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
+ mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
+static uint64_t vgen_port_stat(vgen_port_t *portp, enum mac_stat stat);
+
+static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
+static void vgen_ldc_detach(vgen_ldc_t *ldcp);
+static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp);
+static void vgen_free_tx_ring(vgen_ldc_t *ldcp);
+static void vgen_init_ports(vgen_t *vgenp);
+static void vgen_port_init(vgen_port_t *portp);
+static void vgen_uninit_ports(vgen_t *vgenp);
+static void vgen_port_uninit(vgen_port_t *portp);
+static void vgen_init_ldcs(vgen_port_t *portp);
+static void vgen_uninit_ldcs(vgen_port_t *portp);
+static int vgen_ldc_init(vgen_ldc_t *ldcp);
+static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
+static int vgen_init_tbufs(vgen_ldc_t *ldcp);
+static void vgen_uninit_tbufs(vgen_ldc_t *ldcp);
+static void vgen_clobber_tbufs(vgen_ldc_t *ldcp);
+static void vgen_clobber_rxds(vgen_ldc_t *ldcp);
+static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, enum mac_stat stat);
+static void vgen_init_macp(vgen_t *vgenp, mac_t *macp);
+static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
+static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
+static int vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp);
+static void vgen_reclaim(vgen_ldc_t *ldcp);
+static void vgen_reclaim_dring(vgen_ldc_t *ldcp);
+static int vgen_num_txpending(vgen_ldc_t *ldcp);
+static int vgen_tx_dring_full(vgen_ldc_t *ldcp);
+static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp);
+static void vgen_ldc_watchdog(void *arg);
+static void vgen_copymsg(mblk_t *mp, void *bufp);
+static int vgen_setup_kstats(vgen_ldc_t *ldcp);
+static void vgen_destroy_kstats(vgen_ldc_t *ldcp);
+static int vgen_kstat_update(kstat_t *ksp, int rw);
+
+/* vgen handshake functions */
+static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
+static int vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
+ uint16_t ver_minor);
+static int vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp);
+static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
+ boolean_t caller_holds_lock);
+static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
+static int vgen_send_attr_info(vgen_ldc_t *ldcp);
+static int vgen_send_dring_reg(vgen_ldc_t *ldcp);
+static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
+static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start,
+ uint32_t end, uint64_t next_txseq);
+static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
+static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
+static void vgen_handshake_reset(vgen_ldc_t *ldcp);
+static void vgen_reset_hphase(vgen_ldc_t *ldcp);
+static void vgen_handshake(vgen_ldc_t *ldcp);
+static int vgen_handshake_done(vgen_ldc_t *ldcp);
+static void vgen_handshake_retry(vgen_ldc_t *ldcp);
+static void vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
+ vio_msg_tag_t *tagp);
+static void vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static void vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static void vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static void vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static void vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static void vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
+ mblk_t **headp, mblk_t **tailp);
+static void vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
+ mblk_t **headp, mblk_t **tailp);
+static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
+static uint64_t vgen_macaddr_strtoul(const uint8_t *macaddr);
+static int vgen_macaddr_ultostr(uint64_t value, uint8_t *macaddr);
+static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf);
+static void vgen_hwatchdog(void *arg);
+static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint);
+static void vgen_print_hparams(vgen_hparams_t *hp);
+static void vgen_print_ldcinfo(vgen_ldc_t *ldcp);
+
+/*
+ * The handshake process consists of 5 phases defined below, with VH_PHASE0
+ * being the pre-handshake phase and VH_DONE being the phase that indicates
+ * successful completion of all phases.
+ * Each phase may have one to several handshake states which are required
+ * to complete successfully to move to the next phase.
+ * Refer to the functions vgen_handshake() and vgen_handshake_done() for
+ * more details.
+ */
+/* handshake phases */
+enum { VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 };
+
+/* handshake states */
+enum {
+
+ VER_INFO_SENT = 0x1,
+ VER_ACK_RCVD = 0x2,
+ VER_INFO_RCVD = 0x4,
+ VER_ACK_SENT = 0x8,
+ VER_NEGOTIATED = (VER_ACK_RCVD | VER_ACK_SENT),
+
+ ATTR_INFO_SENT = 0x10,
+ ATTR_ACK_RCVD = 0x20,
+ ATTR_INFO_RCVD = 0x40,
+ ATTR_ACK_SENT = 0x80,
+ ATTR_INFO_EXCHANGED = (ATTR_ACK_RCVD | ATTR_ACK_SENT),
+
+ DRING_INFO_SENT = 0x100,
+ DRING_ACK_RCVD = 0x200,
+ DRING_INFO_RCVD = 0x400,
+ DRING_ACK_SENT = 0x800,
+ DRING_INFO_EXCHANGED = (DRING_ACK_RCVD | DRING_ACK_SENT),
+
+ RDX_INFO_SENT = 0x1000,
+ RDX_ACK_RCVD = 0x2000,
+ RDX_INFO_RCVD = 0x4000,
+ RDX_ACK_SENT = 0x8000,
+ RDX_EXCHANGED = (RDX_ACK_RCVD | RDX_ACK_SENT)
+
+};
+
+#define LDC_LOCK(ldcp) \
+ mutex_enter(&((ldcp)->cblock));\
+ mutex_enter(&((ldcp)->txlock));\
+ mutex_enter(&((ldcp)->tclock));
+#define LDC_UNLOCK(ldcp) \
+ mutex_exit(&((ldcp)->tclock));\
+ mutex_exit(&((ldcp)->txlock));\
+ mutex_exit(&((ldcp)->cblock));
+
+static struct ether_addr etherbroadcastaddr = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+/*
+ * MIB II broadcast/multicast packets
+ */
+#define IS_BROADCAST(ehp) \
+ (ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
+#define IS_MULTICAST(ehp) \
+ ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
+
+/*
+ * Property names
+ */
+static char macaddr_propname[] = "mac-address";
+static char rmacaddr_propname[] = "remote-mac-address";
+static char channel_propname[] = "channel-endpoint";
+static char reg_propname[] = "reg";
+static char port_propname[] = "port";
+static char swport_propname[] = "switch-port";
+static char id_propname[] = "id";
+
+/* versions supported - in decreasing order */
+static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 0} };
+
+/* Tunables */
+uint32_t vgen_hwd_interval = 1000; /* handshake watchdog freq in msec */
+uint32_t vgen_max_hretries = 1; /* max # of handshake retries */
+
+uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */
+
+#ifdef DEBUG
+/* flags to simulate error conditions for debugging */
+int vgen_trigger_txtimeout = 0;
+int vgen_trigger_rxlost = 0;
+#endif
+
+/* MD update matching structure */
+static md_prop_match_t vport_prop_match[] = {
+ { MDET_PROP_VAL, "id" },
+ { MDET_LIST_END, NULL }
+};
+
+static mdeg_node_match_t vport_match = { "virtual-device-port",
+ vport_prop_match };
+
+/* template for matching a particular vnet instance */
+static mdeg_prop_spec_t vgen_prop_template[] = {
+ { MDET_PROP_STR, "name", "network" },
+ { MDET_PROP_VAL, "cfg-handle", NULL },
+ { MDET_LIST_END, NULL, NULL }
+};
+
+#define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val)
+
+static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
+
+/* externs */
+extern uint32_t vnet_ntxds;
+extern uint32_t vnet_reclaim_lowat;
+extern uint32_t vnet_reclaim_hiwat;
+extern uint32_t vnet_ldcwd_interval;
+extern uint32_t vnet_ldcwd_txtimeout;
+extern uint32_t vnet_ldc_qlen;
+extern int _vnet_dbglevel;
+extern void _vnetdebug_printf(void *vnetp, const char *fmt, ...);
+
+#ifdef DEBUG
+
+/*
+ * XXX: definitions below need to be in sync with those in vnet.c
+ */
+
+/*
+ * debug levels:
+ * DBG_LEVEL1: Function entry/exit tracing
+ * DBG_LEVEL2: Info messages
+ * DBG_LEVEL3: Warning messages
+ * DBG_LEVEL4: Error messages
+ */
+
+enum { DBG_LEVEL1 = 0x01, DBG_LEVEL2 = 0x02, DBG_LEVEL3 = 0x04,
+ DBG_LEVEL4 = 0x08 };
+
+#define DBG1(_s) do { \
+ if ((_vnet_dbglevel & DBG_LEVEL1) != 0) { \
+ _vnetdebug_printf _s; \
+ } \
+ _NOTE(CONSTCOND) } while (0)
+
+#define DBG2(_s) do { \
+ if ((_vnet_dbglevel & DBG_LEVEL2) != 0) { \
+ _vnetdebug_printf _s; \
+ } \
+ _NOTE(CONSTCOND) } while (0)
+
+#define DWARN(_s) do { \
+ if ((_vnet_dbglevel & DBG_LEVEL3) != 0) { \
+ _vnetdebug_printf _s; \
+ } \
+ _NOTE(CONSTCOND) } while (0)
+
+#define DERR(_s) do { \
+ if ((_vnet_dbglevel & DBG_LEVEL4) != 0) { \
+ _vnetdebug_printf _s; \
+ } \
+ _NOTE(CONSTCOND) } while (0)
+
+#else
+
+#define DBG1(_s) if (0) _vnetdebug_printf _s
+#define DBG2(_s) if (0) _vnetdebug_printf _s
+#define DWARN(_s) if (0) _vnetdebug_printf _s
+#define DERR(_s) if (0) _vnetdebug_printf _s
+
+#endif
+
+#ifdef DEBUG
+
+/* simulate handshake error conditions for debug */
+uint32_t vgen_hdbg;
+#define HDBG_VERSION 0x1
+#define HDBG_TIMEOUT 0x2
+#define HDBG_BAD_SID 0x4
+#define HDBG_OUT_STATE 0x8
+
+#if 0
+/* debug version negotiation, need to redefine VGEN_NUM_VER */
+vgen_ver_t dbg_vgen_versions[VGEN_NUM_VER] =
+ { {5, 0}, {3, 0}, {2, 1}, {1, 2}, {1, 1} };
+#endif
+
+#endif
+
+/*
+ * vgen_init() is called by an instance of vnet driver to initialize the
+ * corresponding generic proxy transport layer. The arguments passed by vnet
+ * are - an opaque pointer to the vnet instance, pointers to dev_info_t and
+ * mac_t of the vnet device, mac address of the vnet device, and a pointer to
+ * the mac_t of the generic transport is returned in the last argument.
+ */
+int
+vgen_init(void *vnetp, dev_info_t *vnetdip, void *vnetmacp,
+ const uint8_t *macaddr, mac_t **vgenmacp)
+{
+ vgen_t *vgenp;
+ mac_t *macp;
+ int instance;
+
+ if ((vnetp == NULL) || (vnetdip == NULL) ||(vnetmacp == NULL))
+ return (DDI_FAILURE);
+
+ instance = ddi_get_instance(vnetdip);
+
+ DBG1((vnetp, "vgen_init: enter vnet_instance(%d)\n", instance));
+
+ vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
+
+ vgenp->vnetp = vnetp;
+ vgenp->vnetdip = vnetdip;
+ vgenp->vnetmacp = vnetmacp;
+ bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
+
+ /* allocate multicast table */
+ vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
+ sizeof (struct ether_addr), KM_SLEEP);
+ vgenp->mccount = 0;
+ vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
+
+ mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* register with MD event generator */
+ if (vgen_mdeg_reg(vgenp) != DDI_SUCCESS) {
+ mutex_destroy(&vgenp->lock);
+ kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
+ sizeof (struct ether_addr));
+ KMEM_FREE(vgenp);
+ return (DDI_FAILURE);
+ }
+
+ macp = &vgenp->vgenmac;
+ vgen_init_macp(vgenp, macp);
+
+ /* register mac_t of this vgen_t with vnet */
+ *vgenmacp = macp;
+
+ DBG1((vnetp, "vgen_init: exit vnet_instance(%d)\n", instance));
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Called by vnet to undo the initializations done by vgen_init().
+ * The handle provided by generic transport during vgen_init() is the argument.
+ */
+void
+vgen_uninit(void *arg)
+{
+ vgen_t *vgenp = (vgen_t *)arg;
+ void *vnetp;
+ int instance;
+
+ if (vgenp == NULL)
+ return;
+
+ instance = ddi_get_instance(vgenp->vnetdip);
+ vnetp = vgenp->vnetp;
+
+ DBG1((vnetp, "vgen_uninit: enter vnet_instance(%d)\n", instance));
+
+ /* unregister with MD event generator */
+ vgen_mdeg_unreg(vgenp);
+
+ mutex_enter(&vgenp->lock);
+
+ /* detach all ports from the device */
+ vgen_detach_ports(vgenp);
+
+ /* free multicast table */
+ kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
+
+ mutex_exit(&vgenp->lock);
+
+ mutex_destroy(&vgenp->lock);
+
+ KMEM_FREE(vgenp);
+
+ DBG1((vnetp, "vgen_uninit: exit vnet_instance(%d)\n", instance));
+}
+
+/* enable transmit/receive for the device */
+static int
+vgen_start(void *arg)
+{
+ vgen_t *vgenp = (vgen_t *)arg;
+
+ DBG1((vgenp->vnetp, "vgen_start: enter\n"));
+
+ mutex_enter(&vgenp->lock);
+ vgen_init_ports(vgenp);
+ vgenp->flags |= VGEN_STARTED;
+ mutex_exit(&vgenp->lock);
+
+ DBG1((vgenp->vnetp, "vgen_start: exit\n"));
+ return (DDI_SUCCESS);
+}
+
+/* stop transmit/receive */
+static void
+vgen_stop(void *arg)
+{
+ vgen_t *vgenp = (vgen_t *)arg;
+
+ DBG1((vgenp->vnetp, "vgen_stop: enter\n"));
+
+ mutex_enter(&vgenp->lock);
+ vgen_uninit_ports(vgenp);
+ vgenp->flags &= ~(VGEN_STARTED);
+ mutex_exit(&vgenp->lock);
+
+ DBG1((vgenp->vnetp, "vgen_stop: exit\n"));
+}
+
+/* vgen transmit function */
+static mblk_t *
+vgen_tx(void *arg, mblk_t *mp)
+{
+ vgen_port_t *portp;
+ int status;
+
+ portp = (vgen_port_t *)arg;
+ status = vgen_portsend(portp, mp);
+ if (status != VGEN_SUCCESS) {
+ /* failure */
+ return (mp);
+ }
+ /* success */
+ return (NULL);
+}
+
+/* transmit packets over the given port */
+static int
+vgen_portsend(vgen_port_t *portp, mblk_t *mp)
+{
+ vgen_ldclist_t *ldclp;
+ vgen_ldc_t *ldcp;
+ vgen_t *vgenp;
+ int status;
+
+ vgenp = portp->vgenp;
+ ldclp = &portp->ldclist;
+ READ_ENTER(&ldclp->rwlock);
+ /*
+ * XXX - for now, we have a single channel.
+ */
+ if (ldclp->headp == NULL) {
+ DWARN((vgenp->vnetp, "vgen_portsend: dropping packet\n"));
+ RW_EXIT(&ldclp->rwlock);
+ return (VGEN_FAILURE);
+ }
+ ldcp = ldclp->headp;
+
+ if (ldcp->need_resched) {
+ /* out of tx resources, see vgen_ldcsend() for details. */
+ DWARN((vgenp->vnetp, "vgen_portsend: dropping packet...\n"));
+
+ mutex_enter(&ldcp->txlock);
+ ldcp->statsp->tx_no_desc++;
+ mutex_exit(&ldcp->txlock);
+
+ RW_EXIT(&ldclp->rwlock);
+ freemsg(mp);
+ return (VGEN_SUCCESS);
+ }
+
+ status = vgen_ldcsend(ldcp, mp);
+ RW_EXIT(&ldclp->rwlock);
+
+ if (status != VGEN_TX_SUCCESS)
+ return (VGEN_FAILURE);
+
+ return (VGEN_SUCCESS);
+}
+
+/* channel transmit function */
+static int
+vgen_ldcsend(vgen_ldc_t *ldcp, mblk_t *mp)
+{
+ void *vnetp;
+ size_t size;
+ uint64_t datalen;
+ uchar_t *rptr;
+ mblk_t *bp = NULL;
+ int rv;
+ uint32_t i;
+ uint32_t start;
+ uint32_t end;
+ int txpending = 0;
+ int ci;
+ uint32_t ncookies;
+ uint64_t nc;
+ vgen_private_desc_t *tbufp;
+ vgen_private_desc_t *ntbufp;
+ vnet_public_desc_t *txdp;
+ vio_dring_entry_hdr_t *hdrp;
+ vgen_stats_t *statsp;
+ struct ether_header *ehp;
+ boolean_t is_bcast = B_FALSE;
+ boolean_t is_mcast = B_FALSE;
+ boolean_t reclaim = B_FALSE;
+ boolean_t need_intr = B_FALSE;
+ boolean_t err = B_FALSE;
+
+ vnetp = LDC_TO_VNET(ldcp);
+ statsp = ldcp->statsp;
+ DBG1((vnetp, "vgen_ldcsend: enter ldcid(%lx)\n", ldcp->ldc_id));
+
+ /* drop the packet if handshake is not done or ldc is not up */
+ if ((ldcp->hphase != VH_DONE) || (ldcp->ldc_status != LDC_UP)) {
+ DWARN((vnetp,
+ "vgen_ldcsend: id(%lx) status(%d), dropping packet\n",
+ ldcp->ldc_id, ldcp->ldc_status));
+ freemsg(mp);
+ return (VGEN_TX_SUCCESS);
+ }
+
+ size = msgsize(mp);
+ if (size > (size_t)ETHERMAX) {
+ DWARN((vnetp, "vgen_ldcsend: id(%lx) invalid size(%d)\n",
+ ldcp->ldc_id, size));
+ freemsg(mp);
+ return (VGEN_TX_SUCCESS);
+ }
+ if ((size < (size_t)ETHERMIN) || /* needs padding to ETHERMIN */
+ (mp->b_cont) || /* more than 1 mblk */
+ ((uintptr_t)mp->b_rptr & 0x7) || /* data not 8 byte aligned */
+ ((mp->b_wptr - mp->b_rptr) & 0x7)) { /* datalen not multiple of 8 */
+ if (size < ETHERMIN)
+ size = ETHERMIN;
+ /*
+ * The data buffer returned by allocb(9F) is 8-byte aligned.
+ * We allocate extra 8 bytes to ensure size is multiple of
+ * 8 bytes for ldc_mem_bind_handle().
+ */
+ bp = allocb(size + 8, BPRI_MED);
+ if (bp == NULL) {
+ /* drop the packet */
+ freemsg(mp);
+ mutex_enter(&ldcp->txlock);
+ statsp->tx_allocb_fail++;
+ mutex_exit(&ldcp->txlock);
+ return (VGEN_TX_SUCCESS);
+ }
+ vgen_copymsg(mp, bp->b_rptr);
+ bp->b_wptr += size;
+ datalen = size; /* actual data length without pad */
+ size = (datalen + 7) & ~7;
+ bp->b_wptr += (size - datalen);
+ } else { /* size/alignment are ok */
+ datalen = size;
+ }
+
+ mutex_enter(&ldcp->txlock);
+
+ /* check if the channel is still up & running */
+ if ((ldcp->hphase != VH_DONE) || (ldcp->ldc_status != LDC_UP)) {
+ DWARN((vnetp,
+ "vgen_ldcsend: id(%lx) status(%d), dropping packet\n",
+ ldcp->ldc_id, ldcp->ldc_status));
+ err = B_TRUE;
+ goto vgen_tx_exit;
+ }
+
+ /*
+ * allocate a descriptor
+ */
+ tbufp = ldcp->next_tbufp;
+ ntbufp = NEXTTBUF(ldcp, tbufp);
+ if (tbufp->flags != VGEN_PRIV_DESC_FREE ||
+ ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
+
+ mutex_enter(&ldcp->tclock);
+ if (ntbufp == ldcp->cur_tbufp)
+ ldcp->need_resched = B_TRUE;
+ mutex_exit(&ldcp->tclock);
+
+ statsp->tx_no_desc++;
+ mutex_exit(&ldcp->txlock);
+ if (bp)
+ freemsg(bp);
+#ifdef VGEN_USE_MAC_TX_UPDATE
+ /*
+ * This cflag is disabled by default. This can be enabled if we
+ * want to return failure to the mac layer when we run out of
+ * descriptors and use mac_tx_update() to restart tx when
+ * descriptors become available. However, stopping tx would
+ * affect traffic going over other ports, as upper mac layer
+ * has no concept of multiple ports within a device.
+ * So currently, to avoid this, drop packets when we run out
+ * of descrs and just return success. See the corresponding
+ * code in vgen_portsend() and vgen_reclaim_dring().
+ */
+ return (VGEN_TX_NORESOURCES);
+#else
+ freemsg(mp); /* drop the packet */
+ return (VGEN_TX_SUCCESS);
+#endif
+ }
+
+ txpending = vgen_num_txpending(ldcp);
+ if (txpending >= ldcp->reclaim_hiwat) {
+ /*
+ * if num of pending transmits is more than hiwat,
+ * reclaim now and also enable ack bit.
+ */
+ reclaim = B_TRUE;
+ need_intr = B_TRUE;
+ } else {
+ if (txpending >= ldcp->reclaim_lowat) {
+ /*
+ * if the num of pending transmits is more than lowat
+ * enable ack bit in the descr and reclaim in intr().
+ */
+ need_intr = B_TRUE;
+ }
+ }
+
+ i = tbufp - ldcp->tbufp;
+
+ rptr = bp ? (bp->b_rptr) : (mp->b_rptr);
+ ci = 0;
+ rv = ldc_mem_bind_handle(tbufp->memhandle, (caddr_t)rptr, size,
+ LDC_SHADOW_MAP, LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
+ if (rv != 0) {
+ DWARN((vnetp, "vgen_ldcsend: id(%lx)ldc_mem_bind_handle failed"
+ " rv(%d) tbufi(%d)\n", ldcp->ldc_id, rv, i));
+ err = B_TRUE;
+ statsp->oerrors++;
+ goto vgen_tx_exit;
+ }
+
+ if ((ncookies < 0) || (ncookies > (uint64_t)MAX_COOKIES)) {
+ DWARN((vnetp,
+ "vgen_ldcsend: id(%lx)ldc_mem_bind_handle returned"
+ " invalid cookies (%d)\n", ldcp->ldc_id, ncookies));
+ err = B_TRUE;
+ statsp->oerrors++;
+ (void) ldc_mem_unbind_handle(tbufp->memhandle);
+ goto vgen_tx_exit;
+ }
+
+ if (ncookies > 1) {
+ nc = ncookies - 1;
+ while (nc) {
+ ci++;
+ rv = ldc_mem_nextcookie(tbufp->memhandle,
+ &(tbufp->memcookie[ci]));
+ if (rv != 0) {
+ DWARN((vnetp,
+ "vgen_ldcsend: ldc_mem_nextcookie"
+ " err(%d)\n", rv));
+ err = B_TRUE;
+ statsp->oerrors++;
+ (void) ldc_mem_unbind_handle(tbufp->memhandle);
+ goto vgen_tx_exit;
+ }
+ nc--;
+ }
+ }
+
+ ehp = (struct ether_header *)rptr;
+ is_bcast = IS_BROADCAST(ehp);
+ is_mcast = IS_MULTICAST(ehp);
+ /* save the packet, free when the descr done flag is set */
+ tbufp->mp = (bp ? bp : mp);
+ tbufp->flags = VGEN_PRIV_DESC_BUSY;
+ tbufp->datalen = datalen;
+ tbufp->ncookies = ncookies;
+ tbufp->seqnum = ldcp->next_txseq;
+
+ /* initialize the corresponding public descriptor (txd) */
+ txdp = tbufp->descp;
+ hdrp = &txdp->hdr;
+ hdrp->dstate = VIO_DESC_READY;
+ if (need_intr)
+ hdrp->ack = B_TRUE;
+ txdp->nbytes = datalen;
+ txdp->ncookies = ncookies;
+ bcopy((tbufp->memcookie), (txdp->memcookie),
+ ncookies * sizeof (ldc_mem_cookie_t));
+
+ /* send dring datamsg to the peer */
+ start = end = i;
+ rv = vgen_send_dring_data(ldcp, start, end, ldcp->next_txseq);
+ if (rv != 0) {
+ /* vgen_send_dring_data() error: drop the packet */
+ DWARN((vnetp,
+ "vgen_ldcsend: vgen_send_dring_data(): failed: "
+ "id(%lx) rv(%d) len (%d)\n", ldcp->ldc_id, rv, datalen));
+ (void) ldc_mem_unbind_handle(tbufp->memhandle);
+ tbufp->flags = VGEN_PRIV_DESC_FREE; /* free tbuf */
+ hdrp->dstate = VIO_DESC_FREE; /* free txd */
+ hdrp->ack = B_FALSE;
+ statsp->oerrors++;
+ err = B_TRUE;
+ goto vgen_tx_exit;
+ }
+
+ /* update next available tbuf in the ring */
+ ldcp->next_tbufp = ntbufp;
+ /* update tx seqnum and index */
+ ldcp->next_txseq++;
+ INCR_TXI(ldcp->next_txi, ldcp);
+
+ /* update stats */
+ statsp->opackets++;
+ statsp->obytes += datalen;
+ if (is_bcast)
+ statsp->brdcstxmt++;
+ else if (is_mcast)
+ statsp->multixmt++;
+
+vgen_tx_exit:
+ mutex_exit(&ldcp->txlock);
+
+ if (reclaim) {
+ vgen_reclaim(ldcp);
+ }
+ DBG1((vnetp, "vgen_ldcsend: exit: ldcid (%lx)\n", ldcp->ldc_id));
+
+ if (err) {
+ if (bp)
+ freemsg(bp);
+#ifdef VGEN_USE_MAC_TX_UPDATE
+ return (VGEN_TX_FAILURE); /* transmit failed */
+#else
+ freemsg(mp); /* drop the packet */
+ return (VGEN_TX_SUCCESS);
+#endif
+ } else {
+ if (bp) /* free original pkt, copy is in bp */
+ freemsg(mp);
+ return (VGEN_TX_SUCCESS);
+ }
+}
+
+/* register resources */
+static void
+vgen_resources(void *arg)
+{
+ vgen_t *vgenp;
+ mac_rx_fifo_t mrf;
+
+ vgenp = (vgen_t *)arg;
+ DBG1((vgenp->vnetp, "vgen_resources: enter\n"));
+
+ mrf.mrf_type = MAC_RX_FIFO;
+ mrf.mrf_blank = NULL;
+ mrf.mrf_arg = NULL;
+ mrf.mrf_normal_blank_time = 0;
+ mrf.mrf_normal_pkt_count = 0;
+ vgenp->mrh = mac_resource_add(vgenp->vnetmacp, (mac_resource_t *)&mrf);
+
+ DBG1((vgenp->vnetp, "vgen_resources: exit\n"));
+}
+
+/* enable/disable a multicast address */
+static int
+vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
+{
+ vgen_t *vgenp;
+ vnet_mcast_msg_t mcastmsg;
+ vio_msg_tag_t *tagp;
+ vgen_port_t *portp;
+ vgen_portlist_t *plistp;
+ vgen_ldc_t *ldcp;
+ vgen_ldclist_t *ldclp;
+ void *vnetp;
+ struct ether_addr *addrp;
+ int rv;
+ uint32_t i;
+
+ vgenp = (vgen_t *)arg;
+ vnetp = vgenp->vnetp;
+ addrp = (struct ether_addr *)mca;
+ tagp = &mcastmsg.tag;
+ bzero(&mcastmsg, sizeof (mcastmsg));
+
+ mutex_enter(&vgenp->lock);
+
+ plistp = &(vgenp->vgenports);
+
+ READ_ENTER(&plistp->rwlock);
+
+ portp = vgenp->vsw_portp;
+ if (portp == NULL) {
+ RW_EXIT(&plistp->rwlock);
+ goto vgen_mcast_exit;
+ }
+ ldclp = &portp->ldclist;
+
+ READ_ENTER(&ldclp->rwlock);
+
+ ldcp = ldclp->headp;
+ if (ldcp == NULL) {
+ RW_EXIT(&ldclp->rwlock);
+ RW_EXIT(&plistp->rwlock);
+ goto vgen_mcast_exit;
+ }
+
+ mutex_enter(&ldcp->cblock);
+
+ if (ldcp->hphase == VH_DONE) {
+ /*
+ * If handshake is done, send a msg to vsw to add/remove
+ * the multicast address.
+ */
+ tagp->vio_msgtype = VIO_TYPE_CTRL;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VNET_MCAST_INFO;
+ tagp->vio_sid = ldcp->local_sid;
+ bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
+ mcastmsg.set = add;
+ mcastmsg.count = 1;
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
+ B_FALSE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_mutlicst: vgen_sendmsg failed"
+ "id (%lx)\n", ldcp->ldc_id));
+ }
+ } else {
+ /* set the flag to send a msg to vsw after handshake is done */
+ ldcp->need_mcast_sync = B_TRUE;
+ }
+
+ mutex_exit(&ldcp->cblock);
+
+ if (add) {
+
+ /* expand multicast table if necessary */
+ if (vgenp->mccount >= vgenp->mcsize) {
+ struct ether_addr *newtab;
+ uint32_t newsize;
+
+
+ newsize = vgenp->mcsize * 2;
+
+ newtab = kmem_zalloc(newsize *
+ sizeof (struct ether_addr), KM_NOSLEEP);
+
+ bcopy(vgenp->mctab, newtab, vgenp->mcsize *
+ sizeof (struct ether_addr));
+ kmem_free(vgenp->mctab,
+ vgenp->mcsize * sizeof (struct ether_addr));
+
+ vgenp->mctab = newtab;
+ vgenp->mcsize = newsize;
+ }
+
+ /* add address to the table */
+ vgenp->mctab[vgenp->mccount++] = *addrp;
+
+ } else {
+
+ /* delete address from the table */
+ for (i = 0; i < vgenp->mccount; i++) {
+ if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
+
+ /*
+ * If there's more than one address in this
+ * table, delete the unwanted one by moving
+ * the last one in the list over top of it;
+ * otherwise, just remove it.
+ */
+ if (vgenp->mccount > 1) {
+ vgenp->mctab[i] =
+ vgenp->mctab[vgenp->mccount-1];
+ }
+ vgenp->mccount--;
+ break;
+ }
+ }
+ }
+
+ RW_EXIT(&ldclp->rwlock);
+ RW_EXIT(&plistp->rwlock);
+
+vgen_mcast_exit:
+ mutex_exit(&vgenp->lock);
+ return (DDI_SUCCESS);
+}
+
+/* set or clear promiscuous mode on the device */
+static int
+vgen_promisc(void *arg, boolean_t on)
+{
+ _NOTE(ARGUNUSED(arg, on))
+ return (DDI_SUCCESS);
+}
+
+/* set the unicast mac address of the device */
+static int
+vgen_unicst(void *arg, const uint8_t *mca)
+{
+ _NOTE(ARGUNUSED(arg, mca))
+ return (DDI_SUCCESS);
+}
+
+/* get device statistics */
+static uint64_t
+vgen_stat(void *arg, enum mac_stat stat)
+{
+ vgen_t *vgenp = (vgen_t *)arg;
+ vgen_port_t *portp;
+ vgen_portlist_t *plistp;
+ uint64_t val;
+
+ val = 0;
+
+ plistp = &(vgenp->vgenports);
+ READ_ENTER(&plistp->rwlock);
+
+ for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
+ val += vgen_port_stat(portp, stat);
+ }
+
+ RW_EXIT(&plistp->rwlock);
+
+ return (val);
+}
+
+static void
+vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp)
+{
+ _NOTE(ARGUNUSED(arg, wq, mp))
+}
+
+/* vgen internal functions */
+/* detach all ports from the device */
+static void
+vgen_detach_ports(vgen_t *vgenp)
+{
+ vgen_port_t *portp;
+ vgen_portlist_t *plistp;
+
+ plistp = &(vgenp->vgenports);
+ WRITE_ENTER(&plistp->rwlock);
+
+ while ((portp = plistp->headp) != NULL) {
+ vgen_port_detach(portp);
+ }
+
+ RW_EXIT(&plistp->rwlock);
+}
+
+/*
+ * detach the given port.
+ */
+static void
+vgen_port_detach(vgen_port_t *portp)
+{
+ vgen_t *vgenp;
+ vgen_ldclist_t *ldclp;
+ int port_num;
+
+ vgenp = portp->vgenp;
+ port_num = portp->port_num;
+
+ DBG1((vgenp->vnetp,
+ "vgen_port_detach: enter: port_num(%d)\n", port_num));
+
+ /* remove it from port list */
+ vgen_port_list_remove(portp);
+
+ /* detach channels from this port */
+ ldclp = &portp->ldclist;
+ WRITE_ENTER(&ldclp->rwlock);
+ while (ldclp->headp) {
+ vgen_ldc_detach(ldclp->headp);
+ }
+ RW_EXIT(&ldclp->rwlock);
+
+ if (vgenp->vsw_portp == portp) {
+ vgenp->vsw_portp = NULL;
+ }
+ KMEM_FREE(portp);
+
+ DBG1((vgenp->vnetp,
+ "vgen_port_detach: exit: port_num(%d)\n", port_num));
+}
+
+/* add a port to port list */
+static void
+vgen_port_list_insert(vgen_port_t *portp)
+{
+ vgen_portlist_t *plistp;
+ vgen_t *vgenp;
+
+ vgenp = portp->vgenp;
+ plistp = &(vgenp->vgenports);
+
+ if (plistp->headp == NULL) {
+ plistp->headp = portp;
+ } else {
+ plistp->tailp->nextp = portp;
+ }
+ plistp->tailp = portp;
+ portp->nextp = NULL;
+}
+
+/* remove a port from port list */
+static void
+vgen_port_list_remove(vgen_port_t *portp)
+{
+ vgen_port_t *prevp;
+ vgen_port_t *nextp;
+ vgen_portlist_t *plistp;
+ vgen_t *vgenp;
+
+ vgenp = portp->vgenp;
+
+ plistp = &(vgenp->vgenports);
+
+ if (plistp->headp == NULL)
+ return;
+
+ if (portp == plistp->headp) {
+ plistp->headp = portp->nextp;
+ if (portp == plistp->tailp)
+ plistp->tailp = plistp->headp;
+ } else {
+ for (prevp = plistp->headp; ((nextp = prevp->nextp) != NULL) &&
+ (nextp != portp); prevp = nextp);
+ if (nextp == portp) {
+ prevp->nextp = portp->nextp;
+ }
+ if (portp == plistp->tailp)
+ plistp->tailp = prevp;
+ }
+}
+
+/* lookup a port in the list based on port_num */
+static vgen_port_t *
+vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
+{
+ vgen_port_t *portp = NULL;
+
+ for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
+ if (portp->port_num == port_num) {
+ break;
+ }
+ }
+
+ return (portp);
+}
+
+/* enable ports for transmit/receive */
+static void
+vgen_init_ports(vgen_t *vgenp)
+{
+ vgen_port_t *portp;
+ vgen_portlist_t *plistp;
+
+ plistp = &(vgenp->vgenports);
+ READ_ENTER(&plistp->rwlock);
+
+ for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
+ vgen_port_init(portp);
+ }
+
+ RW_EXIT(&plistp->rwlock);
+}
+
+static void
+vgen_port_init(vgen_port_t *portp)
+{
+ vgen_t *vgenp;
+
+ vgenp = portp->vgenp;
+ /*
+ * Create fdb entry in vnet, corresponding to the mac
+ * address of this port. Note that the port specified
+ * is vsw-port. This is done so that vsw-port acts
+ * as the route to reach this macaddr, until the
+ * channel for this port comes up (LDC_UP) and
+ * handshake is done successfully.
+ * e.g., if the peer is OBP-vnet, it may not bring the
+ * channel up for this port and may communicate via
+ * vsw to reach this port.
+ * Later, when Solaris-vnet comes up at the other end
+ * of the channel for this port and brings up the channel,
+ * it is an indication that peer vnet is capable of
+ * distributed switching, so the direct route through this
+ * port is specified in fdb, using vnet_modify_fdb(macaddr);
+ */
+ vnet_add_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr,
+ vgen_tx, vgenp->vsw_portp);
+
+ if (portp == vgenp->vsw_portp) {
+ /*
+ * create the default route entry in vnet's fdb.
+ * This is the entry used by vnet to reach
+ * unknown destinations, which basically goes
+ * through vsw on domain0 and out through the
+ * physical device bound to vsw.
+ */
+ vnet_add_def_rte(vgenp->vnetp, vgen_tx, portp);
+ }
+
+ /* Bring up the channels of this port */
+ vgen_init_ldcs(portp);
+}
+
+/* disable transmit/receive on ports */
+static void
+vgen_uninit_ports(vgen_t *vgenp)
+{
+	vgen_portlist_t *plistp = &(vgenp->vgenports);
+	vgen_port_t *p;
+
+	/* hold the port list read lock while each port is torn down */
+	READ_ENTER(&plistp->rwlock);
+	p = plistp->headp;
+	while (p != NULL) {
+		vgen_port_uninit(p);
+		p = p->nextp;
+	}
+	RW_EXIT(&plistp->rwlock);
+}
+
+/*
+ * Stop a single port: bring down its channels first, then remove its
+ * fdb entry (and the default route entry if this is the vsw-port).
+ * Mirror of vgen_port_init().
+ */
+static void
+vgen_port_uninit(vgen_port_t *portp)
+{
+	vgen_t *vgenp;
+
+	vgenp = portp->vgenp;
+
+	/* channels must be quiesced before the fdb route is removed */
+	vgen_uninit_ldcs(portp);
+	/* delete the entry in vnet's fdb for this port */
+	vnet_del_fdb(vgenp->vnetp, (uint8_t *)&portp->macaddr);
+	if (portp == vgenp->vsw_portp) {
+		/*
+		 * if this is vsw-port, then delete the default
+		 * route entry in vnet's fdb.
+		 */
+		vnet_del_def_rte(vgenp->vnetp);
+	}
+}
+
+/*
+ * Register with MD event generator so this vnet instance is notified
+ * when its "virtual-device-port" MD nodes change.  Builds a node/prop
+ * spec from vgen_prop_template, saves it in vgen_t for later free by
+ * vgen_mdeg_unreg(), and registers vgen_mdeg_cb() as the callback.
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+vgen_mdeg_reg(vgen_t *vgenp)
+{
+	mdeg_prop_spec_t *pspecp;
+	mdeg_node_spec_t *parentp;
+	uint_t templatesz;
+	int rv;
+	mdeg_handle_t hdl;
+	int i;
+	void *vnetp = vgenp->vnetp;
+
+	/*
+	 * "reg" identifies this instance in the machine description
+	 * (NOT the ddi instance number — see the note further down).
+	 */
+	i = ddi_prop_get_int(DDI_DEV_T_ANY, vgenp->vnetdip,
+	    DDI_PROP_DONTPASS, reg_propname, -1);
+	if (i == -1) {
+		return (DDI_FAILURE);
+	}
+	templatesz = sizeof (vgen_prop_template);
+	/* KM_NOSLEEP allocations: fail the registration rather than block */
+	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
+	if (pspecp == NULL) {
+		return (DDI_FAILURE);
+	}
+	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
+	if (parentp == NULL) {
+		kmem_free(pspecp, templatesz);
+		return (DDI_FAILURE);
+	}
+
+	bcopy(vgen_prop_template, pspecp, templatesz);
+
+	/*
+	 * NOTE: The instance here refers to the value of "reg" property and
+	 * not the dev_info instance (ddi_get_instance()) of vnet.
+	 */
+	VGEN_SET_MDEG_PROP_INST(pspecp, i);
+
+	parentp->namep = "virtual-device";
+	parentp->specp = pspecp;
+
+	/* save parentp in vgen_t */
+	vgenp->mdeg_parentp = parentp;
+
+	rv = mdeg_register(parentp, &vport_match, vgen_mdeg_cb, vgenp, &hdl);
+	if (rv != MDEG_SUCCESS) {
+		DERR((vnetp, "vgen_mdeg_reg: mdeg_register failed\n"));
+		KMEM_FREE(parentp);
+		kmem_free(pspecp, templatesz);
+		vgenp->mdeg_parentp = NULL;
+		return (DDI_FAILURE);
+	}
+
+	/* save mdeg handle in vgen_t */
+	vgenp->mdeg_hdl = hdl;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Unregister with MD event generator and release the node/property
+ * specs that vgen_mdeg_reg() allocated and saved in vgen_t.
+ */
+static void
+vgen_mdeg_unreg(vgen_t *vgenp)
+{
+	(void) mdeg_unregister(vgenp->mdeg_hdl);
+	/*
+	 * BUGFIX: also free the property spec template hanging off
+	 * specp; vgen_mdeg_reg() allocates it separately and the
+	 * original code leaked it on every unregister.
+	 */
+	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
+	KMEM_FREE(vgenp->mdeg_parentp);
+	vgenp->mdeg_parentp = NULL;
+	vgenp->mdeg_hdl = NULL;
+}
+
+/*
+ * Callback function registered with MD event generator.  Applies MD
+ * port-node changes under vgenp->lock: removals first, then additions
+ * (the vsw-port is added before any other port because the other
+ * ports' fdb entries route through it — see vgen_port_init()), then
+ * updates.  Returns MDEG_SUCCESS or MDEG_FAILURE.
+ */
+static int
+vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
+{
+	int idx;
+	int vsw_idx = -1;
+	uint64_t val;
+	vgen_t *vgenp;
+
+	if ((resp == NULL) || (cb_argp == NULL)) {
+		return (MDEG_FAILURE);
+	}
+
+	vgenp = (vgen_t *)cb_argp;
+	DBG1((vgenp->vnetp, "vgen_mdeg_cb: enter\n"));
+
+	/* serialize MD updates against other operations on this vgen */
+	mutex_enter(&vgenp->lock);
+
+	DBG1((vgenp->vnetp,
+	    "vgen_mdeg_cb: ports: removed(%x), added(%x), updated(%x)\n",
+	    resp->removed.nelem, resp->added.nelem, resp->match_curr.nelem));
+
+	for (idx = 0; idx < resp->removed.nelem; idx++) {
+		(void) vgen_remove_port(vgenp, resp->removed.mdp,
+		    resp->removed.mdep[idx]);
+	}
+
+	if (vgenp->vsw_portp == NULL) {
+		/*
+		 * find vsw_port and add it first, because other ports need
+		 * this when adding fdb entry (see vgen_port_init()).
+		 */
+		for (idx = 0; idx < resp->added.nelem; idx++) {
+			if (!(md_get_prop_val(resp->added.mdp,
+			    resp->added.mdep[idx], swport_propname, &val))) {
+				if (val == 0) {
+					/*
+					 * This port is connected to the
+					 * vsw on dom0.
+					 */
+					vsw_idx = idx;
+					(void) vgen_add_port(vgenp,
+					    resp->added.mdp,
+					    resp->added.mdep[idx]);
+					break;
+				}
+			}
+		}
+		if (vsw_idx == -1) {
+			DWARN((vgenp->vnetp, "vgen_mdeg_cb: "
+			    "can't find vsw_port\n"));
+			/*
+			 * BUGFIX: drop the lock before returning; the
+			 * original returned with vgenp->lock held,
+			 * deadlocking every subsequent callback.
+			 */
+			mutex_exit(&vgenp->lock);
+			return (MDEG_FAILURE);
+		}
+	}
+
+	for (idx = 0; idx < resp->added.nelem; idx++) {
+		if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
+			continue;
+		(void) vgen_add_port(vgenp, resp->added.mdp,
+		    resp->added.mdep[idx]);
+	}
+
+	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
+		(void) vgen_update_port(vgenp, resp->match_curr.mdp,
+		    resp->match_curr.mdep[idx],
+		    resp->match_prev.mdp,
+		    resp->match_prev.mdep[idx]);
+	}
+
+	mutex_exit(&vgenp->lock);
+	DBG1((vgenp->vnetp, "vgen_mdeg_cb: exit\n"));
+	return (MDEG_SUCCESS);
+}
+
+/*
+ * Add a new port to the device from its MD port node: read the port
+ * "id", collect the ldc channel ids under the node, read the remote
+ * mac address, then attach the port via vgen_port_attach_mdeg().
+ * Returns DDI_SUCCESS or DDI_FAILURE (all allocations freed on error).
+ */
+static int
+vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
+{
+	uint64_t port_num;
+	uint64_t *ldc_ids;
+	uint64_t macaddr;
+	uint64_t val;
+	int num_ldcs;
+	int vsw_port = B_FALSE;
+	int i;
+	int addrsz;
+	int num_nodes = 0;
+	int listsz = 0;
+	mde_cookie_t *listp = NULL;
+	uint8_t *addrp;
+	struct ether_addr ea;
+
+	/* read "id" property to get the port number */
+	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
+		DWARN((vgenp->vnetp,
+		    "vgen_add_port: prop(%s) not found\n", id_propname));
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * Find the channel endpoint node(s) under this port node.
+	 */
+	if ((num_nodes = md_node_count(mdp)) <= 0) {
+		DWARN((vgenp->vnetp,
+		    "vgen_add_port: invalid number of nodes found (%d)",
+		    num_nodes));
+		return (DDI_FAILURE);
+	}
+
+	/* allocate space for node list; sized for every node in the MD */
+	listsz = num_nodes * sizeof (mde_cookie_t);
+	listp = kmem_zalloc(listsz, KM_NOSLEEP);
+	if (listp == NULL)
+		return (DDI_FAILURE);
+
+	num_ldcs = md_scan_dag(mdp, mdex,
+	    md_find_name(mdp, channel_propname),
+	    md_find_name(mdp, "fwd"), listp);
+
+	if (num_ldcs <= 0) {
+		DWARN((vgenp->vnetp,
+		    "vgen_add_port: can't find %s nodes", channel_propname));
+		kmem_free(listp, listsz);
+		return (DDI_FAILURE);
+	}
+
+	DBG2((vgenp->vnetp, "vgen_add_port: num_ldcs %d", num_ldcs));
+
+	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
+	if (ldc_ids == NULL) {
+		kmem_free(listp, listsz);
+		return (DDI_FAILURE);
+	}
+
+	for (i = 0; i < num_ldcs; i++) {
+		/* read channel ids */
+		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
+			DWARN((vgenp->vnetp,
+			    "vgen_add_port: prop(%s) not found\n",
+			    id_propname));
+			kmem_free(listp, listsz);
+			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
+			return (DDI_FAILURE);
+		}
+		DBG2((vgenp->vnetp, "vgen_add_port: ldc_id 0x%llx",
+		    ldc_ids[i]));
+	}
+
+	kmem_free(listp, listsz);
+
+	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
+	    &addrsz)) {
+		DWARN((vgenp->vnetp,
+		    "vgen_add_port: prop(%s) not found\n", rmacaddr_propname));
+		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
+		return (DDI_FAILURE);
+	}
+
+	if (addrsz < ETHERADDRL) {
+		DWARN((vgenp->vnetp,
+		    "vgen_add_port: invalid address size (%d)\n", addrsz));
+		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * NOTE(review): reads 8 bytes from the MD prop buffer even
+	 * though only >= ETHERADDRL (6) was checked; assumes the MD
+	 * stores the address as an aligned 64-bit value — TODO confirm.
+	 */
+	macaddr = *((uint64_t *)addrp);
+
+	DBG2((vgenp->vnetp, "vgen_add_port: remote mac address 0x%llx\n",
+	    macaddr));
+
+	/* unpack the 64-bit value into octets, most significant first */
+	for (i = ETHERADDRL - 1; i >= 0; i--) {
+		ea.ether_addr_octet[i] = macaddr & 0xFF;
+		macaddr >>= 8;
+	}
+
+	/* "switch-port" prop value 0 marks the port connected to vsw */
+	if (vgenp->vsw_portp == NULL) {
+		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
+			if (val == 0) {
+				/* This port is connected to the vsw on dom0 */
+				vsw_port = B_TRUE;
+			}
+		}
+	}
+	(void) vgen_port_attach_mdeg(vgenp, (int)port_num, ldc_ids, num_ldcs,
+	    &ea, vsw_port);
+
+	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
+
+	return (DDI_SUCCESS);
+}
+
+/* remove a port from the device */
+static int
+vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
+{
+	vgen_portlist_t *plistp;
+	vgen_port_t *portp;
+	uint64_t port_num;
+
+	/* the MD "id" property identifies which port to remove */
+	if (md_get_prop_val(mdp, mdex, id_propname, &port_num) != 0) {
+		DWARN((vgenp->vnetp,
+		    "vgen_remove_port: prop(%s) not found\n", id_propname));
+		return (DDI_FAILURE);
+	}
+
+	plistp = &(vgenp->vgenports);
+	WRITE_ENTER(&plistp->rwlock);
+
+	portp = vgen_port_lookup(plistp, (int)port_num);
+	if (portp != NULL) {
+		/* detach while holding the write lock on the port list */
+		vgen_port_detach_mdeg(portp);
+		RW_EXIT(&plistp->rwlock);
+		return (DDI_SUCCESS);
+	}
+
+	DWARN((vgenp->vnetp, "vgen_remove_port: can't find port(%lx)\n",
+	    port_num));
+	RW_EXIT(&plistp->rwlock);
+	return (DDI_FAILURE);
+}
+
+/*
+ * Attach a port to the device based on mdeg data: allocate the port,
+ * attach each of its channels, link it into the device's port list,
+ * and — if the interface is already configured — bring it up.
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+vgen_port_attach_mdeg(vgen_t *vgenp, int port_num, uint64_t *ldcids,
+    int num_ids, struct ether_addr *macaddr, boolean_t vsw_port)
+{
+	vgen_port_t *portp;
+	vgen_portlist_t *plistp;
+	int i;
+
+	portp = kmem_zalloc(sizeof (vgen_port_t), KM_NOSLEEP);
+	if (portp == NULL) {
+		return (DDI_FAILURE);
+	}
+	portp->vgenp = vgenp;
+	portp->port_num = port_num;
+
+	DBG1((vgenp->vnetp,
+	    "vgen_port_attach_mdeg: port_num(%d)\n", portp->port_num));
+
+	portp->ldclist.num_ldcs = 0;
+	portp->ldclist.headp = NULL;
+	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
+
+	ether_copy(macaddr, &portp->macaddr);
+	for (i = 0; i < num_ids; i++) {
+		DBG2((vgenp->vnetp, "vgen_port_attach_mdeg: ldcid (%lx)\n",
+		    ldcids[i]));
+		/* per-channel attach failures are tolerated here */
+		(void) vgen_ldc_attach(portp, ldcids[i]);
+	}
+
+	/* link it into the list of ports */
+	plistp = &(vgenp->vgenports);
+	WRITE_ENTER(&plistp->rwlock);
+	vgen_port_list_insert(portp);
+	RW_EXIT(&plistp->rwlock);
+
+	/* This port is connected to the vsw on domain0 */
+	if (vsw_port)
+		vgenp->vsw_portp = portp;
+
+	if (vgenp->flags & VGEN_STARTED) {	/* interface is configured */
+		vgen_port_init(portp);
+	}
+
+	DBG1((vgenp->vnetp,
+	    "vgen_port_attach_mdeg: exit: port_num(%d)\n", portp->port_num));
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Detach a port from the device based on mdeg data.  If the interface
+ * is running, the port is first brought down (channels stopped, fdb
+ * entry removed) before the port structures are torn down.
+ */
+static void
+vgen_port_detach_mdeg(vgen_port_t *portp)
+{
+	vgen_t *vgenp = portp->vgenp;
+
+	DBG1((vgenp->vnetp,
+	    "vgen_port_detach_mdeg: enter: port_num(%d)\n", portp->port_num));
+	/* stop the port if needed */
+	if (vgenp->flags & VGEN_STARTED) {
+		vgen_port_uninit(portp);
+	}
+	vgen_port_detach(portp);
+
+	DBG1((vgenp->vnetp,
+	    "vgen_port_detach_mdeg: exit: port_num(%d)\n", portp->port_num));
+}
+
+/*
+ * Handle an MD update for an existing port.  Currently a stub (always
+ * succeeds): port property changes are not yet acted upon.
+ */
+static int
+vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
+    md_t *prev_mdp, mde_cookie_t prev_mdex)
+{
+	_NOTE(ARGUNUSED(vgenp, curr_mdp, curr_mdex, prev_mdp, prev_mdex))
+
+	/* XXX: TBD */
+	return (DDI_SUCCESS);
+}
+
+/* sum the requested statistic over all channels of the port */
+static uint64_t
+vgen_port_stat(vgen_port_t *portp, enum mac_stat stat)
+{
+	vgen_ldclist_t *ldclp = &portp->ldclist;
+	vgen_ldc_t *lp;
+	uint64_t total = 0;
+
+	/* hold the channel list read lock while accumulating */
+	READ_ENTER(&ldclp->rwlock);
+	for (lp = ldclp->headp; lp != NULL; lp = lp->nextp)
+		total += vgen_ldc_stat(lp, stat);
+	RW_EXIT(&ldclp->rwlock);
+
+	return (total);
+}
+
+/*
+ * Attach the channel corresponding to the given ldc_id to the port:
+ * allocate the channel structure, initialize its locks, init the LDC
+ * endpoint, register the interrupt callback, allocate tx resources
+ * and kstats, then link the channel into the port's channel list.
+ * attach_state is a bitmask of the steps completed so far; on failure
+ * the completed steps are undone in reverse order.
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
+{
+	vgen_t *vgenp;
+	vgen_ldclist_t *ldclp;
+	vgen_ldc_t *ldcp, **prev_ldcp;
+	ldc_attr_t attr;
+	int status;
+	ldc_status_t istatus;
+	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
+		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
+		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10}
+		attach_state;
+
+	attach_state = AST_init;
+	vgenp = portp->vgenp;
+	ldclp = &portp->ldclist;
+
+	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
+	if (ldcp == NULL) {
+		goto ldc_attach_failed;
+	}
+	ldcp->ldc_id = ldc_id;
+	ldcp->portp = portp;
+	/* tx descriptor reclaim thresholds (tunables) */
+	ldcp->reclaim_lowat = vnet_reclaim_lowat;
+	ldcp->reclaim_hiwat = vnet_reclaim_hiwat;
+
+	attach_state |= AST_ldc_alloc;
+
+	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
+
+	attach_state |= AST_mutex_init;
+
+	attr.devclass = LDC_DEV_NT;
+	attr.instance = ddi_get_instance(vgenp->vnetdip);
+	attr.mode = LDC_MODE_UNRELIABLE;
+	attr.qlen = vnet_ldc_qlen;
+	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
+	if (status != 0) {
+		DWARN((vgenp->vnetp, "ldc_init failed, id (%lx) rv (%d)\n",
+		    ldc_id, status));
+		goto ldc_attach_failed;
+	}
+	attach_state |= AST_ldc_init;
+
+	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
+	if (status != 0) {
+		DWARN((vgenp->vnetp,
+		    "ldc_reg_callback failed, id (%lx) rv (%d)\n",
+		    ldc_id, status));
+		goto ldc_attach_failed;
+	}
+	attach_state |= AST_ldc_reg_cb;
+
+	(void) ldc_status(ldcp->ldc_handle, &istatus);
+	ASSERT(istatus == LDC_INIT);
+	ldcp->ldc_status = istatus;
+
+	/* allocate transmit resources */
+	status = vgen_alloc_tx_ring(ldcp);
+	if (status != 0) {
+		goto ldc_attach_failed;
+	}
+	attach_state |= AST_alloc_tx_ring;
+
+	/* Setup kstats for the channel */
+	status = vgen_setup_kstats(ldcp);
+	if (status != VGEN_SUCCESS) {
+		goto ldc_attach_failed;
+	}
+
+	/* initialize vgen_versions supported */
+	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
+
+	/* link it into the list of channels for this port (at the head) */
+	WRITE_ENTER(&ldclp->rwlock);
+	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
+	ldcp->nextp = *prev_ldcp;
+	*prev_ldcp = ldcp;
+	ldclp->num_ldcs++;
+	RW_EXIT(&ldclp->rwlock);
+
+	ldcp->flags |= CHANNEL_ATTACHED;
+	return (DDI_SUCCESS);
+
+ldc_attach_failed:
+	/* undo the completed steps, newest first */
+	if (attach_state & AST_alloc_tx_ring) {
+		vgen_free_tx_ring(ldcp);
+	}
+	if (attach_state & AST_ldc_reg_cb) {
+		(void) ldc_unreg_callback(ldcp->ldc_handle);
+	}
+	if (attach_state & AST_ldc_init) {
+		(void) ldc_fini(ldcp->ldc_handle);
+	}
+	if (attach_state & AST_mutex_init) {
+		mutex_destroy(&ldcp->tclock);
+		mutex_destroy(&ldcp->txlock);
+		mutex_destroy(&ldcp->cblock);
+	}
+	if (attach_state & AST_ldc_alloc) {
+		KMEM_FREE(ldcp);
+	}
+	return (DDI_FAILURE);
+}
+
+/*
+ * Detach a channel from the port: locate it in the port's channel
+ * list, release its kstats, tx resources and LDC endpoint, destroy
+ * its locks, and unlink/free the channel structure.  No-op if the
+ * channel is not in the list or was never fully attached.
+ * NOTE(review): the channel list rwlock is not taken here — presumably
+ * callers already hold it or detach is single-threaded; confirm.
+ */
+static void
+vgen_ldc_detach(vgen_ldc_t *ldcp)
+{
+	vgen_port_t *portp;
+	vgen_t *vgenp;
+	vgen_ldc_t *pldcp;
+	vgen_ldc_t **prev_ldcp;
+	vgen_ldclist_t *ldclp;
+
+	portp = ldcp->portp;
+	vgenp = portp->vgenp;
+	ldclp = &portp->ldclist;
+
+	/* find ldcp, keeping a pointer to the link that references it */
+	prev_ldcp = (vgen_ldc_t **)&ldclp->headp;
+	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
+		if (pldcp == ldcp) {
+			break;
+		}
+	}
+
+	if (pldcp == NULL) {
+		/* invalid ldcp? */
+		return;
+	}
+
+	if (ldcp->ldc_status != LDC_INIT) {
+		DWARN((vgenp->vnetp,
+		    "vgen_ldc_detach: ldc_status is not INIT id(%lx)\n",
+		    ldcp->ldc_id));
+	}
+
+	if (ldcp->flags & CHANNEL_ATTACHED) {
+		ldcp->flags &= ~(CHANNEL_ATTACHED);
+
+		/* teardown mirrors vgen_ldc_attach() in reverse order */
+		vgen_destroy_kstats(ldcp);
+		/* free transmit resources */
+		vgen_free_tx_ring(ldcp);
+		(void) ldc_unreg_callback(ldcp->ldc_handle);
+		(void) ldc_fini(ldcp->ldc_handle);
+		mutex_destroy(&ldcp->tclock);
+		mutex_destroy(&ldcp->txlock);
+		mutex_destroy(&ldcp->cblock);
+
+		/* unlink it from the list */
+		*prev_ldcp = ldcp->nextp;
+		ldclp->num_ldcs--;
+		KMEM_FREE(ldcp);
+	}
+}
+
+/*
+ * This function allocates transmit resources for the channel.
+ * The resources consist of a transmit descriptor ring and an associated
+ * transmit buffer ring.
+ * Returns DDI_SUCCESS or DDI_FAILURE; on failure nothing remains
+ * allocated.
+ */
+static int
+vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
+{
+	void *tbufp;
+	ldc_mem_info_t minfo;
+	uint32_t txdsize;
+	uint32_t tbufsize;
+	int status;
+	void *vnetp = LDC_TO_VNET(ldcp);
+
+	/* vnet_ntxds is a driver tunable: descriptors per channel */
+	ldcp->num_txds = vnet_ntxds;
+	txdsize = sizeof (vnet_public_desc_t);
+	tbufsize = sizeof (vgen_private_desc_t);
+
+	/* allocate transmit buffer ring (private, per-descriptor state) */
+	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
+	if (tbufp == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	/* create transmit descriptor ring (exported to the peer via LDC) */
+	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
+	    &ldcp->tx_dhandle);
+	if (status) {
+		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_create() "
+		    "failed, id(%lx)\n", ldcp->ldc_id));
+		kmem_free(tbufp, ldcp->num_txds * tbufsize);
+		return (DDI_FAILURE);
+	}
+
+	/* get the addr of descriptor ring */
+	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
+	if (status) {
+		DWARN((vnetp, "vgen_alloc_tx_ring: ldc_mem_dring_info() "
+		    "failed, id(%lx)\n", ldcp->ldc_id));
+		kmem_free(tbufp, ldcp->num_txds * tbufsize);
+		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
+		ldcp->tbufp = NULL;
+		return (DDI_FAILURE);
+	}
+	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
+	ldcp->tbufp = tbufp;
+
+	/* one-past-the-end pointers used for ring wraparound */
+	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
+	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);
+
+	return (DDI_SUCCESS);
+}
+
+/* Free transmit resources for the channel */
+static void
+vgen_free_tx_ring(vgen_ldc_t *ldcp)
+{
+	size_t ringsz = ldcp->num_txds * sizeof (vgen_private_desc_t);
+
+	/* destroy the exported tx descriptor ring */
+	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
+
+	/* release the private tx buffer ring and clear all ring pointers */
+	kmem_free(ldcp->tbufp, ringsz);
+	ldcp->txdp = NULL;
+	ldcp->txdendp = NULL;
+	ldcp->tbufp = NULL;
+	ldcp->tbufendp = NULL;
+}
+
+/* enable transmit/receive on the channels for the port */
+static void
+vgen_init_ldcs(vgen_port_t *portp)
+{
+	vgen_ldclist_t *ldclp = &portp->ldclist;
+	vgen_ldc_t *lp;
+
+	/* walk the channel list under the read lock, starting each one */
+	READ_ENTER(&ldclp->rwlock);
+	lp = ldclp->headp;
+	while (lp != NULL) {
+		(void) vgen_ldc_init(lp);
+		lp = lp->nextp;
+	}
+	RW_EXIT(&ldclp->rwlock);
+}
+
+/* stop transmit/receive on the channels for the port */
+static void
+vgen_uninit_ldcs(vgen_port_t *portp)
+{
+	vgen_ldclist_t *ldclp = &portp->ldclist;
+	vgen_ldc_t *lp;
+
+	/* walk the channel list under the read lock, stopping each one */
+	READ_ENTER(&ldclp->rwlock);
+	lp = ldclp->headp;
+	while (lp != NULL) {
+		vgen_ldc_uninit(lp);
+		lp = lp->nextp;
+	}
+	RW_EXIT(&ldclp->rwlock);
+}
+
+/*
+ * Enable transmit/receive on the channel: open the LDC endpoint,
+ * initialize the tx buffer ring, bind the tx descriptor ring to the
+ * channel, attempt to bring the link up, and start the tx watchdog.
+ * init_state is a bitmask of completed steps, unwound on failure.
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+vgen_ldc_init(vgen_ldc_t *ldcp)
+{
+	void *vnetp = LDC_TO_VNET(ldcp);
+	ldc_status_t istatus;
+	int rv;
+	enum	{ ST_init = 0x0, ST_init_tbufs = 0x1,
+		ST_ldc_open = 0x2, ST_dring_bind = 0x4
+		}
+		init_state;
+	uint32_t ncookies = 0;
+
+	init_state = ST_init;
+
+	LDC_LOCK(ldcp);
+
+	rv = ldc_open(ldcp->ldc_handle);
+	if (rv != 0) {
+		DWARN((vnetp,
+		    "vgen_ldcinit: ldc_open failed: id<%lx> rv(%d)\n",
+		    ldcp->ldc_id, rv));
+		goto ldcinit_failed;
+	}
+	init_state |= ST_ldc_open;
+
+	(void) ldc_status(ldcp->ldc_handle, &istatus);
+	if (istatus != LDC_OPEN && istatus != LDC_READY) {
+		DWARN((vnetp,
+		    "vgen_ldcinit: id (%lx) status(%d) is not OPEN/READY\n",
+		    ldcp->ldc_id, istatus));
+		goto ldcinit_failed;
+	}
+	ldcp->ldc_status = istatus;
+
+	rv = vgen_init_tbufs(ldcp);
+	if (rv != 0) {
+		DWARN((vnetp,
+		    "vgen_ldcinit: vgen_init_tbufs() failed: id(%lx)\n",
+		    ldcp->ldc_id));
+		goto ldcinit_failed;
+	}
+	init_state |= ST_init_tbufs;
+
+	/* Bind descriptor ring to the channel */
+	rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
+	    LDC_SHADOW_MAP, LDC_MEM_RW, &ldcp->tx_dcookie, &ncookies);
+	if (rv != 0) {
+		DWARN((vnetp, "vgen_ldcinit: id (%lx) "
+		    "ldc_mem_dring_bind failed\n", ldcp->ldc_id));
+		goto ldcinit_failed;
+	}
+
+	ASSERT(ncookies == 1);
+	ldcp->num_txdcookies = ncookies;
+
+	init_state |= ST_dring_bind;
+
+	/* ldc_up failure is not fatal: the peer may bring the link up later */
+	rv = ldc_up(ldcp->ldc_handle);
+	if (rv != 0) {
+		DBG2((vnetp,
+		    "vgen_ldcinit: ldc_up err id(%lx) rv(%d)\n",
+		    ldcp->ldc_id, rv));
+	}
+
+	(void) ldc_status(ldcp->ldc_handle, &istatus);
+	if (istatus != LDC_UP) {
+		DBG2((vnetp, "vgen_ldcinit: id(%lx) status(%d) is not UP\n",
+		    ldcp->ldc_id, istatus));
+	}
+	ldcp->ldc_status = istatus;
+
+	/* initialize transmit watchdog timeout */
+	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
+	    drv_usectohz(vnet_ldcwd_interval * 1000));
+
+	ldcp->flags |= CHANNEL_STARTED;
+
+	LDC_UNLOCK(ldcp);
+	return (DDI_SUCCESS);
+
+ldcinit_failed:
+	/* undo the completed steps, newest first */
+	if (init_state & ST_dring_bind) {
+		(void) ldc_mem_dring_unbind(ldcp->tx_dhandle);
+	}
+	if (init_state & ST_init_tbufs) {
+		vgen_uninit_tbufs(ldcp);
+	}
+	if (init_state & ST_ldc_open) {
+		(void) ldc_close(ldcp->ldc_handle);
+	}
+	LDC_UNLOCK(ldcp);
+	return (DDI_FAILURE);
+}
+
+/*
+ * Stop transmit/receive on the channel: disable further callbacks,
+ * wait briefly for in-flight tx/callbacks to drain, reset handshake
+ * state, cancel the tx watchdog, unbind the tx descriptor ring, free
+ * the tx buffers, and close the LDC endpoint.  No-op (with a warning)
+ * if the channel was never started.
+ */
+static void
+vgen_ldc_uninit(vgen_ldc_t *ldcp)
+{
+	void *vnetp = LDC_TO_VNET(ldcp);
+	int rv;
+
+	DBG1((vnetp, "vgen_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id));
+	LDC_LOCK(ldcp);
+
+	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
+		LDC_UNLOCK(ldcp);
+		DWARN((vnetp, "vgen_ldc_uninit: id(%lx) CHANNEL_STARTED"
+		    " flag is not set\n", ldcp->ldc_id));
+		return;
+	}
+
+	/* disable further callbacks */
+	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
+	if (rv != 0) {
+		DWARN((vnetp, "vgen_ldc_uninit: id (%lx) "
+		    "ldc_set_cb_mode failed\n", ldcp->ldc_id));
+	}
+
+	/* clear handshake done bit and wait for pending tx and cb to finish */
+	ldcp->hphase &= ~(VH_DONE);
+	/* drop the locks so in-flight tx/callback paths can complete */
+	LDC_UNLOCK(ldcp);
+	drv_usecwait(1000);
+	LDC_LOCK(ldcp);
+
+	vgen_reset_hphase(ldcp);
+
+	/* reset transmit watchdog timeout */
+	if (ldcp->wd_tid) {
+		(void) untimeout(ldcp->wd_tid);
+		ldcp->wd_tid = 0;
+	}
+
+	/* unbind tx descriptor ring from the channel */
+	rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
+	if (rv != 0) {
+		DWARN((vnetp, "vgen_ldcuninit: ldc_mem_dring_unbind "
+		    "failed id(%lx)\n", ldcp->ldc_id));
+	}
+
+	vgen_uninit_tbufs(ldcp);
+
+	rv = ldc_close(ldcp->ldc_handle);
+	if (rv != 0) {
+		DWARN((vnetp, "vgen_ldcuninit: ldc_close err id(%lx)\n",
+		    ldcp->ldc_id));
+	}
+	ldcp->ldc_status = LDC_INIT;
+	ldcp->flags &= ~(CHANNEL_STARTED);
+
+	LDC_UNLOCK(ldcp);
+
+	DBG1((vnetp, "vgen_ldc_uninit: exit: id(%lx)\n", ldcp->ldc_id));
+}
+
+/*
+ * Initialize the transmit buffer ring for the channel: zero both
+ * rings, allocate an LDC memory handle per tx buffer, mark every
+ * descriptor free, and reset the walking pointers and tx sequence
+ * state.  On any allocation failure, everything allocated so far is
+ * released.  Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+vgen_init_tbufs(vgen_ldc_t *ldcp)
+{
+	vgen_private_desc_t *tbufp;
+	vnet_public_desc_t *txdp;
+	vio_dring_entry_hdr_t *hdrp;
+	int i;
+	int rv;
+
+	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
+	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
+
+	/*
+	 * for each tx buf (priv_desc), allocate a ldc mem_handle which is
+	 * required to map the data during transmit, set the flags
+	 * to free (available for use by transmit routine).
+	 */
+
+	for (i = 0; i < ldcp->num_txds; i++) {
+		tbufp = &(ldcp->tbufp[i]);
+		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
+		    &(tbufp->memhandle));
+		if (rv) {
+			/* zero handle so the unwind skips freeing it */
+			tbufp->memhandle = 0;
+			goto init_tbufs_failed;
+		}
+		tbufp->flags = VGEN_PRIV_DESC_FREE;
+		txdp = &(ldcp->txdp[i]);
+		hdrp = &txdp->hdr;
+		hdrp->dstate = VIO_DESC_FREE;
+		hdrp->ack = B_FALSE;
+		/* cross-link private entry to its public descriptor */
+		tbufp->descp = txdp;
+	}
+
+	/* reset tbuf walking pointers */
+	ldcp->next_tbufp = ldcp->tbufp;
+	ldcp->cur_tbufp = ldcp->tbufp;
+
+	/* initialize tx seqnum and index */
+	ldcp->next_txseq = VNET_ISS;
+	ldcp->next_txi = 0;
+
+	return (DDI_SUCCESS);
+
+init_tbufs_failed:;
+	vgen_uninit_tbufs(ldcp);
+	return (DDI_FAILURE);
+}
+
+/*
+ * Uninitialize transmit buffer ring for the channel: for each tx
+ * buffer, unbind and free any in-use mapping/mblk, free its LDC
+ * memory handle, then zero both rings.  Safe to call on a partially
+ * initialized ring (see vgen_init_tbufs() failure path).
+ */
+static void
+vgen_uninit_tbufs(vgen_ldc_t *ldcp)
+{
+	vgen_private_desc_t *tbufp = ldcp->tbufp;
+	vnet_public_desc_t *txdp;
+	vio_dring_entry_hdr_t *hdrp;
+	int i;
+
+	/* for each tbuf (priv_desc), free ldc mem_handle */
+	for (i = 0; i < ldcp->num_txds; i++) {
+
+		tbufp = &(ldcp->tbufp[i]);
+		txdp = tbufp->descp;
+		hdrp = &txdp->hdr;
+
+		/* a non-FREE entry still holds a bound mapping and an mblk */
+		if (tbufp->flags != VGEN_PRIV_DESC_FREE) {
+			(void) ldc_mem_unbind_handle(tbufp->memhandle);
+			freemsg(tbufp->mp);
+			tbufp->mp = NULL;
+			tbufp->flags = VGEN_PRIV_DESC_FREE;
+			hdrp->dstate = VIO_DESC_FREE;
+			hdrp->ack = B_FALSE;
+		}
+		if (tbufp->memhandle) {
+			(void) ldc_mem_free_handle(tbufp->memhandle);
+			tbufp->memhandle = 0;
+		}
+		tbufp->descp = NULL;
+	}
+
+	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
+	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));
+}
+
+/*
+ * Clobber tx descriptor ring: forcibly reclaim every busy tx
+ * descriptor (unbind its mapping, free its mblk, mark it free) and
+ * reset the walking pointers and tx sequence state.  Used after a
+ * channel reset, when the peer can no longer acknowledge descriptors.
+ */
+static void
+vgen_clobber_tbufs(vgen_ldc_t *ldcp)
+{
+	vnet_public_desc_t *txdp;
+	vgen_private_desc_t *tbufp;
+	vio_dring_entry_hdr_t *hdrp;
+	void *vnetp = LDC_TO_VNET(ldcp);
+	int i;
+#ifdef DEBUG
+	/* count of descriptors the peer had already marked DONE */
+	int ndone = 0;
+#endif
+
+	for (i = 0; i < ldcp->num_txds; i++) {
+
+		tbufp = &(ldcp->tbufp[i]);
+		txdp = tbufp->descp;
+		hdrp = &txdp->hdr;
+
+		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
+			(void) ldc_mem_unbind_handle(tbufp->memhandle);
+			freemsg(tbufp->mp);
+			tbufp->mp = NULL;
+			tbufp->flags = VGEN_PRIV_DESC_FREE;
+#ifdef DEBUG
+			if (hdrp->dstate == VIO_DESC_DONE)
+				ndone++;
+#endif
+			hdrp->dstate = VIO_DESC_FREE;
+			hdrp->ack = B_FALSE;
+		}
+	}
+	/* reset tbuf walking pointers */
+	ldcp->next_tbufp = ldcp->tbufp;
+	ldcp->cur_tbufp = ldcp->tbufp;
+
+	/* reset tx seqnum and index */
+	ldcp->next_txseq = VNET_ISS;
+	ldcp->next_txi = 0;
+#ifdef DEBUG
+	DBG2((vnetp,
+	    "vgen_clobber_tbufs: id(0x%lx) num descrs done (%d)\n",
+	    ldcp->ldc_id, ndone));
+#endif
+}
+
+/* clobber receive descriptor ring */
+static void
+vgen_clobber_rxds(vgen_ldc_t *ldcp)
+{
+	/* drop the peer's dring mapping and reset all rx state to initial */
+	ldcp->rxdp = NULL;
+	ldcp->rx_dhandle = 0;
+	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
+	ldcp->num_rxds = 0;
+	ldcp->next_rxi = 0;
+	ldcp->next_rxseq = VNET_ISS;
+}
+
+/*
+ * Initialize receive descriptor ring: map the peer's descriptor ring
+ * (described by the given cookie(s)) into this channel and record the
+ * mapped address and rx state.  Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
+    ldc_mem_cookie_t *dcookie, uint32_t ncookies)
+{
+	int rv;
+	ldc_mem_info_t minfo;
+
+	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
+	    desc_size, LDC_SHADOW_MAP, &(ldcp->rx_dhandle));
+	if (rv != 0) {
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * successfully mapped, now try to
+	 * get info about the mapped dring
+	 */
+	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
+	if (rv != 0) {
+		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
+		return (DDI_FAILURE);
+	}
+
+	/*
+	 * save ring address, number of descriptors.
+	 */
+	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
+	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
+	ldcp->num_rxdcookies = ncookies;
+	ldcp->num_rxds = num_desc;
+	/* reset rx index and sequence number to initial values */
+	ldcp->next_rxi = 0;
+	ldcp->next_rxseq = VNET_ISS;
+
+	return (DDI_SUCCESS);
+}
+
+/* get channel statistics */
+static uint64_t
+vgen_ldc_stat(vgen_ldc_t *ldcp, enum mac_stat stat)
+{
+	vgen_stats_t *statsp = ldcp->statsp;
+
+	/*
+	 * Return the per-channel counter for the requested statistic.
+	 * Statistics with no meaning for a virtual channel (link speed,
+	 * collision/PHY counters, capability/autoneg bits, ...) report 0
+	 * via the default arm, exactly as the explicit case list did.
+	 */
+	switch (stat) {
+	case MAC_STAT_MULTIRCV:
+		return (statsp->multircv);
+	case MAC_STAT_BRDCSTRCV:
+		return (statsp->brdcstrcv);
+	case MAC_STAT_MULTIXMT:
+		return (statsp->multixmt);
+	case MAC_STAT_BRDCSTXMT:
+		return (statsp->brdcstxmt);
+	case MAC_STAT_NORCVBUF:
+		return (statsp->norcvbuf);
+	case MAC_STAT_IERRORS:
+		return (statsp->ierrors);
+	case MAC_STAT_NOXMTBUF:
+		return (statsp->noxmtbuf);
+	case MAC_STAT_OERRORS:
+		return (statsp->oerrors);
+	case MAC_STAT_RBYTES:
+		return (statsp->rbytes);
+	case MAC_STAT_IPACKETS:
+		return (statsp->ipackets);
+	case MAC_STAT_OBYTES:
+		return (statsp->obytes);
+	case MAC_STAT_OPACKETS:
+		return (statsp->opackets);
+	default:
+		/* stats not relevant to ldc, return 0 */
+		return (0);
+	}
+}
+
+/*
+ * Fill in the mac_t entry points for this vgen instance, with the
+ * vgen_t pointer as the driver-private handle passed back to them.
+ */
+static void
+vgen_init_macp(vgen_t *vgenp, mac_t *macp)
+{
+	macp->m_driver = (void *)vgenp;
+	macp->m_start = vgen_start;
+	macp->m_stop = vgen_stop;
+	macp->m_tx = vgen_tx;
+	macp->m_resources = vgen_resources;
+	macp->m_multicst = vgen_multicst;
+	macp->m_promisc = vgen_promisc;
+	macp->m_unicst = vgen_unicst;
+	macp->m_stat = vgen_stat;
+	macp->m_ioctl = vgen_ioctl;
+}
+
+/*
+ * Interrupt handler for the channel.  Runs under cblock: first
+ * processes LDC status transitions (UP starts the handshake; a drop
+ * back to OPEN/READY reroutes the fdb entry through the vsw-port and
+ * resets the handshake), then, if the link is UP, drains all queued
+ * messages, dispatching by VIO message type.  Received data packets
+ * are chained and passed to the MAC layer only after cblock is
+ * dropped.  Always returns LDC_SUCCESS.
+ */
+static uint_t
+vgen_ldc_cb(uint64_t event, caddr_t arg)
+{
+	_NOTE(ARGUNUSED(event))
+	vgen_ldc_t *ldcp;
+	void *vnetp;
+	vgen_t *vgenp;
+	size_t msglen;
+	ldc_status_t istatus;
+	uint64_t ldcmsg[7];
+	int rv;
+	vio_msg_tag_t *tagp;
+	mblk_t *mp = NULL;
+	mblk_t *bp = NULL;
+	mblk_t *bpt = NULL;
+	mblk_t *headp = NULL;
+	mblk_t *tailp = NULL;
+	vgen_stats_t *statsp;
+
+	ldcp = (vgen_ldc_t *)arg;
+	vgenp = LDC_TO_VGEN(ldcp);
+	vnetp = LDC_TO_VNET(ldcp);
+	statsp = ldcp->statsp;
+
+	DBG1((vnetp, "vgen_ldc_cb enter: ldcid (%lx)\n", ldcp->ldc_id));
+
+	mutex_enter(&ldcp->cblock);
+	statsp->callbacks++;
+	/* ignore callbacks on a channel that is not (or no longer) open */
+	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
+		DWARN((vnetp, "vgen_ldc_cb: id(%lx), status(%d) is LDC_INIT\n",
+		    ldcp->ldc_id, ldcp->ldc_status));
+		mutex_exit(&ldcp->cblock);
+		return (LDC_SUCCESS);
+	}
+
+	/* check ldc status change events first */
+	(void) ldc_status(ldcp->ldc_handle, &istatus);
+
+	if (istatus != ldcp->ldc_status) {
+		switch (istatus) {
+		case LDC_UP:
+			ldcp->ldc_status = istatus;
+			DBG1((vnetp,
+			    "vgen_ldc_cb: id(%lx) status(%d) is LDC_UP\n",
+			    ldcp->ldc_id, ldcp->ldc_status));
+
+			if (ldcp->portp != vgenp->vsw_portp) {
+				/*
+				 * modify fdb entry to use this port as the
+				 * channel is up, instead of going through the
+				 * vsw-port (see comments in vgen_port_init())
+				 */
+				vnet_modify_fdb(vnetp,
+				    (uint8_t *)&ldcp->portp->macaddr,
+				    vgen_tx, ldcp->portp);
+			}
+			/* Initialize local session id */
+			ldcp->local_sid = ddi_get_lbolt();
+			/* clear peer session id */
+			ldcp->peer_sid = 0;
+			ldcp->hretries = 0;
+			/* Initiate Handshake process with peer ldc endpoint */
+			vgen_handshake_reset(ldcp);
+			vgen_handshake(vh_nextphase(ldcp));
+			break;
+
+		case LDC_OPEN:
+		case LDC_READY:
+			ldcp->ldc_status = istatus;
+			if ((ldcp->portp != vgenp->vsw_portp) &&
+			    (vgenp->vsw_portp != NULL)) {
+				/*
+				 * modify fdb entry to use vsw-port as the
+				 * channel is reset and we don't have a direct
+				 * link to the destination (see comments
+				 * in vgen_port_init()).
+				 */
+				vnet_modify_fdb(vnetp,
+				    (uint8_t *)&ldcp->portp->macaddr,
+				    vgen_tx, vgenp->vsw_portp);
+			}
+			/* clear sids */
+			ldcp->local_sid = 0;
+			ldcp->peer_sid = 0;
+			if (ldcp->hphase != VH_PHASE0) {
+				vgen_handshake_reset(ldcp);
+			}
+			DBG1((vnetp,
+			    "vgen_ldc_cb: id(%lx) status is (%d)\n",
+			    ldcp->ldc_id, ldcp->ldc_status));
+			break;
+
+		default:
+			DWARN((vnetp,
+			    "vgen_ldc_cb: id(%lx) istatus=(%d) status(%d) is"
+			    " *UNKNOWN*\n",
+			    ldcp->ldc_id, istatus, ldcp->ldc_status));
+			break;
+		}
+	}
+
+	if (istatus != LDC_UP) {
+		DBG1((vnetp, "vgen_ldc_cb: id(%lx) status(%d) is NOT LDC_UP\n",
+		    ldcp->ldc_id, ldcp->ldc_status));
+		mutex_exit(&ldcp->cblock);
+		return (LDC_SUCCESS);
+	}
+
+	/* if ldc_status is UP, receive all packets */
+	do {
+		msglen = sizeof (ldcmsg);
+		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&ldcmsg, &msglen);
+
+		if (rv != 0) {
+			DWARN((vnetp,
+			    "vgen_ldc_cb:ldc_read err id(%lx) rv(%d) "
+			    "len(%d)\n", ldcp->ldc_id, rv, msglen));
+			break;
+		}
+		if (msglen == 0) {
+			DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx) NODATA",
+			    ldcp->ldc_id));
+			break;
+		}
+		DBG2((vnetp, "vgen_ldc_cb: ldc_read id(%lx): msglen(%d)",
+		    ldcp->ldc_id, msglen));
+
+		/* every VIO message starts with a common tag */
+		tagp = (vio_msg_tag_t *)ldcmsg;
+
+		if (ldcp->peer_sid) {
+			/*
+			 * check sid only after we have received peer's sid
+			 * in the version negotiate msg.
+			 */
+#ifdef DEBUG
+			if (vgen_hdbg & HDBG_BAD_SID) {
+				/* simulate bad sid condition */
+				tagp->vio_sid = 0;
+				vgen_hdbg &= ~(HDBG_BAD_SID);
+			}
+#endif
+			if (vgen_check_sid(ldcp, tagp) == VGEN_FAILURE) {
+				/*
+				 * If sid mismatch is detected,
+				 * reset the channel.
+				 */
+				ldcp->need_ldc_reset = B_TRUE;
+				vgen_handshake_reset(ldcp);
+				mutex_exit(&ldcp->cblock);
+				return (LDC_SUCCESS);
+			}
+		}
+
+		switch (tagp->vio_msgtype) {
+		case VIO_TYPE_CTRL:
+			vgen_handle_ctrlmsg(ldcp, tagp);
+			break;
+
+		case VIO_TYPE_DATA:
+			headp = tailp = NULL;
+			vgen_handle_datamsg(ldcp, tagp, &headp, &tailp);
+			/* build a chain of received packets */
+			if (headp != NULL) {
+				if (bp == NULL) {
+					bp = headp;
+					bpt = tailp;
+				} else {
+					bpt->b_next = headp;
+					bpt = tailp;
+				}
+			}
+			break;
+
+		case VIO_TYPE_ERR:
+			vgen_handle_errmsg(ldcp, tagp);
+			break;
+
+		default:
+			DWARN((vnetp,
+			    "vgen_ldc_cb: Unknown VIO_TYPE(%x)\n",
+			    tagp->vio_msgtype));
+			break;
+		}
+
+	} while (msglen);
+
+	mutex_exit(&ldcp->cblock);
+	/* send up the received packets to MAC layer */
+	while (bp != NULL) {
+		mp = bp;
+		bp = bp->b_next;
+		mp->b_next = mp->b_prev = NULL;
+		DBG2((vnetp, "vgen_ldc_cb: id(%lx) rx pkt len (%lx)\n",
+		    ldcp->ldc_id, MBLKL(mp)));
+		mac_rx((mac_t *)vgenp->vnetmacp, vgenp->mrh, mp);
+	}
+	DBG1((vnetp, "vgen_ldc_cb exit: ldcid (%lx)\n", ldcp->ldc_id));
+
+	return (LDC_SUCCESS);
+}
+
+/* vgen handshake functions */
+
+/* change the hphase for the channel to the next phase */
+static vgen_ldc_t *
+vh_nextphase(vgen_ldc_t *ldcp)
+{
+ if (ldcp->hphase == VH_PHASE3) {
+ /* PHASE3 is the last numbered phase; the next state is DONE */
+ ldcp->hphase = VH_DONE;
+ } else {
+ ldcp->hphase++;
+ }
+ /* return ldcp so callers can chain: vgen_handshake(vh_nextphase(ldcp)) */
+ return (ldcp);
+}
+
+/*
+ * Check whether the given version is supported or not and
+ * return VGEN_SUCCESS if supported.
+ */
+static int
+vgen_supported_version(vgen_ldc_t *ldcp, uint16_t ver_major,
+uint16_t ver_minor)
+{
+ vgen_ver_t *versions = ldcp->vgen_versions;
+ int i = 0;
+
+ /*
+ * Scan the per-channel version table (at most VGEN_NUM_VER entries)
+ * for an exact (major, minor) match.
+ */
+ while (i < VGEN_NUM_VER) {
+ if ((versions[i].ver_major == 0) &&
+ (versions[i].ver_minor == 0)) {
+ /* a (0,0) entry terminates the table early */
+ break;
+ }
+ if ((versions[i].ver_major == ver_major) &&
+ (versions[i].ver_minor == ver_minor)) {
+ return (VGEN_SUCCESS);
+ }
+ i++;
+ }
+ return (VGEN_FAILURE);
+}
+
+/*
+ * Given a version, return VGEN_SUCCESS if a lower version is supported.
+ */
+static int
+vgen_next_version(vgen_ldc_t *ldcp, vgen_ver_t *verp)
+{
+ vgen_ver_t *versions = ldcp->vgen_versions;
+ int i = 0;
+
+ /*
+ * NOTE(review): the table appears to be kept in descending order —
+ * vgen_reset_hphase() treats entry 0 as "the highest version
+ * supported" — so the first qualifying entry found here is the
+ * next-lower supported version.
+ */
+ while (i < VGEN_NUM_VER) {
+ if ((versions[i].ver_major == 0) &&
+ (versions[i].ver_minor == 0)) {
+ /* a (0,0) entry terminates the table early */
+ break;
+ }
+ /*
+ * if we support a lower minor version within the same major
+ * version, or if we support a lower major version,
+ * update the verp parameter with this lower version and
+ * return success.
+ */
+ if (((versions[i].ver_major == verp->ver_major) &&
+ (versions[i].ver_minor < verp->ver_minor)) ||
+ (versions[i].ver_major < verp->ver_major)) {
+ verp->ver_major = versions[i].ver_major;
+ verp->ver_minor = versions[i].ver_minor;
+ return (VGEN_SUCCESS);
+ }
+ i++;
+ }
+
+ /* no supported version lower than *verp exists */
+ return (VGEN_FAILURE);
+}
+
+/*
+ * wrapper routine to send the given message over ldc using ldc_write().
+ */
+static int
+vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
+ boolean_t caller_holds_lock)
+{
+ int rv;
+ size_t len;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ uint32_t retries = 0;
+
+ len = msglen;
+ if ((len == 0) || (msg == NULL))
+ return (VGEN_FAILURE);
+
+ /* serialize writers unless the caller already holds txlock */
+ if (!caller_holds_lock) {
+ mutex_enter(&ldcp->txlock);
+ }
+
+ /*
+ * Retry while the channel flow-controls us (EWOULDBLOCK), up to
+ * vgen_ldcwr_retries attempts. len is reloaded each pass because
+ * ldc_write() updates it with the number of bytes written.
+ */
+ do {
+ len = msglen;
+ rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
+ if (retries++ >= vgen_ldcwr_retries)
+ break;
+ } while (rv == EWOULDBLOCK);
+
+ if (!caller_holds_lock) {
+ mutex_exit(&ldcp->txlock);
+ }
+
+ /* a short write (len != msglen) is treated as a failure too */
+ if ((rv != 0) || (len != msglen)) {
+ DWARN((vnetp,
+ "vgen_sendmsg: ldc_write failed: id(%lx) rv(%d)"
+ " msglen (%d)\n", ldcp->ldc_id, rv, msglen));
+ return (VGEN_FAILURE);
+ }
+ return (VGEN_SUCCESS);
+}
+
+/* send version negotiate message to the peer over ldc */
+static int
+vgen_send_version_negotiate(vgen_ldc_t *ldcp)
+{
+ vio_ver_msg_t vermsg;
+ vio_msg_tag_t *tagp = &vermsg.tag;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int rv;
+
+ bzero(&vermsg, sizeof (vermsg));
+
+ /* build the VIO control-message tag for a version-info request */
+ tagp->vio_msgtype = VIO_TYPE_CTRL;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VIO_VER_INFO;
+ tagp->vio_sid = ldcp->local_sid;
+
+ /* get version msg payload from ldcp->local */
+ vermsg.ver_major = ldcp->local_hparams.ver_major;
+ vermsg.ver_minor = ldcp->local_hparams.ver_minor;
+ vermsg.dev_class = ldcp->local_hparams.dev_class;
+
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_send_version_negotiate: vgen_sendmsg failed"
+ "id (%lx)\n", ldcp->ldc_id));
+ return (VGEN_FAILURE);
+ }
+
+ /* record that our VER_INFO is outstanding (see vgen_handshake_done) */
+ ldcp->hstate |= VER_INFO_SENT;
+ DBG2((vnetp,
+ "vgen_send_version_negotiate: VER_INFO_SENT id (%lx) ver(%d,%d)\n",
+ ldcp->ldc_id, vermsg.ver_major, vermsg.ver_minor));
+
+ return (VGEN_SUCCESS);
+}
+
+/* send attr info message to the peer over ldc */
+static int
+vgen_send_attr_info(vgen_ldc_t *ldcp)
+{
+ vnet_attr_msg_t attrmsg;
+ vio_msg_tag_t *tagp = &attrmsg.tag;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int rv;
+
+ bzero(&attrmsg, sizeof (attrmsg));
+
+ /* build the VIO control-message tag for an attribute-info request */
+ tagp->vio_msgtype = VIO_TYPE_CTRL;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VIO_ATTR_INFO;
+ tagp->vio_sid = ldcp->local_sid;
+
+ /* get attr msg payload from ldcp->local (set in vgen_reset_hphase) */
+ attrmsg.mtu = ldcp->local_hparams.mtu;
+ attrmsg.addr = ldcp->local_hparams.addr;
+ attrmsg.addr_type = ldcp->local_hparams.addr_type;
+ attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
+ attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
+
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_send_attr_info: vgen_sendmsg failed"
+ "id (%lx)\n", ldcp->ldc_id));
+ return (VGEN_FAILURE);
+ }
+
+ ldcp->hstate |= ATTR_INFO_SENT;
+ DBG2((vnetp, "vgen_send_attr_info: ATTR_INFO_SENT id (%lx)\n",
+ ldcp->ldc_id));
+
+ return (VGEN_SUCCESS);
+}
+
+/* send descriptor ring register message to the peer over ldc */
+static int
+vgen_send_dring_reg(vgen_ldc_t *ldcp)
+{
+ vio_dring_reg_msg_t msg;
+ vio_msg_tag_t *tagp = &msg.tag;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int rv;
+
+ bzero(&msg, sizeof (msg));
+
+ /* build the VIO control-message tag for a dring-register request */
+ tagp->vio_msgtype = VIO_TYPE_CTRL;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VIO_DRING_REG;
+ tagp->vio_sid = ldcp->local_sid;
+
+ /* get dring info msg payload from ldcp->local */
+ bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
+ sizeof (ldc_mem_cookie_t));
+ msg.ncookies = ldcp->local_hparams.num_dcookies;
+ msg.num_descriptors = ldcp->local_hparams.num_desc;
+ msg.descriptor_size = ldcp->local_hparams.desc_size;
+
+ /*
+ * dring_ident is set to 0. After mapping the dring, peer sets this
+ * value and sends it in the ack, which is saved in
+ * vgen_handle_dring_reg().
+ */
+ msg.dring_ident = 0;
+
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_send_dring_reg: vgen_sendmsg failed"
+ "id (%lx)\n", ldcp->ldc_id));
+ return (VGEN_FAILURE);
+ }
+
+ ldcp->hstate |= DRING_INFO_SENT;
+ DBG2((vnetp, "vgen_send_dring_reg: DRING_INFO_SENT id (%lx)\n",
+ ldcp->ldc_id));
+
+ return (VGEN_SUCCESS);
+}
+
+/* send rdx (ready-to-exchange-data) info message to the peer over ldc */
+static int
+vgen_send_rdx_info(vgen_ldc_t *ldcp)
+{
+ vio_rdx_msg_t rdxmsg;
+ vio_msg_tag_t *tagp = &rdxmsg.tag;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int rv;
+
+ bzero(&rdxmsg, sizeof (rdxmsg));
+
+ /* RDX carries no payload beyond the tag itself */
+ tagp->vio_msgtype = VIO_TYPE_CTRL;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VIO_RDX;
+ tagp->vio_sid = ldcp->local_sid;
+
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_send_rdx_info: vgen_sendmsg failed"
+ "id (%lx)\n", ldcp->ldc_id));
+ return (VGEN_FAILURE);
+ }
+
+ ldcp->hstate |= RDX_INFO_SENT;
+ DBG2((vnetp, "vgen_send_rdx_info: RDX_INFO_SENT id (%lx)\n",
+ ldcp->ldc_id));
+
+ return (VGEN_SUCCESS);
+}
+
+/* send descriptor ring data message to the peer over ldc */
+static int
+vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, uint32_t end,
+ uint64_t next_txseq)
+{
+ vio_dring_msg_t dringmsg, *msgp = &dringmsg;
+ vio_msg_tag_t *tagp = &msgp->tag;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int rv;
+
+ bzero(msgp, sizeof (*msgp));
+
+ tagp->vio_msgtype = VIO_TYPE_DATA;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VIO_DRING_DATA;
+ tagp->vio_sid = ldcp->local_sid;
+
+ /* announce descriptors [start, end] of our tx ring to the peer */
+ msgp->seq_num = next_txseq;
+ msgp->dring_ident = ldcp->local_hparams.dring_ident;
+ msgp->start_idx = start;
+ msgp->end_idx = end;
+
+ /*
+ * B_TRUE: caller already holds ldcp->txlock, so vgen_sendmsg()
+ * must not try to acquire it again.
+ */
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_send_dring_data: vgen_sendmsg failed"
+ "id (%lx)\n", ldcp->ldc_id));
+ return (VGEN_FAILURE);
+ }
+
+ DBG2((vnetp, "vgen_send_dring_data: DRING_DATA_SENT id (%lx)\n",
+ ldcp->ldc_id));
+
+ return (VGEN_SUCCESS);
+}
+
+/* send multicast addr info message to vsw */
+/*
+ * send multicast addr info message to vsw
+ * Caller holds vgenp->lock around this call (see vgen_handshake, VH_DONE),
+ * which protects the vgenp->mctab / mccount snapshot being sent.
+ */
+static int
+vgen_send_mcast_info(vgen_ldc_t *ldcp)
+{
+ vnet_mcast_msg_t mcastmsg;
+ vnet_mcast_msg_t *msgp;
+ vio_msg_tag_t *tagp;
+ vgen_t *vgenp;
+ void *vnetp;
+ struct ether_addr *mca;
+ int rv;
+ int i;
+ uint32_t size;
+ uint32_t mccount;
+ uint32_t n;
+
+ msgp = &mcastmsg;
+ tagp = &msgp->tag;
+ vgenp = LDC_TO_VGEN(ldcp);
+ vnetp = LDC_TO_VNET(ldcp);
+
+ mccount = vgenp->mccount;
+ i = 0;
+
+ /*
+ * Send the multicast table in chunks of at most VNET_NUM_MCAST
+ * addresses per message.
+ * NOTE(review): the do/while still sends one message (count == 0)
+ * when mccount is 0 on entry — confirm this is intended.
+ */
+ do {
+ tagp->vio_msgtype = VIO_TYPE_CTRL;
+ tagp->vio_subtype = VIO_SUBTYPE_INFO;
+ tagp->vio_subtype_env = VNET_MCAST_INFO;
+ tagp->vio_sid = ldcp->local_sid;
+
+ n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
+ size = n * sizeof (struct ether_addr);
+
+ mca = &(vgenp->mctab[i]);
+ bcopy(mca, (msgp->mca), size);
+ /* set = B_TRUE: these are adds, not removals */
+ msgp->set = B_TRUE;
+ msgp->count = n;
+
+ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
+ B_FALSE);
+ if (rv != VGEN_SUCCESS) {
+ DWARN((vnetp, "vgen_send_mcast_info: vgen_sendmsg err"
+ "id (%lx)\n", ldcp->ldc_id));
+ return (VGEN_FAILURE);
+ }
+
+ mccount -= n;
+ i += n;
+
+ } while (mccount);
+
+ return (VGEN_SUCCESS);
+}
+
+/* Initiate Phase 2 of handshake */
+/* Initiate Phase 2 of handshake: send attr info followed by dring reg */
+static int
+vgen_handshake_phase2(vgen_ldc_t *ldcp)
+{
+ int rv;
+#ifdef DEBUG
+ /* debug-only fault injection, controlled by vgen_hdbg flags */
+ if (vgen_hdbg & HDBG_OUT_STATE) {
+ /* simulate out of state condition */
+ vgen_hdbg &= ~(HDBG_OUT_STATE);
+ rv = vgen_send_rdx_info(ldcp);
+ return (rv);
+ }
+ if (vgen_hdbg & HDBG_TIMEOUT) {
+ /* simulate timeout condition */
+ vgen_hdbg &= ~(HDBG_TIMEOUT);
+ return (VGEN_SUCCESS);
+ }
+#endif
+ if ((rv = vgen_send_attr_info(ldcp)) != VGEN_SUCCESS) {
+ return (rv);
+ }
+ if ((rv = vgen_send_dring_reg(ldcp)) != VGEN_SUCCESS) {
+ return (rv);
+ }
+
+ return (VGEN_SUCCESS);
+}
+
+/*
+ * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
+ * This can happen after a channel comes up (status: LDC_UP) or
+ * when handshake gets terminated due to various conditions.
+ */
+static void
+vgen_reset_hphase(vgen_ldc_t *ldcp)
+{
+ vgen_t *vgenp = LDC_TO_VGEN(ldcp);
+ void *vnetp = LDC_TO_VNET(ldcp);
+ ldc_status_t istatus;
+
+ DBG2((vnetp, "vgen_reset_hphase: id(0x%lx)\n", ldcp->ldc_id));
+ /* reset hstate and hphase */
+ ldcp->hstate = 0;
+ ldcp->hphase = VH_PHASE0;
+
+ /* reset handshake watchdog timeout (armed in vgen_handshake PHASE1) */
+ if (ldcp->htid) {
+ (void) untimeout(ldcp->htid);
+ ldcp->htid = 0;
+ }
+
+ /*
+ * Unmap drings, if dring_ready is set.
+ */
+ if (ldcp->local_hparams.dring_ready) {
+ ldcp->local_hparams.dring_ready = B_FALSE;
+ /* do not unbind our dring */
+ }
+
+ if (ldcp->peer_hparams.dring_ready) {
+ ldcp->peer_hparams.dring_ready = B_FALSE;
+ /* Unmap peer's dring */
+ (void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
+ vgen_clobber_rxds(ldcp);
+ }
+
+ /* reclaim/reset our tx buffers */
+ vgen_clobber_tbufs(ldcp);
+
+ /*
+ * clear local handshake params and initialize.
+ */
+ bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
+
+#ifdef DEBUG
+#if 0
+ if (vgen_hdbg & HDBG_VERSION) {
+ bcopy(dbg_vgen_versions, ldcp->vgen_versions,
+ sizeof (ldcp->vgen_versions));
+ }
+#endif
+#endif
+ /* set version to the highest version supported (table entry 0) */
+ ldcp->local_hparams.ver_major =
+ ldcp->vgen_versions[0].ver_major;
+ ldcp->local_hparams.ver_minor =
+ ldcp->vgen_versions[0].ver_minor;
+ ldcp->local_hparams.dev_class = VDEV_NETWORK;
+
+ /* set attr_info params (validated by peer in vgen_check_attr_info) */
+ ldcp->local_hparams.mtu = ETHERMAX;
+ ldcp->local_hparams.addr =
+ vgen_macaddr_strtoul(vgenp->macaddr);
+ ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
+ ldcp->local_hparams.xfer_mode = VIO_DRING_MODE;
+ ldcp->local_hparams.ack_freq = 0; /* don't need acks */
+
+#ifdef DEBUG
+#if 0
+ vgen_print_attr_info(ldcp, VGEN_LOCAL);
+#endif
+#endif
+
+ /*
+ * set dring_info params.
+ * Note: dring is already created and bound.
+ */
+ bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
+ sizeof (ldc_mem_cookie_t));
+ ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
+ ldcp->local_hparams.num_desc = ldcp->num_txds;
+ ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
+
+ /*
+ * dring_ident is set to 0. After mapping the dring, peer sets this
+ * value and sends it in the ack, which is saved in
+ * vgen_handle_dring_reg().
+ */
+ ldcp->local_hparams.dring_ident = 0;
+
+ /* clear peer_hparams */
+ bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
+
+ /* reset the channel if required (flag set by error paths) */
+ if (ldcp->need_ldc_reset) {
+ DWARN((vnetp,
+ "vgen_reset_hphase: id (%lx), Doing Channel Reset...\n",
+ ldcp->ldc_id));
+ ldcp->need_ldc_reset = B_FALSE;
+ (void) ldc_reset(ldcp->ldc_handle);
+ (void) ldc_status(ldcp->ldc_handle, &istatus);
+ DBG2((vnetp,
+ "vgen_reset_hphase: id (%lx), RESET Done,ldc_status(%x)\n",
+ ldcp->ldc_id, istatus));
+ ldcp->ldc_status = istatus;
+ /* clear sids */
+ ldcp->local_sid = 0;
+ ldcp->peer_sid = 0;
+ /* bring the channel back up so the handshake can restart */
+ (void) ldc_up(ldcp->ldc_handle);
+ }
+}
+
+/* wrapper function for vgen_reset_hphase */
+/*
+ * wrapper function for vgen_reset_hphase.
+ * Acquires txlock and tclock (in that order, under the already-held
+ * cblock) so the reset cannot race tx-side users of the handshake state.
+ */
+static void
+vgen_handshake_reset(vgen_ldc_t *ldcp)
+{
+ ASSERT(MUTEX_HELD(&ldcp->cblock));
+ mutex_enter(&ldcp->txlock);
+ mutex_enter(&ldcp->tclock);
+
+ vgen_reset_hphase(ldcp);
+
+ mutex_exit(&ldcp->tclock);
+ mutex_exit(&ldcp->txlock);
+}
+
+/*
+ * Initiate handshake with the peer by sending various messages
+ * based on the handshake-phase that the channel is currently in.
+ */
+static void
+vgen_handshake(vgen_ldc_t *ldcp)
+{
+ uint32_t hphase = ldcp->hphase;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ vgen_t *vgenp = LDC_TO_VGEN(ldcp);
+
+ switch (hphase) {
+
+ case VH_PHASE1:
+
+ /*
+ * start timer, for entire handshake process, turn this timer
+ * off if all phases of handshake complete successfully and
+ * hphase goes to VH_DONE(below) or
+ * vgen_reset_hphase() gets called or
+ * channel is reset due to errors or
+ * vgen_ldc_uninit() is invoked(vgen_stop).
+ */
+ ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
+ drv_usectohz(vgen_hwd_interval * 1000));
+
+ /* Phase 1 involves negotiating the version */
+ if (vgen_send_version_negotiate(ldcp) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ }
+ break;
+
+ case VH_PHASE2:
+ /* Phase 2: attr info + dring registration */
+ if (vgen_handshake_phase2(ldcp) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ }
+ break;
+
+ case VH_PHASE3:
+ /* Phase 3: ready-to-exchange-data */
+ if (vgen_send_rdx_info(ldcp) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ }
+ break;
+
+ case VH_DONE:
+ /* reset handshake watchdog timeout */
+ if (ldcp->htid) {
+ (void) untimeout(ldcp->htid);
+ ldcp->htid = 0;
+ }
+ /* successful completion clears the retry counter */
+ ldcp->hretries = 0;
+#if 0
+ vgen_print_ldcinfo(ldcp);
+#endif
+ DBG1((vnetp, "vgen_handshake: id(0x%lx) Handshake Done\n",
+ ldcp->ldc_id));
+
+ if (ldcp->need_mcast_sync) {
+ /* need to sync multicast table with vsw */
+
+ ldcp->need_mcast_sync = B_FALSE;
+ /*
+ * drop cblock before taking vgenp->lock to respect
+ * lock ordering (vgenp->lock before ldcp->cblock)
+ */
+ mutex_exit(&ldcp->cblock);
+
+ mutex_enter(&vgenp->lock);
+ (void) vgen_send_mcast_info(ldcp);
+ mutex_exit(&vgenp->lock);
+
+ mutex_enter(&ldcp->cblock);
+
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Check if the current handshake phase has completed successfully and
+ * return the status.
+ */
+static int
+vgen_handshake_done(vgen_ldc_t *ldcp)
+{
+ uint32_t hphase = ldcp->hphase;
+ int status = 0;
+ void *vnetp = LDC_TO_VNET(ldcp);
+
+ switch (hphase) {
+
+ case VH_PHASE1:
+ /*
+ * Phase1 is done, if version negotiation
+ * completed successfully.
+ */
+ status = ((ldcp->hstate & VER_NEGOTIATED) ==
+ VER_NEGOTIATED);
+ break;
+
+ case VH_PHASE2:
+ /*
+ * Phase 2 is done, if attr info and dring info
+ * have been exchanged successfully.
+ */
+ status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
+ ATTR_INFO_EXCHANGED) &&
+ ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
+ DRING_INFO_EXCHANGED));
+ break;
+
+ case VH_PHASE3:
+ /* Phase 3 is done, if rdx msg has been exchanged */
+ status = ((ldcp->hstate & RDX_EXCHANGED) ==
+ RDX_EXCHANGED);
+ break;
+
+ default:
+ /* VH_PHASE0 / VH_DONE: no phase in progress */
+ break;
+ }
+
+ if (status == 0) {
+ return (VGEN_FAILURE);
+ }
+ DBG2((vnetp, "VNET_HANDSHAKE_DONE: PHASE(%d)\n", hphase));
+ return (VGEN_SUCCESS);
+}
+
+/* retry handshake on failure */
+/*
+ * retry handshake on failure.
+ * vgen_max_hretries == 0 disables retries entirely; the per-channel
+ * hretries counter is cleared on successful completion (VH_DONE in
+ * vgen_handshake).
+ */
+static void
+vgen_handshake_retry(vgen_ldc_t *ldcp)
+{
+ /* reset handshake phase */
+ vgen_handshake_reset(ldcp);
+ if (vgen_max_hretries) { /* handshake retry is specified */
+ if (ldcp->hretries++ < vgen_max_hretries)
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+}
+
+/*
+ * Handle a version info msg from the peer or an ACK/NACK from the peer
+ * to a version info msg that we sent.
+ */
+static void
+vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+ vio_ver_msg_t *vermsg = (vio_ver_msg_t *)tagp;
+ int ack = 0;
+ int failed = 0;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int idx;
+ vgen_ver_t *versions = ldcp->vgen_versions;
+
+ DBG1((vnetp, "vgen_handle_version_negotiate: enter\n"));
+ switch (tagp->vio_subtype) {
+ case VIO_SUBTYPE_INFO:
+
+ /* Cache sid of peer if this is the first time */
+ if (ldcp->peer_sid == 0) {
+ DBG2((vnetp,
+ "vgen_handle_version_negotiate: id (%lx) Caching"
+ " peer_sid(%x)\n", ldcp->ldc_id, tagp->vio_sid));
+ ldcp->peer_sid = tagp->vio_sid;
+ }
+
+ if (ldcp->hphase != VH_PHASE1) {
+ /*
+ * If we are not already in VH_PHASE1, reset to
+ * pre-handshake state, and initiate handshake
+ * to the peer too.
+ */
+ vgen_handshake_reset(ldcp);
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+ ldcp->hstate |= VER_INFO_RCVD;
+
+ /* save peer's requested values */
+ ldcp->peer_hparams.ver_major = vermsg->ver_major;
+ ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
+ ldcp->peer_hparams.dev_class = vermsg->dev_class;
+
+ if ((vermsg->dev_class != VDEV_NETWORK) &&
+ (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
+ /* unsupported dev_class, send NACK */
+
+ tagp->vio_subtype = VIO_SUBTYPE_NACK;
+ tagp->vio_sid = ldcp->local_sid;
+ /* send reply msg back to peer */
+ (void) vgen_sendmsg(ldcp, (caddr_t)tagp,
+ sizeof (*vermsg), B_FALSE);
+ DWARN((vnetp,
+ "vgen_handle_version_negotiate: Version"
+ " Negotiation Failed id (%lx)\n", ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ DBG2((vnetp, "vgen_handle_version_negotiate: VER_INFO_RCVD,"
+ " id (%lx), ver(%d,%d)\n", ldcp->ldc_id,
+ vermsg->ver_major, vermsg->ver_minor));
+
+ /*
+ * Walk our version table (highest first, see vgen_reset_hphase)
+ * to decide whether to ACK, NACK with a lower version, or fail.
+ */
+ idx = 0;
+
+ for (;;) {
+
+ if (vermsg->ver_major > versions[idx].ver_major) {
+
+ /* nack with next lower version */
+ tagp->vio_subtype = VIO_SUBTYPE_NACK;
+ vermsg->ver_major = versions[idx].ver_major;
+ vermsg->ver_minor = versions[idx].ver_minor;
+ break;
+ }
+
+ if (vermsg->ver_major == versions[idx].ver_major) {
+
+ /* major version match - ACK version */
+ tagp->vio_subtype = VIO_SUBTYPE_ACK;
+ ack = 1;
+
+ /*
+ * lower minor version to the one this endpt
+ * supports, if necessary
+ */
+ if (vermsg->ver_minor >
+ versions[idx].ver_minor) {
+ vermsg->ver_minor =
+ versions[idx].ver_minor;
+ ldcp->peer_hparams.ver_minor =
+ versions[idx].ver_minor;
+ }
+ break;
+ }
+
+ idx++;
+
+ if (idx == VGEN_NUM_VER) {
+
+ /* no version match - send NACK */
+ tagp->vio_subtype = VIO_SUBTYPE_NACK;
+ vermsg->ver_major = 0;
+ vermsg->ver_minor = 0;
+ failed = 1;
+ break;
+ }
+
+ }
+
+ tagp->vio_sid = ldcp->local_sid;
+
+ /* send reply msg back to peer */
+ if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
+ B_FALSE) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ if (ack) {
+ ldcp->hstate |= VER_ACK_SENT;
+ DBG2((vnetp, "vgen_handle_version_negotiate:"
+ " VER_ACK_SENT, id (%lx) ver(%d,%d) \n",
+ ldcp->ldc_id, vermsg->ver_major,
+ vermsg->ver_minor));
+ }
+ if (failed) {
+ DWARN((vnetp, "vgen_handle_version_negotiate:"
+ " Version Negotiation Failed id (%lx)\n",
+ ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+
+ /* VER_ACK_SENT and VER_ACK_RCVD */
+
+ /* local and peer versions match? */
+ ASSERT((ldcp->local_hparams.ver_major ==
+ ldcp->peer_hparams.ver_major) &&
+ (ldcp->local_hparams.ver_minor ==
+ ldcp->peer_hparams.ver_minor));
+
+ /* move to the next phase */
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+
+ break;
+
+ case VIO_SUBTYPE_ACK:
+
+ if (ldcp->hphase != VH_PHASE1) {
+ /* This should not happen. */
+ DWARN((vnetp,
+ "vgen_handle_version_negotiate:"
+ " VER_ACK_RCVD id (%lx) Invalid Phase(%u)\n",
+ ldcp->ldc_id, ldcp->hphase));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ /* SUCCESS - we have agreed on a version */
+ ldcp->local_hparams.ver_major = vermsg->ver_major;
+ ldcp->local_hparams.ver_minor = vermsg->ver_minor;
+ ldcp->hstate |= VER_ACK_RCVD;
+
+ DBG2((vnetp, "vgen_handle_version_negotiate:"
+ " VER_ACK_RCVD, id (%lx) ver(%d,%d) \n",
+ ldcp->ldc_id, vermsg->ver_major, vermsg->ver_minor));
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+
+ /* VER_ACK_SENT and VER_ACK_RCVD */
+
+ /* local and peer versions match? */
+ ASSERT((ldcp->local_hparams.ver_major ==
+ ldcp->peer_hparams.ver_major) &&
+ (ldcp->local_hparams.ver_minor ==
+ ldcp->peer_hparams.ver_minor));
+
+ /* move to the next phase */
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+ break;
+
+ case VIO_SUBTYPE_NACK:
+
+ if (ldcp->hphase != VH_PHASE1) {
+ /* This should not happen. */
+ DWARN((vnetp,
+ "vgen_handle_version_negotiate:"
+ " VER_NACK_RCVD id (%lx) Invalid Phase(%u)\n",
+ ldcp->ldc_id, ldcp->hphase));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ DBG2((vnetp, "vgen_handle_version_negotiate:"
+ " VER_NACK_RCVD id(%lx) next ver(%d,%d)\n",
+ ldcp->ldc_id, vermsg->ver_major, vermsg->ver_minor));
+
+ /* check if version in NACK is zero */
+ if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
+ /*
+ * Version Negotiation has failed.
+ */
+ DWARN((vnetp, "vgen_handle_version_negotiate:"
+ " Version Negotiation Failed id (%lx)\n",
+ ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ /* pick a version at or below the one the peer NACKed with */
+ idx = 0;
+
+ for (;;) {
+
+ if (vermsg->ver_major > versions[idx].ver_major) {
+ /* select next lower version */
+
+ ldcp->local_hparams.ver_major =
+ versions[idx].ver_major;
+ ldcp->local_hparams.ver_minor =
+ versions[idx].ver_minor;
+ break;
+ }
+
+ if (vermsg->ver_major == versions[idx].ver_major) {
+ /* major version match */
+
+ ldcp->local_hparams.ver_major =
+ versions[idx].ver_major;
+
+ ldcp->local_hparams.ver_minor =
+ versions[idx].ver_minor;
+ break;
+ }
+
+ idx++;
+
+ if (idx == VGEN_NUM_VER) {
+ /*
+ * no version match.
+ * Version Negotiation has failed.
+ */
+ DWARN((vnetp, "vgen_handle_version_negotiate:"
+ " Version Negotiation Failed id (%lx)\n",
+ ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ }
+
+ /* retry negotiation with the lower version selected above */
+ if (vgen_send_version_negotiate(ldcp) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ break;
+ }
+ DBG1((vnetp, "vgen_handle_version_negotiate: exit\n"));
+}
+
+/* Check if the attributes are supported */
+static int
+vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
+{
+ _NOTE(ARGUNUSED(ldcp))
+
+#if 0
+ uint64_t port_macaddr;
+ port_macaddr = vgen_macaddr_strtoul((uint8_t *)
+ &(ldcp->portp->macaddr));
+#endif
+ /*
+ * currently, we support these attr values:
+ * mtu of ethernet, addr_type of mac, xfer_mode of
+ * ldc shared memory, ack_freq of 0 (data is acked if
+ * the ack bit is set in the descriptor) and the address should
+ * match the address in the port node.
+ * NOTE(review): the address-match check is disabled (#if 0), so
+ * msg->addr is currently not validated here despite the comment
+ * above — confirm whether that is intended.
+ */
+ if ((msg->mtu != ETHERMAX) ||
+ (msg->addr_type != ADDR_TYPE_MAC) ||
+ (msg->xfer_mode != VIO_DRING_MODE) ||
+ (msg->ack_freq > 64)) {
+#if 0
+ (msg->addr != port_macaddr))
+cmn_err(CE_CONT, "vgen_check_attr_info: msg->addr(%lx), port_macaddr(%lx)\n",
+ msg->addr, port_macaddr);
+#endif
+ return (VGEN_FAILURE);
+ }
+
+ return (VGEN_SUCCESS);
+}
+
+/*
+ * Handle an attribute info msg from the peer or an ACK/NACK from the peer
+ * to an attr info msg that we sent.
+ */
+static void
+vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+ vnet_attr_msg_t *attrmsg = (vnet_attr_msg_t *)tagp;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ int ack = 0;
+
+ DBG1((vnetp, "vgen_handle_attr_info: enter\n"));
+ /* attr info is only valid while in Phase 2 of the handshake */
+ if (ldcp->hphase != VH_PHASE2) {
+ DWARN((vnetp,
+ "vgen_handle_attr_info: Rcvd ATTR_INFO id(%lx)"
+ " subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
+ tagp->vio_subtype, ldcp->hphase));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+ switch (tagp->vio_subtype) {
+ case VIO_SUBTYPE_INFO:
+
+ DBG2((vnetp, "vgen_handle_attr_info: ATTR_INFO_RCVD id(%lx)\n",
+ ldcp->ldc_id));
+ ldcp->hstate |= ATTR_INFO_RCVD;
+
+ /* save peer's values */
+ ldcp->peer_hparams.mtu = attrmsg->mtu;
+ ldcp->peer_hparams.addr = attrmsg->addr;
+ ldcp->peer_hparams.addr_type = attrmsg->addr_type;
+ ldcp->peer_hparams.xfer_mode = attrmsg->xfer_mode;
+ ldcp->peer_hparams.ack_freq = attrmsg->ack_freq;
+
+ if (vgen_check_attr_info(ldcp, attrmsg) == VGEN_FAILURE) {
+ /* unsupported attr, send NACK */
+ tagp->vio_subtype = VIO_SUBTYPE_NACK;
+ } else {
+ ack = 1;
+ tagp->vio_subtype = VIO_SUBTYPE_ACK;
+ }
+ tagp->vio_sid = ldcp->local_sid;
+
+ /* send reply msg back to peer (reuses the received buffer) */
+ if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*attrmsg),
+ B_FALSE) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ if (ack) {
+ ldcp->hstate |= ATTR_ACK_SENT;
+ DBG2((vnetp, "vgen_handle_attr_info:"
+ " ATTR_ACK_SENT id(%lx)\n", ldcp->ldc_id));
+#ifdef DEBUG
+#if 0
+ vgen_print_attr_info(ldcp, VGEN_PEER);
+#endif
+#endif
+ } else {
+ /* failed: we NACKed the peer, so restart handshake */
+ DWARN((vnetp, "vgen_handle_attr_info:"
+ " ATTR_NACK_SENT id(%lx)\n", ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+
+ break;
+
+ case VIO_SUBTYPE_ACK:
+
+ ldcp->hstate |= ATTR_ACK_RCVD;
+
+ DBG2((vnetp, "vgen_handle_attr_info: ATTR_ACK_RCVD id(%lx)\n",
+ ldcp->ldc_id));
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+ break;
+
+ case VIO_SUBTYPE_NACK:
+
+ /* peer rejected our attributes; restart the handshake */
+ DBG2((vnetp, "vgen_handle_attr_info: ATTR_NACK_RCVD id(%lx)\n",
+ ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ break;
+ }
+ DBG1((vnetp, "vgen_handle_attr_info: exit\n"));
+}
+
+/* Check if the dring info msg is ok */
+/*
+ * Check if the dring info msg is ok.
+ * Requires at least 128 descriptors, each large enough to hold a
+ * vnet_public_desc_t.
+ */
+static int
+vgen_check_dring_reg(vio_dring_reg_msg_t *msg)
+{
+ /* check if msg contents are ok */
+ if ((msg->num_descriptors < 128) || (msg->descriptor_size <
+ sizeof (vnet_public_desc_t))) {
+ return (VGEN_FAILURE);
+ }
+ return (VGEN_SUCCESS);
+}
+
+/*
+ * Handle a descriptor ring register msg from the peer or an ACK/NACK from
+ * the peer to a dring register msg that we sent.
+ */
+static void
+vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+ vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp;
+ void *vnetp = LDC_TO_VNET(ldcp);
+ ldc_mem_cookie_t dcookie;
+ int ack = 0;
+ int rv = 0;
+
+ DBG1((vnetp, "vgen_handle_dring_reg: enter\n"));
+ if (ldcp->hphase < VH_PHASE2) {
+ /* dring_info can be rcvd in any of the phases after Phase1 */
+ DWARN((vnetp,
+ "vgen_handle_dring_reg: Rcvd DRING_INFO, id (%lx)"
+ " Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
+ tagp->vio_subtype, ldcp->hphase));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+ switch (tagp->vio_subtype) {
+ case VIO_SUBTYPE_INFO:
+
+ DBG2((vnetp, "vgen_handle_dring_reg: DRING_INFO_RCVD id(%lx)\n",
+ ldcp->ldc_id));
+ ldcp->hstate |= DRING_INFO_RCVD;
+ bcopy((msg->cookie), &dcookie, sizeof (dcookie));
+
+ /* only single-cookie drings are handled here */
+ ASSERT(msg->ncookies == 1);
+
+ if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) {
+ /*
+ * verified dring info msg to be ok,
+ * now try to map the remote dring.
+ */
+ rv = vgen_init_rxds(ldcp, msg->num_descriptors,
+ msg->descriptor_size, &dcookie,
+ msg->ncookies);
+ if (rv == DDI_SUCCESS) {
+ /* now we can ack the peer */
+ ack = 1;
+ }
+ }
+ if (ack == 0) {
+ /* failed, send NACK */
+ tagp->vio_subtype = VIO_SUBTYPE_NACK;
+ } else {
+ if (!(ldcp->peer_hparams.dring_ready)) {
+
+ /* save peer's dring_info values */
+ bcopy(&dcookie,
+ &(ldcp->peer_hparams.dring_cookie),
+ sizeof (dcookie));
+ ldcp->peer_hparams.num_desc =
+ msg->num_descriptors;
+ ldcp->peer_hparams.desc_size =
+ msg->descriptor_size;
+ ldcp->peer_hparams.num_dcookies =
+ msg->ncookies;
+
+ /*
+ * set dring_ident for the peer: the address of
+ * our mapped rx descriptor ring serves as the
+ * unique identifier
+ */
+ ldcp->peer_hparams.dring_ident =
+ (uint64_t)ldcp->rxdp;
+ /* return the dring_ident in ack msg */
+ msg->dring_ident =
+ (uint64_t)ldcp->rxdp;
+
+ ldcp->peer_hparams.dring_ready = B_TRUE;
+ }
+ tagp->vio_subtype = VIO_SUBTYPE_ACK;
+ }
+ tagp->vio_sid = ldcp->local_sid;
+ /* send reply msg back to peer */
+ if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
+ B_FALSE) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ if (ack) {
+ ldcp->hstate |= DRING_ACK_SENT;
+ DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_SENT"
+ " id (%lx)\n", ldcp->ldc_id));
+ } else {
+ /* we NACKed the peer's dring; restart handshake */
+ DWARN((vnetp, "vgen_handle_dring_reg: DRING_NACK_SENT"
+ " id (%lx)\n", ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+
+ break;
+
+ case VIO_SUBTYPE_ACK:
+
+ ldcp->hstate |= DRING_ACK_RCVD;
+
+ DBG2((vnetp, "vgen_handle_dring_reg: DRING_ACK_RCVD"
+ " id (%lx)\n", ldcp->ldc_id));
+
+ if (!(ldcp->local_hparams.dring_ready)) {
+ /* local dring is now ready */
+ ldcp->local_hparams.dring_ready = B_TRUE;
+
+ /* save dring_ident acked by peer */
+ ldcp->local_hparams.dring_ident =
+ msg->dring_ident;
+ }
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+
+ break;
+
+ case VIO_SUBTYPE_NACK:
+
+ /* peer rejected our dring registration; restart handshake */
+ DBG2((vnetp, "vgen_handle_dring_reg: DRING_NACK_RCVD"
+ " id (%lx)\n", ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ break;
+ }
+ DBG1((vnetp, "vgen_handle_dring_reg: exit\n"));
+}
+
+/*
+ * Handle a rdx info msg from the peer or an ACK/NACK
+ * from the peer to a rdx info msg that we sent.
+ */
+static void
+vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+ void *vnetp = LDC_TO_VNET(ldcp);
+
+ DBG1((vnetp, "vgen_handle_rdx_info: enter\n"));
+ /* RDX is only valid in Phase 3 of the handshake */
+ if (ldcp->hphase != VH_PHASE3) {
+ DWARN((vnetp,
+ "vgen_handle_rdx_info: Rcvd RDX_INFO, id (%lx)"
+ " Subtype (%d), Invalid Phase(%u)\n", ldcp->ldc_id,
+ tagp->vio_subtype, ldcp->hphase));
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+ switch (tagp->vio_subtype) {
+ case VIO_SUBTYPE_INFO:
+
+ DBG2((vnetp, "vgen_handle_rdx_info: RDX_INFO_RCVD id (%lx)\n",
+ ldcp->ldc_id));
+ ldcp->hstate |= RDX_INFO_RCVD;
+
+ /* RDX info is always acked; there is nothing to validate */
+ tagp->vio_subtype = VIO_SUBTYPE_ACK;
+ tagp->vio_sid = ldcp->local_sid;
+ /* send reply msg back to peer */
+ if (vgen_sendmsg(ldcp, (caddr_t)tagp,
+ sizeof (vio_rdx_msg_t), B_FALSE) != VGEN_SUCCESS) {
+ vgen_handshake_reset(ldcp);
+ return;
+ }
+
+ ldcp->hstate |= RDX_ACK_SENT;
+ DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_SENT id (%lx)\n",
+ ldcp->ldc_id));
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+
+ break;
+
+ case VIO_SUBTYPE_ACK:
+
+ ldcp->hstate |= RDX_ACK_RCVD;
+
+ DBG2((vnetp, "vgen_handle_rdx_info: RDX_ACK_RCVD id (%lx)\n",
+ ldcp->ldc_id));
+
+ if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
+ vgen_handshake(vh_nextphase(ldcp));
+ }
+ break;
+
+ case VIO_SUBTYPE_NACK:
+
+ /* peer NACKed RDX; restart the handshake */
+ DBG2((vnetp, "vgen_handle_rdx_info: RDX_NACK_RCVD id (%lx)\n",
+ ldcp->ldc_id));
+ vgen_handshake_reset(ldcp);
+ break;
+ }
+ DBG1((vnetp, "vgen_handle_rdx_info: exit\n"));
+}
+
+/* Handle ACK/NACK from vsw to a set multicast msg that we sent */
+static void
+vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+ void *vnetp = LDC_TO_VNET(ldcp);
+ vgen_t *vgenp = LDC_TO_VGEN(ldcp);
+ vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
+ struct ether_addr *addrp;
+ int count;
+ int i;
+
+ DBG1((vnetp, "vgen_handle_mcast_info: enter\n"));
+ switch (tagp->vio_subtype) {
+
+ case VIO_SUBTYPE_INFO:
+
+ /* vnet shouldn't recv set mcast msg, only vsw handles it */
+ DWARN((vnetp,
+ "vgen_handle_mcast_info: rcvd SET_MCAST_INFO id (%lx)\n",
+ ldcp->ldc_id));
+ break;
+
+ case VIO_SUBTYPE_ACK:
+
+ /* success adding/removing multicast addr */
+ DBG2((vnetp,
+ "vgen_handle_mcast_info: rcvd SET_MCAST_ACK id (%lx)\n",
+ ldcp->ldc_id));
+ break;
+
+ case VIO_SUBTYPE_NACK:
+
+ DWARN((vnetp,
+ "vgen_handle_mcast_info: rcvd SET_MCAST_NACK id (%lx)\n",
+ ldcp->ldc_id));
+ if (!(msgp->set)) {
+ /* multicast remove request failed: nothing to undo */
+ break;
+ }
+
+ /*
+ * multicast add request failed: roll back by removing the
+ * NACKed addresses from our local table (mctab).
+ */
+ for (count = 0; count < msgp->count; count++) {
+ addrp = &(msgp->mca[count]);
+
+ /*
+ * delete address from the table: overwrite the slot
+ * with the last entry and shrink the count
+ */
+ for (i = 0; i < vgenp->mccount; i++) {
+ if (ether_cmp(addrp,
+ &(vgenp->mctab[i])) == 0) {
+ if (vgenp->mccount > 1) {
+ vgenp->mctab[i] =
+ vgenp->mctab[vgenp->mccount-1];
+ }
+ vgenp->mccount--;
+ break;
+ }
+ }
+ }
+ break;
+
+ }
+ DBG1((vnetp, "vgen_handle_mcast_info: exit\n"));
+}
+
+/* handler for control messages received from the peer ldc end-point */
+static void
+vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+	void *vnetp = LDC_TO_VNET(ldcp);
+
+	DBG1((vnetp, "vgen_handle_ctrlmsg: enter\n"));
+	/*
+	 * Dispatch on the control message's subtype-envelope; unknown
+	 * envelope values are silently ignored.
+	 */
+	switch (tagp->vio_subtype_env) {
+
+	case VIO_VER_INFO:
+		vgen_handle_version_negotiate(ldcp, tagp);
+		break;
+
+	case VIO_ATTR_INFO:
+		vgen_handle_attr_info(ldcp, tagp);
+		break;
+
+	case VIO_DRING_REG:
+		vgen_handle_dring_reg(ldcp, tagp);
+		break;
+
+	case VIO_RDX:
+		vgen_handle_rdx_info(ldcp, tagp);
+		break;
+
+	case VNET_MCAST_INFO:
+		vgen_handle_mcast_info(ldcp, tagp);
+		break;
+
+	}
+	DBG1((vnetp, "vgen_handle_ctrlmsg: exit\n"));
+}
+
+/* handler for data messages received from the peer ldc end-point */
+static void
+vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
+	mblk_t **headp, mblk_t **tailp)
+{
+	void *vnetp = LDC_TO_VNET(ldcp);
+
+	DBG1((vnetp, "vgen_handle_datamsg: enter\n"));
+
+	/* data messages are only valid once the handshake has completed */
+	if (ldcp->hphase != VH_DONE)
+		return;
+	switch (tagp->vio_subtype_env) {
+	case VIO_DRING_DATA:
+		/* received packets (if any) are chained at *headp/*tailp */
+		vgen_handle_dring_data(ldcp, tagp, headp, tailp);
+		break;
+	default:
+		break;
+	}
+
+	DBG1((vnetp, "vgen_handle_datamsg: exit\n"));
+}
+
+/*
+ * Handle a VIO_DRING_DATA message from the peer.
+ *
+ * INFO:  the peer has posted data in its rx descriptor range
+ *        [start_idx, end_idx]; copy each descriptor's data into a fresh
+ *        mblk, ACK where the peer requested it, and return the chain of
+ *        received packets through *headp/*tailp.
+ * ACK:   the peer consumed descriptors we transmitted; reclaim them.
+ * NACK:  the peer reports lost packets; either retransmit
+ *        (VGEN_REXMIT) or mark the range done so it can be reclaimed.
+ */
+static void
+vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
+	mblk_t **headp, mblk_t **tailp)
+{
+	vio_dring_msg_t *dringmsg;
+	vnet_public_desc_t *rxdp;
+	vnet_public_desc_t *txdp;
+	vio_dring_entry_hdr_t *hdrp;
+	vgen_stats_t *statsp;
+	struct ether_header *ehp;
+	mblk_t *mp = NULL;
+	mblk_t *bp = NULL;
+	mblk_t *bpt = NULL;
+	size_t nbytes;
+	size_t nread;
+	uint64_t off = 0;
+	uint32_t start;
+	uint32_t end;
+	uint32_t datalen;
+	uint32_t ncookies;
+	uint32_t sync_start;
+	uint32_t sync_end;
+	uint32_t rxi;
+	uint32_t txi;
+	int rv;
+	boolean_t rxd_err = B_FALSE;
+	boolean_t sync_done = B_FALSE;
+#ifdef VGEN_HANDLE_LOST_PKTS
+	int n;
+#endif
+#ifdef VGEN_REXMIT
+	uint64_t seqnum;
+	vgen_private_desc_t *tbufp;
+#endif
+	void *vnetp = LDC_TO_VNET(ldcp);
+
+	dringmsg = (vio_dring_msg_t *)tagp;
+	start = dringmsg->start_idx;
+	end = dringmsg->end_idx;
+	statsp = ldcp->statsp;
+
+	DBG1((vnetp, "vgen_handle_dring_data: enter\n"));
+	switch (tagp->vio_subtype) {
+
+	case VIO_SUBTYPE_INFO:
+		/*
+		 * received a data msg, which contains the start and end
+		 * indeces of the descriptors within the rx ring holding data,
+		 * the seq_num of data packet corresponding to the start index,
+		 * and the dring_ident.
+		 * We can now read the contents of each of these descriptors
+		 * and gather data from it.
+		 */
+		DBG2((vnetp,
+		    "vgen_handle_dring_data: INFO: start(%d), end(%d)\n",
+		    start, end));
+
+		/* validate rx start and end indeces */
+		if (!(CHECK_RXI(start, ldcp)) || !(CHECK_RXI(end, ldcp))) {
+			/* drop the message if invalid index */
+			break;
+		}
+
+		/* validate dring_ident */
+		if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
+			/* invalid dring_ident, drop the msg */
+			break;
+		}
+#ifdef DEBUG
+		if (vgen_trigger_rxlost) {
+			/* drop this msg to simulate lost pkts for debugging */
+			vgen_trigger_rxlost = 0;
+			break;
+		}
+#endif
+
+#ifdef VGEN_HANDLE_LOST_PKTS
+
+		/* receive start index doesn't match expected index */
+		if (ldcp->next_rxi != start) {
+
+			DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
+			    "next_rxi(%d) != start(%d)\n",
+			    ldcp->ldc_id, ldcp->next_rxi, start));
+
+			/* calculate the number of pkts lost */
+			if (start >= ldcp->next_rxi) {
+				n = start - ldcp->next_rxi;
+			} else  {
+				n = ldcp->num_rxds - (ldcp->next_rxi - start);
+			}
+
+			/*
+			 * Starting sequence number of the received packets
+			 * is less than the next sequence number that
+			 * is expected:
+			 *
+			 * drop the message and the corresponding packets.
+			 */
+			if (ldcp->next_rxseq > dringmsg->seq_num) {
+				DWARN((vnetp, "vgen_handle_dring_data: id(%lx) "
+				    "dropping pkts, expected rxseq(0x%lx) "
+				    "> recvd(0x%lx)\n",
+				    ldcp->ldc_id, ldcp->next_rxseq,
+				    dringmsg->seq_num));
+				/*
+				 * duplicate/multiple retransmissions from
+				 * sender?? drop this msg.
+				 */
+				break;
+			}
+
+			/*
+			 * Starting sequence number of the received packets
+			 * is greater than the next expected sequence number
+			 *
+			 * send a NACK back to the peer to indicate lost
+			 * packets.
+			 */
+			if (dringmsg->seq_num > ldcp->next_rxseq) {
+				statsp->rx_lost_pkts += n;
+				tagp->vio_subtype = VIO_SUBTYPE_NACK;
+				tagp->vio_sid = ldcp->local_sid;
+				/* indicate the range of lost descriptors */
+				dringmsg->start_idx = ldcp->next_rxi;
+				rxi = start;
+				DECR_RXI(rxi, ldcp);
+				dringmsg->end_idx = rxi;
+				/* dring ident is left unchanged */
+				if (vgen_sendmsg(ldcp, (caddr_t)tagp,
+				    sizeof (*dringmsg), B_FALSE)) {
+					DWARN((vnetp,
+					    "vgen_handle_dring_data: id(%lx) "
+					    "vgen_sendmsg failed, "
+					    "stype: NACK\n", ldcp->ldc_id));
+				}
+#ifdef VGEN_REXMIT
+				/*
+				 * stop further processing until peer
+				 * retransmits with the right index and seqnum.
+				 */
+				break;
+#else	/* VGEN_REXMIT */
+				/*
+				 * treat this range of descrs/pkts as dropped
+				 * and set the new expected values for next_rxi
+				 * and next_rxseq. continue(below) to process
+				 * from the new start index.
+				 */
+				ldcp->next_rxi = start;
+				ldcp->next_rxseq += n;
+#endif	/* VGEN_REXMIT */
+
+			} else if (dringmsg->seq_num == ldcp->next_rxseq) {
+				/*
+				 * expected and starting seqnums match, but
+				 * the descriptor indeces don't?
+				 *
+				 * restart handshake with peer.
+				 */
+				DWARN((vnetp,
+				    "vgen_handle_dring_data: id(%lx) "
+				    "next_rxseq(0x%lx) == seq_num(0x%lx)\n",
+				    ldcp->ldc_id, ldcp->next_rxseq,
+				    dringmsg->seq_num));
+
+#if 0
+				vgen_handshake_retry(ldcp);
+				break;
+#endif
+
+			}
+
+		} else {
+			/* expected and start dring indeces match */
+
+			if (dringmsg->seq_num != ldcp->next_rxseq) {
+
+				/* seqnums don't match */
+
+				DWARN((vnetp,
+				    "vgen_handle_dring_data: id(%lx) "
+				    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
+				    ldcp->ldc_id, ldcp->next_rxseq,
+				    dringmsg->seq_num));
+
+#if 0
+				vgen_handshake_retry(ldcp);
+				break;
+#endif
+			}
+		}
+
+#endif	/* VGEN_HANDLE_LOST_PKTS */
+
+		/*
+		 * Start processing the descriptor range, specified
+		 * in the dring data msg.
+		 */
+		if (ldc_mem_dring_acquire(ldcp->rx_dhandle, start, end)) {
+			DWARN((vnetp, "vgen_handle_dring_data: "
+			    "id(%lx), ldc_mem_dring_acquire() failed\n",
+			    ldcp->ldc_id));
+			statsp->ierrors++;
+			/*
+			 * NOTE(review): on acquire failure we only bump
+			 * ierrors and still walk the descriptors below —
+			 * confirm this is intended rather than dropping
+			 * the message here.
+			 */
+		}
+		rxi = start;
+		sync_start = start;
+		do {
+			/* recv packets from 'start' to 'end' */
+
+			rxdp = &(ldcp->rxdp[rxi]);
+			hdrp = &rxdp->hdr;
+
+			datalen = rxdp->nbytes;
+			ncookies = rxdp->ncookies;
+			if ((datalen < ETHERMIN) ||
+			    (ncookies == 0) ||
+			    (ncookies > (uint64_t)MAX_COOKIES) ||
+			    (hdrp->dstate != VIO_DESC_READY)) {
+				rxd_err = B_TRUE;
+			} else {
+				/*
+				 * The data buffer returned by allocb(9F) is
+				 * 8byte aligned. We allocate extra 8 bytes to
+				 * ensure size is multiple of 8 bytes for
+				 * ldc_mem_copy().
+				 */
+				mp = allocb(datalen + 8, BPRI_MED);
+				nbytes = (datalen + 7) & ~7;
+			}
+			if ((rxd_err) || (mp == NULL)) {
+				/*
+				 * rxd_err or allocb() failure,
+				 * drop this packet, get next.
+				 */
+				if (rxd_err) {
+					statsp->ierrors++;
+					rxd_err = B_FALSE;
+				} else {
+					statsp->rx_allocb_fail++;
+				}
+
+				/* set descriptor done bit */
+				hdrp->dstate = VIO_DESC_DONE;
+
+				if (hdrp->ack) {
+					/*
+					 * sender needs ack for this packet.
+					 * sync pkts upto this index and
+					 * send the ack to the peer.
+					 */
+					sync_end = rxi;
+					(void) ldc_mem_dring_release(
+					    ldcp->rx_dhandle, sync_start,
+					    sync_end);
+					tagp->vio_subtype = VIO_SUBTYPE_ACK;
+					tagp->vio_sid = ldcp->local_sid;
+					dringmsg = (vio_dring_msg_t *)tagp;
+					dringmsg->start_idx = sync_start;
+					dringmsg->end_idx = sync_end;
+					if (vgen_sendmsg(ldcp, (caddr_t)tagp,
+					    sizeof (*dringmsg), B_FALSE)) {
+						DWARN((vnetp,
+						    "vgen_handle_dring_data: "
+						    "id(%lx) vgen_sendmsg "
+						    "failed, stype: ACK\n",
+						    ldcp->ldc_id));
+					}
+					/* save new sync index start */
+					if (sync_end != end) {
+						INCR_RXI(sync_end, ldcp);
+						sync_start = sync_end;
+					} else
+						sync_done = B_TRUE;
+				}
+				goto vgen_next_rxi;
+			}
+
+			nread = nbytes;
+			rv = ldc_mem_copy(ldcp->ldc_handle,
+			    (caddr_t)mp->b_rptr, off, &nread,
+			    rxdp->memcookie, ncookies, LDC_COPY_IN);
+
+			/* set done bit irrespective of rv of ldc_mem_copy() */
+			hdrp->dstate = VIO_DESC_DONE;
+
+			if (hdrp->ack) {
+				/*
+				 * sender needs ack for this packet.
+				 * sync pkts upto this index and
+				 * send the ack to the peer.
+				 */
+				sync_end = rxi;
+				(void) ldc_mem_dring_release(ldcp->rx_dhandle,
+				    sync_start, sync_end);
+				tagp->vio_subtype = VIO_SUBTYPE_ACK;
+				tagp->vio_sid = ldcp->local_sid;
+				dringmsg = (vio_dring_msg_t *)tagp;
+				dringmsg->start_idx = sync_start;
+				dringmsg->end_idx = sync_end;
+				if (vgen_sendmsg(ldcp, (caddr_t)tagp,
+				    sizeof (*dringmsg), B_FALSE)) {
+					DWARN((vnetp,
+					    "vgen_handle_dring_data: id(%lx) "
+					    "vgen_sendmsg failed stype: ACK\n",
+					    ldcp->ldc_id));
+				}
+				/* save new sync index start */
+				if (sync_end != end) {
+					INCR_RXI(sync_end, ldcp);
+					sync_start = sync_end;
+				} else
+					sync_done = B_TRUE;
+			}
+			/* if ldc_mem_copy() failed */
+			if (rv) {
+				DWARN((vnetp,
+				    "vgen_handle_dring_data: id(%lx) "
+				    "ldc_mem_copy failed\n", ldcp->ldc_id));
+				statsp->ierrors++;
+				freemsg(mp);
+				goto vgen_next_rxi;
+			}
+			if (nread != nbytes) {
+				DWARN((vnetp,
+				    "vgen_handle_dring_data: id(%lx) "
+				    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
+				    ldcp->ldc_id, nread, nbytes));
+				statsp->ierrors++;
+				freemsg(mp);
+				goto vgen_next_rxi;
+			}
+
+			/* point to the actual end of data */
+			mp->b_wptr = mp->b_rptr + datalen;
+
+			/* update stats */
+			statsp->ipackets++;
+			statsp->rbytes += datalen;
+			ehp = (struct ether_header *)mp->b_rptr;
+			if (IS_BROADCAST(ehp))
+				statsp->brdcstrcv++;
+			else if (IS_MULTICAST(ehp))
+				statsp->multircv++;
+
+			/* build a chain of received packets */
+			if (bp == NULL) {
+				/* first pkt */
+				bp = mp;
+				bpt = bp;
+				bpt->b_next = NULL;
+			} else {
+				mp->b_next = NULL;
+				bpt->b_next = mp;
+				bpt = mp;
+			}
+
+vgen_next_rxi:	if (rxi == end) {
+				break;
+			}
+			/* increment recv index */
+			INCR_RXI(rxi, ldcp);
+
+		_NOTE(CONSTCOND)
+		} while (1);
+
+		if (!sync_done) {
+			/* sync remote descriptor range */
+			sync_end = rxi;
+			(void) ldc_mem_dring_release(ldcp->rx_dhandle,
+			    sync_start, sync_end);
+			DBG2((vnetp,
+			    "vgen_handle_dring_data: not sending ACK\n"));
+		}
+
+		/* save new recv index */
+		INCR_RXI(rxi, ldcp);
+		ldcp->next_rxi = rxi;
+		ldcp->next_rxseq += ((end >= start) ?
+		    ((end - start) + 1) : (start - end));
+
+		/* try to reclaim transmit descrs also */
+		vgen_reclaim(ldcp);
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		/*
+		 * received an ack corresponding to a specific descriptor for
+		 * which we had set the ACK bit in the descriptor (during
+		 * transmit). This enables us to reclaim descriptors.
+		 */
+		DBG2((vnetp,
+		    "vgen_handle_dring_data: ACK: start(%d), end(%d)\n",
+		    start, end));
+
+		/* validate start and end indeces in the tx ack msg */
+		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
+			/* drop the message if invalid index */
+			break;
+		}
+		/* validate dring_ident */
+		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
+			/* invalid dring_ident, drop the msg */
+			break;
+		}
+		statsp->dring_data_acks++;
+		vgen_reclaim(ldcp);
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		/*
+		 * peer sent a NACK msg to indicate lost packets.
+		 * The start and end correspond to the range of descriptors
+		 * for which the peer didn't receive a dring data msg and so
+		 * didn't receive the corresponding data.
+		 */
+		DWARN((vnetp,
+		    "vgen_handle_dring_data: NACK: start(%d), end(%d)\n",
+		    start, end));
+
+		/* validate start and end indeces in the tx nack msg */
+		if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
+			/* drop the message if invalid index */
+			break;
+		}
+		/* validate dring_ident */
+		if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
+			/* invalid dring_ident, drop the msg */
+			break;
+		}
+		mutex_enter(&ldcp->txlock);
+		mutex_enter(&ldcp->tclock);
+
+		if (ldcp->next_tbufp == ldcp->cur_tbufp) {
+			/* no busy descriptors, bogus nack ? */
+			mutex_exit(&ldcp->tclock);
+			mutex_exit(&ldcp->txlock);
+			break;
+		}
+
+#ifdef VGEN_REXMIT
+		/* send a new dring data msg including the lost descrs */
+		end = ldcp->next_tbufp - ldcp->tbufp;
+		DECR_TXI(end, ldcp);
+		seqnum = ldcp->tbufp[start].seqnum;
+		/* no need to increment ldcp->next_txseq as this is rexmit */
+		rv = vgen_send_dring_data(ldcp, start, end, seqnum);
+		if (rv != 0) {
+			/*
+			 * vgen_send_dring_data() error: drop all packets
+			 * in this descr range
+			 */
+			DWARN((vnetp,
+			    "vgen_handle_dring_data: "
+			    "vgen_send_dring_data failed :"
+			    "id(%lx) rv(%d)\n", ldcp->ldc_id, rv));
+			for (txi = start; txi <= end; ) {
+				tbufp = &(ldcp->tbufp[txi]);
+				txdp = tbufp->descp;
+				hdrp = &txdp->hdr;
+				(void) ldc_mem_unbind_handle(tbufp->memhandle);
+				freemsg(tbufp->mp);
+				tbufp->flags = VGEN_PRIV_DESC_FREE;
+				hdrp->dstate = VIO_DESC_FREE;
+				hdrp->ack = B_FALSE;
+				statsp->oerrors++;
+				/*
+				 * advance to the next tx descriptor; this
+				 * increment was missing, which made the
+				 * loop spin forever on the first index.
+				 */
+				INCR_TXI(txi, ldcp);
+			}
+
+			/* update next pointer */
+			ldcp->next_tbufp = &(ldcp->tbufp[start]);
+			ldcp->next_txseq = seqnum;
+			ldcp->next_txi = start;
+		}
+		DBG2((vnetp,
+		    "vgen_handle_dring_data: rexmit: start(%d) end(%d)\n",
+		    start, end));
+#else	/* VGEN_REXMIT */
+		/* we just mark the descrs as done so they can be reclaimed */
+		for (txi = start; txi <= end; ) {
+			txdp = &(ldcp->txdp[txi]);
+			hdrp = &txdp->hdr;
+			if (hdrp->dstate == VIO_DESC_READY)
+				hdrp->dstate = VIO_DESC_DONE;
+			INCR_TXI(txi, ldcp);
+		}
+#endif	/* VGEN_REXMIT */
+		mutex_exit(&ldcp->tclock);
+		mutex_exit(&ldcp->txlock);
+
+		vgen_reclaim(ldcp);
+
+		break;
+	}
+
+	DBG1((vnetp, "vgen_handle_dring_data: exit\n"));
+	*headp = bp;
+	*tailp = bpt;
+}
+
+/*
+ * Reclaim completed transmit descriptors. Uses mutex_tryenter() so a
+ * caller never blocks if another thread is already reclaiming.
+ */
+static void
+vgen_reclaim(vgen_ldc_t *ldcp)
+{
+	if (mutex_tryenter(&ldcp->tclock) == 0)
+		return;	/* already in progress */
+	vgen_reclaim_dring(ldcp);
+	/* remember when we last reclaimed, for the tx watchdog timeout */
+	ldcp->reclaim_lbolt = ddi_get_lbolt();
+	mutex_exit(&ldcp->tclock);
+}
+
+/*
+ * transmit reclaim function. starting from the current reclaim index
+ * look for descriptors marked DONE and reclaim the descriptor and the
+ * corresponding buffers (tbuf).
+ * Caller must hold tclock (see vgen_reclaim()).
+ */
+static void
+vgen_reclaim_dring(vgen_ldc_t *ldcp)
+{
+	vnet_public_desc_t *txdp;
+	vgen_private_desc_t *tbufp;
+	vio_dring_entry_hdr_t *hdrp;
+#ifdef VGEN_USE_MAC_TX_UPDATE
+	vgen_t *vgenp = (vgen_t *)ldcp->vgenp;
+#endif
+
+#ifdef DEBUG
+	if (vgen_trigger_txtimeout)
+		return;
+#endif
+
+	tbufp = ldcp->cur_tbufp;
+	txdp = tbufp->descp;
+	hdrp = &txdp->hdr;
+
+	/*
+	 * Walk DONE descriptors from cur_tbufp up to (not including)
+	 * next_tbufp, freeing each buffer and marking the descriptor FREE.
+	 */
+	while ((hdrp->dstate == VIO_DESC_DONE) &&
+	    (tbufp != ldcp->next_tbufp)) {
+		(void) ldc_mem_unbind_handle(tbufp->memhandle);
+		freemsg(tbufp->mp);
+		tbufp->mp = NULL;
+		tbufp->flags = VGEN_PRIV_DESC_FREE;
+		hdrp->dstate = VIO_DESC_FREE;
+		hdrp->ack = B_FALSE;
+
+		tbufp = NEXTTBUF(ldcp, tbufp);
+		txdp = tbufp->descp;
+		hdrp = &txdp->hdr;
+	}
+
+	ldcp->cur_tbufp = tbufp;
+
+	/*
+	 * Check if mac layer should be notified to restart transmissions
+	 */
+	if (ldcp->need_resched) {
+		ldcp->need_resched = B_FALSE;
+#ifdef VGEN_USE_MAC_TX_UPDATE
+		mac_tx_update(vgenp->vnetmacp);
+#endif
+	}
+}
+
+/*
+ * return the number of pending transmits for the channel, i.e. the
+ * distance (modulo ring size) from cur_tbufp to next_tbufp.
+ */
+static int
+vgen_num_txpending(vgen_ldc_t *ldcp)
+{
+	int n;
+
+	if (ldcp->next_tbufp >= ldcp->cur_tbufp) {
+		n = ldcp->next_tbufp - ldcp->cur_tbufp;
+	} else  {
+		/* cur_tbufp > next_tbufp : the ring has wrapped around */
+		n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp);
+	}
+
+	return (n);
+}
+
+/*
+ * determine if the transmit descriptor ring is full: full means the
+ * descriptor after next_tbufp is the one still awaiting reclaim.
+ * Returns VGEN_SUCCESS if full, VGEN_FAILURE otherwise.
+ */
+static int
+vgen_tx_dring_full(vgen_ldc_t *ldcp)
+{
+	vgen_private_desc_t	*tbufp;
+	vgen_private_desc_t	*ntbufp;
+
+	tbufp = ldcp->next_tbufp;
+	ntbufp = NEXTTBUF(ldcp, tbufp);
+	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */
+#if 0
+		void *vnetp = LDC_TO_VNET(ldcp);
+		DWARN((vnetp, "vgen_tx_dring_full: id(%lx)\n",
+		    ldcp->ldc_id));
+#endif
+		return (VGEN_SUCCESS);
+	}
+	return (VGEN_FAILURE);
+}
+
+/*
+ * determine if timeout condition has occured: the tx ring is full and
+ * no reclaim has happened within vnet_ldcwd_txtimeout milliseconds
+ * (a zero timeout tunable disables the check).
+ * Returns VGEN_SUCCESS on timeout, VGEN_FAILURE otherwise.
+ */
+static int
+vgen_ldc_txtimeout(vgen_ldc_t *ldcp)
+{
+	if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) >
+	    drv_usectohz(vnet_ldcwd_txtimeout * 1000)) &&
+	    (vnet_ldcwd_txtimeout) &&
+	    (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) {
+#if 0
+		void *vnetp = LDC_TO_VNET(ldcp);
+		DWARN((vnetp, "vgen_ldc_txtimeout: id(%lx)\n",
+		    ldcp->ldc_id));
+#endif
+		return (VGEN_SUCCESS);
+	} else {
+		return (VGEN_FAILURE);
+	}
+}
+
+/*
+ * transmit watchdog timeout handler. On a detected tx timeout the
+ * handshake is restarted; the timer always rearms itself at
+ * vnet_ldcwd_interval milliseconds.
+ */
+static void
+vgen_ldc_watchdog(void *arg)
+{
+	vgen_ldc_t *ldcp;
+	void *vnetp;
+	int rv;
+
+	ldcp = (vgen_ldc_t *)arg;
+	vnetp = LDC_TO_VNET(ldcp);
+
+	rv = vgen_ldc_txtimeout(ldcp);
+	if (rv == VGEN_SUCCESS) {
+		DWARN((vnetp,
+		    "vgen_ldc_watchdog: transmit timeout ldcid(%lx)\n",
+		    ldcp->ldc_id));
+#ifdef DEBUG
+		if (vgen_trigger_txtimeout) {
+			/* tx timeout triggered for debugging */
+			vgen_trigger_txtimeout = 0;
+		}
+#endif
+		mutex_enter(&ldcp->cblock);
+		vgen_handshake_retry(ldcp);
+		mutex_exit(&ldcp->cblock);
+		/* let the mac layer retry transmits that were blocked */
+		if (ldcp->need_resched) {
+			ldcp->need_resched = B_FALSE;
+#ifdef VGEN_USE_MAC_TX_UPDATE
+			mac_tx_update(ldcp->vgenp->vnetmacp);
+#endif
+		}
+	}
+
+	/* rearm the watchdog */
+	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
+	    drv_usectohz(vnet_ldcwd_interval * 1000));
+}
+
+/*
+ * based on mcopymsg(): copy the data of an mblk chain into the flat
+ * buffer 'bufp'. Caller must ensure bufp can hold msgsize(mp) bytes.
+ * NOTE(review): unlike mcopymsg(9F) this does NOT free the message --
+ * confirm callers free mp themselves.
+ */
+static void
+vgen_copymsg(mblk_t *mp, void *bufp)
+{
+	caddr_t	dest = bufp;
+	mblk_t	*bp;
+	size_t	n;
+
+	for (bp = mp; bp != NULL; bp = bp->b_cont) {
+		n = MBLKL(bp);
+		bcopy(bp->b_rptr, dest, n);
+		dest += n;
+	}
+}
+
+/*
+ * Allocate the per-channel stats structure and create/install the
+ * named kstat "vnetldc0x<ldc_id>" under module "vnet". On success the
+ * kstat and stats pointers are stored in the ldcp; returns
+ * VGEN_SUCCESS or VGEN_FAILURE.
+ */
+static int
+vgen_setup_kstats(vgen_ldc_t *ldcp)
+{
+	vgen_t *vgenp;
+	struct kstat *ksp;
+	vgen_stats_t *statsp;
+	vgen_kstats_t *ldckp;
+	int instance;
+	size_t size;
+	char name[MAXNAMELEN];
+
+	vgenp = LDC_TO_VGEN(ldcp);
+	instance = ddi_get_instance(vgenp->vnetdip);
+	(void) sprintf(name, "vnetldc0x%lx", ldcp->ldc_id);
+	statsp = kmem_zalloc(sizeof (vgen_stats_t), KM_SLEEP);
+	/* NOTE(review): kmem_zalloc(KM_SLEEP) cannot fail; check is dead */
+	if (statsp == NULL) {
+		return (VGEN_FAILURE);
+	}
+	/* number of kstat_named_t entries in vgen_kstats_t */
+	size = sizeof (vgen_kstats_t) / sizeof (kstat_named_t);
+	ksp = kstat_create("vnet", instance, name, "net", KSTAT_TYPE_NAMED,
+	    size, 0);
+	if (ksp == NULL) {
+		KMEM_FREE(statsp);
+		return (VGEN_FAILURE);
+	}
+
+	ldckp = (vgen_kstats_t *)ksp->ks_data;
+	kstat_named_init(&ldckp->ipackets,		"ipackets",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->ipackets64,		"ipackets64",
+	    KSTAT_DATA_ULONGLONG);
+	kstat_named_init(&ldckp->ierrors,		"ierrors",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->opackets,		"opackets",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->opackets64,		"opackets64",
+	    KSTAT_DATA_ULONGLONG);
+	kstat_named_init(&ldckp->oerrors,		"oerrors",
+	    KSTAT_DATA_ULONG);
+
+
+	/* MIB II kstat variables */
+	kstat_named_init(&ldckp->rbytes,		"rbytes",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->rbytes64,		"rbytes64",
+	    KSTAT_DATA_ULONGLONG);
+	kstat_named_init(&ldckp->obytes,		"obytes",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->obytes64,		"obytes64",
+	    KSTAT_DATA_ULONGLONG);
+	kstat_named_init(&ldckp->multircv,		"multircv",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->multixmt,		"multixmt",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->brdcstrcv,		"brdcstrcv",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->brdcstxmt,		"brdcstxmt",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->norcvbuf,		"norcvbuf",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->noxmtbuf,		"noxmtbuf",
+	    KSTAT_DATA_ULONG);
+
+	/* Tx stats */
+	kstat_named_init(&ldckp->tx_no_desc,		"tx_no_desc",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->tx_allocb_fail,	"tx_allocb_fail",
+	    KSTAT_DATA_ULONG);
+
+	/* Rx stats */
+	kstat_named_init(&ldckp->rx_no_desc,		"rx_no_desc",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->rx_allocb_fail,	"rx_allocb_fail",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->rx_lost_pkts,		"rx_lost_pkts",
+	    KSTAT_DATA_ULONG);
+
+	/* Interrupt stats */
+	kstat_named_init(&ldckp->callbacks,		"callbacks",
+	    KSTAT_DATA_ULONG);
+	kstat_named_init(&ldckp->dring_data_acks,	"dring_data_acks",
+	    KSTAT_DATA_ULONG);
+
+	ksp->ks_update = vgen_kstat_update;
+	ksp->ks_private = (void *)ldcp;
+	kstat_install(ksp);
+
+	ldcp->ksp = ksp;
+	ldcp->statsp = statsp;
+	return (VGEN_SUCCESS);
+}
+
+/* Tear down what vgen_setup_kstats() created: the kstat and statsp */
+static void
+vgen_destroy_kstats(vgen_ldc_t *ldcp)
+{
+	if (ldcp->ksp)
+		kstat_delete(ldcp->ksp);
+	KMEM_FREE(ldcp->statsp);
+}
+
+/*
+ * kstat ks_update entry point. KSTAT_READ copies the live counters
+ * from ldcp->statsp into the named kstats (32-bit views truncated);
+ * otherwise (KSTAT_WRITE) the named kstat values are copied back into
+ * the live counters. Always returns VGEN_SUCCESS.
+ */
+static int
+vgen_kstat_update(kstat_t *ksp, int rw)
+{
+	vgen_ldc_t *ldcp;
+	vgen_stats_t *statsp;
+	vgen_kstats_t *ldckp;
+
+	ldcp = (vgen_ldc_t *)ksp->ks_private;
+	statsp = ldcp->statsp;
+	ldckp = (vgen_kstats_t *)ksp->ks_data;
+
+	if (rw == KSTAT_READ) {
+		ldckp->ipackets.value.ul	= (uint32_t)statsp->ipackets;
+		ldckp->ipackets64.value.ull	= statsp->ipackets;
+		ldckp->ierrors.value.ul		= statsp->ierrors;
+		ldckp->opackets.value.ul	= (uint32_t)statsp->opackets;
+		ldckp->opackets64.value.ull	= statsp->opackets;
+		ldckp->oerrors.value.ul		= statsp->oerrors;
+
+		/*
+		 * MIB II kstat variables
+		 */
+		ldckp->rbytes.value.ul		= (uint32_t)statsp->rbytes;
+		ldckp->rbytes64.value.ull	= statsp->rbytes;
+		ldckp->obytes.value.ul		= (uint32_t)statsp->obytes;
+		ldckp->obytes64.value.ull	= statsp->obytes;
+		ldckp->multircv.value.ul	= statsp->multircv;
+		ldckp->multixmt.value.ul	= statsp->multixmt;
+		ldckp->brdcstrcv.value.ul	= statsp->brdcstrcv;
+		ldckp->brdcstxmt.value.ul	= statsp->brdcstxmt;
+		ldckp->norcvbuf.value.ul	= statsp->norcvbuf;
+		ldckp->noxmtbuf.value.ul	= statsp->noxmtbuf;
+
+		ldckp->tx_no_desc.value.ul	= statsp->tx_no_desc;
+		ldckp->tx_allocb_fail.value.ul	= statsp->tx_allocb_fail;
+
+		ldckp->rx_no_desc.value.ul	= statsp->rx_no_desc;
+		ldckp->rx_allocb_fail.value.ul	= statsp->rx_allocb_fail;
+		ldckp->rx_lost_pkts.value.ul	= statsp->rx_lost_pkts;
+
+		ldckp->callbacks.value.ul	= statsp->callbacks;
+		ldckp->dring_data_acks.value.ul	= statsp->dring_data_acks;
+	} else {
+		statsp->ipackets	= ldckp->ipackets64.value.ull;
+		statsp->ierrors		= ldckp->ierrors.value.ul;
+		statsp->opackets	= ldckp->opackets64.value.ull;
+		statsp->oerrors		= ldckp->oerrors.value.ul;
+
+		/*
+		 * MIB II kstat variables
+		 */
+		statsp->rbytes		= ldckp->rbytes64.value.ull;
+		statsp->obytes		= ldckp->obytes64.value.ull;
+		statsp->multircv	= ldckp->multircv.value.ul;
+		statsp->multixmt	= ldckp->multixmt.value.ul;
+		statsp->brdcstrcv	= ldckp->brdcstrcv.value.ul;
+		statsp->brdcstxmt	= ldckp->brdcstxmt.value.ul;
+		statsp->norcvbuf	= ldckp->norcvbuf.value.ul;
+		statsp->noxmtbuf	= ldckp->noxmtbuf.value.ul;
+
+		statsp->tx_no_desc	= ldckp->tx_no_desc.value.ul;
+		statsp->tx_allocb_fail	= ldckp->tx_allocb_fail.value.ul;
+
+		statsp->rx_no_desc	= ldckp->rx_no_desc.value.ul;
+		statsp->rx_allocb_fail	= ldckp->rx_allocb_fail.value.ul;
+		statsp->rx_lost_pkts	= ldckp->rx_lost_pkts.value.ul;
+
+		statsp->callbacks	= ldckp->callbacks.value.ul;
+		statsp->dring_data_acks	= ldckp->dring_data_acks.value.ul;
+	}
+
+	return (VGEN_SUCCESS);
+}
+
+/*
+ * handler for error messages received from the peer ldc end-point.
+ * Currently a no-op placeholder; arguments intentionally unused.
+ */
+static void
+vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+	_NOTE(ARGUNUSED(ldcp, tagp))
+}
+
+/*
+ * Check if the session id in the received message is valid, i.e. it
+ * matches the peer's session id recorded during handshake.
+ * Returns VGEN_SUCCESS on match, VGEN_FAILURE otherwise.
+ */
+static int
+vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
+{
+	if (tagp->vio_sid != ldcp->peer_sid) {
+		void *vnetp = LDC_TO_VNET(ldcp);
+		DWARN((vnetp,
+		    "sid mismatch: expected(%x), rcvd(%x)\n",
+		    ldcp->peer_sid, tagp->vio_sid));
+		return (VGEN_FAILURE);
+	}
+	else
+		return (VGEN_SUCCESS);
+}
+
+/*
+ * convert mac address from byte array to uint64_t: pack the 6 bytes
+ * MSB-first so macaddr[0] ends up in the most significant used byte
+ * of the result (inverse of vgen_macaddr_ultostr()).
+ */
+static uint64_t
+vgen_macaddr_strtoul(const uint8_t *macaddr)
+{
+	uint64_t val = 0;
+	int i;
+
+	for (i = 0; i < ETHERADDRL; i++) {
+		val <<= 8;
+		val |= macaddr[i];
+	}
+
+	return (val);
+}
+
+/*
+ * convert mac address from uint64_t to byte array: unpack LSB-first
+ * into the tail of the array so that the most significant used byte
+ * of 'val' lands in macaddr[0] (inverse of vgen_macaddr_strtoul()).
+ * Always returns VGEN_SUCCESS.
+ */
+static int
+vgen_macaddr_ultostr(uint64_t val, uint8_t *macaddr)
+{
+	int i;
+	uint64_t value;
+
+	value = val;
+	for (i = ETHERADDRL - 1; i >= 0; i--) {
+		macaddr[i] = value & 0xFF;
+		value >>= 8;
+	}
+	return (VGEN_SUCCESS);
+}
+
+/*
+ * Format a 6-byte ethernet address as "x:x:x:x:x:x" into ebuf and
+ * return ebuf. ebuf must hold at least 18 bytes (6 x 2 hex digits +
+ * 5 colons + NUL) -- callers passing smaller buffers will overflow.
+ */
+static caddr_t
+vgen_print_ethaddr(uint8_t *a, char *ebuf)
+{
+	(void) sprintf(ebuf,
+	    "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]);
+	return (ebuf);
+}
+
+/*
+ * Handshake watchdog timeout handler: the handshake did not complete
+ * in time, so clear the timeout id and restart the handshake.
+ */
+static void
+vgen_hwatchdog(void *arg)
+{
+	vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
+	void *vnetp = LDC_TO_VNET(ldcp);
+
+	DWARN((vnetp,
+	    "vgen_hwatchdog: handshake timeout ldc(%lx) phase(%x) state(%x)\n",
+	    ldcp->ldc_id, ldcp->hphase, ldcp->hstate));
+
+	mutex_enter(&ldcp->cblock);
+	ldcp->htid = 0;
+	vgen_handshake_retry(ldcp);
+	mutex_exit(&ldcp->cblock);
+}
+
+/*
+ * Print the attribute-info handshake parameters for the given
+ * endpoint (VGEN_LOCAL or peer) to the console.
+ */
+static void
+vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint)
+{
+	vgen_hparams_t *hp;
+	char ep[8];
+	uint8_t addr[ETHERADDRL];
+	/*
+	 * vgen_print_ethaddr() emits "x:x:x:x:x:x" -- up to 17 chars
+	 * plus NUL; the previous 6-byte buffer overflowed the stack.
+	 */
+	char ea[ETHERADDRL * 3];
+
+	if (endpoint == VGEN_LOCAL) {
+		hp = &ldcp->local_hparams;
+		(void) sprintf(ep, "Local");
+	} else {
+		hp = &ldcp->peer_hparams;
+		(void) sprintf(ep, "Peer");
+	}
+	(void) vgen_macaddr_ultostr(hp->addr, addr);
+	cmn_err(CE_CONT, "attr_info: %s: \n", ep);
+	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
+	    vgen_print_ethaddr(addr, ea));
+	cmn_err(CE_CONT, "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
+	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
+}
+
+/*
+ * Print a full set of handshake parameters (version, attr, dring
+ * info) to the console.
+ */
+static void
+vgen_print_hparams(vgen_hparams_t *hp)
+{
+	uint8_t	addr[ETHERADDRL];
+	/*
+	 * vgen_print_ethaddr() emits "x:x:x:x:x:x" -- up to 17 chars
+	 * plus NUL; the previous 6-byte buffer overflowed the stack.
+	 */
+	char	ea[ETHERADDRL * 3];
+	ldc_mem_cookie_t *dc;
+
+	cmn_err(CE_CONT, "version_info:\n");
+	cmn_err(CE_CONT,
+	    "\tver_major: %d, ver_minor: %d, dev_class: %d\n",
+	    hp->ver_major, hp->ver_minor, hp->dev_class);
+
+	(void) vgen_macaddr_ultostr(hp->addr, addr);
+	cmn_err(CE_CONT, "attr_info:\n");
+	cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu,
+	    vgen_print_ethaddr(addr, ea));
+	cmn_err(CE_CONT,
+	    "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n",
+	    hp->addr_type, hp->xfer_mode, hp->ack_freq);
+
+	dc = &hp->dring_cookie;
+	cmn_err(CE_CONT, "dring_info:\n");
+	cmn_err(CE_CONT,
+	    "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size);
+	cmn_err(CE_CONT,
+	    "\tldc_addr: 0x%lx, ldc_size: %ld\n",
+	    dc->addr, dc->size);
+	cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident);
+}
+
+/*
+ * Dump channel identity/state plus both local and peer handshake
+ * parameters to the console (debug aid).
+ */
+static void
+vgen_print_ldcinfo(vgen_ldc_t *ldcp)
+{
+	vgen_hparams_t *hp;
+
+	cmn_err(CE_CONT, "Channel Information:\n");
+	cmn_err(CE_CONT,
+	    "\tldc_id: 0x%lx, ldc_status: 0x%x\n",
+	    ldcp->ldc_id, ldcp->ldc_status);
+	cmn_err(CE_CONT,
+	    "\tlocal_sid: 0x%x, peer_sid: 0x%x\n",
+	    ldcp->local_sid, ldcp->peer_sid);
+	cmn_err(CE_CONT,
+	    "\thphase: 0x%x, hstate: 0x%x\n",
+	    ldcp->hphase, ldcp->hstate);
+
+	cmn_err(CE_CONT, "Local handshake params:\n");
+	hp = &ldcp->local_hparams;
+	vgen_print_hparams(hp);
+
+	cmn_err(CE_CONT, "Peer handshake params:\n");
+	hp = &ldcp->peer_hparams;
+	vgen_print_hparams(hp);
+}
diff --git a/usr/src/uts/sun4v/io/vnex.c b/usr/src/uts/sun4v/io/vnex.c
index 1eb0856a8e..e30506c23a 100644
--- a/usr/src/uts/sun4v/io/vnex.c
+++ b/usr/src/uts/sun4v/io/vnex.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -96,7 +95,8 @@ static struct vnex_pil_map vnex_name_to_pil[] = {
{"loop", PIL_3},
{"sunmc", PIL_3},
{"sunvts", PIL_3},
- {"explorer", PIL_3}
+ {"explorer", PIL_3},
+ {"ncp", PIL_8}
};
#define VNEX_MAX_DEVS (sizeof (vnex_name_to_pil) / \
@@ -423,6 +423,20 @@ vnex_disable_intr(dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp)
return (DDI_SUCCESS);
}
+/*
+ * Map a device's interrupt number (ino) to the inum stored in its
+ * ddi interrupt handle; returns -1 if the ino has no registered id
+ * or the id has no ddi handle attached.
+ */
+int
+vnex_ino_to_inum(dev_info_t *dip, uint32_t ino)
+{
+	vnex_id_t *vid_p;
+	ddi_intr_handle_impl_t *hdlp;
+
+	if ((vid_p = vnex_locate_id(dip, ino)) == NULL)
+		return (-1);
+	else if ((hdlp = vid_p->vid_ddi_hdlp) == NULL)
+		return (-1);
+	else
+		return (hdlp->ih_inum);
+}
+
static int
vnex_add_intr(dev_info_t *dip, dev_info_t *rdip,
ddi_intr_handle_impl_t *hdlp)
diff --git a/usr/src/uts/sun4v/io/vsw.c b/usr/src/uts/sun4v/io/vsw.c
new file mode 100644
index 0000000000..6038ea2874
--- /dev/null
+++ b/usr/src/uts/sun4v/io/vsw.c
@@ -0,0 +1,6959 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <sys/time.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/user.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/strsubr.h>
+#include <sys/cmn_err.h>
+#include <sys/cpu.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ksynch.h>
+#include <sys/stat.h>
+#include <sys/kstat.h>
+#include <sys/vtrace.h>
+#include <sys/strsun.h>
+#include <sys/dlpi.h>
+#include <sys/ethernet.h>
+#include <net/if.h>
+#include <sys/varargs.h>
+#include <sys/machsystm.h>
+#include <sys/modctl.h>
+#include <sys/modhash.h>
+#include <sys/mac.h>
+#include <sys/taskq.h>
+#include <sys/note.h>
+#include <sys/mach_descrip.h>
+#include <sys/mac.h>
+#include <sys/mdeg.h>
+#include <sys/ldc.h>
+#include <sys/vsw_fdb.h>
+#include <sys/vsw.h>
+#include <sys/vio_mailbox.h>
+#include <sys/vnet_mailbox.h>
+#include <sys/vnet_common.h>
+
+/*
+ * Function prototypes.
+ */
+static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
+static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
+static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static void vsw_get_md_properties(vsw_t *vswp);
+static int vsw_setup_layer2(vsw_t *);
+static int vsw_setup_layer3(vsw_t *);
+
+/* MAC layer routines */
+static int vsw_mac_attach(vsw_t *vswp);
+static void vsw_mac_detach(vsw_t *vswp);
+static void vsw_notify_cb(void *, mac_notify_type_t);
+static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
+static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
+static int vsw_mac_register(vsw_t *);
+static int vsw_mac_unregister(vsw_t *);
+static uint64_t vsw_m_stat(void *arg, enum mac_stat);
+static void vsw_m_stop(void *arg);
+static int vsw_m_start(void *arg);
+static int vsw_m_unicst(void *arg, const uint8_t *);
+static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
+static int vsw_m_promisc(void *arg, boolean_t);
+static mblk_t *vsw_m_tx(void *arg, mblk_t *);
+static void vsw_m_resources(void *arg);
+static void vsw_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
+
+/* MDEG routines */
+static void vsw_mdeg_register(vsw_t *vswp);
+static void vsw_mdeg_unregister(vsw_t *vswp);
+static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
+
+/* Port add/deletion routines */
+static int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
+static int vsw_port_attach(vsw_t *vswp, int p_instance,
+ uint64_t *ldcids, int nids, struct ether_addr *macaddr);
+static int vsw_detach_ports(vsw_t *vswp);
+static int vsw_port_detach(vsw_t *vswp, int p_instance);
+static int vsw_port_delete(vsw_port_t *port);
+static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
+static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
+static int vsw_init_ldcs(vsw_port_t *port);
+static int vsw_uninit_ldcs(vsw_port_t *port);
+static int vsw_ldc_init(vsw_ldc_t *ldcp);
+static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
+static int vsw_drain_ldcs(vsw_port_t *port);
+static int vsw_drain_port_taskq(vsw_port_t *port);
+static void vsw_marker_task(void *);
+static vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
+static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
+
+/* Interrupt routines */
+static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
+
+/* Handshake routines */
+static void vsw_restart_handshake(vsw_ldc_t *);
+static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
+static void vsw_next_milestone(vsw_ldc_t *);
+static int vsw_supported_version(vio_ver_msg_t *);
+
+/* Data processing routines */
+static void vsw_process_pkt(void *);
+static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
+static void vsw_process_ctrl_pkt(void *);
+static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
+static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
+static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
+static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
+static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
+static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
+static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
+static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
+static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
+static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
+static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
+
+/* Switching/data transmit routines */
+static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
+ vsw_port_t *port, mac_resource_handle_t);
+static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
+ vsw_port_t *port, mac_resource_handle_t);
+static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
+ vsw_port_t *port);
+static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
+ vsw_port_t *port);
+static int vsw_portsend(vsw_port_t *, mblk_t *);
+static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
+static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
+
+/* Packet creation routines */
+static void vsw_send_ver(vsw_ldc_t *);
+static void vsw_send_attr(vsw_ldc_t *);
+static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
+static void vsw_send_dring_info(vsw_ldc_t *);
+static void vsw_send_rdx(vsw_ldc_t *);
+
+static void vsw_send_msg(vsw_ldc_t *, void *, int);
+
+/* Forwarding database (FDB) routines */
+static int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
+static int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
+static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
+static int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
+static int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
+static int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
+static void vsw_del_addr(uint8_t, void *, uint64_t);
+static void vsw_del_mcst_port(vsw_port_t *);
+static void vsw_del_mcst_vsw(vsw_t *);
+
+/* Dring routines */
+static dring_info_t *vsw_create_dring(vsw_ldc_t *);
+static void vsw_create_privring(vsw_ldc_t *);
+static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
+static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
+ int *);
+static void vsw_dring_priv2pub(vsw_private_desc_t *);
+static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
+
+static void vsw_set_lane_attr(vsw_t *, lane_t *);
+static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
+static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
+static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
+static int vsw_check_dring_info(vio_dring_reg_msg_t *);
+
+/* Misc support routines */
+static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
+
+static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
+static int vsw_free_ring(dring_info_t *);
+
+/* Debugging routines */
+static void dump_flags(uint64_t);
+static void display_state(void);
+static void display_lane(lane_t *);
+static void display_ring(dring_info_t *);
+
+int vsw_num_handshakes = 3; /* # of handshake attempts */
+int vsw_wretries = 100; /* # of write attempts */
+
+/*
+ * mode specific frame switching function
+ */
+void (*vsw_switch_frame)(vsw_t *, mblk_t *, int, vsw_port_t *,
+ mac_resource_handle_t);
+
+static struct cb_ops vsw_cb_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* cb_stream */
+ D_MP, /* cb_flag */
+ CB_REV, /* rev */
+ nodev, /* int (*cb_aread)() */
+ nodev /* int (*cb_awrite)() */
+};
+
+static struct dev_ops vsw_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ vsw_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ vsw_attach, /* devo_attach */
+ vsw_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &vsw_cb_ops, /* devo_cb_ops */
+ (struct bus_ops *)NULL, /* devo_bus_ops */
+ ddi_power /* devo_power */
+};
+
+extern struct mod_ops mod_driverops;
+static struct modldrv vswmodldrv = {
+ &mod_driverops,
+ "sun4v Virtual Switch Driver %I%",
+ &vsw_ops,
+};
+
+#define LDC_ENTER_LOCK(ldcp) \
+ mutex_enter(&((ldcp)->ldc_cblock));\
+ mutex_enter(&((ldcp)->ldc_txlock));
+#define LDC_EXIT_LOCK(ldcp) \
+ mutex_exit(&((ldcp)->ldc_txlock));\
+ mutex_exit(&((ldcp)->ldc_cblock));
+
+/* Driver soft state ptr */
+static void *vsw_state;
+
+/*
+ * Linked list of "vsw_t" structures - one per instance.
+ */
+vsw_t *vsw_head = NULL;
+krwlock_t vsw_rw;
+
+/*
+ * Property names
+ */
+static char vdev_propname[] = "virtual-device";
+static char vsw_propname[] = "virtual-network-switch";
+static char physdev_propname[] = "vsw-phys-dev";
+static char smode_propname[] = "vsw-switch-mode";
+static char macaddr_propname[] = "local-mac-address";
+static char remaddr_propname[] = "remote-mac-address";
+static char ldcids_propname[] = "ldc-ids";
+static char chan_propname[] = "channel-endpoint";
+static char id_propname[] = "id";
+static char reg_propname[] = "reg";
+
+/* supported versions */
+static ver_sup_t vsw_versions[] = { {1, 0} };
+
+/*
+ * Matching criteria passed to the MDEG to register interest
+ * in changes to 'virtual-device-port' nodes identified by their
+ * 'id' property.
+ */
+static md_prop_match_t vport_prop_match[] = {
+ { MDET_PROP_VAL, "id" },
+ { MDET_LIST_END, NULL }
+};
+
+static mdeg_node_match_t vport_match = { "virtual-device-port",
+ vport_prop_match };
+
+/*
+ * Specification of an MD node passed to the MDEG to filter any
+ * 'vport' nodes that do not belong to the specified node. This
+ * template is copied for each vsw instance and filled in with
+ * the appropriate 'cfg-handle' value before being passed to the MDEG.
+ */
+static mdeg_prop_spec_t vsw_prop_template[] = {
+ { MDET_PROP_STR, "name", vsw_propname },
+ { MDET_PROP_VAL, "cfg-handle", NULL },
+ { MDET_LIST_END, NULL, NULL }
+};
+
+#define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val);
+
+/*
+ * Print debug messages - set to 0x1f to enable all msgs
+ * or 0x0 to turn all off.
+ */
+int vswdbg = 0x0;
+
+/*
+ * debug levels:
+ * 0x01: Function entry/exit tracing
+ * 0x02: Internal function messages
+ * 0x04: Verbose internal messages
+ * 0x08: Warning messages
+ * 0x10: Error messages
+ */
+
+static void
+vswdebug(vsw_t *vswp, const char *fmt, ...)
+{
+ char buf[512];
+ va_list ap;
+
+ va_start(ap, fmt);
+ (void) vsprintf(buf, fmt, ap);
+ va_end(ap);
+
+ if (vswp == NULL)
+ cmn_err(CE_CONT, "%s\n", buf);
+ else
+ cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
+}
+
+/*
+ * For the moment the state dump routines have their own
+ * private flag.
+ */
+#define DUMP_STATE 0
+
+#if DUMP_STATE
+
+#define DUMP_TAG(tag) \
+{ \
+ D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
+ D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
+ D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
+}
+
+#define DUMP_TAG_PTR(tag) \
+{ \
+ D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
+ D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
+ D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
+}
+
+#define DUMP_FLAGS(flags) dump_flags(flags);
+#define DISPLAY_STATE() display_state()
+
+#else
+
+#define DUMP_TAG(tag)
+#define DUMP_TAG_PTR(tag)
+#define DUMP_FLAGS(state)
+#define DISPLAY_STATE()
+
+#endif /* DUMP_STATE */
+
+#ifdef DEBUG
+
+#define D1 \
+if (vswdbg & 0x01) \
+ vswdebug
+
+#define D2 \
+if (vswdbg & 0x02) \
+ vswdebug
+
+#define D3 \
+if (vswdbg & 0x04) \
+ vswdebug
+
+#define DWARN \
+if (vswdbg & 0x08) \
+ vswdebug
+
+#define DERR \
+if (vswdbg & 0x10) \
+ vswdebug
+
+#else
+
+#define DERR if (0) vswdebug
+#define DWARN if (0) vswdebug
+#define D1 if (0) vswdebug
+#define D2 if (0) vswdebug
+#define D3 if (0) vswdebug
+
+#endif /* DEBUG */
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &vswmodldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ int status;
+
+ rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
+
+ status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
+ if (status != 0) {
+ return (status);
+ }
+
+ mac_init_ops(&vsw_ops, "vsw");
+ status = mod_install(&modlinkage);
+ if (status != 0) {
+ ddi_soft_state_fini(&vsw_state);
+ }
+ return (status);
+}
+
+int
+_fini(void)
+{
+ int status;
+
+ status = mod_remove(&modlinkage);
+ if (status != 0)
+ return (status);
+ mac_fini_ops(&vsw_ops);
+ ddi_soft_state_fini(&vsw_state);
+
+ rw_destroy(&vsw_rw);
+
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+static int
+vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ vsw_t *vswp;
+ int smode, instance, i;
+ char hashname[MAXNAMELEN];
+ char qname[TASKQ_NAMELEN];
+ int rv = 1;
+ enum { PROG_init = 0x0, PROG_if_lock = 0x1,
+ PROG_fdb = 0x2, PROG_mfdb = 0x4,
+ PROG_report_dev = 0x8, PROG_plist = 0x10,
+ PROG_taskq = 0x20}
+ progress;
+
+ progress = PROG_init;
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ break;
+ case DDI_RESUME:
+ /* nothing to do for this non-device */
+ return (DDI_SUCCESS);
+ case DDI_PM_RESUME:
+ default:
+ return (DDI_FAILURE);
+ }
+
+ instance = ddi_get_instance(dip);
+ if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
+ DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
+ return (DDI_FAILURE);
+ }
+ vswp = ddi_get_soft_state(vsw_state, instance);
+
+ if (vswp == NULL) {
+ DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
+ goto vsw_attach_fail;
+ }
+
+ vswp->dip = dip;
+ vswp->instance = instance;
+ ddi_set_driver_private(dip, (caddr_t)vswp);
+
+ rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
+
+ progress |= PROG_if_lock;
+
+ /*
+ * User specifies (via MD) an array of switching modes in
+ * decreasing order of preference. Default mode is always
+ * layer 2 (mac switching), so init array with that value.
+ */
+ vswp->smode_idx = 0;
+ for (i = 0; i < NUM_SMODES; i++)
+ vswp->smode[i] = VSW_LAYER2;
+
+ /*
+ * Get the various properties such as physical device name
+ * (vsw-phys-dev), switch mode etc from the MD.
+ */
+ vsw_get_md_properties(vswp);
+
+ /* setup the unicast forwarding database */
+ (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
+ vswp->instance);
+ D2(vswp, "creating unicast hash table (%s)...", hashname);
+ vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
+ mod_hash_null_valdtor, sizeof (void *));
+
+ progress |= PROG_fdb;
+
+ /* setup the multicast fowarding database */
+ (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
+ vswp->instance);
+ D2(vswp, "creating multicast hash table %s)...", hashname);
+ rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
+ vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
+ mod_hash_null_valdtor, sizeof (void *));
+
+ progress |= PROG_mfdb;
+
+ /*
+ * create lock protecting list of multicast addresses
+ * which could come via m_multicst() entry point when plumbed.
+ */
+ mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
+ vswp->mcap = NULL;
+
+ ddi_report_dev(vswp->dip);
+
+ progress |= PROG_report_dev;
+
+ WRITE_ENTER(&vsw_rw);
+ vswp->next = vsw_head;
+ vsw_head = vswp;
+ RW_EXIT(&vsw_rw);
+
+ /* setup the port list */
+ rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
+ vswp->plist.head = NULL;
+
+ progress |= PROG_plist;
+
+ /*
+ * Create the taskq which will process all the VIO
+ * control messages.
+ */
+ (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
+ if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
+ TASKQ_DEFAULTPRI, 0)) == NULL) {
+ cmn_err(CE_WARN, "Unable to create task queue");
+ goto vsw_attach_fail;
+ }
+
+ progress |= PROG_taskq;
+
+ /* select best switching mode */
+ for (i = 0; i < NUM_SMODES; i++) {
+ smode = vswp->smode[i];
+ switch (smode) {
+ case VSW_LAYER2:
+ rv = vsw_setup_layer2(vswp);
+ break;
+
+ case VSW_LAYER2_PROMISC:
+ rv = vsw_setup_layer2(vswp);
+ break;
+
+ case VSW_LAYER3:
+ rv = vsw_setup_layer3(vswp);
+ break;
+
+ default:
+ DERR(vswp, "unknown switch mode");
+ break;
+ }
+
+ if (rv == 0) {
+ vswp->smode_idx = i;
+ break;
+ }
+ }
+
+ if (rv == 1) {
+ cmn_err(CE_WARN, "Unable to setup switching mode");
+ goto vsw_attach_fail;
+ }
+
+ D2(vswp, "Operating in mode %d", vswp->smode[vswp->smode_idx]);
+
+ /*
+ * Register with the MAC layer as a network device so
+ * we can be plumbed if desired.
+ *
+ * Do this in both layer 2 and layer 3 mode.
+ */
+ vswp->if_state &= ~VSW_IF_UP;
+ vswp->if_macp = NULL;
+ vswp->if_mrh = NULL;
+ if (vswp->mdprops & VSW_MD_MACADDR) {
+ if (vsw_mac_register(vswp) != 0) {
+ cmn_err(CE_WARN, "Unable to register as provider "
+ " with MAC layer, continuing with attach");
+ }
+ }
+
+ /*
+ * Now we have everything setup, register for MD change
+ * events.
+ */
+ vsw_mdeg_register(vswp);
+
+ return (DDI_SUCCESS);
+
+vsw_attach_fail:
+ DERR(NULL, "vsw_attach: failed");
+
+ if (progress & PROG_taskq)
+ ddi_taskq_destroy(vswp->taskq_p);
+
+ if (progress & PROG_plist)
+ rw_destroy(&vswp->plist.lockrw);
+
+ if (progress & PROG_report_dev) {
+ ddi_remove_minor_node(dip, NULL);
+ mutex_destroy(&vswp->mca_lock);
+ }
+
+ if (progress & PROG_mfdb) {
+ mod_hash_destroy_hash(vswp->mfdb);
+ vswp->mfdb = NULL;
+ rw_destroy(&vswp->mfdbrw);
+ }
+
+ if (progress & PROG_fdb) {
+ mod_hash_destroy_hash(vswp->fdb);
+ vswp->fdb = NULL;
+ }
+
+ if (progress & PROG_if_lock)
+ rw_destroy(&vswp->if_lockrw);
+
+ ddi_soft_state_free(vsw_state, instance);
+ return (DDI_FAILURE);
+}
+
+static int
+vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ vsw_t **vswpp, *vswp;
+ int instance;
+
+ instance = ddi_get_instance(dip);
+ vswp = ddi_get_soft_state(vsw_state, instance);
+
+ if (vswp == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+ case DDI_SUSPEND:
+ case DDI_PM_SUSPEND:
+ default:
+ return (DDI_FAILURE);
+ }
+
+ D2(vswp, "detaching instance %d", instance);
+
+ if (vswp->mdprops & VSW_MD_MACADDR) {
+ if (vsw_mac_unregister(vswp) != 0) {
+ cmn_err(CE_WARN, "Unable to detach from MAC layer");
+ return (DDI_FAILURE);
+ }
+ }
+ rw_destroy(&vswp->if_lockrw);
+
+ vsw_mdeg_unregister(vswp);
+
+ if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
+ (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) {
+ vsw_mac_detach(vswp);
+ }
+
+ if (vsw_detach_ports(vswp) != 0) {
+ cmn_err(CE_WARN, "Unable to detach ports");
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * Remove this instance from any entries it may be on in
+ * the hash table by using the list of addresses maintained
+ * in the vsw_t structure.
+ */
+ vsw_del_mcst_vsw(vswp);
+
+ vswp->mcap = NULL;
+ mutex_destroy(&vswp->mca_lock);
+
+ /*
+ * By now any pending tasks have finished and the underlying
+ * ldc's have been destroyed, so its safe to delete the control
+ * message taskq.
+ */
+ if (vswp->taskq_p != NULL)
+ ddi_taskq_destroy(vswp->taskq_p);
+
+ /*
+ * At this stage all the data pointers in the hash table
+ * should be NULL, as all the ports have been removed and will
+ * have deleted themselves from the port lists which the data
+ * pointers point to. Hence we can destroy the table using the
+ * default destructors.
+ */
+ D2(vswp, "vsw_detach: destroying hash tables..");
+ mod_hash_destroy_hash(vswp->fdb);
+ vswp->fdb = NULL;
+
+ WRITE_ENTER(&vswp->mfdbrw);
+ mod_hash_destroy_hash(vswp->mfdb);
+ vswp->mfdb = NULL;
+ RW_EXIT(&vswp->mfdbrw);
+ rw_destroy(&vswp->mfdbrw);
+
+ ddi_remove_minor_node(dip, NULL);
+
+ rw_destroy(&vswp->plist.lockrw);
+ WRITE_ENTER(&vsw_rw);
+ for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
+ if (*vswpp == vswp) {
+ *vswpp = vswp->next;
+ break;
+ }
+ }
+ RW_EXIT(&vsw_rw);
+ ddi_soft_state_free(vsw_state, instance);
+
+ return (DDI_SUCCESS);
+}
+
+static int
+vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+ _NOTE(ARGUNUSED(dip))
+
+ vsw_t *vswp = NULL;
+ dev_t dev = (dev_t)arg;
+ int instance;
+
+ instance = getminor(dev);
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
+ *result = NULL;
+ return (DDI_FAILURE);
+ }
+ *result = vswp->dip;
+ return (DDI_SUCCESS);
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(uintptr_t)instance;
+ return (DDI_SUCCESS);
+
+ default:
+ *result = NULL;
+ return (DDI_FAILURE);
+ }
+}
+
+/*
+ * Get the properties from our MD node.
+ */
+static void
+vsw_get_md_properties(vsw_t *vswp)
+{
+ md_t *mdp = NULL;
+ int num_nodes = 0;
+ int len = 0, listsz = 0;
+ int num_vdev = 0;
+ int i, idx;
+ boolean_t found_node = B_FALSE;
+ char *smode = NULL;
+ char *curr_mode = NULL;
+ char *physname = NULL;
+ char *node_name = NULL;
+ char *dev;
+ uint64_t macaddr = 0;
+ uint64_t md_inst, obp_inst;
+ mde_cookie_t *listp = NULL;
+ mde_cookie_t rootnode;
+
+ D1(vswp, "%s: enter", __func__);
+
+ /*
+ * Further down we compare the obp 'reg' property to the
+ * 'cfg-handle' property in the vsw MD node to determine
+ * if the node refers to this particular instance. So if
+ * we can't read the obp value then there is no point
+ * in proceeding further.
+ */
+ if (ddi_prop_exists(DDI_DEV_T_ANY, vswp->dip,
+ DDI_PROP_DONTPASS, reg_propname) != 1) {
+ cmn_err(CE_WARN, "Unable to read %s property "
+ "from OBP device node", reg_propname);
+ return;
+ }
+
+ obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
+ DDI_PROP_DONTPASS, reg_propname, 0);
+
+ D2(vswp, "%s: obp_inst 0x%llx", __func__, obp_inst);
+
+ if ((mdp = md_get_handle()) == NULL) {
+ DERR(vswp, "%s: unable to init MD", __func__);
+ return;
+ }
+
+ if ((num_nodes = md_node_count(mdp)) <= 0) {
+ DERR(vswp, "%s: invalid number of nodes found %d",
+ __func__, num_nodes);
+ (void) md_fini_handle(mdp);
+ return;
+ }
+
+ D2(vswp, "%s: %d nodes in total in MD", __func__, num_nodes);
+
+ /* allocate enough space for node list */
+ listsz = num_nodes * sizeof (mde_cookie_t);
+ listp = kmem_zalloc(listsz, KM_SLEEP);
+
+ rootnode = md_root_node(mdp);
+
+ /* Get the list of virtual devices */
+ num_vdev = md_scan_dag(mdp, rootnode,
+ md_find_name(mdp, vdev_propname),
+ md_find_name(mdp, "fwd"), listp);
+
+ if (num_vdev <= 0) {
+ DERR(vswp, "%s: didn't find any virtual-device nodes in MD",
+ __func__);
+ goto md_prop_exit;
+ }
+
+ D2(vswp, "%s: %d virtual-device nodes found", __func__, num_vdev);
+
+ /* Look for the virtual switch nodes in the list */
+ for (idx = 0; idx < num_vdev; idx++) {
+ if (md_get_prop_str(mdp, listp[idx],
+ "name", &node_name) != 0) {
+ DERR(vswp, "%s: unable to get node name", __func__);
+ continue;
+
+ }
+
+ if (strcmp(node_name, vsw_propname) == 0) {
+ /* Virtual switch node */
+ if (md_get_prop_val(mdp, listp[idx],
+ "cfg-handle", &md_inst) != 0) {
+ DERR(vswp, "%s: unable to get cfg-handle from"
+ " node %d", __func__, idx);
+ goto md_prop_exit;
+ } else if (md_inst == obp_inst) {
+ D2(vswp, "%s: found matching node (%d)"
+ " 0x%llx == 0x%llx", __func__, idx,
+ md_inst, obp_inst);
+ found_node = B_TRUE;
+ break;
+ }
+ }
+ }
+
+ if (!found_node) {
+ DWARN(vswp, "%s: couldn't find correct vsw node", __func__);
+ goto md_prop_exit;
+ }
+
+ /*
+ * Now, having found the correct node, get the various properties.
+ */
+
+ if (md_get_prop_data(mdp, listp[idx], physdev_propname,
+ (uint8_t **)(&physname), &len) != 0) {
+ cmn_err(CE_WARN, "%s: unable to get name(s) of physical "
+ "device(s) from MD", __func__);
+ } else if ((strlen(physname) + 1) > LIFNAMSIZ) {
+ cmn_err(CE_WARN, "%s is too long a device name", physname);
+ } else {
+ (void) strncpy(vswp->physname, physname, strlen(physname) + 1);
+ vswp->mdprops |= VSW_MD_PHYSNAME;
+ D2(vswp, "%s: using first device specified (%s)",
+ __func__, vswp->physname);
+ }
+
+
+#ifdef DEBUG
+ /*
+ * As a temporary measure to aid testing we check to see if there
+ * is a vsw.conf file present. If there is we use the value of the
+ * vsw_physname property in the file as the name of the physical
+ * device, overriding the value from the MD.
+ *
+ * There may be multiple devices listed, but for the moment
+ * we just use the first one.
+ */
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
+ "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
+ if ((strlen(dev) + 1) > LIFNAMSIZ) {
+ cmn_err(CE_WARN, "%s is too long a device name", dev);
+ } else {
+ cmn_err(CE_NOTE, "%s: using device name (%s) from "
+ "config file", __func__, dev);
+
+ (void) strncpy(vswp->physname, dev, strlen(dev) + 1);
+ vswp->mdprops |= VSW_MD_PHYSNAME;
+ }
+
+ ddi_prop_free(dev);
+
+ }
+#endif
+
+ /* local mac address */
+ if (md_get_prop_val(mdp, listp[idx],
+ macaddr_propname, &macaddr) != 0) {
+ cmn_err(CE_WARN, "%s: unable to get local MAC address",
+ __func__);
+ } else {
+ READ_ENTER(&vswp->if_lockrw);
+ for (i = ETHERADDRL - 1; i >= 0; i--) {
+ vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
+ macaddr >>= 8;
+ }
+ RW_EXIT(&vswp->if_lockrw);
+ vswp->mdprops |= VSW_MD_MACADDR;
+ }
+
+ /*
+ * Get the switch-mode property. The modes are listed in
+ * decreasing order of preference, i.e. prefered mode is
+ * first item in list.
+ */
+ len = 0;
+ if (md_get_prop_data(mdp, listp[idx], smode_propname,
+ (uint8_t **)(&smode), &len) != 0) {
+ /*
+ * Unable to get switch-mode property, so just use
+ * default values which vswp->smode[] array has already
+ * been pre-populated with, namely layer2.
+ */
+ cmn_err(CE_WARN, "%s: unable to get switch mode property, "
+ "defaulting to layer 2 mode", __func__);
+ } else {
+ i = 0;
+ curr_mode = smode;
+ /*
+ * Modes of operation:
+ * 'switched' - layer 2 switching, underlying HW in
+ * non-promiscuous mode.
+ * 'promiscuous' - layer 2 switching, underlying HW in
+ * promiscuous mode.
+ * 'routed' - layer 3 (i.e. IP) routing, underlying HW
+ * in non-promiscuous mode.
+ */
+ while ((curr_mode < (smode + len)) && (i < NUM_SMODES)) {
+ D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
+ if (strcmp(curr_mode, "switched") == 0)
+ vswp->smode[i] = VSW_LAYER2;
+ else if (strcmp(curr_mode, "promiscuous") == 0)
+ vswp->smode[i] = VSW_LAYER2_PROMISC;
+ else if (strcmp(curr_mode, "routed") == 0)
+ vswp->smode[i] = VSW_LAYER3;
+ else {
+ DERR(vswp, "%s: unknown mode %s",
+ __func__, curr_mode);
+ /* default to layer 2 */
+ vswp->smode[i] = VSW_LAYER2;
+ }
+ curr_mode += strlen(curr_mode) + 1;
+ i++;
+ }
+
+ vswp->mdprops |= VSW_MD_SMODE;
+ }
+
+md_prop_exit:
+ (void) md_fini_handle(mdp);
+
+ kmem_free(listp, listsz);
+
+ D1(vswp, "%s: exit", __func__);
+}
+
+static int
+vsw_setup_layer2(vsw_t *vswp)
+{
+ int rv = 0;
+
+ D1(vswp, "%s: enter", __func__);
+
+ vsw_switch_frame = vsw_switch_l2_frame;
+
+ /*
+ * Attempt to link into the MAC layer so we can get
+ * and send packets out over the physical adapter.
+ */
+ if (vswp->mdprops & VSW_MD_PHYSNAME) {
+ if (vsw_mac_attach(vswp) != 0) {
+ /*
+ * Registration with the MAC layer has failed,
+ * so return 1 so that can fall back to next
+ * prefered switching method.
+ */
+ cmn_err(CE_WARN, "!unable to join as MAC layer "
+ "client, continuing with attach");
+ rv = 1;
+ }
+ } else {
+ /* No physical device name found in MD */
+ DERR(vswp, "%s: no physical device name specified", __func__);
+ rv = 1;
+ }
+
+ D1(vswp, "%s: exit", __func__);
+
+ return (rv);
+}
+
+static int
+vsw_setup_layer3(vsw_t *vswp)
+{
+ D1(vswp, "%s: enter", __func__);
+
+ D2(vswp, "%s: operating in layer 3 mode", __func__);
+ vsw_switch_frame = vsw_switch_l3_frame;
+
+ D1(vswp, "%s: exit", __func__);
+
+ return (0);
+}
+
+/*
+ * Link into the MAC layer to gain access to the services provided by
+ * the underlying physical device driver (which should also have
+ * registered with the MAC layer).
+ *
+ * Only when in layer 2 mode.
+ */
+static int
+vsw_mac_attach(vsw_t *vswp)
+{
+ D1(vswp, "vsw_mac_attach: enter");
+
+ vswp->mh = NULL;
+ vswp->mrh = NULL;
+ vswp->mnh = NULL;
+
+ ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);
+
+ if ((mac_open(vswp->physname, 0, &vswp->mh)) != 0) {
+ cmn_err(CE_WARN, "mac_open %s failed", vswp->physname);
+ goto mac_fail_exit;
+ }
+
+ D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);
+
+ /* register for changes in the interface */
+ vswp->mnh = mac_notify_add(vswp->mh, vsw_notify_cb, (void *)vswp);
+
+ /* register our rx callback function */
+ vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
+
+ /* get the MAC tx fn */
+ vswp->txinfo = mac_tx_get(vswp->mh);
+
+ /* start the interface */
+ if (mac_start(vswp->mh) != 0) {
+ cmn_err(CE_WARN, "could not start mac interface");
+ goto mac_fail_exit;
+ }
+
+ /* get and store original promisc setting */
+ vswp->init_promisc = mac_promisc_get(vswp->mh, MAC_DEVPROMISC);
+
+ /*
+ * FUTURE: When we have the ability to set multiple unicast
+ * mac address then we won't have to set the device into
+ * promisc mode, but for the moment its the only way we.
+ * can see pkts that logical domains we are serving are
+ * interested in.
+ */
+ if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) &&
+ (vswp->init_promisc == B_FALSE)) {
+ DERR(vswp, "vsw_mac_attach: enabling promisc mode..");
+
+ if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
+ DERR(vswp, "vsw_mac_attach: unable to set device"
+ " into promiscuous mode");
+ goto mac_fail_exit;
+ }
+ }
+
+ D1(vswp, "vsw_mac_attach: exit");
+ return (0);
+
+mac_fail_exit:
+ if (vswp->mh != NULL) {
+ mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC);
+ if (vswp->mrh != NULL)
+ mac_rx_remove(vswp->mh, vswp->mrh);
+
+ if (vswp->mnh != NULL)
+ mac_notify_remove(vswp->mh, vswp->mnh);
+
+ mac_close(vswp->mh);
+ }
+
+ vswp->mrh = NULL;
+ vswp->mnh = NULL;
+ vswp->mh = NULL;
+ vswp->txinfo = NULL;
+
+ D1(vswp, "vsw_mac_attach: fail exit");
+ return (1);
+}
+
+static void
+vsw_mac_detach(vsw_t *vswp)
+{
+ D1(vswp, "vsw_mac_detach: enter");
+
+ if (vswp->mh != NULL) {
+ /* restore promisc to original setting */
+ mac_promisc_set(vswp->mh, vswp->init_promisc, MAC_DEVPROMISC);
+ if (vswp->mrh != NULL)
+ mac_rx_remove(vswp->mh, vswp->mrh);
+
+ if (vswp->mnh != NULL)
+ mac_notify_remove(vswp->mh, vswp->mnh);
+
+ mac_close(vswp->mh);
+ }
+
+ vswp->mrh = NULL;
+ vswp->mnh = NULL;
+ vswp->mh = NULL;
+ vswp->txinfo = NULL;
+
+ D1(vswp, "vsw_mac_detach: exit");
+}
+
+/*
+ * Get notified of changes to the interface.
+ *
+ * For the moment we brute force the interface back
+ * into promisc mode if it is unset (e.g. by snoop).
+ * When we have the ability to set multiple mac addresses,
+ * we will need to see if this is necessary.
+ *
+ * Only MAC_NOTE_PROMISC is acted upon; all other notification
+ * types are ignored.
+ */
+static void
+vsw_notify_cb(void *arg, mac_notify_type_t type)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	switch (type) {
+	case MAC_NOTE_PROMISC:
+		/* refresh the cached tx routine; it may change with mode */
+		vswp->txinfo = mac_tx_get(vswp->mh);
+		if (mac_promisc_get(vswp->mh, MAC_DEVPROMISC) == B_TRUE) {
+			D2(vswp, "%s: still in PROMISC mode", __func__);
+		} else {
+			D2(vswp, "%s: now in NON-PROMISC mode", __func__);
+			D2(vswp, "...re-enabling");
+			mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * receive callback routine. Invoked by MAC layer when there
+ * are pkts being passed up from physical device.
+ *
+ * The whole mblk chain is handed to the switching code with the
+ * physical device marked as the source (VSW_PHYSDEV).
+ *
+ * PERF: It may be more efficient when the card is in promisc
+ * mode to check the dest address of the pkts here (against
+ * the FDB) rather than checking later. Needs to be investigated.
+ */
+static void
+vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
+{
+	_NOTE(ARGUNUSED(mrh))
+
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	ASSERT(vswp != NULL);
+
+	D1(vswp, "vsw_rx_cb: enter");
+
+	/* switch the chain of packets received */
+	vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);
+
+	D1(vswp, "vsw_rx_cb: exit");
+}
+
+/*
+ * Send a message out over the physical device via the MAC layer.
+ *
+ * Walks the b_next chain one mblk at a time through the cached tx
+ * routine (vswp->txinfo).  If the tx routine hands an mblk back
+ * (i.e. it could not be sent), the remainder of the chain is
+ * re-linked onto it and returned to the caller.
+ *
+ * Returns any mblks that it was unable to transmit.
+ */
+static mblk_t *
+vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
+{
+	const mac_txinfo_t	*mtp;
+	mblk_t			*nextp;
+
+	if (vswp->mh == NULL) {
+		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
+		return (mp);
+	} else {
+		for (;;) {
+			/* detach head of chain before handing it down */
+			nextp = mp->b_next;
+			mp->b_next = NULL;
+
+			mtp = vswp->txinfo;
+			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
+				/* untransmitted: rejoin rest of chain */
+				mp->b_next = nextp;
+				break;
+			}
+
+			if ((mp = nextp) == NULL)
+				break;
+
+		}
+
+	}
+
+	return (mp);
+}
+
+/*
+ * Register with the MAC layer as a network device, so we
+ * can be plumbed if necessary.
+ *
+ * Allocates and populates the mac_t (saved in vswp->if_macp for
+ * vsw_mac_unregister() to free) and registers the m_* entry
+ * points.  Returns the value of mac_register() (0 on success).
+ */
+static int
+vsw_mac_register(vsw_t *vswp)
+{
+	mac_t		*macp = NULL;
+	mac_info_t	*mip = NULL;
+	int		rv = 0;
+
+	D1(vswp, "%s: enter", __func__);
+
+	macp = kmem_zalloc(sizeof (mac_t), KM_SLEEP);
+
+	/*
+	 * Setup the m_info fields.
+	 */
+	mip = &(macp->m_info);
+	mip->mi_media = DL_ETHER;
+	mip->mi_sdu_min = 0;
+	mip->mi_sdu_max = ETHERMTU;
+	mip->mi_cksum = 0;
+	mip->mi_poll = DL_CAPAB_POLL;
+
+	mip->mi_addr_length = ETHERADDRL;
+	bcopy(&etherbroadcastaddr, mip->mi_brdcst_addr, ETHERADDRL);
+
+	/* current i/f address is protected by if_lockrw */
+	READ_ENTER(&vswp->if_lockrw);
+	bcopy(&vswp->if_addr, mip->mi_unicst_addr, ETHERADDRL);
+	RW_EXIT(&vswp->if_lockrw);
+
+	MAC_STAT_MIB(mip->mi_stat);
+	MAC_STAT_ETHER(mip->mi_stat);
+
+	/* entry points */
+	macp->m_stat = vsw_m_stat;
+	macp->m_stop = vsw_m_stop;
+	macp->m_start = vsw_m_start;
+	macp->m_unicst = vsw_m_unicst;
+	macp->m_multicst = vsw_m_multicst;
+	macp->m_promisc = vsw_m_promisc;
+	macp->m_tx = vsw_m_tx;
+	macp->m_resources = vsw_m_resources;
+	macp->m_ioctl = vsw_m_ioctl;
+
+	macp->m_port = 0;
+	macp->m_dip = vswp->dip;
+	macp->m_ident = MAC_IDENT;
+	macp->m_driver = vswp;
+
+	vswp->if_macp = macp;
+
+	/* register */
+	rv = mac_register(macp);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (rv);
+}
+
+/*
+ * Unregister the vsw interface from the MAC framework and free the
+ * mac_t allocated by vsw_mac_register().  On failure the interface
+ * state is left untouched and the mac_unregister() error returned.
+ *
+ * Returns 0 on success (or if nothing was registered).
+ */
+static int
+vsw_mac_unregister(vsw_t *vswp)
+{
+	int		rv = 0;
+
+	D1(vswp, "%s: enter", __func__);
+
+	WRITE_ENTER(&vswp->if_lockrw);
+
+	if (vswp->if_macp != NULL) {
+		rv = mac_unregister(vswp->if_macp);
+		if (rv != 0) {
+			DWARN(vswp, "%s: unable to unregister from MAC "
+				"framework", __func__);
+
+			RW_EXIT(&vswp->if_lockrw);
+			D1(vswp, "%s: fail exit", __func__);
+			return (rv);
+		}
+
+		/* mark i/f as down and promisc off */
+		vswp->if_state &= ~VSW_IF_UP;
+
+		kmem_free(vswp->if_macp, sizeof (mac_t));
+		vswp->if_macp = NULL;
+	}
+	RW_EXIT(&vswp->if_lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (rv);
+}
+
+/*
+ * MAC entry point: return the requested statistic.
+ *
+ * Stats are passed straight through from the underlying physical
+ * device; 0 is returned if we are not attached to one or if the
+ * device does not support the given stat.
+ */
+static uint64_t
+vsw_m_stat(void *arg, enum mac_stat stat)
+{
+	vsw_t			*vswp = (vsw_t *)arg;
+	const mac_info_t	*mip;
+
+	D1(vswp, "%s: enter", __func__);
+
+	if (vswp->mh != NULL)
+		mip = mac_info(vswp->mh);
+	else
+		return (0);
+
+	/* stat not supported by underlying device */
+	if (!mip->mi_stat[stat])
+		return (0);
+
+	/* return stats from underlying device */
+	return (mac_stat_get(vswp->mh, stat));
+
+}
+
+/*
+ * MAC entry point: mark the vsw interface as down.
+ */
+static void
+vsw_m_stop(void *arg)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	D1(vswp, "%s: enter", __func__);
+
+	WRITE_ENTER(&vswp->if_lockrw);
+	vswp->if_state &= ~VSW_IF_UP;
+	RW_EXIT(&vswp->if_lockrw);
+
+	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
+}
+
+/*
+ * MAC entry point: mark the vsw interface as up.  Always succeeds.
+ */
+static int
+vsw_m_start(void *arg)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	D1(vswp, "%s: enter", __func__);
+
+	WRITE_ENTER(&vswp->if_lockrw);
+	vswp->if_state |= VSW_IF_UP;
+	RW_EXIT(&vswp->if_lockrw);
+
+	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
+	return (0);
+}
+
+/*
+ * Change the local interface address.
+ *
+ * Simply records the new address under if_lockrw; no hardware
+ * programming is done here.  Always succeeds.
+ */
+static int
+vsw_m_unicst(void *arg, const uint8_t *macaddr)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	D1(vswp, "%s: enter", __func__);
+
+	WRITE_ENTER(&vswp->if_lockrw);
+	ether_copy(macaddr, &vswp->if_addr);
+	RW_EXIT(&vswp->if_lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * MAC entry point: add (add == B_TRUE) or remove a multicast
+ * address for the vsw interface.
+ *
+ * The address is tracked in three places: the multicast hash table
+ * (keyed by the address packed into a uint64_t), the per-instance
+ * mcap list, and - when we are attached to a physical device - the
+ * device itself.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+	mcst_addr_t	*mcst_p = NULL;
+	uint64_t	addr = 0x0;
+	int		i;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/*
+	 * Convert address into form that can be used
+	 * as hash table key.
+	 */
+	for (i = 0; i < ETHERADDRL; i++) {
+		addr = (addr << 8) | mca[i];
+	}
+
+	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
+
+	if (add) {
+		D2(vswp, "%s: adding multicast", __func__);
+		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
+			/*
+			 * Update the list of multicast addresses
+			 * contained within the vsw_t structure to
+			 * include this new one.
+			 */
+			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
+			if (mcst_p == NULL) {
+				DERR(vswp, "%s unable to alloc mem", __func__);
+				/*
+				 * Roll back the hash table entry added
+				 * above; otherwise we would report failure
+				 * while leaving the address registered.
+				 */
+				(void) vsw_del_mcst(vswp, VSW_LOCALDEV,
+				    addr, NULL);
+				return (1);
+			}
+			mcst_p->addr = addr;
+
+			mutex_enter(&vswp->mca_lock);
+			mcst_p->nextp = vswp->mcap;
+			vswp->mcap = mcst_p;
+			mutex_exit(&vswp->mca_lock);
+
+			/*
+			 * Call into the underlying driver to program the
+			 * address into HW.
+			 *
+			 * Note:
+			 * Can safely ignore the return value as the card
+			 * will for the moment always be in promisc mode.
+			 * When we can program multiple MAC addresses into the
+			 * HW then we will need to care about the return
+			 * value here.
+			 */
+			if (vswp->mh != NULL)
+				(void) mac_multicst_add(vswp->mh, mca);
+		}
+	} else {
+		D2(vswp, "%s: removing multicast", __func__);
+		/*
+		 * Remove the address from the hash table..
+		 */
+		if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
+
+			/*
+			 * ..and then from the list maintained in the
+			 * vsw_t structure.
+			 */
+			vsw_del_addr(VSW_LOCALDEV, vswp, addr);
+
+			if (vswp->mh != NULL)
+				(void) mac_multicst_remove(vswp->mh, mca);
+		}
+	}
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * MAC entry point: record whether the vsw interface itself has
+ * been placed in promiscuous mode.  Always succeeds.
+ */
+static int
+vsw_m_promisc(void *arg, boolean_t on)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	D1(vswp, "%s: enter", __func__);
+
+	WRITE_ENTER(&vswp->if_lockrw);
+	if (on)
+		vswp->if_state |= VSW_IF_PROMISC;
+	else
+		vswp->if_state &= ~VSW_IF_PROMISC;
+	RW_EXIT(&vswp->if_lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * MAC entry point: transmit packets originating from the local
+ * plumbed interface.  The chain is handed to the switching code
+ * (source VSW_LOCALDEV); nothing is ever handed back to MAC.
+ */
+static mblk_t *
+vsw_m_tx(void *arg, mblk_t *mp)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	D1(vswp, "%s: enter", __func__);
+
+	vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (NULL);
+}
+
+/*
+ * MAC entry point: advertise our receive resources.  Registers a
+ * single non-blanking rx fifo and stashes the returned handle in
+ * if_mrh.
+ */
+static void
+vsw_m_resources(void *arg)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+	mac_rx_fifo_t	mrf;
+
+	D1(vswp, "%s: enter", __func__);
+
+	mrf.mrf_type = MAC_RX_FIFO;
+	mrf.mrf_blank = NULL;
+	mrf.mrf_arg = (void *)vswp;
+	mrf.mrf_normal_blank_time = 0;
+	mrf.mrf_normal_pkt_count = 0;
+
+	WRITE_ENTER(&vswp->if_lockrw);
+	vswp->if_mrh = mac_resource_add(vswp->if_macp, (mac_resource_t *)&mrf);
+	RW_EXIT(&vswp->if_lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * MAC entry point: ioctl handler.  No ioctls are supported; every
+ * request is nak'd with ENOTSUP.
+ */
+static void
+vsw_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
+{
+	vsw_t		*vswp = (vsw_t *)arg;
+
+	D1(vswp, "%s: enter", __func__);
+
+	miocnak(q, mp, 0, ENOTSUP);
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * Register for machine description (MD) updates.
+ *
+ * Builds a per-instance copy of the global property spec template
+ * (patched with our instance number) and registers vsw_mdeg_cb()
+ * against matching "virtual-device" nodes.  On success the spec and
+ * the mdeg handle are saved so vsw_mdeg_unregister() can undo this;
+ * on any failure the function cleans up and returns silently.
+ */
+static void
+vsw_mdeg_register(vsw_t *vswp)
+{
+	mdeg_prop_spec_t	*pspecp;
+	mdeg_node_spec_t	*inst_specp;
+	mdeg_handle_t		mdeg_hdl;
+	size_t			templatesz;
+	int			inst, rv;
+
+	D1(vswp, "%s: enter", __func__);
+
+	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
+		DDI_PROP_DONTPASS, reg_propname, -1);
+	if (inst == -1) {
+		DERR(vswp, "%s: unable to get %s property",
+			__func__, reg_propname);
+		return;
+	}
+
+	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);
+
+	/*
+	 * Allocate and initialize a per-instance copy
+	 * of the global property spec array that will
+	 * uniquely identify this vsw instance.
+	 */
+	templatesz = sizeof (vsw_prop_template);
+	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
+
+	bcopy(vsw_prop_template, pspecp, templatesz);
+
+	VSW_SET_MDEG_PROP_INST(pspecp, inst);
+
+	/* initialize the complete prop spec structure */
+	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
+	inst_specp->namep = "virtual-device";
+	inst_specp->specp = pspecp;
+
+	/* perform the registration */
+	rv = mdeg_register(inst_specp, &vport_match, vsw_mdeg_cb,
+	    (void *)vswp, &mdeg_hdl);
+
+	if (rv != MDEG_SUCCESS) {
+		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
+		kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
+		kmem_free(pspecp, templatesz);
+		return;
+	}
+
+	/* save off data that will be needed later */
+	vswp->inst_spec = inst_specp;
+	vswp->mdeg_hdl = mdeg_hdl;
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * Unregister from MD event notification and release the
+ * per-instance registration data allocated by vsw_mdeg_register().
+ *
+ * Note: inst_spec must be NULL-checked before specp is reached
+ * through it - registration may never have completed.  The original
+ * code dereferenced vswp->inst_spec->specp first.
+ */
+static void
+vsw_mdeg_unregister(vsw_t *vswp)
+{
+	D1(vswp, "vsw_mdeg_unregister: enter");
+
+	(void) mdeg_unregister(vswp->mdeg_hdl);
+
+	if (vswp->inst_spec != NULL) {
+		if (vswp->inst_spec->specp != NULL) {
+			(void) kmem_free(vswp->inst_spec->specp,
+				sizeof (vsw_prop_template));
+			vswp->inst_spec->specp = NULL;
+		}
+
+		(void) kmem_free(vswp->inst_spec,
+			sizeof (mdeg_node_spec_t));
+		vswp->inst_spec = NULL;
+	}
+
+	D1(vswp, "vsw_mdeg_unregister: exit");
+}
+
+/*
+ * Callback invoked by the mdeg framework when the machine
+ * description changes.  Attaches each newly added port node and
+ * detaches each removed one (looked up via its "id" property).
+ *
+ * Returns MDEG_SUCCESS unless resp is NULL; per-port failures are
+ * logged and skipped rather than aborting the whole update.
+ */
+static int
+vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
+{
+	vsw_t		*vswp;
+	int		idx;
+	md_t		*mdp;
+	mde_cookie_t	node;
+	uint64_t	inst;
+
+	if (resp == NULL)
+		return (MDEG_FAILURE);
+
+	vswp = (vsw_t *)cb_argp;
+
+	D1(vswp, "%s: added %d : removed %d : matched %d",
+		__func__, resp->added.nelem, resp->removed.nelem,
+		resp->match_prev.nelem);
+
+	/* process added ports */
+	for (idx = 0; idx < resp->added.nelem; idx++) {
+		mdp = resp->added.mdp;
+		node = resp->added.mdep[idx];
+
+		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
+
+		if (vsw_port_add(vswp, mdp, &node) != 0) {
+			cmn_err(CE_WARN, "Unable to add new port (0x%lx)",
+					node);
+		}
+	}
+
+	/* process removed ports */
+	for (idx = 0; idx < resp->removed.nelem; idx++) {
+		mdp = resp->removed.mdp;
+		node = resp->removed.mdep[idx];
+
+		/* the "id" property identifies which port instance to drop */
+		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
+			DERR(vswp, "%s: prop(%s) not found port(%d)",
+				__func__, id_propname, idx);
+			continue;
+		}
+
+		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
+
+		if (vsw_port_detach(vswp, inst) != 0) {
+			cmn_err(CE_WARN, "Unable to remove port %ld", inst);
+		}
+	}
+
+	/*
+	 * Currently no support for updating already active ports.
+	 * So, ignore the match_curr and match_prev arrays for now.
+	 */
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (MDEG_SUCCESS);
+}
+
+/*
+ * Add a new port to the system.
+ *
+ * Reads the port's instance id, the ldc id from the first channel
+ * endpoint node beneath it, and the remote mac address, then
+ * attaches the port via vsw_port_attach().
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+int
+vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
+{
+	uint64_t		ldc_id;
+	uint8_t			*addrp;
+	int			i, addrsz;
+	int			num_nodes = 0, nchan = 0;
+	int			listsz = 0;
+	mde_cookie_t		*listp = NULL;
+	struct ether_addr	ea;
+	uint64_t		macaddr;
+	uint64_t		inst = 0;
+	vsw_port_t		*port;
+
+	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
+		DWARN(vswp, "%s: prop(%s) not found", __func__,
+			id_propname);
+		return (1);
+	}
+
+	/*
+	 * Find the channel endpoint node(s) (which should be under this
+	 * port node) which contain the channel id(s).
+	 */
+	if ((num_nodes = md_node_count(mdp)) <= 0) {
+		DERR(vswp, "%s: invalid number of nodes found (%d)",
+			__func__, num_nodes);
+		return (1);
+	}
+
+	/* allocate enough space for node list */
+	listsz = num_nodes * sizeof (mde_cookie_t);
+	listp = kmem_zalloc(listsz, KM_SLEEP);
+
+	nchan = md_scan_dag(mdp, *node,
+		md_find_name(mdp, chan_propname),
+		md_find_name(mdp, "fwd"), listp);
+
+	if (nchan <= 0) {
+		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
+		kmem_free(listp, listsz);
+		return (1);
+	}
+
+	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
+
+	/* use property from first node found */
+	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
+		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
+			id_propname);
+		kmem_free(listp, listsz);
+		return (1);
+	}
+
+	/* don't need list any more */
+	kmem_free(listp, listsz);
+
+	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
+
+	/* read mac-address property */
+	if (md_get_prop_data(mdp, *node, remaddr_propname,
+					&addrp, &addrsz)) {
+		DWARN(vswp, "%s: prop(%s) not found",
+				__func__, remaddr_propname);
+		return (1);
+	}
+
+	if (addrsz < ETHERADDRL) {
+		DWARN(vswp, "%s: invalid address size", __func__);
+		return (1);
+	}
+
+	/* NOTE(review): assumes addrp is 8-byte aligned - confirm MD layout */
+	macaddr = *((uint64_t *)addrp);
+	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
+
+	/* unpack the 64-bit value into octets, least significant last */
+	for (i = ETHERADDRL - 1; i >= 0; i--) {
+		ea.ether_addr_octet[i] = macaddr & 0xFF;
+		macaddr >>= 8;
+	}
+
+	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
+		DERR(vswp, "%s: failed to attach port", __func__);
+		return (1);
+	}
+
+	port = vsw_lookup_port(vswp, (int)inst);
+
+	/* just successfuly created the port, so it should exist */
+	ASSERT(port != NULL);
+
+	return (0);
+}
+
+/*
+ * Attach the specified port.
+ *
+ * Allocates and initialises a vsw_port_t, attaches up to
+ * VSW_PORT_MAX_LDCS of the supplied channels to it, creates its
+ * FDB entry, links it onto the instance port list and then brings
+ * the channels up.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
+struct ether_addr *macaddr)
+{
+	vsw_port_list_t		*plist = &vswp->plist;
+	vsw_port_t		*port, **prev_port;
+	int			i;
+
+	D1(vswp, "%s: enter : port %d", __func__, p_instance);
+
+	/* port already exists? */
+	READ_ENTER(&plist->lockrw);
+	for (port = plist->head; port != NULL; port = port->p_next) {
+		if (port->p_instance == p_instance) {
+			DWARN(vswp, "%s: port instance %d already attached",
+				__func__, p_instance);
+			RW_EXIT(&plist->lockrw);
+			return (1);
+		}
+	}
+	RW_EXIT(&plist->lockrw);
+
+	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
+	port->p_vswp = vswp;
+	port->p_instance = p_instance;
+	port->p_ldclist.num_ldcs = 0;
+	port->p_ldclist.head = NULL;
+
+	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);
+
+	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
+
+	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);
+
+	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
+	port->state = VSW_PORT_INIT;
+
+	/* cap the number of channels at what a port can hold */
+	if (nids > VSW_PORT_MAX_LDCS) {
+		D2(vswp, "%s: using first of %d ldc ids",
+			__func__, nids);
+		nids = VSW_PORT_MAX_LDCS;
+	}
+
+	D2(vswp, "%s: %d nids", __func__, nids);
+	for (i = 0; i < nids; i++) {
+		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
+		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
+			DERR(vswp, "%s: ldc_attach failed", __func__);
+
+			/* undo all the port-level initialisation above */
+			rw_destroy(&port->p_ldclist.lockrw);
+
+			cv_destroy(&port->ref_cv);
+			mutex_destroy(&port->ref_lock);
+
+			cv_destroy(&port->state_cv);
+			mutex_destroy(&port->state_lock);
+
+			mutex_destroy(&port->tx_lock);
+			mutex_destroy(&port->mca_lock);
+			kmem_free(port, sizeof (vsw_port_t));
+			return (1);
+		}
+	}
+
+	ether_copy(macaddr, &port->p_macaddr);
+
+	WRITE_ENTER(&plist->lockrw);
+
+	/* create the fdb entry for this port/mac address */
+	(void) vsw_add_fdb(vswp, port);
+
+	/* link it into the list of ports for this vsw instance */
+	prev_port = (vsw_port_t **)(&plist->head);
+	port->p_next = *prev_port;
+	*prev_port = port;
+	plist->num_ports++;
+	RW_EXIT(&plist->lockrw);
+
+	/*
+	 * Initialise the port and any ldc's under it.
+	 */
+	(void) vsw_init_ldcs(port);
+
+	D1(vswp, "%s: exit", __func__);
+	return (0);
+}
+
+/*
+ * Detach the specified port.
+ *
+ * Unlinks the port from the instance port list, removes its FDB
+ * entry and multicast addresses, then deletes it.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_port_detach(vsw_t *vswp, int p_instance)
+{
+	vsw_port_t	*port = NULL;
+	vsw_port_list_t	*plist = &vswp->plist;
+
+	D1(vswp, "%s: enter: port id %d", __func__, p_instance);
+
+	WRITE_ENTER(&plist->lockrw);
+
+	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
+		RW_EXIT(&plist->lockrw);
+		return (1);
+	}
+
+	if (vsw_plist_del_node(vswp, port)) {
+		RW_EXIT(&plist->lockrw);
+		return (1);
+	}
+
+	/* Remove the fdb entry for this port/mac address */
+	(void) vsw_del_fdb(vswp, port);
+
+	/* Remove any multicast addresses.. */
+	vsw_del_mcst_port(port);
+
+	/*
+	 * No longer need to hold lock on port list now that we
+	 * have unlinked the target port from the list.
+	 */
+	RW_EXIT(&plist->lockrw);
+
+	if (vsw_port_delete(port)) {
+		return (1);
+	}
+
+	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
+	return (0);
+}
+
+/*
+ * Detach all active ports.
+ *
+ * Repeatedly unlinks and deletes the list head until the list is
+ * empty.  The list lock is dropped around vsw_port_delete() (which
+ * may block draining callbacks/tasks) and re-taken for the next
+ * iteration.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_detach_ports(vsw_t *vswp)
+{
+	vsw_port_list_t 	*plist = &vswp->plist;
+	vsw_port_t		*port = NULL;
+
+	D1(vswp, "%s: enter", __func__);
+
+	WRITE_ENTER(&plist->lockrw);
+
+	while ((port = plist->head) != NULL) {
+		if (vsw_plist_del_node(vswp, port)) {
+			DERR(vswp, "%s: Error deleting port %d"
+				" from port list", __func__,
+				port->p_instance);
+			RW_EXIT(&plist->lockrw);
+			return (1);
+		}
+
+		/* Remove the fdb entry for this port/mac address */
+		(void) vsw_del_fdb(vswp, port);
+
+		/* Remove any multicast addresses.. */
+		vsw_del_mcst_port(port);
+
+		/*
+		 * No longer need to hold the lock on the port list
+		 * now that we have unlinked the target port from the
+		 * list.
+		 */
+		RW_EXIT(&plist->lockrw);
+		if (vsw_port_delete(port)) {
+			DERR(vswp, "%s: Error deleting port %d",
+				__func__, port->p_instance);
+			return (1);
+		}
+		WRITE_ENTER(&plist->lockrw);
+	}
+	RW_EXIT(&plist->lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * Delete the specified port.
+ *
+ * Quiesces the port's channels, waits for outstanding taskq work
+ * and references to drain, detaches every ldc under the port and
+ * finally frees the port structure itself.  Caller must already
+ * have unlinked the port from the instance port list.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_port_delete(vsw_port_t *port)
+{
+	vsw_ldc_list_t 	*ldcl;
+	vsw_t		*vswp = port->p_vswp;
+
+	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);
+
+	(void) vsw_uninit_ldcs(port);
+
+	/*
+	 * Wait for any pending ctrl msg tasks which reference this
+	 * port to finish.
+	 */
+	if (vsw_drain_port_taskq(port))
+		return (1);
+
+	/*
+	 * Wait for port reference count to hit zero.
+	 */
+	mutex_enter(&port->ref_lock);
+	while (port->ref_cnt != 0)
+		cv_wait(&port->ref_cv, &port->ref_lock);
+	mutex_exit(&port->ref_lock);
+
+	/*
+	 * Wait for any active callbacks to finish
+	 */
+	if (vsw_drain_ldcs(port))
+		return (1);
+
+	ldcl = &port->p_ldclist;
+	WRITE_ENTER(&ldcl->lockrw);
+	while (ldcl->num_ldcs > 0) {
+		/* fixed: stray ';' after '{' removed */
+		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
+			cmn_err(CE_WARN, "unable to detach ldc %ld",
+					ldcl->head->ldc_id);
+			RW_EXIT(&ldcl->lockrw);
+			return (1);
+		}
+	}
+	RW_EXIT(&ldcl->lockrw);
+
+	rw_destroy(&port->p_ldclist.lockrw);
+
+	mutex_destroy(&port->mca_lock);
+	mutex_destroy(&port->tx_lock);
+	cv_destroy(&port->ref_cv);
+	mutex_destroy(&port->ref_lock);
+
+	cv_destroy(&port->state_cv);
+	mutex_destroy(&port->state_lock);
+
+	kmem_free(port, sizeof (vsw_port_t));
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * Attach a logical domain channel (ldc) under a specified port.
+ *
+ * Allocates a vsw_ldc_t, initialises its locks/handshake state,
+ * initialises the channel with the LDC framework, registers the
+ * rx callback and links the new channel onto the port's list.
+ * Every failure path releases everything allocated so far.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
+{
+	vsw_t 		*vswp = port->p_vswp;
+	vsw_ldc_list_t *ldcl = &port->p_ldclist;
+	vsw_ldc_t 	*ldcp = NULL;
+	ldc_attr_t 	attr;
+	ldc_status_t	istatus;
+	int 		status = DDI_FAILURE;
+
+	D1(vswp, "%s: enter", __func__);
+
+	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
+	if (ldcp == NULL) {
+		DERR(vswp, "%s: kmem_zalloc failed", __func__);
+		return (1);
+	}
+	ldcp->ldc_id = ldc_id;
+
+	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
+	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
+
+	/* required for handshake with peer */
+	ldcp->local_session = (uint64_t)ddi_get_lbolt();
+	ldcp->peer_session = 0;
+	ldcp->session_status = 0;
+
+	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
+	ldcp->hss_id = 1;	/* Initial handshake session id */
+
+	/* only set for outbound lane, inbound set by peer */
+	vsw_set_lane_attr(vswp, &ldcp->lane_out);
+
+	attr.devclass = LDC_DEV_NT_SVC;
+	attr.instance = ddi_get_instance(vswp->dip);
+	attr.mode = LDC_MODE_UNRELIABLE;
+	attr.qlen = VSW_LDC_QLEN;
+	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
+	if (status != 0) {
+		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
+		    __func__, ldc_id, status);
+		mutex_destroy(&ldcp->ldc_txlock);
+		mutex_destroy(&ldcp->ldc_cblock);
+		cv_destroy(&ldcp->drain_cv);
+		mutex_destroy(&ldcp->drain_cv_lock);
+		mutex_destroy(&ldcp->hss_lock);
+		kmem_free(ldcp, sizeof (vsw_ldc_t));
+		return (1);
+	}
+
+	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
+	if (status != 0) {
+		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
+		    __func__, ldc_id, status);
+		mutex_destroy(&ldcp->ldc_txlock);
+		mutex_destroy(&ldcp->ldc_cblock);
+		cv_destroy(&ldcp->drain_cv);
+		mutex_destroy(&ldcp->drain_cv_lock);
+		mutex_destroy(&ldcp->hss_lock);
+		(void) ldc_fini(ldcp->ldc_handle);
+		kmem_free(ldcp, sizeof (vsw_ldc_t));
+		return (1);
+	}
+
+
+	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
+		DERR(vswp, "%s: ldc_status failed", __func__);
+		/*
+		 * Tear down everything set up so far; previously this
+		 * path leaked ldcp, its locks, the callback
+		 * registration and the channel itself.
+		 */
+		(void) ldc_unreg_callback(ldcp->ldc_handle);
+		mutex_destroy(&ldcp->ldc_txlock);
+		mutex_destroy(&ldcp->ldc_cblock);
+		cv_destroy(&ldcp->drain_cv);
+		mutex_destroy(&ldcp->drain_cv_lock);
+		mutex_destroy(&ldcp->hss_lock);
+		(void) ldc_fini(ldcp->ldc_handle);
+		kmem_free(ldcp, sizeof (vsw_ldc_t));
+		return (1);
+	}
+
+	ldcp->ldc_status = istatus;
+	ldcp->ldc_port = port;
+	ldcp->ldc_vswp = vswp;
+
+	/* link it into the list of channels for this port */
+	WRITE_ENTER(&ldcl->lockrw);
+	ldcp->ldc_next = ldcl->head;
+	ldcl->head = ldcp;
+	ldcl->num_ldcs++;
+	RW_EXIT(&ldcl->lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+	return (0);
+}
+
+/*
+ * Detach a logical domain channel (ldc) belonging to a
+ * particular port.
+ *
+ * Caller must hold the port's ldc list lock as writer.  The
+ * channel is located, its lane resources released, the channel
+ * closed and finalised, and the element unlinked from the list
+ * and freed.  (The original code never actually unlinked the
+ * element, leaving ldcl->head pointing at freed memory.)
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int
+vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
+{
+	vsw_t 		*vswp = port->p_vswp;
+	vsw_ldc_t 	*ldcp, *prev_ldcp;
+	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
+	int 		rv;
+
+	/* locate channel, remembering the element before it */
+	prev_ldcp = NULL;
+	for (ldcp = ldcl->head; ldcp != NULL;
+	    prev_ldcp = ldcp, ldcp = ldcp->ldc_next) {
+		if (ldcp->ldc_id == ldc_id) {
+			break;
+		}
+	}
+
+	/* specified ldc id not found */
+	if (ldcp == NULL) {
+		DERR(vswp, "%s: ldcp = NULL", __func__);
+		return (1);
+	}
+
+	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);
+
+	/*
+	 * Before we can close the channel we must release any mapped
+	 * resources (e.g. drings).
+	 */
+	vsw_free_lane_resources(ldcp, INBOUND);
+	vsw_free_lane_resources(ldcp, OUTBOUND);
+
+	/*
+	 * If the close fails we are in serious trouble, as won't
+	 * be able to delete the parent port.
+	 */
+	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
+		DERR(vswp, "%s: error %d closing channel %lld",
+			__func__, rv, ldcp->ldc_id);
+		return (1);
+	}
+
+	(void) ldc_fini(ldcp->ldc_handle);
+
+	ldcp->ldc_status = LDC_INIT;
+	ldcp->ldc_handle = NULL;
+	ldcp->ldc_vswp = NULL;
+	mutex_destroy(&ldcp->ldc_txlock);
+	mutex_destroy(&ldcp->ldc_cblock);
+	cv_destroy(&ldcp->drain_cv);
+	mutex_destroy(&ldcp->drain_cv_lock);
+	mutex_destroy(&ldcp->hss_lock);
+
+	/* unlink it from the list before freeing */
+	if (prev_ldcp == NULL)
+		ldcl->head = ldcp->ldc_next;
+	else
+		prev_ldcp->ldc_next = ldcp->ldc_next;
+	ldcl->num_ldcs--;
+	kmem_free(ldcp, sizeof (vsw_ldc_t));
+
+	return (0);
+}
+
+/*
+ * Open and attempt to bring up the channel. Note that channel
+ * can only be brought up if peer has also opened channel.
+ *
+ * Returns 0 if can open and bring up channel, otherwise
+ * returns 1.  (Peer-not-ready after ldc_up() is reported but not
+ * treated as fatal - the UP event will arrive via the callback.)
+ */
+static int
+vsw_ldc_init(vsw_ldc_t *ldcp)
+{
+	vsw_t 		*vswp = ldcp->ldc_vswp;
+	ldc_status_t	istatus = 0;
+	int		rv;
+
+	D1(vswp, "%s: enter", __func__);
+
+	LDC_ENTER_LOCK(ldcp);
+
+	/* don't start at 0 in case clients don't like that */
+	ldcp->next_ident = 1;
+
+	rv = ldc_open(ldcp->ldc_handle);
+	if (rv != 0) {
+		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
+		    __func__, ldcp->ldc_id, rv);
+		LDC_EXIT_LOCK(ldcp);
+		return (1);
+	}
+
+	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
+		DERR(vswp, "%s: unable to get status", __func__);
+		LDC_EXIT_LOCK(ldcp);
+		return (1);
+
+	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
+		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
+		    __func__, ldcp->ldc_id, istatus);
+		LDC_EXIT_LOCK(ldcp);
+		return (1);
+	}
+
+	ldcp->ldc_status = istatus;
+	rv = ldc_up(ldcp->ldc_handle);
+	if (rv != 0) {
+		/*
+		 * Not a fatal error for ldc_up() to fail, as peer
+		 * end point may simply not be ready yet.
+		 */
+		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
+			ldcp->ldc_id, rv);
+		LDC_EXIT_LOCK(ldcp);
+		return (1);
+	}
+
+	/*
+	 * ldc_up() call is non-blocking so need to explicitly
+	 * check channel status to see if in fact the channel
+	 * is UP.
+	 */
+	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
+		DERR(vswp, "%s: unable to get status", __func__);
+		LDC_EXIT_LOCK(ldcp);
+		return (1);
+
+	} else if (istatus != LDC_UP) {
+		DERR(vswp, "%s: id(%lld) status(%d) is not UP",
+		    __func__, ldcp->ldc_id, istatus);
+	} else {
+		ldcp->ldc_status = istatus;
+	}
+
+	LDC_EXIT_LOCK(ldcp);
+
+	D1(vswp, "%s: exit", __func__);
+	return (0);
+}
+
+/*
+ * Disable callbacks on the channel and reset its recorded status
+ * to LDC_INIT.  Returns 0 on success, 1 if interrupts could not
+ * be disabled.
+ */
+static int
+vsw_ldc_uninit(vsw_ldc_t *ldcp)
+{
+	vsw_t 		*vswp = ldcp->ldc_vswp;
+	int		rv;
+
+	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);
+
+	LDC_ENTER_LOCK(ldcp);
+
+	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
+	if (rv != 0) {
+		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
+			"interrupts (rv = %d)\n", ldcp->ldc_id, rv);
+		LDC_EXIT_LOCK(ldcp);
+		return (1);
+	}
+
+	ldcp->ldc_status = LDC_INIT;
+
+	LDC_EXIT_LOCK(ldcp);
+
+	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
+
+	return (0);
+}
+
+/*
+ * Bring up every channel under the given port via vsw_ldc_init().
+ * Individual failures are ignored; always returns 0.
+ */
+static int
+vsw_init_ldcs(vsw_port_t *port)
+{
+	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
+	vsw_ldc_t	*ldcp;
+
+	READ_ENTER(&ldcl->lockrw);
+	ldcp =  ldcl->head;
+	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
+		(void) vsw_ldc_init(ldcp);
+	}
+	RW_EXIT(&ldcl->lockrw);
+
+	return (0);
+}
+
+/*
+ * Quiesce every channel under the given port via vsw_ldc_uninit().
+ * Individual failures are ignored; always returns 0.
+ */
+static int
+vsw_uninit_ldcs(vsw_port_t *port)
+{
+	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
+	vsw_ldc_t	*ldcp;
+
+	D1(NULL, "vsw_uninit_ldcs: enter\n");
+
+	READ_ENTER(&ldcl->lockrw);
+	ldcp =  ldcl->head;
+	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
+		(void) vsw_ldc_uninit(ldcp);
+	}
+	RW_EXIT(&ldcl->lockrw);
+
+	D1(NULL, "vsw_uninit_ldcs: exit\n");
+
+	return (0);
+}
+
+/*
+ * Wait until the callback(s) associated with the ldcs under the specified
+ * port have completed.
+ *
+ * Prior to this function being invoked each channel under this port
+ * should have been quiesced via ldc_set_cb_mode(DISABLE).
+ *
+ * A short explanation of what we are doing below..
+ *
+ * The simplest approach would be to have a reference counter in
+ * the ldc structure which is increment/decremented by the callbacks as
+ * they use the channel. The drain function could then simply disable any
+ * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
+ * there is a tiny window here - before the callback is able to get the lock
+ * on the channel it is interrupted and this function gets to execute. It
+ * sees that the ref count is zero and believes its free to delete the
+ * associated data structures.
+ *
+ * We get around this by taking advantage of the fact that before the ldc
+ * framework invokes a callback it sets a flag to indicate that there is a
+ * callback active (or about to become active). If when we attempt to
+ * unregister a callback when this active flag is set then the unregister
+ * will fail with EWOULDBLOCK.
+ *
+ * If the unregister fails we do a cv_timedwait. We will either be signaled
+ * by the callback as it is exiting (note we have to wait a short period to
+ * allow the callback to return fully to the ldc framework and it to clear
+ * the active flag), or by the timer expiring. In either case we again attempt
+ * the unregister. We repeat this until we can successfully unregister the
+ * callback.
+ *
+ * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
+ * the case where the callback has finished but the ldc framework has not yet
+ * cleared the active flag. In this case we would never get a cv_signal.
+ */
+static int
+vsw_drain_ldcs(vsw_port_t *port)
+{
+	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
+	vsw_ldc_t	*ldcp;
+	vsw_t		*vswp = port->p_vswp;
+
+	D1(vswp, "%s: enter", __func__);
+
+	READ_ENTER(&ldcl->lockrw);
+
+	ldcp = ldcl->head;
+
+	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
+		/*
+		 * If we can unregister the channel callback then we
+		 * know that there is no callback either running or
+		 * scheduled to run for this channel so move on to next
+		 * channel in the list.
+		 */
+		mutex_enter(&ldcp->drain_cv_lock);
+
+		/* prompt active callbacks to quit */
+		ldcp->drain_state = VSW_LDC_DRAINING;
+
+		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
+			D2(vswp, "%s: unreg callback for chan %ld", __func__,
+				ldcp->ldc_id);
+			mutex_exit(&ldcp->drain_cv_lock);
+			continue;
+		} else {
+			/*
+			 * If we end up here we know that either 1) a callback
+			 * is currently executing, 2) is about to start (i.e.
+			 * the ldc framework has set the active flag but
+			 * has not actually invoked the callback yet, or 3)
+			 * has finished and has returned to the ldc framework
+			 * but the ldc framework has not yet cleared the
+			 * active bit.
+			 *
+			 * Wait for it to finish.
+			 */
+			while (ldc_unreg_callback(ldcp->ldc_handle)
+								== EWOULDBLOCK)
+				(void) cv_timedwait(&ldcp->drain_cv,
+					&ldcp->drain_cv_lock, lbolt + hz);
+
+			mutex_exit(&ldcp->drain_cv_lock);
+			D2(vswp, "%s: unreg callback for chan %ld after "
+				"timeout", __func__, ldcp->ldc_id);
+		}
+	}
+	RW_EXIT(&ldcl->lockrw);
+
+	D1(vswp, "%s: exit", __func__);
+	return (0);
+}
+
+/*
+ * Wait until all tasks which reference this port have completed.
+ *
+ * Prior to this function being invoked each channel under this port
+ * should have been quiesced via ldc_set_cb_mode(DISABLE).
+ *
+ * Works by dispatching vsw_marker_task() onto the (FIFO) taskq:
+ * once the marker runs, every task queued before it - i.e. every
+ * task that could reference this port - has finished.
+ *
+ * Returns 0 on success, 1 if the marker could not be dispatched.
+ */
+static int
+vsw_drain_port_taskq(vsw_port_t *port)
+{
+	vsw_t		*vswp = port->p_vswp;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/*
+	 * Mark the port as in the process of being detached, and
+	 * dispatch a marker task to the queue so we know when all
+	 * relevant tasks have completed.
+	 */
+	mutex_enter(&port->state_lock);
+	port->state = VSW_PORT_DETACHING;
+
+	if ((vswp->taskq_p == NULL) ||
+		(ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
+			port, DDI_NOSLEEP) != DDI_SUCCESS)) {
+		DERR(vswp, "%s: unable to dispatch marker task",
+			__func__);
+		mutex_exit(&port->state_lock);
+		return (1);
+	}
+
+	/*
+	 * Wait for the marker task to finish.
+	 */
+	while (port->state != VSW_PORT_DETACHABLE)
+		cv_wait(&port->state_cv, &port->state_lock);
+
+	mutex_exit(&port->state_lock);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * Taskq marker dispatched by vsw_drain_port_taskq(): when it runs,
+ * all earlier-queued tasks referencing the port have completed, so
+ * the port can be marked detachable and the waiter woken.
+ */
+static void
+vsw_marker_task(void *arg)
+{
+	vsw_port_t	*port = arg;
+	vsw_t		*vswp = port->p_vswp;
+
+	D1(vswp, "%s: enter", __func__);
+
+	mutex_enter(&port->state_lock);
+
+	/*
+	 * No further tasks should be dispatched which reference
+	 * this port so ok to mark it as safe to detach.
+	 */
+	port->state = VSW_PORT_DETACHABLE;
+
+	cv_signal(&port->state_cv);
+
+	mutex_exit(&port->state_lock);
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * Find the port with the given instance number on the instance
+ * port list, or NULL if none.  Caller must hold plist->lockrw.
+ */
+static vsw_port_t *
+vsw_lookup_port(vsw_t *vswp, int p_instance)
+{
+	vsw_port_list_t *plist = &vswp->plist;
+	vsw_port_t	*port;
+
+	for (port = plist->head; port != NULL; port = port->p_next) {
+		if (port->p_instance == p_instance) {
+			D2(vswp, "vsw_lookup_port: found p_instance\n");
+			return (port);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Search for and remove the specified port from the port
+ * list. Returns 0 if able to locate and remove port, otherwise
+ * returns 1.  Caller must hold plist->lockrw as writer.
+ *
+ * (Previously the function returned 0 even when the port was not
+ * found, contradicting its own contract; it now returns 1.)
+ */
+static int
+vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
+{
+	vsw_port_list_t *plist = &vswp->plist;
+	vsw_port_t	*curr_p, *prev_p;
+
+	if (plist->head == NULL)
+		return (1);
+
+	curr_p = prev_p = plist->head;
+
+	while (curr_p != NULL) {
+		if (curr_p == port) {
+			if (prev_p == curr_p) {
+				plist->head = curr_p->p_next;
+			} else {
+				prev_p->p_next = curr_p->p_next;
+			}
+			plist->num_ports--;
+			return (0);
+		} else {
+			prev_p = curr_p;
+			curr_p = curr_p->p_next;
+		}
+	}
+
+	/* port not on the list */
+	return (1);
+}
+
+/*
+ * Interrupt handler for ldc messages.
+ *
+ * Processes LDC_EVT_UP (restart handshake), LDC_EVT_READ (drain and
+ * process queued packets), and LDC_EVT_RESET/DOWN (refresh cached
+ * channel status). Runs with ldc_cblock held; on exit signals the
+ * drain waiter if one is blocked on drain_cv.
+ */
+static uint_t
+vsw_ldc_cb(uint64_t event, caddr_t arg)
+{
+	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	ldc_status_t	lstatus;
+	int		rv;
+
+	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
+
+	mutex_enter(&ldcp->ldc_cblock);
+
+	/* ignore events which arrive before the channel is fully set up */
+	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
+		mutex_exit(&ldcp->ldc_cblock);
+		return (LDC_SUCCESS);
+	}
+
+	if (event & LDC_EVT_UP) {
+		/*
+		 * Channel has come up, get the state and then start
+		 * the handshake.
+		 */
+		rv = ldc_status(ldcp->ldc_handle, &lstatus);
+		if (rv != 0) {
+			cmn_err(CE_WARN, "Unable to read channel state");
+		}
+		ldcp->ldc_status = lstatus;
+
+		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
+			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
+
+		vsw_restart_handshake(ldcp);
+
+		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
+	}
+
+	if (event & LDC_EVT_READ) {
+		/*
+		 * Data available for reading.
+		 *
+		 * Fix: format previously read "id(ld)" (missing '%'),
+		 * so the conversions did not match the arguments.
+		 */
+		D2(vswp, "%s: id(%ld) event(%llx) data READ",
+			__func__, ldcp->ldc_id, event);
+
+		vsw_process_pkt(ldcp);
+
+		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
+
+		goto vsw_cb_exit;
+	}
+
+	if (event & LDC_EVT_RESET) {
+		rv = ldc_status(ldcp->ldc_handle, &lstatus);
+		if (rv != 0) {
+			cmn_err(CE_WARN, "Unable to read channel state");
+		} else {
+			ldcp->ldc_status = lstatus;
+		}
+		D2(vswp, "%s: id(%ld) event(%llx) RESET: status (%ld)",
+			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
+	}
+
+	if (event & LDC_EVT_DOWN) {
+		rv = ldc_status(ldcp->ldc_handle, &lstatus);
+		if (rv != 0) {
+			cmn_err(CE_WARN, "Unable to read channel state");
+		} else {
+			ldcp->ldc_status = lstatus;
+		}
+
+		D2(vswp, "%s: id(%ld) event(%llx) DOWN: status (%ld)",
+			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
+
+	}
+
+	/*
+	 * Catch either LDC_EVT_WRITE which we don't support or any
+	 * unknown event.
+	 */
+	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET
+		| LDC_EVT_DOWN | LDC_EVT_READ)) {
+
+		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
+			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
+	}
+
+vsw_cb_exit:
+	mutex_exit(&ldcp->ldc_cblock);
+
+	/*
+	 * Let the drain function know we are finishing if it
+	 * is waiting.
+	 */
+	mutex_enter(&ldcp->drain_cv_lock);
+	if (ldcp->drain_state == VSW_LDC_DRAINING)
+		cv_signal(&ldcp->drain_cv);
+	mutex_exit(&ldcp->drain_cv_lock);
+
+	return (LDC_SUCCESS);
+}
+
+/*
+ * (Re)start a handshake with our peer by sending them
+ * our version info.
+ *
+ * Frees both lanes' resources, clears the port's FDB/multicast
+ * registrations, bumps the handshake-session id (so stale queued
+ * tasks can be discarded), and -- unless the permitted number of
+ * attempts has been exceeded -- sends a fresh version message.
+ */
+static void
+vsw_restart_handshake(vsw_ldc_t *ldcp)
+{
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	vsw_port_t	*port;
+	vsw_ldc_list_t	*ldcl;
+
+	D1(vswp, "vsw_restart_handshake: enter");
+
+	port = ldcp->ldc_port;
+	ldcl = &port->p_ldclist;
+
+	WRITE_ENTER(&ldcl->lockrw);
+
+	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
+		ldcp->lane_in.lstate, ldcp->lane_out.lstate);
+
+	vsw_free_lane_resources(ldcp, INBOUND);
+	vsw_free_lane_resources(ldcp, OUTBOUND);
+	RW_EXIT(&ldcl->lockrw);
+
+	ldcp->lane_in.lstate = 0;
+	ldcp->lane_out.lstate = 0;
+
+	/*
+	 * Remove parent port from any multicast groups
+	 * it may have registered with. Client must resend
+	 * multicast add command after handshake completes.
+	 */
+	(void) vsw_del_fdb(vswp, port);
+
+	vsw_del_mcst_port(port);
+
+	ldcp->hphase = VSW_MILESTONE0;
+
+	ldcp->peer_session = 0;
+	ldcp->session_status = 0;
+
+	/*
+	 * We now increment the transaction group id. This allows
+	 * us to identify and discard any tasks which are still pending
+	 * on the taskq and refer to the handshake session we are about
+	 * to restart. These stale messages no longer have any real
+	 * meaning.
+	 */
+	mutex_enter(&ldcp->hss_lock);
+	ldcp->hss_id++;
+	mutex_exit(&ldcp->hss_lock);
+
+	/* give up after too many failed handshake attempts */
+	if (ldcp->hcnt++ > vsw_num_handshakes) {
+		cmn_err(CE_WARN, "exceeded number of permitted "
+			"handshake attempts (%d) on channel %ld",
+			ldcp->hcnt, ldcp->ldc_id);
+		return;
+	}
+
+	vsw_send_ver(ldcp);
+
+	D1(vswp, "vsw_restart_handshake: exit");
+}
+
+/*
+ * returns 0 if legal for event signified by flag to have
+ * occurred at the time it did. Otherwise returns 1.
+ *
+ * Side effect: for the *_ACK_RECV / *_NACK_RECV flags a successful
+ * check also clears the corresponding *_INFO_SENT bit from the
+ * relevant lane state (written back at the bottom). An illegal
+ * event restarts the handshake before returning 1.
+ */
+int
+vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
+{
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	uint64_t	state;
+	uint64_t	phase;
+
+	if (dir == INBOUND)
+		state = ldcp->lane_in.lstate;
+	else
+		state = ldcp->lane_out.lstate;
+
+	phase = ldcp->hphase;
+
+	switch (flag) {
+	case VSW_VER_INFO_RECV:
+		if (phase > VSW_MILESTONE0) {
+			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
+				" when in state %d\n", ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		}
+		break;
+
+	case VSW_VER_ACK_RECV:
+	case VSW_VER_NACK_RECV:
+		if (!(state & VSW_VER_INFO_SENT)) {
+			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK"
+				" or VER_NACK when in state %d\n",
+				ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		} else
+			state &= ~VSW_VER_INFO_SENT;
+		break;
+
+	case VSW_ATTR_INFO_RECV:
+		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
+			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
+				" when in state %d\n", ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		}
+		break;
+
+	case VSW_ATTR_ACK_RECV:
+	case VSW_ATTR_NACK_RECV:
+		if (!(state & VSW_ATTR_INFO_SENT)) {
+			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
+				" or ATTR_NACK when in state %d\n",
+				ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		} else
+			state &= ~VSW_ATTR_INFO_SENT;
+		break;
+
+	case VSW_DRING_INFO_RECV:
+		if (phase < VSW_MILESTONE1) {
+			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
+				" when in state %d\n", ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		}
+		break;
+
+	case VSW_DRING_ACK_RECV:
+	case VSW_DRING_NACK_RECV:
+		if (!(state & VSW_DRING_INFO_SENT)) {
+			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK"
+				" or DRING_NACK when in state %d\n",
+				ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		} else
+			state &= ~VSW_DRING_INFO_SENT;
+		break;
+
+	case VSW_RDX_INFO_RECV:
+		if (phase < VSW_MILESTONE3) {
+			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
+				" when in state %d\n", ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		}
+		break;
+
+	case VSW_RDX_ACK_RECV:
+	case VSW_RDX_NACK_RECV:
+		if (!(state & VSW_RDX_INFO_SENT)) {
+			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK"
+				" or RDX_NACK when in state %d\n",
+				ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		} else
+			state &= ~VSW_RDX_INFO_SENT;
+		break;
+
+	case VSW_MCST_INFO_RECV:
+		if (phase < VSW_MILESTONE3) {
+			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
+				" when in state %d\n", ldcp->ldc_id, phase);
+			vsw_restart_handshake(ldcp);
+			return (1);
+		}
+		break;
+
+	default:
+		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
+			ldcp->ldc_id, flag);
+		return (1);
+	}
+
+	/* write back the (possibly modified) lane state */
+	if (dir == INBOUND)
+		ldcp->lane_in.lstate = state;
+	else
+		ldcp->lane_out.lstate = state;
+
+	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);
+
+	return (0);
+}
+
+/*
+ * Advance the handshake state machine for this channel if the
+ * conditions for leaving the current milestone have been met.
+ * Milestones: 0 = version exchange, 1 = attribute exchange,
+ * 2 = dring exchange (dring mode only), 3 = RDX exchange,
+ * 4 = handshake complete (lane marked active for transmit).
+ */
+void
+vsw_next_milestone(vsw_ldc_t *ldcp)
+{
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
+		ldcp->ldc_id, ldcp->hphase);
+
+	DUMP_FLAGS(ldcp->lane_in.lstate);
+	DUMP_FLAGS(ldcp->lane_out.lstate);
+
+	switch (ldcp->hphase) {
+
+	case VSW_MILESTONE0:
+		/*
+		 * If we haven't started to handshake with our peer,
+		 * start to do so now.
+		 */
+		if (ldcp->lane_out.lstate == 0) {
+			D2(vswp, "%s: (chan %lld) starting handshake "
+				"with peer", __func__, ldcp->ldc_id);
+			vsw_restart_handshake(ldcp);
+		}
+
+		/*
+		 * Only way to pass this milestone is to have successfully
+		 * negotiated version info.
+		 */
+		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
+			(ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {
+
+			D2(vswp, "%s: (chan %lld) leaving milestone 0",
+				__func__, ldcp->ldc_id);
+
+			/*
+			 * Next milestone is passed when attribute
+			 * information has been successfully exchanged.
+			 */
+			ldcp->hphase = VSW_MILESTONE1;
+			vsw_send_attr(ldcp);
+
+		}
+		break;
+
+	case VSW_MILESTONE1:
+		/*
+		 * Only way to pass this milestone is to have successfully
+		 * negotiated attribute information.
+		 */
+		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
+
+			ldcp->hphase = VSW_MILESTONE2;
+
+			/*
+			 * If the peer device has said it wishes to
+			 * use descriptor rings then we send it our ring
+			 * info, otherwise we just set up a private ring
+			 * which we use an internal buffer
+			 */
+			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
+				vsw_send_dring_info(ldcp);
+		}
+		break;
+
+
+	case VSW_MILESTONE2:
+		/*
+		 * If peer has indicated in its attribute message that
+		 * it wishes to use descriptor rings then the only way
+		 * to pass this milestone is for us to have received
+		 * valid dring info.
+		 *
+		 * If peer is not using descriptor rings then just fall
+		 * through.
+		 */
+		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
+			(!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
+			break;
+
+		D2(vswp, "%s: (chan %lld) leaving milestone 2",
+			__func__, ldcp->ldc_id);
+
+		ldcp->hphase = VSW_MILESTONE3;
+		vsw_send_rdx(ldcp);
+		break;
+
+	case VSW_MILESTONE3:
+		/*
+		 * Pass this milestone when all parameters have been
+		 * successfully exchanged and RDX sent in both directions.
+		 *
+		 * Mark outbound lane as available to transmit data.
+		 */
+		if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) &&
+			(ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) {
+
+			D2(vswp, "%s: (chan %lld) leaving milestone 3",
+				__func__, ldcp->ldc_id);
+			D2(vswp, "%s: ** handshake complete **", __func__);
+			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
+			ldcp->hphase = VSW_MILESTONE4;
+			/* reset the failed-handshake attempt counter */
+			ldcp->hcnt = 0;
+			DISPLAY_STATE();
+		}
+		break;
+
+	case VSW_MILESTONE4:
+		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
+			ldcp->ldc_id);
+		break;
+
+	default:
+		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
+			ldcp->ldc_id, ldcp->hphase);
+	}
+
+	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
+		ldcp->hphase);
+}
+
+/*
+ * Check if major version is supported.
+ *
+ * Returns 0 if finds supported major number, and if necessary
+ * adjusts the minor field.
+ *
+ * Returns 1 if can't match major number exactly. Sets major/minor
+ * to next lowest support values, or to zero if no other values possible.
+ *
+ * Relies on vsw_versions[] being ordered from highest to lowest
+ * major version, so the first entry with ver_major <= vp->ver_major
+ * is the best alternative.
+ */
+static int
+vsw_supported_version(vio_ver_msg_t *vp)
+{
+	int	i;
+
+	D1(NULL, "vsw_supported_version: enter");
+
+	for (i = 0; i < VSW_NUM_VER; i++) {
+		if (vsw_versions[i].ver_major == vp->ver_major) {
+			/*
+			 * Matching or lower major version found. Update
+			 * minor number if necessary.
+			 */
+			if (vp->ver_minor > vsw_versions[i].ver_minor) {
+				D2(NULL, "%s: adjusting minor value"
+					" from %d to %d", __func__,
+					vp->ver_minor,
+					vsw_versions[i].ver_minor);
+				vp->ver_minor = vsw_versions[i].ver_minor;
+			}
+
+			return (0);
+		}
+
+		/* lower major found: clamp minor and report inexact match */
+		if (vsw_versions[i].ver_major < vp->ver_major) {
+			if (vp->ver_minor > vsw_versions[i].ver_minor) {
+				D2(NULL, "%s: adjusting minor value"
+					" from %d to %d", __func__,
+					vp->ver_minor,
+					vsw_versions[i].ver_minor);
+				vp->ver_minor = vsw_versions[i].ver_minor;
+			}
+			return (1);
+		}
+	}
+
+	/* No match was possible, zero out fields */
+	vp->ver_major = 0;
+	vp->ver_minor = 0;
+
+	D1(NULL, "vsw_supported_version: exit");
+
+	return (1);
+}
+
+/*
+ * Main routine for processing messages received over LDC.
+ *
+ * Drains the channel: reads packets until ldc_read() returns no
+ * data (or an error), and dispatches each by its VIO msg-tag type
+ * (control / data / error).
+ */
+static void
+vsw_process_pkt(void *arg)
+{
+	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	size_t		msglen;
+	vio_msg_tag_t	tag;
+	def_msg_t	dmsg;
+	int		rv = 0;
+
+	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
+
+	/*
+	 * If channel is up read messages until channel is empty.
+	 */
+	do {
+		msglen = sizeof (dmsg);
+		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);
+
+		if (rv != 0) {
+			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) "
+				"len(%d)\n", __func__, ldcp->ldc_id,
+				rv, msglen);
+			break;
+		}
+
+		if (msglen == 0) {
+			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
+				ldcp->ldc_id);
+			break;
+		}
+
+		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
+			ldcp->ldc_id, msglen);
+
+		/*
+		 * Figure out what sort of packet we have gotten by
+		 * examining the msg tag, and then switch it appropriately.
+		 */
+		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));
+
+		switch (tag.vio_msgtype) {
+		case VIO_TYPE_CTRL:
+			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
+			break;
+		case VIO_TYPE_DATA:
+			vsw_process_data_pkt(ldcp, &dmsg, tag);
+			break;
+		case VIO_TYPE_ERR:
+			vsw_process_err_pkt(ldcp, &dmsg, tag);
+			break;
+		default:
+			/*
+			 * Fix: the format string was previously split in
+			 * two, passing "id(%lx)\n" as a vararg so the tag
+			 * printed garbage and ldc_id was never consumed.
+			 */
+			DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n",
+				__func__, tag.vio_msgtype, ldcp->ldc_id);
+			break;
+		}
+	} while (msglen);
+
+	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Dispatch a task to process a VIO control message.
+ *
+ * RDX ACKs are handled inline (a legitimate data packet may follow
+ * immediately); everything else is copied into a vsw_ctrl_task_t,
+ * stamped with the current handshake-session id, and queued on the
+ * vsw taskq -- unless the port is being detached.
+ */
+static void
+vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
+{
+	vsw_ctrl_task_t		*ctaskp = NULL;
+	vsw_port_t		*port = ldcp->ldc_port;
+	vsw_t			*vswp = port->p_vswp;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/*
+	 * We need to handle RDX ACK messages in-band as once they
+	 * are exchanged it is possible that we will get an
+	 * immediate (legitimate) data packet.
+	 */
+	if ((tag.vio_subtype_env == VIO_RDX) &&
+		(tag.vio_subtype == VIO_SUBTYPE_ACK)) {
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV))
+			return;
+
+		ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV;
+		vsw_next_milestone(ldcp);
+		D2(vswp, "%s (%ld) handling RDX_ACK in place", __func__,
+			ldcp->ldc_id);
+		return;
+	}
+
+	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
+
+	if (ctaskp == NULL) {
+		DERR(vswp, "%s: unable to alloc space for ctrl"
+			" msg", __func__);
+		vsw_restart_handshake(ldcp);
+		return;
+	}
+
+	ctaskp->ldcp = ldcp;
+	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
+	mutex_enter(&ldcp->hss_lock);
+	ctaskp->hss_id = ldcp->hss_id;
+	mutex_exit(&ldcp->hss_lock);
+
+	/*
+	 * Dispatch task to processing taskq if port is not in
+	 * the process of being detached.
+	 */
+	mutex_enter(&port->state_lock);
+	if (port->state == VSW_PORT_INIT) {
+		if ((vswp->taskq_p == NULL) ||
+			(ddi_taskq_dispatch(vswp->taskq_p,
+			vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP)
+			!= DDI_SUCCESS)) {
+			DERR(vswp, "%s: unable to dispatch task to taskq",
+				__func__);
+			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
+			mutex_exit(&port->state_lock);
+			vsw_restart_handshake(ldcp);
+			return;
+		}
+	} else {
+		DWARN(vswp, "%s: port %d detaching, not dispatching "
+			"task", __func__, port->p_instance);
+		/*
+		 * Fix: the task was never dispatched, so free it here;
+		 * previously this path leaked the allocation.
+		 */
+		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
+	}
+
+	mutex_exit(&port->state_lock);
+
+	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
+		ldcp->ldc_id);
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * Process a VIO ctrl message. Invoked from taskq.
+ *
+ * Discards packets from an earlier handshake session or with a bad
+ * session id, otherwise dispatches on the vio_subtype_env. The
+ * vsw_ctrl_task_t is freed on every exit path.
+ */
+static void
+vsw_process_ctrl_pkt(void *arg)
+{
+	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
+	vsw_ldc_t	*ldcp = ctaskp->ldcp;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	vio_msg_tag_t	tag;
+	uint16_t	env;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
+	env = tag.vio_subtype_env;
+
+	/* stale pkt check */
+	mutex_enter(&ldcp->hss_lock);
+	if (ctaskp->hss_id < ldcp->hss_id) {
+		DWARN(vswp, "%s: discarding stale packet belonging to"
+			" earlier (%ld) handshake session", __func__,
+			ctaskp->hss_id);
+		mutex_exit(&ldcp->hss_lock);
+		/*
+		 * Fix: free the task struct before returning; this
+		 * early-exit path previously leaked the allocation.
+		 */
+		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
+		return;
+	}
+	mutex_exit(&ldcp->hss_lock);
+
+	/* session id check */
+	if (ldcp->session_status & VSW_PEER_SESSION) {
+		if (ldcp->peer_session != tag.vio_sid) {
+			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
+				__func__, ldcp->ldc_id, tag.vio_sid);
+			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
+			vsw_restart_handshake(ldcp);
+			return;
+		}
+	}
+
+	/*
+	 * Switch on vio_subtype envelope, then let lower routines
+	 * decide if its an INFO, ACK or NACK packet.
+	 */
+	switch (env) {
+	case VIO_VER_INFO:
+		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
+		break;
+	case VIO_DRING_REG:
+		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
+		break;
+	case VIO_DRING_UNREG:
+		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
+		break;
+	case VIO_ATTR_INFO:
+		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
+		break;
+	case VNET_MCAST_INFO:
+		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
+		break;
+	case VIO_RDX:
+		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
+		break;
+	default:
+		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
+			__func__, env);
+	}
+
+	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Version negotiation. We can end up here either because our peer
+ * has responded to a handshake message we have sent it, or our peer
+ * has initiated a handshake with us. If its the former then can only
+ * be ACK or NACK, if its the later can only be INFO.
+ *
+ * If its an ACK we move to the next stage of the handshake, namely
+ * attribute exchange. If its a NACK we see if we can specify another
+ * version, if we can't we stop.
+ *
+ * If it is an INFO we reset all params associated with communication
+ * in that direction over this channel (remember connection is
+ * essentially 2 independent simplex channels).
+ */
+void
+vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vio_ver_msg_t	*ver_pkt;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	/*
+	 * We know this is a ctrl/version packet so
+	 * cast it into the correct structure.
+	 */
+	ver_pkt = (vio_ver_msg_t *)pkt;
+
+	switch (ver_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
+
+		/*
+		 * Record the session id, which we will use from now
+		 * until we see another VER_INFO msg. Even then the
+		 * session id in most cases will be unchanged, except
+		 * if channel was reset.
+		 */
+		if ((ldcp->session_status & VSW_PEER_SESSION) &&
+			(ldcp->peer_session != ver_pkt->tag.vio_sid)) {
+			DERR(vswp, "%s: updating session id for chan %lld "
+				"from %llx to %llx", __func__, ldcp->ldc_id,
+				ldcp->peer_session, ver_pkt->tag.vio_sid);
+		}
+
+		ldcp->peer_session = ver_pkt->tag.vio_sid;
+		ldcp->session_status |= VSW_PEER_SESSION;
+
+		/* Legal message at this time ? */
+		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
+			return;
+
+		/*
+		 * First check the device class. Currently only expect
+		 * to be talking to a network device. In the future may
+		 * also talk to another switch.
+		 */
+		if (ver_pkt->dev_class != VDEV_NETWORK) {
+			DERR(vswp, "%s: illegal device class %d", __func__,
+				ver_pkt->dev_class);
+
+			ver_pkt->tag.vio_sid = ldcp->local_session;
+			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
+
+			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
+
+			vsw_send_msg(ldcp, (void *)ver_pkt,
+				sizeof (vio_ver_msg_t));
+
+			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
+			vsw_next_milestone(ldcp);
+			return;
+		} else {
+			ldcp->dev_class = ver_pkt->dev_class;
+		}
+
+		/*
+		 * Now check the version.
+		 */
+		if (vsw_supported_version(ver_pkt) == 0) {
+			/*
+			 * Support this major version and possibly
+			 * adjusted minor version.
+			 */
+
+			D2(vswp, "%s: accepted ver %d:%d", __func__,
+				ver_pkt->ver_major, ver_pkt->ver_minor);
+
+			/* Store accepted values */
+			ldcp->lane_in.ver_major = ver_pkt->ver_major;
+			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
+
+			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
+
+			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
+		} else {
+			/*
+			 * NACK back with the next lower major/minor
+			 * pairing we support (if we don't support any
+			 * more versions they will be set to zero).
+			 */
+
+			D2(vswp, "%s: replying with ver %d:%d", __func__,
+				ver_pkt->ver_major, ver_pkt->ver_minor);
+
+			/* Store updated values */
+			ldcp->lane_in.ver_major = ver_pkt->ver_major;
+			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
+
+			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
+
+			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
+		}
+
+		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
+		ver_pkt->tag.vio_sid = ldcp->local_session;
+		vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t));
+
+		vsw_next_milestone(ldcp);
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
+			return;
+
+		/*
+		 * Store updated values.
+		 *
+		 * NOTE(review): this ACK acknowledges *our* outbound
+		 * version proposal, yet the values are stored into
+		 * lane_in (the NACK case below stores into lane_out).
+		 * Looks like this should be lane_out -- confirm.
+		 */
+		ldcp->lane_in.ver_major = ver_pkt->ver_major;
+		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
+
+
+		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
+		vsw_next_milestone(ldcp);
+
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
+			return;
+
+		/*
+		 * If our peer sent us a NACK with the ver fields set to
+		 * zero then there is nothing more we can do. Otherwise see
+		 * if we support either the version suggested, or a lesser
+		 * one.
+		 */
+		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
+			DERR(vswp, "%s: peer unable to negotiate any "
+				"further.", __func__);
+			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
+			vsw_next_milestone(ldcp);
+			return;
+		}
+
+		/*
+		 * Check to see if we support this major version or
+		 * a lower one. If we don't then maj/min will be set
+		 * to zero.
+		 */
+		(void) vsw_supported_version(ver_pkt);
+		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
+			/* Nothing more we can do */
+			DERR(vswp, "%s: version negotiation failed.\n",
+				__func__);
+			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
+			vsw_next_milestone(ldcp);
+		} else {
+			/* found a supported major version */
+			ldcp->lane_out.ver_major = ver_pkt->ver_major;
+			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
+
+			D2(vswp, "%s: resending with updated values (%x, %x)",
+				__func__, ver_pkt->ver_major,
+				ver_pkt->ver_minor);
+
+			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
+			ver_pkt->tag.vio_sid = ldcp->local_session;
+			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
+
+			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
+
+			vsw_send_msg(ldcp, (void *)ver_pkt,
+				sizeof (vio_ver_msg_t));
+
+			vsw_next_milestone(ldcp);
+
+		}
+		break;
+
+	default:
+		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
+			ver_pkt->tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Process an attribute packet. We can end up here either because our peer
+ * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
+ * peer has sent us an attribute INFO message
+ *
+ * If its an ACK we then move to the next stage of the handshake which
+ * is to send our descriptor ring info to our peer. If its a NACK then
+ * there is nothing more we can (currently) do.
+ *
+ * If we get a valid/acceptable INFO packet (and we have already negotiated
+ * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
+ * NACK back and reset channel state to INACTIVE.
+ *
+ * FUTURE: in time we will probably negotiate over attributes, but for
+ * the moment unacceptable attributes are regarded as a fatal error.
+ *
+ */
+void
+vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vnet_attr_msg_t		*attr_pkt;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	vsw_port_t		*port = ldcp->ldc_port;
+	uint64_t		macaddr = 0;
+	int			i;
+
+	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
+
+	/*
+	 * We know this is a ctrl/attr packet so
+	 * cast it into the correct structure.
+	 */
+	attr_pkt = (vnet_attr_msg_t *)pkt;
+
+	switch (attr_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
+
+		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
+			return;
+
+		/*
+		 * If the attributes are unacceptable then we NACK back.
+		 */
+		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {
+
+			DERR(vswp, "%s (chan %d): invalid attributes",
+				__func__, ldcp->ldc_id);
+
+			vsw_free_lane_resources(ldcp, INBOUND);
+
+			attr_pkt->tag.vio_sid = ldcp->local_session;
+			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
+
+			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
+			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
+			vsw_send_msg(ldcp, (void *)attr_pkt,
+				sizeof (vnet_attr_msg_t));
+
+			vsw_next_milestone(ldcp);
+			return;
+		}
+
+		/*
+		 * Otherwise store attributes for this lane and update
+		 * lane state.
+		 */
+		ldcp->lane_in.mtu = attr_pkt->mtu;
+		ldcp->lane_in.addr = attr_pkt->addr;
+		ldcp->lane_in.addr_type = attr_pkt->addr_type;
+		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
+		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
+
+		/* unpack the 64-bit addr into the port's ethernet address */
+		macaddr = ldcp->lane_in.addr;
+		for (i = ETHERADDRL - 1; i >= 0; i--) {
+			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
+			macaddr >>= 8;
+		}
+
+		/* create the fdb entry for this port/mac address */
+		(void) vsw_add_fdb(vswp, port);
+
+		/* setup device specific xmit routines */
+		mutex_enter(&port->tx_lock);
+		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
+			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
+			port->transmit = vsw_dringsend;
+		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
+			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
+			vsw_create_privring(ldcp);
+			port->transmit = vsw_descrsend;
+		}
+		mutex_exit(&port->tx_lock);
+
+		attr_pkt->tag.vio_sid = ldcp->local_session;
+		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
+
+		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
+
+		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;
+
+		vsw_send_msg(ldcp, (void *)attr_pkt,
+			sizeof (vnet_attr_msg_t));
+
+		vsw_next_milestone(ldcp);
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
+			return;
+
+		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
+		vsw_next_milestone(ldcp);
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
+			return;
+
+		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
+		vsw_next_milestone(ldcp);
+		break;
+
+	default:
+		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
+			attr_pkt->tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Process a dring info packet. We can end up here either because our peer
+ * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
+ * peer has sent us a dring INFO message.
+ *
+ * If we get a valid/acceptable INFO packet (and we have already negotiated
+ * a version) we ACK back and update the lane state, otherwise we NACK back.
+ *
+ * FUTURE: nothing to stop client from sending us info on multiple dring's
+ * but for the moment we will just use the first one we are given.
+ *
+ */
+void
+vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vio_dring_reg_msg_t	*dring_pkt;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	ldc_mem_info_t		minfo;
+	dring_info_t		*dp, *dbp;
+	int			dring_found = 0;
+
+	/*
+	 * We know this is a ctrl/dring packet so
+	 * cast it into the correct structure.
+	 */
+	dring_pkt = (vio_dring_reg_msg_t *)pkt;
+
+	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
+
+	switch (dring_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
+
+		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
+			return;
+
+		/*
+		 * If the dring params are unacceptable then we NACK back.
+		 */
+		if (vsw_check_dring_info(dring_pkt)) {
+
+			DERR(vswp, "%s (%lld): invalid dring info",
+				__func__, ldcp->ldc_id);
+
+			vsw_free_lane_resources(ldcp, INBOUND);
+
+			dring_pkt->tag.vio_sid = ldcp->local_session;
+			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
+
+			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
+
+			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
+
+			vsw_send_msg(ldcp, (void *)dring_pkt,
+				sizeof (vio_dring_reg_msg_t));
+
+			vsw_next_milestone(ldcp);
+			return;
+		}
+
+		/*
+		 * Otherwise, attempt to map in the dring using the
+		 * cookie. If that succeeds we send back a unique dring
+		 * identifier that the sending side will use in future
+		 * to refer to this descriptor ring.
+		 */
+		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
+
+		dp->num_descriptors = dring_pkt->num_descriptors;
+		dp->descriptor_size = dring_pkt->descriptor_size;
+		dp->options = dring_pkt->options;
+		dp->ncookies = dring_pkt->ncookies;
+
+		/*
+		 * Note: should only get one cookie. Enforced in
+		 * the ldc layer.
+		 */
+		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
+			sizeof (ldc_mem_cookie_t));
+
+		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
+			dp->num_descriptors, dp->descriptor_size);
+		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
+			dp->options, dp->ncookies);
+
+		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
+			dp->ncookies, dp->num_descriptors,
+			dp->descriptor_size, LDC_SHADOW_MAP,
+			&(dp->handle))) != 0) {
+
+			DERR(vswp, "%s: dring_map failed\n", __func__);
+
+			kmem_free(dp, sizeof (dring_info_t));
+			vsw_free_lane_resources(ldcp, INBOUND);
+
+			dring_pkt->tag.vio_sid = ldcp->local_session;
+			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
+
+			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
+
+			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
+			vsw_send_msg(ldcp, (void *)dring_pkt,
+				sizeof (vio_dring_reg_msg_t));
+
+			vsw_next_milestone(ldcp);
+			return;
+		}
+
+		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
+
+			DERR(vswp, "%s: dring_addr failed\n", __func__);
+
+			kmem_free(dp, sizeof (dring_info_t));
+			vsw_free_lane_resources(ldcp, INBOUND);
+
+			dring_pkt->tag.vio_sid = ldcp->local_session;
+			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
+
+			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
+
+			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
+			vsw_send_msg(ldcp, (void *)dring_pkt,
+				sizeof (vio_dring_reg_msg_t));
+
+			vsw_next_milestone(ldcp);
+			return;
+		} else {
+			/* store the address of the pub part of ring */
+			dp->pub_addr = minfo.vaddr;
+		}
+
+		/* no private section as we are importing */
+		dp->priv_addr = NULL;
+
+		/*
+		 * Using simple monotonically increasing int for ident at
+		 * the moment.
+		 */
+		dp->ident = ldcp->next_ident;
+		ldcp->next_ident++;
+
+		dp->end_idx = 0;
+		dp->next = NULL;
+
+		/*
+		 * Link it onto the end of the list of drings
+		 * for this lane.
+		 */
+		if (ldcp->lane_in.dringp == NULL) {
+			D2(vswp, "%s: adding first INBOUND dring", __func__);
+			ldcp->lane_in.dringp = dp;
+		} else {
+			dbp = ldcp->lane_in.dringp;
+
+			while (dbp->next != NULL)
+				dbp = dbp->next;
+
+			dbp->next = dp;
+		}
+
+		/* acknowledge it */
+		dring_pkt->tag.vio_sid = ldcp->local_session;
+		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
+		dring_pkt->dring_ident = dp->ident;
+
+		vsw_send_msg(ldcp, (void *)dring_pkt,
+			sizeof (vio_dring_reg_msg_t));
+
+		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
+		vsw_next_milestone(ldcp);
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
+			return;
+
+		/*
+		 * Peer is acknowledging our dring info and will have
+		 * sent us a dring identifier which we will use to
+		 * refer to this ring w.r.t. our peer.
+		 */
+		dp = ldcp->lane_out.dringp;
+		if (dp != NULL) {
+			/*
+			 * Find the ring this ident should be associated
+			 * with.
+			 *
+			 * NOTE(review): the "else while" below re-tests
+			 * the head element before advancing; harmless
+			 * but a plain while loop would read better.
+			 */
+			if (vsw_dring_match(dp, dring_pkt)) {
+				dring_found = 1;
+
+			} else while (dp != NULL) {
+				if (vsw_dring_match(dp, dring_pkt)) {
+					dring_found = 1;
+					break;
+				}
+				dp = dp->next;
+			}
+
+			if (dring_found == 0) {
+				DERR(NULL, "%s: unrecognised ring cookie",
+					__func__);
+				vsw_restart_handshake(ldcp);
+				return;
+			}
+
+		} else {
+			DERR(vswp, "%s: DRING ACK received but no drings "
+				"allocated", __func__);
+			vsw_restart_handshake(ldcp);
+			return;
+		}
+
+		/* store ident */
+		dp->ident = dring_pkt->dring_ident;
+		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
+		vsw_next_milestone(ldcp);
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
+			return;
+
+		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
+		vsw_next_milestone(ldcp);
+		break;
+
+	default:
+		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
+			dring_pkt->tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Process a request from peer to unregister a dring.
+ *
+ * For the moment we just restart the handshake if our
+ * peer endpoint attempts to unregister a dring.
+ * (Every subtype -- INFO, ACK, NACK, or unknown -- takes the
+ * same restart action today.)
+ */
+void
+vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	vio_dring_unreg_msg_t	*dring_pkt;
+
+	/*
+	 * We know this is a ctrl/dring packet so
+	 * cast it into the correct structure.
+	 */
+	dring_pkt = (vio_dring_unreg_msg_t *)pkt;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	switch (dring_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
+
+		DWARN(vswp, "%s: restarting handshake..", __func__);
+		vsw_restart_handshake(ldcp);
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
+
+		DWARN(vswp, "%s: restarting handshake..", __func__);
+		vsw_restart_handshake(ldcp);
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
+
+		DWARN(vswp, "%s: restarting handshake..", __func__);
+		vsw_restart_handshake(ldcp);
+		break;
+
+	default:
+		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
+			dring_pkt->tag.vio_subtype);
+		vsw_restart_handshake(ldcp);
+	}
+
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+}
+
+#define SND_MCST_NACK(ldcp, pkt) \
+ pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
+ pkt->tag.vio_sid = ldcp->local_session; \
+ vsw_send_msg(ldcp, (void *)pkt, sizeof (vnet_mcast_msg_t));
+
+/*
+ * Process a multicast request from a vnet.
+ *
+ * Vnet's specify a multicast address that they are interested in. This
+ * address is used as a key into the hash table which forms the multicast
+ * forwarding database (mFDB).
+ *
+ * The table keys are the multicast addresses, while the table entries
+ * are pointers to lists of ports which wish to receive packets for the
+ * specified multicast address.
+ *
+ * When a multicast packet is being switched we use the address as a key
+ * into the hash table, and then walk the appropriate port list forwarding
+ * the pkt to each port in turn.
+ *
+ * If a vnet is no longer interested in a particular multicast grouping
+ * we simply find the correct location in the hash table and then delete
+ * the relevant port from the port list.
+ *
+ * To deal with the case whereby a port is being deleted without first
+ * removing itself from the lists in the hash table, we maintain a list
+ * of multicast addresses the port has registered an interest in, within
+ * the port structure itself. We then simply walk that list of addresses
+ * using them as keys into the hash table and remove the port from the
+ * appropriate lists.
+ */
+static void
+vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vnet_mcast_msg_t	*mcst_pkt;
+	vsw_port_t		*port = ldcp->ldc_port;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	int			i;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	/*
+	 * We know this is a ctrl/mcast packet so
+	 * cast it into the correct structure.
+	 */
+	mcst_pkt = (vnet_mcast_msg_t *)pkt;
+
+	switch (mcst_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
+
+		/*
+		 * Check if in correct state to receive a multicast
+		 * message (i.e. handshake complete). If not reset
+		 * the handshake.
+		 */
+		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
+			return;
+
+		/*
+		 * Before attempting to add or remove address check
+		 * that they are valid multicast addresses.
+		 * If not, then NACK back.
+		 * (Bit 0 of the first octet is the Ethernet group bit;
+		 * it must be set for any multicast address.)
+		 */
+		for (i = 0; i < mcst_pkt->count; i++) {
+			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
+				DERR(vswp, "%s: invalid multicast address",
+					__func__);
+				SND_MCST_NACK(ldcp, mcst_pkt);
+				return;
+			}
+		}
+
+		/*
+		 * Now add/remove the addresses. If this fails we
+		 * NACK back.
+		 */
+		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
+			SND_MCST_NACK(ldcp, mcst_pkt);
+			return;
+		}
+
+		/* All addresses accepted: ACK the request in place. */
+		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
+		mcst_pkt->tag.vio_sid = ldcp->local_session;
+
+		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);
+
+		vsw_send_msg(ldcp, (void *)mcst_pkt,
+					sizeof (vnet_mcast_msg_t));
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
+
+		/*
+		 * We shouldn't ever get a multicast ACK message as
+		 * at the moment we never request multicast addresses
+		 * to be set on some other device. This may change in
+		 * the future if we have cascading switches.
+		 */
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
+			return;
+
+		/* Do nothing */
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
+
+		/*
+		 * We shouldn't get a multicast NACK packet for the
+		 * same reasons as we shouldn't get a ACK packet.
+		 */
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
+			return;
+
+		/* Do nothing */
+		break;
+
+	default:
+		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
+			mcst_pkt->tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Process an RDX (ready to exchange data) control message.
+ *
+ * An INFO from the peer is ACK'ed back and advances the handshake
+ * state machine; a direct ACK is unexpected here (ACKs are handled
+ * in-band by the callback handler) and restarts the handshake; a
+ * NACK is recorded and passed to the milestone logic.
+ */
+static void
+vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vio_rdx_msg_t	*rdx_pkt;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	/*
+	 * We know this is a ctrl/rdx packet so
+	 * cast it into the correct structure.
+	 */
+	rdx_pkt = (vio_rdx_msg_t *)pkt;
+
+	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
+
+	switch (rdx_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
+
+		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV))
+			return;
+
+		/* Turn the message around into an ACK. */
+		rdx_pkt->tag.vio_sid = ldcp->local_session;
+		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
+
+		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);
+
+		ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT;
+
+		vsw_send_msg(ldcp, (void *)rdx_pkt,
+				sizeof (vio_rdx_msg_t));
+
+		vsw_next_milestone(ldcp);
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		/*
+		 * Should be handled in-band by callback handler.
+		 */
+		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
+		vsw_restart_handshake(ldcp);
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
+
+		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV))
+			return;
+
+		ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV;
+		vsw_next_milestone(ldcp);
+		break;
+
+	default:
+		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
+			rdx_pkt->tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Generic dispatch point for DATA packets.
+ *
+ * Validates the peer session id (once a session is established) and
+ * checks that the handshake has reached MILESTONE4; any violation
+ * restarts the handshake.  Otherwise the packet is routed by its
+ * subtype envelope to the dring, raw, or in-band handler.
+ */
+static void
+vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
+{
+	uint16_t	env = tag.vio_subtype_env;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	/* session id check */
+	if (ldcp->session_status & VSW_PEER_SESSION) {
+		if (ldcp->peer_session != tag.vio_sid) {
+			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
+				__func__, ldcp->ldc_id, tag.vio_sid);
+			vsw_restart_handshake(ldcp);
+			return;
+		}
+	}
+
+	/*
+	 * It is an error for us to be getting data packets
+	 * before the handshake has completed.
+	 */
+	if (ldcp->hphase != VSW_MILESTONE4) {
+		DERR(vswp, "%s: got data packet before handshake complete "
+			"hphase %d (%x: %x)", __func__, ldcp->hphase,
+			ldcp->lane_in.lstate, ldcp->lane_out.lstate);
+		DUMP_FLAGS(ldcp->lane_in.lstate);
+		DUMP_FLAGS(ldcp->lane_out.lstate);
+		vsw_restart_handshake(ldcp);
+		return;
+	}
+
+	/*
+	 * Switch on vio_subtype envelope, then let lower routines
+	 * decide if its an INFO, ACK or NACK packet.
+	 */
+	if (env == VIO_DRING_DATA) {
+		vsw_process_data_dring_pkt(ldcp, dpkt);
+	} else if (env == VIO_PKT_DATA) {
+		vsw_process_data_raw_pkt(ldcp, dpkt);
+	} else if (env == VIO_DESC_DATA) {
+		vsw_process_data_ibnd_pkt(ldcp, dpkt);
+	} else {
+		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
+			__func__, env);
+	}
+
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+}
+
+#define SND_DRING_NACK(ldcp, pkt) \
+ pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
+ pkt->tag.vio_sid = ldcp->local_session; \
+ vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_dring_msg_t));
+
+/*
+ * Process a DATA/DRING packet.
+ *
+ * INFO:  the peer has populated descriptors [start_idx, end_idx] of its
+ *        exported ring.  Acquire that range, copy each frame into an
+ *        mblk and build a chain which is handed to the switching code.
+ *        Descriptors are ACK'ed back (and the ring synced) whenever the
+ *        peer requested an ACK on an individual descriptor.
+ * ACK:   the peer has finished with descriptors we exported; verify they
+ *        are DONE and recycle them (clear data, mark FREE).
+ * NACK:  fatal for data traffic - restart the handshake.
+ */
+static void
+vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
+{
+	vio_dring_msg_t		*dring_pkt;
+	vnet_public_desc_t	*pub_addr = NULL;
+	vsw_private_desc_t	*priv_addr = NULL;
+	dring_info_t		*dp = NULL;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	mblk_t			*mp = NULL;
+	mblk_t			*bp = NULL;
+	mblk_t			*bpt = NULL;
+	size_t			nbytes = 0;
+	size_t			off = 0;
+	uint64_t		ncookies = 0;
+	uint64_t		chain = 0;
+	uint64_t		j, len, num;
+	uint32_t		start, end, datalen;
+	int			i, last_sync, rv;
+	boolean_t		ack_needed = B_FALSE;
+	boolean_t		sync_needed = B_TRUE;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	/*
+	 * We know this is a data/dring packet so
+	 * cast it into the correct structure.
+	 */
+	dring_pkt = (vio_dring_msg_t *)dpkt;
+
+	/*
+	 * Switch on the vio_subtype. If its INFO then we need to
+	 * process the data. If its an ACK we need to make sure
+	 * it makes sense (i.e did we send an earlier data/info),
+	 * and if its a NACK then we maybe attempt a retry.
+	 */
+	switch (dring_pkt->tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
+
+		if ((dp = vsw_ident2dring(&ldcp->lane_in,
+				dring_pkt->dring_ident)) == NULL) {
+
+			DERR(vswp, "%s(%lld): unable to find dring from "
+				"ident 0x%llx", __func__, ldcp->ldc_id,
+				dring_pkt->dring_ident);
+
+			SND_DRING_NACK(ldcp, dring_pkt);
+			return;
+		}
+
+		start = end = 0;
+		start = dring_pkt->start_idx;
+		end = dring_pkt->end_idx;
+
+		D3(vswp, "%s(%lld): start index %ld : end %ld\n",
+			__func__, ldcp->ldc_id, start, end);
+
+		/*
+		 * Basic sanity check: valid indices run from 0 to
+		 * num_descriptors - 1, so end == len is already out of
+		 * range (the previous "end > len" test was off by one
+		 * and let that value through).
+		 */
+		len = dp->num_descriptors;
+		if (end >= len) {
+			DERR(vswp, "%s(%lld): endpoint %lld outside ring"
+				" length %lld", __func__, ldcp->ldc_id,
+				end, len);
+
+			SND_DRING_NACK(ldcp, dring_pkt);
+			return;
+		}
+
+		/* sync data */
+		if ((rv = ldc_mem_dring_acquire(dp->handle,
+						start, end)) != 0) {
+			DERR(vswp, "%s(%lld): unable to acquire dring : err %d",
+				__func__, ldcp->ldc_id, rv);
+			return;
+		}
+
+		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
+
+		j = num = 0;
+
+		/* calculate # descriptors taking into a/c wrap around */
+		num = end >= start ? end - start + 1: (len - start + 1) + end;
+
+		last_sync = start;
+
+		for (i = start; j < num; i = (i + 1) % len, j++) {
+			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
+
+			/*
+			 * Data is padded to align on 8 byte boundary,
+			 * datalen is actual data length, i.e. minus that
+			 * padding.
+			 */
+			datalen = pub_addr->nbytes;
+
+			/*
+			 * Does peer wish us to ACK when we have finished
+			 * with this descriptor ?
+			 */
+			if (pub_addr->hdr.ack)
+				ack_needed = B_TRUE;
+
+			D2(vswp, "%s(%lld): processing desc %lld at pos"
+				" 0x%llx : dstate 0x%lx : datalen 0x%lx",
+				__func__, ldcp->ldc_id, i, pub_addr,
+				pub_addr->hdr.dstate, datalen);
+
+			/*
+			 * XXXX : Is it a fatal error to be told to
+			 * process a packet when the READY bit is not
+			 * set ?
+			 */
+			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
+				DERR(vswp, "%s(%d): descriptor %lld at pos "
+					" 0x%llx not READY (0x%lx)", __func__,
+					ldcp->ldc_id, i, pub_addr,
+					pub_addr->hdr.dstate);
+
+				SND_DRING_NACK(ldcp, dring_pkt);
+				(void) ldc_mem_dring_release(dp->handle,
+					start, end);
+				return;
+			}
+
+			/*
+			 * Mark that we are starting to process descriptor.
+			 */
+			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
+
+			/*
+			 * allocb(9F) returns an aligned data block. We
+			 * need to ensure that we ask ldc for an aligned
+			 * number of bytes also.
+			 *
+			 * Allocate the rounded-up size (nbytes): we ask
+			 * ldc_mem_copy() below to copy in nbytes bytes, so
+			 * allocating only datalen would let the copy write
+			 * up to 7 bytes past the end of the block.
+			 */
+			nbytes = datalen;
+			if (nbytes & 0x7) {
+				off = 8 - (nbytes & 0x7);
+				nbytes += off;
+			}
+			mp = allocb(nbytes, BPRI_MED);
+			if (mp == NULL) {
+				DERR(vswp, "%s(%lld): allocb failed",
+					__func__, ldcp->ldc_id);
+				(void) ldc_mem_dring_release(dp->handle,
+					start, end);
+				return;
+			}
+
+			ncookies = pub_addr->ncookies;
+			rv = ldc_mem_copy(ldcp->ldc_handle,
+				(caddr_t)mp->b_rptr, 0, &nbytes,
+				pub_addr->memcookie, ncookies,
+				LDC_COPY_IN);
+
+			if (rv != 0) {
+				DERR(vswp, "%s(%d): unable to copy in "
+					"data from %d cookies", __func__,
+					ldcp->ldc_id, ncookies);
+				freemsg(mp);
+				(void) ldc_mem_dring_release(dp->handle,
+					start, end);
+				return;
+			} else {
+				D2(vswp, "%s(%d): copied in %ld bytes"
+					" using %d cookies", __func__,
+					ldcp->ldc_id, nbytes, ncookies);
+			}
+
+			/* point to the actual end of data */
+			mp->b_wptr = mp->b_rptr + datalen;
+
+			/* build a chain of received packets */
+			if (bp == NULL) {
+				/* first pkt */
+				bp = mp;
+				bp->b_next = bp->b_prev = NULL;
+				bpt = bp;
+				chain = 1;
+			} else {
+				mp->b_next = NULL;
+				mp->b_prev = bpt;
+				bpt->b_next = mp;
+				bpt = mp;
+				chain++;
+			}
+
+			/* mark we are finished with this descriptor */
+			pub_addr->hdr.dstate = VIO_DESC_DONE;
+
+			/*
+			 * Send an ACK back to peer if requested, and sync
+			 * the rings up to this point so the remote side sees
+			 * the descriptor flag in a consistent state.
+			 */
+			if (ack_needed) {
+				if ((rv = ldc_mem_dring_release(
+					dp->handle, last_sync, i)) != 0) {
+					DERR(vswp, "%s(%lld): unable to sync"
+						" from %d to %d", __func__,
+						ldcp->ldc_id, last_sync, i);
+				}
+
+				ack_needed = B_FALSE;
+
+				if (i == end)
+					sync_needed = B_FALSE;
+				else
+					sync_needed = B_TRUE;
+
+				last_sync = (i + 1) % len;
+
+				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
+				dring_pkt->tag.vio_sid = ldcp->local_session;
+				vsw_send_msg(ldcp, (void *)dring_pkt,
+					sizeof (vio_dring_msg_t));
+			}
+		}
+
+		if (sync_needed) {
+			if ((rv = ldc_mem_dring_release(dp->handle,
+					last_sync, end)) != 0) {
+				DERR(vswp, "%s(%lld): unable to sync"
+					" from %d to %d", __func__,
+					ldcp->ldc_id, last_sync, end);
+			}
+		}
+
+		/* send the chain of packets to be switched */
+		D3(vswp, "%s(%lld): switching chain of %d msgs", __func__,
+			ldcp->ldc_id, chain);
+		vsw_switch_frame(vswp, bp, VSW_VNETPORT,
+					ldcp->ldc_port, NULL);
+
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
+		/*
+		 * Verify that the relevant descriptors are all
+		 * marked as DONE
+		 */
+		if ((dp = vsw_ident2dring(&ldcp->lane_out,
+			dring_pkt->dring_ident)) == NULL) {
+			DERR(vswp, "%s: unknown ident in ACK", __func__);
+			return;
+		}
+
+		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
+		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
+
+		start = end = 0;
+		start = dring_pkt->start_idx;
+		end = dring_pkt->end_idx;
+		len = dp->num_descriptors;
+
+
+		j = num = 0;
+		/* calculate # descriptors taking into a/c wrap around */
+		num = end >= start ? end - start + 1: (len - start + 1) + end;
+
+		D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n",
+			__func__, ldcp->ldc_id, start, end, num);
+
+		for (i = start; j < num; i = (i + 1) % len, j++) {
+			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
+			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
+
+			if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
+				DERR(vswp, "%s: descriptor %lld at pos "
+					" 0x%llx not DONE (0x%lx)\n", __func__,
+					i, pub_addr, pub_addr->hdr.dstate);
+				return;
+			} else {
+				/* clear all the fields */
+				bzero(priv_addr->datap, priv_addr->datalen);
+				priv_addr->datalen = 0;
+
+				pub_addr->hdr.dstate = VIO_DESC_FREE;
+				pub_addr->hdr.ack = 0;
+				priv_addr->dstate = VIO_DESC_FREE;
+
+				D3(vswp, "clearing descp %d : pub state "
+					"0x%llx : priv state 0x%llx", i,
+					pub_addr->hdr.dstate,
+					priv_addr->dstate);
+			}
+		}
+
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
+						__func__, ldcp->ldc_id);
+		/*
+		 * Something is badly wrong if we are getting NACK's
+		 * for our data pkts. So reset the channel.
+		 */
+		vsw_restart_handshake(ldcp);
+
+		break;
+
+	default:
+		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
+			ldcp->ldc_id, dring_pkt->tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * VIO_PKT_DATA (a.k.a raw data mode )
+ *
+ * Note - currently not supported. Do nothing.
+ */
+static void
+vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
+{
+	_NOTE(ARGUNUSED(dpkt))
+
+	D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
+
+	/* Stub: raw data mode is unimplemented, so just log and return. */
+	DERR(NULL, "%s (%lld): currently not supported",
+						__func__, ldcp->ldc_id);
+
+	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
+}
+
+#define SND_IBND_DESC_NACK(ldcp, pkt) \
+ pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
+ pkt->tag.vio_sid = ldcp->local_session; \
+ vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t));
+
+/*
+ * Process an in-band descriptor message (most likely from
+ * OBP).
+ */
+/*
+ * Process an in-band descriptor message (most likely from
+ * OBP).
+ *
+ * INFO:  copy the described data in over LDC, ACK the descriptor back
+ *        to the peer and hand the frame to the switching code.
+ * ACK:   the peer consumed a descriptor we exported; verify it is one
+ *        we marked READY and recycle it.
+ * NACK:  release the resources tied to the rejected descriptor; any
+ *        retry decision is left to higher layers.
+ */
+static void
+vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
+{
+	vio_ibnd_desc_t		*ibnd_desc;
+	dring_info_t		*dp = NULL;
+	vsw_private_desc_t	*priv_addr = NULL;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	mblk_t			*mp = NULL;
+	size_t			nbytes = 0;
+	size_t			off = 0;
+	uint64_t		idx = 0;
+	uint32_t		datalen = 0;
+	uint64_t		ncookies = 0;
+	int			rv;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	ibnd_desc = (vio_ibnd_desc_t *)pkt;
+
+	switch (ibnd_desc->hdr.tag.vio_subtype) {
+	case VIO_SUBTYPE_INFO:
+		D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
+
+		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
+			return;
+
+		/*
+		 * Data is padded to align on a 8 byte boundary,
+		 * nbytes is actual data length, i.e. minus that
+		 * padding.
+		 */
+		datalen = ibnd_desc->nbytes;
+
+		D2(vswp, "%s(%lld): processing inband desc : "
+			": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
+
+		ncookies = ibnd_desc->ncookies;
+
+		/*
+		 * allocb(9F) returns an aligned data block. We
+		 * need to ensure that we ask ldc for an aligned
+		 * number of bytes also.
+		 */
+		nbytes = datalen;
+		if (nbytes & 0x7) {
+			off = 8 - (nbytes & 0x7);
+			nbytes += off;
+		}
+
+		/*
+		 * Allocate the rounded-up size (nbytes): ldc_mem_copy()
+		 * below is asked to copy in nbytes bytes, so allocating
+		 * only datalen would let the copy write up to 7 bytes
+		 * past the end of the block.
+		 */
+		mp = allocb(nbytes, BPRI_MED);
+		if (mp == NULL) {
+			DERR(vswp, "%s(%lld): allocb failed",
+					__func__, ldcp->ldc_id);
+			return;
+		}
+
+		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
+			0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
+			LDC_COPY_IN);
+
+		if (rv != 0) {
+			DERR(vswp, "%s(%d): unable to copy in data from "
+				"%d cookie(s)", __func__,
+				ldcp->ldc_id, ncookies);
+			freemsg(mp);
+			return;
+		} else {
+			D2(vswp, "%s(%d): copied in %ld bytes using %d "
+				"cookies", __func__, ldcp->ldc_id, nbytes,
+				ncookies);
+		}
+
+		/* point to the actual end of data */
+		mp->b_wptr = mp->b_rptr + datalen;
+
+		/*
+		 * We ACK back every in-band descriptor message we process
+		 */
+		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
+		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
+		vsw_send_msg(ldcp, (void *)ibnd_desc,
+				sizeof (vio_ibnd_desc_t));
+
+		/* send the packet to be switched */
+		vsw_switch_frame(vswp, mp, VSW_VNETPORT,
+					ldcp->ldc_port, NULL);
+
+		break;
+
+	case VIO_SUBTYPE_ACK:
+		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
+
+		/* Verify the ACK is valid */
+		idx = ibnd_desc->hdr.desc_handle;
+
+		if (idx >= VSW_RING_NUM_EL) {
+			cmn_err(CE_WARN, "%s: corrupted ACK received "
+				"(idx %ld)", __func__, idx);
+			return;
+		}
+
+		if ((dp = ldcp->lane_out.dringp) == NULL) {
+			DERR(vswp, "%s: no dring found", __func__);
+			return;
+		}
+
+		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
+
+		/* move to correct location in ring */
+		priv_addr += idx;
+
+		/*
+		 * When we sent the in-band message to our peer we
+		 * marked the copy in our private ring as READY. We now
+		 * check that the descriptor we are being ACK'ed for is in
+		 * fact READY, i.e. it is one we have shared with our peer.
+		 */
+		if (priv_addr->dstate != VIO_DESC_READY) {
+			cmn_err(CE_WARN, "%s: (%ld) desc at index %ld not "
+				"READY (0x%lx)", __func__, ldcp->ldc_id, idx,
+				priv_addr->dstate);
+			cmn_err(CE_CONT, "%s: bound %d: ncookies %ld\n",
+				__func__, priv_addr->bound,
+				priv_addr->ncookies);
+			cmn_err(CE_CONT, "datalen %ld\n", priv_addr->datalen);
+			return;
+		} else {
+			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
+				ldcp->ldc_id, idx);
+
+			/* release resources associated with sent msg */
+			bzero(priv_addr->datap, priv_addr->datalen);
+			priv_addr->datalen = 0;
+			priv_addr->dstate = VIO_DESC_FREE;
+		}
+		break;
+
+	case VIO_SUBTYPE_NACK:
+		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
+
+		/*
+		 * We should only get a NACK if our peer doesn't like
+		 * something about a message we have sent it. If this
+		 * happens we just release the resources associated with
+		 * the message. (We are relying on higher layers to decide
+		 * whether or not to resend.
+		 */
+
+		/* limit check */
+		idx = ibnd_desc->hdr.desc_handle;
+
+		if (idx >= VSW_RING_NUM_EL) {
+			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
+				__func__, idx);
+			return;
+		}
+
+		if ((dp = ldcp->lane_out.dringp) == NULL) {
+			DERR(vswp, "%s: no dring found", __func__);
+			return;
+		}
+
+		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
+
+		/* move to correct location in ring */
+		priv_addr += idx;
+
+		/* release resources associated with sent msg */
+		bzero(priv_addr->datap, priv_addr->datalen);
+		priv_addr->datalen = 0;
+		priv_addr->dstate = VIO_DESC_FREE;
+
+		break;
+
+	default:
+		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
+			ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
+	}
+
+	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Process an ERR packet.  Placeholder: no error subtypes are defined
+ * yet, so the packet is only logged and otherwise ignored.
+ */
+static void
+vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag)
+{
+	_NOTE(ARGUNUSED(epkt))
+
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	uint16_t	env = tag.vio_subtype_env;
+
+	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
+
+	/*
+	 * Error vio_subtypes have yet to be defined. So for
+	 * the moment we can't do anything.
+	 */
+	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
+
+	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Switch the given ethernet frame when operating in layer 2 mode.
+ *
+ * vswp: pointer to the vsw instance
+ * mp: pointer to chain of ethernet frame(s) to be switched
+ * caller: identifies the source of this frame as:
+ * 1. VSW_VNETPORT - a vsw port (connected to a vnet).
+ * 2. VSW_PHYSDEV - the physical ethernet device
+ * 3. VSW_LOCALDEV - vsw configured as a virtual interface
+ * arg: argument provided by the caller.
+ * 1. for VNETPORT - pointer to the corresponding vsw_port_t.
+ * 2. for PHYSDEV - NULL
+ * 3. for LOCALDEV - pointer to to this vsw_t(self)
+ */
+void
+vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
+			vsw_port_t *arg, mac_resource_handle_t mrh)
+{
+	struct ether_header	*ehp;
+	vsw_port_t		*port = NULL;
+	mblk_t			*bp, *ret_m;
+	mblk_t			*nmp = NULL;
+	vsw_port_list_t		*plist = &vswp->plist;
+
+	D1(vswp, "%s: enter (caller %d)", __func__, caller);
+
+	/*
+	 * PERF: rather than breaking up the chain here, scan it
+	 * to find all mblks heading to same destination and then
+	 * pass that sub-chain to the lower transmit functions.
+	 */
+
+	/* process the chain of packets */
+	bp = mp;
+	while (bp) {
+		/* Detach the head mblk from the chain before switching it. */
+		mp = bp;
+		bp = bp->b_next;
+		mp->b_next = mp->b_prev = NULL;
+		ehp = (struct ether_header *)mp->b_rptr;
+
+		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
+			__func__, MBLKSIZE(mp), MBLKL(mp));
+
+		READ_ENTER(&vswp->if_lockrw);
+		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
+			/*
+			 * If destination is VSW_LOCALDEV (vsw as an eth
+			 * interface) and if the device is up & running,
+			 * send the packet up the stack on this host.
+			 * If the virtual interface is down, drop the packet.
+			 */
+			if (caller != VSW_LOCALDEV) {
+				if (vswp->if_state & VSW_IF_UP) {
+					RW_EXIT(&vswp->if_lockrw);
+					mac_rx(vswp->if_macp, mrh, mp);
+				} else {
+					RW_EXIT(&vswp->if_lockrw);
+					/* Interface down, drop pkt */
+					freemsg(mp);
+				}
+			} else {
+				/* Came from the stack itself: drop. */
+				RW_EXIT(&vswp->if_lockrw);
+				freemsg(mp);
+			}
+			continue;
+		}
+		RW_EXIT(&vswp->if_lockrw);
+
+		READ_ENTER(&plist->lockrw);
+		port = vsw_lookup_fdb(vswp, ehp);
+		if (port) {
+			/*
+			 * Mark the port as in-use.  The ref count keeps
+			 * the port alive after the port-list lock is
+			 * dropped; the delete path waits on ref_cv.
+			 */
+			mutex_enter(&port->ref_lock);
+			port->ref_cnt++;
+			mutex_exit(&port->ref_lock);
+			RW_EXIT(&plist->lockrw);
+
+			/*
+			 * If plumbed and in promisc mode then copy msg
+			 * and send up the stack.
+			 */
+			READ_ENTER(&vswp->if_lockrw);
+			if (VSW_U_P(vswp->if_state)) {
+				RW_EXIT(&vswp->if_lockrw);
+				nmp = copymsg(mp);
+				if (nmp)
+					mac_rx(vswp->if_macp, mrh, nmp);
+			} else {
+				RW_EXIT(&vswp->if_lockrw);
+			}
+
+			/*
+			 * If the destination is in FDB, the packet
+			 * should be forwarded to the corresponding
+			 * vsw_port (connected to a vnet device -
+			 * VSW_VNETPORT)
+			 */
+			(void) vsw_portsend(port, mp);
+
+			/*
+			 * Decrement use count in port and check if
+			 * should wake delete thread.
+			 */
+			mutex_enter(&port->ref_lock);
+			port->ref_cnt--;
+			if (port->ref_cnt == 0)
+				cv_signal(&port->ref_cv);
+			mutex_exit(&port->ref_lock);
+		} else {
+			RW_EXIT(&plist->lockrw);
+			/*
+			 * Destination not in FDB.
+			 *
+			 * If the destination is broadcast or
+			 * multicast forward the packet to all
+			 * (VNETPORTs, PHYSDEV, LOCALDEV),
+			 * except the caller.
+			 */
+			if (IS_BROADCAST(ehp)) {
+				D3(vswp, "%s: BROADCAST pkt", __func__);
+				(void) vsw_forward_all(vswp, mp,
+								caller, arg);
+			} else if (IS_MULTICAST(ehp)) {
+				D3(vswp, "%s: MULTICAST pkt", __func__);
+				(void) vsw_forward_grp(vswp, mp,
+								caller, arg);
+			} else {
+				/*
+				 * If the destination is unicast, and came
+				 * from either a logical network device or
+				 * the switch itself when it is plumbed, then
+				 * send it out on the physical device and also
+				 * up the stack if the logical interface is
+				 * in promiscuous mode.
+				 *
+				 * NOTE: The assumption here is that if we
+				 * cannot find the destination in our fdb, its
+				 * a unicast address, and came from either a
+				 * vnet or down the stack (when plumbed) it
+				 * must be destined for an ethernet device
+				 * outside our ldoms.
+				 */
+				if (caller == VSW_VNETPORT) {
+					READ_ENTER(&vswp->if_lockrw);
+					if (VSW_U_P(vswp->if_state)) {
+						RW_EXIT(&vswp->if_lockrw);
+						nmp = copymsg(mp);
+						if (nmp)
+							mac_rx(vswp->if_macp,
+								mrh, nmp);
+					} else {
+						RW_EXIT(&vswp->if_lockrw);
+					}
+					if ((ret_m = vsw_tx_msg(vswp, mp))
+								!= NULL) {
+						DERR(vswp, "%s: drop mblks to "
+							"phys dev", __func__);
+						freemsg(ret_m);
+					}
+
+				} else if (caller == VSW_PHYSDEV) {
+					/*
+					 * Pkt seen because card in promisc
+					 * mode. Send up stack if plumbed in
+					 * promisc mode, else drop it.
+					 */
+					READ_ENTER(&vswp->if_lockrw);
+					if (VSW_U_P(vswp->if_state)) {
+						RW_EXIT(&vswp->if_lockrw);
+						mac_rx(vswp->if_macp, mrh, mp);
+					} else {
+						RW_EXIT(&vswp->if_lockrw);
+						freemsg(mp);
+					}
+
+				} else if (caller == VSW_LOCALDEV) {
+					/*
+					 * Pkt came down the stack, send out
+					 * over physical device.
+					 */
+					if ((ret_m = vsw_tx_msg(vswp, mp))
+								!= NULL) {
+						DERR(vswp, "%s: drop mblks to "
+							"phys dev", __func__);
+						freemsg(ret_m);
+					}
+				}
+			}
+		}
+	}
+	D1(vswp, "%s: exit\n", __func__);
+}
+
+/*
+ * Switch ethernet frame when in layer 3 mode (i.e. using IP
+ * layer to do the routing).
+ *
+ * There is a large amount of overlap between this function and
+ * vsw_switch_l2_frame. At some stage we need to revisit and refactor
+ * both these functions.
+ */
+void
+vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
+			vsw_port_t *arg, mac_resource_handle_t mrh)
+{
+	struct ether_header	*ehp;
+	vsw_port_t		*port = NULL;
+	mblk_t			*bp = NULL;
+	vsw_port_list_t		*plist = &vswp->plist;
+
+	D1(vswp, "%s: enter (caller %d)", __func__, caller);
+
+	/*
+	 * In layer 3 mode should only ever be switching packets
+	 * between IP layer and vnet devices. So make sure thats
+	 * who is invoking us.
+	 */
+	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
+		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
+		freemsgchain(mp);
+		return;
+	}
+
+	/* process the chain of packets */
+	bp = mp;
+	while (bp) {
+		/* Detach the head mblk from the chain before switching it. */
+		mp = bp;
+		bp = bp->b_next;
+		mp->b_next = mp->b_prev = NULL;
+		ehp = (struct ether_header *)mp->b_rptr;
+
+		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
+			__func__, MBLKSIZE(mp), MBLKL(mp));
+
+		READ_ENTER(&plist->lockrw);
+		port = vsw_lookup_fdb(vswp, ehp);
+		if (port) {
+			/*
+			 * Mark port as in-use.  The ref count keeps the
+			 * port alive after the port-list lock is dropped;
+			 * the delete path waits on ref_cv.
+			 */
+			mutex_enter(&port->ref_lock);
+			port->ref_cnt++;
+			mutex_exit(&port->ref_lock);
+			RW_EXIT(&plist->lockrw);
+
+			D2(vswp, "%s: sending to target port", __func__);
+			(void) vsw_portsend(port, mp);
+
+			/*
+			 * Finished with port so decrement ref count and
+			 * check if should wake delete thread.
+			 */
+			mutex_enter(&port->ref_lock);
+			port->ref_cnt--;
+			if (port->ref_cnt == 0)
+				cv_signal(&port->ref_cv);
+			mutex_exit(&port->ref_lock);
+		} else {
+			RW_EXIT(&plist->lockrw);
+			/*
+			 * Destination not in FDB
+			 *
+			 * If the destination is broadcast or
+			 * multicast forward the packet to all
+			 * (VNETPORTs, PHYSDEV, LOCALDEV),
+			 * except the caller.
+			 */
+			if (IS_BROADCAST(ehp)) {
+				D2(vswp, "%s: BROADCAST pkt", __func__);
+				(void) vsw_forward_all(vswp, mp,
+								caller, arg);
+			} else if (IS_MULTICAST(ehp)) {
+				D2(vswp, "%s: MULTICAST pkt", __func__);
+				(void) vsw_forward_grp(vswp, mp,
+								caller, arg);
+			} else {
+				/*
+				 * Unicast pkt from vnet that we don't have
+				 * an FDB entry for, so must be destined for
+				 * the outside world. Attempt to send up to the
+				 * IP layer to allow it to deal with it.
+				 */
+				if (caller == VSW_VNETPORT) {
+					READ_ENTER(&vswp->if_lockrw);
+					if (vswp->if_state & VSW_IF_UP) {
+						RW_EXIT(&vswp->if_lockrw);
+						D2(vswp, "%s: sending up",
+							__func__);
+						mac_rx(vswp->if_macp, mrh, mp);
+					} else {
+						RW_EXIT(&vswp->if_lockrw);
+						/* Interface down, drop pkt */
+						D2(vswp, "%s I/F down",
+								__func__);
+						freemsg(mp);
+					}
+				}
+			}
+		}
+	}
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
+ * except the caller (port on which frame arrived).
+ */
+static int
+vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
+{
+	vsw_port_list_t	*plist = &vswp->plist;
+	vsw_port_t	*portp;
+	mblk_t		*nmp = NULL;
+	mblk_t		*ret_m = NULL;
+	int		skip_port = 0;
+
+	D1(vswp, "vsw_forward_all: enter\n");
+
+	/*
+	 * Broadcast message from inside ldoms so send to outside
+	 * world if in either of layer 2 modes.
+	 */
+	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
+		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
+		((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
+
+		nmp = dupmsg(mp);
+		if (nmp) {
+			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
+				DERR(vswp, "%s: dropping pkt(s) "
+				"consisting of %ld bytes of data for"
+				" physical device", __func__, MBLKL(ret_m));
+			freemsg(ret_m);
+			}
+		}
+	}
+
+	/* Remember to skip the originating vnet port when fanning out. */
+	if (caller == VSW_VNETPORT)
+		skip_port = 1;
+
+	/*
+	 * Broadcast message from other vnet (layer 2 or 3) or outside
+	 * world (layer 2 only), send up stack if plumbed.
+	 */
+	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
+		READ_ENTER(&vswp->if_lockrw);
+		if (vswp->if_state & VSW_IF_UP) {
+			RW_EXIT(&vswp->if_lockrw);
+			nmp = copymsg(mp);
+			if (nmp)
+				mac_rx(vswp->if_macp, vswp->if_mrh, nmp);
+		} else {
+			RW_EXIT(&vswp->if_lockrw);
+		}
+	}
+
+	/* send it to all VNETPORTs */
+	READ_ENTER(&plist->lockrw);
+	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
+		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
+		/*
+		 * Caution ! - don't reorder these two checks as arg
+		 * will be NULL if the caller is PHYSDEV. skip_port is
+		 * only set if caller is VNETPORT.
+		 */
+		if ((skip_port) && (portp == arg))
+			continue;
+		else {
+			nmp = dupmsg(mp);
+			if (nmp) {
+				/* Each port consumes its own copy. */
+				(void) vsw_portsend(portp, nmp);
+			} else {
+				DERR(vswp, "vsw_forward_all: nmp NULL");
+			}
+		}
+	}
+	RW_EXIT(&plist->lockrw);
+
+	/* Original message no longer needed; copies own the data now. */
+	freemsg(mp);
+
+	D1(vswp, "vsw_forward_all: exit\n");
+	return (0);
+}
+
+/*
+ * Forward pkts to any devices or interfaces which have registered
+ * an interest in them (i.e. multicast groups).
+ */
+static int
+vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
+{
+	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
+	mfdb_ent_t		*entp = NULL;
+	mfdb_ent_t		*tpp = NULL;
+	vsw_port_t		*port;
+	uint64_t		key = 0;
+	mblk_t			*nmp = NULL;
+	mblk_t			*ret_m = NULL;
+	boolean_t		check_if = B_TRUE;
+
+	/*
+	 * Convert address to hash table key
+	 */
+	KEY_HASH(key, ehp->ether_dhost);
+
+	D1(vswp, "%s: key 0x%llx", __func__, key);
+
+	/*
+	 * If pkt came from either a vnet or down the stack (if we are
+	 * plumbed) and we are in layer 2 mode, then we send the pkt out
+	 * over the physical adapter, and then check to see if any other
+	 * vnets are interested in it.
+	 */
+	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
+		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
+		((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
+		nmp = dupmsg(mp);
+		if (nmp) {
+			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
+				DERR(vswp, "%s: dropping pkt(s) "
+					"consisting of %ld bytes of "
+					"data for physical device",
+					__func__, MBLKL(ret_m));
+				freemsg(ret_m);
+			}
+		}
+	}
+
+	READ_ENTER(&vswp->mfdbrw);
+	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
+				(mod_hash_val_t *)&entp) != 0) {
+		D3(vswp, "%s: no table entry found for addr 0x%llx",
+								__func__, key);
+	} else {
+		/*
+		 * Send to list of devices associated with this address...
+		 */
+		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
+
+			/* dont send to ourselves */
+			if ((caller == VSW_VNETPORT) &&
+				(tpp->d_addr == (void *)arg)) {
+				port = (vsw_port_t *)tpp->d_addr;
+				D3(vswp, "%s: not sending to ourselves"
+					" : port %d", __func__,
+					port->p_instance);
+				continue;
+
+			} else if ((caller == VSW_LOCALDEV) &&
+				(tpp->d_type == VSW_LOCALDEV)) {
+				D3(vswp, "%s: not sending back up stack",
+					__func__);
+				continue;
+			}
+
+			if (tpp->d_type == VSW_VNETPORT) {
+				port = (vsw_port_t *)tpp->d_addr;
+				D3(vswp, "%s: sending to port %ld for "
+					" addr 0x%llx", __func__,
+					port->p_instance, key);
+
+				nmp = dupmsg(mp);
+				if (nmp)
+					(void) vsw_portsend(port, nmp);
+			} else {
+				/*
+				 * NOTE(review): if_state is read here
+				 * without taking if_lockrw, unlike the
+				 * check further below - confirm whether
+				 * that is intentional.
+				 */
+				if (vswp->if_state & VSW_IF_UP) {
+					nmp = copymsg(mp);
+					if (nmp)
+						mac_rx(vswp->if_macp,
+							vswp->if_mrh, nmp);
+					check_if = B_FALSE;
+					D3(vswp, "%s: sending up stack"
+						" for addr 0x%llx", __func__,
+						key);
+				}
+			}
+		}
+	}
+
+	RW_EXIT(&vswp->mfdbrw);
+
+	/*
+	 * If the pkt came from either a vnet or from physical device,
+	 * and if we haven't already sent the pkt up the stack then we
+	 * check now if we can/should (i.e. the interface is plumbed
+	 * and in promisc mode).
+	 */
+	if ((check_if) &&
+		((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
+		READ_ENTER(&vswp->if_lockrw);
+		if (VSW_U_P(vswp->if_state)) {
+			RW_EXIT(&vswp->if_lockrw);
+			D3(vswp, "%s: (caller %d) finally sending up stack"
+				" for addr 0x%llx", __func__, caller, key);
+			nmp = copymsg(mp);
+			if (nmp)
+				mac_rx(vswp->if_macp, vswp->if_mrh, nmp);
+		} else {
+			RW_EXIT(&vswp->if_lockrw);
+		}
+	}
+
+	/* Original message no longer needed; copies own the data now. */
+	freemsg(mp);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * Transmit the packet over the given port.
+ *
+ * Consumes 'mp' in all cases: either the port's transmit routine
+ * frees it when finished, or it is freed here.  Returns 1 if the
+ * port has no channel, otherwise the status returned by the
+ * transmit routine (0 when no transmit routine is registered -
+ * the packet is silently dropped in that case).
+ */
+static int
+vsw_portsend(vsw_port_t *port, mblk_t *mp)
+{
+	vsw_ldc_list_t 	*ldcl = &port->p_ldclist;
+	vsw_ldc_t 	*ldcp;
+	int		status = 0;
+
+
+	READ_ENTER(&ldcl->lockrw);
+	/*
+	 * Note for now, we have a single channel.
+	 */
+	ldcp = ldcl->head;
+	if (ldcp == NULL) {
+		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
+		freemsg(mp);
+		RW_EXIT(&ldcl->lockrw);
+		return (1);
+	}
+
+	/*
+	 * Send the message out using the appropriate
+	 * transmit function which will free mblock when it
+	 * is finished with it.
+	 */
+	mutex_enter(&port->tx_lock);
+	if (port->transmit != NULL)
+		status = (*port->transmit)(ldcp, mp);
+	else {
+		/* no transmit routine bound yet - drop (status stays 0) */
+		freemsg(mp);
+	}
+	mutex_exit(&port->tx_lock);
+
+	RW_EXIT(&ldcl->lockrw);
+
+	return (status);
+}
+
+/*
+ * Send packet out via descriptor ring to a logical device.
+ *
+ * Copies the packet data into a free private descriptor, publishes
+ * it to the public ring section and sends a VIO_DRING_DATA message
+ * telling the peer which descriptor index to process.  Consumes
+ * 'mp' (always freed before returning).  Returns LDC_TX_SUCCESS,
+ * LDC_TX_FAILURE or LDC_TX_NORESOURCES.
+ */
+static int
+vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
+{
+	vio_dring_msg_t		dring_pkt;
+	dring_info_t		*dp = NULL;
+	vsw_private_desc_t	*priv_desc = NULL;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	mblk_t			*bp;
+	size_t			n, size;
+	caddr_t			bufp;
+	int			idx;
+	int			status = LDC_TX_SUCCESS;
+
+	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);
+
+	/* TODO: make test a macro */
+	/* drop unless the outbound lane is active and the channel is up */
+	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
+	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
+		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
+			"packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
+			ldcp->lane_out.lstate);
+		freemsg(mp);
+		return (LDC_TX_FAILURE);
+	}
+
+	/*
+	 * Note - using first ring only, this may change
+	 * in the future.
+	 */
+	if ((dp = ldcp->lane_out.dringp) == NULL) {
+		DERR(vswp, "%s(%lld): no dring for outbound lane on"
+			" channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
+		freemsg(mp);
+		return (LDC_TX_FAILURE);
+	}
+
+	/* dlock serialises descriptor allocation and seq_num update */
+	mutex_enter(&dp->dlock);
+
+	size = msgsize(mp);
+	if (size > (size_t)ETHERMAX) {
+		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
+		    ldcp->ldc_id, size);
+		status = LDC_TX_FAILURE;
+		goto vsw_dringsend_free_exit;
+	}
+
+	/*
+	 * Find a free descriptor
+	 *
+	 * Note: for the moment we are assuming that we will only
+	 * have one dring going from the switch to each of its
+	 * peers. This may change in the future.
+	 */
+	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
+		DERR(vswp, "%s(%lld): no descriptor available for ring "
+			"at 0x%llx", __func__, ldcp->ldc_id, dp);
+
+		/* nothing more we can do */
+		status = LDC_TX_NORESOURCES;
+		goto vsw_dringsend_free_exit;
+	} else {
+		D2(vswp, "%s(%lld): free private descriptor found at pos "
+			"%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx,
+			priv_desc);
+	}
+
+	/* copy data into the descriptor */
+	bufp = priv_desc->datap;
+	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
+		n = MBLKL(bp);
+		bcopy(bp->b_rptr, bufp, n);
+		bufp += n;
+	}
+
+	/* pad short frames out to the ethernet minimum */
+	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
+	priv_desc->dstate = VIO_DESC_READY;
+
+	/*
+	 * Copy relevant sections of private descriptor
+	 * to public section
+	 */
+	vsw_dring_priv2pub(priv_desc);
+
+	/*
+	 * Send a vio_dring_msg to peer to prompt them to read
+	 * the updated descriptor ring.
+	 */
+	dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
+	dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
+	dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
+	dring_pkt.tag.vio_sid = ldcp->local_session;
+
+	/* Note - for now using first ring */
+	dring_pkt.dring_ident = dp->ident;
+
+	/*
+	 * Access to the seq_num is implicitly protected by the
+	 * fact that we have only one dring associated with the
+	 * lane currently and we hold the associated dring lock.
+	 */
+	dring_pkt.seq_num = ldcp->lane_out.seq_num++;
+
+	/* Note - only updating single descrip at time at the moment */
+	dring_pkt.start_idx = idx;
+	dring_pkt.end_idx = idx;
+
+	D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
+		ldcp->ldc_id, dp, dring_pkt.dring_ident);
+	D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n", __func__,
+		ldcp->ldc_id, dring_pkt.start_idx, dring_pkt.end_idx,
+		dring_pkt.seq_num);
+
+	vsw_send_msg(ldcp, (void *)&dring_pkt, sizeof (vio_dring_msg_t));
+
+vsw_dringsend_free_exit:
+
+	mutex_exit(&dp->dlock);
+
+	/* free the message block */
+	freemsg(mp);
+
+	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
+	return (status);
+}
+
+/*
+ * Send an in-band descriptor message over ldc.
+ *
+ * Copies the packet into a free descriptor of the private buffer
+ * ring (used purely as local storage here - there is no public
+ * section) and sends a VIO_DESC_DATA message carrying the memory
+ * cookies which describe the data.  Consumes 'mp'.  Returns
+ * LDC_TX_SUCCESS, LDC_TX_FAILURE or LDC_TX_NORESOURCES.
+ */
+static int
+vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
+{
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	vio_ibnd_desc_t		ibnd_msg;
+	vsw_private_desc_t	*priv_desc = NULL;
+	dring_info_t		*dp = NULL;
+	size_t			n, size = 0;
+	caddr_t			bufp;
+	mblk_t			*bp;
+	int			idx, i;
+	int			status = LDC_TX_SUCCESS;
+	/* throttle "no descriptor" noise: warn once until one frees up */
+	static int		warn_msg = 1;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	ASSERT(mp != NULL);
+
+	/* drop unless the outbound lane is active and the channel is up */
+	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
+	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
+		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
+			__func__, ldcp->ldc_id, ldcp->ldc_status,
+			ldcp->lane_out.lstate);
+		freemsg(mp);
+		return (LDC_TX_FAILURE);
+	}
+
+	/*
+	 * only expect single dring to exist, which we use
+	 * as an internal buffer, rather than a transfer channel.
+	 */
+	if ((dp = ldcp->lane_out.dringp) == NULL) {
+		DERR(vswp, "%s(%lld): no dring for outbound lane",
+			__func__, ldcp->ldc_id);
+		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)",
+			__func__, ldcp->ldc_id, ldcp->ldc_status,
+			ldcp->lane_out.lstate);
+		freemsg(mp);
+		return (LDC_TX_FAILURE);
+	}
+
+	mutex_enter(&dp->dlock);
+
+	size = msgsize(mp);
+	if (size > (size_t)ETHERMAX) {
+		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
+		    ldcp->ldc_id, size);
+		status = LDC_TX_FAILURE;
+		goto vsw_descrsend_free_exit;
+	}
+
+	/*
+	 * Find a free descriptor in our buffer ring
+	 */
+	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
+		if (warn_msg) {
+			DERR(vswp, "%s(%lld): no descriptor available for ring "
+				"at 0x%llx", __func__, ldcp->ldc_id, dp);
+			warn_msg = 0;
+		}
+
+		/* nothing more we can do */
+		status = LDC_TX_NORESOURCES;
+		goto vsw_descrsend_free_exit;
+	} else {
+		D2(vswp, "%s(%lld): free private descriptor found at pos "
+			"%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx,
+			priv_desc);
+		/* re-arm the warning now that a descriptor was found */
+		warn_msg = 1;
+	}
+
+	/* copy data into the descriptor */
+	bufp = priv_desc->datap;
+	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
+		n = MBLKL(bp);
+		bcopy(bp->b_rptr, bufp, n);
+		bufp += n;
+	}
+
+	/* pad short frames out to the ethernet minimum */
+	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
+	priv_desc->dstate = VIO_DESC_READY;
+
+	/* create and send the in-band descp msg */
+	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
+	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
+	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
+	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;
+
+	/*
+	 * Access to the seq_num is implicitly protected by the
+	 * fact that we have only one dring associated with the
+	 * lane currently and we hold the associated dring lock.
+	 */
+	ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
+
+	/*
+	 * Copy the mem cookies describing the data from the
+	 * private region of the descriptor ring into the inband
+	 * descriptor.
+	 */
+	for (i = 0; i < priv_desc->ncookies; i++) {
+		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
+			sizeof (ldc_mem_cookie_t));
+	}
+
+	/* handle lets the peer's ACK identify which descriptor to free */
+	ibnd_msg.hdr.desc_handle = idx;
+	ibnd_msg.ncookies = priv_desc->ncookies;
+	ibnd_msg.nbytes = size;
+
+	vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));
+
+vsw_descrsend_free_exit:
+
+	mutex_exit(&dp->dlock);
+
+	/* free the allocated message blocks */
+	freemsg(mp);
+
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+	return (status);
+}
+
+/*
+ * Send a version negotiation (VIO_VER_INFO) message to the peer,
+ * proposing the highest protocol version we support, and note in
+ * the outbound lane state that the proposal is outstanding.
+ */
+static void
+vsw_send_ver(vsw_ldc_t *ldcp)
+{
+	vio_ver_msg_t	vmsg;
+	lane_t		*outp = &ldcp->lane_out;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "%s enter", __func__);
+
+	/* control message tag */
+	vmsg.tag.vio_msgtype = VIO_TYPE_CTRL;
+	vmsg.tag.vio_subtype = VIO_SUBTYPE_INFO;
+	vmsg.tag.vio_subtype_env = VIO_VER_INFO;
+	vmsg.tag.vio_sid = ldcp->local_session;
+
+	/* offer the first (preferred) entry of the version table */
+	vmsg.ver_major = vsw_versions[0].ver_major;
+	vmsg.ver_minor = vsw_versions[0].ver_minor;
+	vmsg.dev_class = VDEV_NETWORK_SWITCH;
+
+	/* remember what we offered so the reply can be matched */
+	outp->lstate |= VSW_VER_INFO_SENT;
+	outp->ver_major = vmsg.ver_major;
+	outp->ver_minor = vmsg.ver_minor;
+
+	DUMP_TAG(vmsg.tag);
+
+	vsw_send_msg(ldcp, &vmsg, sizeof (vio_ver_msg_t));
+
+	D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Send an attribute (VIO_ATTR_INFO) message to the peer, advertising
+ * the default settings of the outbound lane (mtu, address type,
+ * transfer mode, ack frequency) along with our current MAC address.
+ */
+static void
+vsw_send_attr(vsw_ldc_t *ldcp)
+{
+	vsw_t		*vswp = ldcp->ldc_vswp;
+	lane_t		*lp = &ldcp->lane_out;
+	vnet_attr_msg_t	attr_msg;
+
+	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
+
+	/*
+	 * Subtype is set to INFO by default
+	 */
+	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
+	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
+	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
+	attr_msg.tag.vio_sid = ldcp->local_session;
+
+	/* payload copied from default settings for lane */
+	attr_msg.mtu = lp->mtu;
+	attr_msg.addr_type = lp->addr_type;
+	attr_msg.xfer_mode = lp->xfer_mode;
+	/*
+	 * FIX: was 'lp->xfer_mode' (copy/paste error) - advertise the
+	 * lane's actual ack frequency.
+	 */
+	attr_msg.ack_freq = lp->ack_freq;
+
+	READ_ENTER(&vswp->if_lockrw);
+	bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
+	RW_EXIT(&vswp->if_lockrw);
+
+	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
+
+	DUMP_TAG(attr_msg.tag);
+
+	vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t));
+
+	/* FIX: exit trace previously said "enter" */
+	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Create dring info msg (which also results in the creation of
+ * a dring).
+ *
+ * Returns a kmem_zalloc'd VIO_DRING_REG message describing the new
+ * ring (caller frees), or NULL if the ring could not be created.
+ */
+static vio_dring_reg_msg_t *
+vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
+{
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	vio_dring_reg_msg_t	*msgp;
+	dring_info_t		*dringp;
+
+	D1(vswp, "vsw_create_dring_info_pkt enter\n");
+
+	/*
+	 * If we can't create a dring, obviously no point sending
+	 * a message.
+	 */
+	if ((dringp = vsw_create_dring(ldcp)) == NULL)
+		return (NULL);
+
+	msgp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
+
+	/* control message tag */
+	msgp->tag.vio_msgtype = VIO_TYPE_CTRL;
+	msgp->tag.vio_subtype = VIO_SUBTYPE_INFO;
+	msgp->tag.vio_subtype_env = VIO_DRING_REG;
+	msgp->tag.vio_sid = ldcp->local_session;
+
+	/* payload: geometry and export cookie of the ring just created */
+	msgp->num_descriptors = dringp->num_descriptors;
+	msgp->descriptor_size = dringp->descriptor_size;
+	msgp->options = dringp->options;
+	msgp->ncookies = dringp->ncookies;
+	bcopy(&dringp->cookie[0], &msgp->cookie[0],
+	    sizeof (ldc_mem_cookie_t));
+
+	/* no ident assigned yet */
+	msgp->dring_ident = 0;
+
+	D1(vswp, "vsw_create_dring_info_pkt exit\n");
+
+	return (msgp);
+}
+
+/*
+ * Build and send a dring registration (VIO_DRING_REG) message to
+ * the peer.  Building the message also creates and binds the dring
+ * itself; if that fails we log a warning and give up.
+ */
+static void
+vsw_send_dring_info(vsw_ldc_t *ldcp)
+{
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	vio_dring_reg_msg_t	*msgp;
+
+	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
+
+	if ((msgp = vsw_create_dring_info_pkt(ldcp)) == NULL) {
+		cmn_err(CE_WARN, "vsw_send_dring_info: error creating msg");
+		return;
+	}
+
+	/* note in lane state that registration is outstanding */
+	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
+
+	DUMP_TAG_PTR((vio_msg_tag_t *)msgp);
+
+	vsw_send_msg(ldcp, msgp, sizeof (vio_dring_reg_msg_t));
+
+	/* message contents were copied by vsw_send_msg; release ours */
+	kmem_free(msgp, sizeof (vio_dring_reg_msg_t));
+
+	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Send an RDX (receive-data-ready) control message to the peer and
+ * record in the outbound lane state that it has been sent.
+ */
+static void
+vsw_send_rdx(vsw_ldc_t *ldcp)
+{
+	vio_rdx_msg_t	rdx;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
+
+	/* RDX carries no payload - only the tag needs filling in */
+	rdx.tag.vio_msgtype = VIO_TYPE_CTRL;
+	rdx.tag.vio_subtype = VIO_SUBTYPE_INFO;
+	rdx.tag.vio_subtype_env = VIO_RDX;
+	rdx.tag.vio_sid = ldcp->local_session;
+
+	ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT;
+
+	DUMP_TAG(rdx.tag);
+
+	vsw_send_msg(ldcp, &rdx, sizeof (vio_rdx_msg_t));
+
+	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Generic routine to send message out over ldc channel.
+ *
+ * Retries the write while ldc_write() returns EWOULDBLOCK, up to
+ * vsw_wretries attempts per call.  Failures are logged but not
+ * reported to the caller.
+ */
+static void
+vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size)
+{
+	int		rv;
+	int		retries = vsw_wretries;
+	size_t		msglen = size;
+	vio_msg_tag_t	*tag = (vio_msg_tag_t *)msgp;
+	vsw_t		*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
+		ldcp->ldc_id, size);
+
+	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
+	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
+	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
+
+	/*
+	 * FIX: the retry loop used to decrement the global vsw_wretries
+	 * tunable itself ('--vsw_wretries'), permanently consuming the
+	 * retry budget for all channels and all future calls (and doing
+	 * so racily).  Use a per-call local counter instead.
+	 */
+	mutex_enter(&ldcp->ldc_txlock);
+	do {
+		msglen = size;
+		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
+	} while (rv == EWOULDBLOCK && --retries > 0);
+
+	mutex_exit(&ldcp->ldc_txlock);
+
+	if ((rv != 0) || (msglen != size)) {
+		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) "
+			"rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
+			rv, size, msglen);
+	}
+
+	D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes",
+		ldcp->ldc_id, msglen);
+}
+
+/*
+ * Add an entry into FDB, for the given mac address and port_id.
+ * Returns 0 on success, 1 on failure.
+ *
+ * Lock protecting FDB must be held by calling process.
+ */
+static int
+vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
+{
+	uint64_t	key = 0;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/* fold the port's MAC address into the FDB key form */
+	KEY_HASH(key, port->p_macaddr);
+
+	D2(vswp, "%s: key = 0x%llx", __func__, key);
+
+	/*
+	 * Note: duplicate keys will be rejected by mod_hash.
+	 */
+	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)key,
+	    (mod_hash_val_t)port) != 0) {
+		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
+		return (1);
+	}
+
+	D1(vswp, "%s: exit", __func__);
+	return (0);
+}
+
+/*
+ * Remove an entry from FDB.
+ * Returns 0 on success, 1 on failure.
+ *
+ * (Always returns 0 at present - mod_hash_destroy's result is
+ * deliberately ignored.)
+ */
+static int
+vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
+{
+	uint64_t	addr = 0;
+
+	D1(vswp, "%s: enter", __func__);
+
+	KEY_HASH(addr, port->p_macaddr);
+
+	D2(vswp, "%s: key = 0x%llx", __func__, addr);
+
+	/* FIX: key argument was wrongly cast to mod_hash_val_t */
+	(void) mod_hash_destroy(vswp->fdb, (mod_hash_key_t)addr);
+
+	/* FIX: exit trace previously said "enter" */
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * Search fdb for a given mac address.
+ * Returns pointer to the entry if found, else returns NULL.
+ */
+static vsw_port_t *
+vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
+{
+	vsw_port_t	*port = NULL;
+	uint64_t	key = 0;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/* fold the destination MAC address into the FDB key form */
+	KEY_HASH(key, ehp->ether_dhost);
+
+	D2(vswp, "%s: key = 0x%llx", __func__, key);
+
+	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
+	    (mod_hash_val_t *)&port) != 0)
+		return (NULL);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (port);
+}
+
+/*
+ * Add or remove multicast address(es).
+ *
+ * Returns 0 on success, 1 on failure.
+ *
+ * For each address in the request: when 'set' is 1 the address/port
+ * pairing is added to the multicast FDB, linked onto the port's own
+ * address list and programmed into the underlying MAC; otherwise the
+ * pairing is removed from all three places.  Processing stops at the
+ * first failure.
+ *
+ * NOTE(review): if kmem_alloc() fails after vsw_add_mcst() has
+ * already succeeded, we return 1 with the entry still present in
+ * the mFDB - confirm the caller tolerates this inconsistency.
+ */
+static int
+vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
+{
+	mcst_addr_t		*mcst_p = NULL;
+	vsw_t			*vswp = port->p_vswp;
+	uint64_t		addr = 0x0;
+	int			i;
+
+	D1(vswp, "%s: enter", __func__);
+
+	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
+
+	for (i = 0; i < mcst_pkt->count; i++) {
+		/*
+		 * Convert address into form that can be used
+		 * as hash table key.
+		 */
+		KEY_HASH(addr, mcst_pkt->mca[i]);
+
+		/*
+		 * Add or delete the specified address/port combination.
+		 */
+		if (mcst_pkt->set == 0x1) {
+			D3(vswp, "%s: adding multicast address 0x%llx for "
+				"port %ld", __func__, addr, port->p_instance);
+			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
+				/*
+				 * Update the list of multicast
+				 * addresses contained within the
+				 * port structure to include this new
+				 * one.
+				 */
+				mcst_p = kmem_alloc(sizeof (mcst_addr_t),
+								KM_NOSLEEP);
+				if (mcst_p == NULL) {
+					DERR(vswp, "%s: unable to alloc mem",
+						__func__);
+					return (1);
+				}
+
+				mcst_p->nextp = NULL;
+				mcst_p->addr = addr;
+
+				/* push onto head of the port's address list */
+				mutex_enter(&port->mca_lock);
+				mcst_p->nextp = port->mcap;
+				port->mcap = mcst_p;
+				mutex_exit(&port->mca_lock);
+
+				/*
+				 * Program the address into HW. If the addr
+				 * has already been programmed then the MAC
+				 * just increments a ref counter (which is
+				 * used when the address is being deleted)
+				 *
+				 * Note:
+				 * For the moment we dont care if this
+				 * succeeds because the card must be in
+				 * promics mode. When we have the ability
+				 * to program multiple unicst address into
+				 * the card then we will need to check this
+				 * return value.
+				 */
+				if (vswp->mh != NULL)
+					(void) mac_multicst_add(vswp->mh,
+						(uchar_t *)&mcst_pkt->mca[i]);
+
+			} else {
+				DERR(vswp, "%s: error adding multicast "
+					"address 0x%llx for port %ld",
+					__func__, addr, port->p_instance);
+				return (1);
+			}
+		} else {
+			/*
+			 * Delete an entry from the multicast hash
+			 * table and update the address list
+			 * appropriately.
+			 */
+			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
+				D3(vswp, "%s: deleting multicast address "
+					"0x%llx for port %ld", __func__, addr,
+					port->p_instance);
+
+				/* drop from the port's own address list too */
+				vsw_del_addr(VSW_VNETPORT, port, addr);
+
+				/*
+				 * Remove the address from HW. The address
+				 * will actually only be removed once the ref
+				 * count within the MAC layer has dropped to
+				 * zero. I.e. we can safely call this fn even
+				 * if other ports are interested in this
+				 * address.
+				 */
+				if (vswp->mh != NULL)
+					(void) mac_multicst_remove(vswp->mh,
+						(uchar_t *)&mcst_pkt->mca[i]);
+
+			} else {
+				DERR(vswp, "%s: error deleting multicast "
+					"addr 0x%llx for port %ld",
+					__func__, addr, port->p_instance);
+				return (1);
+			}
+		}
+	}
+	D1(vswp, "%s: exit", __func__);
+	return (0);
+}
+
+/*
+ * Add a new multicast entry.
+ *
+ * Search hash table based on address. If match found then
+ * update associated val (which is chain of ports), otherwise
+ * create new key/val (addr/port) pair and insert into table.
+ *
+ * 'arg' is the interested port when devtype is VSW_VNETPORT,
+ * otherwise the vsw instance itself is registered.  Returns 0 on
+ * success, 1 on failure (hash insertion failed, or the device was
+ * already registered for this address).
+ */
+static int
+vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
+{
+	int		dup = 0;
+	int		rv = 0;
+	mfdb_ent_t	*ment = NULL;
+	mfdb_ent_t	*tmp_ent = NULL;
+	mfdb_ent_t	*new_ent = NULL;
+	void		*tgt = NULL;
+
+	if (devtype == VSW_VNETPORT) {
+		/*
+		 * Being invoked from a vnet.
+		 */
+		ASSERT(arg != NULL);
+		tgt = arg;
+		D2(NULL, "%s: port %d : address 0x%llx", __func__,
+			((vsw_port_t *)arg)->p_instance, addr);
+	} else {
+		/*
+		 * We are being invoked via the m_multicst mac entry
+		 * point.
+		 */
+		D2(NULL, "%s: address 0x%llx", __func__, addr);
+		tgt = (void *)vswp;
+	}
+
+	/* writer lock held across lookup + list surgery */
+	WRITE_ENTER(&vswp->mfdbrw);
+	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
+	    (mod_hash_val_t *)&ment) != 0) {
+
+		/* address not currently in table */
+		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
+		ment->d_addr = (void *)tgt;
+		ment->d_type = devtype;
+		ment->nextp = NULL;
+
+		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
+			(mod_hash_val_t)ment) != 0) {
+			DERR(vswp, "%s: hash table insertion failed", __func__);
+			kmem_free(ment, sizeof (mfdb_ent_t));
+			rv = 1;
+		} else {
+			D2(vswp, "%s: added initial entry for 0x%llx to "
+				"table", __func__, addr);
+		}
+	} else {
+		/*
+		 * Address in table. Check to see if specified port
+		 * is already associated with the address. If not add
+		 * it now.
+		 */
+		tmp_ent = ment;
+		while (tmp_ent != NULL) {
+			if (tmp_ent->d_addr == (void *)tgt) {
+				if (devtype == VSW_VNETPORT) {
+					DERR(vswp, "%s: duplicate port entry "
+						"found for portid %ld and key "
+						"0x%llx", __func__,
+						((vsw_port_t *)arg)->p_instance,
+						addr);
+				} else {
+					DERR(vswp, "%s: duplicate entry found"
+						"for key 0x%llx",
+						__func__, addr);
+				}
+				/* duplicates are reported as failure */
+				rv = 1;
+				dup = 1;
+				break;
+			}
+			tmp_ent = tmp_ent->nextp;
+		}
+
+		/*
+		 * Port not on list so add it to end now.
+		 */
+		if (0 == dup) {
+			D2(vswp, "%s: added entry for 0x%llx to table",
+				__func__, addr);
+			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
+			new_ent->d_addr = (void *)tgt;
+			new_ent->d_type = devtype;
+			new_ent->nextp = NULL;
+
+			/* walk to the tail and link the new entry on */
+			tmp_ent = ment;
+			while (tmp_ent->nextp != NULL)
+				tmp_ent = tmp_ent->nextp;
+
+			tmp_ent->nextp = new_ent;
+		}
+	}
+
+	RW_EXIT(&vswp->mfdbrw);
+	return (rv);
+}
+
+/*
+ * Remove a multicast entry from the hashtable.
+ *
+ * Search hash table based on address. If match found, scan
+ * list of ports associated with address. If specified port
+ * found remove it from list.
+ *
+ * Returns 0 on success (including "device not on the list"),
+ * 1 if the address is not in the table at all.
+ */
+static int
+vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
+{
+	mfdb_ent_t	*ment = NULL;
+	mfdb_ent_t	*curr_p, *prev_p;
+	void		*tgt = NULL;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/* identify which registrant we are removing */
+	if (devtype == VSW_VNETPORT) {
+		tgt = (vsw_port_t *)arg;
+		D2(vswp, "%s: removing port %d from mFDB for address"
+			" 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance,
+			addr);
+	} else {
+		D2(vswp, "%s: removing entry", __func__);
+		tgt = (void *)vswp;
+	}
+
+	WRITE_ENTER(&vswp->mfdbrw);
+	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
+	    (mod_hash_val_t *)&ment) != 0) {
+		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
+		RW_EXIT(&vswp->mfdbrw);
+		return (1);
+	}
+
+	prev_p = curr_p = ment;
+
+	while (curr_p != NULL) {
+		if (curr_p->d_addr == (void *)tgt) {
+			if (devtype == VSW_VNETPORT) {
+				D2(vswp, "%s: port %d found", __func__,
+					((vsw_port_t *)tgt)->p_instance);
+			} else {
+				D2(vswp, "%s: instance found", __func__);
+			}
+
+			if (prev_p == curr_p) {
+				/*
+				 * head of list, if no other element is in
+				 * list then destroy this entry, otherwise
+				 * just replace it with updated value.
+				 */
+				ment = curr_p->nextp;
+				kmem_free(curr_p, sizeof (mfdb_ent_t));
+				if (ment == NULL) {
+					(void) mod_hash_destroy(vswp->mfdb,
+					    (mod_hash_val_t)addr);
+				} else {
+					(void) mod_hash_replace(vswp->mfdb,
+					    (mod_hash_key_t)addr,
+					    (mod_hash_val_t)ment);
+				}
+			} else {
+				/*
+				 * Not head of list, no need to do
+				 * replacement, just adjust list pointers.
+				 */
+				prev_p->nextp = curr_p->nextp;
+				kmem_free(curr_p, sizeof (mfdb_ent_t));
+			}
+			break;
+		}
+
+		prev_p = curr_p;
+		curr_p = curr_p->nextp;
+	}
+
+	RW_EXIT(&vswp->mfdbrw);
+
+	D1(vswp, "%s: exit", __func__);
+
+	return (0);
+}
+
+/*
+ * Port is being deleted, but has registered an interest in one
+ * or more multicast groups. Using the list of addresses maintained
+ * within the port structure find the appropriate entry in the hash
+ * table and remove this port from the list of interested ports.
+ */
+static void
+vsw_del_mcst_port(vsw_port_t *port)
+{
+	vsw_t		*vswp = port->p_vswp;
+	mcst_addr_t	*next;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/*
+	 * Walk the port's multicast address list, dropping each
+	 * address from the global mFDB and freeing the list node.
+	 */
+	mutex_enter(&port->mca_lock);
+	for (; port->mcap != NULL; port->mcap = next) {
+		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
+					port->mcap->addr, port);
+
+		next = port->mcap->nextp;
+		kmem_free(port->mcap, sizeof (mcst_addr_t));
+	}
+	mutex_exit(&port->mca_lock);
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+/*
+ * This vsw instance is detaching, but has registered an interest in one
+ * or more multicast groups. Using the list of addresses maintained
+ * within the vsw structure find the appropriate entry in the hash
+ * table and remove this instance from the list of interested ports.
+ */
+static void
+vsw_del_mcst_vsw(vsw_t *vswp)
+{
+	mcst_addr_t	*nextp = NULL;
+
+	D1(vswp, "%s: enter", __func__);
+
+	/*
+	 * Walk the instance's multicast address list, dropping each
+	 * address from the global mFDB and freeing the list node.
+	 */
+	mutex_enter(&vswp->mca_lock);
+
+	for (; vswp->mcap != NULL; vswp->mcap = nextp) {
+		DERR(vswp, "%s: deleting addr 0x%llx",
+			__func__, vswp->mcap->addr);
+		(void) vsw_del_mcst(vswp, VSW_LOCALDEV,
+				vswp->mcap->addr, NULL);
+
+		nextp = vswp->mcap->nextp;
+		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
+	}
+
+	vswp->mcap = NULL;
+	mutex_exit(&vswp->mca_lock);
+
+	D1(vswp, "%s: exit", __func__);
+}
+
+
+/*
+ * Remove the specified address from the list of address maintained
+ * in this port node.
+ */
+static void
+vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
+{
+ vsw_t *vswp = NULL;
+ vsw_port_t *port = NULL;
+ mcst_addr_t *prev_p = NULL;
+ mcst_addr_t *curr_p = NULL;
+
+ D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
+ __func__, devtype, addr);
+
+ if (devtype == VSW_VNETPORT) {
+ port = (vsw_port_t *)arg;
+ mutex_enter(&port->mca_lock);
+ prev_p = curr_p = port->mcap;
+ } else {
+ vswp = (vsw_t *)arg;
+ mutex_enter(&vswp->mca_lock);
+ prev_p = curr_p = vswp->mcap;
+ }
+
+ while (curr_p != NULL) {
+ if (curr_p->addr == addr) {
+ D2(NULL, "%s: address found", __func__);
+ /* match found */
+ if (prev_p == curr_p) {
+ /* list head */
+ if (devtype == VSW_VNETPORT)
+ port->mcap = curr_p->nextp;
+ else
+ vswp->mcap = curr_p->nextp;
+ } else {
+ prev_p->nextp = curr_p->nextp;
+ }
+ kmem_free(curr_p, sizeof (mcst_addr_t));
+ break;
+ } else {
+ prev_p = curr_p;
+ curr_p = curr_p->nextp;
+ }
+ }
+
+ if (devtype == VSW_VNETPORT)
+ mutex_exit(&port->mca_lock);
+ else
+ mutex_exit(&vswp->mca_lock);
+
+ D1(NULL, "%s: exit", __func__);
+}
+
+/*
+ * Creates a descriptor ring (dring) and links it into the
+ * link of outbound drings for this channel.
+ *
+ * Creates the public (exported) ring via ldc, allocates and sets up
+ * the matching private descriptors, binds the ring to the channel
+ * and appends it to ldcp->lane_out.dringp.
+ *
+ * Returns NULL if creation failed.
+ */
+static dring_info_t *
+vsw_create_dring(vsw_ldc_t *ldcp)
+{
+	vsw_private_desc_t	*priv_addr = NULL;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	ldc_mem_info_t		minfo;
+	dring_info_t		*dp, *tp;
+	int			i;
+
+	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
+
+	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
+
+	/* create public section of ring */
+	if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
+			VSW_PUB_SIZE, &dp->handle)) != 0) {
+
+		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
+			"failed", ldcp->ldc_id);
+		goto create_fail_exit;
+	}
+
+	ASSERT(dp->handle != NULL);
+
+	/*
+	 * Get the base address of the public section of the ring.
+	 */
+	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
+		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
+			ldcp->ldc_id);
+		goto dring_fail_exit;
+	} else {
+		ASSERT(minfo.vaddr != 0);
+		dp->pub_addr = minfo.vaddr;
+	}
+
+	dp->num_descriptors = VSW_RING_NUM_EL;
+	dp->descriptor_size = VSW_PUB_SIZE;
+	dp->options = VIO_TX_DRING;
+	dp->ncookies = 1;	/* guaranteed by ldc */
+
+	/*
+	 * create private portion of ring
+	 */
+	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
+		(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);
+
+	if (vsw_setup_ring(ldcp, dp)) {
+		DERR(vswp, "%s: unable to setup ring", __func__);
+		goto dring_fail_exit;
+	}
+
+	/* haven't used any descriptors yet */
+	dp->end_idx = 0;
+
+	/* bind dring to the channel */
+	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
+		LDC_SHADOW_MAP, LDC_MEM_RW,
+		&dp->cookie[0], &dp->ncookies)) != 0) {
+		DERR(vswp, "vsw_create_dring: unable to bind to channel "
+			"%lld", ldcp->ldc_id);
+		goto dring_fail_exit;
+	}
+
+	/*
+	 * Only ever create rings for outgoing lane. Link it onto
+	 * end of list.
+	 */
+	if (ldcp->lane_out.dringp == NULL) {
+		D2(vswp, "vsw_create_dring: adding first outbound ring");
+		ldcp->lane_out.dringp = dp;
+	} else {
+		tp = ldcp->lane_out.dringp;
+		while (tp->next != NULL)
+			tp = tp->next;
+
+		tp->next = dp;
+	}
+
+	return (dp);
+
+dring_fail_exit:
+	/* public ring exists at this point; tear it down first */
+	(void) ldc_mem_dring_destroy(dp->handle);
+
+create_fail_exit:
+	/* priv_addr is NULL if we failed before the private alloc */
+	if (dp->priv_addr != NULL) {
+		priv_addr = dp->priv_addr;
+		for (i = 0; i < VSW_RING_NUM_EL; i++) {
+			if (priv_addr->memhandle != NULL)
+				(void) ldc_mem_free_handle(
+						priv_addr->memhandle);
+			priv_addr++;
+		}
+		kmem_free(dp->priv_addr,
+			(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
+	}
+	mutex_destroy(&dp->dlock);
+
+	kmem_free(dp, sizeof (dring_info_t));
+	return (NULL);
+}
+
+/*
+ * Create a ring consisting of just a private portion and link
+ * it into the list of rings for the outbound lane.
+ *
+ * These type of rings are used primarily for temporary data
+ * storage (i.e. as data buffers).
+ *
+ * On setup failure everything allocated here is released and the
+ * function returns silently (callers see lane_out.dringp unchanged).
+ */
+void
+vsw_create_privring(vsw_ldc_t *ldcp)
+{
+	dring_info_t		*dp, *tp;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+
+	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
+
+	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
+
+	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
+
+	/* no public section */
+	dp->pub_addr = NULL;
+
+	dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
+					VSW_RING_NUM_EL), KM_SLEEP);
+
+	if (vsw_setup_ring(ldcp, dp)) {
+		DERR(vswp, "%s: setup of ring failed", __func__);
+		kmem_free(dp->priv_addr,
+			(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
+		mutex_destroy(&dp->dlock);
+		kmem_free(dp, sizeof (dring_info_t));
+		return;
+	}
+
+	/* haven't used any descriptors yet */
+	dp->end_idx = 0;
+
+	/*
+	 * Only ever create rings for outgoing lane. Link it onto
+	 * end of list.
+	 */
+	if (ldcp->lane_out.dringp == NULL) {
+		D2(vswp, "%s: adding first outbound privring", __func__);
+		ldcp->lane_out.dringp = dp;
+	} else {
+		tp = ldcp->lane_out.dringp;
+		while (tp->next != NULL)
+			tp = tp->next;
+
+		tp->next = dp;
+	}
+
+	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Setup the descriptors in the dring. Returns 0 on success, 1 on
+ * failure.
+ *
+ * Allocates one contiguous data buffer for the whole ring, then for
+ * each descriptor: allocates an ldc memory handle, binds a
+ * VSW_RING_EL_DATA_SZ slice of the buffer to it, records the
+ * resulting cookies, and marks the descriptor VIO_DESC_FREE.  The
+ * public section (dp->pub_addr) may be NULL for private-only rings.
+ */
+int
+vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
+{
+	vnet_public_desc_t	*pub_addr = NULL;
+	vsw_private_desc_t	*priv_addr = NULL;
+	vsw_t			*vswp = ldcp->ldc_vswp;
+	uint64_t		*tmpp;
+	uint64_t		offset = 0;
+	uint32_t		ncookies = 0;
+	static char		*name = "vsw_setup_ring";
+	int			i, j, rv;
+
+	/* note - public section may be null */
+	priv_addr = dp->priv_addr;
+	pub_addr = dp->pub_addr;
+
+	/*
+	 * Allocate the region of memory which will be used to hold
+	 * the data the descriptors will refer to.
+	 */
+	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
+	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
+
+	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
+		dp->data_sz, dp->data_addr);
+
+	/* per-descriptor stride, expressed in uint64_t units */
+	tmpp = (uint64_t *)dp->data_addr;
+	offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp);
+
+	/*
+	 * Initialise some of the private and public (if they exist)
+	 * descriptor fields.
+	 */
+	for (i = 0; i < VSW_RING_NUM_EL; i++) {
+		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
+			&priv_addr->memhandle)) != 0) {
+			DERR(vswp, "%s: alloc mem handle failed", name);
+			goto setup_ring_cleanup;
+		}
+
+		priv_addr->datap = (void *)tmpp;
+
+		rv = ldc_mem_bind_handle(priv_addr->memhandle,
+			(caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
+			LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
+			&(priv_addr->memcookie[0]), &ncookies);
+		if (rv != 0) {
+			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
+				"(rv %d)", name, ldcp->ldc_id, rv);
+			goto setup_ring_cleanup;
+		}
+		priv_addr->bound = 1;
+
+		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
+			name, i, priv_addr->memcookie[0].addr,
+			priv_addr->memcookie[0].size);
+
+		/* i.e. ncookies > VSW_MAX_COOKIES: too many to record */
+		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
+			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
+				"invalid num of cookies (%d) for size 0x%llx",
+				name, ldcp->ldc_id, ncookies,
+				VSW_RING_EL_DATA_SZ);
+
+			goto setup_ring_cleanup;
+		} else {
+			/* fetch the remaining cookies beyond the first */
+			for (j = 1; j < ncookies; j++) {
+				rv = ldc_mem_nextcookie(priv_addr->memhandle,
+					&(priv_addr->memcookie[j]));
+				if (rv != 0) {
+					DERR(vswp, "%s: ldc_mem_nextcookie "
+						"failed rv (%d)", name, rv);
+					goto setup_ring_cleanup;
+				}
+				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
+					"size 0x%llx", name, j,
+					priv_addr->memcookie[j].addr,
+					priv_addr->memcookie[j].size);
+			}
+
+		}
+		priv_addr->ncookies = ncookies;
+		priv_addr->dstate = VIO_DESC_FREE;
+
+		if (pub_addr != NULL) {
+
+			/* link pub and private sides */
+			priv_addr->descp = pub_addr;
+
+			pub_addr->hdr.dstate = VIO_DESC_FREE;
+			pub_addr++;
+		}
+
+		/*
+		 * move to next element in the dring and the next
+		 * position in the data buffer.
+		 */
+		priv_addr++;
+		tmpp += offset;
+	}
+
+	return (0);
+
+setup_ring_cleanup:
+	priv_addr = dp->priv_addr;
+
+	/*
+	 * NOTE(review): this unbinds/frees all VSW_RING_NUM_EL handles,
+	 * including ones never allocated/bound before the failure -
+	 * presumably safe because priv_addr was zalloc'd (NULL handles);
+	 * confirm ldc_mem_* tolerate that.
+	 */
+	for (i = 0; i < VSW_RING_NUM_EL; i++) {
+		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
+		(void) ldc_mem_free_handle(priv_addr->memhandle);
+
+		priv_addr++;
+	}
+	kmem_free(dp->data_addr, dp->data_sz);
+
+	return (1);
+}
+
+/*
+ * Searches the private section of a ring for a free descriptor,
+ * starting at the location of the last free descriptor found
+ * previously.
+ *
+ * Returns 0 if free descriptor is available, 1 otherwise.
+ *
+ * FUTURE: might need to return contiguous range of descriptors
+ * as dring info msg assumes all will be contiguous.
+ */
+static int
+vsw_dring_find_free_desc(dring_info_t *dringp,
+ vsw_private_desc_t **priv_p, int *idx)
+{
+ vsw_private_desc_t *addr;
+ uint64_t i;
+ uint64_t j = 0;
+ uint64_t start = dringp->end_idx;
+ int num = VSW_RING_NUM_EL;
+ int ret = 1;
+
+ D1(NULL, "%s enter\n", __func__);
+
+ addr = dringp->priv_addr;
+
+ D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
+ __func__, dringp, start);
+
+ for (i = start; j < num; i = (i + 1) % num, j++) {
+ addr = (vsw_private_desc_t *)dringp->priv_addr + i;
+ D2(NULL, "%s: descriptor %lld : dstate 0x%llx\n",
+ __func__, i, addr->dstate);
+ if (addr->dstate == VIO_DESC_FREE) {
+ D2(NULL, "%s: descriptor %lld is available",
+ __func__, i);
+ *priv_p = addr;
+ *idx = i;
+ dringp->end_idx = (i + 1) % num;
+ ret = 0;
+ break;
+ }
+ }
+
+ /* ring full */
+ if (ret == 1) {
+ D2(NULL, "%s: no desp free: started at %d", __func__, start);
+ }
+
+ D1(NULL, "%s: exit\n", __func__);
+
+ return (ret);
+}
+
+/*
+ * Copy relevant fields from the private descriptor into the
+ * associated public side.
+ */
+static void
+vsw_dring_priv2pub(vsw_private_desc_t *priv)
+{
+ vnet_public_desc_t *pub;
+ int i;
+
+ D1(NULL, "vsw_dring_priv2pub enter\n");
+
+ pub = priv->descp;
+
+ pub->ncookies = priv->ncookies;
+ pub->nbytes = priv->datalen;
+
+ for (i = 0; i < pub->ncookies; i++) {
+ bcopy(&priv->memcookie[i], &pub->memcookie[i],
+ sizeof (ldc_mem_cookie_t));
+ }
+
+ pub->hdr.ack = 1;
+ pub->hdr.dstate = VIO_DESC_READY;
+
+ D1(NULL, "vsw_dring_priv2pub exit");
+}
+
+/*
+ * Map from a dring identifier to the ring itself. Returns
+ * pointer to ring or NULL if no match found.
+ */
+static dring_info_t *
+vsw_ident2dring(lane_t *lane, uint64_t ident)
+{
+ dring_info_t *dp = NULL;
+
+ if ((dp = lane->dringp) == NULL) {
+ return (NULL);
+ } else {
+ if (dp->ident == ident)
+ return (dp);
+
+ while (dp != NULL) {
+ if (dp->ident == ident)
+ break;
+ dp = dp->next;
+ }
+ }
+
+ return (dp);
+}
+
+/*
+ * Set the default lane attributes. These are copied into
+ * the attr msg we send to our peer. If they are not acceptable
+ * then (currently) the handshake ends.
+ */
+static void
+vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
+{
+ bzero(lp, sizeof (lane_t));
+
+ READ_ENTER(&vswp->if_lockrw);
+ ether_copy(&(vswp->if_addr), &(lp->addr));
+ RW_EXIT(&vswp->if_lockrw);
+
+ lp->mtu = VSW_MTU;
+ lp->addr_type = ADDR_TYPE_MAC;
+ lp->xfer_mode = VIO_DRING_MODE;
+ lp->ack_freq = 0; /* for shared mode */
+ lp->seq_num = VNET_ISS;
+}
+
+/*
+ * Verify that the attributes are acceptable.
+ *
+ * FUTURE: If some attributes are not acceptable, change them
+ * to our desired values.
+ */
+static int
+vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
+{
+ int ret = 0;
+
+ D1(NULL, "vsw_check_attr enter\n");
+
+ /*
+ * Note we currently only support in-band descriptors
+ * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
+ */
+ if ((pkt->xfer_mode != VIO_DESC_MODE) &&
+ (pkt->xfer_mode != VIO_DRING_MODE)) {
+ D2(NULL, "vsw_check_attr: unknown mode %x\n",
+ pkt->xfer_mode);
+ ret = 1;
+ }
+
+ /* Only support MAC addresses at moment. */
+ if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
+ D2(NULL, "vsw_check_attr: invalid addr_type %x, "
+ "or address 0x%llx\n", pkt->addr_type,
+ pkt->addr);
+ ret = 1;
+ }
+
+ /*
+ * MAC address supplied by device should match that stored
+ * in the vsw-port OBP node. Need to decide what to do if they
+ * don't match, for the moment just warn but don't fail.
+ */
+ if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
+ DERR(NULL, "vsw_check_attr: device supplied address "
+ "0x%llx doesn't match node address 0x%llx\n",
+ pkt->addr, port->p_macaddr);
+ }
+
+ /*
+ * Ack freq only makes sense in pkt mode, in shared
+ * mode the ring descriptors say whether or not to
+ * send back an ACK.
+ */
+ if ((pkt->xfer_mode == VIO_DRING_MODE) &&
+ (pkt->ack_freq > 0)) {
+ D2(NULL, "vsw_check_attr: non zero ack freq "
+ " in SHM mode\n");
+ ret = 1;
+ }
+
+ /*
+ * Note: for the moment we only support ETHER
+ * frames. This may change in the future.
+ */
+ if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
+ D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
+ pkt->mtu);
+ ret = 1;
+ }
+
+ D1(NULL, "vsw_check_attr exit\n");
+
+ return (ret);
+}
+
+/*
+ * Returns 1 if there is a problem, 0 otherwise.
+ */
+static int
+vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
+{
+
+
+ int ret = 0;
+
+ D1(NULL, "vsw_check_dring_info enter\n");
+
+ if ((pkt->num_descriptors == 0) ||
+ (pkt->descriptor_size == 0) ||
+ (pkt->ncookies != 1)) {
+ DERR(NULL, "vsw_check_dring_info: invalid dring msg");
+ ret = 1;
+ }
+
+ D1(NULL, "vsw_check_dring_info exit\n");
+
+ return (ret);
+}
+
+/*
+ * Returns 1 if two memory cookies match. Otherwise returns 0.
+ */
+static int
+vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
+{
+	if ((m1->addr != m2->addr) ||
+	    (m1->size != m2->size)) {
+ return (0);
+ } else {
+ return (1);
+ }
+}
+
+/*
+ * Returns 1 if ring described in reg message matches that
+ * described by dring_info structure. Otherwise returns 0.
+ */
+static int
+vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
+{
+ if ((msg->descriptor_size != dp->descriptor_size) ||
+ (msg->num_descriptors != dp->num_descriptors) ||
+ (msg->ncookies != dp->ncookies) ||
+ !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
+ return (0);
+ } else {
+ return (1);
+ }
+
+}
+
+static caddr_t
+vsw_print_ethaddr(uint8_t *a, char *ebuf)
+{
+ (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+ return (ebuf);
+}
+
+/*
+ * Reset and free all the resources associated with
+ * the channel.
+ */
+static void
+vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
+{
+ dring_info_t *dp, *dpp;
+ lane_t *lp = NULL;
+ int rv = 0;
+
+ ASSERT(ldcp != NULL);
+
+ D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);
+
+ if (dir == INBOUND) {
+ D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
+ " of channel %lld", __func__, ldcp->ldc_id);
+ lp = &ldcp->lane_in;
+ } else {
+ D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
+ " of channel %lld", __func__, ldcp->ldc_id);
+ lp = &ldcp->lane_out;
+ }
+
+ lp->lstate = VSW_LANE_INACTIV;
+ lp->seq_num = VNET_ISS;
+ if (lp->dringp) {
+ if (dir == INBOUND) {
+ dp = lp->dringp;
+ while (dp != NULL) {
+ dpp = dp->next;
+ if (dp->handle != NULL)
+ (void) ldc_mem_dring_unmap(dp->handle);
+ kmem_free(dp, sizeof (dring_info_t));
+ dp = dpp;
+ }
+ } else {
+ /*
+ * unbind, destroy exported dring, free dring struct
+ */
+ dp = lp->dringp;
+ rv = vsw_free_ring(dp);
+ }
+ if (rv == 0) {
+ lp->dringp = NULL;
+ }
+ }
+
+ D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
+}
+
+/*
+ * Free ring and all associated resources.
+ */
+static int
+vsw_free_ring(dring_info_t *dp)
+{
+ vsw_private_desc_t *paddr = NULL;
+ dring_info_t *dpp;
+ int i, rv = 1;
+
+ while (dp != NULL) {
+ mutex_enter(&dp->dlock);
+ dpp = dp->next;
+ if (dp->priv_addr != NULL) {
+ /*
+ * First unbind and free the memory handles
+ * stored in each descriptor within the ring.
+ */
+ for (i = 0; i < VSW_RING_NUM_EL; i++) {
+ paddr = (vsw_private_desc_t *)
+ dp->priv_addr + i;
+ if (paddr->memhandle != NULL) {
+ if (paddr->bound == 1) {
+ rv = ldc_mem_unbind_handle(
+ paddr->memhandle);
+
+ if (rv != 0) {
+ DERR(NULL, "error "
+ "unbinding handle for "
+ "ring 0x%llx at pos %d",
+ dp, i);
+ mutex_exit(&dp->dlock);
+ return (rv);
+ }
+ paddr->bound = 0;
+ }
+
+ rv = ldc_mem_free_handle(
+ paddr->memhandle);
+ if (rv != 0) {
+ DERR(NULL, "error freeing "
+ "handle for ring "
+ "0x%llx at pos %d",
+ dp, i);
+ mutex_exit(&dp->dlock);
+ return (rv);
+ }
+ paddr->memhandle = NULL;
+ }
+ }
+ kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
+ * VSW_RING_NUM_EL));
+ }
+
+ /*
+ * Now unbind and destroy the ring itself.
+ */
+ if (dp->handle != NULL) {
+ (void) ldc_mem_dring_unbind(dp->handle);
+ (void) ldc_mem_dring_destroy(dp->handle);
+ }
+
+ if (dp->data_addr != NULL) {
+ kmem_free(dp->data_addr, dp->data_sz);
+ }
+
+ mutex_exit(&dp->dlock);
+ mutex_destroy(&dp->dlock);
+ kmem_free(dp, sizeof (dring_info_t));
+
+ dp = dpp;
+ }
+ return (0);
+}
+
+/*
+ * Debugging routines
+ */
+static void
+display_state(void)
+{
+ vsw_t *vswp;
+ vsw_port_list_t *plist;
+ vsw_port_t *port;
+ vsw_ldc_list_t *ldcl;
+ vsw_ldc_t *ldcp;
+
+ cmn_err(CE_NOTE, "***** system state *****");
+
+ for (vswp = vsw_head; vswp; vswp = vswp->next) {
+ plist = &vswp->plist;
+ READ_ENTER(&plist->lockrw);
+ cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
+ vswp->instance, plist->num_ports);
+
+ for (port = plist->head; port != NULL; port = port->p_next) {
+ ldcl = &port->p_ldclist;
+ cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
+ port->p_instance, ldcl->num_ldcs);
+ READ_ENTER(&ldcl->lockrw);
+ ldcp = ldcl->head;
+ for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
+ cmn_err(CE_CONT, "chan %lu : dev %d : "
+ "status %d : phase %u\n",
+ ldcp->ldc_id, ldcp->dev_class,
+ ldcp->ldc_status, ldcp->hphase);
+ cmn_err(CE_CONT, "chan %lu : lsession %lu : "
+ "psession %lu\n",
+ ldcp->ldc_id,
+ ldcp->local_session,
+ ldcp->peer_session);
+
+ cmn_err(CE_CONT, "Inbound lane:\n");
+ display_lane(&ldcp->lane_in);
+ cmn_err(CE_CONT, "Outbound lane:\n");
+ display_lane(&ldcp->lane_out);
+ }
+ RW_EXIT(&ldcl->lockrw);
+ }
+ RW_EXIT(&plist->lockrw);
+ }
+ cmn_err(CE_NOTE, "***** system state *****");
+}
+
+static void
+display_lane(lane_t *lp)
+{
+ dring_info_t *drp;
+
+ cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
+ lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
+ cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
+ lp->addr_type, lp->addr, lp->xfer_mode);
+ cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);
+
+ cmn_err(CE_CONT, "Dring info:\n");
+ for (drp = lp->dringp; drp != NULL; drp = drp->next) {
+ cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
+ drp->num_descriptors, drp->descriptor_size);
+ cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
+ cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
+ (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
+ cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
+ drp->ident, drp->end_idx);
+ display_ring(drp);
+ }
+}
+
+static void
+display_ring(dring_info_t *dringp)
+{
+ uint64_t i;
+ uint64_t priv_count = 0;
+ uint64_t pub_count = 0;
+ vnet_public_desc_t *pub_addr = NULL;
+ vsw_private_desc_t *priv_addr = NULL;
+
+ for (i = 0; i < VSW_RING_NUM_EL; i++) {
+ if (dringp->pub_addr != NULL) {
+ pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;
+
+ if (pub_addr->hdr.dstate == VIO_DESC_FREE)
+ pub_count++;
+ }
+
+ if (dringp->priv_addr != NULL) {
+ priv_addr =
+ (vsw_private_desc_t *)dringp->priv_addr + i;
+
+ if (priv_addr->dstate == VIO_DESC_FREE)
+ priv_count++;
+ }
+ }
+ cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
+ i, priv_count, pub_count);
+}
+
+static void
+dump_flags(uint64_t state)
+{
+ int i;
+
+ typedef struct flag_name {
+ int flag_val;
+ char *flag_name;
+ } flag_name_t;
+
+ flag_name_t flags[] = {
+ VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
+ VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
+ VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
+ VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
+ VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
+ VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
+ VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
+ VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
+ VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
+ VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
+ VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
+ VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
+ VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
+ VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
+ VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
+ VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
+ VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
+ VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
+ VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
+ VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
+ VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
+ VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
+ VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
+ VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
+ VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
+ VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
+ VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
+ VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
+ VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
+ VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
+ VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};
+
+ DERR(NULL, "DUMP_FLAGS: %llx\n", state);
+ for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
+ if (state & flags[i].flag_val)
+ DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
+ }
+}
diff --git a/usr/src/uts/sun4v/ldc/Makefile b/usr/src/uts/sun4v/ldc/Makefile
new file mode 100644
index 0000000000..ef3961d65c
--- /dev/null
+++ b/usr/src/uts/sun4v/ldc/Makefile
@@ -0,0 +1,96 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+# uts/sun4v/ldc/Makefile
+#
+# This makefile drives the production of the LDC transport kernel module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ldc
+OBJECTS = $(LDC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LDC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += -v
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/ml/hcall.s b/usr/src/uts/sun4v/ml/hcall.s
index d445127644..360626f2eb 100644
--- a/usr/src/uts/sun4v/ml/hcall.s
+++ b/usr/src/uts/sun4v/ml/hcall.s
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -37,6 +38,30 @@
#if defined(lint) || defined(__lint)
/*ARGSUSED*/
+uint64_t
+hv_mach_exit(uint64_t exit_code)
+{ return (0); }
+
+uint64_t
+hv_mach_sir(void)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_cpu_start(uint64_t cpuid, uint64_t pc, uint64_t rtba, uint64_t arg)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_cpu_stop(uint64_t cpuid)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_cpu_set_rtba(uint64_t *rtba)
+{ return (0); }
+
+/*ARGSUSED*/
int64_t
hv_cnputchar(uint8_t ch)
{ return (0); }
@@ -61,6 +86,11 @@ uint64_t
hv_mmu_map_perm_addr(void *vaddr, int ctx, uint64_t tte, int flags)
{ return (0); }
+/*ARGSUSED */
+uint64_t
+hv_mmu_fault_area_conf(void *raddr)
+{ return (0); }
+
/*ARGSUSED*/
uint64_t
hv_mmu_unmap_perm_addr(void *vaddr, int ctx, int flags)
@@ -171,7 +201,7 @@ hv_ttrace_freeze(uint64_t freeze, uint64_t *prev_freeze)
uint64_t
hv_mach_desc(uint64_t buffer_ra, uint64_t *buffer_sizep)
{ return (0); }
-
+
/*ARGSUSED*/
uint64_t
hv_ra2pa(uint64_t ra)
@@ -182,31 +212,190 @@ uint64_t
hv_hpriv(void *func, uint64_t arg1, uint64_t arg2, uint64_t arg3)
{ return (0); }
+/*ARGSUSED*/
+uint64_t
+hv_ldc_tx_qconf(uint64_t channel, uint64_t ra_base, uint64_t nentries)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_tx_qinfo(uint64_t channel, uint64_t *ra_base, uint64_t *nentries)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_tx_get_state(uint64_t channel,
+ uint64_t *headp, uint64_t *tailp, uint64_t *state)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_tx_set_qtail(uint64_t channel, uint64_t tail)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_rx_qconf(uint64_t channel, uint64_t ra_base, uint64_t nentries)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_rx_qinfo(uint64_t channel, uint64_t *ra_base, uint64_t *nentries)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_rx_get_state(uint64_t channel,
+ uint64_t *headp, uint64_t *tailp, uint64_t *state)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_rx_set_qhead(uint64_t channel, uint64_t head)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_send_msg(uint64_t channel, uint64_t msg_ra)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_set_map_table(uint64_t channel, uint64_t tbl_ra, uint64_t tbl_entries)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_ldc_copy(uint64_t channel, uint64_t request, uint64_t cookie,
+ uint64_t raddr, uint64_t length, uint64_t *lengthp)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_getcookie(uint64_t dev_hdl, uint32_t devino, uint64_t *cookie)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_setcookie(uint64_t dev_hdl, uint32_t devino, uint64_t cookie)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_getvalid(uint64_t dev_hdl, uint32_t devino, int *intr_valid_state)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_setvalid(uint64_t dev_hdl, uint32_t devino, int intr_valid_state)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_getstate(uint64_t dev_hdl, uint32_t devino, int *intr_state)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_setstate(uint64_t dev_hdl, uint32_t devino, int intr_state)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_gettarget(uint64_t dev_hdl, uint32_t devino, uint32_t *cpuid)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hvldc_intr_settarget(uint64_t dev_hdl, uint32_t devino, uint32_t cpuid)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_api_get_version(uint64_t api_group, uint64_t *majorp, uint64_t *minorp)
+{ return (0); }
+
+/*ARGSUSED*/
+uint64_t
+hv_api_set_version(uint64_t api_group, uint64_t major, uint64_t minor,
+ uint64_t *supported_minor)
+{ return (0); }
+
#else /* lint || __lint */
/*
- * %o0 - character
+ * uint64_t hv_mach_exit(uint64_t exit_code)
+ */
+ ENTRY(hv_mach_exit)
+ mov HV_MACH_EXIT, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_mach_exit)
+
+ /*
+ * uint64_t hv_mach_sir(void)
+ */
+ ENTRY(hv_mach_sir)
+ mov HV_MACH_SIR, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_mach_sir)
+
+ /*
+	 * hv_cpu_start(uint64_t cpuid, uint64_t pc, uint64_t rtba,
+ * uint64_t arg)
+ */
+ ENTRY(hv_cpu_start)
+ mov HV_CPU_START, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_cpu_start)
+
+ /*
+ * hv_cpu_stop(uint64_t cpuid)
+ */
+ ENTRY(hv_cpu_stop)
+ mov HV_CPU_STOP, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_cpu_stop)
+
+ /*
+ * hv_cpu_set_rtba(uint64_t *rtba)
+ */
+ ENTRY(hv_cpu_set_rtba)
+ mov %o0, %o2
+ ldx [%o2], %o0
+ mov HV_CPU_SET_RTBA, %o5
+ ta FAST_TRAP
+ stx %o1, [%o2]
+ retl
+ nop
+ SET_SIZE(hv_cpu_set_rtba)
+
+ /*
+ * int64_t hv_cnputchar(uint8_t ch)
*/
ENTRY(hv_cnputchar)
- mov CONS_WRITE, %o5
+ mov CONS_PUTCHAR, %o5
ta FAST_TRAP
- tst %o0
retl
- movnz %xcc, -1, %o0
+ nop
SET_SIZE(hv_cnputchar)
/*
- * %o0 pointer to character buffer
- * return values:
- * 0 success
- * hv_errno failure
+ * int64_t hv_cngetchar(uint8_t *ch)
*/
ENTRY(hv_cngetchar)
mov %o0, %o2
- mov CONS_READ, %o5
+ mov CONS_GETCHAR, %o5
ta FAST_TRAP
brnz,a %o0, 1f ! failure, just return error
- mov 1, %o0
+ nop
cmp %o1, H_BREAK
be 1f
@@ -220,7 +409,7 @@ hv_hpriv(void *func, uint64_t arg1, uint64_t arg2, uint64_t arg3)
mov 0, %o0
1:
retl
- nop
+ nop
SET_SIZE(hv_cngetchar)
ENTRY(hv_tod_get)
@@ -253,6 +442,19 @@ hv_hpriv(void *func, uint64_t arg1, uint64_t arg2, uint64_t arg3)
SET_SIZE(hv_mmu_map_perm_addr)
/*
+ * hv_mmu_fault_area_conf(void *raddr)
+ */
+ ENTRY(hv_mmu_fault_area_conf)
+ mov %o0, %o2
+ ldx [%o2], %o0
+ mov MMU_SET_INFOPTR, %o5
+ ta FAST_TRAP
+ stx %o1, [%o2]
+ retl
+ nop
+ SET_SIZE(hv_mmu_fault_area_conf)
+
+ /*
* Unmap permanent address
* arg0 vaddr (%o0)
* arg1 context (%o1)
@@ -308,7 +510,7 @@ hv_hpriv(void *func, uint64_t arg1, uint64_t arg2, uint64_t arg3)
* arg2 Size (%o2)
*/
ENTRY(hv_cpu_qconf)
- mov CPU_QCONF, %o5
+ mov HV_CPU_QCONF, %o5
ta FAST_TRAP
retl
nop
@@ -537,7 +739,7 @@ hv_hpriv(void *func, uint64_t arg1, uint64_t arg2, uint64_t arg3)
* arg0 enable/ freeze (%o0)
* ret0 status (%o0)
* ret1 previous freeze state (%o1)
- */
+ */
ENTRY(hv_ttrace_freeze)
mov %o1, %o2
mov TTRACE_FREEZE, %o5
@@ -597,4 +799,320 @@ hv_hpriv(void *func, uint64_t arg1, uint64_t arg2, uint64_t arg3)
nop
SET_SIZE(hv_hpriv)
+ /*
+ * hv_ldc_tx_qconf(uint64_t channel, uint64_t ra_base,
+ * uint64_t nentries);
+ */
+ ENTRY(hv_ldc_tx_qconf)
+ mov LDC_TX_QCONF, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_ldc_tx_qconf)
+
+
+ /*
+ * hv_ldc_tx_qinfo(uint64_t channel, uint64_t *ra_base,
+ * uint64_t *nentries);
+ */
+ ENTRY(hv_ldc_tx_qinfo)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov LDC_TX_QINFO, %o5
+ ta FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ stx %o2, [%g2]
+ SET_SIZE(hv_ldc_tx_qinfo)
+
+
+ /*
+ * hv_ldc_tx_get_state(uint64_t channel,
+ * uint64_t *headp, uint64_t *tailp, uint64_t *state);
+ */
+ ENTRY(hv_ldc_tx_get_state)
+ mov LDC_TX_GET_STATE, %o5
+ mov %o1, %g1
+ mov %o2, %g2
+ mov %o3, %g3
+ ta FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ stx %o3, [%g3]
+ SET_SIZE(hv_ldc_tx_get_state)
+
+
+ /*
+ * hv_ldc_tx_set_qtail(uint64_t channel, uint64_t tail)
+ */
+ ENTRY(hv_ldc_tx_set_qtail)
+ mov LDC_TX_SET_QTAIL, %o5
+ ta FAST_TRAP
+ retl
+ SET_SIZE(hv_ldc_tx_set_qtail)
+
+
+ /*
+ * hv_ldc_rx_qconf(uint64_t channel, uint64_t ra_base,
+ * uint64_t nentries);
+ */
+ ENTRY(hv_ldc_rx_qconf)
+ mov LDC_RX_QCONF, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_ldc_rx_qconf)
+
+
+ /*
+ * hv_ldc_rx_qinfo(uint64_t channel, uint64_t *ra_base,
+ * uint64_t *nentries);
+ */
+ ENTRY(hv_ldc_rx_qinfo)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov LDC_RX_QINFO, %o5
+ ta FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ stx %o2, [%g2]
+ SET_SIZE(hv_ldc_rx_qinfo)
+
+
+ /*
+ * hv_ldc_rx_get_state(uint64_t channel,
+ * uint64_t *headp, uint64_t *tailp, uint64_t *state);
+ */
+ ENTRY(hv_ldc_rx_get_state)
+ mov LDC_RX_GET_STATE, %o5
+ mov %o1, %g1
+ mov %o2, %g2
+ mov %o3, %g3
+ ta FAST_TRAP
+ stx %o1, [%g1]
+ stx %o2, [%g2]
+ retl
+ stx %o3, [%g3]
+ SET_SIZE(hv_ldc_rx_get_state)
+
+
+ /*
+ * hv_ldc_rx_set_qhead(uint64_t channel, uint64_t head)
+ */
+ ENTRY(hv_ldc_rx_set_qhead)
+ mov LDC_RX_SET_QHEAD, %o5
+ ta FAST_TRAP
+ retl
+ SET_SIZE(hv_ldc_rx_set_qhead)
+
+ /*
+ * hv_ldc_set_map_table(uint64_t channel, uint64_t tbl_ra,
+ * uint64_t tbl_entries)
+ */
+ ENTRY(hv_ldc_set_map_table)
+ mov LDC_SET_MAP_TABLE, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_ldc_set_map_table)
+
+
+ /*
+ * hv_ldc_get_map_table(uint64_t channel, uint64_t *tbl_ra,
+ * uint64_t *tbl_entries)
+ */
+ ENTRY(hv_ldc_get_map_table)
+ mov %o1, %g1
+ mov %o2, %g2
+ mov LDC_GET_MAP_TABLE, %o5
+ ta FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ stx %o2, [%g2]
+ SET_SIZE(hv_ldc_get_map_table)
+
+
+ /*
+ * hv_ldc_copy(uint64_t channel, uint64_t request, uint64_t cookie,
+ * uint64_t raddr, uint64_t length, uint64_t *lengthp);
+ */
+ ENTRY(hv_ldc_copy)
+ mov %o5, %g1
+ mov LDC_COPY, %o5
+ ta FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ SET_SIZE(hv_ldc_copy)
+
+
+ /*
+ * hv_ldc_mapin(uint64_t channel, uint64_t cookie, uint64_t *raddr,
+ * uint64_t *perm)
+ */
+ ENTRY(hv_ldc_mapin)
+ mov %o2, %g1
+ mov %o3, %g2
+ mov LDC_MAPIN, %o5
+ ta FAST_TRAP
+ stx %o1, [%g1]
+ retl
+ stx %o2, [%g2]
+ SET_SIZE(hv_ldc_mapin)
+
+
+ /*
+ * hv_ldc_unmap(uint64_t raddr)
+ */
+ ENTRY(hv_ldc_unmap)
+ mov LDC_UNMAP, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_ldc_unmap)
+
+
+ /*
+ * hv_ldc_revoke(uint64_t raddr)
+ */
+ ENTRY(hv_ldc_revoke)
+ mov LDC_REVOKE, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hv_ldc_revoke)
+
+
+ /*
+ * hvldc_intr_getcookie(uint64_t dev_hdl, uint32_t devino,
+ * uint64_t *cookie);
+ */
+ ENTRY(hvldc_intr_getcookie)
+ mov %o2, %g1
+ mov VINTR_GET_COOKIE, %o5
+ ta FAST_TRAP
+ retl
+ stx %o1, [%g1]
+ SET_SIZE(hvldc_intr_getcookie)
+
+ /*
+ * hvldc_intr_setcookie(uint64_t dev_hdl, uint32_t devino,
+ * uint64_t cookie);
+ */
+ ENTRY(hvldc_intr_setcookie)
+ mov VINTR_SET_COOKIE, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hvldc_intr_setcookie)
+
+
+ /*
+ * hvldc_intr_getvalid(uint64_t dev_hdl, uint32_t devino,
+ * int *intr_valid_state);
+ */
+ ENTRY(hvldc_intr_getvalid)
+ mov %o2, %g1
+ mov VINTR_GET_VALID, %o5
+ ta FAST_TRAP
+ retl
+ stuw %o1, [%g1]
+ SET_SIZE(hvldc_intr_getvalid)
+
+ /*
+ * hvldc_intr_setvalid(uint64_t dev_hdl, uint32_t devino,
+ * int intr_valid_state);
+ */
+ ENTRY(hvldc_intr_setvalid)
+ mov VINTR_SET_VALID, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hvldc_intr_setvalid)
+
+ /*
+ * hvldc_intr_getstate(uint64_t dev_hdl, uint32_t devino,
+ * int *intr_state);
+ */
+ ENTRY(hvldc_intr_getstate)
+ mov %o2, %g1
+ mov VINTR_GET_STATE, %o5
+ ta FAST_TRAP
+ retl
+ stuw %o1, [%g1]
+ SET_SIZE(hvldc_intr_getstate)
+
+ /*
+ * hvldc_intr_setstate(uint64_t dev_hdl, uint32_t devino,
+ * int intr_state);
+ */
+ ENTRY(hvldc_intr_setstate)
+ mov VINTR_SET_STATE, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hvldc_intr_setstate)
+
+ /*
+ * hvldc_intr_gettarget(uint64_t dev_hdl, uint32_t devino,
+ * uint32_t *cpuid);
+ */
+ ENTRY(hvldc_intr_gettarget)
+ mov %o2, %g1
+ mov VINTR_GET_TARGET, %o5
+ ta FAST_TRAP
+ retl
+ stuw %o1, [%g1]
+ SET_SIZE(hvldc_intr_gettarget)
+
+ /*
+ * hvldc_intr_settarget(uint64_t dev_hdl, uint32_t devino,
+ * uint32_t cpuid);
+ */
+ ENTRY(hvldc_intr_settarget)
+ mov VINTR_SET_TARGET, %o5
+ ta FAST_TRAP
+ retl
+ nop
+ SET_SIZE(hvldc_intr_settarget)
+
+ /*
+ * hv_api_get_version(uint64_t api_group, uint64_t *majorp,
+ * uint64_t *minorp)
+ *
+ * API_GET_VERSION
+ * arg0 API group
+ * ret0 status
+ * ret1 major number
+ * ret2 minor number
+ */
+ ENTRY(hv_api_get_version)
+ mov %o1, %o3
+ mov %o2, %o4
+ mov API_GET_VERSION, %o5
+ ta CORE_TRAP
+ stx %o1, [%o3]
+ retl
+ stx %o2, [%o4]
+ SET_SIZE(hv_api_get_version)
+
+ /*
+ * hv_api_set_version(uint64_t api_group, uint64_t major,
+ * uint64_t minor, uint64_t *supported_minor)
+ *
+ * API_SET_VERSION
+ * arg0 API group
+ * arg1 major number
+ * arg2 requested minor number
+ * ret0 status
+ * ret1 actual minor number
+ */
+ ENTRY(hv_api_set_version)
+ mov %o3, %o4
+ mov API_SET_VERSION, %o5
+ ta CORE_TRAP
+ retl
+ stx %o1, [%o4]
+ SET_SIZE(hv_api_set_version)
+
#endif /* lint || __lint */
diff --git a/usr/src/uts/sun4v/ml/mach_offsets.in b/usr/src/uts/sun4v/ml/mach_offsets.in
index 8f032ed908..1770b73bb2 100644
--- a/usr/src/uts/sun4v/ml/mach_offsets.in
+++ b/usr/src/uts/sun4v/ml/mach_offsets.in
@@ -2,9 +2,8 @@
\ CDDL HEADER START
\
\ The contents of this file are subject to the terms of the
-\ Common Development and Distribution License, Version 1.0 only
-\ (the "License"). You may not use this file except in compliance
-\ with the License.
+\ Common Development and Distribution License (the "License").
+\ You may not use this file except in compliance with the License.
\
\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
\ or http://www.opensolaris.org/os/licensing.
@@ -19,7 +18,7 @@
\
\ CDDL HEADER END
\
-\ Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+\ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
\ Use is subject to license terms.
\
\ offsets.in: input file to produce assym.h using the stabs program
@@ -79,6 +78,7 @@
#include <vm/hat_sfmmu.h>
#include <sys/traptrace.h>
+#include <sys/lpad.h>
machcpu
intrstat MCPU_INTRSTAT
@@ -280,3 +280,16 @@ ptl1_gregs
ptl1_g6
ptl1_g7
+lpad_data
+ magic LPAD_MAGIC
+ inuse LPAD_INUSE
+ mmfsa_ra LPAD_MMFSA_RA
+ pc LPAD_PC
+ arg LPAD_ARG
+ nmap LPAD_NMAP
+ map LPAD_MAP
+
+lpad_map LPAD_MAP_SIZE
+ flags LPAD_MAP_FLAGS
+ va LPAD_MAP_VA
+ tte LPAD_MAP_TTE
diff --git a/usr/src/uts/sun4v/ml/mach_proc_init.s b/usr/src/uts/sun4v/ml/mach_proc_init.s
new file mode 100644
index 0000000000..20d4d3c3cc
--- /dev/null
+++ b/usr/src/uts/sun4v/ml/mach_proc_init.s
@@ -0,0 +1,211 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v processor initialization
+ *
+ * This is the kernel entry point for CPUs that enter Solaris
+ * directly from the hypervisor. i.e. without going through OBP.
+ */
+
+#if !defined(lint)
+#include "assym.h"
+#endif /* !lint */
+
+#include <sys/asm_linkage.h>
+#include <sys/hypervisor_api.h>
+#include <sys/machasi.h>
+#include <sys/machpcb.h>
+#include <sys/machlock.h>
+#include <sys/mmu.h>
+#include <sys/lpad.h>
+
+#if defined(lint)
+
+/* ARGSUSED */
+void
+mach_cpu_startup(uint64_t rabase, uint64_t memsz)
+{}
+
+#else /* lint */
+
+ /*
+ * %o0 - hcall specified arg (cpuid)
+ * %i0 - real memory base
+ * %i1 - memory size
+ */
+ ENTRY_NP(mach_cpu_startup)
+ /*
+ * Calculate the data pointer. The landing pad
+ * data immediately follows the landing pad text.
+ */
+ rd %pc, %l0
+ add %l0, LPAD_TEXT_SIZE, %l1 ! %l1 has start of data
+
+ /*
+ * Setup the initial state of the CPU.
+ */
+ wrpr %g0, 0, %tl
+ wrpr %g0, 0, %gl
+ wrpr %g0, MAXWIN - 2, %cansave
+ wrpr %g0, MAXWIN - 2, %cleanwin
+ wrpr %g0, 0, %canrestore
+ wrpr %g0, 0, %otherwin
+ wrpr %g0, 0, %cwp
+ wrpr %g0, 0, %wstate
+ wr %g0, %y
+ wrpr %g0, PIL_MAX, %pil
+
+ set trap_table, %g1
+ wrpr %g1, %tba
+
+ ! initialize cpuid into scratchpad register
+ mov SCRATCHPAD_CPUID, %g1
+ stxa %o0, [%g1]ASI_SCRATCHPAD
+
+ ! sanity check the data section
+ setx LPAD_MAGIC_VAL, %g2, %g1
+ ldx [%l1 + LPAD_MAGIC], %g2
+ cmp %g1, %g2
+ bne startup_error
+ nop
+
+ /*
+ * Loop through the array of TTE's, installing the
+ * VA to RA mapping for each one.
+ */
+ ldx [%l1 + LPAD_NMAP], %l2 ! %l2 = number of mappings
+ add %l1, LPAD_MAP, %l3 ! %l3 = the current mapping
+
+ /*
+ * Sanity check the number of mappings.
+ */
+ mulx %l2, LPAD_MAP_SIZE, %g1
+ add %l3, %g1, %g1 ! %g1 = end of the array
+ add %l1, LPAD_DATA_SIZE, %g2 ! %g2 = end of data section
+ sub %g2, %g1, %g2
+ brlz %g2, startup_error
+ nop
+
+0:
+ cmp %l2, %g0
+ be 3f
+ nop
+
+ ldx [%l3 + LPAD_MAP_FLAGS], %l4 ! %l4 = flags
+
+ /*
+ * Generate args for the HV call
+ */
+ ldx [%l3 + LPAD_MAP_VA], %o0 ! %o0 = virtual address
+ mov KCONTEXT, %o1 ! %o1 = context
+ ldx [%l3 + LPAD_MAP_TTE], %o2 ! %o2 = TTE
+ and %l4, FLAG_MMUFLAGS_MASK, %o3 ! %o3 = MMU flags
+
+ ! check if this is a locked TTE
+ and %l4, FLAG_LOCK_MASK, %l4
+ cmp %l4, %g0
+ bne 1f
+ nop
+
+ ! install an unlocked entry
+ ta MMU_MAP_ADDR
+ ba 2f
+ nop
+1:
+ ! install a locked entry
+ mov MAP_PERM_ADDR, %o5
+ ta FAST_TRAP
+
+2:
+ ! check for errors from the hcall
+ cmp %o0, %g0
+ bne startup_error
+ nop
+
+ sub %l2, 1, %l2 ! decrement counter
+ add %l3, LPAD_MAP_SIZE, %l3 ! increment pointer
+
+ ba 0b
+ nop
+
+3:
+ /*
+ * Set the MMU fault status area
+ */
+ ldx [%l1 + LPAD_MMFSA_RA], %o0
+
+ mov MMU_SET_INFOPTR, %o5
+ ta FAST_TRAP
+
+ ! check for errors from the hcall
+ cmp %o0, %g0
+ bne startup_error
+ nop
+
+ /*
+ * Load remaining arguments before enabling the
+ * MMU so that the loads can be done using real
+ * addresses.
+ */
+ ldx [%l1 + LPAD_PC], %l3 ! %l3 = specified entry point
+ ldx [%l1 + LPAD_ARG], %l4 ! %l4 = specified argument
+ ldx [%l1 + LPAD_INUSE], %l5 ! %l5 = va of inuse mailbox
+
+ /*
+ * Enable the MMU. On success, it returns to the
+ * global version of the landing pad text, rather
+ * than the text copied into the lpad buffer.
+ */
+ mov 1, %o0 ! %o0 = enable flag (1 = enable)
+ set startup_complete, %o1 ! VA of return address
+ mov MMU_ENABLE, %o5
+ ta FAST_TRAP
+
+ /*
+ * On errors, just enter a spin loop until the
+ * CPU that initiated the start recovers the CPU.
+ */
+startup_error:
+ ba startup_error
+ nop
+
+ /*
+ * Jump to the generic CPU initialization code.
+ */
+startup_complete:
+ mov %l4, %o0
+ jmpl %l3, %g0
+ stx %g0, [%l5] ! clear the inuse mailbox
+
+ SET_SIZE(mach_cpu_startup)
+
+ .global mach_cpu_startup_end
+mach_cpu_startup_end:
+
+#endif /* lint */
diff --git a/usr/src/uts/sun4v/ml/mach_subr_asm.s b/usr/src/uts/sun4v/ml/mach_subr_asm.s
index f0a9255abf..28d3e2a0d8 100644
--- a/usr/src/uts/sun4v/ml/mach_subr_asm.s
+++ b/usr/src/uts/sun4v/ml/mach_subr_asm.s
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -74,6 +73,80 @@ get_mmfsa_scratchpad()
SET_SIZE(get_mmfsa_scratchpad)
#endif /* lint */
+
+
+#if defined(lint)
+/* ARGSUSED */
+void
+cpu_intrq_unregister_powerdown(uint64_t doneflag_va)
+{}
+
+#else /* lint */
+
+/*
+ * Called from a x-trap at tl1 must use %g1 as arg
+ * and save/restore %o0-%o5 after hypervisor calls
+ */
+
+ ENTRY(cpu_intrq_unregister_powerdown)
+
+ CPU_ADDR(%g2, %g3)
+ add %g2, CPU_MCPU, %g2
+ /*
+ * Save %o regs
+ */
+ mov %o0, %g3
+ mov %o1, %g4
+ mov %o2, %g5
+ mov %o5, %g6
+
+ ldx [%g2 + MCPU_CPU_Q_BASE], %o1
+ mov INTR_CPU_Q, %o0
+ call hv_cpu_qconf
+ mov %g0, %o2
+
+ ldx [%g2 + MCPU_DEV_Q_BASE], %o1
+ mov INTR_DEV_Q, %o0
+ call hv_cpu_qconf
+ mov %g0, %o2
+
+ ldx [%g2 + MCPU_RQ_BASE], %o1
+ mov CPU_RQ, %o0
+ call hv_cpu_qconf
+ mov %g0, %o2
+
+ ldx [%g2 + MCPU_NRQ_BASE], %o1
+ mov CPU_NRQ, %o0
+ call hv_cpu_qconf
+ mov %g0, %o2
+
+ /*
+ * set done flag to 0
+ */
+ stub %g0, [%g1]
+
+ /*
+ * Restore %o regs
+ */
+ mov %g3, %o0
+ mov %g4, %o1
+ mov %g5, %o2
+ mov %g6, %o5
+
+ /*
+ * This CPU is on its way out. Spin here
+ * until the DR unconfigure code stops it.
+ * Returning would put it back in the OS
+ * where it might grab resources like locks,
+ * causing some nastiness to occur.
+ */
+0:
+ ba,a 0b
+
+ SET_SIZE(cpu_intrq_unregister_powerdown)
+#endif /* lint */
+
+
#if defined(lint)
/* ARGSUSED */
int
diff --git a/usr/src/uts/sun4v/ml/trap_table.s b/usr/src/uts/sun4v/ml/trap_table.s
index 391bb34f2e..24fb20058c 100644
--- a/usr/src/uts/sun4v/ml/trap_table.s
+++ b/usr/src/uts/sun4v/ml/trap_table.s
@@ -151,11 +151,7 @@
.align 32
#define NOTP4 NOTP; NOTP; NOTP; NOTP
-/*
- * RED is for traps that use the red mode handler.
- * We should never see these either.
- */
-#define RED NOT
+
/*
* BAD is used for trap vectors we don't have a kernel
* handler for.
@@ -824,6 +820,25 @@ tt_pil/**/level: ;\
.align 32
/*
+ * We take over the rtba after we set our trap table and
+ * fault status area. The watchdog reset trap is now handled by the OS.
+ */
+#define WATCHDOG_RESET \
+ mov PTL1_BAD_WATCHDOG, %g1 ;\
+ ba,a,pt %xcc, .watchdog_trap ;\
+ .align 32
+
+/*
+ * RED is for traps that use the red mode handler.
+ * We should never see these either.
+ */
+#define RED \
+ mov PTL1_BAD_RED, %g1 ;\
+ ba,a,pt %xcc, .watchdog_trap ;\
+ .align 32
+
+
+/*
* MMU Trap Handlers.
*/
@@ -1124,7 +1139,7 @@ trap_table0:
/* hardware traps */
NOT; /* 000 reserved */
RED; /* 001 power on reset */
- RED; /* 002 watchdog reset */
+ WATCHDOG_RESET; /* 002 watchdog reset */
RED; /* 003 externally initiated reset */
RED; /* 004 software initiated reset */
RED; /* 005 red mode exception */
@@ -2683,6 +2698,20 @@ trace_dataprot:
#endif /* TRAPTRACE */
/*
+ * Handle watchdog reset trap. Enable the MMU using the MMU_ENABLE
+ * HV service, which requires the return target to be specified as a VA
+ * since we are enabling the MMU. We set the target to ptl1_panic.
+ */
+
+ .type .watchdog_trap, #function
+.watchdog_trap:
+ mov 1, %o0
+ setx ptl1_panic, %g2, %o1
+ mov MMU_ENABLE, %o5
+ ta FAST_TRAP
+ done
+ SET_SIZE(.watchdog_trap)
+/*
* synthesize for trap(): SFAR in %g2, SFSR in %g3
*/
.type .dmmu_exc_lddf_not_aligned, #function
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index 7cfb68fe7f..173019f902 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -44,373 +43,750 @@
#include <sys/cmp.h>
#include <sys/async.h>
#include <vm/page.h>
-
-/*
- * The OpenBoot Standalone Interface supplies the kernel with
- * implementation dependent parameters through the devinfo/property mechanism
- */
-typedef enum { XDRBOOL, XDRINT, XDRSTRING } xdrs;
-
-/*
- * structure describing properties that we are interested in querying the
- * OBP for.
- */
-struct getprop_info {
- char *name;
- xdrs type;
- uint_t *var;
-};
-
-/*
- * structure used to convert between a string returned by the OBP & a type
- * used within the kernel. We prefer to paramaterize rather than type.
- */
-struct convert_info {
- char *name;
- uint_t var;
- char *realname;
-};
-
-/*
- * structure describing nodes that we are interested in querying the OBP for
- * properties.
- */
-struct node_info {
- char *name;
- int size;
- struct getprop_info *prop;
- struct getprop_info *prop_end;
- unsigned int *value;
-};
-
-/*
- * macro definitions for routines that form the OBP interface
- */
-#define NEXT prom_nextnode
-#define CHILD prom_childnode
-#define GETPROP prom_getprop
-#define GETPROPLEN prom_getproplen
-
-/* 0=quiet; 1=verbose; 2=debug */
-int debug_fillsysinfo = 0;
-#define VPRINTF if (debug_fillsysinfo) prom_printf
+#include <vm/hat_sfmmu.h>
+#include <sys/sysmacros.h>
+#include <sys/mach_descrip.h>
+#include <sys/mdesc.h>
+#include <sys/archsystm.h>
+#include <sys/error.h>
+#include <sys/mmu.h>
+#include <sys/bitmap.h>
int ncpunode;
struct cpu_node cpunodes[NCPU];
-void fill_cpu(pnode_t);
-void plat_fill_mc(pnode_t);
-#pragma weak plat_fill_mc
+uint64_t cpu_q_entries;
+uint64_t dev_q_entries;
+uint64_t cpu_rq_entries;
+uint64_t cpu_nrq_entries;
+
+void fill_cpu(md_t *, mde_cookie_t);
+
+static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t);
+static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t);
+static char *construct_isalist(md_t *, mde_cookie_t, char **);
+static void set_at_flags(char *, int, char **);
+static void init_md_broken(md_t *);
+static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *,
+ uint64_t *);
+static id_t get_exec_unit_mapping(md_t *, mde_cookie_t, mde_cookie_t *);
+static int find_exec_unit_id(mde_cookie_t, mde_cookie_t *);
+static void get_q_sizes(md_t *, mde_cookie_t);
+static void get_va_bits(md_t *, mde_cookie_t);
+static size_t get_ra_limit(md_t *);
uint64_t system_clock_freq;
int niobus = 0;
uint_t niommu_tsbs = 0;
-/*
- * Hardware watchdog support.
- */
-#define CHOSEN_EEPROM "eeprom"
-static pnode_t chosen_eeprom;
-
-/*
- * If this variable is non-zero, cpr should return "not supported" when
- * it is queried even though it would normally be supported on this platform.
- */
-int cpr_supported_override;
+void
+map_wellknown_devices()
+{
+}
-/*
- * Some platforms may need to support CPR even in the absence of the
- * energystar-v* property (Enchilada server, for example). If this
- * variable is non-zero, cpr should proceed even in the absence
- * of the energystar-v* property.
- */
-int cpr_platform_enable = 0;
+#define S_VAC_SIZE MMU_PAGESIZE
+#define S_VAC_SHIFT MMU_PAGESHIFT
/*
- * Some nodes have functions that need to be called when they're seen.
+ * For backward compatibility we need to verify that we can handle
+ * running on platforms which shipped with missing MD properties.
*/
-static void have_pci(pnode_t);
-
-static struct wkdevice {
- char *wk_namep;
- void (*wk_func)(pnode_t);
- caddr_t *wk_vaddrp;
- ushort_t wk_flags;
-#define V_OPTIONAL 0x0000
-#define V_MUSTHAVE 0x0001
-#define V_MAPPED 0x0002
-#define V_MULTI 0x0003 /* optional, may be more than one */
-} wkdevice[] = {
- { "pci", have_pci, NULL, V_MULTI },
- { 0, },
-};
-
-static void map_wellknown(pnode_t);
+#define ONTARIO_PLATNAME1 "SUNW,Sun-Fire-T200"
+#define ONTARIO_PLATNAME2 "SUNW,Sun-Fire-T2000"
+#define ERIE_PLATNAME1 "SUNW,Sun-Fire-T100"
+#define ERIE_PLATNAME2 "SUNW,Sun-Fire-T1000"
void
-map_wellknown_devices()
+fill_cpu(md_t *mdp, mde_cookie_t cpuc)
{
- struct wkdevice *wkp;
- phandle_t ieeprom;
- pnode_t root;
- uint_t stick_freq;
+ struct cpu_node *cpunode;
+ uint64_t cpuid;
+ uint64_t clk_freq;
+ char *namebuf;
+ char *namebufp;
+ int namelen;
+ uint64_t associativity = 0, linesize = 0, size = 0;
+ int status;
+
+ if (md_get_prop_val(mdp, cpuc, "id", &cpuid)) {
+ return;
+ }
+ if (cpuid >= NCPU) {
+ cmn_err(CE_CONT, "fill_cpu: out of range cpuid %ld - "
+ "cpu excluded from configuration", cpuid);
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Since the CPU cannot be used, make sure it
+ * is in a safe place. If the firmware does not
+ * support CPU stop, this is known to be true.
+ * If it fails to stop for any other reason, the
+ * system is in an inconsistent state and cannot
+ * be allowed to continue.
+ */
+ status = stopcpu_bycpuid(cpuid);
+
+ if ((status != 0) && (status != ENOTSUP)) {
+ cmn_err(CE_PANIC, "failed to stop cpu %lu (%d)",
+ cpuid, status);
+ }
+
+ mutex_exit(&cpu_lock);
+ return;
+ }
+
+ cpunode = &cpunodes[cpuid];
+ cpunode->cpuid = (int)cpuid;
+ cpunode->device_id = cpuid;
+
+ if (sizeof (cpunode->fru_fmri) > strlen(CPU_FRU_FMRI))
+ (void) strcpy(cpunode->fru_fmri, CPU_FRU_FMRI);
+
+ if (md_get_prop_data(mdp, cpuc,
+ "compatible", (uint8_t **)&namebuf, &namelen)) {
+ cmn_err(CE_PANIC, "fill_cpu: Cannot read compatible "
+ "property");
+ }
+ namebufp = namebuf;
+ if (strncmp(namebufp, "SUNW,", 5) == 0)
+ namebufp += 5;
+ if (strlen(namebufp) > sizeof (cpunode->name))
+ cmn_err(CE_PANIC, "Compatible property too big to "
+ "fit into the cpunode name buffer");
+ (void) strcpy(cpunode->name, namebufp);
+
+ if (md_get_prop_val(mdp, cpuc,
+ "clock-frequency", &clk_freq)) {
+ clk_freq = 0;
+ }
+ cpunode->clock_freq = clk_freq;
+
+ ASSERT(cpunode->clock_freq != 0);
/*
- * if there is a chosen eeprom, note it (for have_eeprom())
+ * Compute scaling factor based on rate of %tick. This is used
+ * to convert from ticks derived from %tick to nanoseconds. See
+ * comment in sun4u/sys/clock.h for details.
*/
- if (GETPROPLEN(prom_chosennode(), CHOSEN_EEPROM) ==
- sizeof (phandle_t) &&
- GETPROP(prom_chosennode(), CHOSEN_EEPROM, (caddr_t)&ieeprom) != -1)
- chosen_eeprom = (pnode_t)prom_decode_int(ieeprom);
+ cpunode->tick_nsec_scale = (uint_t)(((uint64_t)NANOSEC <<
+ (32 - TICK_NSEC_SHIFT)) / cpunode->clock_freq);
- root = prom_nextnode((pnode_t)0);
/*
- * Get System clock frequency from root node if it exists.
+ * The nodeid is not used in sun4v at all. Setting it
+ * to positive value to make starting of slave CPUs
+ * code happy.
*/
- if (GETPROP(root, "stick-frequency", (caddr_t)&stick_freq) != -1)
- system_clock_freq = stick_freq;
-
- map_wellknown(NEXT((pnode_t)0));
+ cpunode->nodeid = cpuid + 1;
/*
- * See if it worked
+ * Obtain the L2 cache information from MD.
+ * If "Cache" node exists, then set L2 cache properties
+ * as read from MD.
+ * If node does not exists, then set the L2 cache properties
+ * in individual CPU module.
*/
- for (wkp = wkdevice; wkp->wk_namep; ++wkp) {
- if (wkp->wk_flags == V_MUSTHAVE) {
- cmn_err(CE_PANIC, "map_wellknown_devices: required "
- "device %s not mapped", wkp->wk_namep);
- }
+ if ((!get_l2_cache_info(mdp, cpuc,
+ &associativity, &size, &linesize)) ||
+ associativity == 0 || size == 0 || linesize == 0) {
+ cpu_fiximp(cpunode);
+ } else {
+ /*
+ * Do not expect L2 cache properties to be bigger
+ * than 32-bit quantity.
+ */
+ cpunode->ecache_associativity = (int)associativity;
+ cpunode->ecache_size = (int)size;
+ cpunode->ecache_linesize = (int)linesize;
}
+
+ cpunode->ecache_setsize =
+ cpunode->ecache_size / cpunode->ecache_associativity;
+
+ /*
+ * Start off by assigning the cpu id as the default
+ * mapping index.
+ */
+
+ cpunode->exec_unit_mapping = NO_EU_MAPPING_FOUND;
+
+ if (ecache_setsize == 0)
+ ecache_setsize = cpunode->ecache_setsize;
+ if (ecache_alignsize == 0)
+ ecache_alignsize = cpunode->ecache_linesize;
+
+ ncpunode++;
}
-/*
- * map_wellknown - map known devices & registers
- */
-static void
-map_wellknown(pnode_t curnode)
+void
+empty_cpu(int cpuid)
{
- extern int status_okay(int, char *, int);
- char tmp_name[MAXSYSNAME];
- static void fill_address(pnode_t, char *);
- int sok;
+ bzero(&cpunodes[cpuid], sizeof (struct cpu_node));
+ ncpunode--;
+}
-#ifdef VPRINTF
- VPRINTF("map_wellknown(%x)\n", curnode);
-#endif /* VPRINTF */
+void
+setup_exec_unit_mappings(md_t *mdp)
+{
+ uint64_t num, num_eunits;
+ mde_cookie_t cpus_node;
+ mde_cookie_t *node, *eunit;
+ int idx, i, j;
+ processorid_t cpuid;
+ char *eunit_name = broken_md_flag ? "exec_unit" : "exec-unit";
- for (curnode = CHILD(curnode); curnode; curnode = NEXT(curnode)) {
- /*
- * prune subtree if status property indicating not okay
- */
- sok = status_okay((int)curnode, (char *)NULL, 0);
- if (!sok) {
- char devtype_buf[OBP_MAXPROPNAME];
- int size;
-
-#ifdef VPRINTF
- VPRINTF("map_wellknown: !okay status property\n");
-#endif /* VPRINTF */
- /*
- * a status property indicating bad memory will be
- * associated with a node which has a "device_type"
- * property with a value of "memory-controller"
- */
- if ((size = GETPROPLEN(curnode,
- OBP_DEVICETYPE)) == -1)
- continue;
- if (size > OBP_MAXPROPNAME) {
- cmn_err(CE_CONT, "node %x '%s' prop too "
- "big\n", curnode, OBP_DEVICETYPE);
- continue;
- }
- if (GETPROP(curnode, OBP_DEVICETYPE,
- devtype_buf) == -1) {
- cmn_err(CE_CONT, "node %x '%s' get failed\n",
- curnode, OBP_DEVICETYPE);
- continue;
+ /*
+ * Find the cpu integer exec units - and
+ * setup the mappings appropriately.
+ */
+ num = md_alloc_scan_dag(mdp, md_root_node(mdp), "cpus", "fwd", &node);
+ if (num < 1)
+ cmn_err(CE_PANIC, "No cpus node in machine desccription");
+ if (num > 1)
+ cmn_err(CE_PANIC, "More than 1 cpus node in machine"
+ " description");
+
+ cpus_node = node[0];
+ md_free_scan_dag(mdp, &node);
+
+ num_eunits = md_alloc_scan_dag(mdp, cpus_node, eunit_name,
+ "fwd", &eunit);
+ if (num_eunits > 0) {
+ char *match_type = broken_md_flag ? "int" : "integer";
+
+ /* Spin through and find all the integer exec units */
+ for (i = 0; i < num_eunits; i++) {
+ char *p;
+ char *val;
+ int vallen;
+ uint64_t lcpuid;
+
+ /* ignore nodes with no type */
+ if (md_get_prop_data(mdp, eunit[i], "type",
+ (uint8_t **)&val, &vallen)) continue;
+
+ for (p = val; *p != '\0'; p += strlen(p) + 1) {
+ if (strcmp(p, match_type) == 0)
+ goto found;
}
- if (strcmp(devtype_buf, "memory-controller") != 0)
- continue;
+
+ continue;
+found:
+ idx = NCPU + i;
/*
- * ...else fall thru and process the node...
+ * find the cpus attached to this EU and
+ * update their mapping indices
*/
+ num = md_alloc_scan_dag(mdp, eunit[i], "cpu",
+ "back", &node);
+
+ if (num < 1)
+ cmn_err(CE_PANIC, "exec-unit node in MD"
+ " not attached to a cpu node");
+
+ for (j = 0; j < num; j++) {
+ if (md_get_prop_val(mdp, node[j], "id",
+ &lcpuid))
+ continue;
+ if (lcpuid >= NCPU)
+ continue;
+ cpuid = (processorid_t)lcpuid;
+ cpunodes[cpuid].exec_unit_mapping = idx;
+ }
+ md_free_scan_dag(mdp, &node);
}
- bzero(tmp_name, MAXSYSNAME);
- if (GETPROP(curnode, OBP_NAME, (caddr_t)tmp_name) != -1)
- fill_address(curnode, tmp_name);
- if (GETPROP(curnode, OBP_DEVICETYPE, tmp_name) != -1 &&
- strcmp(tmp_name, "cpu") == 0) {
- fill_cpu(curnode);
- }
- if (sok && (strcmp(tmp_name, "memory-controller") == 0) &&
- (&plat_fill_mc != NULL))
- plat_fill_mc(curnode);
- map_wellknown(curnode);
+
+ md_free_scan_dag(mdp, &eunit);
}
}
-static void
-fill_address(pnode_t curnode, char *namep)
+/*
+ * All the common setup of sun4v CPU modules is done by this routine.
+ */
+void
+cpu_setup_common(char **cpu_module_isa_set)
{
- struct wkdevice *wkp;
- int size;
- uint32_t vaddr;
-
- for (wkp = wkdevice; wkp->wk_namep; ++wkp) {
- if (strcmp(wkp->wk_namep, namep) != 0)
- continue;
- if (wkp->wk_flags == V_MAPPED)
- return;
- if (wkp->wk_vaddrp != NULL) {
- if ((size = GETPROPLEN(curnode, OBP_ADDRESS)) == -1) {
- cmn_err(CE_CONT, "device %s size %d\n",
- namep, size);
- continue;
- }
- if (size != sizeof (vaddr)) {
- cmn_err(CE_CONT, "device %s address prop too "
- "big\n", namep);
- continue;
- }
- if (GETPROP(curnode, OBP_ADDRESS,
- (caddr_t)&vaddr) == -1) {
- cmn_err(CE_CONT, "device %s not mapped\n",
- namep);
- continue;
- }
+ extern int disable_delay_tlb_flush, delay_tlb_flush;
+ extern int mmu_exported_pagesize_mask;
+ extern int vac_size, vac_shift;
+ extern uint_t vac_mask;
+ int nocpus, i;
+ size_t ra_limit;
+ mde_cookie_t *cpulist;
+ md_t *mdp;
+
+ if ((mdp = md_get_handle()) == NULL)
+ cmn_err(CE_PANIC, "Unable to initialize machine description");
+
+ init_md_broken(mdp);
+
+ nocpus = md_alloc_scan_dag(mdp,
+ md_root_node(mdp), "cpu", "fwd", &cpulist);
+ if (nocpus < 1) {
+ cmn_err(CE_PANIC, "cpu_common_setup: cpulist allocation "
+ "failed or incorrect number of CPUs in MD");
+ }
- /* make into a native pointer */
- *wkp->wk_vaddrp = (caddr_t)(uintptr_t)vaddr;
-#ifdef VPRINTF
- VPRINTF("fill_address: %s mapped to %p\n", namep,
- *wkp->wk_vaddrp);
-#endif /* VPRINTF */
+ if (use_page_coloring) {
+ do_pg_coloring = 1;
+ if (use_virtual_coloring) {
+ /*
+ * XXX Sun4v cpus don't have virtual caches
+ */
+ do_virtual_coloring = 1;
}
- if (wkp->wk_func != NULL)
- (*wkp->wk_func)(curnode);
+ }
+
+ /*
+ * Get the valid contexts, mmu page sizes mask, Q sizes and isalist/r
+ * from the MD for the first available CPU in cpulist.
+ */
+
+ if (nctxs == 0)
+ nctxs = (uint_t)(1 << get_mmu_ctx_bits(mdp, cpulist[0]));
+
+ if (nctxs > MAX_NCTXS)
+ nctxs = MAX_NCTXS;
+
+ /* Do not expect the MMU page sizes mask to be more than 32-bit. */
+ mmu_exported_pagesize_mask = (int)get_cpu_pagesizes(mdp, cpulist[0]);
+
+ for (i = 0; i < nocpus; i++)
+ fill_cpu(mdp, cpulist[i]);
+
+ setup_exec_unit_mappings(mdp);
+
+ vac_size = S_VAC_SIZE;
+ vac_mask = MMU_PAGEMASK & (vac_size - 1);
+ vac_shift = S_VAC_SHIFT;
+ shm_alignment = vac_size;
+ vac = 0;
+
+ /*
+ * If MD is broken then append the passed ISA set,
+ * otherwise trust the MD.
+ */
+
+ if (broken_md_flag)
+ isa_list = construct_isalist(mdp, cpulist[0],
+ cpu_module_isa_set);
+ else
+ isa_list = construct_isalist(mdp, cpulist[0], NULL);
+
+ get_q_sizes(mdp, cpulist[0]);
+
+ get_va_bits(mdp, cpulist[0]);
+
+ /*
+ * ra_limit is the highest real address in the machine.
+ */
+ ra_limit = get_ra_limit(mdp);
+
+ md_free_scan_dag(mdp, &cpulist);
+
+ (void) md_fini_handle(mdp);
+
+ /*
+ * Block stores invalidate all pages of the d$ so pagecopy
+ * et. al. do not need virtual translations with virtual
+ * coloring taken into consideration.
+ */
+ pp_consistent_coloring = 0;
+
+ /*
+ * The kpm mapping window.
+ * kpm_size:
+ * The size of a single kpm range.
+ * The overall size will be: kpm_size * vac_colors.
+ * kpm_vbase:
+ * The virtual start address of the kpm range within the kernel
+ * virtual address space. kpm_vbase has to be kpm_size aligned.
+ */
+
+ /*
+ * Make kpm_vbase, kpm_size aligned to kpm_size_shift.
+ * To do this find the nearest power of 2 size that the
+ * actual ra_limit fits within.
+ * If it is an even power of two use that, otherwise use the
+ * next power of two larger than ra_limit.
+ */
+
+ ASSERT(ra_limit != 0);
+
+ kpm_size_shift = (ra_limit & (ra_limit - 1)) != 0 ?
+ highbit(ra_limit) : highbit(ra_limit) - 1;
+
+ /*
+ * No virtual caches on sun4v so size matches size shift
+ */
+ kpm_size = 1ul << kpm_size_shift;
+
+ if (va_bits < VA_ADDRESS_SPACE_BITS) {
/*
- * If this one is optional and there may be more than
- * one, don't set V_MAPPED, which would cause us to skip it
- * next time around
+ * In case of VA hole
+ * kpm_base = hole_end + 1TB
+ * Starting 1TB beyond where VA hole ends because on Niagara
+ * processor software must not use pages within 4GB of the
+ * VA hole as instruction pages to avoid problems with
+ * prefetching into the VA hole.
*/
- if (wkp->wk_flags != V_MULTI)
- wkp->wk_flags = V_MAPPED;
+ kpm_vbase = (caddr_t)((0ull - (1ull << (va_bits - 1))) +
+ (1ull << 40));
+ } else { /* Number of VA bits 64 ... no VA hole */
+ kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8 EB */
}
+
+ /*
+ * The traptrace code uses either %tick or %stick for
+ * timestamping. The sun4v require use of %stick.
+ */
+ traptrace_use_stick = 1;
+
+ /*
+ * sun4v provides demap_all
+ */
+ if (!disable_delay_tlb_flush)
+ delay_tlb_flush = 1;
}
-void
-fill_cpu(pnode_t node)
+/*
+ * Get the nctxs from MD. If absent panic.
+ */
+static uint64_t
+get_mmu_ctx_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
- struct cpu_node *cpunode;
- processorid_t cpuid;
- uint_t clk_freq;
- char namebuf[OBP_MAXPROPNAME], unum[UNUM_NAMLEN];
- char *namebufp;
+ uint64_t ctx_bits;
- if (GETPROP(node, "cpuid", (caddr_t)&cpuid) == -1) {
- if (GETPROP(node, "reg", (caddr_t)&cpuid) == -1)
- cmn_err(CE_PANIC, "reg prop not found in cpu node");
- cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
- }
+ if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#context-bits",
+ &ctx_bits))
+ ctx_bits = 0;
- if (cpuid < 0 || cpuid >= NCPU) {
- cmn_err(CE_CONT, "cpu (dnode %x): out of range cpuid %d - "
- "cpu excluded from configuration\n", node, cpuid);
- return;
- }
+ if (ctx_bits < MIN_NCTXS_BITS || ctx_bits > MAX_NCTXS_BITS)
+ cmn_err(CE_PANIC, "Incorrect %ld number of contexts bits "
+ "returned by MD", ctx_bits);
- cpunode = &cpunodes[cpuid];
- cpunode->cpuid = cpuid;
- cpunode->device_id = cpuid;
+ return (ctx_bits);
+}
- unum[0] = '\0';
- (void) snprintf(cpunode->fru_fmri, sizeof (cpunode->fru_fmri),
- "%s%s", CPU_FRU_FMRI, unum);
- (void) GETPROP(node, "compatible", namebuf);
- namebufp = namebuf;
- if (strncmp(namebufp, "SUNW,", 5) == 0)
- namebufp += 5;
- (void) strcpy(cpunode->name, namebufp);
+/*
+ * Initalize supported page sizes information.
+ * Set to 0, if the page sizes mask information is absent in MD.
+ */
+static uint64_t
+get_cpu_pagesizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
+{
+ uint64_t mmu_page_size_list;
- if (GETPROP(node, "clock-frequency", (caddr_t)&clk_freq) == -1) {
- /*
- * If we didn't find it in the CPU node, look in the root node.
- */
- pnode_t root = prom_nextnode((pnode_t)0);
- if (GETPROP(root, "clock-frequency", (caddr_t)&clk_freq) == -1)
- clk_freq = 0;
- }
- cpunode->clock_freq = clk_freq;
+ if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-page-size-list",
+ &mmu_page_size_list))
+ mmu_page_size_list = 0;
+
+ if (mmu_page_size_list == 0 || mmu_page_size_list > MAX_PAGESIZE_MASK)
+ cmn_err(CE_PANIC, "Incorrect 0x%lx pagesize mask returned"
+ "by MD", mmu_page_size_list);
+
+ return (mmu_page_size_list);
+}
+
+/*
+ * This routine gets the isalist information from MD and appends
+ * the CPU module ISA set if required.
+ */
+static char *
+construct_isalist(md_t *mdp, mde_cookie_t cpu_node_cookie,
+ char **cpu_module_isa_set)
+{
+ extern int at_flags;
+ char *md_isalist;
+ int md_isalen;
+ char *isabuf;
+ int isalen;
+ char **isa_set;
+ char *p, *q;
+ int cpu_module_isalen = 0, found = 0;
+
+ (void) md_get_prop_data(mdp, cpu_node_cookie,
+ "isalist", (uint8_t **)&isabuf, &isalen);
- ASSERT(cpunode->clock_freq != 0);
/*
- * Compute scaling factor based on rate of %tick. This is used
- * to convert from ticks derived from %tick to nanoseconds. See
- * comment in sun4u/sys/clock.h for details.
+ * We support binaries for all the cpus that have shipped so far.
+ * The kernel emulates instructions that are not supported by hardware.
*/
- cpunode->tick_nsec_scale = (uint_t)(((uint64_t)NANOSEC <<
- (32 - TICK_NSEC_SHIFT)) / cpunode->clock_freq);
+ at_flags = EF_SPARC_SUN_US3 | EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
+ /*
+ * Construct the space separated isa_list.
+ */
+ if (cpu_module_isa_set != NULL) {
+ for (isa_set = cpu_module_isa_set; *isa_set != NULL;
+ isa_set++) {
+ cpu_module_isalen += strlen(*isa_set);
+ cpu_module_isalen++; /* for space character */
+ }
+ }
- cpunode->nodeid = node;
+ /*
+ * Allocate the buffer of MD isa buffer length + CPU module
+ * isa buffer length.
+ */
+ md_isalen = isalen + cpu_module_isalen + 2;
+ md_isalist = (char *)prom_alloc((caddr_t)0, md_isalen, 0);
+ if (md_isalist == NULL)
+ cmn_err(CE_PANIC, "construct_isalist: Allocation failed for "
+ "md_isalist");
+
+ md_isalist[0] = '\0'; /* create an empty string to start */
+ for (p = isabuf, q = p + isalen; p < q; p += strlen(p) + 1) {
+ (void) strlcat(md_isalist, p, md_isalen);
+ (void) strcat(md_isalist, " ");
+ }
/*
- * Call cpu module specific code to fill in the cpu properities
+ * Check if the isa_set is present in isalist returned by MD.
+ * If yes, then no need to append it, if no then append it to
+ * isalist returned by MD.
*/
- cpu_fiximp(cpunode);
+ if (cpu_module_isa_set != NULL) {
+ for (isa_set = cpu_module_isa_set; *isa_set != NULL;
+ isa_set++) {
+ found = 0;
+ for (p = isabuf, q = p + isalen; p < q;
+ p += strlen(p) + 1) {
+ if (strcmp(p, *isa_set) == 0) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ (void) strlcat(md_isalist, *isa_set, md_isalen);
+ (void) strcat(md_isalist, " ");
+ }
+ }
+ }
+
+ /* Get rid of any trailing white spaces */
+ md_isalist[strlen(md_isalist) - 1] = '\0';
+
+ return (md_isalist);
}
-#define IOMMU_PER_SCHIZO 2
+uint64_t
+get_ra_limit(md_t *mdp)
+{
+ mde_cookie_t *mem_list;
+ mde_cookie_t *mblock_list;
+ int i;
+ int memnodes;
+ int nmblock;
+ uint64_t base;
+ uint64_t size;
+ uint64_t ra_limit = 0, new_limit = 0;
+
+ memnodes = md_alloc_scan_dag(mdp,
+ md_root_node(mdp), "memory", "fwd", &mem_list);
+
+ ASSERT(memnodes == 1);
+
+ nmblock = md_alloc_scan_dag(mdp,
+ mem_list[0], "mblock", "fwd", &mblock_list);
+ if (nmblock < 1)
+ cmn_err(CE_PANIC, "cannot find mblock nodes in MD");
+
+ for (i = 0; i < nmblock; i++) {
+ if (md_get_prop_val(mdp, mblock_list[i], "base", &base))
+ cmn_err(CE_PANIC, "base property missing from MD"
+ " mblock node");
+ if (md_get_prop_val(mdp, mblock_list[i], "size", &size))
+ cmn_err(CE_PANIC, "size property missing from MD"
+ " mblock node");
+
+ ASSERT(size != 0);
+
+ new_limit = base + size;
+
+ if (base > new_limit)
+ cmn_err(CE_PANIC, "mblock in MD wrapped around");
+
+ if (new_limit > ra_limit)
+ ra_limit = new_limit;
+ }
+
+ ASSERT(ra_limit != 0);
+
+ if (ra_limit > MAX_REAL_ADDRESS) {
+ cmn_err(CE_WARN, "Highest real address in MD too large"
+ " clipping to %llx\n", MAX_REAL_ADDRESS);
+ ra_limit = MAX_REAL_ADDRESS;
+ }
+
+ md_free_scan_dag(mdp, &mblock_list);
+
+ md_free_scan_dag(mdp, &mem_list);
+
+ return (ra_limit);
+}
/*
- * The first psycho must always programmed up for the system clock and error
- * handling purposes.
+ * This routine sets the globals for CPU and DEV mondo queue entries and
+ * resumable and non-resumable error queue entries.
*/
+static uint64_t
+get_single_q_size(md_t *mdp, mde_cookie_t cpu_node_cookie,
+ char *qnamep, uint64_t default_entries)
+{
+ uint64_t entries;
+
+ if (md_get_prop_val(mdp, cpu_node_cookie, qnamep, &entries)) {
+ if (!broken_md_flag)
+ cmn_err(CE_PANIC, "Missing %s property in MD cpu node",
+ qnamep);
+ entries = default_entries;
+ } else {
+ entries = 1 << entries;
+ }
+ return (entries);
+}
+
+
static void
-have_pci(pnode_t node)
+get_q_sizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
- int size;
- uint_t portid;
- char compatible[OBP_MAXDRVNAME];
+ cpu_q_entries = get_single_q_size(mdp, cpu_node_cookie,
+ "q-cpu-mondo-#bits", DEFAULT_CPU_Q_ENTRIES);
+
+ dev_q_entries = get_single_q_size(mdp, cpu_node_cookie,
+ "q-dev-mondo-#bits", DEFAULT_DEV_Q_ENTRIES);
+
+ cpu_rq_entries = get_single_q_size(mdp, cpu_node_cookie,
+ "q-resumable-#bits", CPU_RQ_ENTRIES);
+
+ cpu_nrq_entries = get_single_q_size(mdp, cpu_node_cookie,
+ "q-nonresumable-#bits", CPU_NRQ_ENTRIES);
+}
+
+
+static void
+get_va_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
+{
+ uint64_t value = VA_ADDRESS_SPACE_BITS;
+
+ if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#va-bits", &value))
+ cmn_err(CE_PANIC, "mmu-#va-bits property not found in MD");
- size = GETPROPLEN(node, "portid");
- if (size == -1) size = GETPROPLEN(node, "upa-portid");
- if (size == -1)
- return;
- if (size > sizeof (portid))
- cmn_err(CE_PANIC, "portid size wrong");
- if (GETPROP(node, "portid", (caddr_t)&portid) == -1)
- if (GETPROP(node, "upa-portid", (caddr_t)&portid) == -1)
- cmn_err(CE_PANIC, "portid not found");
+ if (value == 0 || value > VA_ADDRESS_SPACE_BITS)
+ cmn_err(CE_PANIC, "Incorrect number of va bits in MD");
- niobus++;
+ /* Do not expect number of VA bits to be more than 32-bit quantity */
+ va_bits = (int)value;
/*
- * Need two physical TSBs for Schizo-compatible nodes,
- * one otherwise.
+ * Correct the value for VA bits on UltraSPARC-T1 based systems
+ * in case of broken MD.
*/
- compatible[0] = '\0';
- (void) prom_getprop(node, OBP_COMPATIBLE, compatible);
- if (strcmp(compatible, "pci108e,8001") == 0)
- niommu_tsbs += IOMMU_PER_SCHIZO;
- else
- niommu_tsbs++;
+ if (broken_md_flag)
+ va_bits = DEFAULT_VA_ADDRESS_SPACE_BITS;
}
+/*
+ * This routine returns the L2 cache information such as -- associativity,
+ * size and linesize.
+ */
+static int
+get_l2_cache_info(md_t *mdp, mde_cookie_t cpu_node_cookie,
+ uint64_t *associativity, uint64_t *size, uint64_t *linesize)
+{
+ mde_cookie_t *cachelist;
+ int ncaches, i;
+ uint64_t max_level;
+
+ ncaches = md_alloc_scan_dag(mdp, cpu_node_cookie, "cache",
+ "fwd", &cachelist);
+ /*
+ * The "cache" node is optional in MD, therefore ncaches can be 0.
+ */
+ if (ncaches < 1) {
+ return (0);
+ }
+
+ max_level = 0;
+ for (i = 0; i < ncaches; i++) {
+ uint64_t cache_level;
+ uint64_t local_assoc;
+ uint64_t local_size;
+ uint64_t local_lsize;
+
+ if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
+ continue;
+
+ if (cache_level <= max_level) continue;
+
+ /* If properties are missing from this cache ignore it */
+
+ if ((md_get_prop_val(mdp, cachelist[i],
+ "associativity", &local_assoc))) {
+ continue;
+ }
+
+ if ((md_get_prop_val(mdp, cachelist[i],
+ "size", &local_size))) {
+ continue;
+ }
+
+ if ((md_get_prop_val(mdp, cachelist[i],
+ "line-size", &local_lsize))) {
+ continue;
+ }
+
+ max_level = cache_level;
+ *associativity = local_assoc;
+ *size = local_size;
+ *linesize = local_lsize;
+ }
-int
-get_cpu_pagesizes(void)
+ md_free_scan_dag(mdp, &cachelist);
+
+ return ((max_level > 0) ? 1 : 0);
+}
+
+/*
+ * The broken_md_flag is set to 1, if the MD doesn't have
+ * the domaining-enabled property in the platform node and the platforms
+ * are Ontario and Erie. This flag is used to workaround some of the
+ * incorrect MD properties.
+ */
+static void
+init_md_broken(md_t *mdp)
{
+ int nrnode;
+ mde_cookie_t *platlist, rootnode;
+ char *vbuf;
+ uint64_t val = 0;
+
+ rootnode = md_root_node(mdp);
+ ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+ nrnode = md_alloc_scan_dag(mdp, md_root_node(mdp), "platform", "fwd",
+ &platlist);
+
+ ASSERT(nrnode == 1);
+
+ if (md_get_prop_str(mdp, platlist[0], "name", &vbuf) != 0)
+ panic("platform name not found in machine description");
+
/*
- * XXXQ Get supported page sizes information from the PD
- * and return a bit mask indicating which page sizes are
- * supported.
- *
- * Return 0 when no information is available.
+ * If domaining-enabled prop doesn't exist and the platform name is
+ * Ontario or Erie the md is broken.
*/
- return (0); /* XXXQ for now return 0 as no PD */
+ if (md_get_prop_val(mdp, platlist[0], "domaining-enabled", &val) != 0 &&
+ ((strcmp(vbuf, ONTARIO_PLATNAME1) == 0) ||
+ (strcmp(vbuf, ONTARIO_PLATNAME2) == 0) ||
+ (strcmp(vbuf, ERIE_PLATNAME1) == 0) ||
+ (strcmp(vbuf, ERIE_PLATNAME2) == 0)))
+ broken_md_flag = 1;
+
+ md_free_scan_dag(mdp, &platlist);
}
diff --git a/usr/src/uts/sun4v/os/hsvc.c b/usr/src/uts/sun4v/os/hsvc.c
index 4b88b60222..e06012e920 100644
--- a/usr/src/uts/sun4v/os/hsvc.c
+++ b/usr/src/uts/sun4v/os/hsvc.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -653,12 +654,15 @@ hsvc_init(void)
* uses hypervisor services belonging to the HSVC_GROUP_CORE API
* group only for itself.
*
- * Note that the HSVC_GROUP_DIAG is negotiated on behalf of
- * any driver/module using DIAG services.
+ * Rest of the API groups are currently negotiated on behalf
+ * of the pcitool, glvc support. In future, when these drivers
+ * are modified to do the negotiation themselves, corresponding
+ * entry should be removed from the table below.
*/
static hsvc_info_t hsvcinfo_unix[] = {
{HSVC_REV_1, NULL, HSVC_GROUP_SUN4V, 1, 0, NULL},
- {HSVC_REV_1, NULL, HSVC_GROUP_CORE, 1, 0, NULL},
+ {HSVC_REV_1, NULL, HSVC_GROUP_CORE, 1, 1, NULL},
+ {HSVC_REV_1, NULL, HSVC_GROUP_VSC, 1, 0, NULL},
{HSVC_REV_1, NULL, HSVC_GROUP_DIAG, 1, 0, NULL}
};
diff --git a/usr/src/uts/sun4v/os/intrq.c b/usr/src/uts/sun4v/os/intrq.c
index 0ddf35c033..ae905ed312 100644
--- a/usr/src/uts/sun4v/os/intrq.c
+++ b/usr/src/uts/sun4v/os/intrq.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -34,28 +34,6 @@
#include <sys/error.h>
#include <sys/hypervisor_api.h>
-/*
- * XXX needs to be set by some algorithm that derives this
- * from the partition description
- */
-int cpu_q_entries = 128;
-int dev_q_entries = 128;
-
-/*
- * Once the partition description if finallized
- * cpu_q_entries and dev_q_entries will be set
- * and be garaunteed to be two's power multiples.
- */
-#define INTR_CPU_Q 0x3c
-#define INTR_DEV_Q 0x3d
-#define INTR_REPORT_SIZE 64
-#define INTR_CPU_Q_SIZE (cpu_q_entries * INTR_REPORT_SIZE)
-#define INTR_DEV_Q_SIZE (dev_q_entries * INTR_REPORT_SIZE)
-
-/*
- * XXX - This needs to be rewritten with prom calls to
- * let OBP know the queues are allocated
- */
void
cpu_intrq_register(struct cpu *cpu)
{
@@ -72,13 +50,12 @@ cpu_intrq_register(struct cpu *cpu)
cmn_err(CE_PANIC, "cpu%d: dev_mondo queue configuration "
"failed, error %lu", cpu->cpu_id, ret);
- ret = hv_cpu_qconf(CPU_RQ, mcpup->cpu_rq_base_pa, CPU_RQ_ENTRIES);
+ ret = hv_cpu_qconf(CPU_RQ, mcpup->cpu_rq_base_pa, cpu_rq_entries);
if (ret != H_EOK)
cmn_err(CE_PANIC, "cpu%d: resumable error queue configuration "
"failed, error %lu", cpu->cpu_id, ret);
- ret = hv_cpu_qconf(CPU_NRQ, mcpup->cpu_nrq_base_pa,
- CPU_NRQ_ENTRIES);
+ ret = hv_cpu_qconf(CPU_NRQ, mcpup->cpu_nrq_base_pa, cpu_nrq_entries);
if (ret != H_EOK)
cmn_err(CE_PANIC, "cpu%d: non-resumable error queue "
"configuration failed, error %lu", cpu->cpu_id, ret);
@@ -89,6 +66,10 @@ cpu_intrq_setup(struct cpu *cpu)
{
struct machcpu *mcpup = &cpu->cpu_m;
int cpu_list_size;
+ uint64_t cpu_q_size;
+ uint64_t dev_q_size;
+ uint64_t cpu_rq_size;
+ uint64_t cpu_nrq_size;
/*
* Allocate mondo data for xcalls.
@@ -120,38 +101,109 @@ cpu_intrq_setup(struct cpu *cpu)
/*
* Allocate sun4v interrupt and error queues.
*/
- mcpup->cpu_q_va = contig_mem_alloc(INTR_CPU_Q_SIZE);
+ cpu_q_size = cpu_q_entries * INTR_REPORT_SIZE;
+ mcpup->cpu_q_va = contig_mem_alloc(cpu_q_size);
if (mcpup->cpu_q_va == NULL)
cmn_err(CE_PANIC, "cpu%d: cpu intrq allocation failed",
cpu->cpu_id);
mcpup->cpu_q_base_pa = va_to_pa(mcpup->cpu_q_va);
- mcpup->cpu_q_size = INTR_CPU_Q_SIZE;
+ mcpup->cpu_q_size = cpu_q_size;
- mcpup->dev_q_va = contig_mem_alloc(INTR_DEV_Q_SIZE);
+ dev_q_size = dev_q_entries * INTR_REPORT_SIZE;
+ mcpup->dev_q_va = contig_mem_alloc(dev_q_size);
if (mcpup->dev_q_va == NULL)
cmn_err(CE_PANIC, "cpu%d: dev intrq allocation failed",
cpu->cpu_id);
mcpup->dev_q_base_pa = va_to_pa(mcpup->dev_q_va);
- mcpup->dev_q_size = INTR_DEV_Q_SIZE;
+ mcpup->dev_q_size = dev_q_size;
/* Allocate resumable queue and its kernel buffer */
- mcpup->cpu_rq_va = contig_mem_alloc(2 * CPU_RQ_SIZE);
+ cpu_rq_size = cpu_rq_entries * Q_ENTRY_SIZE;
+ mcpup->cpu_rq_va = contig_mem_alloc(2 * cpu_rq_size);
if (mcpup->cpu_rq_va == NULL)
cmn_err(CE_PANIC, "cpu%d: resumable queue allocation failed",
cpu->cpu_id);
mcpup->cpu_rq_base_pa = va_to_pa(mcpup->cpu_rq_va);
- mcpup->cpu_rq_size = CPU_RQ_SIZE;
+ mcpup->cpu_rq_size = cpu_rq_size;
/* zero out the memory */
- bzero(mcpup->cpu_rq_va, 2 * CPU_RQ_SIZE);
+ bzero(mcpup->cpu_rq_va, 2 * cpu_rq_size);
/* Allocate nonresumable queue here */
- mcpup->cpu_nrq_va = contig_mem_alloc(2 * CPU_NRQ_SIZE);
+ cpu_nrq_size = cpu_nrq_entries * Q_ENTRY_SIZE;
+ mcpup->cpu_nrq_va = contig_mem_alloc(2 * cpu_nrq_size);
if (mcpup->cpu_nrq_va == NULL)
cmn_err(CE_PANIC, "cpu%d: nonresumable queue "
"allocation failed", cpu->cpu_id);
mcpup->cpu_nrq_base_pa = va_to_pa(mcpup->cpu_nrq_va);
- mcpup->cpu_nrq_size = CPU_NRQ_SIZE;
+ mcpup->cpu_nrq_size = cpu_nrq_size;
/* zero out the memory */
- bzero(mcpup->cpu_nrq_va, 2 * CPU_NRQ_SIZE);
+ bzero(mcpup->cpu_nrq_va, 2 * cpu_nrq_size);
+}
+void
+cpu_intrq_cleanup(struct cpu *cpu)
+{
+ struct machcpu *mcpup = &cpu->cpu_m;
+ int cpu_list_size;
+ uint64_t cpu_q_size;
+ uint64_t dev_q_size;
+ uint64_t cpu_rq_size;
+ uint64_t cpu_nrq_size;
+
+ /*
+ * Free mondo data for xcalls.
+ */
+ if (mcpup->mondo_data) {
+ contig_mem_free(mcpup->mondo_data, INTR_REPORT_SIZE);
+ mcpup->mondo_data = NULL;
+ mcpup->mondo_data_ra = NULL;
+ }
+
+ /*
+ * Free percpu list of NCPU for xcalls
+ */
+ cpu_list_size = NCPU * sizeof (uint16_t);
+ if (cpu_list_size < INTR_REPORT_SIZE)
+ cpu_list_size = INTR_REPORT_SIZE;
+
+ if (mcpup->cpu_list) {
+ contig_mem_free(mcpup->cpu_list, cpu_list_size);
+ mcpup->cpu_list = NULL;
+ mcpup->cpu_list_ra = NULL;
+ }
+
+ /*
+ * Free sun4v interrupt and error queues.
+ */
+ if (mcpup->cpu_q_va) {
+ cpu_q_size = cpu_q_entries * INTR_REPORT_SIZE;
+ contig_mem_free(mcpup->cpu_q_va, cpu_q_size);
+ mcpup->cpu_q_va = NULL;
+ mcpup->cpu_q_base_pa = NULL;
+ mcpup->cpu_q_size = 0;
+ }
+
+ if (mcpup->dev_q_va) {
+ dev_q_size = dev_q_entries * INTR_REPORT_SIZE;
+ contig_mem_free(mcpup->dev_q_va, dev_q_size);
+ mcpup->dev_q_va = NULL;
+ mcpup->dev_q_base_pa = NULL;
+ mcpup->dev_q_size = 0;
+ }
+
+ if (mcpup->cpu_rq_va) {
+ cpu_rq_size = cpu_rq_entries * Q_ENTRY_SIZE;
+ contig_mem_free(mcpup->cpu_rq_va, 2 * cpu_rq_size);
+ mcpup->cpu_rq_va = NULL;
+ mcpup->cpu_rq_base_pa = NULL;
+ mcpup->cpu_rq_size = 0;
+ }
+
+ if (mcpup->cpu_nrq_va) {
+ cpu_nrq_size = cpu_nrq_entries * Q_ENTRY_SIZE;
+ contig_mem_free(mcpup->cpu_nrq_va, 2 * cpu_nrq_size);
+ mcpup->cpu_nrq_va = NULL;
+ mcpup->cpu_nrq_base_pa = NULL;
+ mcpup->cpu_nrq_size = 0;
+ }
}
diff --git a/usr/src/uts/sun4v/os/lpad.c b/usr/src/uts/sun4v/os/lpad.c
new file mode 100644
index 0000000000..a2c22badde
--- /dev/null
+++ b/usr/src/uts/sun4v/os/lpad.c
@@ -0,0 +1,231 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/machsystm.h>
+#include <sys/machparam.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/note.h>
+#include <sys/hypervisor_api.h>
+#include <sys/lpad.h>
+
+typedef struct {
+ uint64_t inuse;
+ uint64_t buf[LPAD_SIZE / sizeof (uint64_t)];
+} lpad_t;
+
+/*
+ * A global pool of landing pad memory. Currently, CPUs are only
+ * brought into the system one at a time, so the pool is only a
+ * single landing pad. In the future, it may be desirable to bring
+ * CPUs into the systems in parallel. At that time, the size of
+ * the pool can be increased by changing the pool size constant.
+ */
+#define LPAD_POOL_SIZE 1
+
+static lpad_t lpad_pool[LPAD_POOL_SIZE];
+
+#ifdef DEBUG
+static int lpad_dbg = 0;
+
+#define LPAD_DBG if (lpad_dbg) printf
+#define LPAD_DUMP_DATA lpad_dump_data
+
+static void lpad_dump_data(uint64_t *lpd_start, uint64_t *lpd_end);
+
+#else /* DEBUG */
+
+#define LPAD_DBG _NOTE(CONSTCOND) if (0) printf
+#define LPAD_DUMP_DATA
+#endif /* DEBUG */
+
+extern void mach_cpu_startup(uint64_t rabase, uint64_t memsize);
+extern void mach_cpu_startup_end(void);
+extern int promif_in_cif(void);
+
+static lpad_t *lpad_alloc(void);
+
+uint64_t *
+lpad_setup(int cpuid, uint64_t pc, uint64_t arg)
+{
+ lpad_t *lpp;
+ uint64_t textsz;
+ uint64_t datasz;
+ lpad_data_t *lpd;
+ lpad_map_t *lpm;
+
+ /* external parameters */
+ extern caddr_t textva;
+ extern caddr_t datava;
+ extern tte_t ktext_tte;
+ extern tte_t kdata_tte;
+ extern caddr_t mmu_fault_status_area;
+
+ LPAD_DBG("lpad_setup...\n");
+
+ if ((cpuid < 0) || (cpuid > NCPU)) {
+ cmn_err(CE_PANIC, "lpad_setup: invalid cpuid");
+ }
+
+ /* allocate our landing pad */
+ if ((lpp = lpad_alloc()) == NULL) {
+ cmn_err(CE_PANIC, "lpad_setup: unable to allocate lpad");
+ }
+
+ /* calculate the size of our text */
+ textsz = (uint64_t)mach_cpu_startup_end - (uint64_t)mach_cpu_startup;
+
+ LPAD_DBG("lpad textsz=%ld\n", textsz);
+
+ ASSERT(textsz <= LPAD_TEXT_SIZE);
+
+ /* copy over text section */
+ bcopy((void *)mach_cpu_startup, lpp->buf, textsz);
+
+ lpd = (lpad_data_t *)(((caddr_t)lpp->buf) + LPAD_TEXT_SIZE);
+ lpm = (lpad_map_t *)lpd->map;
+
+ ASSERT(mmu_fault_status_area);
+
+ bzero(lpd, LPAD_TEXT_SIZE);
+ lpd->magic = LPAD_MAGIC_VAL;
+ lpd->inuse = &(lpp->inuse);
+ lpd->mmfsa_ra = va_to_pa(mmu_fault_status_area) + (MMFSA_SIZE * cpuid);
+ lpd->pc = pc;
+ lpd->arg = arg;
+
+ /*
+ * List of mappings:
+ *
+ * - permanent inst/data mapping for kernel text
+ * - permanent data mapping for kernel data
+ * - non-permanent inst mapping for kernel data,
+ * required for landing pad text
+ */
+ lpd->nmap = 3;
+
+ /* verify the lpad has enough room for the data */
+ datasz = sizeof (lpad_data_t);
+ datasz += (lpd->nmap - 1) * sizeof (lpad_map_t);
+
+ ASSERT(datasz <= LPAD_DATA_SIZE);
+
+ /*
+ * Kernel Text Mapping
+ */
+ lpm->va = (uint64_t)textva;
+ lpm->tte = ktext_tte;
+ lpm->flag_mmuflags = (MAP_ITLB | MAP_DTLB);
+ lpm->flag_perm = 1;
+ lpm++;
+
+ /*
+ * Kernel Data Mapping
+ */
+ lpm->va = (uint64_t)datava;
+ lpm->tte = kdata_tte;
+ lpm->flag_mmuflags = MAP_DTLB;
+ lpm->flag_perm = 1;
+ lpm++;
+
+ /*
+ * Landing Pad Text Mapping
+ *
+ * Because this mapping should not be permanent,
+ * the permanent mapping above cannot be used.
+ */
+ lpm->va = (uint64_t)datava;
+ lpm->tte = kdata_tte;
+ lpm->flag_mmuflags = MAP_ITLB;
+ lpm->flag_perm = 0;
+ lpm++;
+
+ ASSERT(((uint64_t)lpm - (uint64_t)lpd) == datasz);
+
+ LPAD_DBG("copied %ld bytes of data into lpad\n", datasz);
+
+ LPAD_DUMP_DATA((uint64_t *)lpd, (uint64_t *)lpm);
+
+ return (lpp->buf);
+}
+
+static lpad_t *
+lpad_alloc(void)
+{
+ int idx;
+
+ /*
+ * No locking is required for the global lpad pool since
+ * it should only be accessed while in the CIF which is
+ * single threaded. If this assumption changes, locking
+ * would be required.
+ */
+ ASSERT(promif_in_cif());
+
+ /*
+ * Wait until an lpad buffer becomes available.
+ */
+ for (;;) {
+ LPAD_DBG("checking lpad pool:\n");
+
+ /* walk the lpad buffer array */
+ for (idx = 0; idx < LPAD_POOL_SIZE; idx++) {
+
+ LPAD_DBG("\tchecking lpad_pool[%d]\n", idx);
+
+ if (lpad_pool[idx].inuse == 0) {
+ LPAD_DBG("found empty lpad (%d)\n", idx);
+
+ /* mark the buffer as busy */
+ lpad_pool[idx].inuse = 1;
+
+ return (&lpad_pool[idx]);
+ }
+ }
+ }
+}
+
+#ifdef DEBUG
+static void
+lpad_dump_data(uint64_t *lpd_start, uint64_t *lpd_end)
+{
+ uint64_t *lp;
+ uint_t offset = 0;
+
+ if (lpad_dbg == 0)
+ return;
+
+ printf("lpad data:\n");
+
+ for (lp = lpd_start; lp < lpd_end; lp++) {
+ printf("\t0x%02x 0x%016lx\n", offset, *lp);
+ offset += sizeof (uint64_t);
+ }
+}
+#endif /* DEBUG */
diff --git a/usr/src/uts/sun4v/os/mach_cpu_states.c b/usr/src/uts/sun4v/os/mach_cpu_states.c
index f43356ac1b..a045ea00e2 100644
--- a/usr/src/uts/sun4v/os/mach_cpu_states.c
+++ b/usr/src/uts/sun4v/os/mach_cpu_states.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -49,6 +48,8 @@
#include <sys/dtrace.h>
#include <sys/xc_impl.h>
#include <sys/callb.h>
+#include <sys/mdesc.h>
+#include <sys/mach_descrip.h>
/*
* hvdump_buf_va is a pointer to the currently-configured hvdump_buf.
@@ -438,6 +439,12 @@ ptl1_panic_handler(ptl1_state_t *pstate)
"CPU ECC error loop", /* PTL1_BAD_ECC */
"unexpected error from hypervisor call", /* PTL1_BAD_HCALL */
"unexpected global level(%gl)", /* PTL1_BAD_GL */
+ "Watchdog Reset", /* PTL1_BAD_WATCHDOG */
+ "unexpected RED mode trap", /* PTL1_BAD_RED */
+ "return value EINVAL from hcall: "\
+ "UNMAP_PERM_ADDR", /* PTL1_BAD_HCALL_UNMAP_PERM_EINVAL */
+ "return value ENOMAP from hcall: "\
+ "UNMAP_PERM_ADDR", /* PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP */
};
uint_t reason = pstate->ptl1_regs.ptl1_gregs[0].ptl1_g1;
@@ -559,7 +566,45 @@ getintprop(pnode_t node, char *name, int deflt)
void
cpu_init_tick_freq(void)
{
- sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
+ md_t *mdp;
+ mde_cookie_t rootnode;
+ int listsz;
+ mde_cookie_t *listp = NULL;
+ int num_nodes;
+ uint64_t stick_prop;
+
+ if (broken_md_flag) {
+ sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
+ return;
+ }
+
+ if ((mdp = md_get_handle()) == NULL)
+ panic("stick_frequency property not found in MD");
+
+ rootnode = md_root_node(mdp);
+ ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+ num_nodes = md_node_count(mdp);
+
+ ASSERT(num_nodes > 0);
+ listsz = num_nodes * sizeof (mde_cookie_t);
+ listp = (mde_cookie_t *)prom_alloc((caddr_t)0, listsz, 0);
+
+ if (listp == NULL)
+ panic("cannot allocate list for MD properties");
+
+ num_nodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "platform"),
+ md_find_name(mdp, "fwd"), listp);
+
+ ASSERT(num_nodes == 1);
+
+ if (md_get_prop_val(mdp, *listp, "stick-frequency", &stick_prop) != 0)
+ panic("stick_frequency property not found in MD");
+
+ sys_tick_freq = stick_prop;
+
+ prom_free((caddr_t)listp, listsz);
+ (void) md_fini_handle(mdp);
}
int shipit(int n, uint64_t cpu_list_ra);
diff --git a/usr/src/uts/sun4v/os/mach_descrip.c b/usr/src/uts/sun4v/os/mach_descrip.c
index d603a1c06e..fe4b9f3724 100644
--- a/usr/src/uts/sun4v/os/mach_descrip.c
+++ b/usr/src/uts/sun4v/os/mach_descrip.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,13 +18,33 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Kernel Machine Description (MD)
+ *
+ * The Kernel maintains a global copy of the machine description for
+ * the system. This is for use by all kernel subsystems and is exported
+ * to user applications through the the 'mdesc' device driver. It is
+ * initially copied in from the Hypervisor at boot time, but can be
+ * updated dynamically on demand. The Kernel provides an interface
+ * for consumers to obtain a handle to the global MD. Consumers of the
+ * MD must use the specified interfaces. An update interface is provided
+ * for platform services to initiate an MD update on notification by a
+ * service entity.
+ *
+ * Locks
+ * The current global MD is protected by the curr_mach_descrip_lock.
+ * Each Machine description has a lock to synchronize its ref count.
+ * The Obsolete MD list is protected by the obs_list_lock.
+ */
+
#include <sys/machsystm.h>
#include <sys/vm.h>
#include <sys/cpu.h>
@@ -37,32 +56,93 @@
#include <sys/error.h>
#include <sys/hypervisor_api.h>
#include <sys/types.h>
-#include <sys/kstat.h>
-#ifdef MACH_DESC_DEBUG
-#include <sys/promif.h> /* for prom_printf */
-#endif
#include <sys/sysmacros.h>
+#include <sys/mdesc.h>
+#include <sys/mdesc_impl.h>
#include <sys/mach_descrip.h>
+#include <sys/prom_plat.h>
+#include <sys/bootconf.h>
+#include <sys/promif.h>
+
+
+static void *mach_descrip_strt_meta_alloc(size_t size);
+static void mach_descrip_strt_meta_free(void *buf, size_t size);
+static void *mach_descrip_strt_buf_alloc(size_t size, size_t align);
+static void mach_descrip_strt_buf_free(void *buf, size_t size);
+static void *mach_descrip_buf_alloc(size_t size, size_t align);
+static void *mach_descrip_meta_alloc(size_t size);
+static uint64_t mach_descrip_find_md_gen(caddr_t ptr);
+static void init_md_params(void);
+static void init_domaining_enabled(md_t *mdp, mde_cookie_t *listp);
+
+extern struct bootops *bootops;
+
+/*
+ * Global ptr of the current generation Machine Description
+ */
+static machine_descrip_t *curr_mach_descrip;
/*
- * Basic code to pull in the machine description from the Hypervisor
- * An equivalent to this should really be available from mlsetup
- * for really early info, but for the time being we are content to
- * invoke this from startup_end once the VM system has been initialised.
- * To do this we use the intrq allocator which means that
- * this function should be called after intrq_init();
- * We try and do this early enough however that it is useful to other
- * components within the kernel.
- * Also, user-level entities can grab the machine description via
- * kstat and/or the mdesc device driver.
+ * Initialized by machine_descrip_startup_init in startup.
+ * machine_descript_init will reinitialize the structure with
+ * the vmem allocators once the vmem is available in the boot up
+ * process.
*/
+static machine_descrip_memops_t *curr_mach_descrip_memops = NULL;
+static machine_descrip_memops_t startup_memops = {
+ mach_descrip_strt_buf_alloc,
+ mach_descrip_strt_buf_free,
+ mach_descrip_strt_meta_alloc,
+ mach_descrip_strt_meta_free,
+};
-machine_descrip_t machine_descrip;
+static machine_descrip_memops_t mach_descrip_memops = {
+ mach_descrip_buf_alloc,
+ contig_mem_free,
+ mach_descrip_meta_alloc,
+ kmem_free,
+};
+static kmutex_t curr_mach_descrip_lock;
+/*
+ * List of obsolete Machine Descriptions
+ * Machine descriptions that have users are put on this list
+ * and freed after the last user has called md_fini_handle.
+ */
+static machine_descrip_t *obs_machine_descrip_list;
-#ifdef MACH_DESC_DEBUG
-#define MDP(ARGS) prom_printf ARGS
+static kmutex_t obs_list_lock;
+
+static const char alloc_fail_msg[] =
+ "MD: cannot allocate MD buffer of size %ld bytes\n";
+
+/*
+ * Global flag that indicates whether domaining features are
+ * available. The value is set at boot time based on the value
+ * of the 'domaining-enabled' property in the MD and the global
+ * override flag below. Updates to this variable after boot are
+ * not supported.
+ */
+uint_t domaining_enabled;
+
+/*
+ * Global override for the 'domaining_enabled' flag. If this
+ * flag is set in /etc/system, domaining features are disabled,
+ * ignoring the value of the 'domaining-enabled' property in
+ * the MD.
+ */
+uint_t force_domaining_disabled;
+
+#define HAS_GEN(x) (x != MDESC_INVAL_GEN)
+
+#ifdef DEBUG
+static int mach_descrip_debug = 0;
+
+#define MDP(ARGS) if (mach_descrip_debug) prom_printf ARGS
+#define PRINT_LIST() if (mach_descrip_debug) print_obs_list()
+
+#ifdef MACH_DESC_DEBUG
static void
dump_buf(uint8_t *bufp, int size)
{
@@ -75,74 +155,711 @@ dump_buf(uint8_t *bufp, int size)
prom_printf("\n");
}
}
+#endif /* MACH_DESC_DEBUG */
+
+static void
+print_obs_list(void)
+{
+ machine_descrip_t *lmdescp;
+ mutex_enter(&obs_list_lock);
+
+ lmdescp = obs_machine_descrip_list;
+ prom_printf("MD_obs_list->");
+ while (lmdescp != NULL) {
+ prom_printf("g:%ld,r:%d", lmdescp->gen, lmdescp->refcnt);
+
+ lmdescp = lmdescp->next;
+ prom_printf("->");
+ }
+ prom_printf("NULL\n");
+ mutex_exit(&obs_list_lock);
+}
+
#else
-#define MDP(x)
-#endif
+#define MDP(ARGS)
+#define PRINT_LIST()
+#endif /* DEBUG */
+/*
+ * MD obsolete list management functions
+ */
+static machine_descrip_t *
+md_obs_list_look_up_by_gen(uint64_t gen)
+{
+ machine_descrip_t *mdescp;
+ mutex_enter(&obs_list_lock);
+ mdescp = obs_machine_descrip_list;
+ while (mdescp != NULL) {
+ if (mdescp->gen == gen) {
+ mutex_exit(&obs_list_lock);
+ return (mdescp);
+ }
+ mdescp = mdescp->next;
+ }
+ mutex_exit(&obs_list_lock);
+ return (mdescp);
+}
-void
-mach_descrip_init(void)
+static void
+md_obs_list_remove(machine_descrip_t *mdescp)
+{
+ machine_descrip_t *lmdescp;
+
+ mutex_enter(&obs_list_lock);
+
+ lmdescp = obs_machine_descrip_list;
+
+ if (obs_machine_descrip_list == mdescp) {
+ obs_machine_descrip_list = mdescp->next;
+ } else {
+ while (lmdescp != NULL) {
+ if (lmdescp->next == mdescp) {
+ lmdescp->next = mdescp->next;
+ mdescp->next = NULL;
+ break;
+ }
+ lmdescp = lmdescp->next;
+ }
+ }
+ mutex_exit(&obs_list_lock);
+ PRINT_LIST();
+}
+
+static void
+md_obs_list_add(machine_descrip_t *mdescp)
+{
+ mutex_enter(&obs_list_lock);
+
+ mdescp->next = obs_machine_descrip_list;
+ obs_machine_descrip_list = mdescp;
+
+ mutex_exit(&obs_list_lock);
+ PRINT_LIST();
+}
+
+/*
+ * Allocate a machine_descrip meta structure and initialize it.
+ */
+static machine_descrip_t *
+new_mach_descrip(void)
+{
+ machine_descrip_t *mdescp;
+
+ mdescp = (machine_descrip_t *)(*curr_mach_descrip_memops->meta_allocp)
+ (sizeof (machine_descrip_t));
+ if (mdescp != NULL) {
+ bzero(mdescp, sizeof (*mdescp));
+ mdescp->memops = curr_mach_descrip_memops;
+ mutex_init(&mdescp->lock, NULL, MUTEX_DRIVER, NULL);
+ }
+
+ return (mdescp);
+}
+
+/*
+ * Free a machine_descrip meta structure and destroy its lock.
+ * Also free the MD buffer.
+ */
+static void
+destroy_machine_descrip(machine_descrip_t *mdescp)
{
- uint64_t md_size, ret;
+ machine_descrip_memops_t *mdesc_memopsp;
+
+ ASSERT((mdescp != NULL));
+
+ mdesc_memopsp = mdescp->memops;
+ if (mdescp->memops == NULL)
+ panic("destroy_machine_descrip: memops NULL\n");
+
+ (*mdesc_memopsp->buf_freep)(mdescp->va, mdescp->space);
+ mutex_destroy(&mdescp->lock);
+ (*mdesc_memopsp->meta_freep)(mdescp, sizeof (*mdescp));
+}
+
+/*
+ * Call into the Hypervisor to retrieve the most recent copy of the
+ * machine description. If references to the current MD are active
+ * stow it in the obsolete MD list and update the current MD reference
+ * with the new one.
+ * The obsolete list contains one MD per generation. If the firmware
+ * doesn't support MD generation fail the call.
+ */
+int
+mach_descrip_update(void)
+{
+ uint64_t md_size0, md_size;
+ uint64_t md_space = 0;
+ uint64_t hvret;
+ caddr_t tbuf = NULL;
+ uint64_t tbuf_pa;
+ uint64_t tgen;
+ int ret = 0;
MDP(("MD: Requesting buffer size\n"));
- md_size = 0LL;
- (void) hv_mach_desc((uint64_t)0, &md_size);
- MDP(("MD: buffer size is %d\n", md_size));
+ ASSERT((curr_mach_descrip != NULL));
+
+ mutex_enter(&curr_mach_descrip_lock);
/*
- * Align allocated space to nearest page contig_mem_alloc_align
- * requires a Power of 2 alignment
+ * If the required MD size changes between our first call
+ * to hv_mach_desc (to find the required buf size) and the
+ * second call (to get the actual MD), the MD was in the
+ * process of being updated. Loop until the two sizes are
+ * identical.
*/
- machine_descrip.space = P2ROUNDUP(md_size, PAGESIZE);
- MDP(("MD: allocated space is %d\n", machine_descrip.space));
- machine_descrip.va = contig_mem_alloc_align(machine_descrip.space,
- PAGESIZE);
- if (machine_descrip.va == NULL)
- cmn_err(CE_PANIC, "Allocation for machine description failed");
+ do {
+ if (tbuf != NULL)
+ (*curr_mach_descrip_memops->buf_freep)(tbuf, md_space);
- MDP(("MD: allocated va = 0x%p (size 0x%llx)\n",
- machine_descrip.va, machine_descrip.space));
+ md_size0 = 0LL;
+ (void) hv_mach_desc((uint64_t)0, &md_size0);
+ MDP(("MD: buffer size is %ld\n", md_size0));
- machine_descrip.pa = va_to_pa(machine_descrip.va);
+ /*
+ * Align allocated space to nearest page.
+ * contig_mem_alloc_align() requires a power of 2 alignment.
+ */
+ md_space = P2ROUNDUP(md_size0, PAGESIZE);
+ MDP(("MD: allocated space is %ld\n", md_space));
- MDP(("MD: allocated pa = 0x%llx\n", machine_descrip.pa));
+ tbuf = (caddr_t)(*curr_mach_descrip_memops->buf_allocp)
+ (md_space, PAGESIZE);
+ if (tbuf == NULL) {
+ ret = -1;
+ goto done;
+ }
- ret = hv_mach_desc(machine_descrip.pa, &md_size);
- MDP(("MD: HV return code = %ld\n", ret));
+ tbuf_pa = va_to_pa(tbuf);
+ hvret = hv_mach_desc(tbuf_pa, &md_size);
+ MDP(("MD: HV return code = %ld\n", hvret));
- if (ret != H_EOK) {
- MDP(("MD: Failed with code %ld from HV\n", ret));
+ /*
+ * We get H_EINVAL if our buffer size is too small. In
+ * that case stay in the loop, reallocate the buffer
+ * and try again.
+ */
+ if (hvret != H_EOK && hvret != H_EINVAL) {
+ MDP(("MD: Failed with code %ld from HV\n", hvret));
+ ret = -1;
+ goto done;
+ }
- machine_descrip.size = 0;
+ } while (md_size0 != md_size || hvret == H_EINVAL);
- } else {
- MDP(("MD: Grabbed %d bytes from HV\n", md_size));
-#ifdef MACH_DESC_DEBUG
- dump_buf((uint8_t *)machine_descrip.va, md_size);
-#endif /* MACH_DESC_DEBUG */
+ tgen = mach_descrip_find_md_gen(tbuf);
+
+#ifdef DEBUG
+ if (!HAS_GEN(tgen)) {
+ MDP(("MD: generation number not found\n"));
+ } else
+ MDP(("MD: generation number %ld\n", tgen));
+#endif /* DEBUG */
- machine_descrip.size = md_size;
+ if (curr_mach_descrip->va != NULL) {
+ /* check for the same generation number */
+ if (HAS_GEN(tgen) && ((curr_mach_descrip->gen == tgen) &&
+ (curr_mach_descrip->size == md_size))) {
+#ifdef DEBUG
/*
- * Allocate the kstat to get at the data
+ * Pedantic Check for generation number. If the
+ * generation number is the same, make sure the
+ * MDs are really identical.
*/
- machine_descrip.ksp = kstat_create("unix", 0, "machdesc",
- "misc",
- KSTAT_TYPE_RAW,
- (uint_t)machine_descrip.size,
- KSTAT_FLAG_VIRTUAL);
-
- if (machine_descrip.ksp == NULL) {
- cmn_err(CE_PANIC,
- "Failed to create kstat for machine description");
+ if (bcmp(curr_mach_descrip->va, tbuf, md_size) != 0) {
+ cmn_err(CE_WARN, "machine_descrip_update: MDs "
+ "with the same generation (%ld) are not "
+ "identical", tgen);
+ ret = -1;
+ goto done;
+ }
+#endif
+ cmn_err(CE_WARN, "machine_descrip_update: new MD has "
+ "the same generation (%ld) as the old MD", tgen);
+ ret = 0;
+ goto done;
+ }
+
+ /* check for generations moving backwards */
+ if (HAS_GEN(tgen) && HAS_GEN(curr_mach_descrip->gen) &&
+ (curr_mach_descrip->gen > tgen)) {
+ cmn_err(CE_WARN, "machine_descrip_update: new MD"
+ " older generation (%ld) than current MD (%ld)",
+ tgen, curr_mach_descrip->gen);
+ ret = -1;
+ goto done;
+ }
+
+ if (curr_mach_descrip->refcnt == 0) {
+
+ MDP(("MD: freeing old md buffer gen %ld\n",
+ curr_mach_descrip->gen));
+
+ /* Free old space */
+ ASSERT(curr_mach_descrip->space > 0);
+
+ (*curr_mach_descrip_memops->buf_freep)
+ (curr_mach_descrip->va, curr_mach_descrip->space);
} else {
- machine_descrip.ksp->ks_data = machine_descrip.va;
- kstat_install(machine_descrip.ksp);
+ if (!HAS_GEN(tgen)) {
+ /*
+ * No update support if FW
+ * doesn't have MD generation id
+ * feature.
+ */
+ prom_printf("WARNING: F/W does not support MD "
+ "generation count, MD update failed\n");
+ ret = -1;
+ goto done;
+ }
+
+ MDP(("MD: adding to obs list %ld\n",
+ curr_mach_descrip->gen));
+
+ md_obs_list_add(curr_mach_descrip);
+
+ curr_mach_descrip = new_mach_descrip();
+
+ if (curr_mach_descrip == NULL) {
+ panic("Allocation for machine description"
+ " failed\n");
+ }
+ }
+ }
+
+ curr_mach_descrip->va = tbuf;
+ curr_mach_descrip->gen = tgen;
+ curr_mach_descrip->size = md_size;
+ curr_mach_descrip->space = md_space;
+
+#ifdef MACH_DESC_DEBUG
+ dump_buf((uint8_t *)curr_mach_descrip->va, md_size);
+#endif /* MACH_DESC_DEBUG */
+
+ mutex_exit(&curr_mach_descrip_lock);
+ return (ret);
+
+done:
+ if (tbuf != NULL)
+ (*curr_mach_descrip_memops->buf_freep)(tbuf, md_space);
+ mutex_exit(&curr_mach_descrip_lock);
+ return (ret);
+}
+
+static void *
+mach_descrip_buf_alloc(size_t size, size_t align)
+{
+ void *p;
+
+ if ((p = contig_mem_alloc_align(size, align)) == NULL)
+ cmn_err(CE_WARN, alloc_fail_msg, size);
+
+ return (p);
+}
+
+static void *
+mach_descrip_strt_meta_alloc(size_t size)
+{
+ return (BOP_ALLOC(bootops, (caddr_t)0, size, PAGESIZE));
+}
+
+static void
+mach_descrip_strt_meta_free(void *buf, size_t size)
+{
+ BOP_FREE(bootops, buf, size);
+}
+
+static void *
+mach_descrip_strt_buf_alloc(size_t size, size_t align)
+{
+ void *p = prom_alloc((caddr_t)0, size, align);
+
+ if (p == NULL)
+ prom_printf(alloc_fail_msg, size);
+
+ return (p);
+}
+
+static void
+mach_descrip_strt_buf_free(void *buf, size_t size)
+{
+ prom_free((caddr_t)buf, size);
+}
+
+static void *
+mach_descrip_meta_alloc(size_t size)
+{
+ return (kmem_alloc(size, KM_SLEEP));
+}
+
+/*
+ * Initialize the kernel's Machine Description(MD) framework
+ * early on in startup during mlsetup() so consumers
+ * can get to the MD before the VM system has been initialized.
+ *
+ * Also get the most recent version of the MD.
+ */
+void
+mach_descrip_startup_init(void)
+{
+
+ mutex_init(&curr_mach_descrip_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&obs_list_lock, NULL, MUTEX_DRIVER, NULL);
+
+ obs_machine_descrip_list = NULL;
+
+ curr_mach_descrip_memops = &startup_memops;
+
+ curr_mach_descrip = new_mach_descrip();
+ if (curr_mach_descrip == NULL)
+ panic("Allocation for machine description failed\n");
+
+ if (mach_descrip_update())
+ panic("Machine description initialization failed\n");
+
+}
+
+/*
+ * Counterpart to the above init function. Free up resources
+ * allocated at startup by mach_descrip_startup_init().
+ * And reset machine description framework state.
+ *
+ * All consumers must have fini'ed their handles at this point.
+ */
+void
+mach_descrip_startup_fini(void)
+{
+
+ ASSERT((curr_mach_descrip != NULL));
+ ASSERT((curr_mach_descrip->refcnt == 0));
+ ASSERT((obs_machine_descrip_list == NULL));
+
+ destroy_machine_descrip(curr_mach_descrip);
+ curr_mach_descrip = NULL;
+ curr_mach_descrip_memops = NULL;
+}
+
+/*
+ * Initialize the kernel's Machine Description(MD) framework
+ * after the VM system has been initialized.
+ *
+ * Also get the most recent version of the MD.
+ * Assumes that the machine description framework is in a clean
+ * state and the machine description initialized during startup
+ * has been cleaned up and resources deallocated.
+ */
+void
+mach_descrip_init(void)
+{
+ ASSERT((curr_mach_descrip == NULL &&
+ curr_mach_descrip_memops == NULL));
+
+ curr_mach_descrip_memops = &mach_descrip_memops;
+
+ curr_mach_descrip = new_mach_descrip();
+ if (curr_mach_descrip == NULL)
+ panic("Allocation for machine description failed\n");
+
+ if (mach_descrip_update())
+ panic("Machine description intialization failed\n");
+
+ /* read in global params */
+ init_md_params();
+}
+
+/*
+ * Client interface to get a handle to the current MD.
+ * The md_fini_handle() interface should be used to
+ * clean up the reference to the MD returned by this function.
+ */
+md_t *
+md_get_handle(void)
+{
+ md_t *mdp;
+
+ mutex_enter(&curr_mach_descrip_lock);
+
+ if (curr_mach_descrip == NULL) {
+ return (NULL);
+ }
+
+ curr_mach_descrip->refcnt++;
+ mdp = md_init_intern(curr_mach_descrip->va,
+ curr_mach_descrip->memops->meta_allocp,
+ curr_mach_descrip->memops->meta_freep);
+
+ mutex_exit(&curr_mach_descrip_lock);
+
+ return (mdp);
+}
+
+/*
+ * Client interface to clean up the reference to the MD returned
+ * by md_get_handle().
+ */
+int
+md_fini_handle(md_t *ptr)
+{
+ machine_descrip_t *mdescp;
+ md_impl_t *mdp;
+
+
+ mdp = (md_impl_t *)ptr;
+
+ if (mdp == NULL)
+ return (-1);
+ /*
+ * Check if mdp is current MD gen
+ */
+ mutex_enter(&curr_mach_descrip_lock);
+
+ if (curr_mach_descrip->gen == mdp->gen) {
+ curr_mach_descrip->refcnt--;
+ mutex_exit(&curr_mach_descrip_lock);
+ goto fini;
+ }
+ mutex_exit(&curr_mach_descrip_lock);
+
+ /*
+ * MD is in the obsolete list
+ */
+ mdescp = md_obs_list_look_up_by_gen(mdp->gen);
+ if (mdescp == NULL)
+ return (-1);
+
+ mutex_enter(&mdescp->lock);
+ mdescp->refcnt--;
+ if (mdescp->refcnt == 0) {
+ md_obs_list_remove(mdescp);
+ mutex_exit(&mdescp->lock);
+ destroy_machine_descrip(mdescp);
+ goto fini;
+ }
+ mutex_exit(&mdescp->lock);
+
+fini:
+ return (md_fini(ptr));
+}
+
+/*
+ * General purpose initialization function used to extract parameters
+ * from the MD during the boot process. This is called immediately after
+ * the in kernel copy of the MD has been initialized so that global
+ * flags are available to various subsystems as they get initialized.
+ */
+static void
+init_md_params(void)
+{
+ md_t *mdp;
+ int num_nodes;
+ mde_cookie_t *listp;
+ int listsz;
+
+ mdp = md_get_handle();
+ ASSERT(mdp);
+ num_nodes = md_node_count(mdp);
+ ASSERT(num_nodes >= 0);
+
+ listsz = num_nodes * sizeof (mde_cookie_t);
+ listp = (mde_cookie_t *)
+ (*curr_mach_descrip_memops->meta_allocp)(listsz);
+
+ /*
+ * Import various parameters from the MD. For now,
+ * the only parameter of interest is whether or not
+ * domaining features are supported.
+ */
+ init_domaining_enabled(mdp, listp);
+
+ (*curr_mach_descrip_memops->meta_freep)(listp, listsz);
+ (void) md_fini_handle(mdp);
+}
+
+static void
+init_domaining_enabled(md_t *mdp, mde_cookie_t *listp)
+{
+ mde_cookie_t rootnode;
+ int num_nodes;
+ uint64_t val = 0;
+
+ /*
+ * If domaining has been manually disabled, always
+ * honor that and ignore the value in the MD.
+ */
+ if (force_domaining_disabled) {
+ domaining_enabled = 0;
+ MDP(("domaining manually disabled\n"));
+ return;
+ }
+
+ rootnode = md_root_node(mdp);
+ ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+ num_nodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "platform"),
+ md_find_name(mdp, "fwd"), listp);
+
+ /* should only be one platform node */
+ ASSERT(num_nodes == 1);
+
+ if (md_get_prop_val(mdp, *listp, "domaining-enabled", &val) != 0) {
+ /*
+ * The property is not present. This implies
+ * that the firmware does not support domaining
+ * features.
+ */
+ MDP(("'domaining-enabled' property not present\n"));
+
+ domaining_enabled = 0;
+ return;
+ }
+
+ domaining_enabled = val;
+
+ MDP(("domaining_enabled = 0x%x\n", domaining_enabled));
+}
+
+/*
+ * Client interface to get a pointer to the raw MD buffer
+ * Private to kernel and mdesc driver.
+ */
+caddr_t
+md_get_md_raw(md_t *ptr)
+{
+ md_impl_t *mdp;
+
+ mdp = (md_impl_t *)ptr;
+ if (mdp == NULL)
+ return (NULL);
+ return (mdp->caddr);
+}
+
+/*
+ * This is called before an MD structure is initialized, so
+ * it walks the raw MD looking for the generation property.
+ */
+static uint64_t
+mach_descrip_find_md_gen(caddr_t ptr)
+{
+ md_header_t *hdrp;
+ md_element_t *mdep;
+ md_element_t *rootnode = NULL;
+ md_element_t *elem = NULL;
+ char *namep;
+ boolean_t done;
+ int idx;
+
+ hdrp = (md_header_t *)ptr;
+ mdep = (md_element_t *)(ptr + MD_HEADER_SIZE);
+ namep = (char *)(ptr + MD_HEADER_SIZE + hdrp->node_blk_sz);
+
+ /*
+ * Very basic check for alignment to avoid
+ * bus error issues.
+ */
+ if ((((uint64_t)ptr) & 7) != 0)
+ return (MDESC_INVAL_GEN);
+
+ if (mdtoh32(hdrp->transport_version) != MD_TRANSPORT_VERSION) {
+ return (MDESC_INVAL_GEN);
+ }
+
+ /*
+ * Search for the root node. Perform the walk manually
+ * since the MD structure is not set up yet.
+ */
+ for (idx = 0, done = B_FALSE; done == B_FALSE; ) {
+
+ md_element_t *np = &(mdep[idx]);
+
+ switch (MDE_TAG(np)) {
+ case MDET_LIST_END:
+ done = B_TRUE;
+ break;
+
+ case MDET_NODE:
+ if (strcmp(namep + MDE_NAME(np), "root") == 0) {
+ /* found root node */
+ rootnode = np;
+ done = B_TRUE;
+ break;
+ }
+ idx = MDE_PROP_INDEX(np);
+ break;
+
+ default:
+ /* ignore */
+ idx++;
}
}
+
+ if (rootnode == NULL) {
+ /* root not found */
+ return (MDESC_INVAL_GEN);
+ }
+
+ /* search the rootnode for the generation property */
+ for (elem = (rootnode + 1); MDE_TAG(elem) != MDET_NODE_END; elem++) {
+
+ char *prop_name;
+
+ /* generation field is a prop_val */
+ if (MDE_TAG(elem) != MDET_PROP_VAL)
+ continue;
+
+ prop_name = namep + MDE_NAME(elem);
+
+ if (strcmp(prop_name, "md-generation#") == 0) {
+ return (MDE_PROP_VALUE(elem));
+ }
+ }
+
+ return (MDESC_INVAL_GEN);
+}
+
+/*
+ * Failed to allocate the list : Return value -1
+ * md_scan_dag API failed : Return the result from md_scan_dag API
+ */
+int
+md_alloc_scan_dag(md_t *ptr,
+ mde_cookie_t startnode,
+ char *node_name,
+ char *dag,
+ mde_cookie_t **list)
+{
+ int res;
+ md_impl_t *mdp = (md_impl_t *)ptr;
+
+ *list = (mde_cookie_t *)mdp->allocp(sizeof (mde_cookie_t) *
+ mdp->node_count);
+ if (*list == NULL)
+ return (-1);
+
+ res = md_scan_dag(ptr, startnode,
+ md_find_name(ptr, node_name),
+ md_find_name(ptr, dag), *list);
+
+ /*
+ * If md_scan_dag API returned 0 or -1 then free the buffer
+ * and return -1 to indicate the error from this API.
+ */
+ if (res < 1) {
+ md_free_scan_dag(ptr, list);
+ *list = NULL;
+ }
+
+ return (res);
+}
+
+void
+md_free_scan_dag(md_t *ptr,
+ mde_cookie_t **list)
+{
+ md_impl_t *mdp = (md_impl_t *)ptr;
+
+ mdp->freep(*list, sizeof (mde_cookie_t) * mdp->node_count);
}
diff --git a/usr/src/uts/sun4v/os/mach_mp_startup.c b/usr/src/uts/sun4v/os/mach_mp_startup.c
index 25a37ecdf4..421546277a 100644
--- a/usr/src/uts/sun4v/os/mach_mp_startup.c
+++ b/usr/src/uts/sun4v/os/mach_mp_startup.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -30,6 +30,8 @@
#include <sys/cpu_module.h>
#include <sys/dtrace.h>
#include <sys/cpu_sgnblk_defs.h>
+#include <sys/mdesc.h>
+#include <sys/mach_descrip.h>
/*
* Useful for disabling MP bring-up for an MP capable kernel
@@ -87,25 +89,67 @@ init_cpu_info(struct cpu *cp)
}
}
-/* ARGSUSED */
/*
- * Routine used to cleanup a CPU that has been powered off. This will
+ * Routine used to cleanup a CPU that has been powered off. This will
* destroy all per-cpu information related to this cpu.
*/
int
mp_cpu_unconfigure(int cpuid)
{
- return (0);
+ int retval;
+ extern void empty_cpu(int);
+ extern int cleanup_cpu_common(int);
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ retval = cleanup_cpu_common(cpuid);
+
+ empty_cpu(cpuid);
+
+ return (retval);
}
-/* ARGSUSED */
+struct mp_find_cpu_arg {
+ int cpuid; /* set by mp_cpu_configure() */
+ dev_info_t *dip; /* set by mp_find_cpu() */
+};
+
int
mp_find_cpu(dev_info_t *dip, void *arg)
{
- return (0);
+ struct mp_find_cpu_arg *target = (struct mp_find_cpu_arg *)arg;
+ char *type;
+ int rv = DDI_WALK_CONTINUE;
+ int cpuid;
+
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "device_type", &type))
+ return (DDI_WALK_CONTINUE);
+
+ if (strcmp(type, "cpu") != 0)
+ goto out;
+
+ cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "reg", -1);
+
+ if (cpuid == -1) {
+ cmn_err(CE_PANIC, "reg prop not found in cpu node");
+ }
+
+ cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
+
+ if (cpuid != target->cpuid)
+ goto out;
+
+ /* Found it */
+ rv = DDI_WALK_TERMINATE;
+ target->dip = dip;
+
+out:
+ ddi_prop_free(type);
+ return (rv);
}
-/* ARGSUSED */
/*
* Routine used to setup a newly inserted CPU in preparation for starting
* it running code.
@@ -113,5 +157,68 @@ mp_find_cpu(dev_info_t *dip, void *arg)
int
mp_cpu_configure(int cpuid)
{
+ extern void fill_cpu(md_t *, mde_cookie_t);
+ extern void setup_cpu_common(int);
+ extern void setup_exec_unit_mappings(md_t *);
+ md_t *mdp;
+ mde_cookie_t rootnode, cpunode = MDE_INVAL_ELEM_COOKIE;
+ int listsz, i;
+ mde_cookie_t *listp = NULL;
+ int num_nodes;
+ uint64_t cpuid_prop;
+
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if ((mdp = md_get_handle()) == NULL)
+ return (ENODEV);
+
+ rootnode = md_root_node(mdp);
+
+ ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
+
+ num_nodes = md_node_count(mdp);
+
+ ASSERT(num_nodes > 0);
+
+ listsz = num_nodes * sizeof (mde_cookie_t);
+ listp = kmem_zalloc(listsz, KM_SLEEP);
+
+ num_nodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
+ md_find_name(mdp, "fwd"), listp);
+
+ if (num_nodes < 0)
+ return (ENODEV);
+
+ for (i = 0; i < num_nodes; i++) {
+ if (md_get_prop_val(mdp, listp[i], "id", &cpuid_prop))
+ break;
+ if (cpuid_prop == (uint64_t)cpuid) {
+ cpunode = listp[i];
+ break;
+ }
+ }
+
+ if (cpunode == MDE_INVAL_ELEM_COOKIE)
+ return (ENODEV);
+
+ kmem_free(listp, listsz);
+
+ /*
+ * Note: uses cpu_lock to protect cpunodes and ncpunodes
+ * which will be modified inside of fill_cpu and
+ * setup_exec_unit_mappings.
+ */
+ fill_cpu(mdp, cpunode);
+
+ /*
+ * Remap all the cpunodes' execunit mappings.
+ */
+ setup_exec_unit_mappings(mdp);
+
+ (void) md_fini_handle(mdp);
+
+ setup_cpu_common(cpuid);
+
return (0);
}
diff --git a/usr/src/uts/sun4v/os/mach_mp_states.c b/usr/src/uts/sun4v/os/mach_mp_states.c
index e10feb6f48..d680cec27e 100644
--- a/usr/src/uts/sun4v/os/mach_mp_states.c
+++ b/usr/src/uts/sun4v/os/mach_mp_states.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,14 +18,26 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/cpuvar.h>
#include <sys/cpu_module.h>
+#include <sys/machsystm.h>
+#include <sys/archsystm.h>
+#include <sys/prom_plat.h>
+#include <sys/hypervisor_api.h>
+#include <sys/hsvc.h>
+
+extern uint64_t xc_tick_limit;
+extern uint64_t xc_tick_jump_limit;
+
+extern void cpu_intrq_unregister_powerdown(uint64_t doneflag_va);
/*
* set_idle_cpu is called from idle() when a CPU becomes idle.
@@ -45,3 +56,224 @@ void
unset_idle_cpu(int cpun)
{
}
+
+/*
+ * Stop a CPU based on its cpuid, using the cpu_stop hypervisor call.
+ * Since this requires that the hypervisor force a remote CPU to stop,
+ * the assumption is made that this should take roughly the same amount
+ * of time as a CPU mondo. Consequently, the mondo timeout is used to
+ * determine when to give up waiting for the CPU to stop.
+ *
+ * Attempts to stop a CPU already in the stopped or error state will
+ * silently succeed. Zero is returned on success and a non-negative
+ * errno value is returned on failure.
+ */
+int
+stopcpu_bycpuid(int cpuid)
+{
+ uint64_t loop_cnt;
+ uint64_t state;
+ uint64_t rv;
+ uint64_t major = 0;
+ uint64_t minor = 0;
+ uint64_t cpu_stop_time_limit;
+ extern uint64_t xc_mondo_time_limit;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Check the state of the CPU up front to see if an
+ * attempt to stop it is even necessary.
+ */
+ if (hv_cpu_state(cpuid, &state) != H_EOK)
+ return (EINVAL);
+
+ /* treat stopped and error state the same */
+ if (state != CPU_STATE_RUNNING) {
+ /* nothing to do */
+ return (0);
+ }
+
+ /*
+ * The HV API to stop a CPU is only supported in
+ * version 1.1 and later of the core group. If an
+ * older version of the HV is in use, return not
+ * supported.
+ */
+ if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
+ return (EINVAL);
+
+ ASSERT(major != 0);
+
+ if ((major == 1) && (minor < 1))
+ return (ENOTSUP);
+
+ /* use the mondo timeout if it has been initialized */
+ cpu_stop_time_limit = xc_mondo_time_limit;
+
+ /*
+ * If called early in boot before the mondo time limit
+ * is set, use a reasonable timeout based on the
+ * clock frequency of the current CPU.
+ */
+ if (cpu_stop_time_limit == 0)
+ cpu_stop_time_limit = cpunodes[CPU->cpu_id].clock_freq;
+
+ /* should only fail if called too early in boot */
+ ASSERT(cpu_stop_time_limit > 0);
+
+ loop_cnt = 0;
+
+ /*
+ * Attempt to stop the CPU, retrying if it is busy.
+ */
+ while (loop_cnt++ < cpu_stop_time_limit) {
+
+ if ((rv = hv_cpu_stop(cpuid)) != H_EWOULDBLOCK)
+ break;
+ }
+
+ if (loop_cnt == cpu_stop_time_limit)
+ return (ETIMEDOUT);
+
+ if (rv != H_EOK)
+ return (EINVAL);
+
+ /*
+ * Verify that the CPU has reached the stopped state.
+ */
+ while (loop_cnt++ < cpu_stop_time_limit) {
+
+ if (hv_cpu_state(cpuid, &state) != H_EOK)
+ return (EINVAL);
+
+ /* treat stopped and error state the same */
+ if (state != CPU_STATE_RUNNING)
+ break;
+ }
+
+ return ((loop_cnt == cpu_stop_time_limit) ? ETIMEDOUT : 0);
+}
+
+/*
+ * X-trap to the target to unregister its interrupt and error queues
+ * and put it in a safe place just before the CPU is stopped. After
+ * unregistering its queues, the target CPU must not return from the
+ * trap to priv or user context. Ensure that the interrupt CPU unregister
+ * succeeded.
+ */
+void
+xt_cpu_unreg_powerdown(struct cpu *cpup)
+{
+ uint8_t volatile not_done;
+ uint64_t starttick, endtick, tick, lasttick;
+ processorid_t cpuid = cpup->cpu_id;
+
+ kpreempt_disable();
+
+ /*
+ * Sun4v uses a queue for receiving mondos. Successful
+ * transmission of a mondo only indicates that the mondo
+ * has been written into the queue.
+ *
+ * Set the not_done flag to 1 before sending the cross
+ * trap and wait until the other cpu resets it to 0.
+ */
+
+ not_done = 1;
+
+ xt_one_unchecked(cpuid, (xcfunc_t *)cpu_intrq_unregister_powerdown,
+ (uint64_t)&not_done, 0);
+
+ starttick = lasttick = gettick();
+ endtick = starttick + xc_tick_limit;
+
+ while (not_done) {
+
+ tick = gettick();
+
+ /*
+ * If there is a big jump between the current tick
+ * count and lasttick, we have probably hit a break
+ * point. Adjust endtick accordingly to avoid panic.
+ */
+ if (tick > (lasttick + xc_tick_jump_limit)) {
+ endtick += (tick - lasttick);
+ }
+
+ lasttick = tick;
+ if (tick > endtick) {
+ cmn_err(CE_CONT, "Cross trap timeout at cpu id %x\n",
+ cpuid);
+ cmn_err(CE_WARN, "xt_intrq_unreg_powerdown: timeout");
+ }
+ }
+
+ kpreempt_enable();
+}
+
+int
+plat_cpu_poweroff(struct cpu *cp)
+{
+ int rv = 0;
+ int status;
+ processorid_t cpuid = cp->cpu_id;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Capture all CPUs (except for detaching proc) to prevent
+ * crosscalls to the detaching proc until it has cleared its
+ * bit in cpu_ready_set.
+ *
+ * The CPU's remain paused and the prom_mutex is known to be free.
+ * This prevents the x-trap victim from blocking when doing prom
+ * IEEE-1275 calls at a high PIL level.
+ */
+ promsafe_pause_cpus();
+
+ /*
+ * Quiesce interrupts on the target CPU. We do this by setting
+ * the CPU 'not ready'- (i.e. removing the CPU from cpu_ready_set)
+ * to prevent it from receiving cross calls and cross traps. This
+ * prevents the processor from receiving any new soft interrupts.
+ */
+ mp_cpu_quiesce(cp);
+
+ /*
+ * Send a cross trap to the cpu to unregister its interrupt
+ * error queues.
+ */
+ xt_cpu_unreg_powerdown(cp);
+
+ cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
+
+ /* call into the Hypervisor to stop the CPU */
+ if ((status = stopcpu_bycpuid(cpuid)) != 0) {
+ rv = -1;
+ }
+
+ start_cpus();
+
+ if (rv != 0) {
+ cmn_err(CE_WARN, "failed to stop cpu %d (%d)", cpuid, status);
+ /* mark the CPU faulted so that it cannot be onlined */
+ cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_FAULTED;
+ }
+
+ return (rv);
+}
+
+int
+plat_cpu_poweron(struct cpu *cp)
+{
+ extern void restart_other_cpu(int);
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ cp->cpu_flags &= ~CPU_POWEROFF;
+
+ restart_other_cpu(cp->cpu_id);
+
+ return (0);
+}
diff --git a/usr/src/uts/sun4v/os/mach_startup.c b/usr/src/uts/sun4v/os/mach_startup.c
index 44d199782b..e2e4c5857b 100644
--- a/usr/src/uts/sun4v/os/mach_startup.c
+++ b/usr/src/uts/sun4v/os/mach_startup.c
@@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -36,6 +37,8 @@
#include <sys/disp.h>
#include <sys/hypervisor_api.h>
#include <sys/traptrace.h>
+#include <sys/modctl.h>
+#include <sys/ldoms.h>
#ifdef TRAPTRACE
int mach_htraptrace_enable = 1;
@@ -61,7 +64,7 @@ setup_trap_table(void)
mmfsa_va =
mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);
- intr_init(CPU); /* init interrupt request free list */
+ intr_init(CPU); /* init interrupt request free list */
setwstate(WSTATE_KERN);
set_mmfsa_scratchpad(mmfsa_va);
prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
@@ -427,3 +430,54 @@ mach_htraptrace_cleanup(int cpuid)
ctlp->d.hpaddr_base = NULL;
}
}
+
+/*
+ * Load any required machine class (sun4v) specific drivers.
+ */
+void
+load_mach_drivers(void)
+{
+ /*
+ * We don't want to load these LDOMs-specific
+ * modules if domaining has been disabled. Also,
+ * we must be able to run on non-LDOMs firmware.
+ */
+ if (!domaining_enabled)
+ return;
+
+ /*
+ * Load the core domain services module
+ */
+ if (modload("misc", "ds") == -1)
+ cmn_err(CE_NOTE, "!'ds' module failed to load");
+
+ /*
+ * Load the rest of the domain services
+ */
+ if (modload("misc", "fault_iso") == -1)
+ cmn_err(CE_NOTE, "!'fault_iso' module failed to load");
+
+ if (modload("misc", "platsvc") == -1)
+ cmn_err(CE_NOTE, "!'platsvc' module failed to load");
+
+ if (modload("misc", "dr_cpu") == -1)
+ cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");
+
+ /*
+ * Attempt to attach any virtual device servers. These
+ * drivers must be loaded at start of day so that they
+ * can respond to any updates to the machine description.
+ *
+ * Since it is quite likely that a domain will not support
+ * one or more of these servers, failures are ignored.
+ */
+
+ /* virtual disk server */
+ (void) i_ddi_attach_hw_nodes("vds");
+
+ /* virtual network switch */
+ (void) i_ddi_attach_hw_nodes("vsw");
+
+ /* virtual console concentrator */
+ (void) i_ddi_attach_hw_nodes("vcc");
+}
diff --git a/usr/src/uts/sun4v/platsvc/Makefile b/usr/src/uts/sun4v/platsvc/Makefile
new file mode 100644
index 0000000000..f7e729c43e
--- /dev/null
+++ b/usr/src/uts/sun4v/platsvc/Makefile
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the platsvc kernel module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = platsvc
+OBJECTS = $(PLATSVC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PLATSVC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += -v
+
+#
+# Turn on doubleword alignment for 64 bit registers
+#
+CFLAGS += -dalign
+
+#
+# Module Dependencies
+#
+LDFLAGS += -dy -Nmisc/ds
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/promif/promif_asr.c b/usr/src/uts/sun4v/promif/promif_asr.c
new file mode 100644
index 0000000000..7bc20982f0
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_asr.c
@@ -0,0 +1,75 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+
+/*
+ * The Automatic System Recovery (ASR) database present in some
+ * versions of firmware is not supported on sun4v platforms.
+ * However, there is an external interface to these prom interfaces
+ * from the openprom(7D) driver. They are not documented in the
+ * man page, but they should still be handled here, just enough
+ * so the user gets a sensible error back if they stumble onto
+ * them.
+ */
+
+int
+promif_asr_list_keys_len(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+
+ ci[3] = p1275_int2cell(-1);
+
+ return (-1);
+}
+
+int
+promif_asr_list_keys(void *p)
+{
+ _NOTE(ARGUNUSED(p))
+
+ return (-1);
+}
+
+int
+promif_asr_export_len(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+
+ ci[3] = p1275_int2cell(-1);
+
+ return (-1);
+}
+
+int
+promif_asr_export(void *p)
+{
+ _NOTE(ARGUNUSED(p))
+
+ return (-1);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_cpu.c b/usr/src/uts/sun4v/promif/promif_cpu.c
new file mode 100644
index 0000000000..fdeaa656ab
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_cpu.c
@@ -0,0 +1,122 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/machsystm.h>
+#include <sys/hypervisor_api.h>
+#include <sys/lpad.h>
+
+extern int (*prom_cif_handler)(void *);
+extern int cif_cpu_mp_ready;
+
+int
+promif_set_mmfsa_traptable(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ uint64_t rtba;
+ caddr_t tba;
+ uint64_t mmfsa_ra;
+ int rv, ret;
+
+ ASSERT(ci[1] == 2);
+
+ /*
+ * We use the same trap table for the rtba as well.
+ */
+ rtba = va_to_pa(p1275_cell2ptr(ci[3]));
+
+ /*
+ * if cif_cpu_mp_ready is not set the prom is still
+ * setting the mmfsa and trap table. Set the rtba
+ * after the prom cif call.
+ */
+ if (!cif_cpu_mp_ready) {
+ ret = (*prom_cif_handler)(p);
+ if ((rv = hv_cpu_set_rtba(&rtba)) != H_EOK)
+ panic("hv_cpu_set_rtba failed: %d\n", rv);
+ return (ret);
+ }
+
+ tba = p1275_cell2ptr(ci[3]);
+ mmfsa_ra = (uint64_t)p1275_cell2ptr(ci[4]);
+
+ if (tba != (caddr_t)KERNELBASE)
+ return (-1);
+
+ (void) set_tba(tba);
+
+ if ((rv = hv_mmu_fault_area_conf(&mmfsa_ra)) != H_EOK) {
+ panic("hv_mmu_fault_area_conf failed: %d\n", rv);
+ }
+
+ if ((rv = hv_cpu_set_rtba(&rtba)) != H_EOK) {
+ panic("hv_cpu_set_rtba failed: %d\n", rv);
+ }
+
+ return (0);
+}
+
+int
+promif_start_cpu(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ int cpuid;
+ caddr_t pc;
+ int arg;
+ uint64_t rtba = 0;
+ int rv;
+ uint64_t *lpp;
+
+ ASSERT(ci[1] == 3);
+
+ cpuid = p1275_cell2int(ci[3]);
+ pc = p1275_cell2ptr(ci[4]);
+ arg = p1275_cell2int(ci[5]);
+
+ if (!cif_cpu_mp_ready)
+ return ((*prom_cif_handler)(p));
+
+ rtba = va_to_pa(&trap_table);
+
+ lpp = lpad_setup(cpuid, (uint64_t)pc, (uint64_t)arg);
+
+ ASSERT(lpp);
+
+ pc = (caddr_t)lpp;
+
+ rv = hv_cpu_start(cpuid, va_to_pa(pc), rtba, cpuid);
+
+ if (rv != H_EOK) {
+ panic("promif_start_cpu: failed to start cpu %d (%d)\n",
+ cpuid, rv);
+ }
+
+ ci[6] = p1275_int2cell(rv);
+
+ return (0);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_emul.c b/usr/src/uts/sun4v/promif/promif_emul.c
new file mode 100644
index 0000000000..8e4e41dbb3
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_emul.c
@@ -0,0 +1,268 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/machsystm.h>
+#include <sys/lpad.h>
+#include <sys/vmsystm.h>
+#include <sys/prom_plat.h>
+#include <sys/ldoms.h>
+#include <sys/kobj.h>
+#include <sys/reboot.h>
+#include <sys/hypervisor_api.h>
+
+#ifndef _KMDB
+static processorid_t cif_cpu;
+static struct translation *cif_prom_trans;
+static size_t cif_prom_ntrans;
+
+int cif_cpu_mp_ready;
+int (*prom_cif_handler)(void *) = NULL;
+#endif
+
+#ifdef DEBUG
+uint_t cif_debug;
+#endif /* DEBUG */
+
+extern int (*cif_handler)(void *);
+
+typedef struct {
+ char *name;
+ cif_func_t func;
+} cif_callback_t;
+
+static cif_callback_t cb_table[] = {
+ { "getprop", promif_getprop },
+ { "getproplen", promif_getproplen },
+ { "nextprop", promif_nextprop },
+ { "peer", promif_nextnode },
+ { "child", promif_childnode },
+ { "parent", promif_parentnode },
+ { "enter", promif_enter_mon },
+ { "exit", promif_exit_to_mon },
+ { "boot", promif_reboot },
+ { "write", promif_write },
+ { "read", promif_read },
+ { "interpret", promif_interpret },
+ { "finddevice", promif_finddevice },
+ { "instance-to-package", promif_instance_to_package },
+#ifndef _KMDB
+ { "setprop", promif_setprop },
+ { "test", promif_test },
+ { "instance-to-path", promif_instance_to_path },
+ { "SUNW,power-off", promif_power_off },
+ { "SUNW,asr-list-keys-len", promif_asr_list_keys_len },
+ { "SUNW,asr-list-keys", promif_asr_list_keys },
+ { "SUNW,asr-export-len", promif_asr_export_len },
+ { "SUNW,asr-export", promif_asr_export },
+ { "SUNW,set-security-key", promif_set_security_key },
+ { "SUNW,get-security-key", promif_get_security_key },
+ { "SUNW,start-cpu-by-cpuid", promif_start_cpu },
+ { "SUNW,set-trap-table", promif_set_mmfsa_traptable },
+ { "SUNW,set-sun4v-api-version", promif_set_sun4v_api_version },
+ { "SUNW,get-sun4v-api-version", promif_get_sun4v_api_version },
+#endif
+ { NULL, NULL }
+};
+
+cif_func_t
+promif_find_cif_callback(char *opname)
+{
+ cif_callback_t *cb;
+
+ if (opname == NULL)
+ return (NULL);
+
+ for (cb = cb_table; cb->name; cb++) {
+ if (prom_strcmp(cb->name, opname) == 0)
+ break;
+ }
+
+ return (cb->func);
+}
+
+static int
+kern_cif_handler(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ char *opname;
+ cif_func_t func;
+ int rv;
+
+ ASSERT(cif_handler == kern_cif_handler);
+
+#ifndef _KMDB
+ cif_cpu = getprocessorid();
+#endif
+
+ opname = p1275_cell2ptr(ci[0]);
+
+ /* lookup the callback for the desired operation */
+ func = promif_find_cif_callback(opname);
+
+ if (func == NULL) {
+#ifdef _KMDB
+ prom_fatal_error("sun4v unsupported CIFs\n");
+#else
+ cmn_err(CE_CONT, "!sun4v unsupported CIF: %s\n", opname);
+ return (-1);
+#endif
+ }
+
+ /* callback found, execute it */
+ rv = func(p);
+
+#ifndef _KMDB
+ cif_cpu = -1;
+#endif
+
+ return (rv);
+}
+
+#ifdef _KMDB
+
+void
+cif_init(char *pgmname, caddr_t root, ihandle_t in, ihandle_t out,
+ phandle_t pin, phandle_t pout, pnode_t chosen, pnode_t options)
+{
+ /* initialize pointer to a copy of OBP device tree */
+ promif_stree_setroot(root);
+
+ promif_set_nodes(chosen, options);
+
+ /* initialize io parameters */
+ promif_io_init(in, out, pin, pout);
+
+ /*
+ * Switch CIF handler to the kernel.
+ */
+ if (pgmname != NULL)
+ prom_init(pgmname, (void *)kern_cif_handler);
+ else
+ cif_handler = kern_cif_handler;
+}
+
+#else
+
+static void cache_prom_data(void);
+
+/*
+ * This function returns 1 if the current thread is executing in
+ * the CIF and 0 otherwise. This is useful information to know
+ * since code that implements CIF handlers can assume that it has
+ * gone through the kern_preprom() entry point, implying it is
+ * running single threaded, has preemption disabled, etc.
+ */
+int
+promif_in_cif(void)
+{
+ int mycpuid = getprocessorid();
+
+ return ((cif_cpu == mycpuid) ? 1 : 0);
+}
+
+void
+cif_init(void)
+{
+ void (*kmdb_cb)(void);
+ uint64_t rtba;
+ uint64_t rv;
+
+ /*
+ * Check if domaining is enabled. If not, do not
+ * initialize the kernel CIF handler.
+ */
+ if (!domaining_enabled)
+ return;
+
+ /*
+ * Cache PROM data that is needed later, e.g. a shadow
+ * copy of the device tree, IO mappings, etc.
+ */
+ cache_prom_data();
+
+ /*
+ * Prepare to take over the get/set of environmental variables.
+ */
+ promif_prop_init();
+
+ /*
+ * Switch CIF handler to the kernel.
+ */
+ prom_cif_handler = cif_handler;
+
+ promif_preprom();
+ cif_handler = kern_cif_handler;
+
+ /*
+ * Take over rtba for the boot CPU. The rtba for
+ * all other CPUs are set as they enter the system.
+ */
+ rtba = va_to_pa(&trap_table);
+ if ((rv = hv_cpu_set_rtba(&rtba)) != H_EOK)
+ panic("hv_cpu_set_rtba failed: %ld\n", rv);
+
+ promif_postprom();
+
+ /*
+ * If the system has been booted with kmdb we need kmdb to
+ * use the kernel cif handler instead of the PROM cif handler.
+ */
+ if (boothowto & RB_KMDB) {
+ kmdb_cb = (void (*)(void))modlookup("misc/kmdbmod",
+ "kctl_switch_promif");
+ ASSERT(kmdb_cb != NULL);
+ (*kmdb_cb)();
+ }
+}
+
+static void
+cache_prom_data(void)
+{
+ /* initialize copy of OBP device tree */
+ promif_stree_init();
+
+ /* initialize io parameters */
+ promif_io_init();
+}
+
+
+/*
+ * Platform-specific actions to be taken when all cpus are running
+ * in the OS.
+ */
+void
+cpu_mp_init(void)
+{
+ if (!domaining_enabled)
+ return;
+
+ cif_cpu_mp_ready = 1;
+}
+
+#endif /* _KMDB */
diff --git a/usr/src/uts/sun4v/promif/promif_interp.c b/usr/src/uts/sun4v/promif/promif_interp.c
new file mode 100644
index 0000000000..fcf27b59ad
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_interp.c
@@ -0,0 +1,42 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+
+/*
+ * There is no support for prom_interpret() once the kernel
+ * takes over the CIF, so this function just returns an error.
+ * Having this stub keeps harmless messages out of the log file
+ * that report that prom_interpret() is not supported.
+ */
+/*ARGSUSED*/
+int
+promif_interpret(void *p)
+{
+ return (-1);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_io.c b/usr/src/uts/sun4v/promif/promif_io.c
new file mode 100644
index 0000000000..73ca7d3afa
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_io.c
@@ -0,0 +1,220 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/systm.h>
+#include <sys/hypervisor_api.h>
+#ifndef _KMDB
+#include <sys/kmem.h>
+#endif
+
+#define PROM_REG_TO_UNIT_ADDR(r) ((r) & ~(0xful << 28))
+
+static pnode_t instance_to_package(ihandle_t ih);
+
+/* cached copies of IO params */
+static phandle_t pstdin;
+static phandle_t pstdout;
+
+static ihandle_t istdin;
+static ihandle_t istdout;
+
+int
+promif_instance_to_package(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ ihandle_t ih;
+ phandle_t ph;
+
+ ih = p1275_cell2ihandle(ci[3]);
+
+ ph = instance_to_package(ih);
+
+ ci[4] = p1275_phandle2cell(ph);
+
+ return (0);
+}
+
+int
+promif_write(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ uint_t fd;
+ char *buf;
+ size_t len;
+ size_t rlen;
+
+ ASSERT(ci[1] == 3);
+
+ fd = p1275_cell2uint(ci[3]);
+ buf = p1275_cell2ptr(ci[4]);
+ len = p1275_cell2size(ci[5]);
+
+ /* only support stdout (console) */
+ ASSERT(fd == istdout);
+
+ for (rlen = 0; rlen < len; rlen++) {
+ while (hv_cnputchar((uint8_t)buf[rlen]) == H_EWOULDBLOCK)
+ /* try forever */;
+ }
+
+ /* return the length written */
+ ci[6] = p1275_size2cell(rlen);
+
+ return (0);
+}
+
+int
+promif_read(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ uint_t fd;
+ char *buf;
+ size_t len;
+ size_t rlen;
+
+ ASSERT(ci[1] == 3);
+
+ /* unpack arguments */
+ fd = p1275_cell2uint(ci[3]);
+ buf = p1275_cell2ptr(ci[4]);
+ len = p1275_cell2size(ci[5]);
+
+ /* only support stdin (console) */
+ ASSERT(fd == istdin);
+
+ for (rlen = 0; rlen < len; rlen++) {
+ if (hv_cngetchar((uint8_t *)&buf[rlen]) != H_EOK)
+ break;
+ }
+
+ /* return the length read */
+ ci[6] = p1275_size2cell(rlen);
+
+ return (0);
+}
+
+static pnode_t
+instance_to_package(ihandle_t ih)
+{
+ /* only support stdin and stdout */
+ ASSERT((ih == istdin) || (ih == istdout));
+
+ if (ih == istdin)
+ return (pstdin);
+
+ if (ih == istdout)
+ return (pstdout);
+
+ return (OBP_BADNODE);
+}
+
+#ifdef _KMDB
+
+void
+promif_io_init(ihandle_t in, ihandle_t out, phandle_t pin, phandle_t pout)
+{
+ istdin = in;
+ istdout = out;
+ pstdin = pin;
+ pstdout = pout;
+}
+
+#else
+
+void
+promif_io_init(void)
+{
+ /*
+ * Cache the mapping between the stdin and stdout
+ * ihandles and their respective phandles.
+ */
+ pstdin = prom_stdin_node();
+ pstdout = prom_stdout_node();
+
+ istdin = prom_stdin_ihandle();
+ istdout = prom_stdout_ihandle();
+}
+
+int
+promif_instance_to_path(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ pnode_t node;
+ ihandle_t ih;
+ char *buf;
+ int rlen;
+ char *regval;
+ uint_t *csaddr;
+ char name[OBP_MAXPROPNAME];
+ char scratch[OBP_MAXPATHLEN];
+ int rvlen;
+
+ ih = p1275_cell2ihandle(ci[3]);
+ buf = p1275_cell2ptr(ci[4]);
+
+ ci[6] = p1275_uint2cell(0);
+
+ node = instance_to_package(ih);
+
+ *buf = '\0';
+
+ while (node != prom_rootnode()) {
+ if (prom_getprop(node, OBP_NAME, name) == -1) {
+ prom_printf("instance_to_path: no name property "
+ "node=0x%x\n", node);
+ return (-1);
+ }
+
+ /* construct the unit address from the 'reg' property */
+ if ((rlen = prom_getproplen(node, OBP_REG)) == -1)
+ return (-1);
+
+ regval = kmem_zalloc(rlen, KM_SLEEP);
+
+ (void) prom_getprop(node, OBP_REG, regval);
+
+ csaddr = (uint_t *)regval;
+
+ (void) prom_sprintf(scratch, "/%s@%lx%s", name,
+ PROM_REG_TO_UNIT_ADDR(*csaddr), buf);
+
+ kmem_free(regval, rlen);
+
+ (void) prom_strcpy(buf, scratch);
+
+ node = prom_parentnode(node);
+ }
+
+ rvlen = prom_strlen(buf);
+ ci[6] = p1275_uint2cell(rvlen);
+
+ return (0);
+}
+
+#endif /* _KMDB */
diff --git a/usr/src/uts/sun4v/promif/promif_key.c b/usr/src/uts/sun4v/promif/promif_key.c
new file mode 100644
index 0000000000..f35064d085
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_key.c
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+
+/*
+ * Secure WAN boot requires firmware support for storing and
+ * retrieving security keys. The user command to set these
+ * keys in firmware storage is ickey(1M). Currently, sun4v
+ * platforms do not support this functionality. However, there
+ * is an external interface to these prom interfaces from the
+ * openprom(7D) driver. They are not documented in the man page,
+ * but they should still be handled just well enough so that
+ * the user gets a sensible error back.
+ */
+
+int
+promif_set_security_key(void *p)
+{
+ _NOTE(ARGUNUSED(p))
+
+ return (-1);
+}
+
+int
+promif_get_security_key(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+
+ ci[6] = p1275_int2cell(-1);
+
+ return (-1);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_mon.c b/usr/src/uts/sun4v/promif/promif_mon.c
new file mode 100644
index 0000000000..73c66778ee
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_mon.c
@@ -0,0 +1,203 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/uadmin.h>
+#include <sys/machsystm.h>
+#include <sys/hypervisor_api.h>
+
+#ifdef _KMDB
+
+extern int kmdb_dpi_get_master_cpuid(void);
+extern void kmdb_dpi_kernpanic(int cpuid);
+extern void prom_reboot(char *bootstr);
+
+#define PIL_DECL(p)
+#define PIL_SET7(p)
+#define PIL_REST(p)
+
+#else
+
+extern int vx_handler(cell_t *argument_array);
+
+#define PIL_DECL(p) int p
+#define PIL_SET7(p) (p = spl7())
+#define PIL_REST(p) (splx(p))
+
+#endif
+
+#define PROMIF_ENTER 0
+#define PROMIF_EXIT 1
+
+#define PROMIF_ISPRINT(c) (((c) >= ' ') && ((c) <= '~'))
+
+static void promif_mon(int mode);
+
+/*ARGSUSED*/
+int
+promif_enter_mon(void *p)
+{
+ PIL_DECL(pil);
+
+ PIL_SET7(pil);
+
+ prom_printf("\n");
+
+#ifdef _KMDB
+ promif_mon(PROMIF_ENTER);
+#else
+ idle_other_cpus();
+ promif_mon(PROMIF_ENTER);
+ resume_other_cpus();
+#endif
+
+ PIL_REST(pil);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+promif_exit_to_mon(void *p)
+{
+ PIL_DECL(pil);
+
+ PIL_SET7(pil);
+
+ prom_printf("Program terminated\n");
+
+ promif_mon(PROMIF_EXIT);
+
+ PIL_REST(pil);
+
+ return (0);
+}
+
+static void
+promif_mon(int mode)
+{
+ char cmd;
+ char *prompt;
+ boolean_t invalid_option;
+#ifdef _KMDB
+ static char *exit_prompt = "r)eboot, h)alt? ";
+#else
+ char value[ 8 ]; /* holds "true" or "false" */
+ char *boot_msg;
+ static char *null_msg = ".\" \"";
+ static char *ignore_msg =
+ "cr .\" Ignoring auto-boot? setting for this boot.\" cr";
+ static char *exit_prompt = "r)eboot, o)k prompt, h)alt? ";
+#endif
+ static char *enter_prompt = "c)ontinue, s)ync, r)eboot, h)alt? ";
+
+ prompt = (mode == PROMIF_EXIT) ? exit_prompt : enter_prompt;
+
+ for (;;) {
+ prom_printf("%s", prompt);
+
+ while (hv_cngetchar((uint8_t *)&cmd) != H_EOK)
+ ;
+
+ prom_printf("%c\n", cmd);
+
+ invalid_option = B_FALSE;
+
+ switch (cmd) {
+
+ case 'r':
+ prom_reboot("");
+ break;
+
+ case 'h':
+ (void) hv_mach_exit(0);
+ ASSERT(0);
+
+ break;
+
+#ifndef _KMDB
+ case 'o':
+ /*
+ * This option gives the user an "ok" prompt after
+ * the system reset regardless of the value of
+ * auto-boot? We offer this option because halt(1m)
+ * doesn't leave the user at the ok prompt (as it
+ * does on non-ldoms systems). If auto-boot? is
+ * true tell user we are overriding the setting
+ * for this boot only.
+ */
+ if (mode == PROMIF_EXIT) {
+ bzero(value, sizeof (value));
+ (void) promif_stree_getprop(prom_optionsnode(),
+ "auto-boot?", value);
+ boot_msg = strcmp(value, "true") ? null_msg :
+ ignore_msg;
+ (void) promif_ldom_setprop("reboot-command",
+ boot_msg, strlen(boot_msg) + 1);
+ (void) hv_mach_sir();
+ } else {
+ invalid_option = B_TRUE;
+ }
+ break;
+#endif
+
+ case '\r':
+ break;
+
+ case 's':
+ if (mode == PROMIF_ENTER) {
+#ifdef _KMDB
+ kmdb_dpi_kernpanic(kmdb_dpi_get_master_cpuid());
+#else
+ cell_t arg = p1275_ptr2cell("sync");
+ (void) vx_handler(&arg);
+#endif
+ } else {
+ invalid_option = B_TRUE;
+ }
+ break;
+
+ case 'c':
+ if (mode == PROMIF_ENTER) {
+ return;
+ } else {
+ invalid_option = B_TRUE;
+ }
+ break;
+
+ default:
+ invalid_option = B_TRUE;
+ break;
+ }
+
+ if (invalid_option && PROMIF_ISPRINT(cmd))
+ prom_printf("invalid option (%c)\n", cmd);
+ }
+
+ _NOTE(NOTREACHED)
+}
diff --git a/usr/src/uts/sun4v/promif/promif_node.c b/usr/src/uts/sun4v/promif/promif_node.c
new file mode 100644
index 0000000000..36ec3893fc
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_node.c
@@ -0,0 +1,293 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/esunddi.h>
+#include <sys/promif_impl.h>
+
+#ifdef _KMDB
+static pnode_t chosennode;
+static pnode_t optionsnode;
+#else
+static char *gettoken(char *tp, char *token);
+static pnode_t finddevice(char *path);
+#endif
+
+/*
+ * Routines for walking the PROMs devinfo tree
+ */
+
+#ifdef _KMDB
+
+void
+promif_set_nodes(pnode_t chosen, pnode_t options)
+{
+ chosennode = chosen;
+ optionsnode = options;
+}
+
+int
+promif_finddevice(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ char *path;
+
+ ASSERT(ci[1] == 1);
+
+ path = p1275_cell2ptr(ci[3]);
+
+ if (strcmp("/chosen", path) == 0) {
+ ci[4] = p1275_dnode2cell(chosennode);
+ } else if (strcmp("/options", path) == 0) {
+ ci[4] = p1275_dnode2cell(optionsnode);
+ } else {
+ /* only supports known nodes */
+ ASSERT(0);
+ }
+
+ return (0);
+}
+
+#else
+
+int
+promif_finddevice(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ pnode_t node;
+
+ ASSERT(ci[1] == 1);
+
+ node = finddevice(p1275_cell2ptr(ci[3]));
+
+ ci[4] = p1275_dnode2cell(node);
+
+ return (0);
+}
+
+#endif
+
+int
+promif_nextnode(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ pnode_t next;
+
+ ASSERT(ci[1] == 1);
+
+ next = promif_stree_nextnode(p1275_cell2dnode(ci[3]));
+
+ ci[4] = p1275_dnode2cell(next);
+
+ return (0);
+}
+
+int
+promif_childnode(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ pnode_t child;
+
+ ASSERT(ci[1] == 1);
+
+ child = promif_stree_childnode(p1275_cell2dnode(ci[3]));
+
+ ci[4] = p1275_dnode2cell(child);
+
+ return (0);
+}
+
+int
+promif_parentnode(void *p)
+{
+ cell_t *ci = (cell_t *)p;
+ pnode_t parent;
+
+ ASSERT(ci[1] == 1);
+
+ parent = promif_stree_parentnode(p1275_cell2dnode(ci[3]));
+
+ ci[4] = p1275_dnode2cell(parent);
+
+ return (0);
+}
+
+#ifndef _KMDB
+
+/*
+ * Get a token from a prom pathname, collecting everything
+ * until a non-comma, non-colon separator is found. Any
+ * options, including the ':' option separator, on the end
+ * of the token are removed.
+ */
+static char *
+gettoken(char *tp, char *token)
+{
+ char *result = token;
+
+ for (;;) {
+ tp = prom_path_gettoken(tp, token);
+ token += prom_strlen(token);
+ if ((*tp == ',') || (*tp == ':')) {
+ *token++ = *tp++;
+ *token = '\0';
+ continue;
+ }
+ break;
+ }
+
+ /* strip off any options from the token */
+ prom_strip_options(result, result);
+
+ return (tp);
+}
+
+/*
+ * Retrieve the unit address for a node by looking it up
+ * in the corresponding dip. -1 is returned if no unit
+ * address can be determined.
+ */
+static int
+get_unit_addr(pnode_t np, char *paddr)
+{
+ dev_info_t *dip;
+ char *addr;
+
+ if ((dip = e_ddi_nodeid_to_dip(np)) == NULL) {
+ return (-1);
+ }
+
+ if ((addr = ddi_get_name_addr(dip)) == NULL) {
+ ddi_release_devi(dip);
+ return (-1);
+ }
+
+ (void) prom_strcpy(paddr, addr);
+
+ ddi_release_devi(dip);
+
+ return (0);
+}
+
+/*
+ * Get node id of node in prom tree that path identifies
+ */
+static pnode_t
+finddevice(char *path)
+{
+ char name[OBP_MAXPROPNAME];
+ char addr[OBP_MAXPROPNAME];
+ char pname[OBP_MAXPROPNAME];
+ char paddr[OBP_MAXPROPNAME];
+ char *tp;
+ pnode_t np;
+ pnode_t device;
+
+ CIF_DBG_NODE("finddevice: %s\n", path);
+
+ tp = path;
+ np = prom_rootnode();
+ device = OBP_BADNODE;
+
+ /* must be a fully specified path */
+ if (*tp++ != '/')
+ goto done;
+
+ for (;;) {
+ /* get the name from the path */
+ tp = gettoken(tp, name);
+ if (*name == '\0')
+ break;
+
+ /* get the address from the path */
+ if (*tp == '@') {
+ tp++;
+ tp = gettoken(tp, addr);
+ } else {
+ addr[0] = '\0';
+ }
+
+ CIF_DBG_NODE("looking for: %s%s%s\n", name,
+ (*addr != '\0') ? "@" : "", addr);
+
+ if ((np = prom_childnode(np)) == OBP_NONODE)
+ break;
+
+ while (np != OBP_NONODE) {
+
+ /* get the name from the current node */
+ if (prom_getprop(np, OBP_NAME, pname) < 0)
+ goto done;
+
+ /* get the address from the current node */
+ if (get_unit_addr(np, paddr) < 0)
+ paddr[0] = '\0';
+
+ /* compare the names and addresses */
+ if ((prom_strcmp(name, pname) == 0) &&
+ (prom_strcmp(addr, paddr) == 0)) {
+ CIF_DBG_NODE("found dev: %s%s%s (0x%x)\n",
+ pname, (*paddr != '\0') ? "@" : "",
+ paddr, np);
+ break;
+ } else {
+ CIF_DBG_NODE(" no match: %s%s%s vs %s%s%s\n",
+ name, (*addr != '\0') ? "@" : "", addr,
+ pname, (*paddr != '\0') ? "@" : "", paddr);
+ }
+ np = prom_nextnode(np);
+ }
+
+ /* path does not map to a node */
+ if (np == OBP_NONODE)
+ break;
+
+ if (*tp == '\0') {
+ /* found a matching node */
+ device = np;
+ break;
+ }
+
+ /*
+ * Continue the loop with the
+ * next component of the path.
+ */
+ tp++;
+ }
+done:
+
+ if (device == OBP_BADNODE) {
+ CIF_DBG_NODE("device not found\n\n");
+ } else {
+ CIF_DBG_NODE("returning 0x%x\n\n", device);
+ }
+
+ return (device);
+}
+
+#endif
diff --git a/usr/src/uts/sun4v/promif/promif_power_off.c b/usr/src/uts/sun4v/promif/promif_power_off.c
new file mode 100644
index 0000000000..fb54d006ca
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_power_off.c
@@ -0,0 +1,45 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/hypervisor_api.h>
+
+int
+promif_power_off(void *p)
+{
+	_NOTE(ARGUNUSED(p))
+
+	int rv = 0;
+
+	rv = hv_mach_exit(0);	/* ask the hypervisor to stop this domain */
+
+	/* should not return; if it somehow does, hand the HV status back */
+	ASSERT(0);
+
+	return (rv);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_prop.c b/usr/src/uts/sun4v/promif/promif_prop.c
new file mode 100644
index 0000000000..42cdffe32a
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_prop.c
@@ -0,0 +1,327 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/promif_impl.h>
+#include <sys/ds.h>
+#include <sys/modctl.h>
+#include <sys/ksynch.h>
+#include <sys/varconfig.h>
+
+#ifndef _KMDB
+
+#define PROMIF_DS_TIMEOUT_SEC 15	/* max wait for a DS response */
+
+static kmutex_t promif_prop_lock;	/* guards promif_prop_cv/ds_resp */
+static kcondvar_t promif_prop_cv;	/* signalled by vc_data_handler */
+static var_config_msg_t promif_ds_resp;	/* last DS response received */
+static var_config_resp_t *cfg_rsp = &promif_ds_resp.var_config_resp;
+static int (*ds_send)();	/* ds_cap_send, resolved at runtime */
+static int (*ds_init)();	/* ds_cap_init, resolved at runtime */
+
+/*
+ * Domain Services (DS) state: service handles, capabilities, client ops.
+ */
+static ds_svc_hdl_t ds_primary_handle;	/* set by vc_reg_handler */
+static ds_svc_hdl_t ds_backup_handle;	/* set by vc_reg_handler */
+
+static ds_ver_t vc_version[] = { { 1, 0 } };	/* negotiable versions */
+
+#define VC_NVERS (sizeof (vc_version) / sizeof (vc_version[0]))
+
+static ds_capability_t vc_primary_cap = {
+	"var-config", /* svc_id */
+	vc_version, /* vers */
+	VC_NVERS /* nvers */
+};
+
+static ds_capability_t vc_backup_cap = {
+	"var-config-backup", /* svc_id */
+	vc_version, /* vers */
+	VC_NVERS /* nvers */
+};
+
+static void vc_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
+static void vc_unreg_handler(ds_cb_arg_t);
+static void vc_data_handler(ds_cb_arg_t, void *, size_t);
+
+static ds_clnt_ops_t vc_primary_ops = {
+	vc_reg_handler, /* ds_primary_reg_cb */
+	vc_unreg_handler, /* ds_primary_unreg_cb */
+	vc_data_handler, /* ds_data_cb */
+	&ds_primary_handle /* cb_arg */
+};
+
+static ds_clnt_ops_t vc_backup_ops = {
+	vc_reg_handler, /* ds_backup_reg_cb */
+	vc_unreg_handler, /* ds_backup_unreg_cb */
+	vc_data_handler, /* ds_data_cb */
+	&ds_backup_handle /* cb_arg */
+};
+
+static void
+vc_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
+{
+	_NOTE(ARGUNUSED(ver))
+
+	/* record the handle for whichever service (primary/backup) registered */
+	if ((ds_svc_hdl_t *)arg == &ds_primary_handle)
+		ds_primary_handle = hdl;
+	else if ((ds_svc_hdl_t *)arg == &ds_backup_handle)
+		ds_backup_handle = hdl; /* fix: was ds_primary_handle (copy-paste) */
+}
+
+static void
+vc_unreg_handler(ds_cb_arg_t arg)
+{
+	/* invalidate whichever service handle (primary/backup) unregistered */
+	if ((ds_svc_hdl_t *)arg == &ds_primary_handle)
+		ds_primary_handle = DS_INVALID_HDL;
+	else if ((ds_svc_hdl_t *)arg == &ds_backup_handle)
+		ds_backup_handle = DS_INVALID_HDL;
+}
+
+static void
+vc_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
+{
+	_NOTE(ARGUNUSED(arg))
+
+	bcopy(buf, &promif_ds_resp, (buflen > sizeof (promif_ds_resp)) ? sizeof (promif_ds_resp) : buflen); /* clamp: never overflow the static buffer */
+	mutex_enter(&promif_prop_lock);
+	cv_signal(&promif_prop_cv);	/* wake the waiter in promif_ldom_setprop */
+	mutex_exit(&promif_prop_lock);
+}
+
+/*
+ * Initialize the linkage with DS (Domain Services). We assume that
+ * the DS module has already been loaded by the platmod.
+ *
+ * The call to the DS init functions will eventually result in the
+ * invocation of our registration callback handlers, at which time DS
+ * is able to accept requests.
+ */
+static void
+promif_ds_init(void)
+{
+	static char *me = "promif_ds_init";	/* tag for diagnostics */
+	int rv;
+
+	if ((ds_init =
+	    (int (*)())modgetsymvalue("ds_cap_init", 0)) == 0) {	/* DS module must be loaded */
+		cmn_err(CE_WARN, "%s: can't find ds_cap_init", me);
+		return;
+	}
+
+	if ((ds_send =
+	    (int (*)())modgetsymvalue("ds_cap_send", 0)) == 0) {
+		cmn_err(CE_WARN, "%s: can't find ds_cap_send", me);
+		return;
+	}
+
+	/* registration completes asynchronously via vc_reg_handler */
+	if ((rv = (*ds_init)(&vc_primary_cap, &vc_primary_ops)) != 0) {
+		cmn_err(CE_NOTE,
+		    "%s: ds_cap_init failed (primary): %d", me, rv);
+	}
+
+	/* a backup-service failure is non-fatal, as above */
+	if ((rv = (*ds_init)(&vc_backup_cap, &vc_backup_ops)) != 0) {
+		cmn_err(CE_NOTE,
+		    "%s: ds_cap_init failed (backup): %d", me, rv);
+	}
+}
+
+/*
+ * Prepare for ldom variable requests.
+ */
+void
+promif_prop_init(void)
+{
+	mutex_init(&promif_prop_lock, NULL, MUTEX_DEFAULT, NULL);	/* protects DS response hand-off */
+	cv_init(&promif_prop_cv, NULL, CV_DEFAULT, NULL);
+
+	promif_ds_init();	/* hook up to Domain Services */
+}
+
+
+/*
+ * Replace the current value of a property string given its name and
+ * new value.
+ */
+int
+promif_ldom_setprop(char *name, void *value, int valuelen)
+{
+	var_config_msg_t *req;
+	var_config_set_req_t *setp;
+	var_config_cmd_t cmd;
+	ds_svc_hdl_t ds_handle;
+	int rv;
+	int namelen = strlen(name);
+	int paylen = namelen + 1 + valuelen; /* valuelen includes the null */
+	static char *me = "promif_ldom_setprop";
+
+	if (ds_primary_handle != DS_INVALID_HDL)	/* prefer the primary service */
+		ds_handle = ds_primary_handle;
+	else if (ds_backup_handle != DS_INVALID_HDL)
+		ds_handle = ds_backup_handle;
+	else
+		return (-1);	/* no DS service registered yet */
+
+	req = kmem_zalloc(sizeof (var_config_hdr_t) + paylen, KM_SLEEP);
+	req->var_config_cmd = VAR_CONFIG_SET_REQ;
+	setp = &req->var_config_set;
+	(void) strcpy(setp->name_and_value, name);	/* payload packed as "name\0value" */
+	(void) strncpy(&setp->name_and_value[namelen + 1], value, valuelen);
+
+	if ((rv = (*ds_send)(ds_handle, req,
+	    sizeof (var_config_hdr_t) + paylen)) != 0) {
+		cmn_err(CE_WARN, "%s: ds_cap_send failed: %d", me, rv);
+		kmem_free(req, sizeof (var_config_hdr_t) + paylen);
+		return (-1);
+	}
+
+	kmem_free(req, sizeof (var_config_hdr_t) + paylen);
+
+	/*
+	 * Since we are emulating OBP, we must comply with the promif
+	 * infrastructure and execute only on the originating cpu.
+	 */
+	thread_affinity_set(curthread, CPU_CURRENT);
+
+	mutex_enter(&promif_prop_lock);
+	if (cv_timedwait(&promif_prop_cv,
+	    &promif_prop_lock, lbolt + PROMIF_DS_TIMEOUT_SEC * hz) == -1) {	/* vc_data_handler signals on response */
+		cmn_err(CE_WARN, "%s: ds response timeout", me);
+		rv = -1;
+		goto out;
+	}
+
+	cmd = promif_ds_resp.vc_hdr.cmd;	/* response left in the static buffer */
+	if (cmd != VAR_CONFIG_SET_RESP) {
+		cmn_err(CE_WARN, "%s: bad response type: %d", me, cmd);
+		rv = -1;
+		goto out;
+	}
+	rv = (cfg_rsp->result == VAR_CONFIG_SUCCESS) ? valuelen : -1;
+
+out:
+	mutex_exit(&promif_prop_lock);
+	thread_affinity_clear(curthread);
+	return (rv);	/* valuelen on success, -1 on any failure */
+}
+
+int
+promif_setprop(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	pnode_t node;
+	caddr_t name;
+	caddr_t value;
+	int len;
+
+	ASSERT(ci[1] == 4);	/* expects 4 args: node, name, value, len */
+
+	node = p1275_cell2dnode(ci[3]);
+	ASSERT(node == prom_optionsnode());	/* only /options props are settable */
+	name = p1275_cell2ptr(ci[4]);
+	value = p1275_cell2ptr(ci[5]);
+	len = p1275_cell2int(ci[6]);
+
+	if (promif_stree_getproplen(node, name) != -1)	/* only vars that already exist */
+		len = promif_ldom_setprop(name, value, len);
+
+	if (len >= 0)	/* ldom store succeeded; mirror into the shadow tree */
+		len = promif_stree_setprop(node, name, (void *)value, len);
+
+
+	ci[7] = p1275_int2cell(len);	/* result cell: new length or -1 */
+
+	return ((len == -1) ? len : 0);
+}
+
+#endif
+
+int
+promif_getprop(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	pnode_t node;
+	caddr_t name;
+	caddr_t value;
+	int len;
+
+	ASSERT(ci[1] == 4);	/* expects 4 args: node, name, buf, (buflen) */
+
+	node = p1275_cell2dnode(ci[3]);
+	name = p1275_cell2ptr(ci[4]);
+	value = p1275_cell2ptr(ci[5]);
+
+	len = promif_stree_getprop(node, name, value);	/* -1 if not found */
+
+	ci[7] = p1275_int2cell(len);	/* result cell: prop length or -1 */
+
+	return ((len == -1) ? len : 0);
+}
+
+int
+promif_getproplen(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	pnode_t node;
+	caddr_t name;
+	int len;
+
+	ASSERT(ci[1] == 2);	/* expects 2 args: node, name */
+
+	node = p1275_cell2dnode(ci[3]);
+	name = p1275_cell2ptr(ci[4]);
+
+	len = promif_stree_getproplen(node, name);	/* -1 if not found */
+
+	ci[5] = p1275_int2cell(len);
+
+	return (0);	/* CIF call itself always succeeds */
+}
+
+int
+promif_nextprop(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	pnode_t node;
+	caddr_t prev;
+	caddr_t next;
+
+	ASSERT(ci[1] == 3);	/* expects 3 args: node, previous, next */
+
+	node = p1275_cell2dnode(ci[3]);
+	prev = p1275_cell2ptr(ci[4]);
+	next = p1275_cell2ptr(ci[5]);
+
+	(void) promif_stree_nextprop(node, prev, next);	/* next[] gets "" at end of list */
+
+	return (0);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_reboot.c b/usr/src/uts/sun4v/promif/promif_reboot.c
new file mode 100644
index 0000000000..15a696184b
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_reboot.c
@@ -0,0 +1,115 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/hypervisor_api.h>
+
+/*
+ * Reboot Command String
+ *
+ * The prom_reboot() CIF handler takes an optional string containing
+ * arguments to the boot command that are to be applied to the reboot.
+ * This information is used to create a full boot command string that
+ * is stored in a well known ldom variable (REBOOT_CMD_VAR_NAME). The
+ * string is constructed to take the following form:
+ *
+ * boot <specified boot arguments><NULL>
+ *
+ * When the domain comes back up, OBP consults this variable. If set,
+ * it will use the unmodified boot command string to boot the domain.
+ * The maximum length of the boot command is specified by the constant
+ * REBOOT_CMD_MAX_LEN. If the specified arguments cause the command
+ * string to exceed this length, the arguments are truncated.
+ */
+#define REBOOT_CMD_VAR_NAME "reboot-command"
+#define REBOOT_CMD_BASE "boot "
+#define REBOOT_CMD_MAX_LEN 256
+#define REBOOT_CMD_ARGS_MAX_LEN (REBOOT_CMD_MAX_LEN - \
+ prom_strlen(REBOOT_CMD_BASE) - 1)
+int
+promif_reboot(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	int rv = 0;
+#ifndef _KMDB
+	char *bootargs;
+	char bootcmd[REBOOT_CMD_MAX_LEN];
+	char *cmd_end;
+	int cmd_len;
+#endif
+
+	/* one argument expected */
+	ASSERT(ci[1] == 1);
+
+#ifndef _KMDB
+	bootargs = p1275_cell2ptr(ci[3]);
+
+	if (bootargs == NULL)	/* no args supplied: plain "boot " */
+		bootargs = "";
+
+	/* verify the length of the command string */
+	cmd_len = prom_strlen(REBOOT_CMD_BASE) + prom_strlen(bootargs) + 1;
+
+	if (cmd_len > REBOOT_CMD_MAX_LEN) {
+		/*
+		 * Unable to set the requested boot arguments.
+		 * Truncate them so that the boot command will
+		 * fit within the maximum length. This follows
+		 * the policy also used by OBP.
+		 */
+		cmd_end = bootargs + REBOOT_CMD_ARGS_MAX_LEN;
+		*cmd_end = '\0';	/* NOTE(review): truncates in place in the caller's buffer */
+
+		prom_printf("WARNING: reboot command length (%d) too long, "
+		    "truncating command arguments\n", cmd_len);
+	}
+
+	/* construct the boot command string */
+	(void) prom_sprintf(bootcmd, "%s%s", REBOOT_CMD_BASE, bootargs);
+
+	cmd_len = prom_strlen(bootcmd) + 1;
+	ASSERT(cmd_len <= REBOOT_CMD_MAX_LEN);
+
+	CIF_DBG_REBOOT("bootcmd='%s'\n", bootcmd);
+
+	/* attempt to set the ldom variable */
+	if (promif_ldom_setprop(REBOOT_CMD_VAR_NAME, bootcmd, cmd_len) == -1) {
+		prom_printf("WARNING: unable to store boot command for "
+		    "use on reboot\n");	/* best-effort: reset proceeds regardless */
+	}
+#endif
+
+	prom_printf("Resetting...\n");
+
+	rv = hv_mach_sir();	/* reset the domain via the hypervisor */
+
+	/* should not return */
+	ASSERT(0);
+
+	return (rv);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_stree.c b/usr/src/uts/sun4v/promif/promif_stree.c
new file mode 100644
index 0000000000..c52545ed16
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_stree.c
@@ -0,0 +1,455 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/kmem.h>
+#include <sys/machsystm.h>
+
+/*
+ * A property attached to a node in the kernel's
+ * shadow copy of the PROM device tree.
+ */
+typedef struct prom_prop {
+	struct prom_prop *pp_next;	/* next property on this node */
+	char *pp_name;			/* NUL-terminated property name */
+	int pp_len;			/* value length in bytes */
+	void *pp_val;			/* value; NULL when pp_len == 0 */
+} prom_prop_t;
+
+/*
+ * A node in the kernel's shadow copy of the PROM
+ * device tree.
+ */
+typedef struct prom_node {
+	pnode_t pn_nodeid;		/* OBP node id this shadows */
+	struct prom_prop *pn_propp;	/* head of the property list */
+	struct prom_node *pn_parent;	/* parent node */
+	struct prom_node *pn_child;	/* first child */
+	struct prom_node *pn_sibling;	/* next peer */
+} prom_node_t;
+
+static prom_node_t *promif_root;
+
+static prom_node_t *find_node(pnode_t nodeid);
+static prom_node_t *find_node_work(prom_node_t *np, pnode_t node);
+static int getproplen(prom_node_t *pnp, char *name);
+static void *getprop(prom_node_t *pnp, char *name);
+static char *nextprop(prom_node_t *pnp, char *name);
+
+#ifndef _KMDB
+static void create_prop(prom_node_t *pnp, char *name, void *val, int len);
+static prom_node_t *create_node(prom_node_t *parent, pnode_t node);
+static void create_peers(prom_node_t *pnp, pnode_t node);
+static void create_children(prom_node_t *pnp, pnode_t parent);
+#endif
+
+/*
+ * Hooks for kmdb for accessing the PROM shadow tree. The driver portion
+ * of kmdb will retrieve the root of the tree and pass it down to the
+ * debugger portion of kmdb. As the kmdb debugger is standalone, it has
+ * its own promif_root pointer that it will be set to the value passed by
+ * the driver so that kmdb points to the shadow tree maintained by the kernel.
+ * So the "get" function is in the kernel while the "set" function is in kmdb.
+ */
+#ifdef _KMDB
+void
+promif_stree_setroot(void *root)
+{
+	promif_root = (prom_node_t *)root;	/* kmdb: adopt the kernel's tree */
+}
+#else
+void *
+promif_stree_getroot(void)
+{
+	return (promif_root);	/* kernel: expose the tree root to kmdb */
+}
+#endif
+
+/*
+ * Interfaces used internally by promif functions.
+ * These hide all accesses to the shadow tree.
+ */
+
+pnode_t
+promif_stree_parentnode(pnode_t nodeid)
+{
+	prom_node_t *pnp;
+
+	pnp = find_node(nodeid);
+	if (pnp && pnp->pn_parent) {
+		return (pnp->pn_parent->pn_nodeid);
+	}
+
+	return (OBP_NONODE);	/* unknown node, or node has no parent */
+}
+
+pnode_t
+promif_stree_childnode(pnode_t nodeid)
+{
+	prom_node_t *pnp;
+
+	pnp = find_node(nodeid);
+	if (pnp && pnp->pn_child)
+		return (pnp->pn_child->pn_nodeid);
+
+	return (OBP_NONODE);	/* unknown node, or node has no children */
+}
+
+pnode_t
+promif_stree_nextnode(pnode_t nodeid)
+{
+	prom_node_t *pnp;
+
+	/*
+	 * Note: next(0) returns the root node
+	 */
+	pnp = find_node(nodeid);
+	if (pnp && (nodeid == OBP_NONODE))
+		return (pnp->pn_nodeid);	/* find_node(OBP_NONODE) is the root */
+	if (pnp && pnp->pn_sibling)
+		return (pnp->pn_sibling->pn_nodeid);
+
+	return (OBP_NONODE);	/* unknown node, or no next peer */
+}
+
+int
+promif_stree_getproplen(pnode_t nodeid, char *name)
+{
+	prom_node_t *pnp;
+
+	pnp = find_node(nodeid);
+	if (pnp == NULL)
+		return (-1);	/* node not in the shadow tree */
+
+	return (getproplen(pnp, name));	/* -1 if property not found */
+}
+
+int
+promif_stree_getprop(pnode_t nodeid, char *name, void *value)
+{
+	prom_node_t *pnp;
+	void *prop;
+	int len;
+
+	pnp = find_node(nodeid);
+	if (pnp == NULL) {
+		prom_printf("find_node: no node?\n");
+		return (-1);
+	}
+
+	len = getproplen(pnp, name);
+	if (len > 0) {
+		prop = getprop(pnp, name);
+		bcopy(prop, value, len);	/* caller's buffer must hold len bytes */
+	} else {
+		prom_printf("find_node: getproplen: %d\n", len);	/* reached for len <= 0, incl. missing (-1) */
+	}
+
+	return (len);	/* property length, 0 for empty, -1 if absent */
+}
+
+char *
+promif_stree_nextprop(pnode_t nodeid, char *name, char *next)
+{
+	prom_node_t *pnp;
+	char *propname;
+
+	next[0] = '\0';	/* default: empty string marks end of list */
+
+	pnp = find_node(nodeid);
+	if (pnp == NULL)
+		return (NULL);	/* unknown node */
+
+	propname = nextprop(pnp, name);
+	if (propname == NULL)
+		return (next);	/* no more properties; next is "" */
+
+	(void) prom_strcpy(next, propname);	/* copy into caller's buffer */
+
+	return (next);
+}
+
+static prom_node_t *
+find_node_work(prom_node_t *np, pnode_t node)
+{
+	prom_node_t *nnp;	/* depth-first: node, then children, then peers */
+
+	if (np->pn_nodeid == node)
+		return (np);
+
+	if (np->pn_child)
+		if ((nnp = find_node_work(np->pn_child, node)) != NULL)
+			return (nnp);
+
+	if (np->pn_sibling)
+		if ((nnp = find_node_work(np->pn_sibling, node)) != NULL)
+			return (nnp);
+
+	return (NULL);	/* not in this subtree */
+}
+
+static prom_node_t *
+find_node(pnode_t nodeid)
+{
+
+	if (nodeid == OBP_NONODE)
+		return (promif_root);	/* OBP_NONODE maps to the root */
+
+	if (promif_root == NULL)
+		return (NULL);	/* shadow tree not initialized yet */
+
+	return (find_node_work(promif_root, nodeid));
+}
+
+static int
+getproplen(prom_node_t *pnp, char *name)
+{
+	struct prom_prop *propp;	/* linear scan of the node's prop list */
+
+	for (propp = pnp->pn_propp; propp != NULL; propp = propp->pp_next)
+		if (prom_strcmp(propp->pp_name, name) == 0)
+			return (propp->pp_len);
+
+	return (-1);	/* no property with this name */
+}
+
+static void *
+getprop(prom_node_t *np, char *name)
+{
+	struct prom_prop *propp;	/* linear scan of the node's prop list */
+
+	for (propp = np->pn_propp; propp != NULL; propp = propp->pp_next)
+		if (prom_strcmp(propp->pp_name, name) == 0)
+			return (propp->pp_val);
+
+	return (NULL);	/* no property with this name */
+}
+
+static char *
+nextprop(prom_node_t *pnp, char *name)
+{
+	struct prom_prop *propp;
+
+	/*
+	 * getting next of NULL or a null string returns the first prop name
+	 */
+	if (name == NULL || *name == '\0')
+		if (pnp->pn_propp)
+			return (pnp->pn_propp->pp_name);
+
+	for (propp = pnp->pn_propp; propp != NULL; propp = propp->pp_next)
+		if (prom_strcmp(propp->pp_name, name) == 0)
+			if (propp->pp_next)	/* found current; return its successor */
+				return (propp->pp_next->pp_name);
+
+	return (NULL);	/* name was last, or not found at all */
+}
+
+#ifndef _KMDB
+
+int
+promif_stree_setprop(pnode_t nodeid, char *name, void *value, int len)
+{
+	prom_node_t *pnp;
+	struct prom_prop *prop;
+
+	pnp = find_node(nodeid);
+	if (pnp == NULL) {
+		prom_printf("find_node: no node?\n");
+		return (-1);
+	}
+
+	/*
+	 * If a property with this name exists, replace the existing
+	 * value.
+	 */
+	for (prop = pnp->pn_propp; prop; prop = prop->pp_next)
+		if (prom_strcmp(prop->pp_name, name) == 0) {
+			kmem_free(prop->pp_val, prop->pp_len);	/* NOTE(review): assumes pp_val non-NULL when pp_len > 0 — confirm */
+			prop->pp_val = NULL;
+			if (len > 0) {
+				prop->pp_val = kmem_zalloc(len, KM_SLEEP);
+				bcopy(value, prop->pp_val, len);
+			}
+			prop->pp_len = len;
+			return (len);	/* replaced; new length */
+		}
+
+	return (-1);	/* not found: new properties are not created here */
+}
+
+/*
+ * Create a promif private copy of boot's device tree,
+ * starting at the PROM root node.
+ */
+void
+promif_stree_init(void)
+{
+	pnode_t node;
+	prom_node_t *pnp;
+
+	node = prom_rootnode();
+	promif_root = pnp = create_node(OBP_NONODE, node);	/* root has no parent */
+
+	create_peers(pnp, node);
+	create_children(pnp, node);
+}
+
+static void
+create_children(prom_node_t *pnp, pnode_t parent)
+{
+	prom_node_t *cnp;
+	pnode_t child;	/* walks the PROM child chain iteratively */
+
+	_NOTE(CONSTCOND)
+	while (1) {
+		child = prom_childnode(parent);
+		if (child == 0)
+			break;
+		if (prom_getproplen(child, "name") <= 0) {	/* skip nameless nodes but descend through them */
+			parent = child;
+			continue;
+		}
+		cnp = create_node(pnp, child);
+		pnp->pn_child = cnp;
+		create_peers(cnp, child);	/* peers of each child, recursively */
+		pnp = cnp;
+		parent = child;
+	}
+}
+
+static void
+create_peers(prom_node_t *np, pnode_t node)
+{
+	prom_node_t *pnp;
+	pnode_t peer;	/* walks the PROM sibling chain iteratively */
+
+	_NOTE(CONSTCOND)
+	while (1) {
+		peer = prom_nextnode(node);
+		if (peer == 0)
+			break;
+		if (prom_getproplen(peer, "name") <= 0) {	/* skip nameless peers but keep walking */
+			node = peer;
+			continue;
+		}
+		pnp = create_node(np->pn_parent, peer);	/* peers share the same parent */
+		np->pn_sibling = pnp;
+		create_children(pnp, peer);
+		np = pnp;
+		node = peer;
+	}
+}
+
+static prom_node_t *
+create_node(prom_node_t *parent, pnode_t node)
+{
+	prom_node_t *pnp;
+	char prvname[OBP_MAXPROPNAME];
+	char propname[OBP_MAXPROPNAME];
+	int proplen;
+	void *propval;
+
+	pnp = kmem_zalloc(sizeof (prom_node_t), KM_SLEEP);
+	pnp->pn_nodeid = node;
+	pnp->pn_parent = parent;
+
+	prvname[0] = '\0';	/* empty prev name => start of the prop list */
+
+	_NOTE(CONSTCOND)
+	while (1) {
+		(void) prom_nextprop(node, prvname, propname);
+		if (prom_strlen(propname) == 0)	/* "" marks end of props */
+			break;
+		if ((proplen = prom_getproplen(node, propname)) == -1)
+			continue;	/* NOTE(review): prvname not advanced here; could loop if a listed prop has no len — confirm */
+		propval = NULL;
+		if (proplen != 0) {	/* zero-length props get a NULL value */
+			propval = kmem_zalloc(proplen, KM_SLEEP);
+			(void) prom_getprop(node, propname, propval);
+		}
+		create_prop(pnp, propname, propval, proplen);
+
+		(void) prom_strcpy(prvname, propname);
+	}
+
+	return (pnp);
+}
+
+static void
+create_prop(prom_node_t *pnp, char *name, void *val, int len)
+{
+	struct prom_prop *prop;
+	struct prom_prop *newprop;
+
+	newprop = kmem_zalloc(sizeof (*newprop), KM_SLEEP);
+	newprop->pp_name = kmem_zalloc(prom_strlen(name) + 1, KM_SLEEP);
+	(void) prom_strcpy(newprop->pp_name, name);
+	newprop->pp_val = val;	/* takes ownership of caller's allocation */
+	newprop->pp_len = len;
+
+	if (pnp->pn_propp == NULL) {
+		pnp->pn_propp = newprop;	/* first property on this node */
+		return;
+	}
+
+	/* move to the end of the prop list */
+	for (prop = pnp->pn_propp; prop->pp_next != NULL; prop = prop->pp_next)
+		/* empty */;
+
+	/* append the new prop, preserving PROM enumeration order */
+	prop->pp_next = newprop;
+}
+
+static void
+promif_dump_tree(prom_node_t *pnp)
+{
+	int i;
+	static int level = 0;	/* recursion depth, used for indenting */
+
+	if (pnp == NULL)
+		return;
+
+	for (i = 0; i < level; i++) {
+		prom_printf(" ");	/* one space per tree level */
+	}
+
+	prom_printf("Node 0x%x (parent=0x%x, sibling=0x%x)\n", pnp->pn_nodeid,
+	    (pnp->pn_parent) ? pnp->pn_parent->pn_nodeid : 0,
+	    (pnp->pn_sibling) ? pnp->pn_sibling->pn_nodeid : 0);
+
+	if (pnp->pn_child != NULL) {
+		level++;
+		promif_dump_tree(pnp->pn_child);
+		level--;
+	}
+
+	if (pnp->pn_sibling != NULL)	/* debug helper; appears uncalled in this file */
+		promif_dump_tree(pnp->pn_sibling);
+}
+
+#endif
diff --git a/usr/src/uts/sun4v/promif/promif_test.c b/usr/src/uts/sun4v/promif/promif_test.c
new file mode 100644
index 0000000000..ceb9ec3947
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_test.c
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+
+int
+promif_test(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	char *opname;
+	cif_func_t func;
+	int rv;
+
+	ASSERT(ci[1] == 1);	/* expects one arg: the CIF service name */
+
+	opname = p1275_cell2ptr(ci[3]);
+
+	func = promif_find_cif_callback(opname);	/* NULL if not registered */
+
+	/* zero indicates operation is supported */
+	rv = (func != NULL) ? 0 : 1;
+
+	ci[4] = p1275_int2cell(rv);
+
+	return (0);
+}
diff --git a/usr/src/uts/sun4v/promif/promif_version.c b/usr/src/uts/sun4v/promif/promif_version.c
new file mode 100644
index 0000000000..c79e02513f
--- /dev/null
+++ b/usr/src/uts/sun4v/promif/promif_version.c
@@ -0,0 +1,82 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/promif_impl.h>
+#include <sys/hypervisor_api.h>
+
+/*
+ * Wrappers to get/set the API version with Hypervisor.
+ */
+
+int
+promif_set_sun4v_api_version(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	uint64_t api_group;
+	uint64_t major;
+	uint64_t minor;
+	uint64_t status;
+	uint64_t supported_minor;
+
+	ASSERT(ci[1] == 3);	/* 3 in-args: group, major, minor */
+	ASSERT(ci[2] == 2);	/* 2 out-args: status, supported minor */
+
+	api_group = (uint64_t)p1275_cell2int(ci[3]);
+	major = (uint64_t)p1275_cell2int(ci[4]);
+	minor = (uint64_t)p1275_cell2int(ci[5]);
+
+	status = hv_api_set_version(api_group, major, minor, &supported_minor);
+
+	ci[6] = p1275_int2cell(status);
+	ci[7] = p1275_int2cell(supported_minor);
+
+	return ((status == H_EOK) ? 0 : -1);
+}
+
+int
+promif_get_sun4v_api_version(void *p)
+{
+	cell_t *ci = (cell_t *)p;
+	uint64_t api_group;
+	uint64_t major;
+	uint64_t minor;
+	uint64_t status;
+
+	ASSERT(ci[1] == 1);	/* 1 in-arg: group */
+	ASSERT(ci[2] == 3);	/* 3 out-args: status, major, minor */
+
+	api_group = (uint64_t)p1275_cell2int(ci[3]);
+
+	status = hv_api_get_version(api_group, &major, &minor);
+
+	ci[4] = p1275_int2cell(status);
+	ci[5] = p1275_int2cell(major);
+	ci[6] = p1275_int2cell(minor);
+
+	return ((status == H_EOK) ? 0 : -1);
+}
diff --git a/usr/src/uts/sun4v/sys/cnex.h b/usr/src/uts/sun4v/sys/cnex.h
new file mode 100644
index 0000000000..f2b01a8ae7
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/cnex.h
@@ -0,0 +1,98 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _CNEX_H
+#define _CNEX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Channel nexus "reg" spec
+ */
+typedef struct cnex_regspec {
+	uint64_t physaddr;	/* base physical address */
+	uint64_t size;		/* size of the register space */
+} cnex_regspec_t;
+
+/*
+ * Channel nexus interrupt map
+ */
+struct cnex_pil_map {
+	ldc_dev_t devclass; /* LDC device class */
+	uint32_t pil; /* PIL for device class */
+};
+
+/*
+ * Channel interrupt information
+ */
+typedef struct cnex_intr {
+	uint64_t ino; /* dev intr number */
+	uint64_t icookie; /* dev intr cookie */
+	uint_t (*hdlr)(); /* intr handler */
+	caddr_t arg1; /* intr argument 1 */
+	caddr_t arg2; /* intr argument 2 */
+	void *ssp; /* back ptr to soft state */
+} cnex_intr_t;
+
+/* cnex interrupt types */
+typedef enum {
+	CNEX_TX_INTR = 1, /* transmit interrupt */
+	CNEX_RX_INTR /* receive interrupt */
+} cnex_intrtype_t;
+
+/*
+ * Channel information, one per registered LDC channel.
+ */
+typedef struct cnex_ldc {
+	kmutex_t lock; /* Channel lock */
+	struct cnex_ldc *next; /* next channel in the soft state clist */
+
+	uint64_t id; /* channel id */
+	ldc_dev_t devclass; /* Device class channel belongs to */
+
+	cnex_intr_t tx; /* Transmit interrupt */
+	cnex_intr_t rx; /* Receive interrupt */
+} cnex_ldc_t;
+
+/*
+ * Channel nexus soft state pointer
+ */
+typedef struct cnex_soft_state {
+	dev_info_t *devi; /* nexus dev_info */
+	uint64_t cfghdl; /* cnex config handle */
+	kmutex_t clist_lock; /* lock to protect channel list */
+	cnex_ldc_t *clist; /* list of registered channels */
+} cnex_soft_state_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CNEX_H */
diff --git a/usr/src/uts/sun4v/sys/cpu_module.h b/usr/src/uts/sun4v/sys/cpu_module.h
index e1c386533b..902c088411 100644
--- a/usr/src/uts/sun4v/sys/cpu_module.h
+++ b/usr/src/uts/sun4v/sys/cpu_module.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -154,6 +153,18 @@ extern void bzero(void *addr, size_t count);
int cpu_trapstat_conf(int cmd);
void cpu_trapstat_data(void *buf, uint_t pgszs);
+#define NO_EU_MAPPING_FOUND 0xffffffff
+/*
+ * Default MMU pagesize mask for sun4v architecture.
+ */
+#define DEFAULT_SUN4V_MMU_PAGESIZE_MASK ((1 << TTE8K) | (1 << TTE64K) \
+ | (1 << TTE4M))
+
+void cpu_setup_common(char **);
+
+boolean_t broken_md_flag;
+int va_bits;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/sun4v/sys/dr_cpu.h b/usr/src/uts/sun4v/sys/dr_cpu.h
new file mode 100644
index 0000000000..070645c556
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/dr_cpu.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DR_CPU_H
+#define _DR_CPU_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * CPU DR Control Protocol
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * CPU DR Message Header
+ */
+typedef struct {
+ uint64_t req_num; /* request number */
+ uint32_t msg_type; /* message type */
+ uint32_t num_records; /* number of records */
+} dr_cpu_hdr_t;
+
+/*
+ * CPU command and response messages
+ */
+
+#define DR_CPU_DS_ID "dr-cpu"
+
+#define DR_CPU_CONFIGURE ('C')
+#define DR_CPU_UNCONFIGURE ('U')
+#define DR_CPU_FORCE_UNCONFIG ('F')
+#define DR_CPU_STATUS ('S')
+
+#define DR_CPU_OK ('o')
+#define DR_CPU_ERROR ('e')
+
+/*
+ * Response Message
+ */
+typedef struct {
+ uint32_t cpuid; /* virtual CPU ID */
+ uint32_t result; /* result of the operation */
+ uint32_t status; /* status of the CPU */
+ uint32_t string_off; /* informational string offset */
+} dr_cpu_stat_t;
+
+/*
+ * Result Codes
+ */
+#define DR_CPU_RES_OK 0x0 /* operation succeeded */
+#define DR_CPU_RES_FAILURE 0x1 /* operation failed */
+#define DR_CPU_RES_BLOCKED 0x2 /* operation was blocked */
+#define DR_CPU_RES_CPU_NOT_RESPONDING 0x3 /* CPU was not responding */
+#define DR_CPU_RES_NOT_IN_MD 0x4 /* CPU not defined in MD */
+
+/*
+ * Status Codes
+ */
+#define DR_CPU_STAT_NOT_PRESENT 0x0 /* CPU ID not in MD */
+#define DR_CPU_STAT_UNCONFIGURED 0x1 /* CPU unconfigured */
+#define DR_CPU_STAT_CONFIGURED 0x2 /* CPU configured */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DR_CPU_H */
diff --git a/usr/src/uts/sun4v/sys/dr_util.h b/usr/src/uts/sun4v/sys/dr_util.h
new file mode 100644
index 0000000000..944738ff29
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/dr_util.h
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DR_UTIL_H
+#define _DR_UTIL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v Common DR Header
+ */
+
+#include <sys/ksynch.h>
+#include <sys/cmn_err.h>
+#include <sys/note.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Debugging support
+ */
+#ifdef DEBUG
+
+extern uint_t dr_debug;
+
+#define DR_DBG_FLAG_CTL 0x01
+#define DR_DBG_FLAG_CPU 0x02
+#define DR_DBG_FLAG_MEM 0x04
+#define DR_DBG_FLAG_IO 0x08
+#define DR_DBG_FLAG_TRANS 0x10
+
+#define DR_DBG_ALL if (dr_debug) printf
+#define DR_DBG_CTL if (dr_debug & DR_DBG_FLAG_CTL) printf
+#define DR_DBG_CPU if (dr_debug & DR_DBG_FLAG_CPU) printf
+#define DR_DBG_MEM if (dr_debug & DR_DBG_FLAG_MEM) printf
+#define DR_DBG_IO if (dr_debug & DR_DBG_FLAG_IO) printf
+#define DR_DBG_TRANS if (dr_debug & DR_DBG_FLAG_TRANS) printf
+
+#define DR_DBG_DUMP_MSG(buf, len) dr_dbg_dump_msg(buf, len)
+
+extern void dr_dbg_dump_msg(void *buf, size_t len);
+
+#else /* DEBUG */
+
+#define DR_DBG_ALL _NOTE(CONSTCOND) if (0) printf
+#define DR_DBG_CTL DR_DBG_ALL
+#define DR_DBG_CPU DR_DBG_ALL
+#define DR_DBG_MEM DR_DBG_ALL
+#define DR_DBG_IO DR_DBG_ALL
+#define DR_DBG_TRANS DR_DBG_ALL
+
+#define DR_DBG_DUMP_MSG(buf, len)
+
+#endif /* DEBUG */
+
+typedef enum {
+ DR_TYPE_INVAL,
+ DR_TYPE_CPU,
+ DR_TYPE_MEM,
+ DR_TYPE_VIO,
+ DR_TYPE_DIO
+} dr_type_t;
+
+/*
+ * Macro to convert a dr_type_t into a string. These strings are
+ * used to generate DR events and should only be modified using
+ * extreme caution.
+ */
+#define DR_TYPE2STR(t) ((t) == DR_TYPE_INVAL ? "invalid" : \
+ (t) == DR_TYPE_CPU ? OBP_CPU : \
+ (t) == DR_TYPE_MEM ? "memory" : \
+ (t) == DR_TYPE_VIO ? "vio" : \
+ (t) == DR_TYPE_DIO ? "dio" : \
+ "unknown")
+
+extern boolean_t dr_is_disabled(dr_type_t type);
+extern void dr_generate_event(dr_type_t type, int se_hint);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DR_UTIL_H */
diff --git a/usr/src/uts/sun4v/sys/ds.h b/usr/src/uts/sun4v/sys/ds.h
new file mode 100644
index 0000000000..cd5efa807f
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/ds.h
@@ -0,0 +1,114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DS_H
+#define _DS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Domain Services Client Interface
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t ds_svc_hdl_t; /* opaque service handle */
+typedef void *ds_cb_arg_t; /* client specified callback arg */
+
+#define DS_INVALID_HDL (0) /* a ds handle cannot be zero */
+
+/*
+ * Domain Services Versioning
+ */
+typedef struct ds_ver {
+ uint16_t major;
+ uint16_t minor;
+} ds_ver_t;
+
+/*
+ * Domain Services Capability
+ *
+ * A DS capability is exported by a client using a unique service
+ * identifier string. Along with this identifier is the list of
+ * versions of the capability that the client supports.
+ */
+typedef struct ds_capability {
+ char *svc_id; /* service identifier */
+ ds_ver_t *vers; /* list of supported versions */
+ int nvers; /* number of supported versions */
+} ds_capability_t;
+
+/*
+ * Domain Services Client Event Callbacks
+ *
+ * A client implementing a DS capability provides a set of callbacks
+ * when it registers with the DS framework. The use of these callbacks
+ * is described below:
+ *
+ * ds_reg_cb(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
+ *
+ * The ds_reg_cb() callback is invoked when the DS framework
+ * has successfully completed version negotiation with the
+ * remote endpoint for the capability. It provides the client
+ * with the negotiated version and a handle to use when sending
+ * data.
+ *
+ * ds_unreg_cb(ds_cb_arg_t arg)
+ *
+ * The ds_unreg_cb() callback is invoked when the DS framework
+ * detects an event that causes the registered capability to
+ * become unavailable. This includes an explicit unregister
+ * message, a failure in the underlying communication transport,
+ * etc. Any such event invalidates the service handle that was
+ * received from the register callback.
+ *
+ * ds_data_cb(ds_cb_arg_t arg, void *buf, size_t buflen)
+ *
+ * The ds_data_cb() callback is invoked whenever there is an
+ * incoming data message for the client to process. It provides
+ * the contents of the message along with the message length.
+ */
+typedef struct ds_clnt_ops {
+ void (*ds_reg_cb)(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl);
+ void (*ds_unreg_cb)(ds_cb_arg_t arg);
+ void (*ds_data_cb)(ds_cb_arg_t arg, void *buf, size_t buflen);
+ ds_cb_arg_t cb_arg;
+} ds_clnt_ops_t;
+
+/*
+ * Domain Services Capability Interface
+ */
+extern int ds_cap_init(ds_capability_t *cap, ds_clnt_ops_t *ops);
+extern int ds_cap_fini(ds_capability_t *cap);
+extern int ds_cap_send(ds_svc_hdl_t hdl, void *buf, size_t buflen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DS_H */
diff --git a/usr/src/uts/sun4v/sys/ds_impl.h b/usr/src/uts/sun4v/sys/ds_impl.h
new file mode 100644
index 0000000000..461214f4e3
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/ds_impl.h
@@ -0,0 +1,332 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DS_IMPL_H
+#define _DS_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The Domain Services Protocol
+ *
+ * The DS protocol is divided into two parts. The first is fixed and
+ * must remain exactly the same for *all* versions of the DS protocol.
+ * The only messages supported by the fixed portion of the protocol are
+ * to negotiate a version to use for the rest of the protocol.
+ */
+
+/*
+ * Domain Services Header
+ */
+typedef struct ds_hdr {
+ uint32_t msg_type; /* message type */
+ uint32_t payload_len; /* payload length */
+} ds_hdr_t;
+
+#define DS_HDR_SZ (sizeof (ds_hdr_t))
+
+/*
+ * DS Fixed Message Types
+ */
+#define DS_INIT_REQ 0x0 /* initiate DS connection */
+#define DS_INIT_ACK 0x1 /* initiation acknowledgement */
+#define DS_INIT_NACK 0x2 /* initiation negative acknowledgement */
+
+/*
+ * DS Fixed Initialization Messages
+ */
+typedef struct ds_init_req {
+ uint16_t major_vers; /* requested major version */
+ uint16_t minor_vers; /* requested minor version */
+} ds_init_req_t;
+
+typedef struct ds_init_ack {
+ uint16_t minor_vers; /* highest supported minor version */
+} ds_init_ack_t;
+
+typedef struct ds_init_nack {
+ uint16_t major_vers; /* alternate supported major version */
+} ds_init_nack_t;
+
+/*
+ * DS Message Types for Version 1.0
+ */
+#define DS_REG_REQ 0x3 /* register a service */
+#define DS_REG_ACK 0x4 /* register acknowledgement */
+#define DS_REG_NACK 0x5 /* register failed */
+#define DS_UNREG 0x6 /* unregister a service */
+#define DS_UNREG_ACK 0x7 /* unregister acknowledgement */
+#define DS_UNREG_NACK 0x8 /* unregister failed */
+#define DS_DATA 0x9 /* data message */
+#define DS_NACK 0xa /* data error */
+
+/* result codes */
+#define DS_OK 0x0 /* success */
+#define DS_REG_VER_NACK 0x1 /* unsupported major version */
+#define DS_REG_DUP 0x2 /* duplicate registration attempted */
+#define DS_INV_HDL 0x3 /* service handle not valid */
+#define DS_TYPE_UNKNOWN 0x4 /* unknown message type received */
+
+/*
+ * Service Register Messages
+ */
+typedef struct ds_reg_req {
+ uint64_t svc_handle; /* service handle to register */
+ uint16_t major_vers; /* requested major version */
+ uint16_t minor_vers; /* requested minor version */
+ char svc_id[1]; /* service identifier string */
+} ds_reg_req_t;
+
+typedef struct ds_reg_ack {
+ uint64_t svc_handle; /* service handle sent in register */
+ uint16_t minor_vers; /* highest supported minor version */
+} ds_reg_ack_t;
+
+typedef struct ds_reg_nack {
+ uint64_t svc_handle; /* service handle sent in register */
+ uint64_t result; /* reason for the failure */
+ uint16_t major_vers; /* alternate supported major version */
+} ds_reg_nack_t;
+
+/*
+ * Service Unregister Messages
+ */
+typedef struct ds_unreg_req {
+ uint64_t svc_handle; /* service handle to unregister */
+} ds_unreg_req_t;
+
+typedef struct ds_unreg_ack {
+ uint64_t svc_handle; /* service handle sent in unregister */
+} ds_unreg_ack_t;
+
+typedef struct ds_unreg_nack {
+ uint64_t svc_handle; /* service handle sent in unregister */
+} ds_unreg_nack_t;
+
+/*
+ * Data Transfer Messages
+ */
+typedef struct ds_data_handle {
+ uint64_t svc_handle; /* service handle for data */
+} ds_data_handle_t;
+
+typedef struct ds_data_nack {
+ uint64_t svc_handle; /* service handle sent in data msg */
+ uint64_t result; /* reason for failure */
+} ds_data_nack_t;
+
+/*
+ * Message Processing Utilities
+ */
+#define DS_MSG_TYPE_VALID(type) ((type) <= DS_NACK)
+#define DS_MSG_LEN(ds_type) (sizeof (ds_hdr_t) + sizeof (ds_type))
+
+
+/*
+ * Domain Service Port
+ *
+ * A DS port is a logical representation of an LDC dedicated to
+ * communication between DS endpoints. The ds_port_t maintains state
+ * associated with a connection to a remote endpoint. This includes
+ * the state of the port, the LDC state, the current version of the
+ * DS protocol in use on the port, and other port properties.
+ *
+ * Locking: The port is protected by a single mutex. It must be held
+ * while the port structure is being accessed and also when data is
+ * being read or written using the port
+ */
+typedef enum {
+ DS_PORT_FREE, /* port structure not in use */
+ DS_PORT_INIT, /* port structure created */
+ DS_PORT_LDC_INIT, /* ldc successfully initialized */
+ DS_PORT_INIT_REQ, /* initialization handshake sent */
+ DS_PORT_READY /* init handshake completed */
+} ds_port_state_t;
+
+typedef struct ds_ldc {
+ uint64_t id; /* LDC id */
+ ldc_handle_t hdl; /* LDC handle */
+ ldc_status_t state; /* current LDC state */
+} ds_ldc_t;
+
+typedef struct ds_port {
+ kmutex_t lock; /* port lock */
+ uint64_t id; /* port id from MD */
+ ds_port_state_t state; /* state of the port */
+ ds_ver_t ver; /* DS protocol version in use */
+ uint32_t ver_idx; /* index of version during handshake */
+ ds_ldc_t ldc; /* LDC for this port */
+} ds_port_t;
+
+/*
+ * A DS portset is a bitmap that represents a collection of DS
+ * ports. Each bit represents a particular port id. The current
+ * implementation constrains the maximum number of ports to 64.
+ */
+typedef uint64_t ds_portset_t;
+
+#define DS_MAX_PORTS ((sizeof (ds_portset_t)) * 8)
+#define DS_MAX_PORT_ID (DS_MAX_PORTS - 1)
+
+#define DS_PORT_SET(port) (1UL << (port))
+#define DS_PORT_IN_SET(set, port) ((set) & DS_PORT_SET(port))
+#define DS_PORTSET_ADD(set, port) ((void)((set) |= DS_PORT_SET(port)))
+#define DS_PORTSET_DEL(set, port) ((void)((set) &= ~DS_PORT_SET(port)))
+#define DS_PORTSET_ISNULL(set) ((set) == 0)
+#define DS_PORTSET_DUP(set1, set2) ((void)((set1) = (set2)))
+
+/*
+ * LDC Information
+ */
+#define DS_QUEUE_LEN 128 /* LDC queue size */
+
+/*
+ * Machine Description Constants
+ */
+#define DS_MD_PORT_NAME "domain-services-port"
+#define DS_MD_CHAN_NAME "channel-endpoint"
+
+/*
+ * DS Services
+ *
+ * A DS Service is a mapping between a DS capability and a client
+ * of the DS framework that provides that capability. It includes
+ * information on the state of the service, the currently negotiated
+ * version of the capability specific protocol, the port that is
+ * currently in use by the capability, etc.
+ */
+
+typedef enum {
+ DS_SVC_INVAL, /* svc structure uninitialized */
+ DS_SVC_FREE, /* svc structure not in use */
+ DS_SVC_INACTIVE, /* svc not registered */
+ DS_SVC_REG_PENDING, /* register message sent */
+ DS_SVC_ACTIVE /* register message acknowledged */
+} ds_svc_state_t;
+
+typedef struct ds_svc {
+ ds_capability_t cap; /* capability information */
+ ds_clnt_ops_t ops; /* client ops vector */
+ ds_svc_hdl_t hdl; /* handle assigned by DS */
+ ds_svc_state_t state; /* current service state */
+ ds_ver_t ver; /* svc protocol version in use */
+ uint_t ver_idx; /* index into client version array */
+ ds_port_t *port; /* port for this service */
+ ds_portset_t avail; /* ports available to this service */
+} ds_svc_t;
+
+#define DS_SVC_ISFREE(svc) ((svc == NULL) || (svc->state == DS_SVC_FREE))
+
+/*
+ * A service handle is a 64 bit value with two pieces of information
+ * encoded in it. The upper 32 bits are the index into the table of
+ * a particular service structure. The lower 32 bits are a counter
+ * that is incremented each time a service structure is reused.
+ */
+#define DS_IDX_SHIFT 32
+#define DS_COUNT_MASK 0xfffffffful
+
+#define DS_ALLOC_HDL(_idx, _count) (((uint64_t)_idx << DS_IDX_SHIFT) | \
+ ((uint64_t)(_count + 1) & \
+ DS_COUNT_MASK))
+#define DS_HDL2IDX(hdl) (hdl >> DS_IDX_SHIFT)
+#define DS_HDL2COUNT(hdl) (hdl & DS_COUNT_MASK)
+
+/*
+ * DS Message Logging
+ *
+ * The DS framework logs all incoming and outgoing messages to a
+ * ring buffer. This provides the ability to reconstruct a trace
+ * of DS activity for use in debugging. In addition to the message
+ * data, each log entry contains a timestamp and the destination
+ * of the message. The destination is based on the port number the
+ * message passed through (port number + 1). The sign of the dest
+ * field distinguishes incoming messages from outgoing messages.
+ * Incoming messages have a negative destination field.
+ */
+
+typedef struct ds_log_entry {
+ struct ds_log_entry *next; /* next in log or free list */
+ struct ds_log_entry *prev; /* previous in log */
+ time_t timestamp; /* time message added to log */
+ size_t datasz; /* size of the data */
+ void *data; /* the data itself */
+ int32_t dest; /* message destination */
+} ds_log_entry_t;
+
+#define DS_LOG_IN(pid) (-(pid + 1))
+#define DS_LOG_OUT(pid) (pid + 1)
+
+/*
+ * DS Log Limits:
+ *
+ * The size of the log is controlled by two limits. The first is
+ * a soft limit that is configurable by the user (via the global
+ * variable ds_log_sz). When this limit is exceeded, each new
+ * message that is added to the log replaces the oldest message.
+ *
+ * The second is a hard limit that is calculated based on the soft
+ * limit (DS_LOG_LIMIT). It is defined to be ~3% above the soft limit.
+ * Once this limit is exceeded, a thread is scheduled to delete old
+ * messages until the size of the log is below the soft limit.
+ */
+#define DS_LOG_DEFAULT_SZ (128 * 1024) /* 128 KB */
+
+#define DS_LOG_LIMIT (ds_log_sz + (ds_log_sz >> 5))
+
+#define DS_LOG_ENTRY_SZ(ep) (sizeof (ds_log_entry_t) + (ep)->datasz)
+
+/*
+ * DS Log Memory Usage:
+ *
+ * The log free list is initialized from a pre-allocated pool of entry
+ * structures (the global ds_log_entry_pool). The number of entries
+ * in the pool (DS_LOG_NPOOL) is the number of entries that would
+ * take up half the default size of the log.
+ *
+ * As messages are added to the log, entry structures are pulled from
+ * the free list. If the free list is empty, memory is allocated for
+ * the entry. When entries are removed from the log, they are placed
+ * on the free list. Allocated memory is only deallocated when the
+ * entire log is destroyed.
+ */
+#define DS_LOG_NPOOL ((DS_LOG_DEFAULT_SZ >> 1) / \
+ sizeof (ds_log_entry_t))
+
+#define DS_LOG_POOL_END (ds_log_entry_pool + DS_LOG_NPOOL)
+
+#define DS_IS_POOL_ENTRY(ep) (((ep) >= ds_log_entry_pool) && \
+ ((ep) <= &(ds_log_entry_pool[DS_LOG_NPOOL])))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DS_IMPL_H */
diff --git a/usr/src/uts/sun4v/sys/error.h b/usr/src/uts/sun4v/sys/error.h
index bad9123cec..eac767ed56 100644
--- a/usr/src/uts/sun4v/sys/error.h
+++ b/usr/src/uts/sun4v/sys/error.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -33,15 +32,11 @@
extern "C" {
#endif
-#define CPU_RQ_ENTRIES 64
-#define CPU_NRQ_ENTRIES 64
-
-
/*
* Resumable and Non-resumable queues
*/
-#define CPU_RQ 0x3e
-#define CPU_NRQ 0x3f
+#define CPU_RQ_ENTRIES 64
+#define CPU_NRQ_ENTRIES 64
#define Q_ENTRY_SIZE 64
#define CPU_RQ_SIZE (CPU_RQ_ENTRIES * Q_ENTRY_SIZE)
#define CPU_NRQ_SIZE (CPU_NRQ_ENTRIES * Q_ENTRY_SIZE)
diff --git a/usr/src/uts/sun4v/sys/fault_iso.h b/usr/src/uts/sun4v/sys/fault_iso.h
new file mode 100644
index 0000000000..1566386df5
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/fault_iso.h
@@ -0,0 +1,96 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _FAULT_ISO_H
+#define _FAULT_ISO_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* FMA CPU message numbers */
+#define FMA_CPU_REQ_STATUS 0x0
+#define FMA_CPU_REQ_OFFLINE 0x1
+#define FMA_CPU_REQ_ONLINE 0x2
+
+typedef struct {
+ uint64_t req_num;
+ uint32_t msg_type;
+ uint32_t cpu_id;
+} fma_cpu_service_req_t;
+
+/* FMA CPU result codes */
+#define FMA_CPU_RESP_OK 0x0
+#define FMA_CPU_RESP_FAILURE 0x1
+
+/* FMA CPU status codes */
+#define FMA_CPU_STAT_ONLINE 0x0
+#define FMA_CPU_STAT_OFFLINE 0x1
+#define FMA_CPU_STAT_ILLEGAL 0x2
+
+typedef struct {
+ uint64_t req_num;
+ uint32_t result;
+ uint32_t status;
+} fma_cpu_resp_t;
+
+/* FMA memory services message numbers */
+#define FMA_MEM_REQ_STATUS 0x0
+#define FMA_MEM_REQ_RETIRE 0x1
+#define FMA_MEM_REQ_RESURRECT 0x2
+
+typedef struct {
+ uint64_t req_num;
+ uint32_t msg_type;
+ uint32_t _resvd;
+ uint64_t real_addr;
+ uint64_t length;
+} fma_mem_service_req_t;
+
+/* FMA result codes */
+#define FMA_MEM_RESP_OK 0x0
+#define FMA_MEM_RESP_FAILURE 0x1
+
+/* FMA status codes */
+#define FMA_MEM_STAT_NOTRETIRED 0x0
+#define FMA_MEM_STAT_RETIRED 0x1
+#define FMA_MEM_STAT_ILLEGAL 0x2
+
+typedef struct {
+ uint64_t req_num;
+ uint32_t result;
+ uint32_t status;
+ uint64_t res_addr;
+ uint64_t res_length;
+} fma_mem_resp_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _FAULT_ISO_H */
diff --git a/usr/src/uts/sun4v/sys/hsvc.h b/usr/src/uts/sun4v/sys/hsvc.h
index 80c2a58172..72e2042a80 100644
--- a/usr/src/uts/sun4v/sys/hsvc.h
+++ b/usr/src/uts/sun4v/sys/hsvc.h
@@ -43,6 +43,7 @@ extern "C" {
*/
#define HSVC_GROUP_SUN4V 0x0000
#define HSVC_GROUP_CORE 0x0001
+#define HSVC_GROUP_INTR 0x0002
#define HSVC_GROUP_VPCI 0x0100
#define HSVC_GROUP_LDC 0x0101
#define HSVC_GROUP_VSC 0x0102
diff --git a/usr/src/uts/sun4v/sys/hypervisor_api.h b/usr/src/uts/sun4v/sys/hypervisor_api.h
index 57e4808a75..d750b83ae5 100644
--- a/usr/src/uts/sun4v/sys/hypervisor_api.h
+++ b/usr/src/uts/sun4v/sys/hypervisor_api.h
@@ -49,6 +49,8 @@ extern "C" {
#define MMU_MAP_ADDR 0x83
#define MMU_UNMAP_ADDR 0x84
+#define CORE_TRAP 0xff
+
/*
* Error returns in %o0.
* (Additional result is returned in %o1.)
@@ -71,6 +73,8 @@ extern "C" {
#define H_ENOMAP 14 /* Mapping is not valid, */
/* no translation exists */
#define H_EBUSY 17 /* Resource busy */
+#define H_ETOOMANY 15 /* Hard resource limit exceeded */
+#define H_ECHANNEL 16 /* Illegal LDC channel */
#define H_BREAK -1 /* Console Break */
#define H_HUP -2 /* Console Break */
@@ -85,9 +89,15 @@ extern "C" {
*/
#define HV_MACH_EXIT 0x00
#define HV_MACH_DESC 0x01
+#define HV_MACH_SIR 0x02
+
+#define HV_CPU_START 0x10
+#define HV_CPU_STOP 0x11
#define HV_CPU_YIELD 0x12
-#define CPU_QCONF 0x14
+#define HV_CPU_QCONF 0x14
#define HV_CPU_STATE 0x17
+#define HV_CPU_SET_RTBA 0x18
+
#define MMU_TSB_CTX0 0x20
#define MMU_TSB_CTXNON0 0x21
#define MMU_DEMAP_PAGE 0x22
@@ -95,20 +105,24 @@ extern "C" {
#define MMU_DEMAP_ALL 0x24
#define MAP_PERM_ADDR 0x25
#define MMU_SET_INFOPTR 0x26
+#define MMU_ENABLE 0x27
#define UNMAP_PERM_ADDR 0x28
+
#define HV_MEM_SCRUB 0x31
#define HV_MEM_SYNC 0x32
+
#define HV_INTR_SEND 0x42
+
#define TOD_GET 0x50
#define TOD_SET 0x51
-#define CONS_READ 0x60
-#define CONS_WRITE 0x61
+
+#define CONS_GETCHAR 0x60
+#define CONS_PUTCHAR 0x61
#define TTRACE_BUF_CONF 0x90
#define TTRACE_BUF_INFO 0x91
#define TTRACE_ENABLE 0x92
#define TTRACE_FREEZE 0x93
-
#define DUMP_BUF_UPDATE 0x94
#define HVIO_INTR_DEVINO2SYSINO 0xa0
@@ -119,6 +133,31 @@ extern "C" {
#define HVIO_INTR_GETTARGET 0xa5
#define HVIO_INTR_SETTARGET 0xa6
+#define VINTR_GET_COOKIE 0xa7
+#define VINTR_SET_COOKIE 0xa8
+#define VINTR_GET_VALID 0xa9
+#define VINTR_SET_VALID 0xaa
+#define VINTR_GET_STATE 0xab
+#define VINTR_SET_STATE 0xac
+#define VINTR_GET_TARGET 0xad
+#define VINTR_SET_TARGET 0xae
+
+#define LDC_TX_QCONF 0xe0
+#define LDC_TX_QINFO 0xe1
+#define LDC_TX_GET_STATE 0xe2
+#define LDC_TX_SET_QTAIL 0xe3
+#define LDC_RX_QCONF 0xe4
+#define LDC_RX_QINFO 0xe5
+#define LDC_RX_GET_STATE 0xe6
+#define LDC_RX_SET_QHEAD 0xe7
+
+#define LDC_SET_MAP_TABLE 0xea
+#define LDC_GET_MAP_TABLE 0xeb
+#define LDC_COPY 0xec
+#define LDC_MAPIN 0xed
+#define LDC_UNMAP 0xee
+#define LDC_REVOKE 0xef
+
#ifdef SET_MMU_STATS
#define MMU_STAT_AREA 0xfc
#endif /* SET_MMU_STATS */
@@ -127,6 +166,15 @@ extern "C" {
#define HV_HPRIV 0x201
/*
+ * Function numbers for CORE_TRAP.
+ */
+#define API_SET_VERSION 0x00
+#define API_PUT_CHAR 0x01
+#define API_EXIT 0x02
+#define API_GET_VERSION 0x03
+
+
+/*
* Bits for MMU functions flags argument:
* arg3 of MMU_MAP_ADDR
* arg3 of MMU_DEMAP_CTX
@@ -188,14 +236,14 @@ struct mmu_stat {
};
#endif /* SET_MMU_STATS */
-#endif /* _ASM */
+#endif /* ! _ASM */
/*
* CPU States
*/
#define CPU_STATE_INVALID 0x0
-#define CPU_STATE_IDLE 0x1 /* cpu not started */
-#define CPU_STATE_GUEST 0x2 /* cpu running guest code */
+#define CPU_STATE_STOPPED 0x1 /* cpu not started */
+#define CPU_STATE_RUNNING 0x2 /* cpu running guest code */
#define CPU_STATE_ERROR 0x3 /* cpu is in the error state */
#define CPU_STATE_LAST_PUBLIC CPU_STATE_ERROR /* last valid state */
@@ -256,19 +304,34 @@ struct mmu_stat {
#define HVIO_DMA_SYNC_DIR_TO_DEV 0x01
#define HVIO_DMA_SYNC_DIR_FROM_DEV 0x02
+/*
+ * LDC Channel States
+ */
+#define LDC_CHANNEL_DOWN 0x0
+#define LDC_CHANNEL_UP 0x1
+#define LDC_CHANNEL_RESET 0x2
+
#ifndef _ASM
extern uint64_t hv_mmu_map_perm_addr(void *, int, uint64_t, int);
extern uint64_t hv_mmu_unmap_perm_addr(void *, int, int);
+extern uint64_t hv_mach_exit(uint64_t exit_code);
+extern uint64_t hv_mach_sir(void);
+
+extern uint64_t hv_cpu_start(uint64_t cpuid, uint64_t pc, uint64_t rtba,
+ uint64_t arg);
+extern uint64_t hv_cpu_stop(uint64_t cpuid);
+extern uint64_t hv_cpu_set_rtba(uint64_t *rtba);
+
extern uint64_t hv_set_ctx0(uint64_t, uint64_t);
extern uint64_t hv_set_ctxnon0(uint64_t, uint64_t);
+extern uint64_t hv_mmu_fault_area_conf(void *raddr);
#ifdef SET_MMU_STATS
extern uint64_t hv_mmu_set_stat_area(uint64_t, uint64_t);
#endif /* SET_MMU_STATS */
extern uint64_t hv_cpu_qconf(int queue, uint64_t paddr, int size);
-extern uint64_t hv_cpu_yield();
-
+extern uint64_t hv_cpu_yield(void);
extern uint64_t hv_cpu_state(uint64_t cpuid, uint64_t *cpu_state);
extern uint64_t hv_mem_scrub(uint64_t real_addr, uint64_t length,
uint64_t *scrubbed_len);
@@ -282,7 +345,6 @@ extern uint64_t hv_service_send(uint64_t s_id, uint64_t buf_pa,
extern uint64_t hv_service_getstatus(uint64_t s_id, uint64_t *vreg);
extern uint64_t hv_service_setstatus(uint64_t s_id, uint64_t bits);
extern uint64_t hv_service_clrstatus(uint64_t s_id, uint64_t bits);
-
extern uint64_t hv_mach_desc(uint64_t buffer_ra, uint64_t *buffer_sizep);
extern uint64_t hv_ttrace_buf_info(uint64_t *, uint64_t *);
@@ -300,16 +362,64 @@ extern uint64_t hv_tod_set(uint64_t);
extern uint64_t hvio_intr_devino_to_sysino(uint64_t dev_hdl, uint32_t devino,
uint64_t *sysino);
extern uint64_t hvio_intr_getvalid(uint64_t sysino,
- int *intr_valid_state);
+ int *intr_valid_state);
extern uint64_t hvio_intr_setvalid(uint64_t sysino,
- int intr_valid_state);
+ int intr_valid_state);
extern uint64_t hvio_intr_getstate(uint64_t sysino,
- int *intr_state);
+ int *intr_state);
extern uint64_t hvio_intr_setstate(uint64_t sysino, int intr_state);
extern uint64_t hvio_intr_gettarget(uint64_t sysino, uint32_t *cpuid);
extern uint64_t hvio_intr_settarget(uint64_t sysino, uint32_t cpuid);
-#endif
+extern uint64_t hv_ldc_tx_qconf(uint64_t channel, uint64_t ra_base,
+ uint64_t nentries);
+extern uint64_t hv_ldc_tx_qinfo(uint64_t channel, uint64_t *ra_base,
+ uint64_t *nentries);
+extern uint64_t hv_ldc_tx_get_state(uint64_t channel, uint64_t *headp,
+ uint64_t *tailp, uint64_t *state);
+extern uint64_t hv_ldc_tx_set_qtail(uint64_t channel, uint64_t tail);
+extern uint64_t hv_ldc_rx_qconf(uint64_t channel, uint64_t ra_base,
+ uint64_t nentries);
+extern uint64_t hv_ldc_rx_qinfo(uint64_t channel, uint64_t *ra_base,
+ uint64_t *nentries);
+extern uint64_t hv_ldc_rx_get_state(uint64_t channel, uint64_t *headp,
+ uint64_t *tailp, uint64_t *state);
+extern uint64_t hv_ldc_rx_set_qhead(uint64_t channel, uint64_t head);
+
+extern uint64_t hv_ldc_set_map_table(uint64_t channel, uint64_t tbl_ra,
+ uint64_t tbl_entries);
+extern uint64_t hv_ldc_get_map_table(uint64_t channel, uint64_t *tbl_ra,
+ uint64_t *tbl_entries);
+extern uint64_t hv_ldc_copy(uint64_t channel, uint64_t request,
+ uint64_t cookie, uint64_t raddr, uint64_t length, uint64_t *lengthp);
+extern uint64_t hv_ldc_mapin(uint64_t channel, uint64_t cookie,
+ uint64_t *raddr, uint64_t *perm);
+extern uint64_t hv_ldc_unmap(uint64_t raddr);
+extern uint64_t hv_ldc_revoke(uint64_t raddr);
+extern uint64_t hv_api_get_version(uint64_t api_group, uint64_t *majorp,
+ uint64_t *minorp);
+extern uint64_t hv_api_set_version(uint64_t api_group, uint64_t major,
+ uint64_t minor, uint64_t *supported_minor);
+
+extern uint64_t hvldc_intr_getcookie(uint64_t dev_hdl, uint32_t devino,
+ uint64_t *cookie);
+extern uint64_t hvldc_intr_setcookie(uint64_t dev_hdl, uint32_t devino,
+ uint64_t cookie);
+extern uint64_t hvldc_intr_getvalid(uint64_t dev_hdl, uint32_t devino,
+ int *intr_valid_state);
+extern uint64_t hvldc_intr_setvalid(uint64_t dev_hdl, uint32_t devino,
+ int intr_valid_state);
+extern uint64_t hvldc_intr_getstate(uint64_t dev_hdl, uint32_t devino,
+ int *intr_state);
+extern uint64_t hvldc_intr_setstate(uint64_t dev_hdl, uint32_t devino,
+ int intr_state);
+extern uint64_t hvldc_intr_gettarget(uint64_t dev_hdl, uint32_t devino,
+ uint32_t *cpuid);
+extern uint64_t hvldc_intr_settarget(uint64_t dev_hdl, uint32_t devino,
+ uint32_t cpuid);
+
+#endif /* ! _ASM */
+
#ifdef __cplusplus
}
diff --git a/usr/src/uts/sun4v/sys/ldc.h b/usr/src/uts/sun4v/sys/ldc.h
new file mode 100644
index 0000000000..a9718b6591
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/ldc.h
@@ -0,0 +1,221 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LDC_H
+#define _LDC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ioctl.h>
+#include <sys/processor.h>
+
+/* Types */
+typedef uint64_t ldc_handle_t; /* Channel handle */
+typedef uint64_t ldc_mem_handle_t; /* Channel memory handle */
+typedef uint64_t ldc_dring_handle_t; /* Descriptor ring handle */
+
+/* LDC transport mode */
+typedef enum {
+ LDC_MODE_RAW, /* Raw mode */
+ LDC_MODE_UNRELIABLE, /* Unreliable packet mode */
+ LDC_MODE_RELIABLE, /* Reliable packet mode */
+ LDC_MODE_STREAM /* Reliable byte stream */
+} ldc_mode_t;
+
+/* LDC message payload sizes */
+#define LDC_ELEM_SIZE 8 /* size in bytes */
+#define LDC_PACKET_SIZE (LDC_ELEM_SIZE * 8)
+#define LDC_PAYLOAD_SIZE_RAW (LDC_PACKET_SIZE)
+#define LDC_PAYLOAD_SIZE_UNRELIABLE (LDC_PACKET_SIZE - LDC_ELEM_SIZE)
+#define LDC_PAYLOAD_SIZE_RELIABLE (LDC_PACKET_SIZE - (LDC_ELEM_SIZE * 2))
+
+/* LDC Channel Status */
+typedef enum {
+ LDC_INIT = 1, /* Channel initialized */
+ LDC_OPEN, /* Channel open */
+ LDC_READY, /* Channel peer opened (hw-link-up) */
+ LDC_UP /* Channel UP - ready for data xfer */
+} ldc_status_t;
+
+/* Callback return values */
+#define LDC_SUCCESS 0
+#define LDC_FAILURE 1
+
+/* LDC callback mode */
+typedef enum {
+ LDC_CB_ENABLE, /* Enable callbacks */
+ LDC_CB_DISABLE /* Disable callbacks */
+} ldc_cb_mode_t;
+
+/* Callback events */
+#define LDC_EVT_DOWN 0x1 /* Channel DOWN, status = OPEN */
+#define LDC_EVT_RESET 0x2 /* Channel RESET, status = READY */
+#define LDC_EVT_UP 0x4 /* Channel UP, status = UP */
+#define LDC_EVT_READ 0x8 /* Channel has data for read */
+#define LDC_EVT_WRITE 0x10 /* Channel has space for write */
+
+/* LDC device classes */
+typedef enum {
+ LDC_DEV_GENERIC = 1, /* generic device */
+ LDC_DEV_BLK, /* block device, eg. vdc */
+ LDC_DEV_BLK_SVC, /* block device service, eg. vds */
+ LDC_DEV_NT, /* network device, eg. vnet */
+ LDC_DEV_NT_SVC, /* network service eg. vsw */
+ LDC_DEV_SERIAL /* serial device eg. vldc, vcc */
+} ldc_dev_t;
+
+/* Channel nexus registration */
+typedef struct ldc_cnex {
+ dev_info_t *dip; /* dip of channel nexus */
+ int (*reg_chan)(); /* interface for channel register */
+ int (*unreg_chan)(); /* interface for channel unregister */
+ int (*add_intr)(); /* interface for adding interrupts */
+ int (*rem_intr)(); /* interface for removing interrupts */
+ int (*clr_intr)(); /* interface for clearing interrupts */
+} ldc_cnex_t;
+
+/* LDC attribute structure */
+
+/*
+ * FIXME: Attribute passed in should be an MTU size
+ * Allocate the queue internally to the ldc module to accommodate
+ * based on MTU size. For streaming mode, size can be zero.
+ */
+
+typedef struct ldc_attr {
+ ldc_dev_t devclass; /* device class */
+ uint64_t instance; /* device class instance */
+ ldc_mode_t mode; /* channel mode */
+ uint64_t qlen; /* channel queue elements */
+} ldc_attr_t;
+
+/* LDC memory cookie */
+typedef struct ldc_mem_cookie {
+ uint64_t addr; /* cookie address */
+ uint64_t size; /* size @ offset */
+} ldc_mem_cookie_t;
+
+/*
+ * LDC Memory Map Type
+ * Specifies how shared memory being created is shared with its
+ * peer and/or how the peer has mapped in the exported memory.
+ */
+#define LDC_SHADOW_MAP 0x1 /* share mem via shadow copy only */
+#define LDC_DIRECT_MAP 0x2 /* share mem direct access */
+#define LDC_IO_MAP 0x4 /* share mem for IOMMU/DMA access */
+
+/* LDC Memory Access Permissions */
+#define LDC_MEM_R 0x1 /* Memory region is read only */
+#define LDC_MEM_W 0x2 /* Memory region is write only */
+#define LDC_MEM_X 0x4 /* Memory region is execute only */
+#define LDC_MEM_RW (LDC_MEM_R|LDC_MEM_W)
+#define LDC_MEM_RWX (LDC_MEM_R|LDC_MEM_W|LDC_MEM_X)
+
+/* LDC Memory Copy Direction */
+#define LDC_COPY_IN 0x0 /* Copy data to VA from cookie mem */
+#define LDC_COPY_OUT 0x1 /* Copy data from VA to cookie mem */
+
+/* LDC memory/dring (handle) status */
+typedef enum {
+ LDC_UNBOUND, /* Memory handle is unbound */
+ LDC_BOUND, /* Memory handle is bound */
+ LDC_MAPPED /* Memory handle is mapped */
+} ldc_mstatus_t;
+
+/* LDC [dring] memory info */
+typedef struct ldc_mem_info {
+ uint8_t mtype; /* map type */
+ uint8_t perm; /* RWX permissions */
+ caddr_t vaddr; /* base VA */
+ uintptr_t raddr; /* base RA */
+ ldc_mstatus_t status; /* dring/mem handle status */
+} ldc_mem_info_t;
+
+/* API functions */
+int ldc_register(ldc_cnex_t *cinfo);
+int ldc_unregister(ldc_cnex_t *cinfo);
+
+int ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle);
+int ldc_fini(ldc_handle_t handle);
+int ldc_open(ldc_handle_t handle);
+int ldc_close(ldc_handle_t handle);
+int ldc_up(ldc_handle_t handle);
+int ldc_reset(ldc_handle_t handle);
+int ldc_reg_callback(ldc_handle_t handle,
+ uint_t(*callback)(uint64_t event, caddr_t arg), caddr_t arg);
+int ldc_unreg_callback(ldc_handle_t handle);
+int ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t imode);
+int ldc_chkq(ldc_handle_t handle, boolean_t *isempty);
+int ldc_read(ldc_handle_t handle, caddr_t buf, size_t *size);
+int ldc_write(ldc_handle_t handle, caddr_t buf, size_t *size);
+int ldc_status(ldc_handle_t handle, ldc_status_t *status);
+
+int ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle);
+int ldc_mem_free_handle(ldc_mem_handle_t mhandle);
+int ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
+ uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount);
+int ldc_mem_unbind_handle(ldc_mem_handle_t mhandle);
+int ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo);
+int ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie);
+int ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *len,
+ ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction);
+int ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
+ caddr_t paddr, uint8_t direction);
+int ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie,
+ uint32_t ccount, uint8_t mtype, caddr_t *vaddr, caddr_t *raddr);
+int ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size);
+int ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size);
+
+int ldc_mem_dring_create(uint32_t len, uint32_t dsize,
+ ldc_dring_handle_t *dhandle);
+int ldc_mem_dring_destroy(ldc_dring_handle_t dhandle);
+int ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
+ uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *dcookie, uint32_t *ccount);
+int ldc_mem_dring_nextcookie(ldc_dring_handle_t mhandle,
+ ldc_mem_cookie_t *cookie);
+int ldc_mem_dring_unbind(ldc_dring_handle_t dhandle);
+int ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo);
+int ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
+ uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
+ ldc_dring_handle_t *dhandle);
+int ldc_mem_dring_unmap(ldc_dring_handle_t dhandle);
+int ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start,
+ uint64_t end);
+int ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start,
+ uint64_t end);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LDC_H */
diff --git a/usr/src/uts/sun4v/sys/ldc_impl.h b/usr/src/uts/sun4v/sys/ldc_impl.h
new file mode 100644
index 0000000000..c4fd0ef973
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/ldc_impl.h
@@ -0,0 +1,487 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LDC_IMPL_H
+#define _LDC_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ioctl.h>
+
+/* Memory map table size */
+#define MTBL_MAX_SIZE 65536 /* 64K */
+
+/* Define LDC Queue info */
+#define LDC_PACKET_SHIFT 6
+#define LDC_QUEUE_ENTRIES 128
+#define LDC_QUEUE_SIZE (LDC_QUEUE_ENTRIES << LDC_PACKET_SHIFT)
+#define LDC_STREAM_MTU (LDC_QUEUE_SIZE >> 1)
+
+/*
+ * LDC Reliable mode - initial packet seqid
+ * - If peer initiated handshake, RDX should contain init_seqid + 1
+ * - If this endpoint initiated the handshake, the first data packet should
+ * contain the message init_seqid + 1
+ */
+#define LDC_INIT_SEQID 0x0
+
+/* LDC Message types */
+#define LDC_CTRL 0x01 /* Control Pkt */
+#define LDC_DATA 0x02 /* Data Pkt */
+#define LDC_ERR 0x10 /* Error Pkt */
+
+/* LDC Message Subtypes */
+#define LDC_INFO 0x01 /* Control/Data/Error info pkt */
+#define LDC_ACK 0x02 /* Control/Data ACK */
+#define LDC_NACK 0x04 /* Control/Data NACK */
+
+/* LDC Control Messages */
+#define LDC_VER 0x01 /* Version message */
+#define LDC_RTS 0x02 /* Request to Send */
+#define LDC_RTR 0x03 /* Ready To Receive */
+#define LDC_RDX 0x04 /* Ready for data exchange */
+
+#define LDC_CTRL_MASK 0x0f /* Mask to read control bits */
+
+/* LDC Channel Transport State (tstate) */
+#define TS_TXQ_RDY 0x01 /* allocated TX queue */
+#define TS_RXQ_RDY 0x02 /* allocated RX queue */
+#define TS_INIT (TS_TXQ_RDY | TS_RXQ_RDY)
+#define TS_QCONF_RDY 0x04 /* registered queues with HV */
+#define TS_CNEX_RDY 0x08 /* registered channel with cnex */
+#define TS_OPEN (TS_INIT | TS_QCONF_RDY | TS_CNEX_RDY)
+#define TS_LINK_READY 0x10 /* both endpts registered Rx queues */
+#define TS_READY (TS_OPEN | TS_LINK_READY)
+#define TS_VER_DONE 0x20 /* negotiated version */
+#define TS_VREADY (TS_READY | TS_VER_DONE)
+#define TS_HSHAKE_DONE 0x40 /* completed handshake */
+#define TS_UP (TS_READY | TS_VER_DONE | TS_HSHAKE_DONE)
+
+/* LDC Channel Transport Handshake states */
+#define TS_SENT_RTS 0x01 /* Sent RTS */
+#define TS_RCVD_RTR 0x02 /* Received RTR */
+#define TS_SENT_RDX 0x04 /* Sent RDX */
+#define TS_RCVD_RTS 0x10 /* Received RTS */
+#define TS_SENT_RTR 0x20 /* Sent RTR */
+#define TS_RCVD_RDX 0x40 /* Received RDX */
+
+/* LDC MSG Envelope */
+#define LDC_LEN_MASK 0x3F
+#define LDC_FRAG_MASK 0xC0
+
+#define LDC_FRAG_START 0x40 /* frag_info = 0x01 */
+#define LDC_FRAG_STOP 0x80 /* frag_info = 0x02 */
+#define LDC_FRAG_CONT 0x00 /* frag_info = 0x00 */
+
+/*
+ * LDC fragmented xfer loop wait cnt
+ * When data is arriving in fragments, the read thread will
+ * look for a packet 'LDC_CHK_CNT' times. Between each check
+ * it will loop 'LDC_LOOP_CNT' times
+ */
+#define LDC_CHK_CNT 1000
+#define LDC_LOOP_CNT 1000
+
+/*
+ * LDC Version information
+ */
+#define LDC_PAYLOAD_VER_OFF 8 /* offset of version in payload */
+
+typedef struct ldc_ver {
+ uint16_t major;
+ uint16_t minor;
+} ldc_ver_t;
+
+/*
+ * Each guest consists of one or more LDC endpoints represented by a ldc_chan
+ * structure. Each ldc_chan structure points to a ldc_mtbl structure that
+ * contains information about the map table associated with this LDC endpoint.
+ * The map table contains the list of pages being shared by this guest over
+ * this endpoint with the guest at the other end of this endpoint. Each LDC
+ * endpoint also points to a list of memory handles used to bind and export
+ * memory segments from this guest. If a memory segment is bound, it points to
+ * a memory segment structure, which in turn consists of an array of ldc_page
+ * structure for all the pages within that segment. Each ldc_page structure
+ * contains information about the shared page and also points to the
+ * corresponding entry in the map table.
+ *
+ * Each LDC endpoint also points to a list of ldc_dring structures that refer
+ * to both imported and exported descriptor rings. If it is an exported
+ * descriptor ring, it then points to memory handle/memseg corresponding to
+ * the region of memory associated with the descriptor ring.
+ *
+ * +----------+ +----------+ +----------+
+ * | ldc_chan |-->| ldc_chan |-->| ldc_chan |-->....
+ * +----------+ +----------+ +----------+
+ * | | |
+ * | | |
+ * | | | +-----------+ +-----------+
+ * | | +----->| ldc_dring |---->| ldc_dring |---->......
+ * | | +-----------+ +-----------+
+ * | | |
+ * | | +----------------------------+
+ * | | |
+ * | | v
+ * | | +----------+ +----------+ +----------+
+ * | +----->| ldc_mhdl |---->| ldc_mhdl |---->| ldc_mhdl |---> ....
+ * | +----------+ +----------+ +----------+
+ * v | |
+ * +----------+ | +------------+ | +------------+
+ * | ldc_mtbl |--+ +--->| ldc_memseg |-----+ +--->| ldc_memseg |
+ * +----------+ | +------------+ | +------------+
+ * | | | | |
+ * v v v | v
+ * +--------------+ +----------+ +--------+ | +--------+
+ * | ldc_mte_slot |<--------| ldc_page | | cookie | | | cookie |
+ * +--------------+ +----------+ +--------+ | +--------+
+ * | ldc_mte_slot |<--------| ldc_page | | cookie | v
+ * +--------------+ +----------+ +--------+ +----------+
+ * | ldc_mte_slot |<-----------------------------------| ldc_page |
+ * +--------------+ +----------+
+ * | ldc_mte_slot |
+ * +--------------+
+ * | ...... |/ +------------+
+ * +--------------+ | entry |
+ * | ldc_mte_slot | +------------+
+ * +--------------+ | inv_cookie |
+ * \ +------------+
+ *
+ */
+
+/*
+ * Message format of each packet sent over the LDC channel.
+ * Each packet is 64-bytes long.
+ *
+ * Each packet that is sent over LDC can contain either data or acks.
+ * The type will reflect the contents. The len will contain in bytes
+ * the amount of data being sent. In the case of ACKs, the seqid and
+ * data fields will contain the SEQIDs of messages for which ACKs are
+ * being sent.
+ *
+ * Raw pkt format:
+ *
+ * +------------------------------------------------------+
+ * 0 - 7 | data payload |
+ * +------------------------------------------------------+
+ *
+ * Unreliable pkt format:
+ *
+ * +------------------------------------------------------+
+ * 0 | seqid | env | ctrl | stype | type |
+ * +------------------------------------------------------+
+ * 1 - 7 | data payload |
+ * +------------------------------------------------------+
+ *
+ * Reliable pkt format:
+ *
+ * +------------------------------------------------------+
+ * 0 | seqid | env | ctrl | stype | type |
+ * +------------------------------------------------------+
+ * 1 | ackid | unused |
+ * +------------------------------------------------------+
+ * 2 - 7 | data payload |
+ * +------------------------------------------------------+
+ */
+
+typedef struct ldc_msg {
+ union {
+ struct {
+ uint8_t _type; /* Message type */
+ uint8_t _stype; /* Message subtype */
+ uint8_t _ctrl; /* Control/Error Message */
+ uint8_t _env; /* Message Envelope */
+ uint32_t _seqid; /* Sequence ID */
+
+ union {
+ uint8_t _ud[LDC_PAYLOAD_SIZE_UNRELIABLE];
+ /* Unreliable data payload */
+ struct {
+ uint32_t _unused; /* unused */
+ uint32_t _ackid; /* ACK ID */
+ uint8_t _rd[LDC_PAYLOAD_SIZE_RELIABLE];
+ /* Reliable data payload */
+ } _rl;
+ } _data;
+ } _tpkt;
+
+ uint8_t _raw[LDC_PAYLOAD_SIZE_RAW];
+ } _pkt;
+
+} ldc_msg_t;
+
+#define raw _pkt._raw
+#define type _pkt._tpkt._type
+#define stype _pkt._tpkt._stype
+#define ctrl _pkt._tpkt._ctrl
+#define env _pkt._tpkt._env
+#define seqid _pkt._tpkt._seqid
+#define udata _pkt._tpkt._data._ud
+#define ackid _pkt._tpkt._data._rl._ackid
+#define rdata _pkt._tpkt._data._rl._rd
+
+/*
+ * LDC Map Table Entry (MTE)
+ *
+ * 6 6 1 1 1
+ * |3 0| psz| 3| 1| 0| 9| 8| 7|6|5|4| 0|
+ * +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
+ * | rsvd | PFN | 0 | 0 |CW|CR|IW|IR|X|W|R| pgszc |
+ * +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
+ * | hv invalidation cookie |
+ * +---------------------------------------------------------------------+
+ */
+typedef union {
+ struct {
+ uint64_t _rsvd2:8, /* <63:56> reserved */
+ rpfn:43, /* <55:13> real pfn */
+ _rsvd1:2, /* <12:11> reserved */
+ cw:1, /* <10> copy write access */
+ cr:1, /* <9> copy read perm */
+ iw:1, /* <8> iommu write perm */
+ ir:1, /* <7> iommu read perm */
+ x:1, /* <6> execute perm */
+ w:1, /* <5> write perm */
+ r:1, /* <4> read perm */
+ pgszc:4; /* <3:0> pgsz code */
+ } mte_bit;
+
+ uint64_t ll;
+
+} ldc_mte_t;
+
+#define mte_rpfn mte_bit.rpfn
+#define mte_cw mte_bit.cw
+#define mte_cr mte_bit.cr
+#define mte_iw mte_bit.iw
+#define mte_ir mte_bit.ir
+#define mte_x mte_bit.x
+#define mte_w mte_bit.w
+#define mte_r mte_bit.r
+#define mte_pgszc mte_bit.pgszc
+
+#define MTE_BSZS_SHIFT(sz) ((sz) * 3)
+#define MTEBYTES(sz) (MMU_PAGESIZE << MTE_BSZS_SHIFT(sz))
+#define MTEPAGES(sz) (1 << MTE_BSZS_SHIFT(sz))
+#define MTE_PAGE_SHIFT(sz) (MMU_PAGESHIFT + MTE_BSZS_SHIFT(sz))
+#define MTE_PAGE_OFFSET(sz) (MTEBYTES(sz) - 1)
+#define MTE_PAGEMASK(sz) (~MTE_PAGE_OFFSET(sz))
+#define MTE_PFNMASK(sz) (~(MTE_PAGE_OFFSET(sz) >> MMU_PAGESHIFT))
+
+/*
+ * LDC Map Table Slot
+ */
+typedef struct ldc_mte_slot {
+ ldc_mte_t entry;
+ uint64_t cookie;
+} ldc_mte_slot_t;
+
+/*
+ * LDC Memory Map Table
+ *
+ * Each LDC has a memory map table it uses to list all the pages
+ * it is exporting to its peer over the channel. This structure
+ * contains information about the map table and is pointed to
+ * by the ldc_chan structure.
+ */
+typedef struct ldc_mtbl {
+ kmutex_t lock; /* Table lock */
+ size_t size; /* Table size (in bytes) */
+ uint64_t next_entry; /* Next entry to use */
+ uint64_t num_entries; /* Num entries in table */
+ uint64_t num_avail; /* Num of available entries */
+ ldc_mte_slot_t *table; /* The table itself */
+} ldc_mtbl_t;
+
+/*
+ * LDC page and memory segment information
+ */
+typedef struct ldc_page {
+ uintptr_t raddr; /* Exported page RA */
+ uint64_t offset; /* Exported page offset */
+ size_t size; /* Exported page size */
+ uint64_t index; /* Index in map table */
+ ldc_mte_slot_t *mte; /* Map table entry */
+} ldc_page_t;
+
+typedef struct ldc_memseg {
+ caddr_t vaddr; /* Exported segment VA */
+ uintptr_t raddr; /* Exported segment RA */
+ size_t size; /* Exported segment size */
+ uint64_t npages; /* Number of pages */
+ ldc_page_t *pages; /* Array of exported pages */
+ uint32_t ncookies; /* Number of cookies */
+ ldc_mem_cookie_t *cookies;
+ uint64_t next_cookie; /* Index to next cookie */
+} ldc_memseg_t;
+
+/*
+ * LDC Cookie address format
+ *
+ * 6 6 m+n
+ * |3| 0| | m| 0|
+ * +-+-------+----------+-------------------+-------------------+
+ * |X| pgszc | rsvd | table_idx | page_offset |
+ * +-+-------+----------+-------------------+-------------------+
+ */
+#define LDC_COOKIE_PGSZC_MASK 0x7
+#define LDC_COOKIE_PGSZC_SHIFT 60
+
+/*
+ * LDC Memory handle
+ */
+typedef struct ldc_chan ldc_chan_t;
+
+typedef struct ldc_mhdl {
+ kmutex_t lock; /* Mutex for memory handle */
+ ldc_mstatus_t status; /* Memory map status */
+
+ uint8_t mtype; /* Type of sharing */
+ uint8_t perm; /* Access permissions */
+ boolean_t myshadow; /* TRUE=alloc'd shadow mem */
+
+ ldc_chan_t *ldcp; /* Pointer to channel struct */
+ ldc_memseg_t *memseg; /* Bound memory segment */
+ struct ldc_mhdl *next; /* Next memory handle */
+} ldc_mhdl_t;
+
+/*
+ * LDC Descriptor rings
+ */
+
+typedef struct ldc_dring {
+ kmutex_t lock; /* Desc ring lock */
+ ldc_mstatus_t status; /* Desc ring status */
+
+ uint32_t dsize; /* Descriptor size */
+ uint32_t length; /* Descriptor ring length */
+ uint64_t size; /* Desc ring size (in bytes) */
+ caddr_t base; /* Descriptor ring base addr */
+
+ ldc_chan_t *ldcp; /* Pointer to bound channel */
+ ldc_mem_handle_t mhdl; /* Mem handle to desc ring */
+
+ struct ldc_dring *ch_next; /* Next dring in channel */
+ struct ldc_dring *next; /* Next dring overall */
+
+} ldc_dring_t;
+
+
+/*
+ * Channel specific information is kept in a separate
+ * structure. These are then stored in an array indexed
+ * by the channel number.
+ */
+struct ldc_chan {
+ ldc_chan_t *next; /* Next channel */
+
+ kmutex_t lock; /* Channel lock */
+ uint64_t id; /* Channel ID */
+ ldc_status_t status; /* Channel status */
+ uint32_t tstate; /* Channel transport state */
+ uint32_t hstate; /* Channel transport handshake state */
+
+ ldc_dev_t devclass; /* Associated device class */
+ uint64_t devinst; /* Associated device instance */
+ ldc_mode_t mode; /* Channel mode */
+
+ uint64_t mtu; /* Max TU size (streaming for now) */
+
+ ldc_ver_t version; /* Channel version */
+ uint32_t next_vidx; /* Next version to match */
+
+ uint_t (*cb)(uint64_t event, caddr_t arg);
+ caddr_t cb_arg; /* Channel callback and arg */
+ boolean_t cb_inprogress; /* Channel callback in progress */
+ boolean_t cb_enabled; /* Channel callbacks are enabled */
+
+ boolean_t intr_pending; /* TRUE if interrupts are pending */
+
+ uint64_t tx_q_entries; /* Num entries in transmit queue */
+ uint64_t tx_q_va; /* Virtual addr of transmit queue */
+ uint64_t tx_q_ra; /* Real addr of transmit queue */
+ uint64_t tx_head; /* Tx queue head */
+ uint64_t tx_ackd_head; /* Tx queue ACKd head (Reliable) */
+ uint64_t tx_tail; /* Tx queue tail */
+
+ uint64_t rx_q_entries; /* Num entries in receive queue */
+ uint64_t rx_q_va; /* Virtual addr of receive queue */
+ uint64_t rx_q_ra; /* Real addr of receive queue */
+
+ uint64_t link_state; /* Underlying HV channel state */
+
+ ldc_mtbl_t *mtbl; /* Memory table used by channel */
+ ldc_mhdl_t *mhdl_list; /* List of memory handles */
+ kmutex_t mlist_lock; /* Mem handle list lock */
+
+ ldc_dring_t *exp_dring_list; /* Exported desc ring list */
+ kmutex_t exp_dlist_lock; /* Lock for exported desc ring list */
+ ldc_dring_t *imp_dring_list; /* Imported desc ring list */
+ kmutex_t imp_dlist_lock; /* Lock for imported desc ring list */
+
+ uint8_t pkt_payload; /* Size of packet payload */
+
+ uint32_t first_fragment; /* Seqid of first msg fragment */
+ uint32_t last_msg_snt; /* Seqid of last packet sent */
+ uint32_t last_ack_rcd; /* Seqid of last ACK recd */
+ uint32_t last_msg_rcd; /* Seqid of last packet received */
+
+ uint32_t stream_remains; /* Number of bytes in stream */
+ /* packet buffer */
+ uint32_t stream_offset; /* Offset into packet buffer for */
+ /* next read */
+ uint8_t *stream_bufferp; /* Stream packet buffer */
+
+ int (*read_p)(ldc_chan_t *ldcp, caddr_t bufferp,
+ size_t *sizep);
+ int (*write_p)(ldc_chan_t *ldcp, caddr_t bufferp,
+ size_t *sizep);
+};
+
+
+/*
+ * LDC module soft state structure
+ */
+typedef struct ldc_soft_state {
+ kmutex_t lock; /* Protects ldc_soft_state_t */
+ ldc_cnex_t cinfo; /* channel nexus info */
+ uint64_t channel_count; /* Number of channels */
+ uint64_t channels_open; /* Number of open channels */
+ ldc_chan_t *chan_list; /* List of LDC endpoints */
+ ldc_dring_t *dring_list; /* Descriptor rings (for export) */
+} ldc_soft_state_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LDC_IMPL_H */
diff --git a/usr/src/uts/sun4v/sys/ldoms.h b/usr/src/uts/sun4v/sys/ldoms.h
new file mode 100644
index 0000000000..5e86dde864
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/ldoms.h
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LDOMS_H
+#define _LDOMS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/param.h> /* for MAXHOSTNAMELEN */
+
+/*
+ * Global LDoms definitions.
+ */
+
+/* Maximum number of logical domains supported */
+#define LDOMS_MAX_DOMAINS 32
+
+/* maximum number of characters in the logical domain name */
+#define LDOMS_MAX_NAME_LEN MAXHOSTNAMELEN
+
+/*
+ * Global flag that indicates whether domaining features are
+ * available. The value is set at boot time based on the value
+ * of the 'domaining-enabled' property in the MD. Updates to
+ * this variable after boot are not supported.
+ */
+extern uint_t domaining_enabled;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LDOMS_H */
diff --git a/usr/src/uts/sun4v/sys/lpad.h b/usr/src/uts/sun4v/sys/lpad.h
new file mode 100644
index 0000000000..e538702220
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/lpad.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LPAD_H
+#define _LPAD_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * sun4v Landing Pad
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _ASM
+
+#include <sys/pte.h>
+
+typedef union {
+ struct {
+ unsigned int rsvd0:32;
+ unsigned int rsvd1:29;
+ unsigned int perm:1;
+ unsigned int mmuflags:2;
+ } flag_bits;
+ uint64_t ll;
+} lpad_map_flag_t;
+
+typedef struct lpad_map {
+ lpad_map_flag_t flags;
+ uint64_t va;
+ tte_t tte;
+} lpad_map_t;
+
+#define flag_mmuflags flags.flag_bits.mmuflags
+#define flag_perm flags.flag_bits.perm
+
+typedef struct lpad_data {
+ uint64_t magic; /* magic value for sanity checking */
+ uint64_t *inuse; /* clear flag when done with lpad */
+ uint64_t mmfsa_ra; /* RA of MMU fault status area */
+ uint64_t pc; /* VA of CPU startup function */
+ uint64_t arg; /* argument to startup function */
+ uint64_t nmap; /* number of mappings */
+ lpad_map_t map[1]; /* array of mappings */
+} lpad_data_t;
+
+extern uint64_t *lpad_setup(int cpuid, uint64_t pc, uint64_t arg);
+
+#endif /* ! _ASM */
+
+/*
+ * General landing pad constants
+ */
+#define LPAD_TEXT_SIZE 1024
+#define LPAD_DATA_SIZE 1024
+#define LPAD_SIZE (LPAD_TEXT_SIZE + LPAD_DATA_SIZE)
+#define LPAD_MAGIC_VAL 0x4C502D4D41474943 /* "LP-MAGIC" */
+
+/*
+ * Masks for the lpad_map_t flag bitfield
+ */
+#define FLAG_MMUFLAGS_MASK 0x3
+#define FLAG_LOCK_MASK 0x4
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LPAD_H */
diff --git a/usr/src/uts/sun4v/sys/mach_descrip.h b/usr/src/uts/sun4v/sys/mach_descrip.h
index 2bf0c686fa..a003a9b23b 100644
--- a/usr/src/uts/sun4v/sys/mach_descrip.h
+++ b/usr/src/uts/sun4v/sys/mach_descrip.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -33,22 +33,58 @@
extern "C" {
#endif
+#include <sys/kstat.h>
+#include <sys/ksynch.h>
+#include <sys/mdesc.h>
+
/*
- * Common structure between kernel and mdesc driver
- * enabling the current machine description to be retrieved
- * or updated.
+ * MD memory operations (memops) are of two types:
+ * buf:
+ * Buffer allocator routines used to allocate the MD buffer.
+ * Allocator must support an alignment argument.
+ *
+ * meta:
+ * Meta allocator routines to allocate meta data structures.
+ * These allocations are small and don't have alignment
+ * requirements. Examples, md_t handles and the machine_descrip_t
+ * structure.
*/
-struct machine_descrip_s {
- void *va;
- uint64_t pa;
- uint64_t size;
- uint64_t space;
- kstat_t *ksp;
-};
+typedef struct machine_descrip_memops {
+ void *(*buf_allocp)(size_t size, size_t align);
+ void (*buf_freep)(void *, size_t size);
+ void *(*meta_allocp)(size_t size);
+ void (*meta_freep)(void *, size_t size);
+} machine_descrip_memops_t;
-typedef struct machine_descrip_s machine_descrip_t;
+/*
+ * Common structure/list between kernel and mdesc driver enabling
+ * the current machine description to be retrieved or updated.
+ *
+ * Locks:
+ * The current global MD is protected by the curr_mach_descrip_lock.
+ * Each Machine description has a lock to synchronize its ref count.
+ * The Obsolete MD list is protected by the obs_list_lock.
+ */
+typedef struct machine_descrip_s {
+ uint64_t gen; /* Generation number for MD */
+ kmutex_t lock; /* synchronize access to MD */
+ void *va; /* virtual address */
+ uint64_t size; /* size of MD */
+ uint64_t space; /* space allocated for MD */
+ int refcnt; /* MD ref count */
+ struct machine_descrip_s *next; /* Next MD in list */
+ machine_descrip_memops_t *memops; /* Memory operations for MD */
+} machine_descrip_t;
-extern machine_descrip_t machine_descrip;
+/*
+ * Utility wrappers to get/fini a handle to the current MD.
+ */
+extern md_t *md_get_handle(void);
+extern int md_fini_handle(md_t *);
+extern caddr_t md_get_md_raw(md_t *);
+extern int md_alloc_scan_dag(md_t *, mde_cookie_t, char *, char *,
+ mde_cookie_t **);
+extern void md_free_scan_dag(md_t *, mde_cookie_t **);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/sun4v/sys/machcpuvar.h b/usr/src/uts/sun4v/sys/machcpuvar.h
index 24050ea18f..8d9ac241aa 100644
--- a/usr/src/uts/sun4v/sys/machcpuvar.h
+++ b/usr/src/uts/sun4v/sys/machcpuvar.h
@@ -207,6 +207,7 @@ struct cpu_node {
int ecache_associativity;
int ecache_setsize;
uint64_t device_id;
+ id_t exec_unit_mapping;
};
extern struct cpu_node cpunodes[];
diff --git a/usr/src/uts/sun4v/sys/machparam.h b/usr/src/uts/sun4v/sys/machparam.h
index 6deb0ea3f6..130e8e662f 100644
--- a/usr/src/uts/sun4v/sys/machparam.h
+++ b/usr/src/uts/sun4v/sys/machparam.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -281,22 +280,26 @@ extern "C" {
* names defined in sun4u/os/mach_cpu_states.c which should be kept up to
* date if new #defines are added.
*/
-#define PTL1_BAD_DEBUG 0
-#define PTL1_BAD_WTRAP 1
-#define PTL1_BAD_KMISS 2
-#define PTL1_BAD_KPROT_FAULT 3
-#define PTL1_BAD_ISM 4
-#define PTL1_BAD_MMUTRAP 5
-#define PTL1_BAD_TRAP 6
-#define PTL1_BAD_FPTRAP 7
-#define PTL1_BAD_INTR_REQ 8
-#define PTL1_BAD_TRACE_PTR 9
-#define PTL1_BAD_STACK 10
-#define PTL1_BAD_DTRACE_FLAGS 11
-#define PTL1_BAD_CTX_STEAL 12
-#define PTL1_BAD_ECC 13
-#define PTL1_BAD_HCALL 14
-#define PTL1_BAD_GL 15
+#define PTL1_BAD_DEBUG 0
+#define PTL1_BAD_WTRAP 1
+#define PTL1_BAD_KMISS 2
+#define PTL1_BAD_KPROT_FAULT 3
+#define PTL1_BAD_ISM 4
+#define PTL1_BAD_MMUTRAP 5
+#define PTL1_BAD_TRAP 6
+#define PTL1_BAD_FPTRAP 7
+#define PTL1_BAD_INTR_REQ 8
+#define PTL1_BAD_TRACE_PTR 9
+#define PTL1_BAD_STACK 10
+#define PTL1_BAD_DTRACE_FLAGS 11
+#define PTL1_BAD_CTX_STEAL 12
+#define PTL1_BAD_ECC 13
+#define PTL1_BAD_HCALL 14
+#define PTL1_BAD_GL 15
+#define PTL1_BAD_WATCHDOG 16
+#define PTL1_BAD_RED 17
+#define PTL1_BAD_HCALL_UNMAP_PERM_EINVAL 18
+#define PTL1_BAD_HCALL_UNMAP_PERM_ENOMAP 19
/*
* Defines the max trap level allowed
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index 64d6e5dd8f..c1e973cd51 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -337,6 +337,7 @@ extern void idle_stop_xcall(void);
extern void set_idle_cpu(int);
extern void unset_idle_cpu(int);
extern void mp_cpu_quiesce(struct cpu *);
+extern int stopcpu_bycpuid(int);
/*
* Panic at TL > 0
@@ -396,6 +397,12 @@ extern uchar_t kpm_size_shift;
extern caddr_t kpm_vbase;
#define INVALID_VADDR(a) (((a) >= hole_start && (a) < hole_end))
+#define VA_ADDRESS_SPACE_BITS 64
+#define RA_ADDRESS_SPACE_BITS 56
+#define MAX_REAL_ADDRESS (1ull << RA_ADDRESS_SPACE_BITS)
+#define DEFAULT_VA_ADDRESS_SPACE_BITS 48 /* def. Niagara (broken MD) */
+#define PAGESIZE_MASK_BITS 16
+#define MAX_PAGESIZE_MASK ((1<<PAGESIZE_MASK_BITS) - 1)
extern void adjust_hw_copy_limits(int);
@@ -466,6 +473,25 @@ void sticksync_master(void);
#define HV_TOD_RETRY_THRESH 100
#define HV_TOD_WAIT_USEC 5
+/*
+ * Interrupt Queues and Error Queues
+ */
+
+#define INTR_CPU_Q 0x3c
+#define INTR_DEV_Q 0x3d
+#define CPU_RQ 0x3e
+#define CPU_NRQ 0x3f
+#define DEFAULT_CPU_Q_ENTRIES 0x100
+#define DEFAULT_DEV_Q_ENTRIES 0x100
+#define INTR_REPORT_SIZE 64
+
+#ifndef _ASM
+extern uint64_t cpu_q_entries;
+extern uint64_t dev_q_entries;
+extern uint64_t cpu_rq_entries;
+extern uint64_t cpu_nrq_entries;
+#endif /* _ASM */
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/sun4v/sys/mdeg.h b/usr/src/uts/sun4v/sys/mdeg.h
new file mode 100644
index 0000000000..c8149afaa6
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/mdeg.h
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _MDEG_H
+#define _MDEG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * MD Event Generator (mdeg) interface.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/mdesc.h>
+
+/*
+ * Specification of a node property.
+ */
+typedef struct {
+ uint8_t type;
+ char *namep;
+ union {
+ char *strp;
+ uint64_t val;
+ } _p;
+
+} mdeg_prop_spec_t;
+
+#define ps_str _p.strp
+#define ps_val _p.val
+
+/*
+ * Specification of unique node in the MD. The array
+ * of property name value pairs is used to determine
+ * whether the node matches the specification.
+ */
+typedef struct {
+ char *namep;
+ mdeg_prop_spec_t *specp;
+} mdeg_node_spec_t;
+
+/*
+ * Specification of a method to match nodes. The
+ * array of properties are used to match two nodes
+ * from different MDs. If the specified properties
+ * match, the nodes are the same.
+ */
+typedef struct {
+ char *namep;
+ md_prop_match_t *matchp;
+} mdeg_node_match_t;
+
+/*
+ * The result of the MD update as communicated
+ * through the parameter to the registered callback.
+ */
+typedef struct {
+ md_t *mdp;
+ mde_cookie_t *mdep;
+ uint_t nelem;
+} mdeg_diff_t;
+
+/*
+ * Results of the MD update for a specific registration
+ */
+typedef struct {
+ mdeg_diff_t added;
+ mdeg_diff_t removed;
+ mdeg_diff_t match_curr;
+ mdeg_diff_t match_prev;
+} mdeg_result_t;
+
+/*
+ * Client Interface
+ */
+
+#define MDEG_SUCCESS 0
+#define MDEG_FAILURE 1
+
+typedef uint64_t mdeg_handle_t;
+
+typedef int (*mdeg_cb_t)(void *cb_argp, mdeg_result_t *resp);
+
+int mdeg_register(mdeg_node_spec_t *pspecp, mdeg_node_match_t *nmatchp,
+ mdeg_cb_t cb, void *cb_argp, mdeg_handle_t *hdlp);
+
+int mdeg_unregister(mdeg_handle_t hdl);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MDEG_H */
diff --git a/usr/src/uts/sun4v/sys/mmu.h b/usr/src/uts/sun4v/sys/mmu.h
index b38d007d83..61d0812ace 100644
--- a/usr/src/uts/sun4v/sys/mmu.h
+++ b/usr/src/uts/sun4v/sys/mmu.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -117,6 +117,10 @@ extern "C" {
*/
#define FLUSH_ADDR (KERNELBASE + 2 * MMU_PAGESIZE4M)
+#define MAX_NCTXS_BITS 16 /* sun4v max. contexts bits */
+#define MIN_NCTXS_BITS 2
+#define MAX_NCTXS (1ull << MAX_NCTXS_BITS)
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/sun4v/sys/ncp.h b/usr/src/uts/sun4v/sys/ncp.h
index 7203f84fdd..491a7bf622 100644
--- a/usr/src/uts/sun4v/sys/ncp.h
+++ b/usr/src/uts/sun4v/sys/ncp.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -30,7 +29,12 @@
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
#include <sys/kmem.h>
+#include <sys/mdesc.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
#include <sys/ncs.h>
#ifdef __cplusplus
@@ -45,11 +49,6 @@ extern "C" {
#define FALSE 0
#define TRUE 1
-/*
- * XXX
- * NCP_MAX_NMAUS should come from OBP/HV
- * NCP_MAX_CPUS_PER_MAU should come from OBP/HV
- */
#define NCP_MAX_NMAUS 8
#define NCP_MAX_CPUS_PER_MAU 4
#define NCP_CPUID2MAUID(c) ((c) / NCP_MAX_CPUS_PER_MAU)
@@ -96,8 +95,6 @@ typedef struct ncp_minor ncp_minor_t;
typedef struct ncp_listnode ncp_listnode_t;
typedef struct ncp_request ncp_request_t;
typedef struct ncp_stat ncp_stat_t;
-typedef struct ncp_mau_queue ncp_mau_queue_t;
-typedef struct ncp_desc ncp_desc_t;
@@ -246,46 +243,16 @@ struct ncp_stat {
kstat_named_t ns_status;
kstat_named_t ns_algs[DS_MAX];
struct {
+ kstat_named_t ns_mauid;
+ kstat_named_t ns_mauhandle;
+ kstat_named_t ns_maustate;
kstat_named_t ns_submit;
kstat_named_t ns_qfull;
+ kstat_named_t ns_qbusy;
kstat_named_t ns_qupdate_failure;
} ns_mau[NCP_MAX_NMAUS];
};
-
-struct ncp {
- kmutex_t n_lock;
- kmem_cache_t *n_ds_cache;
- kmem_cache_t *n_mactl_cache;
- kmem_cache_t *n_mabuf_cache;
- dev_info_t *n_dip;
- minor_t n_minor;
- int n_nmaus;
- int n_max_nmaus;
- int *n_mauids;
- ncp_mau_queue_t *n_mau_q;
- int n_mau_q_size;
-
- ddi_taskq_t *n_taskq;
-
- unsigned n_flags; /* dev state flags */
-
- kstat_t *n_ksp;
- kstat_t *n_intrstats;
- u_longlong_t n_stats[DS_MAX];
- u_longlong_t n_qfull[NCP_MAX_NMAUS];
- u_longlong_t n_qupdate_failure[NCP_MAX_NMAUS];
-
- ulong_t n_pagesize;
- crypto_kcf_provider_handle_t n_prov;
-
- kmutex_t n_freereqslock;
- ncp_listnode_t n_freereqs; /* available requests */
-
- kmutex_t n_ctx_list_lock;
- ncp_listnode_t n_ctx_list;
-};
-
/*
* Device flags (ncp_t.ncp_flags)
*/
@@ -294,10 +261,25 @@ struct ncp {
/*
* IMPORTANT:
- * (NCP_MAQUEUE_NENTRIES * sizeof (ncs_hvdesc_t)) <= PAGESIZE
+ * NCP_MAQUEUE_NENTRIES *must* be a power-of-2.
+ * requirement: sizeof (ncs_hvdesc_t) == 64
*/
-#define NCP_MAQUEUE_NENTRIES 64
+#define NCP_MAQUEUE_NENTRIES (1 << 9) /* 512 */
#define NCP_MAQUEUE_WRAPMASK (NCP_MAQUEUE_NENTRIES - 1)
+#define NCP_MAQUEUE_SIZE (NCP_MAQUEUE_NENTRIES * sizeof (ncs_hvdesc_t))
+#define NCP_MAQUEUE_ALIGN (NCP_MAQUEUE_SIZE - 1)
+#define NCP_MAQUEUE_SLOTS_AVAIL(q) \
+ (((q)->nmq_head > (q)->nmq_tail) ? \
+ ((q)->nmq_head > (q)->nmq_tail - 1) : \
+ (NCP_MAQUEUE_NENTRIES - \
+ ((q)->nmq_tail - (q)->nmq_head) - 1))
+
+#define NCP_QINDEX_TO_QOFFSET(i) ((i) * sizeof (ncs_hvdesc_t))
+#define NCP_QOFFSET_TO_QINDEX(o) ((o) / sizeof (ncs_hvdesc_t))
+#define NCP_QINDEX_INCR(i) (((i) + 1) & NCP_MAQUEUE_WRAPMASK)
+#define NCP_QINDEX_IS_VALID(i) (((i) >= 0) && \
+ ((i) < NCP_MAQUEUE_NENTRIES))
+#define NCP_QTIMEOUT_SECONDS 15
typedef struct ncp_ma {
kmutex_t nma_lock;
@@ -305,24 +287,141 @@ typedef struct ncp_ma {
int nma_ref; /* # of descriptor references */
} ncp_ma_t;
+typedef struct ncp_desc ncp_desc_t;
struct ncp_desc {
ncs_hvdesc_t nd_hv;
ncp_desc_t *nd_link; /* to string related descriptors */
ncp_ma_t *nd_ma; /* referenced MA buffer */
};
+typedef struct ncp_descjob {
+ int dj_id;
+ kcondvar_t dj_cv;
+ ncp_desc_t *dj_jobp;
+ struct ncp_descjob *dj_prev;
+ struct ncp_descjob *dj_next;
+} ncp_descjob_t;
+
/*
* nmq_head, nmq_tail = indexes into nmq_desc[].
*/
-struct ncp_mau_queue {
- int nmq_id;
+typedef struct {
+ uint64_t nmq_mauhandle;
+ uint64_t nmq_devino;
+ int nmq_inum;
+ int nmq_mauid;
+ int nmq_init;
+ int nmq_busy_wait;
+ kcondvar_t nmq_busy_cv;
kmutex_t nmq_lock;
int nmq_head;
int nmq_tail;
uint_t nmq_wrapmask;
+ ncp_descjob_t **nmq_jobs;
+ size_t nmq_jobs_size;
ncs_hvdesc_t *nmq_desc; /* descriptor array */
- int nmq_desc_size;
- uint64_t nmq_njobs;
+ char *nmq_mem;
+ size_t nmq_memsize;
+ ncp_descjob_t *nmq_joblist;
+ int nmq_joblistcnt;
+ struct {
+ uint64_t qks_njobs;
+ uint64_t qks_qfull;
+ uint64_t qks_qbusy;
+ uint64_t qks_qfail;
+ } nmq_ks;
+} ncp_mau_queue_t;
+
+#define MAU_STATE_ERROR (-1)
+#define MAU_STATE_OFFLINE 0
+#define MAU_STATE_ONLINE 1
+
+typedef struct {
+ int mm_mauid;
+ int mm_cpulistsz;
+ int *mm_cpulist;
+ int mm_ncpus;
+ int mm_nextcpuidx;
+ /*
+ * Only protects mm_nextcpuidx field.
+ */
+ kmutex_t mm_lock;
+ /*
+ * xxx - maybe need RW lock for mm_state?
+ */
+ int mm_state; /* MAU_STATE_... */
+
+ ncp_mau_queue_t mm_queue;
+} mau_entry_t;
+
+typedef struct {
+ int mc_cpuid;
+ int mc_mauid;
+ /*
+ * xxx - maybe need RW lock for mm_state?
+ * Mirrors mm_state in mau_entry_t. Duplicated
+ * for speed so we don't have to search mau_entry
+ * table. Field rarely updated.
+ */
+ int mc_state; /* MAU_STATE_... */
+} cpu_entry_t;
+
+typedef struct {
+ /*
+ * MAU stuff
+ */
+ int m_maulistsz;
+ mau_entry_t *m_maulist;
+ int m_nmaus;
+ int m_nextmauidx;
+ /*
+ * Only protects m_nextmauidx field.
+ */
+ kmutex_t m_lock;
+
+ /*
+ * CPU stuff
+ */
+ int m_cpulistsz;
+ cpu_entry_t *m_cpulist;
+ int m_ncpus;
+} ncp_mau2cpu_map_t;
+
+struct ncp {
+ uint_t n_hvapi_minor_version;
+ kmutex_t n_lock;
+ kmem_cache_t *n_ds_cache;
+ kmem_cache_t *n_mactl_cache;
+ kmem_cache_t *n_mabuf_cache;
+ dev_info_t *n_dip;
+ minor_t n_minor;
+
+ ddi_taskq_t *n_taskq;
+
+ unsigned n_flags; /* dev state flags */
+
+ kstat_t *n_ksp;
+ kstat_t *n_intrstats;
+ u_longlong_t n_stats[DS_MAX];
+
+ ddi_intr_handle_t *n_htable;
+ int n_intr_mid[NCP_MAX_NMAUS];
+ int n_intr_type;
+ int n_intr_cnt;
+ size_t n_intr_size;
+ uint_t n_intr_pri;
+
+ ulong_t n_pagesize;
+ crypto_kcf_provider_handle_t n_prov;
+
+ kmutex_t n_freereqslock;
+ ncp_listnode_t n_freereqs; /* available requests */
+
+ kmutex_t n_ctx_list_lock;
+ ncp_listnode_t n_ctx_list;
+
+ md_t *n_mdp;
+ ncp_mau2cpu_map_t n_maumap;
};
#endif /* _KERNEL */
@@ -343,14 +442,18 @@ struct ncp_mau_queue {
#define DMA_LDST 0x00000004
#define DNCS_QTAIL 0x00000008
#define DATTACH 0x00000010
-#define DMOD 0x00000040 /* _init/_fini/_info/attach/detach */
-#define DENTRY 0x00000080 /* crypto routine entry/exit points */
+#define DMD 0x00000020
+#define DHV 0x00000040
+#define DINTR 0x00000080
+#define DMOD 0x00000100 /* _init/_fini/_info/attach/detach */
+#define DENTRY 0x00000200 /* crypto routine entry/exit points */
#define DALL 0xFFFFFFFF
#define DBG0 ncp_dprintf
#define DBG1 ncp_dprintf
#define DBG2 ncp_dprintf
#define DBG3 ncp_dprintf
+#define DBG4 ncp_dprintf
#define DBGCALL(flag, func) { if (ncp_dflagset(flag)) (void) func; }
void ncp_dprintf(ncp_t *, int, const char *, ...);
@@ -363,6 +466,7 @@ int ncp_dflagset(int);
#define DBG1(vca, lvl, fmt, arg1)
#define DBG2(vca, lvl, fmt, arg1, arg2)
#define DBG3(vca, lvl, fmt, arg1, arg2, arg3)
+#define DBG4(vca, lvl, fmt, arg1, arg2, arg3, arg4)
#define DBGCALL(flag, func)
#endif /* !defined(DEBUG) */
@@ -404,6 +508,16 @@ int ncp_dsaatomic(crypto_provider_handle_t, crypto_session_id_t,
crypto_data_t *, int, crypto_req_handle_t, int);
/*
+ * ncp_md.
+ */
+int ncp_init_mau2cpu_map(ncp_t *);
+void ncp_deinit_mau2cpu_map(ncp_t *);
+int ncp_map_cpu_to_mau(ncp_t *, int);
+int ncp_map_mau_to_cpu(ncp_t *, int);
+int ncp_map_nextmau(ncp_t *);
+mau_entry_t *ncp_map_findmau(ncp_t *, int);
+
+/*
* ncp_kstat.c
*/
void ncp_ksinit(ncp_t *);
diff --git a/usr/src/uts/sun4v/sys/ncs.h b/usr/src/uts/sun4v/sys/ncs.h
index bb28f7dc4c..11310e0817 100644
--- a/usr/src/uts/sun4v/sys/ncs.h
+++ b/usr/src/uts/sun4v/sys/ncs.h
@@ -33,12 +33,24 @@ extern "C" {
#endif
/*
- * NCS HV API versioni definitions.
+ * NCS HV API version definitions.
*/
#define NCS_MAJOR_VER 1
-#define NCS_MINOR_VER 0
+#define NCS_MINOR_VER 1
+/*
+ * NCS HV API v1.0
+ */
#define HV_NCS_REQUEST 0x110
+/*
+ * NCS HV API v1.1
+ */
+#define HV_NCS_QCONF 0x111
+#define HV_NCS_QINFO 0x112
+#define HV_NCS_GETHEAD 0x113
+#define HV_NCS_GETTAIL 0x114
+#define HV_NCS_SETTAIL 0x115
+#define HV_NCS_QHANDLE_TO_DEVINO 0x116
#ifndef _ASM
/* Forward typedefs */
@@ -62,7 +74,7 @@ union ma_ctl {
uint64_t length:6;
} bits;
};
-#endif /* !_ASM */
+#endif /* _ASM */
/* Values for ma_ctl operation field */
#define MA_OP_LOAD 0x0
@@ -114,7 +126,7 @@ union ma_ma {
#endif /* !_ASM */
/*
- * NCS API definitions
+ * NCS HV API v1.0 definitions (PSARC/2005/125)
*/
/*
@@ -164,8 +176,8 @@ typedef struct ma_regs {
} ma_regs_t;
#define ND_TYPE_UNASSIGNED 0
-#define ND_TYPE_MA 1
-#define ND_TYPE_SPU 2
+#define ND_TYPE_MA 1 /* v1.0 only */
+#define ND_TYPE_SPU 2 /* v1.0 only */
#define ND_STATE_FREE 0
#define ND_STATE_PENDING 1
@@ -190,7 +202,50 @@ typedef struct ncs_hvdesc {
extern uint64_t hv_ncs_request(int, uint64_t, size_t);
-#endif /* !_ASM */
+#endif /* _ASM */
+
+/*
+ * NCS HV API v1.1 definitions (FWARC/2006/174)
+ *
+ * Some of the structures above (v1.0) are inherited for v1.1
+ */
+/*
+ * In v1.1, the nhd_type field has the following values
+ * when non-zero (unassigned). The nhd_type field indicates
+ * whether the descriptor is the beginning of a crypto job,
+ * the continuation, or the end/last descriptor in a job.
+ * A job may be comprised of multiple descriptors.
+ */
+#define ND_TYPE_START 0x01
+#define ND_TYPE_CONT 0x02
+#define ND_TYPE_END 0x80
+
+/*
+ * Types of queues supported by NCS
+ */
+#define NCS_QTYPE_MAU 0x1
+#define NCS_QTYPE_CWQ 0x2
+
+/*
+ * This structure is accessed with offsets in ml/hcall.s.
+ * Any changes to this structure will require updates to
+ * the hv_ncs_qinfo entrypoint in ml/hcall.s.
+ */
+#ifndef _ASM
+typedef struct ncs_qinfo {
+ uint64_t qi_qtype;
+ uint64_t qi_baseaddr;
+ uint64_t qi_qsize;
+} ncs_qinfo_t;
+
+extern uint64_t hv_ncs_qconf(uint64_t, uint64_t, uint64_t, uint64_t *);
+extern uint64_t hv_ncs_qinfo(uint64_t, ncs_qinfo_t *);
+extern uint64_t hv_ncs_gethead(uint64_t, uint64_t *);
+extern uint64_t hv_ncs_gettail(uint64_t, uint64_t *);
+extern uint64_t hv_ncs_settail(uint64_t, uint64_t);
+extern uint64_t hv_ncs_qhandle_to_devino(uint64_t, uint64_t *);
+extern uint64_t hv_ncs_intr_clrstate(uint64_t);
+#endif /* _ASM */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/sun4v/sys/platsvc.h b/usr/src/uts/sun4v/sys/platsvc.h
new file mode 100644
index 0000000000..9b76f1548c
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/platsvc.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PLATSVC_H
+#define _PLATSVC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ds.h>
+
+#define MAX_REASON_SIZE 1
+
+/*
+ * PLATSVC STATUS
+ */
+#define PLATSVC_SUCCESS 0x0
+#define PLATSVC_FAILURE 0x1
+#define PLATSVC_INVALID_MESG 0x2
+
+#define MD_UPDATE_SUCCESS PLATSVC_SUCCESS
+#define MD_UPDATE_FAILURE PLATSVC_FAILURE
+#define MD_UPDATE_INVALID_MSG PLATSVC_INVALID_MESG
+
+#define DOMAIN_SHUTDOWN_SUCCESS PLATSVC_SUCCESS
+#define DOMAIN_SHUTDOWN_FAILURE PLATSVC_FAILURE
+#define DOMAIN_SHUTDOWN_INVALID_MSG PLATSVC_INVALID_MESG
+
+#define DOMAIN_PANIC_SUCCESS PLATSVC_SUCCESS
+#define DOMAIN_PANIC_FAILURE PLATSVC_FAILURE
+#define DOMAIN_PANIC_INVALID_MSG PLATSVC_INVALID_MESG
+
+typedef struct platsvc_md_update_req {
+ uint64_t req_num;
+} platsvc_md_update_req_t;
+
+typedef struct platsvc_md_update_resp {
+ uint64_t req_num;
+ uint32_t result;
+} platsvc_md_update_resp_t;
+
+
+typedef struct platsvc_shutdown_req {
+ uint64_t req_num;
+ uint32_t delay;
+} platsvc_shutdown_req_t;
+
+typedef struct platsvc_shutdown_resp {
+ uint64_t req_num;
+ uint32_t result;
+ char reason[MAX_REASON_SIZE];
+} platsvc_shutdown_resp_t;
+
+typedef struct platsvc_panic_req {
+ uint64_t req_num;
+} platsvc_panic_req_t;
+
+typedef struct platsvc_panic_resp {
+ uint64_t req_num;
+ uint32_t result;
+ char reason[MAX_REASON_SIZE];
+} platsvc_panic_resp_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PLATSVC_H */
diff --git a/usr/src/uts/sun4v/sys/promif_impl.h b/usr/src/uts/sun4v/sys/promif_impl.h
new file mode 100644
index 0000000000..2f5602a5b2
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/promif_impl.h
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_PROMIF_IMPL_H
+#define _SYS_PROMIF_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#ifdef _KERNEL
+#include <sys/promimpl.h>
+#endif
+#include <sys/obpdefs.h>
+#include <sys/cmn_err.h>
+#include <sys/note.h>
+
+/*
+ * CIF handler functions
+ */
+typedef int (*cif_func_t) (void *);
+extern int promif_getprop(void *p);
+extern int promif_getproplen(void *p);
+extern int promif_nextprop(void *p);
+extern int promif_nextnode(void *p);
+extern int promif_childnode(void *p);
+extern int promif_parentnode(void *p);
+extern int promif_enter_mon(void *p);
+extern int promif_exit_to_mon(void *p);
+extern int promif_reboot(void *p);
+extern int promif_write(void *p);
+extern int promif_read(void *p);
+extern int promif_interpret(void *p);
+extern int promif_finddevice(void *p);
+extern int promif_instance_to_package(void *p);
+#ifndef _KMDB
+extern int promif_setprop(void *p);
+extern int promif_test(void *p);
+extern int promif_instance_to_path(void *p);
+extern int promif_power_off(void *p);
+extern int promif_asr_list_keys_len(void *p);
+extern int promif_asr_list_keys(void *p);
+extern int promif_asr_export_len(void *p);
+extern int promif_asr_export(void *p);
+extern int promif_set_security_key(void *p);
+extern int promif_get_security_key(void *p);
+extern int promif_start_cpu(void *p);
+extern int promif_set_mmfsa_traptable(void *p);
+extern int promif_set_sun4v_api_version(void *p);
+extern int promif_get_sun4v_api_version(void *p);
+#endif
+
+/*
+ * Shadow device tree access functions
+ */
+extern pnode_t promif_stree_nextnode(pnode_t nodeid);
+extern pnode_t promif_stree_childnode(pnode_t nodeid);
+extern pnode_t promif_stree_parentnode(pnode_t nodeid);
+extern int promif_stree_getproplen(pnode_t, char *name);
+extern int promif_stree_getprop(pnode_t, char *name, void *value);
+extern int promif_stree_setprop(pnode_t, char *name, void *value, int len);
+extern char *promif_stree_nextprop(pnode_t nodeid, char *name, char *next);
+
+/*
+ * Hooks for kmdb to get and set a pointer to the PROM shadow tree
+ */
+#ifdef _KMDB
+extern void promif_stree_setroot(void *root);
+extern caddr_t promif_stree_getroot(void);
+#endif
+
+/*
+ * Miscellaneous functions
+ */
+extern cif_func_t promif_find_cif_callback(char *opname);
+extern int promif_ldom_setprop(char *name, void *value, int valuelen);
+
+/*
+ * Initialization functions
+ */
+#ifdef _KMDB
+extern void cif_init(char *, caddr_t, ihandle_t, ihandle_t,
+ phandle_t, phandle_t, pnode_t, pnode_t);
+extern void promif_io_init(ihandle_t, ihandle_t, phandle_t, phandle_t);
+extern void promif_set_nodes(pnode_t, pnode_t);
+#else
+extern void promif_io_init(void);
+extern void promif_stree_init(void);
+extern void promif_prop_init(void);
+#endif
+
+/*
+ * Debugging support
+ */
+#ifdef DEBUG
+
+extern uint_t cif_debug;
+
+#define CIF_DBG_FLAG_NODE 0x01
+#define CIF_DBG_FLAG_REBOOT 0x02
+
+#define CIF_DBG_ALL if (cif_debug) prom_printf
+#define CIF_DBG_NODE if (cif_debug & CIF_DBG_FLAG_NODE) prom_printf
+#define CIF_DBG_REBOOT if (cif_debug & CIF_DBG_FLAG_REBOOT) prom_printf
+
+#else /* DEBUG */
+
+#define CIF_DBG_ALL _NOTE(CONSTCOND) if (0) prom_printf
+#define CIF_DBG_NODE CIF_DBG_ALL
+#define CIF_DBG_REBOOT CIF_DBG_ALL
+
+#endif /* DEBUG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PROMIF_IMPL_H */
diff --git a/usr/src/uts/sun4v/sys/varconfig.h b/usr/src/uts/sun4v/sys/varconfig.h
new file mode 100644
index 0000000000..5d01809355
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/varconfig.h
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_VARCONFIG_H
+#define _SYS_VARCONFIG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+typedef enum {
+ VAR_CONFIG_SET_REQ,
+ VAR_CONFIG_DELETE_REQ,
+ VAR_CONFIG_SET_RESP,
+ VAR_CONFIG_DELETE_RESP
+} var_config_cmd_t;
+
+typedef struct {
+ var_config_cmd_t cmd;
+} var_config_hdr_t;
+
+
+typedef struct {
+ char name_and_value[1];
+} var_config_set_req_t;
+
+typedef struct {
+ char name[1];
+} var_config_delete_req_t;
+
+
+typedef enum {
+ VAR_CONFIG_SUCCESS = 0,
+ VAR_CONFIG_NO_SPACE,
+ VAR_CONFIG_INVALID_VAR,
+ VAR_CONFIG_INVALID_VAL,
+ VAR_CONFIG_VAR_NOT_PRESENT
+} var_config_status_t;
+
+typedef struct {
+ var_config_status_t result;
+} var_config_resp_t;
+
+
+typedef struct {
+ var_config_hdr_t vc_hdr;
+ union {
+ var_config_set_req_t vc_set;
+ var_config_delete_req_t vc_delete;
+ var_config_resp_t vc_resp;
+ } un;
+} var_config_msg_t;
+
+#define var_config_cmd vc_hdr.cmd
+#define var_config_set un.vc_set
+#define var_config_delete un.vc_delete
+#define var_config_resp un.vc_resp
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VARCONFIG_H */
diff --git a/usr/src/uts/sun4v/sys/vcc.h b/usr/src/uts/sun4v/sys/vcc.h
new file mode 100644
index 0000000000..378fdce8e2
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vcc.h
@@ -0,0 +1,110 @@
+
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VCC_H
+#define _VCC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/stream.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ioctl.h>
+
+/*
+ * vcc and vntsd exchange information using ioctl commands. When vntsd starts,
+ * it uses VCC_NUM_CONSOLE to get number of existing ports and
+ * VCC_CONS_TBL to obtain the table of existing consoles. In this table,
+ * vcc returns information about each of the console ports using vcc_console_t
+ * structure. Vntsd then sleeps on polling vcc control port.
+ *
+ * When there is a change in configuration, such as addition or deletion
+ * of a console port, vcc wakes up vntsd via the poll events. Subsequently,
+ * vntsd uses VCC_INQUIRY ioctl to determine the reason for wakeup. In
+ * response to the inquiry, vcc provides a vcc_response_t structure
+ * containing reason and port number.
+ *
+ * If a port is being added or updated (group change), vntsd uses
+ * VCC_CONS_INFO ioctl with port number to obtain configuration of
+ * the port.
+ *
+ * If the port is being deleted, vntsd uses VCC_DEL_CONS_OK ioctl to notify
+ * vcc after its clean up is done. Vcc subsequently tears down
+ * its internal configuration and removes the associated TTY minor node.
+ *
+ * Only one open is allowed for each vcc port. If vntsd opens a port that is
+ * already open, vntsd will use VCC_FORCE_CLOSE to take port from other
+ * application
+ */
+
+/* VCC CNTRL IOCTL */
+
+#define VCC_IOCTL_CMD ('c' << 8)
+
+
+#define VCC_NUM_CONSOLE VCC_IOCTL_CMD | 0x1 /* num of consoles */
+#define VCC_CONS_TBL VCC_IOCTL_CMD | 0x2 /* config table */
+#define VCC_INQUIRY VCC_IOCTL_CMD | 0x3 /* inquiry by vntsd */
+#define VCC_CONS_INFO VCC_IOCTL_CMD | 0x4 /* config */
+#define VCC_CONS_STATUS VCC_IOCTL_CMD | 0x5 /* console status */
+#define VCC_FORCE_CLOSE VCC_IOCTL_CMD | 0x6 /* force to close */
+
+/* reasons to wake up vntsd */
+typedef enum {
+ VCC_CONS_ADDED, /* a port was added */
+ VCC_CONS_DELETED, /* a port was removed */
+ /* XXX not implemented yet */
+ VCC_CONS_UPDATED /* a port configuration was changed */
+} vcc_reason_t;
+
+/*
+ * structure that vcc returns to vntsd in response to VCC_CONS_TBL and
+ * VCC_CONS_INFO ioctl call.
+ */
+typedef struct vcc_console {
+ int cons_no; /* console port number */
+ uint64_t tcp_port; /* tcp port for the group */
+ char domain_name[MAXPATHLEN]; /* domain name */
+ char group_name[MAXPATHLEN]; /* group name */
+ char dev_name[MAXPATHLEN];
+} vcc_console_t;
+
+/* structure that vcc sends to vntsd in response to wake up inquiry */
+typedef struct vcc_response {
+ int cons_no; /* console port number */
+ vcc_reason_t reason; /* wake up reason */
+} vcc_response_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VCC_H */
diff --git a/usr/src/uts/sun4v/sys/vcc_impl.h b/usr/src/uts/sun4v/sys/vcc_impl.h
new file mode 100644
index 0000000000..8bb42fa15a
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vcc_impl.h
@@ -0,0 +1,304 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VCC_IMPL_H
+#define _VCC_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/stream.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ioctl.h>
+#include <sys/vcc.h>
+
+#define VCC_DEV_TO_INST(dev) (getminor(dev))
+#define VCC_INST_TO_DEV(instance) (instance)
+
+#define VCC_DRIVER_NAME "vcc"
+#define VCC_NAME VCC_DRIVER_NAME
+
+/*
+ * VCC Port States
+ */
+
+/*
+ * There is one lock in port structure to protect the states of the port.
+ * States of the port are:
+ * 1. VCC_PORT_AVAIL
+ * 2. VCC_PORT_OPEN
+ * 3. VCC_PORT_USE_READ_LDC - There is a thread doing vcc_read.
+ * 4. VCC_PORT_USE_WRITE_LDC - There is a thread doing vcc_write.
+ * 5. VCC_PORT_LDC_DATA_READY - Data is ready from ldc.
+ * 6. VCC_PORT_LDC_WRITE_READY - Ldc has space to receive data.
+ * 7. VCC_PORT_LDC_CHANNEL_READY - Ldc channel is up.
+ * 8. VCC_PORT_ADDED - A new port was added.
+ * 9. VCC_PORT_TERM_RD - Terminal read is enabled vs suspended
+ * 10. VCC_PORT_TERM_WR - Terminal write is enabled vs suspended
+ * 11. VCC_PORT_NONBLOCK - A port was opened with non blocking flag.
+ * 12. VCC_PORT_LDC_LINK_DOWN
+ *
+ *
+ * Code flow for port to transit from one state to another is as the follows:
+ *
+ * 1. VCC_PORT_AVAIL
+ *
+ * Transition from unavailable to available
+ * - obtain port lock
+ * Transit port to available and added states
+ * - release port lock
+ * - obtain softstate lock
+ * Increase total number of ports
+ * - release softstate lock
+ *
+ * after download added port to vntsd
+ * - obtain port lock
+ * Transit port to not added state
+ * - release port lock
+ *
+ * Transition from available to unavailable
+ * - obtain port lock
+ * - cv_wait read available
+ * Transit port to read unavailable
+ * - cv_wait write available
+ * Transit port to write unavailable
+ * Transit port to not ready. (close ldc channel)
+ * Transit port to deleted state
+ * Transit port to read and write available
+ * - cv_broadcast
+ * - release lock
+ *
+ * vntsd close the deleted port
+ * - obtained port lock
+ * Transit port to close and deleted state
+ * - release port lock
+ *
+ * after vntsd deletion of the port
+ * - obtain softstate lock
+ * - cv_wait port table unlocked
+ * Transit softstate to port table locked
+ * - release softstate lock
+ * - obtain port lock
+ * Transit port to unavailable
+ * destroy port lock
+ * - obtain softstate lock
+ * Transit softstate to port table unlocked
+ * - cv_broadcast
+ * - release softstate lock
+ *
+ * 2. VCC_PORT_OPEN
+ *
+ * Transition from close to open
+ * - obtain port lock
+ * transit port to open
+ * - release port lock
+ *
+ * Transition from open to close
+ * - obtain port lock
+ * - cv_wait read available
+ * Transit port to read unavailable
+ * - cv_wait write available
+ * Transit port to write unavailable
+ * Transit port to not ready. (close ldc channel)
+ * Transit port to close state
+ * Transit port to read and write available
+ * - cv_broadcast
+ * - release lock
+ *
+ * 3. VCC_PORT_USE_READ_LDC/VCC_PORT_USE_WRITE_LDC
+ * Transition from read available/write available
+ * to read unavailable/write unavailable
+ * - obtain port lock
+ * - cv_wait read available
+ * Transit to read/write unavailable
+ * - release port lock
+ *
+ * Transition from read unavailable/write unavailable
+ * to read available/write available
+ * - obtain port lock
+ * Transit to read/write available
+ * - cv_broadcast
+ * - release port lock
+ *
+ * 4. VCC_PORT_LDC_DATA_READY
+ * Transition from data not ready to data ready
+ * - obtain port lock
+ * Transit to data ready
+ * - cv_broadcast
+ * - release port lock
+ *
+ * Transition from data ready to data not ready
+ * - obtain port lock
+ * Transit to data not ready
+ * - release port lock
+ */
+
+#define VCC_PORT_AVAIL 0x1 /* port is configured */
+#define VCC_PORT_OPEN 0x2 /* port is opened */
+#define VCC_PORT_LDC_CHANNEL_READY 0x4 /* ready for data transfer */
+#define VCC_PORT_USE_READ_LDC 0x8 /* read lock */
+#define VCC_PORT_USE_WRITE_LDC 0x10 /* write lock */
+#define VCC_PORT_LDC_DATA_READY 0x20 /* data ready */
+#define VCC_PORT_LDC_WRITE_READY 0x40 /* ldc ready receive data */
+#define VCC_PORT_ADDED 0x80 /* added, no ack from vntsd */
+#define VCC_PORT_UPDATED 0x100 /* updated, no ack from vntsd */
+#define VCC_PORT_TERM_RD 0x200 /* suspend read */
+#define VCC_PORT_TERM_WR 0x400 /* suspend write */
+#define VCC_PORT_NONBLOCK 0x800 /* open with non block flag */
+#define VCC_PORT_LDC_LINK_DOWN 0x1000 /* ldc link down */
+
+/* Poll Flags */
+#define VCC_POLL_CONFIG 0x1 /* poll configuration change */
+
+/* Poll events */
+#define VCC_POLL_ADD_PORT 0x10 /* add a console port */
+#define VCC_POLL_UPDATE_PORT 0x20 /* update a console port */
+
+/* softstate port table state */
+#define VCC_LOCK_PORT_TBL 0x1
+
+/* VCC limits */
+#define VCC_MAX_PORTS 0x800 /* number of domains */
+#define VCC_MAX_MINORS VCC_MAX_PORTS /* number of minors */
+
+
+#define VCC_MAX_PORT_MINORS (VCC_MAX_MINORS - 1)
+#define VCC_CONTROL_MINOR_IDX (VCC_MAX_MINORS - 1)
+
+/* size of vcc message data */
+#define VCC_MTU_SZ 56
+
+
+/* Default values */
+#define VCC_HDR_SZ 8 /* header size */
+#define VCC_BUF_SZ (VCC_HDR_SZ + VCC_MTU_SZ)
+
+#define VCC_CONTROL_PORT 0x7ff /* port 2047 is control port */
+#define VCC_INST_SHIFT 11
+#define VCC_INVALID_CHANNEL -1
+#define VCC_NO_PID_BLOCKING -1
+
+#define VCC_QUEUE_LEN 0x80 /* ldc queue size */
+
+#define VCC_MINOR_NAME_PREFIX "ldom-" /* device name prefix */
+
+/* HV message data type */
+#define LDC_CONSOLE_CTRL 0x1 /* ctrl msg */
+#define LDC_CONSOLE_DATA 0x2 /* data msg */
+
+/* HV control messages */
+#define LDC_CONSOLE_BREAK -1 /* brk */
+#define LDC_CONSOLE_HUP -2 /* hup */
+
+/* minor number to port number */
+#define VCCPORT(p, minor) (p->minor_tbl[(minor & \
+ VCC_CONTROL_PORT)].portno)
+
+/* minor number to minor pointer */
+#define VCCMINORP(p, minor) (&(p->minor_tbl[(minor & \
+ VCC_CONTROL_PORT)]))
+
+/* minor number to instance */
+#define VCCINST(minor) ((minor) >> VCC_INST_SHIFT)
+
+
+/* hv console packet format */
+typedef struct vcc_msg {
+ uint8_t type; /* type - data or ctrl */
+ uint8_t size; /* data size */
+ uint16_t unused; /* not used */
+ int32_t ctrl_msg; /* data if type is ctrl */
+ uint8_t data[VCC_MTU_SZ]; /* data if type is data */
+} vcc_msg_t;
+
+/*
+ * minor node to port mapping table
+ */
+typedef struct vcc_minor {
+ uint_t portno; /* port number */
+ char domain_name[MAXPATHLEN]; /* domain name */
+} vcc_minor_t;
+
+/* console port structure */
+typedef struct vcc_port {
+
+ kmutex_t lock; /* protects port */
+ kcondvar_t read_cv; /* cv to sleep for reads */
+ kcondvar_t write_cv; /* cv to sleep for writes */
+
+ uint_t number; /* port number */
+ uint32_t status; /* port status */
+
+ char group_name[MAXPATHLEN];
+ uint64_t tcp_port; /* tcp port num */
+
+ struct termios term; /* terminal emulation */
+
+ vcc_minor_t *minorp; /* pointer to minor table entry */
+
+ uint64_t ldc_id; /* Channel number */
+ ldc_handle_t ldc_handle; /* Channel handle */
+ ldc_status_t ldc_status; /* Channel Status */
+
+ uint_t pollflag; /* indicated poll status */
+ struct pollhead poll;
+ uint32_t pollevent;
+ pid_t valid_pid; /* pid that allows cb_ops */
+
+} vcc_port_t;
+
+/*
+ * vcc driver's soft state structure
+ */
+typedef struct vcc {
+
+ /* protects vcc_t (soft state) */
+ kmutex_t lock;
+
+ uint_t status;
+
+ dev_info_t *dip; /* dev_info */
+
+ mdeg_node_spec_t *md_ispecp; /* mdeg prop spec */
+ mdeg_handle_t mdeg_hdl; /* mdeg handle */
+
+ vcc_port_t port[VCC_MAX_PORTS]; /* port table */
+ uint_t num_ports; /* avail ports */
+
+ vcc_minor_t minor_tbl[VCC_MAX_PORTS]; /* minor table */
+ uint_t minors_assigned; /* assigned minors */
+} vcc_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VCC_IMPL_H */
diff --git a/usr/src/uts/sun4v/sys/vdc.h b/usr/src/uts/sun4v/sys/vdc.h
new file mode 100644
index 0000000000..a551d6a7f0
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vdc.h
@@ -0,0 +1,260 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VDC_H
+#define _VDC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Virtual disk client implementation definitions
+ */
+
+#include <sys/sysmacros.h>
+#include <sys/note.h>
+
+#include <sys/ldc.h>
+#include <sys/vio_mailbox.h>
+#include <sys/vdsk_mailbox.h>
+#include <sys/vdsk_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VDC_DRIVER_NAME "vdc"
+
+/*
+ * Bit-field values to indicate if parts of the vdc driver are initialised.
+ */
+#define VDC_SOFT_STATE 0x0001
+#define VDC_LOCKS 0x0002
+#define VDC_MINOR 0x0004
+#define VDC_THREAD 0x0008
+#define VDC_LDC 0x0010
+#define VDC_LDC_INIT 0x0020
+#define VDC_LDC_CB 0x0040
+#define VDC_LDC_OPEN 0x0080
+#define VDC_DRING_INIT 0x0100 /* The DRing was created */
+#define VDC_DRING_BOUND 0x0200 /* The DRing was bound to an LDC channel */
+#define VDC_DRING_LOCAL 0x0400 /* The local private DRing was allocated */
+#define VDC_DRING_ENTRY 0x0800 /* At least one DRing entry was initialised */
+#define VDC_DRING (VDC_DRING_INIT | VDC_DRING_BOUND | \
+ VDC_DRING_LOCAL | VDC_DRING_ENTRY)
+#define VDC_HANDSHAKE 0x1000 /* Indicates if a handshake is in progress */
+#define VDC_HANDSHAKE_STOP 0x2000 /* stop further handshakes */
+
+/*
+ * Bit-field values to indicate status of local DRing entry
+ *
+ * The lowest 8 bits are reserved for the DRing state.
+ */
+#define VDC_ALLOC_HANDLE 0x10
+
+/*
+ * Definitions of strings to be used to create device node properties.
+ * (vdc uses the capitalised versions of these properties as they are 64-bit)
+ */
+#define VDC_NBLOCKS_PROP_NAME "Nblocks"
+#define VDC_SIZE_PROP_NAME "Size"
+
+/*
+ * Definitions of MD nodes/properties.
+ */
+#define VDC_MD_CHAN_NAME "channel-endpoint"
+#define VDC_MD_VDEV_NAME "virtual-device"
+#define VDC_MD_DISK_NAME "disk"
+#define VDC_MD_CFG_HDL "cfg-handle"
+#define VDC_ID_PROP "id"
+
+/*
+ * Scheme to store the instance number and the slice number in the minor number.
+ * (Uses the same format and definitions as the sd(7D) driver)
+ */
+#define VD_MAKE_DEV(instance, minor) ((((instance)) << SDUNIT_SHIFT) | (minor))
+
+/*
+ * variables controlling how long to wait before timing out and how many
+ * retries to attempt before giving up when communicating with vds.
+ */
+#define VDC_RETRIES 10
+
+#define VDC_USEC_TIMEOUT_MIN (30 * MICROSEC) /* 30 sec */
+
+#define VD_GET_TIMEOUT_HZ(mul) \
+ (ddi_get_lbolt() + (vdc_hz_timeout * MAX(1, mul)))
+
+/*
+ * Macros to manipulate Descriptor Ring variables in the soft state
+ * structure.
+ */
+#define VDC_GET_NEXT_REQ_ID(vdc) ((vdc->req_id)++)
+
+#define VDC_GET_DRING_ENTRY_PTR(vdc, idx) \
+ ((vd_dring_entry_t *)((vdc)->dring_mem_info.vaddr + \
+ ((idx) * (vdc)->dring_entry_size)))
+
+#define VDC_MARK_DRING_ENTRY_FREE(vdc, idx) \
+ { \
+ vd_dring_entry_t *dep = NULL; \
+ ASSERT(vdc != NULL); \
+ ASSERT((idx >= 0) && (idx < VD_DRING_LEN)); \
+ ASSERT(vdc->dring_mem_info.vaddr != NULL); \
+ dep = (vd_dring_entry_t *)(vdc->dring_mem_info.vaddr + \
+ (idx * vdc->dring_entry_size)); \
+ ASSERT(dep != NULL); \
+ dep->hdr.dstate = VIO_DESC_FREE; \
+ }
+
+/* Initialise the Session ID and Sequence Num in the DRing msg */
+#define VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc) \
+ ASSERT(vdc != NULL); \
+ dmsg.tag.vio_sid = vdc->session_id; \
+ dmsg.seq_num = ++(vdc->seq_num);
+
+/*
+ * The states the message processing thread can be in.
+ */
+typedef enum vdc_thr_state {
+ VDC_THR_RUNNING, /* thread is running & ready to process */
+ VDC_THR_STOP, /* The detach func signals the thread to stop */
+ VDC_THR_DONE /* Thread has exited */
+} vdc_thr_state_t;
+
+/*
+ * Local Descriptor Ring entry
+ *
+ * vdc creates a Local (private) descriptor ring the same size as the
+ * public descriptor ring it exports to vds.
+ */
+typedef struct vdc_local_desc {
+ kmutex_t lock; /* protects all fields */
+ kcondvar_t cv; /* indicate processing done */
+ int flags; /* Dring entry state, etc */
+ int operation; /* VD_OP_xxx to be performed */
+ caddr_t addr; /* addr passed in by consumer */
+ caddr_t align_addr; /* used if addr non-aligned */
+ struct buf *buf; /* buf passed to strategy() */
+ ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */
+ vd_dring_entry_t *dep; /* public Dring Entry Pointer */
+} vdc_local_desc_t;
+
+/*
+ * vdc soft state structure
+ */
+typedef struct vdc {
+ kmutex_t attach_lock; /* used by CV which waits in attach */
+ kcondvar_t attach_cv; /* signal when attach can finish */
+
+ kmutex_t lock; /* protects next 2 sections of vars */
+ kcondvar_t cv; /* signal when upper layers can send */
+
+ dev_info_t *dip; /* device info pointer */
+ int instance; /* driver instance number */
+ int initialized; /* keeps track of what's init'ed */
+ int open; /* count of outstanding opens */
+ int dkio_flush_pending; /* # outstanding DKIO flushes */
+
+ uint64_t session_id; /* common ID sent with all messages */
+ uint64_t seq_num; /* most recent sequence num generated */
+ uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */
+ uint64_t req_id; /* Most recent Request ID generated */
+ vd_state_t state; /* Current handshake state */
+ vd_disk_type_t vdisk_type; /* type of device/disk being imported */
+ uint64_t vdisk_size; /* device size in bytes */
+ uint64_t max_xfer_sz; /* maximum block size of a descriptor */
+ uint64_t block_size; /* device block size used */
+ struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */
+ struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */
+ struct vtoc *vtoc; /* structure to store VTOC data */
+
+ /*
+ * The mutex 'msg_proc_lock' protects the following group of fields.
+ *
+ * The callback function checks to see if LDC triggered it due to
+ * there being data available and the callback will signal to
+ * the message processing thread waiting on 'msg_proc_cv'.
+ */
+ kmutex_t msg_proc_lock;
+ kcondvar_t msg_proc_cv;
+ boolean_t msg_pending;
+ vdc_thr_state_t msg_proc_thr_state;
+ kthread_t *msg_proc_thr_id;
+
+ /*
+ * The mutex 'dring_lock' protects the following group of fields.
+ */
+ kmutex_t dring_lock;
+ ldc_mem_info_t dring_mem_info;
+ uint_t dring_curr_idx;
+ uint32_t dring_len;
+ uint32_t dring_cookie_count;
+ uint32_t dring_entry_size;
+ ldc_mem_cookie_t *dring_cookie;
+ uint64_t dring_ident;
+
+ vdc_local_desc_t *local_dring;
+
+ uint64_t ldc_id;
+ ldc_status_t ldc_state;
+ ldc_handle_t ldc_handle;
+ ldc_dring_handle_t ldc_dring_hdl;
+} vdc_t;
+
+/*
+ * Debugging macros
+ */
+#ifdef DEBUG
+extern int vdc_msglevel;
+
+#define PR0 if (vdc_msglevel > 0) \
+ vdc_msg
+
+#define PR1 if (vdc_msglevel > 1) \
+ vdc_msg
+
+#define PR2 if (vdc_msglevel > 2) \
+ vdc_msg
+
+#define VDC_DUMP_DRING_MSG(dmsgp) \
+ vdc_msg("sq:%d start:%d end:%d ident:%x\n", \
+ dmsgp->seq_num, dmsgp->start_idx, \
+ dmsgp->end_idx, dmsgp->dring_ident);
+
+#else /* !DEBUG */
+#define PR0(...)
+#define PR1(...)
+#define PR2(...)
+
+#define VDC_DUMP_DRING_MSG(dmsgp)
+
+#endif /* !DEBUG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDC_H */
diff --git a/usr/src/uts/sun4v/sys/vdsk_common.h b/usr/src/uts/sun4v/sys/vdsk_common.h
new file mode 100644
index 0000000000..7cfffda28c
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vdsk_common.h
@@ -0,0 +1,194 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VDSK_COMMON_H
+#define _VDSK_COMMON_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This header file contains the private LDoms Virtual Disk (vDisk) definitions
+ * common to both the server (vds) and the client (vdc)
+ */
+
+#include <sys/machparam.h>
+#include <sys/vtoc.h>
+
+#include <sys/ldc.h>
+#include <sys/vio_common.h>
+#include <sys/vio_mailbox.h>
+
+/*
+ * vDisk definitions
+ */
+
+/*
+ * The number of Descriptor Ring entries
+ *
+ * Constraints:
+ * - overall DRing size must be greater than 8K (MMU_PAGESIZE)
+ * - overall DRing size should be 8K aligned (desirable but not enforced)
+ * - DRing entry must be 8 byte aligned
+ */
+#define VD_DRING_LEN 512
+
+/*
+ * Size of a DRing entry, including space for the maximum number of cookies.
+ */
+#define VD_DRING_ENTRY_SZ (sizeof (vd_dring_entry_t) + \
+ (sizeof (ldc_mem_cookie_t) * (VD_MAX_COOKIES - 1)))
+
+/*
+ * The maximum block size we can transmit using one Descriptor Ring entry
+ *
+ * Currently no FS uses more than 128K and it doesn't look like they
+ * will either as there is no perf gain to be had by larger values.
+ * ( see ZFS comment at definition of SPA_MAXBLOCKSIZE ).
+ *
+ * We choose 256K to give us some headroom.
+ */
+#define VD_MAX_BLOCK_SIZE (256 * 1024)
+
+#define VD_MAX_COOKIES ((VD_MAX_BLOCK_SIZE / PAGESIZE) + 1)
+#define VD_USEC_TIMEOUT 20000
+#define VD_LDC_IDS_PROP "ldc-ids"
+#define VD_LDC_QLEN 32
+
+/*
+ * Flags used by ioctl routines to indicate if a copyin/copyout is needed
+ */
+#define VD_COPYOUT 0x1
+#define VD_COPYIN 0x2
+
+/*
+ * vDisk operations on physical devices
+ */
+#define VD_OP_BREAD 0x01 /* Block Read */
+#define VD_OP_BWRITE 0x02 /* Block Write */
+#define VD_OP_FLUSH 0x03 /* Flush disk write cache contents */
+#define VD_OP_GET_WCE 0x04 /* Get disk W$ status */
+#define VD_OP_SET_WCE 0x05 /* Enable/Disable disk W$ */
+#define VD_OP_GET_VTOC 0x06 /* Get VTOC */
+#define VD_OP_SET_VTOC 0x07 /* Set VTOC */
+#define VD_OP_GET_DISKGEOM 0x08 /* Get disk geometry */
+#define VD_OP_SET_DISKGEOM 0x09 /* Set disk geometry */
+#define VD_OP_SCSICMD 0x0a /* SCSI control command */
+#define VD_OP_MASK 0xFF /* mask of all possible operations */
+#define VD_OP_COUNT 10 /* Number of operations */
+
+/*
+ * Definitions of the various ways vds can export disk support to vdc.
+ */
+typedef enum vd_disk_type {
+ VD_DISK_TYPE_UNK = 0, /* Unknown device type */
+ VD_DISK_TYPE_SLICE, /* slice in block device */
+ VD_DISK_TYPE_DISK /* entire disk (slice 2) */
+} vd_disk_type_t;
+
+/*
+ * vDisk Descriptor payload
+ */
+typedef struct vd_dring_payload {
+ uint64_t req_id; /* The request ID being processed */
+ uint8_t operation; /* operation for server to perform */
+ uint8_t slice; /* The disk slice being accessed */
+ uint16_t resv1; /* padding */
+ uint32_t status; /* "errno" of server operation */
+ uint64_t addr; /* LP64 diskaddr_t (block I/O) */
+ uint64_t nbytes; /* LP64 size_t */
+ uint32_t ncookies; /* Number of cookies used */
+ uint32_t resv2; /* padding */
+
+ ldc_mem_cookie_t cookie[1]; /* variable sized array */
+} vd_dring_payload_t;
+
+
+/*
+ * vDisk Descriptor entry
+ */
+typedef struct vd_dring_entry {
+ vio_dring_entry_hdr_t hdr; /* common header */
+ vd_dring_payload_t payload; /* disk specific data */
+} vd_dring_entry_t;
+
+
+/*
+ * vDisk control operation structures
+ *
+ * XXX FIXME - future support - add structures for VD_OP_XXXX
+ */
+
+/*
+ * VTOC message
+ *
+ * vDisk Get Volume Table of Contents (VD_OP_GET_VTOC)
+ *
+ */
+typedef struct vd_partition {
+ uint16_t p_tag; /* ID tag of partition */
+ uint16_t p_flag; /* permission flags */
+ uint32_t reserved; /* padding */
+ int64_t p_start; /* start sector no of partition */
+ int64_t p_size; /* # of blocks in partition */
+} vd_partition_t;
+
+typedef struct vd_vtoc {
+ uint8_t v_volume[LEN_DKL_VVOL]; /* volume name */
+ uint16_t v_sectorsz; /* sector size in bytes */
+ uint16_t v_nparts; /* num of partitions */
+ uint32_t reserved; /* padding */
+ uint8_t v_asciilabel[LEN_DKL_ASCII]; /* for compatibility */
+
+} vd_vtoc_t;
+
+
+/*
+ * vDisk Get Geometry (VD_OP_GET_DISKGEOM)
+ */
+typedef struct vd_geom {
+ uint16_t dkg_ncyl; /* # of data cylinders */
+ uint16_t dkg_acyl; /* # of alternate cylinders */
+ uint16_t dkg_bcyl; /* cyl offset (for fixed head area) */
+ uint16_t dkg_nhead; /* # of heads */
+ uint16_t dkg_nsect; /* # of data sectors per track */
+ uint16_t dkg_intrlv; /* interleave factor */
+ uint16_t dkg_apc; /* alternates per cyl (SCSI only) */
+ uint16_t dkg_rpm; /* revolutions per minute */
+ uint16_t dkg_pcyl; /* # of physical cylinders */
+ uint16_t dkg_write_reinstruct; /* # sectors to skip, writes */
+ uint16_t dkg_read_reinstruct; /* # sectors to skip, reads */
+} vd_geom_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDSK_COMMON_H */
diff --git a/usr/src/uts/sun4v/sys/vdsk_mailbox.h b/usr/src/uts/sun4v/sys/vdsk_mailbox.h
new file mode 100644
index 0000000000..553ac2c9b6
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vdsk_mailbox.h
@@ -0,0 +1,100 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VDSK_MAILBOX_H
+#define _VDSK_MAILBOX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This header file contains the private LDoms Virtual Disk (vDisk) mailbox
+ * definitions common to both the server (vds) and the client (vdc)
+ */
+
+#include <sys/vio_mailbox.h>
+#include <sys/vio_common.h>
+#include <sys/vdsk_common.h>
+
+/*
+ * Definition of the various states the vDisk state machine can
+ * be in during the handshake between vdc and vds.
+ */
+typedef enum vd_state {
+ VD_STATE_INIT = 0,
+ VD_STATE_VER,
+ VD_STATE_ATTR,
+ VD_STATE_DRING,
+ VD_STATE_RDX,
+ VD_STATE_DATA
+} vd_state_t;
+
+#define VD_VER_MAJOR 0x1
+#define VD_VER_MINOR 0x0
+
+/*
+ * vDisk device attributes information message.
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_ATTR_INFO
+ */
+typedef struct vd_attr_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* vdisk-attribute-specific payload */
+ uint8_t xfer_mode; /* data exchange method. */
+ uint8_t vdisk_type; /* disk, slice, read-only, etc. */
+ uint16_t resv; /* padding */
+ uint32_t vdisk_block_size; /* bytes per disk block */
+ uint64_t operations; /* bit-field of server supported ops */
+ uint64_t vdisk_size; /* size for Nblocks property. */
+ uint64_t max_xfer_sz; /* maximum block transfer size */
+
+ uint64_t resv2[VIO_PAYLOAD_ELEMS - 4]; /* padding */
+} vd_attr_msg_t;
+
+/*
+ * vDisk inband descriptor message.
+ *
+ * For clients that do not use descriptor rings, the descriptor contents
+ * are sent as part of an inband message.
+ */
+typedef struct vd_dring_inband_msg {
+ vio_inband_desc_msg_hdr_t hdr;
+ vd_dring_payload_t payload;
+} vd_dring_inband_msg_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDSK_MAILBOX_H */
diff --git a/usr/src/uts/sun4v/sys/vio_common.h b/usr/src/uts/sun4v/sys/vio_common.h
new file mode 100644
index 0000000000..f2a6d7968b
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vio_common.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_VIO_COMMON_H
+#define _SYS_VIO_COMMON_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Common header for VIO descriptor ring entries
+ */
+typedef struct vio_dring_entry_hdr {
+ uint8_t dstate; /* Current state of Dring entry */
+ uint8_t ack:1; /* 1 => receiver must ACK when DONE */
+
+ /*
+ * Padding.
+ */
+ uint16_t resv[3];
+} vio_dring_entry_hdr_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VIO_COMMON_H */
diff --git a/usr/src/uts/sun4v/sys/vio_mailbox.h b/usr/src/uts/sun4v/sys/vio_mailbox.h
new file mode 100644
index 0000000000..eef1e1d5ef
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vio_mailbox.h
@@ -0,0 +1,331 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_VIO_MAILBOX_H
+#define _SYS_VIO_MAILBOX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/ldc.h>
+
+/* Message types */
+#define VIO_TYPE_CTRL 0x1
+#define VIO_TYPE_DATA 0x2
+#define VIO_TYPE_ERR 0x4
+
+/* Message sub-types */
+#define VIO_SUBTYPE_INFO 0x1
+#define VIO_SUBTYPE_ACK 0x2
+#define VIO_SUBTYPE_NACK 0x4
+
+/*
+ * VIO specific control envelopes: 0x0000 - 0x00FF
+ * VNET specific control envelopes: 0x0100 - 0x01FF
+ * VDSK specific control envelopes: 0x0200 - 0x02FF
+ * UNUSED envelopes: 0x0300 - 0x0FFF
+ */
+
+/*
+ * Generic Control Subtype Envelopes:
+ * type == VIO_TYPE_CTRL
+ * subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ *
+ * 0x0000 - 0x003F
+ */
+#define VIO_VER_INFO 0x0001
+#define VIO_ATTR_INFO 0x0002
+#define VIO_DRING_REG 0x0003
+#define VIO_DRING_UNREG 0x0004
+#define VIO_RDX 0x0005
+
+/*
+ * Generic subtype Data envelopes
+ * type == VIO_TYPE_DATA
+ * subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ *
+ * 0x0040 - 0x007F
+ */
+#define VIO_PKT_DATA 0x0040
+#define VIO_DESC_DATA 0x0041
+#define VIO_DRING_DATA 0x0042
+
+
+/*
+ * Generic subtype Error envelopes
+ * type == VIO_TYPE_ERR
+ * subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ *
+ * 0x0080 - 0x00FF
+ *
+ * Currently unused
+ */
+
+/*
+ * Supported Device Types
+ */
+#define VDEV_NETWORK 0x1
+#define VDEV_NETWORK_SWITCH 0x2
+#define VDEV_DISK 0x3
+#define VDEV_DISK_SERVER 0x4
+
+/* addr_type */
+#define ADDR_TYPE_MAC 0x1 /* XXX move to vnet_mailbox.h ? */
+
+/*
+ * VIO data transfer mode
+ */
+#define VIO_PKT_MODE 0x1
+#define VIO_DESC_MODE 0x2
+#define VIO_DRING_MODE 0x3
+
+/*
+ * VIO Descriptor Ring registration options
+ * (intended use for Descriptor Ring)
+ */
+#define VIO_TX_DRING 0x1
+#define VIO_RX_DRING 0x2
+
+/*
+ * Size of message payload
+ */
+#define VIO_MSGTAG_SZ (sizeof (vio_msg_tag_t)) /* bytes */
+#define VIO_PAYLOAD_SZ (LDC_PAYLOAD_SIZE_UNRELIABLE - VIO_MSGTAG_SZ)
+#define VIO_PAYLOAD_ELEMS (VIO_PAYLOAD_SZ / LDC_ELEM_SIZE) /* num words */
+
+/*
+ * VIO device message tag.
+ *
+ * These 64 bits are used as a common header for all VIO message types.
+ */
+typedef union vio_msg_tag {
+ struct {
+ uint8_t _msgtype;
+ uint8_t _subtype;
+ uint16_t _subtype_env;
+ uint32_t _sid; /* session id */
+ } _hdr;
+ uint64_t tagword;
+} vio_msg_tag_t;
+
+#define vio_msgtype _hdr._msgtype
+#define vio_subtype _hdr._subtype
+#define vio_subtype_env _hdr._subtype_env
+#define vio_sid _hdr._sid
+
+/*
+ * VIO version negotiation message.
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_VER_INFO
+ */
+typedef struct vio_ver_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* version specific payload */
+ uint32_t ver_major:16, /* major version number */
+ ver_minor:16; /* minor version number */
+
+ uint8_t dev_class; /* type of device */
+
+ /* padding */
+ uint8_t resv1;
+ uint16_t resv2;
+ uint64_t resv3[VIO_PAYLOAD_ELEMS - 1];
+} vio_ver_msg_t;
+
+/*
+ * VIO Descriptor Ring Register message.
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_DRING_REG
+ */
+typedef struct vio_dring_reg_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* Descriptor ring information */
+ uint64_t dring_ident; /* =0 for SUBTYPE_INFO msg */
+ uint32_t num_descriptors; /* # of desc in the ring */
+ uint32_t descriptor_size; /* size of each entry */
+ uint16_t options; /* intended use */
+ uint16_t resv; /* padding */
+ uint32_t ncookies; /* # cookies exporting ring */
+
+ /*
+ * cookie is a variable sized array. If the number of cookies is 1,
+ * the message can be sent by LDC without fragmentation.
+ */
+ ldc_mem_cookie_t cookie[1];
+} vio_dring_reg_msg_t;
+
+/*
+ * VIO Descriptor Ring Unregister message.
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_DRING_UNREG
+ */
+typedef struct vio_dring_unreg_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* Descriptor ring information */
+ uint64_t dring_ident;
+ uint64_t resv[VIO_PAYLOAD_ELEMS - 1];
+} vio_dring_unreg_msg_t;
+
+
+/*
+ * Definition of a generic VIO message (with no payload) which can be cast
+ * to other message types.
+ */
+typedef struct vio_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* no payload */
+ uint64_t resv[VIO_PAYLOAD_ELEMS];
+} vio_msg_t;
+
+/*
+ * VIO Ready to Receive message.
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK}
+ * tag.subtype_env == VIO_RDX
+ */
+typedef vio_msg_t vio_rdx_msg_t;
+
+/*
+ * VIO error message.
+ *
+ * tag.msgtype == VIO_TYPE_ERR
+ * tag.subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == TBD
+ */
+typedef vio_msg_t vio_err_msg_t;
+
+/*
+ * VIO descriptor ring data message.
+ *
+ * tag.msgtype == VIO_TYPE_DATA
+ * tag.subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_DRING_DATA
+ */
+typedef struct vio_dring_msg {
+ /* Common message tag */
+ vio_msg_tag_t tag;
+
+ /* Data dring info */
+ uint64_t seq_num;
+ uint64_t dring_ident; /* ident of modified DRing */
+ uint32_t start_idx; /* Indx of first updated elem */
+ int32_t end_idx; /* Indx of last updated elem */
+
+ /*
+ * Padding.
+ */
+ uint64_t resv[VIO_PAYLOAD_ELEMS - 3];
+} vio_dring_msg_t;
+
+/*
+ * VIO Common header for inband descriptor messages.
+ *
+ * Clients will then combine this header with a device specific payload.
+ */
+typedef struct vio_inband_desc_msg_hdr {
+ /* Common message tag */
+ vio_msg_tag_t tag;
+
+ uint64_t seq_num; /* sequence number */
+ uint64_t desc_handle; /* opaque descriptor handle */
+} vio_inband_desc_msg_hdr_t;
+
+/*
+ * VIO raw data message.
+ *
+ * tag.msgtype == VIO_TYPE_DATA
+ * tag.subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_PKT_DATA
+ *
+ * Note the data payload is so small to keep this message
+ * within the size LDC can cope with without fragmentation.
+ * If it turns out in the future that we are not concerned
+ * with fragmentation then we can increase the size of this
+ * field.
+ */
+typedef struct vio_raw_data_msg {
+ /* Common message tag */
+ vio_msg_tag_t tag;
+
+ /* Raw data packet payload */
+ uint64_t seq_num; /* sequence number */
+ uint64_t data[VIO_PAYLOAD_ELEMS - 1];
+} vio_raw_data_msg_t;
+
+/*
+ * Definitions of the valid states a Descriptor can be in.
+ */
+#define VIO_DESC_FREE 0x1
+#define VIO_DESC_READY 0x2
+#define VIO_DESC_ACCEPTED 0x3
+#define VIO_DESC_DONE 0x4
+#define VIO_DESC_MASK 0xf
+
+/* Macro to check that the state in variable supplied is a valid DRing state */
+#define VIO_IS_VALID_DESC_STATE(flag) \
+	((((flag) & VIO_DESC_MASK) == VIO_DESC_FREE) || \
+	(((flag) & VIO_DESC_MASK) == VIO_DESC_READY) || \
+	(((flag) & VIO_DESC_MASK) == VIO_DESC_ACCEPTED) || \
+	(((flag) & VIO_DESC_MASK) == VIO_DESC_DONE))
+
+#define VIO_SET_DESC_STATE(flag, state) \
+	{ \
+	(flag) &= ~VIO_DESC_MASK; /* clear the old state bits first */ \
+	(flag) |= ((state) & VIO_DESC_MASK); \
+	}
+
+#define VIO_GET_DESC_STATE(flag) ((flag) & VIO_DESC_MASK)
+
+/* Macro to populate the generic fields of the DRing data msg */
+#define VIO_INIT_DRING_DATA_TAG(dmsg) \
+ dmsg.tag.vio_msgtype = VIO_TYPE_DATA; \
+ dmsg.tag.vio_subtype = VIO_SUBTYPE_INFO; \
+ dmsg.tag.vio_subtype_env = VIO_DRING_DATA;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VIO_MAILBOX_H */
diff --git a/usr/src/uts/sun4v/sys/vldc.h b/usr/src/uts/sun4v/sys/vldc.h
new file mode 100644
index 0000000000..112f17c2a7
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vldc.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VLDC_H
+#define _VLDC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+
+/* Channel IOCTL Commands */
+
+#define VLDC_IOCTL_SHIFT 8
+#define VLDC_IOCTL ('1' << VLDC_IOCTL_SHIFT)
+
+#define VLDC_IOCTL_OPT_OP (VLDC_IOCTL | 0x1) /* ctrl op */
+#define VLDC_IOCTL_READ_COOKIE (VLDC_IOCTL | 0x2) /* read cookie */
+#define VLDC_IOCTL_WRITE_COOKIE (VLDC_IOCTL | 0x3) /* write cookie */
+
+/* supported ctrl operations */
+#define VLDC_OP_GET 0x1 /* get specified value */
+#define VLDC_OP_SET 0x2 /* set specified value */
+
+/* supported ctrl operation options */
+#define VLDC_OPT_MTU_SZ 0x1 /* MTU */
+#define VLDC_OPT_STATUS 0x2 /* port status */
+#define VLDC_OPT_MODE 0x3 /* port channel mode */
+
+/* values returned by VLDC_OPT_OP_STATUS */
+#define VLDC_PORT_CLOSED 0x1 /* port is closed */
+#define VLDC_PORT_OPEN 0x2 /* port is already open */
+#define VLDC_PORT_READY 0x4 /* port is open and ready */
+
+/*
+ * Values for VLDC_OPT_MODE are defined in ldc.h.
+ */
+
+/*
+ * Structure that is used by vldc driver and all its clients to communicate
+ * the type and nature of the option as well as for clients to get port
+ * status.
+ */
+typedef struct vldc_opt_op {
+ int32_t op_sel; /* operation selector(ex: GET) */
+ int32_t opt_sel; /* option selector (ex: MTU) */
+ uint32_t opt_val; /* option value to set or returned */
+} vldc_opt_op_t;
+
+/*
+ * Structure that is used by the LDom manager to download instruction
+ * sequences and read/write new machine descriptions.
+ */
+typedef struct vldc_data {
+ uint64_t src_addr; /* source address */
+ uint64_t dst_addr; /* destination address */
+ uint64_t length; /* size of transfer */
+} vldc_data_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VLDC_H */
diff --git a/usr/src/uts/sun4v/sys/vldc_impl.h b/usr/src/uts/sun4v/sys/vldc_impl.h
new file mode 100644
index 0000000000..8610344b42
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vldc_impl.h
@@ -0,0 +1,133 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VLDC_IMPL_H
+#define _VLDC_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/stream.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ldc.h>
+#include <sys/vldc.h>
+
+/* default values */
+#define VLDC_DEFAULT_MTU 0x800 /* default mtu size */
+
+/* VLDC limits */
+#define VLDC_MAX_COOKIE 0x40000 /* max. size of xfer to/from HV */
+#define VLDC_MAX_MTU 0x40000 /* 256K */
+#define VLDC_MAX_PORTS 0x800
+#define VLDC_MAX_MINORS VLDC_MAX_PORTS
+#define VLDC_QUEUE_LEN 0x80
+
+#define VLDC_MINOR_MASK (VLDC_MAX_PORTS - 1)
+#define VLDC_INST_SHIFT 11
+
+/* get port number from minor number */
+#define VLDCPORT(vldcp, minor) \
+ ((vldcp)->minor_tbl[(minor) & VLDC_MINOR_MASK].portno)
+
+/* get minor table entry from minor number */
+#define VLDCMINOR(vldcp, minor) \
+ (&((vldcp)->minor_tbl[(minor) & VLDC_MINOR_MASK]))
+
+/* get instance number from minor number */
+#define VLDCINST(minor) ((minor) >> VLDC_INST_SHIFT)
+
+/* indicates an invalid port number */
+#define VLDC_INVALID_PORTNO ((uint_t)-1)
+
+/*
+ * Minor node number to port number mapping table.
+ *
+ * The lock field in the vldc_minor structure is used to serialize operations
+ * on the port associated with the minor node. It also protects the minor node
+ * in_use field which is used to track the number of active users of the minor
+ * node. Driver ops will either hold the lock over the whole operation or
+ * will increment (and then decrement) the in use count if they need to
+ * release and re-acquire the lock, e.g. when copying data in from or out to
+ * userland. When the MDEG framework calls into the driver via the callback to
+ * remove a port, the driver must wait until the in use count for the minor
+ * node associated with the port drops to zero, before it can remove the
+ * port.
+ */
+typedef struct vldc_minor {
+ kmutex_t lock; /* protects port/in_use count */
+ kcondvar_t cv; /* for waiting on in use */
+ uint_t in_use; /* in use counter */
+ uint_t portno; /* port number */
+ char sname[MAXPATHLEN]; /* service name */
+} vldc_minor_t;
+
+typedef struct vldc_port {
+ uint_t number; /* port number */
+ uint32_t status; /* port status */
+ vldc_minor_t *minorp; /* minor table entry pointer */
+ uint32_t mtu; /* port mtu */
+ caddr_t send_buf; /* send buffer */
+ caddr_t recv_buf; /* receive buffer */
+
+ uint64_t ldc_id; /* Channel number */
+ ldc_handle_t ldc_handle; /* Channel handle */
+ ldc_mode_t ldc_mode; /* Channel mode */
+
+ boolean_t is_stream; /* streaming mode */
+ boolean_t hanged_up; /* port hanged up */
+
+ struct pollhead poll; /* for poll */
+} vldc_port_t;
+
+/*
+ * vldc driver's soft state structure
+ */
+typedef struct vldc {
+ kmutex_t lock; /* serializes detach and MDEG */
+ boolean_t detaching; /* true iff busy detaching */
+ dev_info_t *dip; /* dev_info */
+ mdeg_node_spec_t *inst_spec; /* vldc instance specifier */
+ mdeg_handle_t mdeg_hdl; /* MD event handle */
+
+ uint_t num_ports;
+ vldc_port_t port[VLDC_MAX_PORTS];
+
+ /* table for assigned minors */
+ vldc_minor_t minor_tbl[VLDC_MAX_MINORS];
+
+ /* number of minors already assigned */
+ uint_t minors_assigned;
+} vldc_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VLDC_IMPL_H */
diff --git a/usr/src/uts/sun4v/sys/vnet.h b/usr/src/uts/sun4v/sys/vnet.h
new file mode 100644
index 0000000000..b7b111eb61
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vnet.h
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VNET_H
+#define _VNET_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VNET_SUCCESS (0) /* successful return */
+#define VNET_FAILURE (-1) /* unsuccessful return */
+
+#define KMEM_FREE(_p) kmem_free((_p), sizeof (*(_p)))
+
+#define VNET_NTXDS 512 /* power of 2 tx descriptors */
+#define VNET_RECLAIM_LOWAT 32 /* tx reclaim low watermark */
+#define VNET_RECLAIM_HIWAT (512 - 32) /* tx reclaim high watermark */
+#define VNET_LDCWD_INTERVAL 1000 /* watchdog freq in msec */
+#define VNET_LDCWD_TXTIMEOUT 1000 /* tx timeout in msec */
+#define VNET_LDC_QLEN 1024 /* ldc qlen */
+
+/*
+ * vnet proxy transport layer information. There is one instance of this for
+ * every transport being used by a vnet device and a list of these transports
+ * is maintained by vnet.
+ */
+typedef struct vp_tl {
+ struct vp_tl *nextp; /* next in list */
+ mac_t *macp; /* transport ops */
+ char name[LIFNAMSIZ]; /* device name */
+ major_t major; /* driver major # */
+ uint_t instance; /* dev instance */
+} vp_tl_t;
+
+/*
+ * Forwarding database (FDB) entry, used by vnet to provide switching
+ * functionality. Each fdb entry corresponds to a destination vnet device
+ * within the ldoms which is directly reachable by invoking a transmit
+ * function provided by a vnet proxy transport layer. Currently, the generic
+ * transport layer adds/removes/modifies entries in fdb.
+ */
+typedef struct fdb {
+ struct fdb *nextp; /* next entry in the list */
+ uint8_t macaddr[ETHERADDRL]; /* destination mac address */
+ mac_tx_t m_tx; /* transmit function */
+ void *txarg; /* arg to the transmit func */
+} fdb_t;
+
+/* FDB hash queue head */
+typedef struct fdbf_s {
+ fdb_t *headp; /* head of fdb entries */
+ krwlock_t rwlock; /* protect the list */
+} fdb_fanout_t;
+
+#define VNET_NFDB_HASH 4 /* default number of hash queues in fdb */
+#define VNET_NFDB_HASH_MAX 32 /* max number of hash queues in fdb */
+
+/* Hash calculation using the mac address */
+#define MACHASH(a, n) ((*(((uchar_t *)(a)) + 0) ^ \
+ *(((uchar_t *)(a)) + 1) ^ \
+ *(((uchar_t *)(a)) + 2) ^ \
+ *(((uchar_t *)(a)) + 3) ^ \
+ *(((uchar_t *)(a)) + 4) ^ \
+ *(((uchar_t *)(a)) + 5)) % (uint32_t)n)
+
+/* rwlock macros */
+#define READ_ENTER(x) rw_enter(x, RW_READER)
+#define WRITE_ENTER(x) rw_enter(x, RW_WRITER)
+#define RW_EXIT(x) rw_exit(x)
+
+/*
+ * vnet instance state information
+ */
+typedef struct vnet {
+ int instance; /* instance # */
+ dev_info_t *dip; /* dev_info */
+ struct vnet *nextp; /* next in list */
+ mac_t *macp; /* MAC - macinfo */
+ uchar_t vendor_addr[ETHERADDRL]; /* orig macadr */
+ uchar_t curr_macaddr[ETHERADDRL]; /* current macadr */
+ vp_tl_t *tlp; /* list of vp_tl */
+ krwlock_t trwlock; /* lock for vp_tl list */
+ char vgen_name[MAXNAMELEN]; /* name of generic tl */
+ fdb_fanout_t *fdbhp; /* fdb hash queues */
+ int nfdb_hash; /* num fdb hash queues */
+} vnet_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VNET_H */
diff --git a/usr/src/uts/sun4v/sys/vnet_common.h b/usr/src/uts/sun4v/sys/vnet_common.h
new file mode 100644
index 0000000000..feed7025a2
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vnet_common.h
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VNET_COMMON_H
+#define _VNET_COMMON_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/vio_common.h>
+#include <sys/vio_mailbox.h>
+#include <sys/ethernet.h>
+
+/*
+ * This header file contains definitions common to LDoms Virtual Network
+ * server (vsw) and client (vnet).
+ */
+
+/* max # of cookies per frame size */
+#define MAX_COOKIES ((ETHERMAX >> MMU_PAGESHIFT) + 2)
+
+/* initial send sequence number */
+#define VNET_ISS 0x1
+
+/* vnet descriptor */
+typedef struct vnet_public_desc {
+ vio_dring_entry_hdr_t hdr; /* descriptor header */
+ uint32_t nbytes; /* data length */
+ uint32_t ncookies; /* number of data cookies */
+ ldc_mem_cookie_t memcookie[MAX_COOKIES]; /* data cookies */
+} vnet_public_desc_t;
+
+/*
+ * VIO in-band descriptor. Used by those vio clients
+ * such as OBP who do not use descriptor rings.
+ */
+typedef struct vio_ibnd_desc {
+ vio_inband_desc_msg_hdr_t hdr;
+
+ /* payload */
+ uint32_t nbytes;
+ uint32_t ncookies;
+ ldc_mem_cookie_t memcookie[MAX_COOKIES];
+} vio_ibnd_desc_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VNET_COMMON_H */
diff --git a/usr/src/uts/sun4v/sys/vnet_gen.h b/usr/src/uts/sun4v/sys/vnet_gen.h
new file mode 100644
index 0000000000..2ce1f390d8
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vnet_gen.h
@@ -0,0 +1,337 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VNET_GEN_H
+#define _VNET_GEN_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VGEN_SUCCESS (0) /* successful return */
+#define VGEN_FAILURE (-1) /* unsuccessful return */
+
+#define VGEN_NUM_VER 1 /* max # of vgen versions */
+
+#define VGEN_LOCAL 1 /* local ldc end-point */
+#define VGEN_PEER 2 /* peer ldc end-point */
+
+/* vgen_t flags */
+#define VGEN_STOPPED 0x0
+#define VGEN_STARTED 0x1
+
+#define KMEM_FREE(_p) kmem_free((_p), sizeof (*(_p)))
+
+#define VGEN_INIT_MCTAB_SIZE 16 /* initial size of multicast table */
+
+#define READ_ENTER(x) rw_enter(x, RW_READER)
+#define WRITE_ENTER(x) rw_enter(x, RW_WRITER)
+#define RW_EXIT(x) rw_exit(x)
+
+/* channel flags */
+#define CHANNEL_ATTACHED 0x1
+#define CHANNEL_STARTED 0x2
+
+/* transmit return values */
+#define VGEN_TX_SUCCESS 0 /* transmit success */
+#define VGEN_TX_FAILURE 1 /* transmit failure */
+#define VGEN_TX_NORESOURCES 2 /* out of tbufs/txds */
+
+/* private descriptor flags */
+#define VGEN_PRIV_DESC_FREE 0x0 /* desc is available */
+#define VGEN_PRIV_DESC_BUSY 0x1 /* desc in use */
+
+#define LDC_TO_VNET(ldcp) ((ldcp)->portp->vgenp->vnetp)
+#define LDC_TO_VGEN(ldcp) ((ldcp)->portp->vgenp)
+
+/* get the address of next tbuf */
+#define NEXTTBUF(ldcp, tbufp) (((tbufp) + 1) == (ldcp)->tbufendp \
+ ? (ldcp)->tbufp : ((tbufp) + 1))
+
+/* increment recv index */
+#define INCR_RXI(i, ldcp) \
+ ((i) = (((i) + 1) & ((ldcp)->num_rxds - 1)))
+
+/* decrement recv index */
+#define DECR_RXI(i, ldcp) \
+ ((i) = (((i) - 1) & ((ldcp)->num_rxds - 1)))
+
+/* increment tx index */
+#define INCR_TXI(i, ldcp) \
+ ((i) = (((i) + 1) & ((ldcp)->num_txds - 1)))
+
+/* decrement tx index */
+#define DECR_TXI(i, ldcp) \
+ ((i) = (((i) - 1) & ((ldcp)->num_txds - 1)))
+
+/* bounds check rx index */
+#define CHECK_RXI(i, ldcp) \
+ (((i) >= 0) && ((i) < (ldcp)->num_rxds))
+
+/* bounds check tx index */
+#define CHECK_TXI(i, ldcp) \
+ (((i) >= 0) && ((i) < (ldcp)->num_txds))
+
+/* private descriptor */
+typedef struct vgen_priv_desc {
+ uint64_t flags; /* flag bits */
+ vnet_public_desc_t *descp; /* associated public desc */
+ ldc_mem_handle_t memhandle; /* mem handle for data */
+ mblk_t *mp; /* associated packet */
+ uint64_t datap; /* mp->b_rptr */
+ uint64_t datalen; /* total actual datalen */
+ uint64_t seqnum; /* sequence number of pkt */
+ uint64_t ncookies; /* num ldc_mem_cookies */
+ ldc_mem_cookie_t memcookie[MAX_COOKIES]; /* data cookies */
+} vgen_private_desc_t;
+
+/*
+ * Handshake parameters (per vio_mailbox.h) of each ldc end point, used
+ * during handshake negotiation.
+ */
+typedef struct vgen_handshake_params {
+ /* version specific params */
+ uint32_t ver_major:16,
+ ver_minor:16; /* major, minor version */
+ uint8_t dev_class; /* device class */
+
+ /* attributes specific params */
+ uint64_t mtu; /* max transfer unit size */
+ uint64_t addr; /* address of the device */
+ uint8_t addr_type; /* type of address */
+ uint8_t xfer_mode; /* SHM or PKT */
+ uint16_t ack_freq; /* dring data ack freq */
+
+ /* descriptor ring params */
+ uint32_t num_desc; /* # of descriptors in ring */
+ uint32_t desc_size; /* size of descriptor */
+ ldc_mem_cookie_t dring_cookie; /* desc ring cookie */
+ uint32_t num_dcookies; /* # of dring cookies */
+ uint64_t dring_ident; /* ident=0 for INFO msg */
+ boolean_t dring_ready; /* dring ready flag */
+} vgen_hparams_t;
+
+/* version info */
+typedef struct vgen_ver {
+ uint32_t ver_major:16,
+ ver_minor:16;
+} vgen_ver_t;
+
+typedef struct vgen_stats {
+
+ /* Link Input/Output stats */
+ uint64_t ipackets;
+ uint64_t ierrors;
+ uint64_t opackets;
+ uint64_t oerrors;
+#if 0
+ uint64_t collisions;
+#endif
+
+ /* MIB II variables */
+ uint64_t rbytes; /* # bytes received */
+ uint64_t obytes; /* # bytes transmitted */
+ uint32_t multircv; /* # multicast packets received */
+ uint32_t multixmt; /* # multicast packets for xmit */
+ uint32_t brdcstrcv; /* # broadcast packets received */
+ uint32_t brdcstxmt; /* # broadcast packets for xmit */
+ uint32_t norcvbuf; /* # rcv packets discarded */
+ uint32_t noxmtbuf; /* # xmit packets discarded */
+
+ /* Tx Statistics */
+ uint32_t tx_no_desc;
+ uint32_t tx_allocb_fail;
+
+ /* Rx Statistics */
+ uint32_t rx_no_desc;
+ uint32_t rx_allocb_fail;
+ uint32_t rx_lost_pkts;
+
+ /* Callback statistics */
+ uint32_t callbacks;
+ uint32_t dring_data_acks;
+
+} vgen_stats_t;
+
+typedef struct vgen_kstats {
+ /*
+ * Link Input/Output stats
+ */
+ kstat_named_t ipackets;
+ kstat_named_t ipackets64;
+ kstat_named_t ierrors;
+ kstat_named_t opackets;
+ kstat_named_t opackets64;
+ kstat_named_t oerrors;
+#if 0
+ kstat_named_t collisions;
+#endif
+ /*
+ * required by kstat for MIB II objects(RFC 1213)
+ */
+ kstat_named_t rbytes; /* MIB - ifInOctets */
+ kstat_named_t rbytes64;
+ kstat_named_t obytes; /* MIB - ifOutOctets */
+ kstat_named_t obytes64;
+ kstat_named_t multircv; /* MIB - ifInNUcastPkts */
+ kstat_named_t multixmt; /* MIB - ifOutNUcastPkts */
+ kstat_named_t brdcstrcv; /* MIB - ifInNUcastPkts */
+ kstat_named_t brdcstxmt; /* MIB - ifOutNUcastPkts */
+ kstat_named_t norcvbuf; /* MIB - ifInDiscards */
+ kstat_named_t noxmtbuf; /* MIB - ifOutDiscards */
+
+ /* Tx Statistics */
+ kstat_named_t tx_no_desc;
+ kstat_named_t tx_allocb_fail;
+
+ /* Rx Statistics */
+ kstat_named_t rx_no_desc;
+ kstat_named_t rx_allocb_fail;
+ kstat_named_t rx_lost_pkts;
+
+ /* Callback statistics */
+ kstat_named_t callbacks;
+ kstat_named_t dring_data_acks;
+
+} vgen_kstats_t;
+
+/* Channel information associated with a vgen-port */
+typedef struct vgen_ldc {
+
+ struct vgen_ldc *nextp; /* next ldc in the list */
+ struct vgen_port *portp; /* associated port */
+
+ /*
+ * Locks:
+ * locking hierarchy when more than one lock is held concurrently:
+ * cblock > txlock > tclock.
+ */
+ kmutex_t cblock; /* sync callback processing */
+ kmutex_t txlock; /* sync transmits */
+ kmutex_t tclock; /* tx reclaim lock */
+
+ /* channel info from ldc layer */
+ uint64_t ldc_id; /* channel number */
+ uint64_t ldc_handle; /* channel handle */
+ ldc_status_t ldc_status; /* channel status */
+
+ /* handshake info */
+ vgen_ver_t vgen_versions[VGEN_NUM_VER]; /* versions */
+ int hphase; /* handshake phase */
+ int hstate; /* handshake state bits */
+ uint32_t local_sid; /* local session id */
+ uint32_t peer_sid; /* session id of peer */
+ vgen_hparams_t local_hparams; /* local handshake params */
+ vgen_hparams_t peer_hparams; /* peer's handshake params */
+ timeout_id_t htid; /* handshake wd timeout id */
+
+ /* transmit and receive descriptor ring info */
+ ldc_dring_handle_t tx_dhandle; /* tx descriptor ring handle */
+ ldc_mem_cookie_t tx_dcookie; /* tx descriptor ring cookie */
+ ldc_dring_handle_t rx_dhandle; /* mapped rx dhandle */
+ ldc_mem_cookie_t rx_dcookie; /* rx descriptor ring cookie */
+ vnet_public_desc_t *txdp; /* transmit frame descriptors */
+ vnet_public_desc_t *txdendp; /* txd ring end */
+ vgen_private_desc_t *tbufp; /* associated tx resources */
+ vgen_private_desc_t *tbufendp; /* tbuf ring end */
+ vgen_private_desc_t *next_tbufp; /* next free tbuf */
+ vgen_private_desc_t *cur_tbufp; /* next reclaim tbuf */
+ uint64_t next_txseq; /* next tx sequence number */
+ uint32_t num_txdcookies; /* # of tx dring cookies */
+ uint32_t num_rxdcookies; /* # of rx dring cookies */
+ uint32_t next_txi; /* next tx descriptor index */
+ uint32_t num_txds; /* number of tx descriptors */
+ uint32_t reclaim_lowat; /* lowat for tx reclaim */
+ uint32_t reclaim_hiwat; /* hiwat for tx reclaim */
+ clock_t reclaim_lbolt; /* time of last tx reclaim */
+ timeout_id_t wd_tid; /* tx watchdog timeout id */
+ vnet_public_desc_t *rxdp; /* receive frame descriptors */
+ uint64_t next_rxseq; /* next expected recv seqnum */
+ uint32_t next_rxi; /* next expected recv index */
+ uint32_t num_rxds; /* number of rx descriptors */
+
+ /* misc */
+ uint32_t flags; /* flags */
+ boolean_t need_resched; /* reschedule tx */
+ boolean_t need_ldc_reset; /* ldc_reset needed */
+ boolean_t need_mcast_sync; /* sync mcast table with vsw */
+ uint32_t hretries; /* handshake retry count */
+
+ /* channel statistics */
+ vgen_stats_t *statsp; /* channel statistics */
+ kstat_t *ksp; /* channel kstats */
+
+} vgen_ldc_t;
+
+/* Channel list structure */
+typedef struct vgen_ldclist_s {
+ vgen_ldc_t *headp; /* head of the list */
+ krwlock_t rwlock; /* sync access to the list */
+ int num_ldcs; /* number of channels in the list */
+} vgen_ldclist_t;
+
+/* port information structure */
+typedef struct vgen_port {
+ struct vgen_port *nextp; /* next port in the list */
+ struct vgen *vgenp; /* associated vgen_t */
+ int port_num; /* port number */
+ vgen_ldclist_t ldclist; /* list of ldcs for this port */
+ struct ether_addr macaddr; /* mac address of peer */
+} vgen_port_t;
+
+/* port list structure */
+typedef struct vgen_portlist {
+ vgen_port_t *headp; /* head of ports */
+ vgen_port_t *tailp; /* tail */
+ krwlock_t rwlock; /* sync access to the port list */
+} vgen_portlist_t;
+
+/* vgen instance information */
+typedef struct vgen {
+ void *vnetp; /* associated vnet instance */
+ dev_info_t *vnetdip; /* dip of vnet */
+ void *vnetmacp; /* mac_t of vnet */
+ uint8_t macaddr[ETHERADDRL]; /* mac addr of vnet */
+ mac_resource_handle_t mrh; /* handle for mac_rx() */
+	kmutex_t		lock;		/* synchronize ops */
+ int flags; /* flags */
+ vgen_portlist_t vgenports; /* Port List */
+ mdeg_node_spec_t *mdeg_parentp;
+ mdeg_handle_t mdeg_hdl;
+ vgen_port_t *vsw_portp; /* port connected to vsw */
+ mac_t vgenmac; /* vgen mac ops */
+ struct ether_addr *mctab; /* multicast addr table */
+ uint32_t mcsize; /* allocated size of mctab */
+ uint32_t mccount; /* # of valid addrs in mctab */
+} vgen_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VNET_GEN_H */
diff --git a/usr/src/uts/sun4v/sys/vnet_mailbox.h b/usr/src/uts/sun4v/sys/vnet_mailbox.h
new file mode 100644
index 0000000000..4812b6c6a6
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vnet_mailbox.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_VNET_MAILBOX_H
+#define _SYS_VNET_MAILBOX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/vio_mailbox.h>
+#include <sys/ethernet.h>
+
+/*
+ * VNET specific Control envelopes: 0x0100 - 0x01FF
+ * type == VIO_TYPE_CTRL
+ * subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ */
+#define VNET_MCAST_INFO 0x0101
+
+/*
+ * Vnet/Vswitch device attributes information message.
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.submsgtype = VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VIO_ATTR_INFO
+ */
+typedef struct vnet_attr_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* attributes specific payload */
+ uint8_t xfer_mode; /* data transfer mode */
+ uint8_t addr_type; /* device address type */
+ uint16_t ack_freq; /* ack after rcving # of pkts */
+ uint32_t resv1; /* padding */
+
+ uint64_t addr; /* device address */
+ uint64_t mtu; /* maximum data xfer unit */
+
+ /* padding to align things */
+ uint64_t resv2[3];
+
+} vnet_attr_msg_t;
+
+/*
+ * Vnet/Vswitch enable/disable multicast address msg
+ *
+ * tag.msgtype == VIO_TYPE_CTRL
+ * tag.subtype == VIO_SUBTYPE_{INFO|ACK|NACK}
+ * tag.subtype_env == VNET_MCAST_INFO
+ */
+#define VNET_NUM_MCAST 7 /* max # of multicast addresses in the msg */
+
+typedef struct vnet_mcast_msg {
+ /* Common tag */
+ vio_msg_tag_t tag;
+
+ /* multicast address information */
+ uint8_t set; /* add if set to 1, else remove */
+ uint8_t count; /* number of addrs in the msg */
+ struct ether_addr mca[VNET_NUM_MCAST]; /* mcast addrs */
+ uint32_t resv1; /* padding */
+} vnet_mcast_msg_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VNET_MAILBOX_H */
diff --git a/usr/src/uts/sun4v/sys/vnet_proxy.h b/usr/src/uts/sun4v/sys/vnet_proxy.h
new file mode 100644
index 0000000000..7d3872fb52
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vnet_proxy.h
@@ -0,0 +1,133 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VNET_PROXY_H
+#define _VNET_PROXY_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * vnet proxy client is a low level driver which provides link specific
+ * functionality required by the vnet device. The vnet leaf driver and vnet
+ * proxy implement generic mac functionality required by the mac module as
+ * part of NEMO network stack. A vnet proxy provides these entry points
+ * as defined below in the vnet_proxy_ops structure. Note that some of the
+ * entry points may not be implemented by certain modules and will be
+ * initialized to NULL. All entry points return 0 for success and non zero
+ * for failure.
+ */
+
+typedef uint64_t vp_handle_t; /* vnet proxy handle */
+
+typedef struct vnet_proxy_ops {
+
+/*
+ * vp_start() enables the client to send and receive data and generate
+ * interrupts. In addition a client may register opaque objects to be
+ * passed during transmit. This is done by a client which provides links
+ * to specific destination mac addresses by calling vnet_add_fdb().
+ * (described below: Functions exported by vnet).
+ * vp_stop() disables the client from generating interrupts and IO.
+ * The client will also unregister any opaque objects using vnet_del_fdb().
+ */
+ int (*vp_start)(vp_handle_t vp_handle);
+ int (*vp_stop)(vp_handle_t vp_handle);
+
+/*
+ * vp_tx() is invoked to transmit a packet. The first argument points
+ * to the client specific opaque object.
+ * The vp_tx routine must return 0 if unable to send the packet (eg, due to
+ * lack of resources).
+ */
+ int (*vp_tx)(void *arg, mblk_t *mp);
+
+/*
+ * vp_resources() is called to enable the client register its receive
+ * resources.
+ */
+ int (*vp_resources)(vp_handle_t vp_handle);
+
+/*
+ * vp_multicast() is used to add/remove addresses to and from the set of
+ * multicast addresses for which the client will receive packets.
+ * If the second argument is B_TRUE then the address pointed to by the
+ * third argument should be added to the set. If the second argument is
+ * B_FALSE then the address pointed to by the third argument should be
+ * removed.
+ */
+ int (*vp_multicast)(vp_handle_t vp_handle, boolean_t add,
+ const uint8_t *mca);
+
+/*
+ * vp_promisc() is used to set the promiscuity of the client.
+ * If the second argument is B_TRUE then the client should receive all
+ * packets. If it is set to B_FALSE then only packets destined for the
+ * vnet device's unicast address and broadcast address should be received.
+ */
+ int (*vp_promisc)(vp_handle_t vp_handle, boolean_t on);
+
+/* vp_unicast() is used to set a new unicast address for the vnet device */
+ int (*vp_unicast)(vp_handle_t vp_handle, const uint8_t *mca);
+
+/* TBD: vp_statistics */
+ uint64_t (*vp_statistics)(vp_handle_t vp_handle, enum mac_stat);
+
+/* TBD: vp_ctl is used to support client-specific control commands */
+ int (*vp_ctl)(vp_handle_t vp_handle, mblk_t *mp);
+
+} vnet_proxy_ops_t;
+
+/* vnet_proxy entry point types */
+
+typedef int (*vp_start_t)(vp_handle_t);
+typedef int (*vp_stop_t)(vp_handle_t);
+typedef int (*vp_tx_t)(void *, mblk_t *);
+typedef int (*vp_resources_t)(vp_handle_t);
+typedef int (*vp_multicast_t)(vp_handle_t, boolean_t,
+ const uint8_t *);
+typedef int (*vp_promisc_t)(vp_handle_t, boolean_t);
+typedef int (*vp_unicast_t)(vp_handle_t, const uint8_t *);
+typedef uint64_t (*vp_statistics_t)(vp_handle_t, enum mac_stat);
+typedef int (*vp_ctl_t)(vp_handle_t, mblk_t *);
+
+/*
+ * The client calls these functions to add/remove an entry in vnet's FDB.
+ */
+void vnet_add_fdb(void *arg, uint8_t *macaddr, vp_tx_t vp_tx, void *txarg);
+void vnet_del_fdb(void *arg, uint8_t *macaddr);
+void vnet_modify_fdb(void *arg, uint8_t *macaddr, vp_tx_t vp_tx, void *txarg);
+void vnet_add_def_rte(void *arg, vp_tx_t vp_tx, void *txarg);
+void vnet_del_def_rte(void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VNET_PROXY_H */
diff --git a/usr/src/uts/sun4v/sys/vnetmsg.h b/usr/src/uts/sun4v/sys/vnetmsg.h
new file mode 100644
index 0000000000..79ed1d4336
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vnetmsg.h
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VNETMSG_H
+#define _VNETMSG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LM_SIGNATURE 0x564E45544C4D5347 /* "VNETLMSG" */
+
+/* lm_type (below) */
+#define LM_DATA 0x1
+#define LM_ACK 0x2
+
+/*
+ * msg protocol used for ldc_mem IO. currently, 2 cookies are supported.
+ * (In Unreliable mode LDC-maxpayload is 56 bytes).
+ */
+
+typedef struct vnet_ldc_msg {
+ uint64_t lm_signature; /* signature: "VNETLMSG" */
+ uint8_t lm_type; /* data or ack */
+ uint8_t lm_ncookies; /* # of cookies in the msg */
+ uint16_t lm_id; /* opaque id (sender) */
+ uint16_t lm_dlen; /* actual data length */
+ uint16_t lm_resv; /* reserved */
+ ldc_mem_cookie_t lm_cookie[2]; /* array of cookies */
+} vnet_ldc_msg_t;
+
+/*
+ * XXX Co-ordinate these def's with Harsha, expect that these will
+ * come from vnet header file.
+ */
+#define MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2)
+
+#define VNET_PUB_DESC_FREE 0x0
+#define VNET_PUB_DESC_READY 0x1
+#define VNET_PUB_DESC_DONE 0x2
+#define VNET_PUB_DESC_ACK 0x4
+
+#define VNET_PRIV_DESC_FREE 0x0
+#define VNET_PRIV_DESC_BUSY 0x1
+
+typedef struct vnet_public_desc {
+ uint64_t flags;
+ uint64_t ncookies;
+ ldc_mem_cookie_t memcookie[MAX_COOKIES];
+} vnet_public_desc_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VNETMSG_H */
diff --git a/usr/src/uts/sun4v/sys/vsw.h b/usr/src/uts/sun4v/sys/vsw.h
new file mode 100644
index 0000000000..d284db9dc6
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vsw.h
@@ -0,0 +1,455 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * This header file contains the basic data structures which the
+ * virtual switch (vsw) uses to communicate with its clients and
+ * the outside world.
+ *
+ * The virtual switch reads the machine description (MD) to
+ * determine how many port_t structures to create (each port_t
+ * can support communications to a single network device). The
+ * port_t's are maintained in a linked list.
+ *
+ * Each port in turn contains a number of logical domain channels
+ * (ldc's) which are inter domain communications channels which
+ * are used for passing small messages between the domains. Their
+ * may be an unlimited number of channels associated with each port,
+ * though most devices only use a single channel.
+ *
+ * The ldc is a bi-directional channel, which is divided up into
+ * two directional 'lanes', one outbound from the switch to the
+ * virtual network device, the other inbound to the switch.
+ * Depending on the type of device each lane may have separate
+ * communication parameters (such as mtu etc).
+ *
+ * For those network clients which use descriptor rings the
+ * rings are associated with the appropriate lane. I.e. rings
+ * which the switch exports are associated with the outbound lanes
+ * while those which the network clients are exporting to the switch
+ * are associated with the inbound lane.
+ *
+ * In diagram form the data structures look as follows:
+ *
+ * vsw instance
+ * |
+ * +----->port_t----->port_t----->port_t----->
+ * |
+ * +--->ldc_t--->ldc_t--->ldc_t--->
+ * |
+ * +--->lane_t (inbound)
+ * | |
+ * | +--->dring--->dring--->
+ * |
+ * +--->lane_t (outbound)
+ * |
+ * +--->dring--->dring--->
+ *
+ */
+
+#ifndef _VSW_H
+#define _VSW_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/vio_mailbox.h>
+#include <sys/vnet_common.h>
+#include <sys/ethernet.h>
+
+/*
+ * Default message type.
+ */
+typedef struct def_msg {
+ uint64_t data[8];
+} def_msg_t;
+
+/*
+ * Currently only support one major/minor pair.
+ */
+#define VSW_NUM_VER 1
+
+typedef struct ver_sup {
+ uint32_t ver_major:16,
+ ver_minor:16;
+} ver_sup_t;
+
+/*
+ * Only support ETHER mtu at moment.
+ */
+#define VSW_MTU ETHERMAX
+
+/*
+ * Lane states.
+ */
+#define VSW_LANE_INACTIV 0x0 /* No params set for lane */
+
+#define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */
+#define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */
+#define VSW_VER_ACK_RECV 0x4
+#define VSW_VER_ACK_SENT 0x8
+#define VSW_VER_NACK_RECV 0x10
+#define VSW_VER_NACK_SENT 0x20
+
+#define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */
+#define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */
+#define VSW_ATTR_ACK_SENT 0x100
+#define VSW_ATTR_ACK_RECV 0x200
+#define VSW_ATTR_NACK_SENT 0x400
+#define VSW_ATTR_NACK_RECV 0x800
+
+#define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */
+#define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */
+#define VSW_DRING_ACK_SENT 0x4000
+#define VSW_DRING_ACK_RECV 0x8000
+#define VSW_DRING_NACK_SENT 0x10000
+#define VSW_DRING_NACK_RECV 0x20000
+
+#define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */
+#define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */
+#define VSW_RDX_ACK_SENT 0x100000
+#define VSW_RDX_ACK_RECV 0x200000
+#define VSW_RDX_NACK_SENT 0x400000
+#define VSW_RDX_NACK_RECV 0x800000
+
+#define VSW_MCST_INFO_SENT 0x1000000
+#define VSW_MCST_INFO_RECV 0x2000000
+#define VSW_MCST_ACK_SENT 0x4000000
+#define VSW_MCST_ACK_RECV 0x8000000
+#define VSW_MCST_NACK_SENT 0x10000000
+#define VSW_MCST_NACK_RECV 0x20000000
+
+#define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */
+
+/* Handshake milestones */
+#define VSW_MILESTONE0 0x1 /* ver info exchanged */
+#define VSW_MILESTONE1 0x2 /* attribute exchanged */
+#define VSW_MILESTONE2 0x4 /* dring info exchanged */
+#define VSW_MILESTONE3 0x8 /* rdx exchanged */
+#define VSW_MILESTONE4 0x10 /* handshake complete */
+
+/*
+ * Lane direction (relative to ourselves).
+ */
+#define INBOUND 0x1
+#define OUTBOUND 0x2
+
+/* Peer session id received */
+#define VSW_PEER_SESSION 0x1
+
+/*
+ * Maximum number of consecutive reads of data from channel
+ */
+#define VSW_MAX_CHAN_READ 50
+
+/*
+ * LDC queue length
+ */
+#define VSW_LDC_QLEN 1024
+
+/*
+ * Currently only support one ldc per port.
+ */
+#define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */
+
+/*
+ * Used for port add/deletion.
+ */
+#define VSW_PORT_UPDATED 0x1
+
+#define LDC_TX_SUCCESS 0 /* ldc transmit success */
+#define LDC_TX_FAILURE 1 /* ldc transmit failure */
+#define LDC_TX_NORESOURCES 2 /* out of descriptors */
+
+/* ID of the source of a frame being switched */
+#define VSW_PHYSDEV 1 /* physical device associated */
+#define VSW_VNETPORT 2 /* port connected to vnet (over ldc) */
+#define VSW_LOCALDEV 4 /* vsw configured as an eth interface */
+
+/*
+ * Descriptor ring info
+ *
+ * Each descriptor element has a pre-allocated data buffer
+ * associated with it, into which data being transmitted is
+ * copied. By pre-allocating we speed up the copying process.
+ * The buffer is re-used once the peer has indicated that it is
+ * finished with the descriptor.
+ */
+#define VSW_RING_NUM_EL 512 /* Num of entries in ring */
+#define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */
+#define VSW_PRIV_SIZE sizeof (vnet_private_desc_t)
+#define VSW_PUB_SIZE sizeof (vnet_public_desc_t)
+
+#define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2)
+
+/*
+ * Private descriptor
+ */
+typedef struct vsw_private_desc {
+ uint64_t dstate;
+ vnet_public_desc_t *descp;
+ ldc_mem_handle_t memhandle;
+ void *datap;
+ uint64_t datalen;
+ uint64_t ncookies;
+ ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES];
+ int bound;
+} vsw_private_desc_t;
+
+/*
+ * Descriptor ring structure
+ */
+typedef struct dring_info {
+ struct dring_info *next; /* next ring in chain */
+ kmutex_t dlock;
+ uint32_t num_descriptors;
+ uint32_t descriptor_size;
+ uint32_t options;
+ uint32_t ncookies;
+ ldc_mem_cookie_t cookie[1];
+
+ ldc_dring_handle_t handle;
+ uint64_t ident; /* identifier sent to peer */
+ uint64_t end_idx; /* last idx processed */
+
+ /*
+ * base address of private and public portions of the
+ * ring (where appropriate), and data block.
+ */
+ void *pub_addr; /* base of public section */
+ void *priv_addr; /* base of private section */
+ void *data_addr; /* base of data section */
+ size_t data_sz; /* size of data section */
+} dring_info_t;
+
+/*
+ * Each ldc connection is comprised of two lanes, incoming
+ * from a peer, and outgoing to that peer. Each lane shares
+ * common ldc parameters and also has private lane-specific
+ * parameters.
+ */
+typedef struct lane {
+ uint64_t lstate; /* Lane state */
+ uint32_t ver_major:16, /* Version major number */
+ ver_minor:16; /* Version minor number */
+ uint64_t seq_num; /* Sequence number */
+ uint64_t mtu; /* ETHERMTU */
+ uint64_t addr; /* Unique physical address */
+ uint8_t addr_type; /* Only MAC address at moment */
+ uint8_t xfer_mode; /* Dring or Pkt based */
+ uint8_t ack_freq; /* Only non zero for Pkt based xfer */
+ dring_info_t *dringp; /* List of drings for this lane */
+} lane_t;
+
+/* channel drain states */
+#define VSW_LDC_INIT 0x1 /* Initial non-drain state */
+#define VSW_LDC_DRAINING 0x2 /* Channel draining */
+
+/* ldc information associated with a vsw-port */
+typedef struct vsw_ldc {
+ struct vsw_ldc *ldc_next; /* next ldc in the list */
+ struct vsw_port *ldc_port; /* associated port */
+ struct vsw *ldc_vswp; /* associated vsw */
+ kmutex_t ldc_cblock; /* sync callback processing */
+ kmutex_t ldc_txlock; /* sync transmits */
+ uint64_t ldc_id; /* channel number */
+ ldc_handle_t ldc_handle; /* channel handle */
+ kmutex_t drain_cv_lock;
+ kcondvar_t drain_cv; /* channel draining */
+ int drain_state;
+ uint32_t hphase; /* handshake phase */
+ int hcnt; /* # handshake attempts */
+ ldc_status_t ldc_status; /* channel status */
+ uint64_t local_session; /* Our session id */
+ uint64_t peer_session; /* Our peers session id */
+ uint8_t session_status; /* Session recv'd, sent */
+ kmutex_t hss_lock;
+ uint32_t hss_id; /* Handshake session id */
+ uint64_t next_ident; /* Next dring ident # to use */
+ lane_t lane_in; /* Inbound lane */
+ lane_t lane_out; /* Outbound lane */
+ uint8_t dev_class; /* Peer device class */
+} vsw_ldc_t;
+
+/* list of ldcs per port */
+typedef struct vsw_ldc_list {
+ vsw_ldc_t *head; /* head of the list */
+ krwlock_t lockrw; /* sync access(rw) to the list */
+ int num_ldcs; /* number of ldcs in the list */
+} vsw_ldc_list_t;
+
+/* multicast addresses port is interested in */
+typedef struct mcst_addr {
+ struct mcst_addr *nextp;
+ uint64_t addr;
+} mcst_addr_t;
+
+/* Port detach states */
+#define VSW_PORT_INIT 0x1 /* Initial non-detach state */
+#define VSW_PORT_DETACHING 0x2 /* In process of being detached */
+#define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */
+
+/* port information associated with a vsw */
+typedef struct vsw_port {
+ int p_instance; /* port instance */
+ struct vsw_port *p_next; /* next port in the list */
+ struct vsw *p_vswp; /* associated vsw */
+ vsw_ldc_list_t p_ldclist; /* list of ldcs for this port */
+
+ kmutex_t tx_lock; /* transmit lock */
+ int (*transmit)(vsw_ldc_t *, mblk_t *);
+
+ int state; /* port state */
+ kmutex_t state_lock;
+ kcondvar_t state_cv;
+
+ int ref_cnt; /* # of active references */
+ kmutex_t ref_lock;
+ kcondvar_t ref_cv;
+
+ kmutex_t mca_lock; /* multicast lock */
+ mcst_addr_t *mcap; /* list of multicast addrs */
+
+ /*
+ * mac address of the port & connected device
+ */
+ struct ether_addr p_macaddr;
+} vsw_port_t;
+
+/* list of ports per vsw */
+typedef struct vsw_port_list {
+ vsw_port_t *head; /* head of the list */
+ krwlock_t lockrw; /* sync access(rw) to the list */
+ int num_ports; /* number of ports in the list */
+} vsw_port_list_t;
+
+/*
+ * Taskq control message
+ */
+typedef struct vsw_ctrl_task {
+ vsw_ldc_t *ldcp;
+ def_msg_t pktp;
+ uint32_t hss_id;
+} vsw_ctrl_task_t;
+
+/*
+ * Number of hash chains in the multicast forwarding database.
+ */
+#define VSW_NCHAINS 8
+
+/*
+ * State of interface if switch plumbed as network device.
+ */
+#define VSW_IF_UP 0x1 /* Interface UP */
+#define VSW_IF_PROMISC 0x2 /* Interface in promiscuous mode */
+
+#define VSW_U_P(state) \
+ (state == (VSW_IF_UP | VSW_IF_PROMISC))
+
+/*
+ * Switching modes.
+ */
+#define VSW_LAYER2 0x1 /* Layer 2 - MAC switching */
+#define VSW_LAYER2_PROMISC 0x2 /* Layer 2 + promisc mode */
+#define VSW_LAYER3 0x4 /* Layer 3 - IP switching */
+
+#define NUM_SMODES 3 /* number of switching modes */
+
+/*
+ * Bits indicating which properties we've read from MD.
+ */
+#define VSW_MD_PHYSNAME 0x1
+#define VSW_MD_MACADDR 0x2
+#define VSW_MD_SMODE 0x4
+
+/*
+ * vsw instance state information.
+ */
+typedef struct vsw {
+ int instance; /* instance # */
+ dev_info_t *dip; /* associated dev_info */
+ struct vsw *next; /* next in list */
+ char physname[LIFNAMSIZ]; /* phys-dev */
+ uint8_t smode[NUM_SMODES]; /* switching mode */
+ int smode_idx; /* curr pos in smode array */
+ uint8_t mdprops; /* bitmask of props found */
+ vsw_port_list_t plist; /* associated ports */
+ ddi_taskq_t *taskq_p; /* VIO ctrl msg taskq */
+ mod_hash_t *fdb; /* forwarding database */
+
+ mod_hash_t *mfdb; /* multicast FDB */
+ krwlock_t mfdbrw; /* rwlock for mFDB */
+
+ /* mac layer */
+ mac_handle_t mh;
+ mac_rx_handle_t mrh;
+ mac_notify_handle_t mnh;
+ const mac_txinfo_t *txinfo; /* MAC tx routine */
+
+ /* Initial promisc setting of interface */
+ boolean_t init_promisc;
+
+ /* Machine Description updates */
+ mdeg_node_spec_t *inst_spec;
+ mdeg_handle_t mdeg_hdl;
+
+ /* if configured as an ethernet interface */
+ mac_t *if_macp; /* MAC structure */
+ mac_resource_handle_t if_mrh;
+ struct ether_addr if_addr; /* interface address */
+ krwlock_t if_lockrw;
+ uint8_t if_state; /* interface state */
+
+ /* multicast addresses when configured as eth interface */
+ kmutex_t mca_lock; /* multicast lock */
+ mcst_addr_t *mcap; /* list of multicast addrs */
+} vsw_t;
+
+
+/*
+ * Ethernet broadcast address definition.
+ */
+static struct ether_addr etherbroadcastaddr = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+#define IS_BROADCAST(ehp) \
+ (ether_cmp(&ehp->ether_dhost, &etherbroadcastaddr) == 0)
+#define IS_MULTICAST(ehp) \
+ ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1)
+
+#define READ_ENTER(x) rw_enter(x, RW_READER)
+#define WRITE_ENTER(x) rw_enter(x, RW_WRITER)
+#define RW_EXIT(x) rw_exit(x)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VSW_H */
diff --git a/usr/src/uts/sun4v/sys/vsw_fdb.h b/usr/src/uts/sun4v/sys/vsw_fdb.h
new file mode 100644
index 0000000000..7f155cc6f7
--- /dev/null
+++ b/usr/src/uts/sun4v/sys/vsw_fdb.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _VSW_FDB_H
+#define _VSW_FDB_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Convert ethernet (mac) address to hash table key.
+ */
+#define KEY_HASH(key, addr) \
+ (key = ((((uint64_t)addr.ether_addr_octet[0]) << 40) | \
+ (((uint64_t)addr.ether_addr_octet[1]) << 32) | \
+ (((uint64_t)addr.ether_addr_octet[2]) << 24) | \
+ (((uint64_t)addr.ether_addr_octet[3]) << 16) | \
+ (((uint64_t)addr.ether_addr_octet[4]) << 8) | \
+ ((uint64_t)addr.ether_addr_octet[5])));
+
+/*
+ * Multicast forwarding database (mFDB) is a hashtable
+ * keyed off the mac address, with the value being a linked
+ * list of mfdb_ent_t structures, each of which is a destination
+ * (either a vsw_port or the vsw instance itself when plumbed as
+ * a network device) to which the multicast pkt should be forwarded.
+ */
+typedef struct mfdb_ent {
+ struct mfdb_ent *nextp; /* next entry in list */
+ void *d_addr; /* address of dest */
+ uint8_t d_type; /* destination type */
+} mfdb_ent_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VSW_FDB_H */
diff --git a/usr/src/uts/sun4v/vcc/Makefile b/usr/src/uts/sun4v/vcc/Makefile
new file mode 100644
index 0000000000..96f860045c
--- /dev/null
+++ b/usr/src/uts/sun4v/vcc/Makefile
@@ -0,0 +1,106 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# uts/sun4v/vcc/Makefile
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the vcc driver kernel module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vcc
+OBJECTS = $(VCC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VCC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# the compiler fails with statement-not-reached errors
+#
+CERRWARN += -erroff=E_STATEMENT_NOT_REACHED
+
+#
+# module dependencies
+#
+LDFLAGS += -dy -Nmisc/ldc -Nmisc/platsvc
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/vdc/Makefile b/usr/src/uts/sun4v/vdc/Makefile
new file mode 100644
index 0000000000..b20f06c368
--- /dev/null
+++ b/usr/src/uts/sun4v/vdc/Makefile
@@ -0,0 +1,108 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/sun4v/vdc/Makefile
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the vdc driver module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vdc
+OBJECTS = $(VDC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VDC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+CFLAGS += -errwarn=%all
+
+LDFLAGS += -dy -Nmisc/ldc -Nmisc/platsvc
+
+#
+# Re-enable C99 compilation to use stack allocation of variable-sized arrays.
+# According to usr/src/uts/Makefile.uts, C99 is disabled until a problem seen
+# on x86 machines can be fully diagnosed; presumably a sun4v (i.e., SPARC)
+# module should be "safe". Furthermore, only the variable-sized array
+# extension is needed/used.
+#
+C99MODE = $(C99_ENABLE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/vds/Makefile b/usr/src/uts/sun4v/vds/Makefile
new file mode 100644
index 0000000000..397ebc4309
--- /dev/null
+++ b/usr/src/uts/sun4v/vds/Makefile
@@ -0,0 +1,106 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/sun4v/vds/Makefile
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of the vds driver module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vds
+OBJECTS = $(VDS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VDS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# module dependencies
+#
+LDFLAGS += -dy -Nmisc/ldc -Nmisc/platsvc
+
+#
+# Manually turn on C99 compilation until the sync with snv_38 re-enables it
+#
+C99MODE = $(C99_ENABLE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/vldc/Makefile b/usr/src/uts/sun4v/vldc/Makefile
new file mode 100644
index 0000000000..c36cf42690
--- /dev/null
+++ b/usr/src/uts/sun4v/vldc/Makefile
@@ -0,0 +1,101 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# uts/sun4v/vldc/Makefile
+#
+# This makefile drives the production of the vldc driver module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vldc
+OBJECTS = $(VLDC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VLDC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# module dependencies: vldc requires the LDC transport and platform
+# services misc modules at load time.
+#
+LDFLAGS += -dy -Nmisc/ldc -Nmisc/platsvc
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/vnet/Makefile b/usr/src/uts/sun4v/vnet/Makefile
new file mode 100644
index 0000000000..2eed19f4bc
--- /dev/null
+++ b/usr/src/uts/sun4v/vnet/Makefile
@@ -0,0 +1,105 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# uts/sun4v/vnet/Makefile
+#
+# This makefile drives the production of the vnet driver module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vnet
+OBJECTS = $(VNET_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VNET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+CFLAGS += -DVGEN_HANDLE_LOST_PKTS
+#CFLAGS += -DVGEN_USE_MAC_TX_UPDATE
+#CFLAGS += -DVGEN_REXMIT
+
+
+#
+# Driver depends on MAC & IP, plus the LDC and platform services modules
+#
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/ldc -N misc/platsvc
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ
diff --git a/usr/src/uts/sun4v/vsw/Makefile b/usr/src/uts/sun4v/vsw/Makefile
new file mode 100644
index 0000000000..88fdda49e6
--- /dev/null
+++ b/usr/src/uts/sun4v/vsw/Makefile
@@ -0,0 +1,112 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+#
+# uts/sun4v/vsw/Makefile
+#
+# This makefile drives the production of the vsw driver module.
+#
+# sun4v implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = vsw
+OBJECTS = $(VSW_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VSW_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sun4v/Makefile.sun4v
+
+#
+# Override defaults to build a unique, local modstubs.o.
+#
+MODSTUBS_DIR = $(OBJS_DIR)
+
+CLEANFILES += $(MODSTUBS_O)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# Module dependencies
+#
+LDFLAGS += -dy -Nmisc/ldc -Nmisc/mac -Nmisc/platsvc
+
+#
+# Re-enable C99 compilation to use stack allocation of variable-sized arrays.
+# According to usr/src/uts/Makefile.uts, C99 is disabled until a problem seen
+# on x86 machines can be fully diagnosed; presumably a sun4v (i.e., SPARC)
+# module should be "safe". Furthermore, only the variable-sized array
+# extension is needed/used.
+#
+# C99 mode also gives us macros such as __func__
+#
+# NOTE: the macro is C99_ENABLE (cf. the vds Makefile); the earlier
+# $(99_ENABLE) spelling expanded to empty and silently left C99 disabled.
+C99MODE = $(C99_ENABLE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
++#
+include $(UTSBASE)/$(PLATFORM)/Makefile.targ