summaryrefslogtreecommitdiff
path: root/usr/src/uts/i86pc/io/vmm/vmm_zsd.c
blob: 0271cc339e23b5ad33316a1e0aa54ea0519cf987 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

#include <sys/cpuvar.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/ksynch.h>
#include <sys/list.h>
#include <sys/types.h>
#include <sys/vmm.h>
#include <sys/vmm_impl.h>
#include <sys/zone.h>

/*
 * zone specific data
 *
 * Zone specific data is used to keep an association between zones and the vmm
 * instances that may be running in them.  This is used to ensure that vmm
 * instances do not outlive their parent zone.
 *
 * Locking strategy
 *
 * The global vmm_zsd_lock is held while modifying vmm_zsd_list.
 *
 * The per-zone vz_lock in vmm_zsd_t is held while reading or writing anything
 * within a vmm_zsd_t instance.  This is important to ensure that there's not
 * an accidental VM creation as a zone is going down.
 */

/*
 * One of these per zone.  Instances are allocated by vmm_zsd_create(), linked
 * onto vmm_zsd_list, and freed by vmm_zsd_destroy().
 */
struct vmm_zsd {
	list_t		vz_vmms;	/* vmm instances in the zone */
	list_node_t	vz_linkage;	/* link to other zones */
	boolean_t	vz_active;	/* B_FALSE early in shutdown callback */
	zoneid_t	vz_zoneid;	/* ID of the zone this entry tracks */
	kmutex_t	vz_lock;	/* guards vz_vmms and vz_active */
};

static kmutex_t vmm_zsd_lock;		/* Protects vmm_zsd_list */
static list_t vmm_zsd_list;		/* Linkage between all zsd instances */

/*
 * Zone key registered by vmm_zsd_init() and deleted by vmm_zsd_fini().  It is
 * the handle under which the create/shutdown/destroy callbacks in this file
 * are registered.
 */
static zone_key_t vmm_zsd_key;

/*
 * Attach a VM to the zone specific data of the zone recorded in sc->vmm_zone.
 *
 * The matching vmm_zsd_t is looked up on vmm_zsd_list by zone ID; an entry is
 * required to exist (enforced with VERIFY).  On success the VM is appended to
 * the zone's vz_vmms list and sc->vmm_zsd points at the entry.
 *
 * Returns 0 on success, or ENOSYS if the zone's entry is no longer active,
 * in which case sc is left untouched.
 */
int
vmm_zsd_add_vm(vmm_softc_t *sc)
{
	vmm_zsd_t *entry;
	int ret = 0;

	ASSERT(sc->vmm_zone != NULL);

	/* Walk the global list until the entry for this zone is found. */
	mutex_enter(&vmm_zsd_lock);
	entry = list_head(&vmm_zsd_list);
	while (entry != NULL && entry->vz_zoneid != sc->vmm_zone->zone_id) {
		entry = list_next(&vmm_zsd_list, entry);
	}
	VERIFY(entry != NULL);
	mutex_exit(&vmm_zsd_lock);

	/*
	 * vz_active is checked under vz_lock so that a VM cannot be added
	 * once vmm_zsd_shutdown() has marked the zone inactive.
	 */
	mutex_enter(&entry->vz_lock);
	if (entry->vz_active) {
		sc->vmm_zsd = entry;
		list_insert_tail(&entry->vz_vmms, sc);
	} else {
		ret = ENOSYS;
	}
	mutex_exit(&entry->vz_lock);

	return (ret);
}

/*
 * Detach a VM from the zone specific data it was attached to by
 * vmm_zsd_add_vm().  The VM is unlinked from the zone's vz_vmms list and
 * sc->vmm_zsd is cleared, both under the zone's vz_lock.
 */
void
vmm_zsd_rem_vm(vmm_softc_t *sc)
{
	vmm_zsd_t *owner;

	/* Capture the owner before sc->vmm_zsd is cleared below. */
	owner = sc->vmm_zsd;

	mutex_enter(&owner->vz_lock);
	list_remove(&owner->vz_vmms, sc);
	sc->vmm_zsd = NULL;
	mutex_exit(&owner->vz_lock);
}

/*
 * Zone key create callback: allocate and initialize the vmm_zsd_t for zone
 * `zid', publish it on vmm_zsd_list, and return it as the zone's ZSD value.
 */
static void *
vmm_zsd_create(zoneid_t zid)
{
	vmm_zsd_t *entry = kmem_zalloc(sizeof (*entry), KM_SLEEP);
	zone_t *zp;

	entry->vz_zoneid = zid;
	mutex_init(&entry->vz_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&entry->vz_vmms, sizeof (vmm_softc_t),
	    offsetof(vmm_softc_t, vmm_zsd_linkage));

	/*
	 * The vmm module may be loaded while this zone is already on its way
	 * down.  In that case vmm_zsd_destroy() can run without
	 * vmm_zsd_shutdown() ever having been called, so vz_active must start
	 * out B_FALSE to keep any in-flight VM creation from succeeding.
	 *
	 * zone_find_by_id_nolock() is used rather than zone_find_by_id()
	 * so that the zone is returned regardless of state.
	 */
	zp = zone_find_by_id_nolock(zid);
	VERIFY(zp != NULL);
	entry->vz_active =
	    (zone_status_get(zp) < ZONE_IS_SHUTTING_DOWN) ? B_TRUE : B_FALSE;

	/* The entry is fully set up; make it visible to vmm_zsd_add_vm(). */
	mutex_enter(&vmm_zsd_lock);
	list_insert_tail(&vmm_zsd_list, entry);
	mutex_exit(&vmm_zsd_lock);

	return (entry);
}

/*
 * Zone key shutdown callback.  Marks the zone's entry inactive and tells all
 * running VMs in the zone to power off.  This does not reclaim guest
 * resources (memory, etc.); that is left to vmm_zsd_destroy().
 */
static void
vmm_zsd_shutdown(zoneid_t zid, void *data)
{
	vmm_zsd_t *entry = data;
	vmm_softc_t *vm;

	mutex_enter(&entry->vz_lock);

	/*
	 * vz_active may already be B_FALSE (see the comment in
	 * vmm_zsd_create()).  If so, the walk below is a quick trip through
	 * an empty list.
	 */
	entry->vz_active = B_FALSE;

	vm = list_head(&entry->vz_vmms);
	while (vm != NULL) {
		/* Send a poweroff to the VM, whether running or not. */
		(void) vm_suspend(vm->vmm_vm, VM_SUSPEND_POWEROFF);
		vm = list_next(&entry->vz_vmms, vm);
	}

	mutex_exit(&entry->vz_lock);
}

/*
 * Zone key destroy callback: reap all VMs that remain and free up guest
 * resources, then tear down and free the zone's vmm_zsd_t.
 */
static void
vmm_zsd_destroy(zoneid_t zid, void *data)
{
	vmm_zsd_t *zsd = data;
	vmm_softc_t *sc;

	/* Unpublish first so vmm_zsd_add_vm() can no longer find this entry. */
	mutex_enter(&vmm_zsd_lock);
	list_remove(&vmm_zsd_list, zsd);
	mutex_exit(&vmm_zsd_lock);

	mutex_enter(&zsd->vz_lock);
	ASSERT(!zsd->vz_active);

	while ((sc = list_remove_head(&zsd->vz_vmms)) != NULL) {
		int err;

		/*
		 * This frees all resources associated with the vm, including
		 * sc.
		 */
		err = vmm_do_vm_destroy(sc, B_FALSE);
		ASSERT3S(err, ==, 0);
	}

	/*
	 * The list is now empty; release the handle set up by list_create()
	 * in vmm_zsd_create() before the memory containing it is freed.
	 */
	list_destroy(&zsd->vz_vmms);

	mutex_exit(&zsd->vz_lock);
	mutex_destroy(&zsd->vz_lock);

	kmem_free(zsd, sizeof (*zsd));
}

/*
 * Set up the global zone specific data state: the list of per-zone entries,
 * the lock that protects it, and the zone key that registers this file's
 * callbacks.
 */
void
vmm_zsd_init(void)
{
	list_create(&vmm_zsd_list, sizeof (vmm_zsd_t),
	    offsetof(vmm_zsd_t, vz_linkage));
	mutex_init(&vmm_zsd_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Register the callbacks last: they use the list and lock
	 * initialized above.
	 */
	zone_key_create(&vmm_zsd_key, vmm_zsd_create, vmm_zsd_shutdown,
	    vmm_zsd_destroy);
}

/*
 * Undo vmm_zsd_init(): delete the zone key, then tear down the global list
 * and its lock.
 */
void
vmm_zsd_fini(void)
{
	/* Calls vmm_zsd_destroy() on all zones. */
	(void) zone_key_delete(vmm_zsd_key);

	/* Every entry removes itself from the list in vmm_zsd_destroy(). */
	ASSERT(list_is_empty(&vmm_zsd_list));

	list_destroy(&vmm_zsd_list);
	mutex_destroy(&vmm_zsd_lock);
}