summaryrefslogtreecommitdiff
path: root/usr/src/uts/i86xpv/os/xen_mmu.c
blob: 5b4fe17e81a823a7f545e0fdda84eba54d5b9f09 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/mach_mmu.h>
#include <sys/machsystm.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/hypervisor.h>
#include <sys/bootconf.h>
#include <sys/ontrap.h>
#include <sys/rwlock.h>
#include <sys/sysmacros.h>
#include <vm/seg_kmem.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat.h>
#include <vm/htable.h>
#include <vm/hat_i86.h>

start_info_t *xen_info;
ulong_t mfn_count;
mfn_t *mfn_list;
mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
				/* that make up the pa_to_ma table */
mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
mfn_t cached_max_mfn;
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;
caddr_t xb_addr;		/* virtual addr for the store_mfn page */


/*
 * We need to prevent migration or suspension of a domU while it's
 * manipulating MFN values, as the MFN values will spontaneously
 * change. The next 4 routines provide a mechanism for that.
 * The basic idea is to use a set of reader/writer locks; a reader is any
 * thread that is manipulating MFNs. Only the thread which is going to
 * actually call HYPERVISOR_suspend() will become a writer.
 *
 * Since various places need to manipulate MFNs and also call the HAT,
 * we track if a thread acquires reader status and allow it to recursively
 * do so again. This prevents deadlocks if a migration request
 * is started and waits for some reader, but then the previous reader needs
 * to call into the HAT.
 */
#define	NUM_M2P_LOCKS 128
static struct {
	krwlock_t m2p_rwlock;
	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
} m2p_lock[NUM_M2P_LOCKS];

#define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))

void
xen_block_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    ++curthread->t_xpvcntr == 1)
		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
}

void
xen_allow_migrate(void)
{
	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
	    --curthread->t_xpvcntr == 0)
		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
}

void
xen_start_migrate(void)
{
	int i;

	ASSERT(curthread->t_xpvcntr == 0);
	++curthread->t_xpvcntr; /* this allows calls into HAT */
	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
}

void
xen_end_migrate(void)
{
	int i;

	for (i = 0; i < NUM_M2P_LOCKS; ++i)
		rw_exit(&m2p_lock[i].m2p_rwlock);
	ASSERT(curthread->t_xpvcntr == 1);
	--curthread->t_xpvcntr;
}

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		bop_panic("HYPERVISOR_mmu_update() failed");
}

/*
 * The start_info_t and mfn_list are initially mapped in low "boot" memory.
 * Each has a page aligned address and size. We relocate them up into the
 * kernel's normal address space at this point in time. We also create
 * the arrays that let the hypervisor suspend/resume a domain.
 */
void
xen_relocate_start_info(void)
{
	maddr_t mach_addr;
	size_t sz;
	size_t sz2;
	offset_t off;
	uintptr_t addr;
	uintptr_t old;
	int i, j;

	/*
	 * In dom0, we have to account for the console_info structure
	 * which might immediately follow the start_info in memory.
	 */
	sz = sizeof (start_info_t);
	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
		    xen_info->console.dom0.info_size;
	}
	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
		    (caddr_t)xen_info + off)));
		kbm_map_ma(mach_addr + off, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)xen_info;
	xen_info = (start_info_t *)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Relocate the mfn_list, any number of pages.
	 */
	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
	    0, 0, 0, VM_SLEEP);
	for (off = 0; off < sz; off += MMU_PAGESIZE) {
		mach_addr =
		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
		kbm_map_ma(mach_addr, addr + off, 0);
	}
	boot_mapin((caddr_t)addr, sz);
	old = (uintptr_t)mfn_list;
	mfn_list = (mfn_t *)addr;
	xen_info->mfn_list = (mfn_t)addr;
	for (off = 0; off < sz; off += MMU_PAGESIZE)
		kbm_unmap(old + off);

	/*
	 * Create the lists of mfn_list pages needed by suspend/resume.
	 * Note we skip this for domain 0 as it can't suspend/resume.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
		mfn_list_pages = kmem_zalloc(sz2, VM_SLEEP);
		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, VM_SLEEP);
		i = 0;
		for (off = 0; off < sz; off += MMU_PAGESIZE) {
			j = mmu_btop(off);
			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
				mfn_list_pages_page[i++] =
				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
			}
			mfn_list_pages[j] =
			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
		}
		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
	}

	/*
	 * Remap the shared info (for I/O) into high memory, too.
	 */
	sz = MMU_PAGESIZE;
	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
	kbm_map_ma(xen_info->shared_info, addr, 0);
	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
	old = (uintptr_t)HYPERVISOR_shared_info;
	HYPERVISOR_shared_info = (void *)addr;
	kbm_unmap(old);

	/*
	 * Remap the console info into high memory, too.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		sz = MMU_PAGESIZE;
		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
		kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
		boot_mapin((caddr_t)addr, sz);
		old = (uintptr_t)HYPERVISOR_console_page;
		HYPERVISOR_console_page = (void *)addr;
		kbm_unmap(old);
	} else {
		HYPERVISOR_console_page = NULL;
	}

	/*
	 * On domUs we need to have the xenbus page (store_mfn) mapped into
	 * the kernel. This is referenced as xb_addr.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
		    (uintptr_t)xb_addr, 0);
		boot_mapin(xb_addr, MMU_PAGESIZE);
	}
}

/*
 * Generate the pfn value to use for a foreign mfn.
 */
pfn_t
xen_assign_pfn(mfn_t mfn)
{
	pfn_t pfn;

#ifdef DEBUG
	/*
	 * make sure this MFN isn't in our list of MFNs
	 */
	on_trap_data_t otd;
	uint_t	on_trap_ready = (t0.t_stk != NULL);

	if (on_trap_ready) {
		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
			pfn = mfn_to_pfn_mapping[mfn];
			if (pfn < mfn_count && mfn_list[pfn] == mfn)
				panic("xen_assign_pfn() mfn belongs to us");
		}
		no_trap();
	}
#endif /* DEBUG */

	if (mfn == MFN_INVALID)
		panic("xen_assign_pfn(MFN_INVALID) not allowed");
	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	if (pfn == mfn)
		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
	return (pfn);
}

void
xen_release_pfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("xen_release_pfn(PFN_INVALID) not allowed");
	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
		panic("mfn high bit not set");
}

uint_t
pfn_is_foreign(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		return (0);
	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
}

pfn_t
pte2pfn(x86pte_t pte, level_t l)
{
	mfn_t mfn = PTE2MFN(pte, l);

	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
	return (mfn_to_pfn(mfn));
}

mfn_t
pfn_to_mfn(pfn_t pfn)
{
	if (pfn == PFN_INVALID)
		panic("pfn_to_mfn(PFN_INVALID) not allowed");

	if (pfn & PFN_IS_FOREIGN_MFN)
		return (pfn & ~PFN_IS_FOREIGN_MFN);

	if (pfn >= mfn_count)
		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);

	return (mfn_list[pfn]);
}

/*
 * This routine translates an MFN back into the corresponding PFN value.
 * It has to be careful since the mfn_to_pfn_mapping[] might fault
 * as that table is sparse. It also has to check for non-faulting, but out of
 * range that exceed the table.
 */
pfn_t
mfn_to_pfn(mfn_t mfn)
{
	pfn_t pfn;
	on_trap_data_t otd;
	uint_t	on_trap_ready = (t0.t_stk != NULL);

	/*
	 * Cleared at a suspend or migrate
	 */
	if (cached_max_mfn == 0)
		cached_max_mfn =
		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);

	if (cached_max_mfn < mfn)
		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);

	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	} else {
		pfn = mfn_to_pfn_mapping[mfn];

		if (pfn == PFN_INVALID || pfn >= mfn_count ||
		    pfn_to_mfn(pfn) != mfn)
			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
	}

	if (on_trap_ready)
		no_trap();

	/*
	 * If khat_running is set then we should be checking
	 * in domUs that migration is blocked while using the
	 * mfn_to_pfn_mapping[] table.
	 */
	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));

	return (pfn);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));

	if (mfn == MFN_INVALID)
		panic("pa_to_ma() got MFN_INVALID");
	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));

	if (pfn == PFN_INVALID)
		panic("ma_to_pa() got PFN_INVALID");
	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
}

/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 */
#ifdef DEBUG
#define	CHECK_PAGE_LOCK(pfn)	{			\
	page_t *pp = page_numtopp_nolock(pfn);		\
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {	\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    pfn);				\
	}						\
}
#else	/* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)
#endif	/* DEBUG */

/*
 * Reassign a new machine page to back a physical address.
 */
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	int mmu_update_return;
	mmu_update_t t;
	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);

	ASSERT(pfn != PFN_INVALID);
	ASSERT(!pfn_is_foreign(pfn));

	ASSERT(pfn < mfn_count);
	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
	if (mfn == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
			panic("reassign_pfn(): failed to remove kpm mapping");
		mfn_list[pfn] = mfn;
		return;
	}

	/*
	 * Verify that previously given away pages are still page locked.
	 */
	if (mfn_list[pfn] == MFN_INVALID) {
		CHECK_PAGE_LOCK(pfn);
	}
	mfn_list[pfn] = mfn;

	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
	t.val = pfn;

	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
		panic("HYPERVISOR_mmu_update() failed");
	ASSERT(mmu_update_return == 1);

	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
		panic("reassign_pfn(): failed to enable kpm mapping");
}