summaryrefslogtreecommitdiff
path: root/usr/src/uts/i86pc/vm/hat_pte.h
blob: b65a69cb517ecd09b2f2b9f04eb970086ad8d8e4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

#ifndef	_VM_HAT_PTE_H
#define	_VM_HAT_PTE_H

#ifdef	__cplusplus
extern "C" {
#endif

#include <sys/types.h>
#include <sys/mach_mmu.h>

/*
 * Macros to get/set/clear the PTE fields.
 *
 * PTE_SET(p, f) - OR the bits of f into p; p must be a modifiable lvalue.
 * PTE_CLR(p, f) - clear the bits of f in p; p must be a modifiable lvalue.
 *		   The mask is cast to x86pte_t before it is complemented.
 * PTE_GET(p, f) - evaluate to p masked with f. The result is the raw masked
 *		   bits, not a normalized 0/1 value.
 */
#define	PTE_SET(p, f)	((p) |= (f))
#define	PTE_CLR(p, f)	((p) &= ~(x86pte_t)(f))
#define	PTE_GET(p, f)	((p) & (f))

/*
 * Handy macro to check if a pagetable entry or pointer is valid.
 * It expands to PTE_GET(p, PT_VALID), so the result is the masked PT_VALID
 * bit (zero or non-zero) rather than a normalized 0/1 value.
 */
#define	PTE_ISVALID(p)		PTE_GET(p, PT_VALID)

/*
 * Does a PTE map a large page?
 * True only when the level l is greater than 0 and PT_PAGESIZE is set in p;
 * an entry at level 0 is never reported as a large page, whatever its bits.
 */
#define	PTE_IS_LGPG(p, l)	((l) > 0 && PTE_GET((p), PT_PAGESIZE))

/*
 * Does this PTE represent a page (not a pointer to another page table)?
 * The entry must be valid and must either sit at level 0 or have
 * PT_PAGESIZE set. Note that p appears twice in the expansion, so the
 * argument should be free of side effects.
 */
#define	PTE_ISPAGE(p, l)	\
	(PTE_ISVALID(p) && ((l) == 0 || PTE_GET(p, PT_PAGESIZE)))

/*
 * Handy macro to check if 2 PTE's are the same - ignores REF/MOD bits.
 * On the 64 bit hypervisor we also have to ignore the high order
 * software bits and the global/user bit which are set/cleared
 * capriciously (by the hypervisor!)
 *
 * PT_IGNORE holds those extra hypervisor-only bits (bits 52 through 62 plus
 * PT_GLOBAL and PT_USER) and is 0 in every other configuration.
 * PTE_EQUIV() forces all of the ignored bits on in both operands and then
 * compares the results for equality.
 */
#if defined(__amd64) && defined(__xpv)
#define	PT_IGNORE	((0x7fful << 52) | PT_GLOBAL | PT_USER)
#else
#define	PT_IGNORE	(0)
#endif
#define	PTE_EQUIV(a, b)	 (((a) | (PT_IGNORE | PT_REF | PT_MOD)) == \
	((b) | (PT_IGNORE | PT_REF | PT_MOD)))

/*
 * Shorthand for converting a PTE to its pfn.
 *
 * PTE2MFN() masks the PTE with PT_PADDR_LGPG when PTE_IS_LGPG() reports a
 * large page at level l, and with PT_PADDR otherwise, then converts the
 * masked value with mmu_btop(). The argument p is expanded twice, so it
 * should be free of side effects.
 *
 * PTE2PFN() is pte2pfn() when built with __xpv and plain PTE2MFN()
 * otherwise.
 */
#define	PTE2MFN(p, l)	\
	mmu_btop(PTE_GET((p), PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))
#ifdef __xpv
#define	PTE2PFN(p, l) pte2pfn(p, l)
#else
#define	PTE2PFN(p, l) PTE2MFN(p, l)
#endif

/*
 * PT_NX is bit 63. PT_PADDR covers bits 12 through 51; PT_PADDR_LGPG is
 * the same mask with bit 12 cleared (bits 13 through 51).
 * NOTE(review): bit 12 is presumably excluded because it is not an address
 * bit in large page entries - confirm against the processor manuals.
 */
#define	PT_NX		(0x8000000000000000ull)
#define	PT_PADDR	(0x000ffffffffff000ull)
#define	PT_PADDR_LGPG	(0x000fffffffffe000ull)	/* phys addr for large pages */

/*
 * Macros to create a PTP or PTE from the pfn and level
 */
#ifdef __xpv

/*
 * We use the highest order bit in physical address pfns to mark foreign
 * mfns: bit 51 when _LP64 is defined, bit 31 otherwise.
 */
#ifdef _LP64
#define	PFN_IS_FOREIGN_MFN (1ul << 51)
#else
#define	PFN_IS_FOREIGN_MFN (1ul << 31)
#endif

/*
 * MAKEPTP(pfn, l): convert pfn with pfn_to_pa() and pa_to_ma(), then OR in
 * mmu.ptp_bits[l + 1]; l + 1 must therefore be a valid index into that
 * array.
 *
 * MAKEPTE(pfn, l): if pfn carries PFN_IS_FOREIGN_MFN, strip that marker,
 * convert with pfn_to_pa() only (no pa_to_ma()), and OR in
 * mmu.pte_bits[l] together with PT_FOREIGN | PT_REF | PT_MOD. Otherwise
 * convert with pfn_to_pa() and pa_to_ma() and OR in mmu.pte_bits[l].
 *
 * pfn is parenthesized wherever an operator is applied to it directly so
 * that expression arguments (e.g. "mfn | PFN_IS_FOREIGN_MFN") are tested
 * and masked as a whole. pfn is expanded more than once, so it should be
 * free of side effects.
 */
#define	MAKEPTP(pfn, l)	\
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.ptp_bits[(l) + 1])
#define	MAKEPTE(pfn, l) \
	(((pfn) & PFN_IS_FOREIGN_MFN) ? \
	((pfn_to_pa((pfn) & ~PFN_IS_FOREIGN_MFN) | mmu.pte_bits[l]) | \
	PT_FOREIGN | PT_REF | PT_MOD) : \
	(pa_to_ma(pfn_to_pa(pfn)) | mmu.pte_bits[l]))
#else
/*
 * Without __xpv the pfn is converted with pfn_to_pa() alone.
 * MAKEPTP() ORs in mmu.ptp_bits[l + 1], so l + 1 must be a valid index
 * into that array; MAKEPTE() ORs in mmu.pte_bits[l].
 */
#define	MAKEPTP(pfn, l)	\
	(pfn_to_pa(pfn) | mmu.ptp_bits[(l) + 1])
#define	MAKEPTE(pfn, l)	\
	(pfn_to_pa(pfn) | mmu.pte_bits[l])
#endif

/*
 * The idea of "level" refers to the level where the page table is used in
 * the hardware address translation steps. The level values correspond to the
 * following names of tables used in AMD/Intel architecture documents:
 *
 *	AMD/INTEL name		Level #
 *	----------------------	-------
 *	Page Map Level 4	   3
 *	Page Directory Pointer	   2
 *	Page Directory		   1
 *	Page Table		   0
 *
 * The numbering scheme is such that the values of 0 and 1 can correspond to
 * the pagesize codes used for MPSS support. For now the Maximum level at
 * which you can have a large page is a constant, that may change in
 * future processors.
 *
 * The type of "level_t" is signed so that it can be used like:
 *	level_t	l;
 *	...
 *	while (--l >= 0)
 *		...
 *
 * The LEVEL_*() macros are plain lookups into the per-level tables kept in
 * "mmu"; l is used as the array index without any range check.
 */
#define	MAX_NUM_LEVEL		4
#define	MAX_PAGE_LEVEL		2
#define	MIN_PAGE_LEVEL		0
typedef	int8_t level_t;
#define	LEVEL_SHIFT(l)	(mmu.level_shift[l])
#define	LEVEL_SIZE(l)	(mmu.level_size[l])
#define	LEVEL_OFFSET(l)	(mmu.level_offset[l])
#define	LEVEL_MASK(l)	(mmu.level_mask[l])

/*
 * Macros to:
 * Check for a PFN above 4Gig and 64Gig for 32 bit PAE support
 *
 * PFN_4G and PFN_64G are the number of MMU_PAGESIZE pages in 4GB and 64GB,
 * i.e. the pfn of the first page at each boundary. The tests use >=, so
 * the boundary pfn itself counts as "above".
 */
#define	PFN_4G		(4ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_64G		(64ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define	PFN_ABOVE4G(pfn) ((pfn) >= PFN_4G)
#define	PFN_ABOVE64G(pfn) ((pfn) >= PFN_64G)

/*
 * The CR3 register holds the physical address of the top level page table,
 * along with the current PCID if any.
 *
 * MAKECR3() converts pfn with mmu_ptob() and ORs in pcid. pcid is
 * parenthesized so that an expression argument (for example a conditional
 * such as "use_pcid ? id : 0") is evaluated as a unit before the OR instead
 * of being split apart by operator precedence.
 */
#define	MAKECR3(pfn, pcid)	(mmu_ptob(pfn) | (pcid))

/*
 * HAT/MMU parameters that depend on kernel mode and/or processor type.
 *
 * The macros in this header (MAKEPTE(), LEVEL_*(), PWIN_*(), PT_INDEX_*(),
 * ...) read these fields through the global instance named "mmu", which is
 * declared extern under _KERNEL later in this file.
 */
struct htable;
struct hat_mmu_info {
	x86pte_t pt_nx;		/* either 0 or PT_NX */
	x86pte_t pt_global;	/* either 0 or PT_GLOBAL */

	/* NOTE(review): presumably the largest pfn in use - confirm at setup */
	pfn_t highest_pfn;

	uint_t num_level;	/* number of page table levels in use */
	uint_t max_level;	/* just num_level - 1 */
	uint_t max_page_level;	/* maximum level at which we can map a page */
	uint_t umax_page_level; /* max user page map level */
	uint_t ptes_per_table;	/* # of entries in lower level page tables */
	uint_t top_level_count;	/* # of entries in top-level page table */
	uint_t top_level_uslots; /* # of user slots in top-level page table */
	uint_t num_copied_ents;	/* # of PCP-copied PTEs to create */
	/* 32-bit versions of values */
	uint_t top_level_uslots32;
	uint_t max_level32;
	uint_t num_copied_ents32;

	uint_t hash_cnt;	/* cnt of entries in htable_hash_cache */
	uint_t hat32_hash_cnt;	/* cnt of entries in 32-bit htable_hash_cache */

	uint_t pae_hat;		/* either 0 or 1 */

	/* Bounds tested by IN_VA_HOLE(): hole_start <= va < hole_end. */
	uintptr_t hole_start;	/* start of VA hole (or -1 if none) */
	uintptr_t hole_end;	/* end of VA hole (or 0 if none) */

	struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
	x86pte_t *kmap_ptes;	/* mapping of pagetables that map kmap */
	uintptr_t kmap_addr;	/* start addr of kmap */
	uintptr_t kmap_eaddr;	/* end addr of kmap */

	uint_t pte_size;	/* either 4 or 8 */
	uint_t pte_size_shift;	/* either 2 or 3 */
	/* MAKEPTP() indexes ptp_bits with l + 1; MAKEPTE() uses pte_bits[l]. */
	x86pte_t ptp_bits[MAX_NUM_LEVEL];	/* bits set for interior PTP */
	x86pte_t pte_bits[MAX_NUM_LEVEL];	/* bits set for leaf PTE */

	/*
	 * A range of VA used to window pages in the i86pc/vm code.
	 * See PWIN_XXX macros: PWIN_VA() offsets from pwin_base,
	 * PWIN_PTE_VA() from pwin_pte_va and PWIN_PTE_PA() from pwin_pte_pa.
	 */
	caddr_t	pwin_base;
	caddr_t	pwin_pte_va;
	paddr_t	pwin_pte_pa;

	/*
	 * The following tables are equivalent to PAGEXXXXX at different levels
	 * in the page table hierarchy. They are read through the
	 * LEVEL_SHIFT(), LEVEL_SIZE(), LEVEL_OFFSET() and LEVEL_MASK() macros.
	 */
	uint_t level_shift[MAX_NUM_LEVEL];	/* PAGESHIFT for given level */
	uintptr_t level_size[MAX_NUM_LEVEL];	/* PAGESIZE for given level */
	uintptr_t level_offset[MAX_NUM_LEVEL];	/* PAGEOFFSET for given level */
	uintptr_t level_mask[MAX_NUM_LEVEL];	/* PAGEMASK for given level */
};


#if defined(_KERNEL)

/*
 * Macros to access the HAT's private page windows. They're used for
 * accessing pagetables, ppcopy() and page_zero().
 * The 1st two macros are used to get an index for the particular use.
 * Each cpuid owns two consecutive indices: the even one (PWIN_TABLE) and
 * the odd one that follows it (PWIN_SRC).
 * The next three take such an index and give you:
 * - the virtual address of the window
 * - the virtual address of the pte that maps the window
 * - the physical address of the pte that maps the window
 */
#define	PWIN_TABLE(cpuid)	((cpuid) * 2)
#define	PWIN_SRC(cpuid)		((cpuid) * 2 + 1)	/* for x86pte_copy() */
#define	PWIN_VA(x)		(mmu.pwin_base + ((x) << MMU_PAGESHIFT))
#define	PWIN_PTE_VA(x)		(mmu.pwin_pte_va + ((x) << mmu.pte_size_shift))
#define	PWIN_PTE_PA(x)		(mmu.pwin_pte_pa + ((x) << mmu.pte_size_shift))

/*
 * The concept of a VA hole exists in AMD64. This might need to be made
 * model specific eventually.
 *
 * In the 64 bit kernel PTE loads are atomic, but need atomic_cas_64 on 32
 * bit kernel.
 */
#if defined(__amd64)

/*
 * IN_VA_HOLE(va) is true when mmu.hole_start <= va < mmu.hole_end; va is
 * expanded twice. Under lint it is replaced by __lintzero.
 */
#ifdef lint
#define	IN_VA_HOLE(va)	(__lintzero)
#else
#define	IN_VA_HOLE(va)	(mmu.hole_start <= (va) && (va) < mmu.hole_end)
#endif

/*
 * FMT_PTE is a printf-style conversion for a PTE value (presumably matching
 * the width of x86pte_t here - confirm against its typedef).
 * GET_PTE()/SET_PTE() are a plain load/store through an x86pte_t pointer;
 * CAS_PTE() hands its arguments straight to atomic_cas_64().
 */
#define	FMT_PTE "0x%lx"
#define	GET_PTE(ptr)		(*(x86pte_t *)(ptr))
#define	SET_PTE(ptr, pte)	(*(x86pte_t *)(ptr) = pte)
#define	CAS_PTE(ptr, x, y)	atomic_cas_64(ptr, x, y)

#elif defined(__i386)

/* In this configuration IN_VA_HOLE() ignores va and is always __lintzero. */
#define	IN_VA_HOLE(va)	(__lintzero)

/* printf-style conversion used when formatting a PTE value */
#define	FMT_PTE "0x%llx"

/* on 32 bit kernels, 64 bit loads aren't atomic, use get_pte64() */
extern x86pte_t get_pte64(x86pte_t *ptr);
/*
 * GET_PTE() reads through get_pte64() when mmu.pae_hat is non-zero and
 * otherwise reads a single x86pte32_t from ptr.
 */
#define	GET_PTE(ptr)	(mmu.pae_hat ? get_pte64(ptr) : *(x86pte32_t *)(ptr))
/*
 * SET_PTE(ptr, pte): store pte at ptr as one or two x86pte32_t words.
 * When mmu.pae_hat is non-zero the upper 32 bits of pte are written to
 * word [1] first; the lower 32 bits are then always written to word [0].
 * The value of the whole expression is that of the low-word assignment.
 *
 * pte is parenthesized at each use so that an expression argument such as
 * "pa | bits" is shifted and stored as a whole (otherwise ">> 32" would
 * bind only to the last operand). pte is expanded twice, so it should be
 * free of side effects.
 */
#define	SET_PTE(ptr, pte)						\
	((mmu.pae_hat ? ((x86pte32_t *)(ptr))[1] = ((pte) >> 32) : 0),	\
	*(x86pte32_t *)(ptr) = (pte))
/*
 * CAS_PTE(): when mmu.pae_hat is non-zero the arguments are passed to
 * atomic_cas_64() unchanged; otherwise ptr is treated as a uint32_t pointer
 * and x and y are truncated to uint32_t for atomic_cas_32().
 */
#define	CAS_PTE(ptr, x, y)			\
	(mmu.pae_hat ? atomic_cas_64(ptr, x, y) :	\
	atomic_cas_32((uint32_t *)(ptr), (uint32_t)(x), (uint32_t)(y)))

#endif	/* __i386 */

/*
 * Return a pointer to the pte entry at the given index within a page table.
 * The index x is shifted left by mmu.pte_size_shift to turn it into a byte
 * offset from p; the result is typed x86pte_t * regardless of the shift.
 */
#define	PT_INDEX_PTR(p, x) \
	((x86pte_t *)((uintptr_t)(p) + ((x) << mmu.pte_size_shift)))

/*
 * Return the physical address of the pte entry at the given index within a
 * page table. p is cast to paddr_t and the same shifted index is added.
 */
#define	PT_INDEX_PHYSADDR(p, x) \
	((paddr_t)(p) + ((x) << mmu.pte_size_shift))

/*
 * From pfn to bytes, careful not to lose bits on PAE.
 * The pfn is cast to paddr_t before it is handed to mmu_ptob(), so the
 * conversion is carried out on a paddr_t value rather than on the
 * caller's pfn type.
 */
#define	pfn_to_pa(pfn) (mmu_ptob((paddr_t)(pfn)))

#ifdef __xpv
/* Backs PTE2PFN() in __xpv builds; takes the PTE and its level. */
extern pfn_t pte2pfn(x86pte_t, level_t);
#endif

/* The hat_mmu_info instance that the macros in this header refer to. */
extern struct hat_mmu_info mmu;

#endif	/* _KERNEL */


#ifdef	__cplusplus
}
#endif

#endif	/* _VM_HAT_PTE_H */