usr/src/uts/intel/sys/fp.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 *
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
/*		All Rights Reserved				*/

#ifndef _SYS_FP_H
#define	_SYS_FP_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * 80287/80387 and SSE/SSE2 floating point processor definitions
 */

/*
 * values that go into fp_kind
 */
#define	FP_NO	0	/* no fp chip, no emulator (no fp support)	*/
#define	FP_SW	1	/* no fp chip, using software emulator		*/
#define	FP_HW	2	/* chip present bit				*/
#define	FP_287	2	/* 80287 chip present				*/
#define	FP_387	3	/* 80387 chip present				*/
#define	FP_487	6	/* 80487 chip present				*/
#define	FP_486	6	/* 80486 chip present				*/
/*
 * The following values are bit flags instead of actual values.
 * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
 * of (value == __FP_SSE).
 */
#define	__FP_SSE	0x100	/* .. plus SSE-capable CPU		*/
#define	__FP_AVX	0x200	/* .. plus AVX-capable CPU		*/

/*
 * values that go into fp_save_mech
 */
#define	FP_FNSAVE	1	/* fnsave/frstor instructions		*/
#define	FP_FXSAVE	2	/* fxsave/fxrstor instructions		*/
#define	FP_XSAVE	3	/* xsave/xrstor instructions		*/

/*
 * masks for 80387 control word
 */
#define	FPIM	0x00000001	/* invalid operation			*/
#define	FPDM	0x00000002	/* denormalized operand			*/
#define	FPZM	0x00000004	/* zero divide				*/
#define	FPOM	0x00000008	/* overflow				*/
#define	FPUM	0x00000010	/* underflow				*/
#define	FPPM	0x00000020	/* precision				*/
#define	FPPC	0x00000300	/* precision control			*/
#define	FPRC	0x00000C00	/* rounding control			*/
#define	FPIC	0x00001000	/* infinity control			*/
#define	WFPDE	0x00000080	/* data chain exception			*/

/*
 * (Old symbol compatibility)
 */
#define	FPINV	FPIM
#define	FPDNO	FPDM
#define	FPZDIV	FPZM
#define	FPOVR	FPOM
#define	FPUNR	FPUM
#define	FPPRE	FPPM

/*
 * precision, rounding, and infinity options in control word
 */
#define	FPSIG24 0x00000000	/* 24-bit significand precision (short) */
#define	FPSIG53 0x00000200	/* 53-bit significand precision (long)	*/
#define	FPSIG64 0x00000300	/* 64-bit significand precision (temp)	*/
#define	FPRTN	0x00000000	/* round to nearest or even		*/
#define	FPRD	0x00000400	/* round down				*/
#define	FPRU	0x00000800	/* round up				*/
#define	FPCHOP	0x00000C00	/* chop (truncate toward zero)		*/
#define	FPP	0x00000000	/* projective infinity			*/
#define	FPA	0x00001000	/* affine infinity			*/
#define	WFPB17	0x00020000	/* bit 17				*/
#define	WFPB24	0x00040000	/* bit 24				*/

/*
 * masks for 80387 status word
 */
#define	FPS_IE	0x00000001	/* invalid operation			*/
#define	FPS_DE	0x00000002	/* denormalized operand			*/
#define	FPS_ZE	0x00000004	/* zero divide				*/
#define	FPS_OE	0x00000008	/* overflow				*/
#define	FPS_UE	0x00000010	/* underflow				*/
#define	FPS_PE	0x00000020	/* precision				*/
#define	FPS_SF	0x00000040	/* stack fault				*/
#define	FPS_ES	0x00000080	/* error summary bit			*/
#define	FPS_C0	0x00000100	/* C0 bit				*/
#define	FPS_C1	0x00000200	/* C1 bit				*/
#define	FPS_C2	0x00000400	/* C2 bit				*/
#define	FPS_TOP	0x00003800	/* top of stack pointer			*/
#define	FPS_C3	0x00004000	/* C3 bit				*/
#define	FPS_B	0x00008000	/* busy bit				*/

/*
 * Exception flags manually cleared during x87 exception handling.
 */
#define	FPS_SW_EFLAGS	\
	(FPS_IE|FPS_DE|FPS_ZE|FPS_OE|FPS_UE|FPS_PE|FPS_SF|FPS_ES|FPS_B)

/*
 * Initial value of FPU control word as per 4th ed. ABI document
 * - affine infinity
 * - round to nearest or even
 * - 64-bit double precision
 * - all exceptions masked
 */
#define	FPU_CW_INIT	0x133f

/*
 * masks and flags for SSE/SSE2 MXCSR
 */
#define	SSE_IE	0x00000001	/* invalid operation			*/
#define	SSE_DE	0x00000002	/* denormalized operand			*/
#define	SSE_ZE	0x00000004	/* zero divide				*/
#define	SSE_OE	0x00000008	/* overflow				*/
#define	SSE_UE	0x00000010	/* underflow				*/
#define	SSE_PE	0x00000020	/* precision				*/
#define	SSE_DAZ	0x00000040	/* denormals are zero			*/
#define	SSE_IM	0x00000080	/* invalid op exception mask		*/
#define	SSE_DM	0x00000100	/* denormalize exception mask		*/
#define	SSE_ZM	0x00000200	/* zero-divide exception mask		*/
#define	SSE_OM	0x00000400	/* overflow exception mask		*/
#define	SSE_UM	0x00000800	/* underflow exception mask		*/
#define	SSE_PM	0x00001000	/* precision exception mask		*/
#define	SSE_RC	0x00006000	/* rounding control			*/
#define	SSE_RD	0x00002000	/* rounding control: round down		*/
#define	SSE_RU	0x00004000	/* rounding control: round up		*/
#define	SSE_FZ	0x00008000	/* flush to zero for masked underflow	*/

#define	SSE_MXCSR_EFLAGS	\
	(SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE)	/* 0x3f */

#define	SSE_MXCSR_INIT	\
	(SSE_IM|SSE_DM|SSE_ZM|SSE_OM|SSE_UM|SSE_PM)	/* 0x1f80 */

#define	SSE_MXCSR_MASK_DEFAULT	\
	(0xffff & ~SSE_DAZ)				/* 0xffbf */

#define	SSE_FMT_MXCSR	\
	"\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm"	\
	"\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"

/*
 * This structure is written to memory by an 'fnsave' instruction
 */
struct fnsave_state {
	uint16_t	f_fcw;
	uint16_t	__f_ign0;
	uint16_t	f_fsw;
	uint16_t	__f_ign1;
	uint16_t	f_ftw;
	uint16_t	__f_ign2;
	uint32_t	f_eip;
	uint16_t	f_cs;
	uint16_t	f_fop;
	uint32_t	f_dp;
	uint16_t	f_ds;
	uint16_t	__f_ign3;
	union {
		uint16_t fpr_16[5];	/* 80-bits of x87 state */
	} f_st[8];
};	/* 108 bytes */

/*
 * This structure is written to memory by an 'fxsave' instruction
 * Note the variant behaviour of this instruction between long mode
 * and legacy environments!
 */
struct fxsave_state {
	uint16_t	fx_fcw;
	uint16_t	fx_fsw;
	uint16_t	fx_fctw;	/* compressed tag word */
	uint16_t	fx_fop;
#if defined(__amd64)
	uint64_t	fx_rip;
	uint64_t	fx_rdp;
#else
	uint32_t	fx_eip;
	uint16_t	fx_cs;
	uint16_t	__fx_ign0;
	uint32_t	fx_dp;
	uint16_t	fx_ds;
	uint16_t	__fx_ign1;
#endif
	uint32_t	fx_mxcsr;
	uint32_t	fx_mxcsr_mask;
	union {
		uint16_t fpr_16[5];	/* 80-bits of x87 state */
		u_longlong_t fpr_mmx;	/* 64-bit mmx register */
		uint32_t __fpr_pad[4];	/* (pad out to 128-bits) */
	} fx_st[8];
#if defined(__amd64)
	upad128_t	fx_xmm[16];	/* 128-bit registers */
	upad128_t	__fx_ign2[6];
#else
	upad128_t	fx_xmm[8];	/* 128-bit registers */
	upad128_t	__fx_ign2[14];
#endif
} __aligned(16);	/* 512 bytes */

/*
 * This structure represents the header portion of the data layout used by the
 * 'xsave' instruction variants.  It is documented in section 13.4.2 of the
 * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1
 * (IASDv1).  Although "header" is somewhat of a misnomer, considering the data
 * begins at offset 512 of the xsave area, its contents dictate which portions
 * of the area are present and how they may be formatted.
 */
struct xsave_header {
	uint64_t	xsh_xstate_bv;
	uint64_t	xsh_xcomp_bv;
	uint64_t	xsh_reserved[6];
};

/*
 * This structure is written to memory by one of the 'xsave' instruction
 * variants. The first 512 bytes are compatible with the format of the 'fxsave'
 * area.  The extended portion is documented in section 13.4.3.
 *
 * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
 * statically.  Enabling additional xsave-related CPU features requires an
 * increase in the size. We dynamically allocate the per-lwp xsave area at
 * runtime, based on the size needed for the CPU-specific features. This
 * xsave_state structure simply defines our historical layout for the beginning
 * of the xsave area. The locations and size of new, extended, components is
 * determined dynamically by querying the CPU. See the xsave_info structure in
 * cpuid.c.
 *
 * xsave component usage is tracked using bits in the xstate_bv field of the
 * header. The components are documented in section 13.1 of IASDv1. For easy
 * reference, this is a summary of the currently defined component bit
 * definitions:
 *	x87			0x0001
 *	SSE			0x0002
 *	AVX			0x0004
 *	bndreg (MPX)		0x0008
 *	bndcsr (MPX)		0x0010
 *	opmask (AVX512)		0x0020
 *	zmm hi256 (AVX512)	0x0040
 *	zmm hi16 (AVX512)	0x0080
 *	PT			0x0100
 *	PKRU			0x0200
 * When xsaveopt_ctxt is being used to save into the xsave_state area, the
 * xstate_bv field is updated by the xsaveopt instruction to indicate which
 * elements of the xsave area are active.
 *
 * The xcomp_bv field should always be 0, since we do not currently use the
 * compressed form of xsave (xsavec).
 */
struct xsave_state {
	struct fxsave_state	xs_fxsave;	/* 0-511 legacy region */
	struct xsave_header	xs_header;	/* 512-575 XSAVE header */
	upad128_t		xs_ymm[16];	/* 576 AVX component */
} __aligned(64);

/*
 * While AVX_XSTATE_SIZE is the smallest the kernel will allocate for FPU
 * state-saving, other consumers may constrain themselves to the minimum
 * possible xsave state structure, which features only the legacy area and the
 * bare xsave header.
 */
#define	MIN_XSAVE_SIZE	(sizeof (struct fxsave_state) + \
			    sizeof (struct xsave_header))

/*
 * Kernel's FPU save area
 */
typedef struct {
	union _kfpu_u {
		void *kfpu_generic;
		struct fxsave_state *kfpu_fx;
#if defined(__i386)
		struct fnsave_state *kfpu_fn;
#endif
		struct xsave_state *kfpu_xs;
	} kfpu_u;
	uint32_t kfpu_status;		/* saved at #mf exception */
	uint32_t kfpu_xstatus;		/* saved at #xm exception */
} kfpu_t;

extern int fp_kind;		/* kind of fp support			*/
extern int fp_save_mech;	/* fp save/restore mechanism		*/
extern int fpu_exists;		/* FPU hw exists			*/

#ifdef _KERNEL

extern int fpu_ignored;
extern int fpu_pentium_fdivbug;

extern uint32_t sse_mxcsr_mask;

extern void fpu_probe(void);
extern uint_t fpu_initial_probe(void);

extern void fpu_auxv_info(int *, size_t *);

extern void fpnsave_ctxt(void *);
extern void fpxsave_ctxt(void *);
extern void xsave_ctxt(void *);
extern void xsaveopt_ctxt(void *);
extern void fpxsave_excp_clr_ctxt(void *);
extern void xsave_excp_clr_ctxt(void *);
extern void xsaveopt_excp_clr_ctxt(void *);
extern void (*fpsave_ctxt)(void *);
extern void (*xsavep)(struct xsave_state *, uint64_t);

extern void fpxrestore_ctxt(void *);
extern void xrestore_ctxt(void *);
extern void (*fprestore_ctxt)(void *);

extern void fxsave_insn(struct fxsave_state *);
extern void fpsave(struct fnsave_state *);
extern void fprestore(struct fnsave_state *);
extern void fpxsave(struct fxsave_state *);
extern void fpxrestore(struct fxsave_state *);
extern void xsave(struct xsave_state *, uint64_t);
extern void xsaveopt(struct xsave_state *, uint64_t);
extern void xrestore(struct xsave_state *, uint64_t);

extern void fpenable(void);
extern void fpdisable(void);
extern void fpinit(void);

extern uint32_t fperr_reset(void);
extern uint32_t fpxerr_reset(void);

extern uint32_t fpgetcwsw(void);
extern uint32_t fpgetmxcsr(void);

struct regs;
extern int fpexterrflt(struct regs *);
extern int fpsimderrflt(struct regs *);
extern void fpsetcw(uint16_t, uint32_t);
extern void fp_seed(void);
extern void fp_exec(void);
struct _klwp;
extern void fp_lwp_init(struct _klwp *);
extern void fp_lwp_cleanup(struct _klwp *);
extern void fp_lwp_dup(struct _klwp *);

extern const struct fxsave_state sse_initial;
extern const struct xsave_state avx_initial;

#endif	/* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_FP_H */