usr/src/uts/intel/sys/amdzen/smn.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461

/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2022 Oxide Computer Co.
 */

#ifndef _SYS_AMDZEN_SMN_H
#define	_SYS_AMDZEN_SMN_H

#include <sys/debug.h>
#include <sys/types.h>

/*
 * Generic definitions for the system management network (SMN) in Milan and many
 * other AMD Zen processors.  These are shared between the amdzen nexus and its
 * client drivers and kernel code that may require SMN access to resources.
 *
 * ------------------------
 * Endpoints and Addressing
 * ------------------------
 *
 * SMN addresses are 36 bits long but in practice we can use only 32.  Bits
 * [35:32] identify a destination node, but all consumers instead direct SMN
 * transactions to a specific node by selecting the address/data register pair
 * in the NBIO PCI config space corresponding to the destination.  Additional
 * information about nodes and the organisation of devices in the Zen
 * architecture may be found in the block comments in amdzen.c and cpuid.c.
 *
 * The SMN provides access to instances of various functional units present on
 * or accessed via each node.  Some functional units have only a single instance
 * per node while others may have many.  Each functional unit instance has one
 * or more apertures in which it decodes addresses.  The aperture portion of the
 * address consists of bits [31:20] and the remainder of the address is used to
 * specify a register instance within that functional unit.  To complicate
 * matters, some functional units have multiple smaller sub-units that decode
 * smaller regions within its parent's aperture; in some cases, the bits in a
 * mask describing the sub-unit's registers may not be contiguous.  To keep
 * software relatively simple, we generally treat sub-units and parent units the
 * same and try to choose collections of registers whose addresses can all be
 * computed in the same manner to form what we will describe as a unit.
 *
 * Each functional unit should typically have its own header containing register
 * definitions, accessors, and address calculation routines; some functional
 * units are small and straightforward while others may have numerous complex
 * sub-units, registers with many instances whose locations are computed in
 * unusual and nonstandard ways, and other features that need to be declared for
 * consumers.  Those functional units that are present across many processors
 * and have similar or identical contents across them should live in this
 * directory; umc.h is such an example.  Others may be specific to a particular
 * processor family (see cpuid.c) or other collection and may require their own
 * subdirectories, symbol prefixes, and so on.  Unlike the DF, the existence,
 * location, and format of registers accessible over SMN are not versioned nor
 * are they generally self-discoverable.  Each functional unit may be present or
 * absent, in varying numbers and with varying functionality, across the entire
 * Zen product range.  Therefore, at this time most per-unit headers are
 * intended for use only by code that will execute on a specific processor
 * family.  Unifying them over time is considered desirable to the extent the
 * hardware allows it.
 *
 * -----
 * Types
 * -----
 *
 * Practically every last one of us has screwed up the order of arguments to
 * functions like amdzen_smn_write32() when they take an address and a value of
 * the same type.  Repeatedly.  Often.  To safety this particularly annoying
 * footgun, we pass SMN register addresses around in a dedicated struct type
 * smn_reg_t, intended to be instantiated only by the amdzen_xx_smn_reg() and
 * analogous kernel functions and the macros that expand to them or, for the
 * YOLO crew, SMN_MAKE_REG().  Since the struct type and uint32_t are not
 * compatible, the compiler will always squawk if the register and value
 * arguments are reversed, leaving us far fewer baffling failures to debug at
 * runtime.  Typical callers don't require any awareness of this at all, but
 * those that want to pass the address around to e.g. log warnings can obtain
 * the uint32_t address via SMN_REG_ADDR().
 *
 * Register definitions within functional units are provided by objects of type
 * `const smn_reg_def_t`, the usage of which is described in detail in the next
 * section.  For now these are produced on demand by macros; see additional
 * notes on conventions below.  In time, this mechanism may be extended to
 * incorporate version information in a manner similar to that used in df.h.  An
 * automated mechanism for creating a single collection of register and field
 * definitions for C, in CTF, and/or for other language consumers as well as
 * automated register value decoding remains an open area for future work.
 *
 * -----------------------
 * Instances and Iterators
 * -----------------------
 *
 * Not only do some functional units have many instances, so too do many
 * registers.  AMD documentation describes registers in terms of a series of
 * iterators over various functional units, subunits, and other entities and
 * attributes that each multiply the number of register instances.  A concrete
 * example from the publicly-available Naples PPR (publication 54945 rev. 1.14)
 * may make this simpler to understand.  Unfortunately, SMN is not described by
 * this document, but the register instance syntax used is the same and is
 * described in additional detail in sections 1.3.3-4.  For our example, let us
 * consider the same MSR that AMD uses in their own example,
 * Core::X86::MSR::TSC.  We are given that this register has the following
 * instances: lthree[1:0]_core[3:0]_thread[1:0].  We therefore have three
 * iterators: one for 'lthree's, one for 'core's for each 'lthree', and one for
 * 'thread's for each 'core'.  We can also see that there are 16 total
 * instances; in fact, there are actually 16 per core-complex die (CCD), which
 * documents for more recent processors would expose as a fourth iterator.  To
 * keep things relatively simple, we will assume that there are only 16 per
 * processor.  If it were possible to access all of these instances via MMIO,
 * SMN, or some other flat address space (it isn't, as far as we can tell), a
 * function for computing the address of each instance would require three
 * parameters.  Let us suppose that this register really were accessible via
 * SMN; in that case, we would also be provided with a list of instance aliases
 * such as
 *
 *	_thread[1:0]_core[7:0]_lthree[1:0]_alias_SMN: THREADREGS[1:0]x0000_0010;
 *	THREADREGS[1:0]=COREREGS[7:0]x0000_[4,0]000;
 *	COREREGS[7:0]=L3REGS[1:0]x000[7:0]_5000; L3REGS[1:0]=57[A,6]0_0000
 *
 * To compute the address of an instance of this hypothetical register, we would
 * begin by determining that its top-level functional unit is L3REGS with a base
 * aperture at 0x5760_0000.  There are two instances of this functional unit (0
 * and 1) and each subsequent instance is offset 0x40_0000 from the previous.
 * This allows us to compute the base address of each L3REGS block; a similar
 * process is then used to compute the base address of each COREREGS block, and
 * finally the address of each THREADREGS block that contains the register
 * instance.  In practice, we might choose instead to consider the COREREGS as
 * our functional unit, with instances at 0x5760_5000, 0x5761_5000, 0x57A0_5000,
 * and 0x57A1_5000; whether it is useful to do this depends on whether we need
 * to consider other registers in the L3REGS unit that may not have per-core
 * blocks or instances but would otherwise be interleaved with these.  This ends
 * up being something of a judgment call.  Let's suppose we want to consider the
 * entire L3REGS functional unit and write a function to compute the address of
 * any register (including our hypothetical TSC) in the subordinate THREADREGS
 * blocks.  We'll start by adding the new unit to the smn_unit_t enumeration;
 * let's call it SMN_UNIT_L3REGS_COREREGS since that's the sub-unit level at
 * which we can uniformly compute register instance addresses.  We have already
 * determined our base aperture and we know that we have 3 iterators and
 * therefore three parameters; all SMN address calculators return an smn_reg_t
 * and must accept an smn_reg_def_t.  Therefore our function's signature is:
 *
 * smn_reg_t amdzen_l3regs_coreregs_reg(uint8_t l3no,
 *     const smn_reg_def_t def, uint16_t coreinst, uint16_t threadinst);
 *
 * We have chosen to use a base aperture of 0x5760_0000 and unit offset
 * 0x40_0000, so we can begin by computing a COREREGS aperture:
 *
 * const uint32_t aperture_base = 0x57600000;
 * const uint32_t aperture_off = l3no * 0x400000;
 * const uint32_t coreregs_aperture_base = 0x5000;
 * const uint32_t coreregs_aperture_off = coreinst * 0x10000;
 *
 * We can now consider the smn_reg_def_t our function will be given, which
 * describes THREADREGS::TSC.  Within the COREREGS functional sub-unit, each
 * thread register has 2 instances present at a stride of 0x4000 bytes (from our
 * hypothetical register definition), so the register would be defined as
 * follows:
 *
 * #define	D_L3REGS_COREREGS_THREAD_TSC	(const smn_reg_def_t){	\
 *	.srd_unit = SMN_UNIT_L3REGS_COREREGS,	\
 *	.srd_reg = 0x10,	\
 *	.srd_nents = 2,	\
 *	.srd_stride = 0x4000	\
 * }
 *
 * Note that describing the number of entries and their stride in the register
 * definition allows us to collapse the last functional sub-unit in our
 * calculation process: we need not compute the base aperture address of the
 * THREADREGS sub-unit.  Instead, we can follow our previous code with:
 *
 * const uint32_t aperture = aperture_base + aperture_off +
 *     coreregs_aperture_base + coreregs_aperture_off;
 * const uint32_t reg = def.srd_reg + threadinst * def.srd_stride;
 *
 * Finally, we convert the aperture address and register offset into the
 * appropriate type and return it:
 *
 * return (SMN_MAKE_REG(aperture + reg));
 *
 * As you can see, other registers in THREADREGS would be defined with the same
 * number of entries and stride but a different offset (srd_reg member), while
 * other registers in the COREREGS block would have a different offset and
 * stride.  For example, if a block of per-core (not per-thread) registers were
 * located at COREREGS[7:0]x0000_1000, a register called "COREREGS::FrobberCntl"
 * in that block with a single instance at offset 0x48 might be defined as
 *
 * #define	D_L3REGS_COREREGS_FROB_CTL	(const smn_reg_def_t){	\
 *	.srd_unit = SMN_UNIT_L3REGS_COREREGS,	\
 *	.srd_reg = 0x1048,	\
 *	.srd_nents = 1	\
 * }
 *
 * You can satisfy yourself that the same calculation function we wrote above
 * will correctly compute the address of the sole instance (0) of this register.
 * To further simplify register definitions and callers, the actual address
 * calculation functions are written to treat srd_nents == 0 to mean a register
 * with a single instance, and to treat srd_stride == 0 as if it were 4 (the
 * space occupied by registers accessed by SMN is -- so far as we can tell,
 * practically always -- 4 bytes in size, even if the register itself is
 * smaller).  Additionally, a large number of assertions should be present in
 * such functions to guard against foreign unit register definitions,
 * out-of-bounds unit and register instance parameters, address overflow, and
 * register instance offsets that overflow improperly into an aperture base
 * address.  All of these conditions indicate either an incorrect register
 * definition or a bug in the caller.  See the template macro at the bottom of
 * this file and umc.h for additional examples of calculating and checking
 * register addresses.
 *
 * With address computation out of the way, we can then provide an accessor for
 * each instance of this register:
 *
 * #define	L3REGS_COREREGS_THREAD_TSC(l3, core, thread)	\
 *	amdzen_l3regs_coreregs_reg(l3, D_L3REGS_COREREGS_THREAD_TSC, \
 *	core, thread)
 *
 * Our other per-core register's accessor would look like:
 *
 * #define	L3REGS_COREREGS_FROB_CTL(l3, core)	\
 *	amdzen_l3regs_coreregs_reg(l3, D_L3REGS_COREREGS_FROB_CTL, core, 0)
 *
 * The next section describes these conventions in greater detail.
 *
 * -----------
 * Conventions
 * -----------
 *
 * First, let's consider the names of the register definition and the
 * convenience macro supplied to obtain an instance of that register: we've
 * prefixed the global definition of the registers with D_ and the convenience
 * macros to return a specific instance are simply named for the register
 * itself.  Additionally, the two macros expand to objects of incompatible
 * types, so that using the wrong one will always be detected at compile time.
 * Why do we expose both of these?  The instance macro is useful for callers who
 * know at compile-time the name of the register of which they want instances;
 * this makes it unnecessary to remember the names of functions used to compute
 * register instance addresses.  The definition itself is useful to callers that
 * accept const smn_reg_def_t arguments referring to registers of which the
 * immediate caller does not know the names at compile time.
 *
 * You may wonder why we don't declare named constants for the definitions.
 * There are two ways we could do that and both are unfortunate: one would be to
 * declare them static in the header, the other to separate declarations in the
 * header from initialisation in a separate source file.  Measurements revealed
 * that the former causes a very substantial increase in data size, which will
 * be multiplied by the number of registers defined and the number of source
 * files including the header.  As convenient as it is to have these symbolic
 * constants available to debuggers and other tools at runtime, they're just too
 * big.  However, it is possible to generate code to be compiled into loadable
 * modules that would contain a single copy of the constants for this purpose as
 * well as for providing CTF to foreign-language binding generators.  The other
 * option considered here, putting the constants in separate source files, makes
 * maintenance significantly more challenging and makes it likely not only that
 * new registers may not be added properly but also that definitions, macros, or
 * both may be incorrect.  Neither of these options is terrible but for now
 * we've optimised for simplicity of maintenance and minimal data size at the
 * immediate but not necessarily permanent expense of some debugging
 * convenience.
 *
 * We wish to standardise as much as possible on conventions across all
 * Zen-related functional units and blocks (including those accessed by SMN,
 * through the DF directly, and by other means).  In general, some register and
 * field names are shortened from their official names for clarity and brevity;
 * the official names are always given in the comment above the definition.
 * AMD's functional units come from many internal teams and presumably several
 * outside vendors as well; as a result, there is no single convention to be
 * found throughout the PPRs and other documentation.  For example, different
 * units may have registers containing "CTL", "CNTL", "CTRL", "CNTRL", and
 * "CONTROL", as well as "FOO_CNTL", "FooCntl", and "Foo_Cntl".  Reflecting
 * longstanding illumos conventions, we collapse all such register names
 * regardless of case as follows:
 *
 * CTL/CTRL/CNTL/CNTRL/CONTROL				=> CTL
 * CFG/CONF/CONFIG/CONFIGURATION			=> CFG
 * EN/ENAB/ENABLE/ENABLED				=> EN
 * DIS/DISAB/DISABLE/DISABLED				=> DIS
 *
 * Note that if collapsing these would result in ambiguity, more of the official
 * names will be preserved.  In addition to collapsing register and field names
 * in this case-insensitive manner, we also follow standard code style practice
 * and name macros and constants in SCREAMING_SNAKE_CASE regardless of AMD's
 * official name.  It is similarly reasonable to truncate or abbreviate other
 * common terms in a consistent manner where doing so preserves uniqueness and
 * at least some semantic value; without doing so, some official register names
 * will be excessively unwieldy and may not even fit into 80 columns.  Please
 * maintain these practices and strive for consistency with existing examples
 * when abbreviation is required.
 *
 * As we have done elsewhere throughout the amdzen body of work, register fields
 * should always be given in order starting with the most significant bits and
 * working down toward 0; this matches AMD's documentation and makes it easier
 * for reviewers and other readers to follow.  The routines in bitext.h should
 * be used to extract and set bitfields unless there is a compelling reason to
 * do otherwise (e.g., assembly consumers).  Accessors should be named
 * UNIT_REG_GET_FIELD and UNIT_REG_SET_FIELD respectively, unless the register
 * has a single field that has no meaningful name (i.e., the field's name is the
 * same as the register's or it's otherwise obvious from the context what its
 * purpose is), in which case UNIT_REG_GET and UNIT_REG_SET are appropriate.
 * Additional getters and setters that select a particular bit from a register
 * or field consisting entirely of individual bits describing or controlling the
 * state of some entity may also be useful.  As with register names, be as brief
 * as possible without sacrificing too much information.
 *
 * Constant values associated with a field should be declared immediately
 * following that field.  If a constant or collection of constants is used in
 * multiple fields of the same register, the definitions should follow the last
 * such field; similarly, constants used in multiple registers should follow the
 * last such register, and a comment explaining the scope of their validity is
 * recommended.  Such constants should be named for the common elements of the
 * fields or registers in which they are valid.
 *
 * As noted above, SMN register definitions should omit the srd_nents and
 * srd_stride members when there is a single instance of the register within the
 * unit.  The srd_stride member should also be elided when the register
 * instances are contiguous.  All address calculation routines should be written
 * to support these conventions.  Each register should have an accessor macro or
 * function, and should accept instance numbers in order from superior to
 * inferior (e.g., from the largest functional unit to the smallest, ending with
 * the register instance itself).  This convention is similar to that used in
 * generic PCIe code in which a register is specified by bus, device, and
 * function numbers in that order.  Register accessor macros or inline functions
 * should not expose inapplicable taxons to callers; in our example above,
 * COREREGS_FROB_CTL has an instance for each core but is not associated with a
 * thread; therefore its accessor should not accept a thread instance argument
 * even though the address calculation function it uses does.
 *
 * Most of these conventions are not specific to registers accessed via SMN;
 * note also that some registers may be accessed in multiple ways (e.g., SMN and
 * MMIO, or SMN and the MSR instructions).  While the code here is generally
 * unaware of such aliased access methods, following these conventions will
 * simplify naming and usage if such a register needs to be accessed in multiple
 * ways.  Sensible additions to macro and symbol names such as the access method
 * to be used will generally be sufficient to disambiguate while allowing reuse
 * of associated field accessors, constants, and in some cases even register
 * offset, instance count, and stride.
 */

#ifdef __cplusplus
extern "C" {
#endif

#define	SMN_APERTURE_MASK	0xfff00000

/*
 * An instance of an SMN-accessible register.  The address is wrapped in a
 * struct rather than passed as a bare uint32_t so that the compiler rejects
 * calls in which a register and a 32-bit value have been swapped.  Construct
 * one with SMN_MAKE_REG() and recover the raw address with SMN_REG_ADDR().
 */
typedef struct smn_reg {
	uint32_t sr_addr;	/* 32-bit SMN address of this instance */
} smn_reg_t;

/*
 * SMN_MAKE_REG() expands to a compound literal of type const smn_reg_t whose
 * sr_addr member is x; SMN_REG_ADDR() expands to the sr_addr member of x.
 */
/*CSTYLED*/
#define	SMN_MAKE_REG(x)	((const smn_reg_t){ .sr_addr = (x) })
#define	SMN_REG_ADDR(x)	((x).sr_addr)

/*
 * This exists so that address calculation functions can check that the register
 * definitions they're passed are something they understand how to use.  While
 * many address calculation functions are similar, some functional units define
 * registers with multiple iterators, have differently-sized apertures, or both;
 * it's important that we reject foreign register definitions in these
 * functions.  In principle this could be done at compile time, but the
 * preprocessor gymnastics required to do so are excessively vile and we are
 * really already hanging it pretty far over the edge in terms of what the C
 * preprocessor can do for us.
 *
 * Functions generated by AMDZEN_MAKE_SMN_REG_FN() compare the srd_unit member
 * of the definition they are given against the SMN_UNIT_* value named by the
 * macro's _unit argument.
 */
typedef enum smn_unit {
	/* Value 0, i.e. what an omitted srd_unit initialiser yields. */
	SMN_UNIT_UNKNOWN,
	SMN_UNIT_IOAPIC,
	SMN_UNIT_IOHC,
	SMN_UNIT_IOHCDEV_PCIE,
	SMN_UNIT_IOHCDEV_NBIF,
	SMN_UNIT_IOHCDEV_SB,
	SMN_UNIT_IOAGR,
	SMN_UNIT_SDPMUX,
	SMN_UNIT_UMC,
	SMN_UNIT_PCIE_CORE,
	SMN_UNIT_PCIE_PORT,
	SMN_UNIT_PCIE_RSMU,
	SMN_UNIT_SCFCTP,
	SMN_UNIT_SMUPWR,
	SMN_UNIT_IOMMUL1,
	SMN_UNIT_IOMMUL2,
	SMN_UNIT_NBIF,
	SMN_UNIT_NBIF_ALT,
	SMN_UNIT_NBIF_FUNC
} smn_unit_t;

/*
 * srd_unit and srd_reg are required; they describe the functional unit and the
 * register's address within that unit's aperture (which may be the SDP-defined
 * aperture described above or a smaller one if a unit has been broken down
 * logically into smaller units).  srd_nents is optional; if not set, all
 * existing consumers assume a value of 0 is equivalent to 1: the register has
 * but a single instance in each unit.  srd_stride is ignored if srd_nents is 0
 * or 1 and optional otherwise; it describes the number of bytes to be added to
 * the previous instance's address to obtain that of the next instance.  If left
 * at 0 it is assumed to be 4 bytes.
 *
 * There are units in which registers have more complicated collections of
 * instances that cannot be represented perfectly by this simple descriptor;
 * they require custom address calculation macros and functions that may take
 * additional arguments, and they may not be able to check their arguments or
 * the computed addresses as carefully as would be ideal.
 */
typedef struct smn_reg_def {
	smn_unit_t	srd_unit;	/* owning functional unit (required) */
	uint32_t	srd_reg;	/* offset within the unit's aperture */
	uint32_t	srd_stride;	/* instance stride, bytes; 0 => 4 */
	uint16_t	srd_nents;	/* instance count; 0 => 1 */
} smn_reg_def_t;

/*
 * This macro may be used by per-functional-unit code to construct an address
 * calculation function.  It is usable by some, BUT NOT ALL, functional units;
 * see the block comment above for an example that cannot be accommodated.  Here
 * we assume that there are at most 2 iterators in any register's definition.
 * Use this when possible, as it provides a large number of useful checks on
 * DEBUG bits.  Similar checks should be incorporated into implementations for
 * nonstandard functional units to the extent possible.
 *
 * Macro arguments:
 *
 *	_fn		Name of the static inline function to define.
 *	_unit		Suffix appended to SMN_UNIT_ to form the only srd_unit
 *			value the function accepts.
 *	_base		Aperture address of unit instance 0.  A CTASSERT
 *			requires that it have no bits set outside _mask.
 *	_mask		Address bits that select the aperture.  Register
 *			offsets must not have any of these bits set.
 *	_nunits		Exclusive upper bound on unitno.
 *	_unitshift	Left shift applied to unitno to obtain the offset of
 *			that unit's aperture from _base.
 *
 * The generated function has the signature
 *
 *	smn_reg_t _fn(uint8_t unitno, smn_reg_def_t def, uint16_t reginst);
 *
 * and returns the register at
 *
 *	_base + (unitno << _unitshift) + def.srd_reg + reginst * stride
 *
 * where stride is def.srd_stride, or 4 if that is 0.  reginst must be less
 * than def.srd_nents, with an srd_nents of 0 treated as 1.  The ASSERTs verify
 * the unit, both instance bounds, that the aperture sum does not wrap, and
 * that the aperture and register portions of the address stay on their own
 * sides of _mask.
 *
 * NOTE(review): the register offset is computed in 32 bits, so a product of
 * reginst and stride that wraps past 2^32 could land back inside the register
 * field and pass the final mask check; confirm that definitions keep
 * srd_nents * srd_stride small enough that this cannot happen.
 */

#define	AMDZEN_MAKE_SMN_REG_FN(_fn, _unit, _base, _mask, _nunits, _unitshift) \
CTASSERT(((_base) & ~(_mask)) == 0);					\
static inline smn_reg_t							\
_fn(const uint8_t unitno, const smn_reg_def_t def, const uint16_t reginst) \
{									\
	const uint32_t unit32 = (const uint32_t)unitno;			\
	const uint32_t reginst32 = (const uint32_t)reginst;		\
	const uint32_t stride = (def.srd_stride == 0) ? 4 : def.srd_stride; \
	const uint32_t nents = (def.srd_nents == 0) ? 1 :		\
	    (const uint32_t)def.srd_nents;				\
									\
	ASSERT3S(def.srd_unit, ==, SMN_UNIT_ ## _unit);			\
	ASSERT3U(unit32, <, (_nunits));					\
	ASSERT3U(nents, >, reginst32);					\
	ASSERT0(def.srd_reg & (_mask));					\
									\
	const uint32_t aperture_base = (_base);				\
									\
	const uint32_t aperture_off = (unit32 << (_unitshift));		\
	ASSERT3U(aperture_off, <=, UINT32_MAX - aperture_base);		\
									\
	const uint32_t aperture = aperture_base + aperture_off;		\
	ASSERT0(aperture & ~(_mask));					\
									\
	const uint32_t reg = def.srd_reg + reginst32 * stride;		\
	ASSERT0(reg & (_mask));				\
									\
	return (SMN_MAKE_REG(aperture + reg));				\
}

#ifdef __cplusplus
}
#endif

#endif /* _SYS_AMDZEN_SMN_H */