summaryrefslogtreecommitdiff
path: root/usr/src/common/mc/zen_umc/zen_umc_decode.c
blob: acf03868cd260a7eea423c6c5952a273e1b0c946 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2022 Oxide Computer Company
 */

/*
 * Zen UMC Decoding logic. See zen_umc.c for an overview of everything. This
 * implements shared userland/kernel decoding.
 */

#include "zen_umc.h"

#ifndef _KERNEL
#include <strings.h>
#endif

/*
 * Address constants.
 */
#define	ZEN_UMC_TOM2_START	0x100000000ULL
#define	ZEN_UMC_TOM2_RSVD_BEGIN	0xfd00000000ULL
#define	ZEN_UMC_TOM2_RSVD_END	0x10000000000ULL

/*
 * COD based hashing constants.
 */
#define	ZEN_UMC_COD_NBITS	3
#define	ZEN_UMC_NPS_MOD_NBITS	3

/*
 * We want to apply some initial heuristics to determine if a physical address
 * is DRAM before we proceed because of the MMIO hole and related. The DRAM
 * ranges can overlap with these system reserved ranges so we have to manually
 * check these.  Effectively this means that we have a few valid ranges:
 *
 *  o [ 0, TOM )
 *  o [ 4 GiB, TOM2 )
 *
 * However, the above 4 GiB runs into trouble depending on size. There is a 12
 * GiB system reserved address region right below 1 TiB. So it really turns
 * into the following when we have more than 1 TiB of DRAM:
 *
 *  o [ 0, TOM )
 *  o [ 4 GiB, 1 TiB - 12 GiB )
 *  o [ 1 TiB, TOM2 )
 *
 * Note, this does not currently scan MTRRs or MMIO rules for what might be
 * redirected to MMIO.
 */
static boolean_t
zen_umc_decode_is_dram(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const uint64_t pa = dec->dec_pa;
	boolean_t is_dram;

	if (pa < umc->umc_tom) {
		/* Anything below TOM is treated as DRAM. */
		is_dram = B_TRUE;
	} else if (pa >= umc->umc_tom2) {
		/* Nothing at or beyond TOM2 can be DRAM. */
		is_dram = B_FALSE;
	} else if (pa >= ZEN_UMC_TOM2_RSVD_BEGIN &&
	    pa < ZEN_UMC_TOM2_RSVD_END) {
		/* The reserved window that ends at 1 TiB is never DRAM. */
		is_dram = B_FALSE;
	} else if (pa >= ZEN_UMC_TOM2_START) {
		/*
		 * We already know pa is below TOM2 and outside the reserved
		 * window, so being at or above 4 GiB is all that is left to
		 * check.
		 */
		is_dram = B_TRUE;
	} else {
		/* In the gap between TOM and 4 GiB: not DRAM. */
		is_dram = B_FALSE;
	}

	if (is_dram == B_FALSE) {
		dec->dec_fail = ZEN_UMC_DECODE_F_OUTSIDE_DRAM;
	}

	return (is_dram);
}

/*
 * In our first stop on decoding, we need to go through and take a physical
 * address and figure out what the corresponding initial DF rule that applies
 * is. This rule will then be used to figure out which target on the data fabric
 * we should be going to and what interleaving rules apply.
 *
 * Our DRAM rule may reflect that the DRAM hole is active. In this case the
 * specified range in the rule will be larger than the actual amount of DRAM
 * present. MMIO accesses take priority over DRAM accesses in the core and
 * therefore the MMIO portion of the rule is not actually decoded. When trying
 * to match a rule we do not need to worry about that and can just look whether
 * our physical address matches a rule. We will take into account whether
 * hoisting should adjust the address when we translate from a system address to
 * a normal address (e.g. an address in the channel) which will be done in a
 * subsequent step. If an address is in the hole, that has already been
 * accounted for.
 *
 * While gathering information, we have all the DRAM rules for a given CCM that
 * corresponds to a CPU core. This allows us to review all DRAM rules in one
 * place rather than walking through what's been assigned to each UMC instance,
 * which only has the rules that are directed towards that particular channel
 * and matter for determining channel offsets.
 */
static boolean_t
zen_umc_decode_find_df_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const zen_umc_df_t *src = &umc->umc_dfs[0];
	uint_t idx = 0;

	/*
	 * Walk DF 0's DRAM rules in order and take the first enabled rule
	 * whose [base, limit) range covers the physical address.
	 */
	while (idx < src->zud_dram_nrules) {
		const df_dram_rule_t *cand = &src->zud_rules[idx];

		if ((cand->ddr_flags & DF_DRAM_F_VALID) != 0 &&
		    dec->dec_pa >= cand->ddr_base &&
		    dec->dec_pa < cand->ddr_limit) {
			dec->dec_df_ruleno = idx;
			dec->dec_df_rule = cand;
			dec->dec_df_rulesrc = src;
			return (B_TRUE);
		}

		idx++;
	}

	/* No enabled rule claims this address. */
	dec->dec_fail = ZEN_UMC_DECODE_F_NO_DF_RULE;
	return (B_FALSE);
}

/*
 * This function takes care of the common logic of adjusting an address by the
 * base value in the rule and determining if we need to apply the DRAM hole or
 * not. This function is used in two different places:
 *
 *   o As part of adjusting the system address to construct the interleave
 *     address for DFv4 and Zen 3 based 6-channel hashing (see
 *     zen_umc_determine_ileave_addr() below).
 *   o As part of adjusting the system address at the beginning of normalization
 *     to a channel address.
 *
 * One thing to highlight is that the same adjustment we make in the first case
 * applies to a subset of things for interleaving; however, it applies to
 * everything when normalizing.
 */
static boolean_t
zen_umc_adjust_dram_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec,
    uint64_t *addrp, zen_umc_decode_failure_t errno)
{
	const uint64_t init_addr = *addrp;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_df_t *df = dec->dec_df_rulesrc;
	uint64_t mod_addr = init_addr;

	/*
	 * The caller must hand us an address that the matched rule actually
	 * covers; otherwise the subtraction below would underflow.
	 */
	ASSERT3U(init_addr, >=, rule->ddr_base);
	ASSERT3U(init_addr, <, rule->ddr_limit);
	mod_addr -= rule->ddr_base;

	/*
	 * The hole only matters when the rule says it is subject to it, the
	 * DF that the rule came from has valid hole information, and the
	 * address is at or above 4 GiB.
	 */
	if ((rule->ddr_flags & DF_DRAM_F_HOLE) != 0 &&
	    (df->zud_flags & ZEN_UMC_DF_F_HOLE_VALID) != 0 &&
	    init_addr >= ZEN_UMC_TOM2_START) {
		/*
		 * Read the hole base from the same DF whose validity flag we
		 * just checked so that the flag and the value always agree.
		 * The size of the hole is the distance from its base up to
		 * 4 GiB.
		 */
		const uint64_t hole_size = ZEN_UMC_TOM2_START -
		    df->zud_hole_base;

		/*
		 * If removing the hole would take us below zero, report the
		 * caller-selected failure along with the rule number.
		 */
		if (mod_addr < hole_size) {
			dec->dec_fail = errno;
			dec->dec_fail_data = dec->dec_df_ruleno;
			return (B_FALSE);
		}

		mod_addr -= hole_size;
	}

	/* Only update the caller's address once everything has succeeded. */
	*addrp = mod_addr;
	return (B_TRUE);
}

/*
 * Take care of constructing the address we need to use for determining the
 * interleaving target fabric id. See the big theory statement in zen_umc.c for
 * more on this.
 */
static boolean_t
zen_umc_determine_ileave_addr(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/* Every case starts out from the raw physical address. */
	dec->dec_ilv_pa = dec->dec_pa;

	/*
	 * Only DF revisions after DFv3, or the 6-channel mode on any
	 * revision, interleave on an address that has had the rule base (and
	 * possibly the DRAM hole) removed. Everything else uses the physical
	 * address untouched.
	 */
	if (umc->umc_df_rev > DF_REV_3 ||
	    rule->ddr_chan_ileave == DF_CHAN_ILEAVE_6CH) {
		return (zen_umc_adjust_dram_addr(umc, dec, &dec->dec_ilv_pa,
		    ZEN_UMC_DECODE_F_ILEAVE_UNDERFLOW));
	}

	return (B_TRUE);
}

/*
 * This is a simple interleaving case where we simply extract bits. No hashing
 * required! Per zen_umc.c, from lowest to highest, we have channel, die, and
 * then socket bits.
 */
static boolean_t
zen_umc_decode_ileave_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const uint32_t sock_width = rule->ddr_sock_ileave_bits;
	const uint32_t die_width = rule->ddr_die_ileave_bits;
	uint32_t chan_width, lsb;

	/* Translate the channel interleave mode into a bit count. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		chan_width = 0;
		break;
	case DF_CHAN_ILEAVE_2CH:
		chan_width = 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		chan_width = 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		chan_width = 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		chan_width = 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		chan_width = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Each field is a contiguous run of address bits. Starting at the
	 * rule's first interleave bit we peel off the channel, then the die,
	 * and finally the socket. A field with no bits is simply zero.
	 */
	lsb = rule->ddr_addr_start;

	dec->dec_ilv_chan = 0;
	if (chan_width != 0) {
		dec->dec_ilv_chan = bitx64(dec->dec_ilv_pa,
		    lsb + chan_width - 1, lsb);
		lsb += chan_width;
	}

	dec->dec_ilv_die = 0;
	if (die_width != 0) {
		dec->dec_ilv_die = bitx64(dec->dec_ilv_pa,
		    lsb + die_width - 1, lsb);
		lsb += die_width;
	}

	dec->dec_ilv_sock = 0;
	if (sock_width != 0) {
		dec->dec_ilv_sock = bitx64(dec->dec_ilv_pa,
		    lsb + sock_width - 1, lsb);
	}

	return (B_TRUE);
}

/*
 * Perform the Zen 2/Zen 3 "COD" based hashing. See the zen_umc.c interleaving
 * section of the big theory statement for an overview of how this works.
 */
static boolean_t
zen_umc_decode_ileave_cod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const uint64_t pa = dec->dec_ilv_pa;
	/*
	 * AMD defines this ordering: the first hash bit comes from the rule's
	 * starting address bit, and the following ones jump to bits 12 and 13.
	 */
	const uint32_t lead_bits[3] = { rule->ddr_addr_start, 12, 13 };
	uint32_t width, chan = 0;

	/* This mode cannot be combined with socket or die interleaving. */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
		width = 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
		width = 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
		width = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Build the channel one bit at a time. Each output bit is the XOR of
	 * its leading address bit with the matching bit of every hash range
	 * that the rule's flags enable.
	 */
	for (uint_t pos = 0; pos < width; pos++) {
		uint8_t acc = bitx64(pa, lead_bits[pos], lead_bits[pos]);

		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			acc ^= (uint8_t)bitx64(pa, 16 + pos, 16 + pos);
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			acc ^= (uint8_t)bitx64(pa, 21 + pos, 21 + pos);
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			acc ^= (uint8_t)bitx64(pa, 30 + pos, 30 + pos);
		}

		chan |= (uint32_t)acc << pos;
	}

	/* Socket and die never participate here, so they are always zero. */
	dec->dec_ilv_sock = 0;
	dec->dec_ilv_die = 0;
	dec->dec_ilv_chan = chan;

	return (B_TRUE);
}

/*
 * This implements the standard NPS hash for power of 2 based channel
 * configurations that is found in DFv4. For more information, please see the
 * interleaving portion of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_ileave_nps(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t nchan_bit, nsock_bit, nbits;
	const df_dram_rule_t *rule = dec->dec_df_rule;
	/*
	 * The order of bits here is defined by AMD. Yes, we start with the
	 * rule's defined address bit and then skip to bit 12.
	 */
	const uint32_t addr_bits[4] = { rule->ddr_addr_start, 12, 13, 14 };

	/* Die interleaving is not valid in combination with this hash. */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	nsock_bit = rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_2CH:
		nchan_bit = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_4CH:
		nchan_bit = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_8CH:
		nchan_bit = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Every hash bit that we compute indexes addr_bits[]. The socket bit
	 * count comes straight from the rule, so make sure that the total
	 * cannot walk off the end of the array. This is enforced at run time
	 * rather than by an assertion alone so that a bad rule results in a
	 * decode failure instead of an out of bounds read when assertions are
	 * not compiled in.
	 */
	nbits = nchan_bit + nsock_bit;
	if (nbits > sizeof (addr_bits) / sizeof (addr_bits[0])) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan = 0;

	for (uint_t i = 0; i < nbits; i++) {
		uint8_t hash = 0;

		/*
		 * Start with the leading address bit for this position and
		 * then fold in the matching bit of each hash range that the
		 * rule's flags enable.
		 */
		hash = bitx64(dec->dec_ilv_pa, addr_bits[i], addr_bits[i]);
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 16 + i, 16 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 21 + i, 21 + i);
			hash ^= val;
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 30 + i, 30 + i);
			hash ^= val;
		}

		/*
		 * If this is the first bit and we're not doing socket
		 * interleaving, then we need to add bit 14 to the running hash.
		 */
		if (i == 0 && nsock_bit == 0) {
			uint8_t val = bitx64(dec->dec_ilv_pa, 14, 14);
			hash ^= val;
		}

		/*
		 * If socket interleaving is going on we need to store the first
		 * bit as the socket hash and then redirect the remaining bits
		 * to the channel, taking into account that the shift will be
		 * adjusted as a result.
		 */
		if (nsock_bit > 0) {
			if (i == 0) {
				dec->dec_ilv_sock = hash;
			} else {
				dec->dec_ilv_chan |= hash << (i - 1);
			}
		} else {
			dec->dec_ilv_chan |= hash << i;
		}
	}

	return (B_TRUE);
}

/*
 * This implements the logic to perform the Zen 3 6ch special hash. It's worth
 * calling out that unlike all other hash functions, this does not support the
 * use of the DF_DRAM_F_HASH_16_18 flag.
 */
static void
zen_umc_decode_hash_zen3_6ch(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	const uint32_t base = rule->ddr_addr_start;
	/*
	 * The 2 MiB and 1 GiB hash ranges are consumed in this rotated order.
	 * There is no 64 KiB range for this hash.
	 */
	const uint32_t sel_2M[3] = { 23, 21, 22 };
	const uint32_t sel_1G[3] = { 32, 30, 31 };

	for (uint_t idx = 0; idx < ZEN_UMC_COD_NBITS; idx++) {
		/* Each hash starts from consecutive bits at the rule's base. */
		uint8_t acc = bitx64(pa, base + idx, base + idx);

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			acc ^= (uint8_t)bitx64(pa, sel_2M[idx], sel_2M[idx]);
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			acc ^= (uint8_t)bitx64(pa, sel_1G[idx], sel_1G[idx]);
		}

		hashes[idx] = acc;
	}

	/* Only the first hash also folds in the bit three above the base. */
	hashes[0] ^= (uint8_t)bitx64(pa, base + 3, base + 3);
}

/*
 * Perform Zen 3 6-channel hashing. This is pretty weird compared to others. See
 * the zen_umc.c big theory statement for the thorny details.
 */
static boolean_t
zen_umc_decode_ileave_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const uint32_t base = rule->ddr_addr_start;
	uint8_t h[3] = { 0 };
	uint32_t upper;

	/* Socket and die interleaving cannot be combined with this mode. */
	if (rule->ddr_sock_ileave_bits != 0 || rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_COD_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_ilv_pa, h);

	/*
	 * The low channel bit is always the first hash. The upper two bits
	 * normally come from the other two hashes. When both of those are set
	 * they are instead replaced by the address above the hashed bits
	 * taken modulo three.
	 */
	if (h[1] == 1 && h[2] == 1) {
		upper = (dec->dec_ilv_pa >> (base + 3)) % 3;
	} else {
		upper = (h[2] << 1) | h[1];
	}

	dec->dec_ilv_sock = 0;
	dec->dec_ilv_die = 0;
	dec->dec_ilv_chan = h[0] | (upper << 1);

	return (B_TRUE);
}

/*
 * This is the standard hash function for the non-power of two based NPS hashes.
 * See the big theory statement for more information. Unlike the normal NPS hash
 * which uses bit 14 conditionally based on socket interleaving, here it is
 * always used.
 */
static void
zen_umc_decode_hash_nps_mod(const df_dram_rule_t *rule, uint64_t pa,
    uint8_t hashes[3])
{
	/* Leading address bit for each of the three hashes. */
	const uint32_t lead_bits[3] = { rule->ddr_addr_start, 12, 13 };

	for (uint_t idx = 0; idx < ZEN_UMC_NPS_MOD_NBITS; idx++) {
		uint8_t acc = bitx64(pa, lead_bits[idx], lead_bits[idx]);

		/* Fold in each hash range that the rule's flags enable. */
		if ((rule->ddr_flags & DF_DRAM_F_HASH_16_18) != 0) {
			acc ^= (uint8_t)bitx64(pa, 16 + idx, 16 + idx);
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_21_23) != 0) {
			acc ^= (uint8_t)bitx64(pa, 21 + idx, 21 + idx);
		}

		if ((rule->ddr_flags & DF_DRAM_F_HASH_30_32) != 0) {
			acc ^= (uint8_t)bitx64(pa, 30 + idx, 30 + idx);
		}

		hashes[idx] = acc;
	}

	/* Bit 14 is unconditionally part of the first hash. */
	hashes[0] ^= (uint8_t)bitx64(pa, 14, 14);
}

/*
 * See the big theory statement in zen_umc.c which describes the rules for this
 * computation. This is a little less weird than the Zen 3 one, but still,
 * unique.
 */
static boolean_t
zen_umc_decode_ileave_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	uint8_t hashes[3] = { 0 };
	uint32_t sock_width, modulus, nupper, chan;

	/* Die interleaving is not valid in combination with this hash. */
	if (rule->ddr_die_ileave_bits != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_NPS_BAD_ILEAVE;
		dec->dec_fail_data = dec->dec_df_ruleno;
		return (B_FALSE);
	}

	sock_width = rule->ddr_sock_ileave_bits;

	/*
	 * Each mode is a modulus (3 or 5) combined with zero, one, or two
	 * additional hash bits that select which multiple of the modulus the
	 * channel lands in.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
		modulus = 3;
		nupper = 0;
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
		modulus = 3;
		nupper = 1;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		modulus = 3;
		nupper = 2;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
		modulus = 5;
		nupper = 0;
		break;
	case DF_CHAN_ILEAVE_NPS1_10CH:
		modulus = 5;
		nupper = 1;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	zen_umc_decode_hash_nps_mod(rule, dec->dec_ilv_pa, hashes);

	/* With socket interleaving, the first hash picks the socket. */
	dec->dec_ilv_sock = 0;
	dec->dec_ilv_die = 0;
	if (sock_width > 0) {
		ASSERT3U(sock_width, ==, 1);
		dec->dec_ilv_sock = hashes[0];
	}

	/*
	 * The base channel is the address from bit 14 upwards modulo the
	 * modulus, rotated by one when the first hash is set.
	 */
	chan = bitx64(dec->dec_ilv_pa, 63, 14) % modulus;
	if (hashes[0] == 1) {
		chan = (chan + 1) % modulus;
	}

	/* Apply the remaining hash bits; 3 and 5 channel modes have none. */
	if (nupper == 1) {
		chan += hashes[2] * modulus;
	} else if (nupper == 2) {
		chan += ((hashes[2] << 1) | hashes[1]) * modulus;
	}

	dec->dec_ilv_chan = chan;
	return (B_TRUE);
}

/*
 * Our next task is to attempt to translate the PA and the DF rule from a system
 * address into a normalized address and a particular DRAM channel that it's
 * targeting. There are several things that we need to take into account here
 * when performing interleaving and translation:
 *
 *  o The DRAM Hole modifying our base address
 *  o The various interleave bits
 *  o Potentially hashing based on channel and global settings
 *  o Potential CS re-targeting registers (only on some systems)
 *  o Finally, the question of how to adjust for the DRAM hole and the base
 *    address changes based on the DF generation and channel configuration. This
 *    influences what address we start interleaving with.
 *
 * Note, this phase does not actually construct the normalized (e.g. channel)
 * address. That's done in a subsequent step. For more background, please see
 * the 'Data Fabric Interleaving' section of the zen_umc.c big theory statement.
 */
static boolean_t
zen_umc_decode_sysaddr_to_csid(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_cs_remap_t *table;
	boolean_t (*ileave_fn)(const zen_umc_t *, zen_umc_decoder_t *);
	uint32_t log_sock, log_die, log_comp, table_idx;

	/*
	 * Work out which address the interleave should operate on. That
	 * depends on both the DF generation and the interleave mode.
	 */
	if (!zen_umc_determine_ileave_addr(umc, dec)) {
		return (B_FALSE);
	}

	/* Pick the routine that understands this rule's interleave mode. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		ileave_fn = zen_umc_decode_ileave_nohash;
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
		ileave_fn = zen_umc_decode_ileave_cod;
		break;
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		ileave_fn = zen_umc_decode_ileave_nps;
		break;
	case DF_CHAN_ILEAVE_6CH:
		ileave_fn = zen_umc_decode_ileave_zen3_6ch;
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		ileave_fn = zen_umc_decode_ileave_nps_mod;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	if (!ileave_fn(umc, dec)) {
		return (B_FALSE);
	}

	/*
	 * The interleave has now been broken into its socket, die, and channel
	 * pieces. Fold those back into a fabric ID and add the rule's base
	 * fabric ID to obtain the logical target.
	 */
	zen_fabric_id_compose(&umc->umc_decomp, dec->dec_ilv_sock,
	    dec->dec_ilv_die, dec->dec_ilv_chan, &dec->dec_ilv_fabid);
	dec->dec_log_fabid = dec->dec_ilv_fabid + rule->ddr_dest_fabid;

	/*
	 * Split the logical ID apart again. The pieces are only needed when
	 * remapping; without remapping the logical ID is the target as is.
	 */
	zen_fabric_id_decompose(&umc->umc_decomp, dec->dec_log_fabid,
	    &log_sock, &log_die, &log_comp);
	if ((rule->ddr_flags & DF_DRAM_F_REMAP_EN) == 0) {
		dec->dec_targ_fabid = dec->dec_log_fabid;
		return (B_TRUE);
	}

	/*
	 * There are several remap tables in the DF and two schemes for
	 * selecting one of them:
	 *
	 *  o Since DFv4, the DRAM rule itself names the table to use.
	 *  o Before DFv4 this only exists on Milan and is indicated by the
	 *    DF_DRAM_F_REMAP_SOCK flag. There is one table per socket in each
	 *    DF and the destination socket of the fabric ID selects it.
	 *
	 * Whichever table is chosen, the socket and die parts of the fabric ID
	 * are kept and only the component is swapped for the table's entry.
	 *
	 * Strictly speaking every DF has a private copy of these tables. We
	 * lean on the assumption that a DF node must be able to route every
	 * DRAM rule to its final target by itself, i.e. it never forwards a
	 * system address to a remote die for more interleave processing. That
	 * means the DF which supplied the CCM rules, DF 0, has all that we
	 * need to finish the mapping.
	 */
	table_idx = (rule->ddr_flags & DF_DRAM_F_REMAP_SOCK) != 0 ?
	    log_sock : rule->ddr_remap_ent;

	if (table_idx >= dec->dec_df_rulesrc->zud_cs_nremap) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_SET;
		dec->dec_fail_data = table_idx;
		return (B_FALSE);
	}

	table = &dec->dec_df_rulesrc->zud_remap[table_idx];
	if (log_comp >= table->csr_nremaps) {
		dec->dec_fail = ZEN_UMC_DECODE_F_BAD_REMAP_ENTRY;
		dec->dec_fail_data = log_comp;
		return (B_FALSE);
	}

	/* The replacement must fit within the component mask. */
	dec->dec_remap_comp = table->csr_remaps[log_comp];
	if ((dec->dec_remap_comp & ~umc->umc_decomp.dfd_comp_mask) != 0) {
		dec->dec_fail = ZEN_UMC_DECODE_F_REMAP_HAS_BAD_COMP;
		dec->dec_fail_data = dec->dec_remap_comp;
		return (B_FALSE);
	}

	zen_fabric_id_compose(&umc->umc_decomp, log_sock, log_die,
	    dec->dec_remap_comp, &dec->dec_targ_fabid);

	return (B_TRUE);
}

/*
 * Our next step here is to actually take our target ID and find the
 * corresponding DF, UMC, and actual rule that was used. Note, we don't
 * decompose the ID and look things up that way for a few reasons. While each
 * UMC should map linearly to its instance/component ID, there are suggestions
 * that they can be renumbered. This makes it simplest to just walk over
 * everything (and there aren't that many things to walk over either).
 */
static boolean_t
zen_umc_decode_find_umc_rule(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	for (uint_t d = 0; d < umc->umc_ndfs; d++) {
		const zen_umc_df_t *df = &umc->umc_dfs[d];

		for (uint_t c = 0; c < df->zud_nchan; c++) {
			const zen_umc_chan_t *chan = &df->zud_chan[c];
			uint32_t r = 0;

			if (chan->chan_fabid == dec->dec_targ_fabid) {
				/*
				 * This is the UMC we want. Record it and then
				 * look for which of its own rules covers the
				 * address so that an offset can be determined
				 * later. The primary CCM rule remains in use
				 * for every other calculation.
				 */
				dec->dec_umc_chan = chan;
				while (r < chan->chan_nrules) {
					const df_dram_rule_t *cand =
					    &chan->chan_rules[r];

					if ((cand->ddr_flags &
					    DF_DRAM_F_VALID) != 0 &&
					    dec->dec_pa >= cand->ddr_base &&
					    dec->dec_pa < cand->ddr_limit) {
						dec->dec_umc_ruleno = r;
						return (B_TRUE);
					}

					r++;
				}

				/*
				 * The channel exists, but none of its rules
				 * claim this address. We do not go on to look
				 * at any other channel.
				 */
				dec->dec_fail =
				    ZEN_UMC_DECODE_F_UMC_DOESNT_HAVE_PA;
				return (B_FALSE);
			}
		}
	}

	/* No channel on any DF has the target fabric ID. */
	dec->dec_fail = ZEN_UMC_DECODE_F_CANNOT_MAP_FABID;
	return (B_FALSE);
}

/*
 * System address normalization for the non-hashing interleave modes. The
 * interleave bits (socket, die, and channel) form one contiguous run that
 * begins at the rule's starting interleave address bit; normalization deletes
 * that run from dec_norm_addr. See the zen_umc.c big theory statement for more
 * information.
 *
 * Returns B_TRUE on success. If the rule's channel interleave is not one of
 * the power-of-two modes handled here, dec_fail is set to
 * ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP, dec_fail_data to the interleave value,
 * and B_FALSE is returned.
 */
static boolean_t
zen_umc_decode_normalize_nohash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	uint_t chan_bits, total, first, last;

	/* log2 of the channel count gives the number of channel bits. */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
		chan_bits = 0;
		break;
	case DF_CHAN_ILEAVE_2CH:
		chan_bits = 1;
		break;
	case DF_CHAN_ILEAVE_4CH:
		chan_bits = 2;
		break;
	case DF_CHAN_ILEAVE_8CH:
		chan_bits = 3;
		break;
	case DF_CHAN_ILEAVE_16CH:
		chan_bits = 4;
		break;
	case DF_CHAN_ILEAVE_32CH:
		chan_bits = 5;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	total = chan_bits;
	total += rule->ddr_sock_ileave_bits;
	total += rule->ddr_die_ileave_bits;

	/*
	 * A really simple configuration (e.g. no interleaving at all) has
	 * nothing to remove, so the address must be left untouched.
	 */
	if (total == 0) {
		return (B_TRUE);
	}

	first = rule->ddr_addr_start;
	last = first + total - 1;
	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, last, first);

	return (B_TRUE);
}

/*
 * COD/NPS system address normalization logic. See the zen_umc.c big theory
 * statement for more information.
 *
 * The number of interleave bits is the rule's socket interleave bits plus the
 * channel bits implied by the COD/NPS mode. One of those bits lives at the
 * rule's starting interleave address bit; any remaining ones are taken from
 * bit 12 upwards. All of them are deleted from dec_norm_addr.
 *
 * Returns B_TRUE on success. If the rule's channel interleave is not a COD or
 * NPS power-of-two mode, dec_fail is set to
 * ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP, dec_fail_data to the interleave value,
 * dec_norm_addr is left unmodified, and B_FALSE is returned.
 */
static boolean_t
zen_umc_decode_normalize_hash(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint_t nbits = 0;
	const df_dram_rule_t *rule = dec->dec_df_rule;

	/*
	 * NPS hashes allow for socket interleaving, COD hashes do not. Add
	 * socket interleaving, skip die.
	 */
	nbits += rule->ddr_sock_ileave_bits;
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_NPS4_2CH:
		nbits += 1;
		break;
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
		nbits += 2;
		break;
	case DF_CHAN_ILEAVE_COD1_8CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		nbits += 3;
		break;
	default:
		/*
		 * We must stop here. Having recorded a failure, carrying on
		 * would modify the address and report success to the caller
		 * with dec_fail set.
		 */
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * Always remove high order bits before low order bits so we don't have
	 * to adjust the bits we need to remove. When more than one bit is in
	 * use, this removes the nbits - 1 bits that start at bit 12.
	 */
	if (nbits > 1) {
		uint_t start = 12;
		uint_t end = start + (nbits - 2);
		dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, end, start);
	}

	/* Finally remove the single bit at the starting interleave address. */
	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, rule->ddr_addr_start,
	    rule->ddr_addr_start);
	return (B_TRUE);
}

/*
 * Normalization for the Zen 3 6-channel interleave. Please see the comments in
 * zen_umc.c on this to understand what we're doing here and why.
 *
 * ZEN_UMC_COD_NBITS bits, starting at the rule's starting interleave address
 * bit, are deleted from dec_norm_addr. When both the second and third hash
 * values are one, two bits of the result are then forced to one. This function
 * always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_normalize_zen3_6ch(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const uint_t del_lo = rule->ddr_addr_start;
	const uint_t del_hi = rule->ddr_addr_start + ZEN_UMC_COD_NBITS - 1;
	uint8_t hashes[3] = { 0 };
	uint_t top_lo;

	/*
	 * As per the theory statement, the hash bits are always removed from
	 * the starting address; for this 6-channel config that is 3 bits. The
	 * hash has to be recomputed first, while those bits are still present
	 * in the address.
	 */
	zen_umc_decode_hash_zen3_6ch(rule, dec->dec_norm_addr, hashes);
	dec->dec_norm_addr = bitdel64(dec->dec_norm_addr, del_hi, del_lo);

	if (hashes[1] != 1 || hashes[2] != 1) {
		return (B_TRUE);
	}

	/*
	 * This is the case the theory statement warned about. This gets
	 * normalized to the top of the DIMM's range (its two upper most bits
	 * are set).
	 */
	top_lo = 14 - ZEN_UMC_COD_NBITS + dec->dec_umc_chan->chan_np2_space0;
	dec->dec_norm_addr = bitset64(dec->dec_norm_addr, top_lo + 1, top_lo,
	    0x3);

	return (B_TRUE);
}

/*
 * Normalization for the NPS non-power-of-two (modulus based) interleaves. This
 * follows the algorithm of sorts described in zen_umc.c and is unlike the
 * other normalizers, which simply delete bits: here the address is cut into a
 * low, middle, and high piece, the high piece is divided by the channel
 * modulus, and the three are glued back together.
 *
 * Returns B_TRUE on success. If the rule's channel interleave is not one of
 * the 3/5/6/10/12 channel NPS modes, dec_fail is set to
 * ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP, dec_fail_data to the interleave value,
 * and B_FALSE is returned.
 */
static boolean_t
zen_umc_decode_normalize_nps_mod(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const uint_t sock_bits = rule->ddr_sock_ileave_bits;
	const uint_t ileave_lo = rule->ddr_addr_start;
	uint8_t hashes[3] = { 0 };
	uint_t modulus, ileave_bits, keep_bits;
	uint64_t below, middle, above;

	/*
	 * Each mode is described by the modulus applied to the upper address
	 * bits and by the number of hashed interleave bits it consumes.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_NPS4_3CH:
		modulus = 3;
		ileave_bits = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_5CH:
		modulus = 5;
		ileave_bits = 1;
		break;
	case DF_CHAN_ILEAVE_NPS2_6CH:
		modulus = 3;
		ileave_bits = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_10CH:
		modulus = 5;
		ileave_bits = 2;
		break;
	case DF_CHAN_ILEAVE_NPS1_12CH:
		modulus = 3;
		ileave_bits = 3;
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		return (B_FALSE);
	}

	/*
	 * The low piece is everything below the starting interleave address.
	 * The high piece is always bits [63:14], divided by the modulus. When
	 * there is no socket interleaving, the first hash value is applied
	 * after that division and becomes the new least significant bit of the
	 * high piece.
	 */
	below = bitx64(dec->dec_norm_addr, ileave_lo - 1, 0);
	above = bitx64(dec->dec_norm_addr, 63, 14) / modulus;
	zen_umc_decode_hash_nps_mod(rule, dec->dec_norm_addr, hashes);
	if (sock_bits == 0) {
		above = (above << 1) | hashes[0];
	}

	/*
	 * Now for the weirdest part: the middle piece. Recall that this hash
	 * uses bit 8, then 13, then 12 (the hash order is still 8, 12, 13, but
	 * zen_umc_decode_ileave_nps_mod() uses hashes[2] before hashes[1]).
	 * With one interleave bit we only drop bit 8 (assuming that is the
	 * starting address) and keep [13:9]; with two we keep [12:9]; with
	 * three, [11:9]. A one bit interleave therefore keeps five bits, which
	 * is where 6 - ileave_bits comes from. The range handed to bitx64() is
	 * inclusive and begins one bit above the starting address, so its top
	 * bit is the starting address plus the number of bits kept.
	 */
	keep_bits = 6 - ileave_bits;
	middle = bitx64(dec->dec_norm_addr, ileave_lo + keep_bits,
	    ileave_lo + 1);

	/*
	 * Reassemble. The extraction positions above describe the original
	 * address; since bits have been dropped, the pieces are packed back
	 * to back starting at the interleave address instead.
	 */
	dec->dec_norm_addr = below | (middle << ileave_lo) |
	    (above << (ileave_lo + keep_bits));

	return (B_TRUE);
}

/*
 * Construct the normalized (channel-relative) address from everything that has
 * been gathered so far. Starting from the physical address, the following
 * transformations are applied, in this order:
 *
 *   o The base address of the rule
 *   o DRAM hole changes
 *   o Normalization of the address due to interleaving (more fun)
 *   o The DRAM offset register of the rule
 *
 * The first two are delegated to zen_umc_adjust_dram_addr(). The result is
 * left in dec_norm_addr. Returns B_TRUE on success; returns B_FALSE if the
 * adjustment or the interleave-specific normalizer fails, or if the rule's
 * channel interleave is unknown, in which case dec_fail is set to
 * ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP and dec_fail_data to the interleave
 * value.
 */
static boolean_t
zen_umc_decode_sysaddr_to_norm(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const df_dram_rule_t *rule = dec->dec_df_rule;
	const zen_umc_chan_t *chan = dec->dec_umc_chan;
	boolean_t normalized;

	dec->dec_norm_addr = dec->dec_pa;
	if (!zen_umc_adjust_dram_addr(umc, dec, &dec->dec_norm_addr,
	    ZEN_UMC_DECODE_F_CALC_NORM_UNDERFLOW)) {
		return (B_FALSE);
	}

	/*
	 * Now for the most annoying part of this whole thing: normalizing
	 * based on the actual interleave format. When interleaving is in use,
	 * some address bits only serve to steer the request somewhere and are
	 * removed, while the remaining value is, generally speaking, the same
	 * in each location. See the big theory statement in zen_umc.c for more
	 * information. Each family of interleave modes has its own routine.
	 */
	switch (rule->ddr_chan_ileave) {
	case DF_CHAN_ILEAVE_1CH:
	case DF_CHAN_ILEAVE_2CH:
	case DF_CHAN_ILEAVE_4CH:
	case DF_CHAN_ILEAVE_8CH:
	case DF_CHAN_ILEAVE_16CH:
	case DF_CHAN_ILEAVE_32CH:
		normalized = zen_umc_decode_normalize_nohash(umc, dec);
		break;
	case DF_CHAN_ILEAVE_COD4_2CH:
	case DF_CHAN_ILEAVE_COD2_4CH:
	case DF_CHAN_ILEAVE_COD1_8CH:
	case DF_CHAN_ILEAVE_NPS4_2CH:
	case DF_CHAN_ILEAVE_NPS2_4CH:
	case DF_CHAN_ILEAVE_NPS1_8CH:
		normalized = zen_umc_decode_normalize_hash(umc, dec);
		break;
	case DF_CHAN_ILEAVE_6CH:
		normalized = zen_umc_decode_normalize_zen3_6ch(umc, dec);
		break;
	case DF_CHAN_ILEAVE_NPS4_3CH:
	case DF_CHAN_ILEAVE_NPS2_6CH:
	case DF_CHAN_ILEAVE_NPS1_12CH:
	case DF_CHAN_ILEAVE_NPS2_5CH:
	case DF_CHAN_ILEAVE_NPS1_10CH:
		normalized = zen_umc_decode_normalize_nps_mod(umc, dec);
		break;
	default:
		dec->dec_fail = ZEN_UMC_DECODE_F_CHAN_ILEAVE_NOTSUP;
		dec->dec_fail_data = rule->ddr_chan_ileave;
		normalized = B_FALSE;
		break;
	}

	if (!normalized) {
		return (B_FALSE);
	}

	/*
	 * Determine if this rule has an offset to apply. There is never an
	 * offset for rule 0, hence the index into the offsets is one less than
	 * the actual rule number. Unlike the other transformations, these
	 * offsets describe the start of a normalized range, so the value is
	 * added rather than subtracted.
	 */
	if (dec->dec_umc_ruleno > 0 &&
	    chan->chan_offsets[dec->dec_umc_ruleno - 1].cho_valid) {
		dec->dec_norm_addr +=
		    chan->chan_offsets[dec->dec_umc_ruleno - 1].cho_offset;
	}

	return (B_TRUE);
}

/*
 * Decide whether a normalized address selects the given chip-select. The PPR
 * defines a match as (address & ~mask) == (base & ~mask), which is evaluated
 * here in the equivalent form "the address and base do not differ in any bit
 * outside of the mask". The primary base/mask pair is tried first, then the
 * secondary one; a pair is only considered when its base is marked valid.
 *
 * Returns B_TRUE on a match and stores in *matched_sec whether it was the
 * secondary pair that matched, which callers need later on. Returns B_FALSE,
 * leaving *matched_sec untouched, when neither pair matches.
 */
static boolean_t
zen_umc_decoder_cs_matches(const umc_cs_t *cs, const uint64_t norm,
    boolean_t *matched_sec)
{
	if (cs->ucs_base.udb_valid != 0) {
		const uint64_t care = ~cs->ucs_base_mask;

		if (((norm ^ cs->ucs_base.udb_base) & care) == 0) {
			*matched_sec = B_FALSE;
			return (B_TRUE);
		}
	}

	if (cs->ucs_sec.udb_valid != 0) {
		const uint64_t care = ~cs->ucs_sec_mask;

		if (((norm ^ cs->ucs_sec.udb_base) & care) == 0) {
			*matched_sec = B_TRUE;
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * Go through with our normalized address and map it to a given chip-select.
 * This as a side effect indicates which DIMM we're going out on as well. Note,
 * the final DIMM can change due to chip-select hashing; however, we use this
 * DIMM for determining all of the actual address translations.
 *
 * Only DIMMs flagged valid are searched, and the first chip-select that
 * matches wins. On success dec_dimm, dec_cs, dec_log_csno, and dec_cs_sec are
 * filled in and B_TRUE is returned. Otherwise dec_fail is set to
 * ZEN_UMC_DECODE_F_NO_CS_BASE_MATCH and B_FALSE is returned.
 */
static boolean_t
zen_umc_decode_find_cs(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const zen_umc_chan_t *chan = dec->dec_umc_chan;

	for (uint_t dimmno = 0; dimmno < ZEN_UMC_MAX_DIMMS; dimmno++) {
		const umc_dimm_t *dimm = &chan->chan_dimms[dimmno];

		if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0)
			continue;

		for (uint_t csno = 0; csno < ZEN_UMC_MAX_CS_PER_DIMM; csno++) {
			const umc_cs_t *cs = &dimm->ud_cs[csno];
			boolean_t is_sec = B_FALSE;

			if (!zen_umc_decoder_cs_matches(cs, dec->dec_norm_addr,
			    &is_sec)) {
				continue;
			}

			/*
			 * The logical chip-select is a flat, channel-wide
			 * index. Each DIMM contributes
			 * ZEN_UMC_MAX_CS_PER_DIMM entries, so that is the
			 * stride between DIMMs (not the number of DIMMs).
			 */
			dec->dec_dimm = dimm;
			dec->dec_cs = cs;
			dec->dec_log_csno = dimmno * ZEN_UMC_MAX_CS_PER_DIMM +
			    csno;
			dec->dec_cs_sec = is_sec;
			return (B_TRUE);
		}
	}

	dec->dec_fail = ZEN_UMC_DECODE_F_NO_CS_BASE_MATCH;
	return (B_FALSE);
}

/*
 * Extract the column from the normalized address. For once, something that is
 * almost straightforward: the chip-select lists, for each column bit, which
 * bit of the normalized address supplies it. The assembled value is stored in
 * dec_dimm_col. This always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_cols(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const umc_cs_t *cs = dec->dec_cs;
	uint32_t column = 0;
	uint_t colbit = 0;

	while (colbit < cs->ucs_ncol) {
		const uint32_t src = cs->ucs_col_bits[colbit];
		const uint64_t val = bitx64(dec->dec_norm_addr, src, src);

		column |= val << colbit;
		colbit++;
	}

	dec->dec_dimm_col = column;
	return (B_TRUE);
}

/*
 * Extract the row from the normalized address and store it in dec_dimm_row.
 *
 * The row is split into two different regions. There's a low and high value,
 * though the high value is only present in DDR4. Unlike the column, where each
 * bit is spelled out, each set of row bits are contiguous (low and high are
 * independent). The high bits, when present, are placed directly above the low
 * bits. Finally, the top two bits of the assembled row may be inverted based
 * on the chip-select's inversion setting; the secondary setting is used when
 * the secondary base/mask pair was the one that matched.
 *
 * This always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_rows(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint32_t row, inv;
	const umc_cs_t *cs = dec->dec_cs;
	const uint_t total_bits = cs->ucs_nrow_lo + cs->ucs_nrow_hi;
	const uint_t lo_end = cs->ucs_nrow_lo + cs->ucs_row_low_bit - 1;

	row = bitx64(dec->dec_norm_addr, lo_end, cs->ucs_row_low_bit);
	if (cs->ucs_nrow_hi > 0) {
		const uint_t hi_end = cs->ucs_nrow_hi + cs->ucs_row_hi_bit - 1;
		const uint32_t hi = bitx64(dec->dec_norm_addr, hi_end,
		    cs->ucs_row_hi_bit);

		row |= hi << cs->ucs_nrow_lo;
	}

	if (dec->dec_cs_sec) {
		inv = cs->ucs_inv_msbs_sec;
	} else {
		inv = cs->ucs_inv_msbs;
	}

	/*
	 * We need to potentially invert the top two bits of the row address
	 * based on the low two bits of the inversion value. Note, inv only has
	 * two valid bits. So we shift them into place to perform the XOR. See
	 * the big theory statement in zen_umc.c for more on why this works.
	 *
	 * The shifted value must be as wide as the row itself: keeping it in
	 * an 8-bit quantity would discard the bits as soon as the row is wider
	 * than eight bits and silently skip the inversion. The shift is only
	 * defined when there are at least two row bits and the row fits in the
	 * 32-bit value we are building, so anything else is left alone.
	 */
	if (total_bits >= 2 && total_bits <= 32) {
		row ^= inv << (total_bits - 2);
	}

	dec->dec_dimm_row = row;
	return (B_TRUE);
}

/*
 * Several of the hash schemes ask us to XOR together all of the bits of a
 * value to reduce it to a single bit. This is that bitwise XOR reduce for a
 * uint32_t: the result is 1 when an odd number of bits are set in the input
 * and 0 otherwise.
 */
static uint8_t
zen_umc_running_xor32(const uint32_t in)
{
	uint32_t rem = in;
	uint8_t parity = 0;

	/*
	 * Fold in the lowest bit and shift it away. Bits that are zero do not
	 * change the result, so we can stop once nothing is left.
	 */
	while (rem != 0) {
		parity ^= (uint8_t)(rem & 1);
		rem >>= 1;
	}

	return (parity);
}

/*
 * Bitwise XOR reduce of a uint64_t: returns 1 when an odd number of bits are
 * set in the input and 0 otherwise.
 */
static uint8_t
zen_umc_running_xor64(const uint64_t in)
{
	uint64_t rem = in;
	uint8_t parity = 0;

	/*
	 * Fold in the lowest bit and shift it away. Bits that are zero do not
	 * change the result, so we can stop once nothing is left.
	 */
	while (rem != 0) {
		parity ^= (uint8_t)(rem & 1);
		rem >>= 1;
	}

	return (parity);
}

/*
 * Our goal here is to extract the bank and bank group that are used, if any.
 * A combined value is assembled one bit at a time from the normalized address,
 * applying bank hashing where it is enabled, and is then split into
 * dec_dimm_bank_group (the low bits) and dec_dimm_bank (the bits above those).
 * This relies on dec_dimm_row and dec_dimm_col having been determined already,
 * as the hash consumes them. This always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_banks(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	uint8_t bank = 0;
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;

	/*
	 * Get an initial bank address bit and then perform any hashing if
	 * bank hashing is enabled. Note, the memory controller's nbanks is the
	 * total number of bank and bank group bits, hence why it's used for
	 * the loop counter.
	 */
	for (uint_t i = 0; i < cs->ucs_nbanks; i++) {
		const umc_bank_hash_t *bank_hash = &hash->uch_bank_hashes[i];
		uint_t targ = cs->ucs_bank_bits[i];
		uint8_t val = bitx64(dec->dec_norm_addr, targ, targ);

		/*
		 * Hashing applies only when it is enabled both for the channel
		 * as a whole and for this particular bit. See the big theory
		 * statement for more on this. Short form, bit-wise AND the row
		 * and column with their masks, then XOR shenanigans.
		 */
		if ((hash->uch_flags & UMC_CHAN_HASH_F_BANK) != 0 &&
		    bank_hash->ubh_en) {
			uint32_t row_hash, col_hash;

			row_hash = dec->dec_dimm_row & bank_hash->ubh_row_xor;
			col_hash = dec->dec_dimm_col & bank_hash->ubh_col_xor;
			val ^= zen_umc_running_xor32(row_hash);
			val ^= zen_umc_running_xor32(col_hash);
		}

		bank |= val << i;
	}

	/*
	 * The bank and bank group are conjoined in the register and bit
	 * definitions. Once we've calculated that, extract it. The bank group
	 * occupies bits [nbank_groups - 1:0] and the bank occupies bits
	 * [nbanks - 1:nbank_groups]. Either field may be empty; in that case
	 * there is no valid inclusive range to hand to bitx8(), so the value
	 * is simply zero.
	 */
	if (cs->ucs_nbank_groups > 0) {
		dec->dec_dimm_bank_group = bitx8(bank,
		    cs->ucs_nbank_groups - 1, 0);
	} else {
		dec->dec_dimm_bank_group = 0;
	}

	if (cs->ucs_nbanks > cs->ucs_nbank_groups) {
		dec->dec_dimm_bank = bitx8(bank, cs->ucs_nbanks - 1,
		    cs->ucs_nbank_groups);
	} else {
		dec->dec_dimm_bank = 0;
	}

	return (B_TRUE);
}

/*
 * Extract the sub-channel into dec_dimm_subchan. Only DDR5 and LPDDR5 DIMMs
 * have one; for every other DIMM type it is simply set to zero. When the
 * channel's PC hash is enabled, the raw sub-channel bit is additionally XORed
 * with the reductions of the masked row, column, and bank, which is why this
 * must run after those have been decoded. This always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_subchan(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
	uint8_t sc;

	if (dec->dec_dimm->ud_type != UMC_DIMM_T_DDR5 &&
	    dec->dec_dimm->ud_type != UMC_DIMM_T_LPDDR5) {
		dec->dec_dimm_subchan = 0;
		return (B_TRUE);
	}

	/* The chip-select names the address bit that selects the subchannel. */
	sc = bitx64(dec->dec_norm_addr, cs->ucs_subchan, cs->ucs_subchan);

	/*
	 * We can't forget to hash this if required: both the channel-wide
	 * flag and the hash's own enable have to be set.
	 */
	if ((hash->uch_flags & UMC_CHAN_HASH_F_PC) != 0 &&
	    hash->uch_pc_hash.uph_en) {
		const uint32_t row_sel = dec->dec_dimm_row &
		    hash->uch_pc_hash.uph_row_xor;
		const uint32_t col_sel = dec->dec_dimm_col &
		    hash->uch_pc_hash.uph_col_xor;
		const uint32_t bank_sel = dec->dec_dimm_bank &
		    hash->uch_pc_hash.uph_bank_xor;

		sc ^= zen_umc_running_xor32(row_sel);
		sc ^= zen_umc_running_xor32(col_sel);
		sc ^= zen_umc_running_xor32(bank_sel);
	}

	dec->dec_dimm_subchan = sc;
	return (B_TRUE);
}

/*
 * Determine the rank multiplication (RM) value and store it in dec_dimm_rm.
 * Each RM bit is sourced from the normalized address bit that the chip-select
 * names for it and, when RM hashing is enabled for that bit, is XORed with the
 * reduction of the masked normalized address.
 *
 * Note that we have normalized the RM bits between the primary and secondary
 * base/mask registers so that way even though the DDR5 controller always uses
 * the same RM selection bits, it works in a uniform way for both DDR4 and DDR5.
 *
 * This always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_rank_mul(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
	const boolean_t rm_hash = (hash->uch_flags & UMC_CHAN_HASH_F_RM) != 0;
	uint8_t result = 0;

	for (uint_t i = 0; i < cs->ucs_nrm; i++) {
		const uint8_t src = cs->ucs_rm_bits[i];
		uint8_t val = bitx64(dec->dec_norm_addr, src, src);

		/* Hash only if enabled channel-wide and for this bit. */
		if (rm_hash && hash->uch_rm_hashes[i].uah_en) {
			const uint64_t masked = dec->dec_norm_addr &
			    hash->uch_rm_hashes[i].uah_addr_xor;

			val ^= zen_umc_running_xor64(masked);
		}

		result |= val << i;
	}

	dec->dec_dimm_rm = result;
	return (B_TRUE);
}

/*
 * Go through and determine the actual chip-select activated. This is subject
 * to hashing. Note, we first constructed a logical chip-select value based on
 * which of the four base/mask registers in the UMC we activated for the
 * channel. That basically seeded the two bit value we start with.
 *
 * The results are the channel-wide chip-select in dec_chan_csno, and the DIMM
 * number and DIMM-relative chip-select derived from it in dec_dimm_no and
 * dec_dimm_csno. This always returns B_TRUE.
 */
static boolean_t
zen_umc_decode_chipsel(const zen_umc_t *umc, zen_umc_decoder_t *dec)
{
	const umc_cs_t *cs = dec->dec_cs;
	const umc_chan_hash_t *hash = &dec->dec_umc_chan->chan_hash;
	const boolean_t cs_hash = (hash->uch_flags & UMC_CHAN_HASH_F_CS) != 0;
	uint8_t hashed = 0;
	uint8_t final;

	/*
	 * Walk each bit of the logical chip-select. When hashing is enabled
	 * for the channel and for this bit, flip it by the reduction of the
	 * masked normalized address.
	 */
	for (uint_t i = 0; i < ZEN_UMC_MAX_CS_BITS; i++) {
		uint8_t val = bitx8(dec->dec_log_csno, i, i);

		if (cs_hash && hash->uch_cs_hashes[i].uah_en) {
			const uint64_t masked = dec->dec_norm_addr &
			    hash->uch_cs_hashes[i].uah_addr_xor;

			val ^= zen_umc_running_xor64(masked);
		}

		hashed |= val << i;
	}

	/*
	 * It is not entirely clear under what circumstances the chip-select
	 * xor needs to be applied. Right now we always apply it. This only
	 * exists on a few DDR5 SoCs, it seems, and we zero out other cases to
	 * try and have a uniform and reasonable path. The result, limited to
	 * two bits, is the absolute chip-select in the channel. We record this
	 * for debugging purposes and to derive the DIMM and CS.
	 */
	final = (hashed ^ cs->ucs_cs_xor) & 0x3;
	dec->dec_chan_csno = final;

	/*
	 * Now that we actually know which chip-select we're targeting, go back
	 * and indicate which DIMM we'll go out to and which chip-select it is
	 * relative to that DIMM. This may have changed due to CS hashing, so
	 * the final DIMM and chip-select are snapshotted here: the upper bit
	 * is the DIMM and the lower bit is the chip-select on it.
	 */
	dec->dec_dimm_no = final >> 1;
	dec->dec_dimm_csno = final & 0x1;
	return (B_TRUE);
}

/*
 * Initialize the decoder state. We do this by first zeroing it all and then
 * setting the various result fields to the UINTXX_MAX value that is
 * appropriate for them. These work as better sentinel values than zero, which
 * can be a legitimate decode result; however, we always zero the structure
 * first to be defensive, cover pointers, etc. Note that dec_fail is among the
 * things left at zero here.
 */
static void
zen_umc_decoder_init(zen_umc_decoder_t *dec)
{
	bzero(dec, sizeof (*dec));

	/* Physical addresses. */
	dec->dec_pa = dec->dec_ilv_pa = UINT64_MAX;
	/* Data fabric rule index and the interleave/fabric ID results. */
	dec->dec_df_ruleno = UINT32_MAX;
	dec->dec_ilv_sock = dec->dec_ilv_die = dec->dec_ilv_chan =
	    dec->dec_ilv_fabid = dec->dec_log_fabid = dec->dec_remap_comp =
	    dec->dec_targ_fabid = UINT32_MAX;
	/* UMC rule index and the normalized address. */
	dec->dec_umc_ruleno = UINT32_MAX;
	dec->dec_norm_addr = UINT64_MAX;
	/* DIMM-level results. */
	dec->dec_dimm_col = dec->dec_dimm_row = UINT32_MAX;
	dec->dec_log_csno = dec->dec_dimm_bank = dec->dec_dimm_bank_group =
	    dec->dec_dimm_subchan = dec->dec_dimm_rm = dec->dec_chan_csno =
	    dec->dec_dimm_no = dec->dec_dimm_csno = UINT8_MAX;
}

/*
 * Decode a physical address all the way down to a DIMM location. The decoder
 * state is reinitialized on every call, the address is recorded in dec_pa, and
 * then a fixed pipeline of stages runs. Each stage consumes what the earlier
 * ones left behind in the decoder, so the order below is significant, and the
 * first stage to fail ends the decode.
 *
 * Returns B_TRUE when every stage succeeds. Returns B_FALSE otherwise, in
 * which case the failing stage is expected to have recorded a reason in
 * dec_fail (this is asserted).
 */
boolean_t
zen_umc_decode_pa(const zen_umc_t *umc, const uint64_t pa,
    zen_umc_decoder_t *dec)
{
	zen_umc_decoder_init(dec);
	dec->dec_pa = pa;

	/*
	 * The stages, in the order in which they are evaluated:
	 *
	 *   1. Verify that this is even something that could be DRAM.
	 *
	 *   2. Find a data fabric rule that corresponds to this address. This
	 *	determines which set of rules for interleave and related we
	 *	should then use.
	 *
	 *   3. Map to a given CS, e.g. a specific UMC channel. This tells us
	 *	the socket and die as well, takes care of all the interleaving
	 *	and remapping, and produces a target fabric ID.
	 *
	 *   4. With the target ID known, map it to a corresponding UMC.
	 *
	 *   5. With the target and corresponding rules and offset
	 *	information, perform normalization.
	 *
	 *   6. Begin transforming the normalized channel address into
	 *	something that makes sense to address a DIMM: determine the
	 *	logical chip-select, which determines where all of the
	 *	remaining data is sourced from.
	 *
	 *   7. Calculate the row and then the column. These come first because
	 *	they are needed to determine some of the later values once
	 *	hashing is taken into account.
	 *
	 *   8. Determine the bank and bank group, which depends on the row and
	 *	column.
	 *
	 *   9. Consider the subchannel. This only exists for DDR5 generation
	 *	DIMMs (the function handles that reality). Because of potential
	 *	hashing this has to come after the row, column, and bank.
	 *
	 *  10. The last two pieces: rank multiplication and then the actual
	 *	chip select used. Don't worry, these both have hashing
	 *	opportunities.
	 */
	if (!zen_umc_decode_is_dram(umc, dec) ||
	    !zen_umc_decode_find_df_rule(umc, dec) ||
	    !zen_umc_decode_sysaddr_to_csid(umc, dec) ||
	    !zen_umc_decode_find_umc_rule(umc, dec) ||
	    !zen_umc_decode_sysaddr_to_norm(umc, dec) ||
	    !zen_umc_decode_find_cs(umc, dec) ||
	    !zen_umc_decode_rows(umc, dec) ||
	    !zen_umc_decode_cols(umc, dec) ||
	    !zen_umc_decode_banks(umc, dec) ||
	    !zen_umc_decode_subchan(umc, dec) ||
	    !zen_umc_decode_rank_mul(umc, dec) ||
	    !zen_umc_decode_chipsel(umc, dec)) {
		ASSERT3U(dec->dec_fail, !=, ZEN_UMC_DECODE_F_NONE);
		return (B_FALSE);
	}

	/*
	 * Somehow, that's it.
	 */
	return (B_TRUE);
}