summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjjc <none@none>2008-05-20 15:16:28 -0700
committerjjc <none@none>2008-05-20 15:16:28 -0700
commitdae2fa3732af276632393e33b86093e7c97f905e (patch)
tree174baf1b956f0cbdc7071b477a86e71a6f7f8732
parentc7158ae983f5a04c4a998f468ecefba6d23ba721 (diff)
downloadillumos-joyent-dae2fa3732af276632393e33b86093e7c97f905e.tar.gz
6699040 Solaris kernel uses wrong CPU APIC ID for looking up CPU in ACPI SRAT
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c13
-rw-r--r--usr/src/uts/i86pc/os/fakebop.c40
-rw-r--r--usr/src/uts/i86pc/os/lgrpplat.c213
-rw-r--r--usr/src/uts/intel/sys/bootconf.h5
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h1
5 files changed, 171 insertions, 101 deletions
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index e8f83b5ee9..89ac3342f2 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -2623,19 +2623,6 @@ cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
return (dtlb_nent);
}
-uint_t
-cpuid_get_apicid(cpu_t *cpu)
-{
- struct cpuid_info *cpi;
-
- if (cpu == NULL)
- cpu = CPU;
- cpi = cpu->cpu_m.mcpu_cpi;
-
- ASSERT(cpuid_checkpass(cpu, 1));
- return (CPI_APIC_ID(cpi));
-}
-
/*
* Return 0 if the erratum is not present or not applicable, positive
* if it is, and negative if the status of the erratum is unknown.
diff --git a/usr/src/uts/i86pc/os/fakebop.c b/usr/src/uts/i86pc/os/fakebop.c
index 3ba83270a0..5345ab640a 100644
--- a/usr/src/uts/i86pc/os/fakebop.c
+++ b/usr/src/uts/i86pc/os/fakebop.c
@@ -1871,27 +1871,49 @@ process_madt(struct madt *tp)
{
struct madt_processor *cpu, *end;
uint32_t cpu_count = 0;
-
- /*
- * User-set boot-ncpus overrides firmware count
- */
- if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
- return;
+ uint8_t cpu_apicid_array[UINT8_MAX + 1];
if (tp != NULL) {
+ /*
+ * Determine number of CPUs and keep track of "final" APIC ID
+ * for each CPU by walking through ACPI MADT processor list
+ */
end = (struct madt_processor *)(tp->hdr.len + (uintptr_t)tp);
cpu = tp->list;
while (cpu < end) {
- if (cpu->type == MADT_PROCESSOR)
- if (cpu->flags & 1)
+ if (cpu->type == MADT_PROCESSOR) {
+ if (cpu->flags & 1) {
+ if (cpu_count < UINT8_MAX)
+ cpu_apicid_array[cpu_count] =
+ cpu->apic_id;
cpu_count++;
+ }
+ }
cpu = (struct madt_processor *)
(cpu->len + (uintptr_t)cpu);
}
- bsetpropsi("boot-ncpus", cpu_count);
+
+ /*
+ * Make boot property for array of "final" APIC IDs for each
+ * CPU
+ */
+ bsetprop(BP_CPU_APICID_ARRAY, strlen(BP_CPU_APICID_ARRAY),
+ cpu_apicid_array, cpu_count * sizeof (uint8_t));
}
+ /*
+ * User-set boot-ncpus overrides firmware count
+ */
+ if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
+ return;
+
+ /*
+ * Set boot property for boot-ncpus to number of CPUs given in MADT
+ * if user hasn't set the property already
+ */
+ if (tp != NULL)
+ bsetpropsi("boot-ncpus", cpu_count);
}
static void
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index ee5ca7a92b..e244e6088a 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -85,9 +85,8 @@
* ---------------
* The main data structures used by this code are the following:
*
- * - lgrp_plat_cpu_node[] APIC ID to node ID mapping table
- * indexed by hashed APIC ID (only used
- * for SRAT)
+ * - lgrp_plat_cpu_node[] CPU to node ID mapping table indexed by
+ * CPU ID (only used for SRAT)
*
* - lgrp_plat_lat_stats.latencies[][] Table of latencies between same and
* different nodes indexed by node ID
@@ -120,6 +119,7 @@
#include <sys/archsystm.h> /* for {in,out}{b,w,l}() */
+#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <sys/controlregs.h>
#include <sys/cpupart.h>
@@ -165,12 +165,6 @@
#define LGRP_PLAT_PROBE_VENDOR 0x4 /* probe vendor ID register */
/*
- * Hash CPU APIC ID into CPU to node mapping table using max_ncpus
- * to minimize span of entries used
- */
-#define CPU_NODE_HASH(apicid) ((apicid) % max_ncpus)
-
-/*
* Hash proximity domain ID into node to domain mapping table using to minimize
* span of entries used
*/
@@ -178,7 +172,7 @@
/*
- * CPU APIC ID to node ID mapping structure (only used with SRAT)
+ * CPU to node ID mapping structure (only used with SRAT)
*/
typedef struct cpu_node_map {
int exists;
@@ -237,9 +231,13 @@ typedef struct node_phys_addr_map {
uint32_t prox_domain;
} node_phys_addr_map_t;
+/*
+ * Error code from processing CPU to APIC ID array boot property
+ */
+static int lgrp_plat_cpu_apicid_error = 0;
/*
- * CPU APIC ID to node ID mapping table (only used for SRAT)
+ * CPU to node ID mapping table (only used for SRAT)
*/
static cpu_node_map_t lgrp_plat_cpu_node[NCPU];
@@ -368,6 +366,9 @@ lgrp_handle_t lgrp_plat_root_hand(void);
*/
static int is_opteron(void);
+static int lgrp_plat_cpu_node_update(node_domain_map_t *node_domain,
+ cpu_node_map_t *cpu_node, int nentries, uint32_t apicid, uint32_t domain);
+
static int lgrp_plat_cpu_to_node(cpu_t *cp, cpu_node_map_t *cpu_node);
static int lgrp_plat_domain_to_node(node_domain_map_t *node_domain,
@@ -394,11 +395,14 @@ static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node,
lgrp_plat_latency_stats_t *lat_stats,
lgrp_plat_probe_stats_t *probe_stats);
+static int lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node,
+ int boot_ncpus);
+
static int lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats);
-static int lgrp_plat_process_srat(struct srat *tp, uint_t *node_cnt,
- node_domain_map_t *node_domain, cpu_node_map_t *cpu_node,
+static int lgrp_plat_process_srat(struct srat *tp, int cpu_count,
+ uint_t *node_cnt, node_domain_map_t *node_domain, cpu_node_map_t *cpu_node,
node_phys_addr_map_t *node_memory);
static int lgrp_plat_srat_domains(struct srat *tp);
@@ -690,18 +694,28 @@ lgrp_plat_init(void)
}
/*
+ * Read boot property with CPU to APIC ID mapping table/array and fill
+ * in CPU to node ID mapping table with APIC ID for each CPU
+ */
+ lgrp_plat_cpu_apicid_error =
+ lgrp_plat_process_cpu_apicids(lgrp_plat_cpu_node, boot_max_ncpus);
+
+ /*
* Determine which CPUs and memory are local to each other and number
* of NUMA nodes by reading ACPI System Resource Affinity Table (SRAT)
*/
- lgrp_plat_srat_error = lgrp_plat_process_srat(srat_ptr,
- &lgrp_plat_node_cnt, lgrp_plat_node_domain, lgrp_plat_cpu_node,
- lgrp_plat_node_memory);
+ if (!lgrp_plat_cpu_apicid_error) {
+ lgrp_plat_srat_error = lgrp_plat_process_srat(srat_ptr,
+ boot_max_ncpus, &lgrp_plat_node_cnt, lgrp_plat_node_domain,
+ lgrp_plat_cpu_node, lgrp_plat_node_memory);
+ }
/*
- * Try to use PCI config space registers on Opteron if SRAT doesn't
- * exist or there is some error processing the SRAT
+ * Try to use PCI config space registers on Opteron if there's an error
+ * processing CPU to APIC ID mapping or SRAT
*/
- if (lgrp_plat_srat_error != 0 && is_opteron())
+ if ((lgrp_plat_cpu_apicid_error != 0 || lgrp_plat_srat_error != 0) &&
+ is_opteron())
opt_get_numa_config(&lgrp_plat_node_cnt, &lgrp_plat_mem_intrlv,
lgrp_plat_node_memory);
@@ -1167,10 +1181,9 @@ lgrp_plat_root_hand(void)
*/
static int
lgrp_plat_cpu_node_update(node_domain_map_t *node_domain,
- cpu_node_map_t *cpu_node, uint32_t apicid, uint32_t domain)
+ cpu_node_map_t *cpu_node, int nentries, uint32_t apicid, uint32_t domain)
{
uint_t i;
- uint_t start;
int node;
/*
@@ -1184,67 +1197,55 @@ lgrp_plat_cpu_node_update(node_domain_map_t *node_domain,
}
/*
- * Hash given CPU APIC ID into CPU to node mapping table/array and
- * enter it and its corresponding node and proximity domain IDs into
- * first non-existent or matching entry
+ * Search for entry with given APIC ID and fill in its node and
+ * proximity domain IDs (if they haven't been set already)
*/
- i = start = CPU_NODE_HASH(apicid);
- do {
- if (cpu_node[i].exists) {
- /*
- * Update already existing entry for CPU
- */
- if (cpu_node[i].apicid == apicid) {
- /*
- * Just return when everything same
- */
- if (cpu_node[i].prox_domain == domain &&
- cpu_node[i].node == node)
- return (1);
-
- /*
- * Assert that proximity domain and node IDs
- * should be same and return error on non-debug
- * kernel
- */
- ASSERT(cpu_node[i].prox_domain == domain &&
- cpu_node[i].node == node);
- return (-1);
- }
- } else {
- /*
- * Create new entry for CPU
- */
- cpu_node[i].exists = 1;
- cpu_node[i].apicid = apicid;
- cpu_node[i].prox_domain = domain;
- cpu_node[i].node = node;
- return (0);
- }
- i = CPU_NODE_HASH(i + 1);
- } while (i != start);
+ for (i = 0; i < nentries; i++) {
+ /*
+ * Skip nonexistent entries and ones without matching APIC ID
+ */
+ if (!cpu_node[i].exists || cpu_node[i].apicid != apicid)
+ continue;
+
+ /*
+ * Just return if entry completely and correctly filled in
+ * already
+ */
+ if (cpu_node[i].prox_domain == domain &&
+ cpu_node[i].node == node)
+ return (1);
+
+ /*
+ * Fill in node and proximity domain IDs
+ */
+ cpu_node[i].prox_domain = domain;
+ cpu_node[i].node = node;
+
+ return (0);
+ }
/*
- * Ran out of supported number of entries which shouldn't happen....
+ * Return error when entry for APIC ID wasn't found in table
*/
- ASSERT(i != start);
- return (-1);
+ return (-2);
}
/*
- * Get node ID for given CPU ID
+ * Get node ID for given CPU
*/
static int
lgrp_plat_cpu_to_node(cpu_t *cp, cpu_node_map_t *cpu_node)
{
- uint32_t apicid;
- uint_t i;
- uint_t start;
+ processorid_t cpuid;
if (cp == NULL)
return (-1);
+ cpuid = cp->cpu_id;
+ if (cpuid < 0 || cpuid >= max_ncpus)
+ return (-1);
+
/*
* SRAT doesn't exist, isn't enabled, or there was an error processing
* it, so return chip ID for Opteron and -1 otherwise.
@@ -1257,17 +1258,13 @@ lgrp_plat_cpu_to_node(cpu_t *cp, cpu_node_map_t *cpu_node)
}
/*
- * SRAT does exist, so get APIC ID for given CPU and map that to its
- * node ID
+ * Return -1 when CPU to node ID mapping entry doesn't exist for given
+ * CPU
*/
- apicid = cpuid_get_apicid(cp);
- i = start = CPU_NODE_HASH(apicid);
- do {
- if (cpu_node[i].apicid == apicid && cpu_node[i].exists)
- return (cpu_node[i].node);
- i = CPU_NODE_HASH(i + 1);
- } while (i != start);
- return (-1);
+ if (!cpu_node[cpuid].exists)
+ return (-1);
+
+ return (cpu_node[cpuid].node);
}
@@ -1894,6 +1891,55 @@ lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node,
/*
+ * Read boot property with CPU to APIC ID array and fill in CPU to node ID
+ * mapping table with APIC ID for each CPU
+ *
+ * NOTE: This code assumes that CPU IDs are assigned in order that they appear
+ * in cpu_apicid_array boot property which is based on and follows
+ * same ordering as processor list in ACPI MADT. If the code in
+ * usr/src/uts/i86pc/io/pcplusmp/apic.c that reads MADT and assigns
+ * CPU IDs ever changes, then this code will need to change too....
+ */
+static int
+lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node, int boot_ncpus)
+{
+ char *boot_prop_name = BP_CPU_APICID_ARRAY;
+ uint8_t cpu_apicid_array[UINT8_MAX + 1];
+ int i;
+ int boot_prop_len;
+
+ /*
+ * Nothing to do when no array to fill in or not enough CPUs
+ */
+ if (cpu_node == NULL || boot_ncpus <= 1)
+ return (1);
+
+ /*
+ * Reject a missing property or one too big for the local buffer
+ */
+ boot_prop_len = BOP_GETPROPLEN(bootops, boot_prop_name);
+ if (boot_prop_len <= 0 || boot_prop_len > sizeof (cpu_apicid_array))
+ return (2);
+
+ /*
+ * Get CPU to APIC ID property value
+ */
+ if (BOP_GETPROP(bootops, boot_prop_name, cpu_apicid_array) < 0)
+ return (3);
+
+ /*
+ * Record APIC ID of each CPU that the property actually describes
+ */
+ for (i = 0; i < boot_ncpus && i < boot_prop_len; i++) {
+ cpu_node[i].exists = 1;
+ cpu_node[i].apicid = cpu_apicid_array[i];
+ }
+
+ return (0);
+}
+
+
+/*
* Read ACPI System Locality Information Table (SLIT) to determine how far each
* NUMA node is from each other
*/
@@ -1970,13 +2016,14 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
* and memory are local to each other in the same NUMA node
*/
static int
-lgrp_plat_process_srat(struct srat *tp, uint_t *node_cnt,
+lgrp_plat_process_srat(struct srat *tp, int cpu_count, uint_t *node_cnt,
node_domain_map_t *node_domain, cpu_node_map_t *cpu_node,
node_phys_addr_map_t *node_memory)
{
struct srat_item *srat_end;
int i;
struct srat_item *item;
+ int proc_entry_count;
if (tp == NULL || !lgrp_plat_srat_enable)
return (1);
@@ -2002,6 +2049,7 @@ lgrp_plat_process_srat(struct srat *tp, uint_t *node_cnt,
*/
item = tp->list;
srat_end = (struct srat_item *)(tp->hdr.len + (uintptr_t)tp);
+ proc_entry_count = 0;
while (item < srat_end) {
uint32_t apic_id;
uint32_t domain;
@@ -2027,8 +2075,10 @@ lgrp_plat_process_srat(struct srat *tp, uint_t *node_cnt,
apic_id = item->i.p.apic_id;
if (lgrp_plat_cpu_node_update(node_domain, cpu_node,
- apic_id, domain) < 0)
+ cpu_count, apic_id, domain) < 0)
return (3);
+
+ proc_entry_count++;
break;
case SRAT_MEMORY: /* memory entry */
@@ -2056,6 +2106,13 @@ lgrp_plat_process_srat(struct srat *tp, uint_t *node_cnt,
item = (struct srat_item *)((uintptr_t)item + item->len);
}
+
+ /*
+ * Should have seen at least as many SRAT processor entries as CPUs
+ */
+ if (proc_entry_count < cpu_count)
+ return (5);
+
return (0);
}
diff --git a/usr/src/uts/intel/sys/bootconf.h b/usr/src/uts/intel/sys/bootconf.h
index 1c6424915c..1ccea1a714 100644
--- a/usr/src/uts/intel/sys/bootconf.h
+++ b/usr/src/uts/intel/sys/bootconf.h
@@ -46,6 +46,11 @@ extern "C" {
#endif
/*
+ * Boot property names
+ */
+#define BP_CPU_APICID_ARRAY "cpu_apicid_array"
+
+/*
* masks to hand to bsys_alloc memory allocator
* XXX These names shouldn't really be srmmu derived.
*/
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 8dad09d1d7..3154a7cabb 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -540,7 +540,6 @@ extern uint_t cpuid_get_ncpu_sharing_last_cache(struct cpu *);
extern id_t cpuid_get_last_lvl_cacheid(struct cpu *);
extern int cpuid_get_chipid(struct cpu *);
extern id_t cpuid_get_coreid(struct cpu *);
-extern uint_t cpuid_get_apicid(struct cpu *);
extern int cpuid_get_pkgcoreid(struct cpu *);
extern int cpuid_get_clogid(struct cpu *);
extern int cpuid_is_cmt(struct cpu *);