1/* $NetBSD: acpi_srat.c,v 1.3 2010/03/05 14:00:17 jruoho Exp $ */
2
3/*
4 * Copyright (c) 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christoph Egger.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.3 2010/03/05 14:00:17 jruoho Exp $");
34
35#include <sys/param.h>
36#include <sys/kmem.h>
37#include <sys/systm.h>
38
39#include <dev/acpi/acpivar.h>
40#include <dev/acpi/acpi_srat.h>
41
/* SRAT table pointer; assigned by acpisrat_exist() once the table is found. */
static ACPI_TABLE_SRAT *srat;
43
/*
 * One NUMA node as built by acpisrat_refresh().  The cpu and mem members
 * are arrays of pointers that refer to elements of the file-scope
 * cpu_array and mem_array.
 */
struct acpisrat_node {
 acpisrat_nodeid_t nodeid; /* Index of this node in node_array */
 uint32_t ncpus; /* Number of cpus in this node */
 struct acpisrat_cpu **cpu; /* Array of ncpus pointers into cpu_array */
 uint32_t nmems; /* Number of memory ranges in this node */
 struct acpisrat_mem **mem; /* Array of nmems pointers into mem_array */
};
51
/*
 * Published SRAT state.  Filled in by acpisrat_refresh() and released
 * by acpisrat_exit().
 */
static uint32_t nnodes; /* Number of NUMA nodes */
static struct acpisrat_node *node_array; /* Array of NUMA nodes */
static uint32_t ncpus; /* Number of CPUs */
static struct acpisrat_cpu *cpu_array; /* Array of cpus */
static uint32_t nmems; /* Number of Memory ranges */
static struct acpisrat_mem *mem_array; /* Array of memory ranges */
58
59
/*
 * Temporary list element holding one CPU affinity record collected by
 * acpisrat_parse() until acpisrat_refresh() copies it into cpu_array.
 */
struct cpulist {
 struct acpisrat_cpu cpu; /* Parsed CPU record */
 TAILQ_ENTRY(cpulist) entry; /* Linkage on cpulisthead */
};
64
65static TAILQ_HEAD(, cpulist) cpulisthead;
66
67#define CPU_INIT TAILQ_INIT(&cpulisthead);
68#define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry)
69#define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
70#define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry)
71#define CPU_FIRST TAILQ_FIRST(&cpulisthead)
72
73
/*
 * Temporary list element holding one memory affinity record collected by
 * acpisrat_parse() (or synthesized by acpisrat_quirks()) until
 * acpisrat_refresh() copies it into mem_array.
 */
struct memlist {
 struct acpisrat_mem mem; /* Parsed memory range record */
 TAILQ_ENTRY(memlist) entry; /* Linkage on memlisthead */
};
78
/* Head of the temporary list of parsed memory affinity records. */
static TAILQ_HEAD(, memlist) memlisthead;

/* Shorthands operating on memlisthead. */
#define MEM_INIT TAILQ_INIT(&memlisthead)
#define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry)
#define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
/* Insert mem in front of the existing list element b. */
#define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry)
#define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry)
#define MEM_FIRST TAILQ_FIRST(&memlisthead)
87
88
89static struct cpulist *
90cpu_alloc(void)
91{
92 return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP);
93}
94
95static void
96cpu_free(struct cpulist *c)
97{
98 kmem_free(c, sizeof(struct cpulist));
99}
100
#if 0
/*
 * Currently compiled out.
 * Return the first CPU list element belonging to nodeid, or NULL when
 * the list holds no CPU for that node.
 */
static struct cpulist *
cpu_get(acpisrat_nodeid_t nodeid)
{
	struct cpulist *it;

	CPU_FOREACH(it) {
		if (it->cpu.nodeid == nodeid)
			break;
	}

	/* The iterator is NULL once the list has been exhausted. */
	return it;
}
#endif
115
116static struct memlist *
117mem_alloc(void)
118{
119 return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP);
120}
121
122static void
123mem_free(struct memlist *m)
124{
125 kmem_free(m, sizeof(struct memlist));
126}
127
128static struct memlist *
129mem_get(acpisrat_nodeid_t nodeid)
130{
131 struct memlist *tmp;
132
133 MEM_FOREACH(tmp) {
134 if (tmp->mem.nodeid == nodeid)
135 return tmp;
136 }
137
138 return NULL;
139}
140
141
142bool
143acpisrat_exist(void)
144{
145 ACPI_TABLE_HEADER *table;
146 ACPI_STATUS rv;
147
148 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
149 if (ACPI_FAILURE(rv))
150 return false;
151
152 /* Check if header is valid */
153 if (table == NULL)
154 return false;
155
156 if (table->Length == 0xffffffff)
157 return false;
158
159 srat = (ACPI_TABLE_SRAT *)table;
160
161 return true;
162}
163
164static int
165acpisrat_parse(void)
166{
167 ACPI_SUBTABLE_HEADER *subtable;
168 ACPI_SRAT_CPU_AFFINITY *srat_cpu;
169 ACPI_SRAT_MEM_AFFINITY *srat_mem;
170 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
171
172 acpisrat_nodeid_t nodeid;
173 struct cpulist *cpuentry = NULL;
174 struct memlist *mementry;
175 uint32_t srat_pos;
176 bool ignore_cpu_affinity = false;
177
178 KASSERT(srat != NULL);
179
180 /* Content starts right after the header */
181 srat_pos = sizeof(ACPI_TABLE_SRAT);
182
183 while (srat_pos < srat->Header.Length) {
184 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
185 srat_pos += subtable->Length;
186
187 switch (subtable->Type) {
188 case ACPI_SRAT_TYPE_CPU_AFFINITY:
189 if (ignore_cpu_affinity)
190 continue;
191
192 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
193 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
194 (srat_cpu->ProximityDomainHi[1] << 16) |
195 (srat_cpu->ProximityDomainHi[0] << 8) |
196 (srat_cpu->ProximityDomainLo);
197
198 cpuentry = cpu_alloc();
199 if (cpuentry == NULL)
200 return ENOMEM;
201 CPU_ADD(cpuentry);
202
203 cpuentry->cpu.nodeid = nodeid;
204 cpuentry->cpu.apicid = srat_cpu->ApicId;
205 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
206 cpuentry->cpu.flags = srat_cpu->Flags;
207 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
208 break;
209
210 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
211 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
212 nodeid = srat_mem->ProximityDomain;
213
214 mementry = mem_alloc();
215 if (mementry == NULL)
216 return ENOMEM;
217 MEM_ADD(mementry);
218
219 mementry->mem.nodeid = nodeid;
220 mementry->mem.baseaddress = srat_mem->BaseAddress;
221 mementry->mem.length = srat_mem->Length;
222 mementry->mem.flags = srat_mem->Flags;
223 break;
224
225 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
226 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
227 nodeid = srat_x2apic->ProximityDomain;
228
229 /* This table entry overrides
230 * ACPI_SRAT_TYPE_CPU_AFFINITY.
231 */
232 if (!ignore_cpu_affinity) {
233 struct cpulist *citer;
234 while ((citer = CPU_FIRST) != NULL) {
235 CPU_REM(citer);
236 cpu_free(citer);
237 }
238 ignore_cpu_affinity = true;
239 }
240
241 cpuentry = cpu_alloc();
242 if (cpuentry == NULL)
243 return ENOMEM;
244 CPU_ADD(cpuentry);
245
246 cpuentry->cpu.nodeid = nodeid;
247 cpuentry->cpu.apicid = srat_x2apic->ApicId;
248 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
249 cpuentry->cpu.flags = srat_x2apic->Flags;
250 break;
251
252 case ACPI_SRAT_TYPE_RESERVED:
253 printf("ACPI SRAT subtable reserved, length: 0x%x\n",
254 subtable->Length);
255 break;
256 }
257 }
258
259 return 0;
260}
261
262static int
263acpisrat_quirks(void)
264{
265 struct cpulist *citer;
266 struct memlist *mem, *miter;
267
268 /* Some sanity checks. */
269
270 /* Deal with holes in the memory nodes.
271 * BIOS doesn't enlist memory nodes which
272 * don't have any memory modules plugged in.
273 * This behaviour has been observed on AMD machines.
274 *
275 * Do that by searching for CPUs in NUMA nodes
276 * which don't exist in the memory and then insert
277 * a zero memory range for the missing node.
278 */
279 CPU_FOREACH(citer) {
280 mem = mem_get(citer->cpu.nodeid);
281 if (mem != NULL)
282 continue;
283 mem = mem_alloc();
284 if (mem == NULL)
285 return ENOMEM;
286 mem->mem.nodeid = citer->cpu.nodeid;
287 /* all other fields are already zero filled */
288
289 MEM_FOREACH(miter) {
290 if (miter->mem.nodeid < citer->cpu.nodeid)
291 continue;
292 MEM_ADD_BEFORE(mem, miter);
293 break;
294 }
295 }
296
297 return 0;
298}
299
/*
 * Locate the SRAT and build the NUMA tables from it.
 *
 * Returns EEXIST when acpisrat_exist() does not find a usable table,
 * otherwise whatever acpisrat_refresh() returns.
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
307
308int
309acpisrat_refresh(void)
310{
311 int rc, i, j, k;
312 struct cpulist *citer;
313 struct memlist *miter;
314 uint32_t cnodes = 0, mnodes = 0;
315
316 CPU_INIT;
317 MEM_INIT;
318
319 rc = acpisrat_parse();
320 if (rc)
321 return rc;
322
323 rc = acpisrat_quirks();
324 if (rc)
325 return rc;
326
327 /* cleanup resources */
328 rc = acpisrat_exit();
329 if (rc)
330 return rc;
331
332 nnodes = 0;
333 ncpus = 0;
334 CPU_FOREACH(citer) {
335 cnodes = MAX(citer->cpu.nodeid, cnodes);
336 ncpus++;
337 }
338
339 nmems = 0;
340 MEM_FOREACH(miter) {
341 mnodes = MAX(miter->mem.nodeid, mnodes);
342 nmems++;
343 }
344
345 nnodes = MAX(cnodes, mnodes) + 1;
346
347 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
348 KM_NOSLEEP);
349 if (node_array == NULL)
350 return ENOMEM;
351
352 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
353 KM_NOSLEEP);
354 if (cpu_array == NULL)
355 return ENOMEM;
356
357 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
358 KM_NOSLEEP);
359 if (mem_array == NULL)
360 return ENOMEM;
361
362 i = 0;
363 CPU_FOREACH(citer) {
364 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
365 i++;
366 node_array[citer->cpu.nodeid].ncpus++;
367 }
368
369 i = 0;
370 MEM_FOREACH(miter) {
371 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
372 i++;
373 node_array[miter->mem.nodeid].nmems++;
374 }
375
376 for (i = 0; i < nnodes; i++) {
377 node_array[i].nodeid = i;
378
379 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
380 sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
381 node_array[i].mem = kmem_zalloc(node_array[i].nmems *
382 sizeof(struct acpisrat_mem *), KM_NOSLEEP);
383
384 k = 0;
385 for (j = 0; j < ncpus; j++) {
386 if (cpu_array[j].nodeid != i)
387 continue;
388 node_array[i].cpu[k] = &cpu_array[j];
389 k++;
390 }
391
392 k = 0;
393 for (j = 0; j < nmems; j++) {
394 if (mem_array[j].nodeid != i)
395 continue;
396 node_array[i].mem[k] = &mem_array[j];
397 k++;
398 }
399 }
400
401 while ((citer = CPU_FIRST) != NULL) {
402 CPU_REM(citer);
403 cpu_free(citer);
404 }
405
406 while ((miter = MEM_FIRST) != NULL) {
407 MEM_REM(miter);
408 mem_free(miter);
409 }
410
411 return 0;
412}
413
414
415int
416acpisrat_exit(void)
417{
418 int i;
419
420 if (node_array) {
421 for (i = 0; i < nnodes; i++) {
422 if (node_array[i].cpu)
423 kmem_free(node_array[i].cpu,
424 node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
425 if (node_array[i].mem)
426 kmem_free(node_array[i].mem,
427 node_array[i].nmems * sizeof(struct acpisrat_mem *));
428 }
429 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
430 }
431 node_array = NULL;
432
433 if (cpu_array)
434 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
435 cpu_array = NULL;
436
437 if (mem_array)
438 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
439 mem_array = NULL;
440
441 nnodes = 0;
442 ncpus = 0;
443 nmems = 0;
444
445 return 0;
446}
447
448
449void
450acpisrat_dump(void)
451{
452 uint32_t i, j, nn, nc, nm;
453 struct acpisrat_cpu c;
454 struct acpisrat_mem m;
455
456 nn = acpisrat_nodes();
457 aprint_debug("SRAT: %u NUMA nodes\n", nn);
458 for (i = 0; i < nn; i++) {
459 nc = acpisrat_node_cpus(i);
460 for (j = 0; j < nc; j++) {
461 acpisrat_cpu(i, j, &c);
462 aprint_debug("SRAT: node %u cpu %u "
463 "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
464 c.nodeid, j, c.apicid, c.sapiceid, c.flags,
465 c.clockdomain);
466 }
467
468 nm = acpisrat_node_memoryranges(i);
469 for (j = 0; j < nm; j++) {
470 acpisrat_mem(i, j, &m);
471 aprint_debug("SRAT: node %u memory range %u (0x%"
472 PRIx64" - 0x%"PRIx64" flags %u)\n",
473 m.nodeid, j, m.baseaddress,
474 m.baseaddress + m.length, m.flags);
475 }
476 }
477}
478
479uint32_t
480acpisrat_nodes(void)
481{
482 return nnodes;
483}
484
485uint32_t
486acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
487{
488 return node_array[nodeid].ncpus;
489}
490
491uint32_t
492acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
493{
494 return node_array[nodeid].nmems;
495}
496
497void
498acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
499 struct acpisrat_cpu *c)
500{
501 memcpy(c, node_array[nodeid].cpu[cpunum],
502 sizeof(struct acpisrat_cpu));
503}
504
505void
506acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
507 struct acpisrat_mem *mem)
508{
509 memcpy(mem, node_array[nodeid].mem[memrange],
510 sizeof(struct acpisrat_mem));
511}
512