1/* $NetBSD: xen_machdep.c,v 1.15 2016/06/08 01:59:06 jnemeth Exp $ */
2
3/*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28/*
29 *
30 * Copyright (c) 2004 Christian Limpach.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
43 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
44 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
45 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
46 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
47 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
48 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
49 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
50 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
51 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
52 */
53
54
55#include <sys/cdefs.h>
56__KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.15 2016/06/08 01:59:06 jnemeth Exp $");
57
58#include "opt_xen.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/boot_flag.h>
63#include <sys/mount.h>
64#include <sys/reboot.h>
65#include <sys/timetc.h>
66#include <sys/sysctl.h>
67#include <sys/pmf.h>
68
69#include <xen/hypervisor.h>
70#include <xen/shutdown_xenbus.h>
71#include <xen/xen-public/version.h>
72
/*
 * Debug print helper.  Call sites pass their own parenthesised argument
 * list, e.g. DPRINTK(("suspending domain\n")), which expands to
 * printk("suspending domain\n").  The "#if 0" section below holds a
 * no-op definition that is currently compiled out.
 */
73#define DPRINTK(x) printk x
74#if 0
75#define DPRINTK(x)
76#endif
77
/* Defined further down in this file; the implementation just panics. */
78u_int tsc_get_timecount(struct timecounter *);
79
/*
 * Gate for the suspend sysctl: sysctl_xen_suspend() returns EAGAIN while
 * this is false, and xen_prepare_resume() sets it back to false.  Nothing
 * in this file sets it to true.
 */
80bool xen_suspend_allow;
81
/* Not referenced in the visible part of this file. */
82extern uint64_t tsc_freq; /* XXX */
83
/* File-local pieces of the domain suspend/resume path, defined below. */
84static int sysctl_xen_suspend(SYSCTLFN_ARGS);
85static void xen_suspend_domain(void);
86static void xen_prepare_suspend(void);
87static void xen_prepare_resume(void);
88
89void
90xen_parse_cmdline(int what, union xen_cmdline_parseinfo *xcp)
91{
92 char _cmd_line[256], *cmd_line, *opt, *s;
93 int b, i, ipidx = 0;
94 uint32_t xi_ip[5];
95 size_t len;
96
97 len = strlcpy(_cmd_line, xen_start_info.cmd_line, sizeof(_cmd_line));
98 if (len > sizeof(_cmd_line)) {
99 printf("command line exceeded limit of 255 chars. Truncated.\n");
100 }
101 cmd_line = _cmd_line;
102
103 switch (what) {
104 case XEN_PARSE_BOOTDEV:
105 xcp->xcp_bootdev[0] = 0;
106 break;
107 case XEN_PARSE_CONSOLE:
108 xcp->xcp_console[0] = 0;
109 break;
110 }
111
112 while (cmd_line && *cmd_line) {
113 opt = cmd_line;
114 cmd_line = strchr(opt, ' ');
115 if (cmd_line)
116 *cmd_line = 0;
117
118 switch (what) {
119 case XEN_PARSE_BOOTDEV:
120 if (strncasecmp(opt, "bootdev=", 8) == 0) {
121 strncpy(xcp->xcp_bootdev, opt + 8,
122 sizeof(xcp->xcp_bootdev));
123 break;
124 }
125 if (strncasecmp(opt, "root=", 5) == 0) {
126 strncpy(xcp->xcp_bootdev, opt + 5,
127 sizeof(xcp->xcp_bootdev));
128 break;
129 }
130 break;
131
132 case XEN_PARSE_NETINFO:
133 if (xcp->xcp_netinfo.xi_root &&
134 strncasecmp(opt, "nfsroot=", 8) == 0)
135 strncpy(xcp->xcp_netinfo.xi_root, opt + 8,
136 MNAMELEN);
137
138 if (strncasecmp(opt, "ip=", 3) == 0) {
139 memset(xi_ip, 0, sizeof(xi_ip));
140 opt += 3;
141 ipidx = 0;
142 while (opt && *opt) {
143 s = opt;
144 opt = strchr(opt, ':');
145 if (opt)
146 *opt = 0;
147
148 switch (ipidx) {
149 case 0: /* ip */
150 case 1: /* nfs server */
151 case 2: /* gw */
152 case 3: /* mask */
153 case 4: /* host */
154 if (*s == 0)
155 break;
156 for (i = 0; i < 4; i++) {
157 b = strtoul(s, &s, 10);
158 xi_ip[ipidx] = b + 256
159 * xi_ip[ipidx];
160 if (*s != '.')
161 break;
162 s++;
163 }
164 if (i < 3)
165 xi_ip[ipidx] = 0;
166 break;
167 case 5: /* interface */
168 if (!strncmp(s, "xennet", 6))
169 s += 6;
170 else if (!strncmp(s, "eth", 3))
171 s += 3;
172 else
173 break;
174 if (xcp->xcp_netinfo.xi_ifno
175 == strtoul(s, NULL, 10))
176 memcpy(xcp->
177 xcp_netinfo.xi_ip,
178 xi_ip,
179 sizeof(xi_ip));
180 break;
181 }
182 ipidx++;
183
184 if (opt)
185 *opt++ = ':';
186 }
187 }
188 break;
189
190 case XEN_PARSE_CONSOLE:
191 if (strncasecmp(opt, "console=", 8) == 0)
192 strncpy(xcp->xcp_console, opt + 8,
193 sizeof(xcp->xcp_console));
194 break;
195
196 case XEN_PARSE_BOOTFLAGS:
197 if (*opt == '-') {
198 opt++;
199 while(*opt != '\0') {
200 BOOT_FLAG(*opt, boothowto);
201 opt++;
202 }
203 }
204 break;
205 case XEN_PARSE_PCIBACK:
206 if (strncasecmp(opt, "pciback.hide=", 13) == 0)
207 strncpy(xcp->xcp_pcidevs, opt + 13,
208 sizeof(xcp->xcp_pcidevs));
209 break;
210 }
211
212 if (cmd_line)
213 *cmd_line++ = ' ';
214 }
215}
216
217u_int
218tsc_get_timecount(struct timecounter *tc)
219{
220
221 panic("xen: tsc_get_timecount");
222}
223
224/*
225 * this function sets up the machdep.xen.suspend sysctl(7) that
226 * controls domain suspend/save.
227 */
228void
229sysctl_xen_suspend_setup(void)
230{
231 const struct sysctlnode *node = NULL;
232
233 /*
234 * dom0 implements sleep support through ACPI. It should not call
235 * this function to register a suspend interface.
236 */
237 KASSERT(!(xendomain_is_dom0()));
238
239 sysctl_createv(NULL, 0, NULL, &node,
240 CTLFLAG_PERMANENT,
241 CTLTYPE_NODE, "machdep", NULL,
242 NULL, 0, NULL, 0,
243 CTL_MACHDEP, CTL_EOL);
244
245 sysctl_createv(NULL, 0, &node, &node,
246 CTLFLAG_PERMANENT,
247 CTLTYPE_NODE, "xen",
248 SYSCTL_DESCR("Xen top level node"),
249 NULL, 0, NULL, 0,
250 CTL_CREATE, CTL_EOL);
251
252 sysctl_createv(NULL, 0, &node, &node,
253 CTLFLAG_PERMANENT | CTLFLAG_READWRITE | CTLFLAG_IMMEDIATE,
254 CTLTYPE_INT, "suspend",
255 SYSCTL_DESCR("Suspend/save current Xen domain"),
256 sysctl_xen_suspend, 0, NULL, 0,
257 CTL_CREATE, CTL_EOL);
258}
259
260static int
261sysctl_xen_suspend(SYSCTLFN_ARGS)
262{
263 int error;
264 struct sysctlnode node;
265
266 node = *rnode;
267 error = sysctl_lookup(SYSCTLFN_CALL(&node));
268
269 if (error || newp == NULL)
270 return error;
271
272 /* only allow domain to suspend when dom0 instructed to do so */
273 if (xen_suspend_allow == false)
274 return EAGAIN;
275
276 xen_suspend_domain();
277
278 return 0;
279
280}
281
282/*
283 * Last operations before suspending domain
284 */
285static void
286xen_prepare_suspend(void)
287{
288
289 kpreempt_disable();
290
291 pmap_xen_suspend();
292 xen_suspendclocks(curcpu());
293
294 /*
295 * save/restore code does not translate these MFNs to their
296 * associated PFNs, so we must do it
297 */
298 xen_start_info.store_mfn =
299 atop(xpmap_mtop(ptoa(xen_start_info.store_mfn)));
300 xen_start_info.console_mfn =
301 atop(xpmap_mtop(ptoa(xen_start_info.console_mfn)));
302
303 DPRINTK(("suspending domain\n"));
304 aprint_verbose("suspending domain\n");
305
306 /* invalidate the shared_info page */
307 if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
308 0, UVMF_INVLPG)) {
309 DPRINTK(("HYPERVISOR_shared_info page invalidation failed"));
310 HYPERVISOR_crash();
311 }
312
313}
314
315/*
316 * First operations before restoring domain context
317 */
318static void
319xen_prepare_resume(void)
320{
321 /* map the new shared_info page */
322 if (HYPERVISOR_update_va_mapping((vaddr_t)HYPERVISOR_shared_info,
323 xen_start_info.shared_info | PG_RW | PG_V,
324 UVMF_INVLPG)) {
325 DPRINTK(("could not map new shared info page"));
326 HYPERVISOR_crash();
327 }
328
329 pmap_xen_resume();
330
331 if (xen_start_info.nr_pages != physmem) {
332 /*
333 * XXX JYM for now, we crash - fix it with memory
334 * hotplug when supported
335 */
336 DPRINTK(("xen_start_info.nr_pages != physmem"));
337 HYPERVISOR_crash();
338 }
339
340 DPRINTK(("preparing domain resume\n"));
341 aprint_verbose("preparing domain resume\n");
342
343 xen_suspend_allow = false;
344
345 xen_resumeclocks(curcpu());
346
347 kpreempt_enable();
348
349}
350
351static void
352xen_suspend_domain(void)
353{
354 paddr_t mfn;
355 int s = splvm();
356
357 /*
358 * console becomes unavailable when suspended, so
359 * direct communications to domain are hampered from there on.
360 * We can only rely on low level primitives like printk(), until
361 * console is fully restored
362 */
363 if (!pmf_system_suspend(PMF_Q_NONE)) {
364 DPRINTK(("devices suspend failed"));
365 HYPERVISOR_crash();
366 }
367
368 /*
369 * obtain the MFN of the start_info page now, as we will not be
370 * able to do it once pmap is locked
371 */
372 pmap_extract_ma(pmap_kernel(), (vaddr_t)&xen_start_info, &mfn);
373 mfn >>= PAGE_SHIFT;
374
375 xen_prepare_suspend();
376
377 DPRINTK(("calling HYPERVISOR_suspend()\n"));
378 if (HYPERVISOR_suspend(mfn) != 0) {
379 /* XXX JYM: implement checkpoint/snapshot (ret == 1) */
380 DPRINTK(("HYPERVISOR_suspend() failed"));
381 HYPERVISOR_crash();
382 }
383
384 DPRINTK(("left HYPERVISOR_suspend()\n"));
385
386 xen_prepare_resume();
387
388 DPRINTK(("resuming devices\n"));
389 if (!pmf_system_resume(PMF_Q_NONE)) {
390 DPRINTK(("devices resume failed\n"));
391 HYPERVISOR_crash();
392 }
393
394 splx(s);
395
396 /* xencons is back online, we can print to console */
397 aprint_verbose("domain resumed\n");
398
399}
400
401bool xen_feature_tables[XENFEAT_NR_SUBMAPS * 32];
402
403void
404xen_init_features(void)
405{
406 xen_feature_info_t features;
407
408 for (int sm = 0; sm < XENFEAT_NR_SUBMAPS; sm++) {
409 features.submap_idx = sm;
410 if (HYPERVISOR_xen_version(XENVER_get_features, &features) < 0)
411 break;
412 for (int f = 0; f < 32; f++) {
413 xen_feature_tables[sm * 32 + f] =
414 (features.submap & (1 << f)) ? 1 : 0;
415 }
416 }
417}
418