1/* $NetBSD: viomb.c,v 1.7 2016/09/27 03:33:32 pgoyette Exp $ */
2
3/*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.7 2016/09/27 03:33:32 pgoyette Exp $");
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/kernel.h>
34#include <sys/bus.h>
35#include <sys/condvar.h>
36#include <sys/device.h>
37#include <sys/kthread.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40#include <uvm/uvm_page.h>
41#include <sys/module.h>
42
43#include <dev/pci/pcidevs.h>
44#include <dev/pci/pcireg.h>
45#include <dev/pci/pcivar.h>
46
47#include <dev/pci/virtioreg.h>
48#include <dev/pci/virtiovar.h>
49
50#include "ioconf.h"
51
/*
 * Configuration registers: offsets handed to
 * virtio_read_device_config_4() / virtio_write_device_config_4().
 */
#define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0 /* 32bit; cached in sc_npages */
#define VIRTIO_BALLOON_CONFIG_ACTUAL	4 /* 32bit; cached in sc_actual */

/* Feature bits */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)	/* tested in deflate paths */
#define VIRTIO_BALLOON_F_STATS_VQ	(1<<1)	/* not used by this driver */

/* snprintb() format used to print the negotiated features at attach. */
#define VIRTIO_BALLOON_FLAG_BITS \
	VIRTIO_COMMON_FLAG_BITS \
	"\x02""STATS_VQ" \
	"\x01""MUST_TELL_HOST"

/* Maximum number of page entries carried by one inflate/deflate request. */
#define PGS_PER_REQ		(256)	/* 1MB, 4KB/page */

/* The driver stores one array entry per host page, so the sizes must agree. */
CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */
68
/*
 * The single request buffer shared by the inflate and deflate paths.
 */
struct balloon_req {
	bus_dmamap_t			bl_dmamap;	/* map loaded over bl_pages[] at attach */
	struct pglist			bl_pglist;	/* pages belonging to the current request */
	int				bl_nentries;	/* entries counted for the current request */
	uint32_t			bl_pages[PGS_PER_REQ];	/* phys_addr / VIRTIO_PAGE_SIZE per page */
};
75
/*
 * Per-device state of the balloon driver.
 */
struct viomb_softc {
	device_t		sc_dev;		/* our own device handle */

	struct virtio_softc	*sc_virtio;	/* parent virtio device */
	struct virtqueue	sc_vq[2];	/* [0] = inflate, [1] = deflate */

	unsigned int		sc_npages;	/* NUM_PAGES config register value */
	unsigned int		sc_actual;	/* ACTUAL config register value */
	int			sc_inflight;	/* pages in the outstanding request:
						 * raised by inflate(), lowered by
						 * deflate(), undone on completion */
	struct balloon_req	sc_req;		/* the one request buffer */
	struct pglist		sc_balloon_pages; /* pages currently held in the balloon */

	int			sc_inflate_done; /* set by inflateq_done(), cleared by kthread */
	int			sc_deflate_done; /* set by deflateq_done(), cleared by kthread */

	kcondvar_t		sc_wait;	/* the kthread sleeps here */
	kmutex_t		sc_waitlock;	/* protects the *_done flags and sc_wait */
};
94
/*
 * Guard maintained by viomb_attach(): a second balloon instance is
 * refused while this is nonzero.
 */
static int balloon_initialized = 0; /* multiple balloon is not allowed */

/* autoconf glue */
static int	viomb_match(device_t, cfdata_t, void *);
static void	viomb_attach(device_t, device_t, void *);
/* device configuration space handling */
static void	viomb_read_config(struct viomb_softc *);
static int	viomb_config_change(struct virtio_softc *);
/* inflate: submit request, vq completion callback, kthread-side completion */
static int	inflate(struct viomb_softc *);
static int	inflateq_done(struct virtqueue *);
static int	inflate_done(struct viomb_softc *);
/* deflate: submit request, vq completion callback, kthread-side completion */
static int	deflate(struct viomb_softc *);
static int	deflateq_done(struct virtqueue *);
static int	deflate_done(struct viomb_softc *);
/* worker thread body */
static void	viomb_thread(void *);

/* No detach or activate hooks are provided. */
CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
    viomb_match, viomb_attach, NULL, NULL);
111
112static int
113viomb_match(device_t parent, cfdata_t match, void *aux)
114{
115 struct virtio_softc *vsc = aux;
116
117 if (vsc->sc_childdevid == PCI_PRODUCT_VIRTIO_BALLOON)
118 return 1;
119
120 return 0;
121}
122
123static void
124viomb_attach(device_t parent, device_t self, void *aux)
125{
126 struct viomb_softc *sc = device_private(self);
127 struct virtio_softc *vsc = device_private(parent);
128 const struct sysctlnode *node;
129 uint32_t features;
130 char buf[256];
131
132 if (vsc->sc_child != NULL) {
133 aprint_normal(": child already attached for %s; "
134 "something wrong...\n", device_xname(parent));
135 return;
136 }
137 if (balloon_initialized++) {
138 aprint_normal(": balloon already exists; something wrong...\n");
139 goto err_none;
140 }
141
142 sc->sc_dev = self;
143 sc->sc_virtio = vsc;
144
145 vsc->sc_child = self;
146 vsc->sc_ipl = IPL_VM;
147 vsc->sc_vqs = &sc->sc_vq[0];
148 vsc->sc_nvqs = 2;
149 vsc->sc_config_change = viomb_config_change;
150 vsc->sc_intrhand = virtio_vq_intr;
151 vsc->sc_flags = 0;
152
153 features = virtio_negotiate_features(vsc,
154 VIRTIO_CONFIG_DEVICE_FEATURES);
155
156 snprintb(buf, sizeof(buf), VIRTIO_BALLOON_FLAG_BITS, features);
157 aprint_normal(": Features: %s\n", buf);
158 if ((virtio_alloc_vq(vsc, &sc->sc_vq[0], 0,
159 sizeof(uint32_t)*PGS_PER_REQ, 1,
160 "inflate") != 0) ||
161 (virtio_alloc_vq(vsc, &sc->sc_vq[1], 1,
162 sizeof(uint32_t)*PGS_PER_REQ, 1,
163 "deflate") != 0)) {
164 goto err_none;
165 }
166 sc->sc_vq[0].vq_done = inflateq_done;
167 sc->sc_vq[1].vq_done = deflateq_done;
168
169 viomb_read_config(sc);
170 sc->sc_inflight = 0;
171 TAILQ_INIT(&sc->sc_balloon_pages);
172
173 if (bus_dmamap_create(vsc->sc_dmat, sizeof(uint32_t)*PGS_PER_REQ,
174 1, sizeof(uint32_t)*PGS_PER_REQ, 0,
175 BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
176 aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
177 goto err_vq;
178 }
179 if (bus_dmamap_load(vsc->sc_dmat, sc->sc_req.bl_dmamap,
180 &sc->sc_req.bl_pages[0],
181 sizeof(uint32_t) * PGS_PER_REQ,
182 NULL, BUS_DMA_NOWAIT)) {
183 aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
184 goto err_dmamap;
185 }
186
187 sc->sc_inflate_done = sc->sc_deflate_done = 0;
188 mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
189 cv_init(&sc->sc_wait, "balloon");
190
191 if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
192 viomb_thread, sc, NULL, "viomb")) {
193 aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
194 goto err_mutex;
195 }
196
197 sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
198 "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
199 NULL, 0, NULL, 0,
200 CTL_HW, CTL_CREATE, CTL_EOL);
201 sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
202 "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
203 NULL, 0, &sc->sc_npages, 0,
204 CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
205 sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
206 "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
207 NULL, 0, &sc->sc_actual, 0,
208 CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
209 return;
210
211err_mutex:
212 cv_destroy(&sc->sc_wait);
213 mutex_destroy(&sc->sc_waitlock);
214err_dmamap:
215 bus_dmamap_destroy(vsc->sc_dmat, sc->sc_req.bl_dmamap);
216err_vq:
217 virtio_free_vq(vsc, &sc->sc_vq[1]);
218 virtio_free_vq(vsc, &sc->sc_vq[0]);
219err_none:
220 vsc->sc_child = (void*)1;
221 return;
222}
223
224static void
225viomb_read_config(struct viomb_softc *sc)
226{
227 unsigned int reg;
228
229 /* these values are explicitly specified as little-endian */
230 reg = virtio_read_device_config_4(sc->sc_virtio,
231 VIRTIO_BALLOON_CONFIG_NUM_PAGES);
232 sc->sc_npages = le32toh(reg);
233
234 reg = virtio_read_device_config_4(sc->sc_virtio,
235 VIRTIO_BALLOON_CONFIG_ACTUAL);
236 sc->sc_actual = le32toh(reg);
237}
238
239/*
240 * Config change callback: wakeup the kthread.
241 */
242static int
243viomb_config_change(struct virtio_softc *vsc)
244{
245 struct viomb_softc *sc = device_private(vsc->sc_child);
246 unsigned int old;
247
248 old = sc->sc_npages;
249 viomb_read_config(sc);
250 mutex_enter(&sc->sc_waitlock);
251 cv_signal(&sc->sc_wait);
252 mutex_exit(&sc->sc_waitlock);
253 if (sc->sc_npages > old)
254 printf("%s: inflating balloon from %u to %u.\n",
255 device_xname(sc->sc_dev), old, sc->sc_npages);
256 else if (sc->sc_npages < old)
257 printf("%s: deflating balloon from %u to %u.\n",
258 device_xname(sc->sc_dev), old, sc->sc_npages);
259
260 return 1;
261}
262
263/*
264 * Inflate: consume some amount of physical memory.
265 */
266static int
267inflate(struct viomb_softc *sc)
268{
269 struct virtio_softc *vsc = sc->sc_virtio;
270 int i, slot;
271 uint64_t nvpages, nhpages;
272 struct balloon_req *b;
273 struct vm_page *p;
274 struct virtqueue *vq = &sc->sc_vq[0];
275
276 if (sc->sc_inflight)
277 return 0;
278 nvpages = sc->sc_npages - sc->sc_actual;
279 if (nvpages > PGS_PER_REQ)
280 nvpages = PGS_PER_REQ;
281 nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
282
283 b = &sc->sc_req;
284 if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*PAGE_SIZE,
285 0, 0, &b->bl_pglist, nhpages, 1)) {
286 printf("%s: %" PRIu64 " pages of physical memory "
287 "could not be allocated, retrying...\n",
288 device_xname(sc->sc_dev), nhpages);
289 return 1; /* sleep longer */
290 }
291
292 b->bl_nentries = nvpages;
293 i = 0;
294 TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
295 b->bl_pages[i++] = p->phys_addr / VIRTIO_PAGE_SIZE;
296 }
297 KASSERT(i == nvpages);
298
299 if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
300 printf("%s: inflate enqueue failed.\n",
301 device_xname(sc->sc_dev));
302 uvm_pglistfree(&b->bl_pglist);
303 return 0;
304 }
305 if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
306 printf("%s: inflate enqueue failed.\n",
307 device_xname(sc->sc_dev));
308 uvm_pglistfree(&b->bl_pglist);
309 return 0;
310 }
311 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
312 sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
313 virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
314 virtio_enqueue_commit(vsc, vq, slot, true);
315 sc->sc_inflight += nvpages;
316
317 return 0;
318}
319
320static int
321inflateq_done(struct virtqueue *vq)
322{
323 struct virtio_softc *vsc = vq->vq_owner;
324 struct viomb_softc *sc = device_private(vsc->sc_child);
325
326 mutex_enter(&sc->sc_waitlock);
327 sc->sc_inflate_done = 1;
328 cv_signal(&sc->sc_wait);
329 mutex_exit(&sc->sc_waitlock);
330
331 return 1;
332}
333
334static int
335inflate_done(struct viomb_softc *sc)
336{
337 struct virtio_softc *vsc = sc->sc_virtio;
338 struct virtqueue *vq = &sc->sc_vq[0];
339 struct balloon_req *b;
340 int r, slot;
341 uint64_t nvpages;
342 struct vm_page *p;
343
344 r = virtio_dequeue(vsc, vq, &slot, NULL);
345 if (r != 0) {
346 printf("%s: inflate dequeue failed, errno %d.\n",
347 device_xname(sc->sc_dev), r);
348 return 1;
349 }
350 virtio_dequeue_commit(vsc, vq, slot);
351
352 b = &sc->sc_req;
353 nvpages = b->bl_nentries;
354 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap,
355 offsetof(struct balloon_req, bl_pages),
356 sizeof(uint32_t)*nvpages,
357 BUS_DMASYNC_POSTWRITE);
358 while (!TAILQ_EMPTY(&b->bl_pglist)) {
359 p = TAILQ_FIRST(&b->bl_pglist);
360 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
361 TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
362 }
363
364 sc->sc_inflight -= nvpages;
365 virtio_write_device_config_4(vsc,
366 VIRTIO_BALLOON_CONFIG_ACTUAL,
367 sc->sc_actual + nvpages);
368 viomb_read_config(sc);
369
370 return 1;
371}
372
373/*
374 * Deflate: free previously allocated memory.
375 */
376static int
377deflate(struct viomb_softc *sc)
378{
379 struct virtio_softc *vsc = sc->sc_virtio;
380 int i, slot;
381 uint64_t nvpages, nhpages;
382 struct balloon_req *b;
383 struct vm_page *p;
384 struct virtqueue *vq = &sc->sc_vq[1];
385
386 nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
387 if (nvpages > PGS_PER_REQ)
388 nvpages = PGS_PER_REQ;
389 nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
390
391 b = &sc->sc_req;
392
393 b->bl_nentries = nvpages;
394 TAILQ_INIT(&b->bl_pglist);
395 for (i = 0; i < nhpages; i++) {
396 p = TAILQ_FIRST(&sc->sc_balloon_pages);
397 if (p == NULL)
398 break;
399 TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
400 TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
401 b->bl_pages[i] = p->phys_addr / VIRTIO_PAGE_SIZE;
402 }
403
404 if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
405 printf("%s: deflate enqueue failed.\n",
406 device_xname(sc->sc_dev));
407 TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
408 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
409 TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
410 pageq.queue);
411 }
412 return 0;
413 }
414 if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
415 printf("%s: deflate enqueue failed.\n",
416 device_xname(sc->sc_dev));
417 TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
418 TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
419 TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
420 pageq.queue);
421 }
422 return 0;
423 }
424 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap, 0,
425 sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
426 virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
427 virtio_enqueue_commit(vsc, vq, slot, true);
428 sc->sc_inflight -= nvpages;
429
430 if (!(vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST))
431 uvm_pglistfree(&b->bl_pglist);
432
433 return 0;
434}
435
436static int
437deflateq_done(struct virtqueue *vq)
438{
439 struct virtio_softc *vsc = vq->vq_owner;
440 struct viomb_softc *sc = device_private(vsc->sc_child);
441
442 mutex_enter(&sc->sc_waitlock);
443 sc->sc_deflate_done = 1;
444 cv_signal(&sc->sc_wait);
445 mutex_exit(&sc->sc_waitlock);
446
447 return 1;
448}
449
450static int
451deflate_done(struct viomb_softc *sc)
452{
453 struct virtio_softc *vsc = sc->sc_virtio;
454 struct virtqueue *vq = &sc->sc_vq[1];
455 struct balloon_req *b;
456 int r, slot;
457 uint64_t nvpages;
458
459 r = virtio_dequeue(vsc, vq, &slot, NULL);
460 if (r != 0) {
461 printf("%s: deflate dequeue failed, errno %d\n",
462 device_xname(sc->sc_dev), r);
463 return 1;
464 }
465 virtio_dequeue_commit(vsc, vq, slot);
466
467 b = &sc->sc_req;
468 nvpages = b->bl_nentries;
469 bus_dmamap_sync(vsc->sc_dmat, b->bl_dmamap,
470 offsetof(struct balloon_req, bl_pages),
471 sizeof(uint32_t)*nvpages,
472 BUS_DMASYNC_POSTWRITE);
473
474 if (vsc->sc_features & VIRTIO_BALLOON_F_MUST_TELL_HOST)
475 uvm_pglistfree(&b->bl_pglist);
476
477 sc->sc_inflight += nvpages;
478 virtio_write_device_config_4(vsc,
479 VIRTIO_BALLOON_CONFIG_ACTUAL,
480 sc->sc_actual - nvpages);
481 viomb_read_config(sc);
482
483 return 1;
484}
485
486/*
487 * Kthread: sleeps, eventually inflate and deflate.
488 */
489static void
490viomb_thread(void *arg)
491{
492 struct viomb_softc *sc = arg;
493 int sleeptime, r;
494
495 for ( ; ; ) {
496 sleeptime = 30000;
497 if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
498 if (sc->sc_inflight == 0) {
499 r = inflate(sc);
500 if (r != 0)
501 sleeptime = 10000;
502 else
503 sleeptime = 1000;
504 } else
505 sleeptime = 100;
506 } else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
507 if (sc->sc_inflight == 0)
508 r = deflate(sc);
509 sleeptime = 100;
510 }
511
512 again:
513 mutex_enter(&sc->sc_waitlock);
514 if (sc->sc_inflate_done) {
515 sc->sc_inflate_done = 0;
516 mutex_exit(&sc->sc_waitlock);
517 inflate_done(sc);
518 goto again;
519 }
520 if (sc->sc_deflate_done) {
521 sc->sc_deflate_done = 0;
522 mutex_exit(&sc->sc_waitlock);
523 deflate_done(sc);
524 goto again;
525 }
526 cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
527 mstohz(sleeptime));
528 mutex_exit(&sc->sc_waitlock);
529 }
530}
531
532MODULE(MODULE_CLASS_DRIVER, viomb, "virtio");
533
534#ifdef _MODULE
535#include "ioconf.c"
536#endif
537
538static int
539viomb_modcmd(modcmd_t cmd, void *opaque)
540{
541 int error = 0;
542
543#ifdef _MODULE
544 switch (cmd) {
545 case MODULE_CMD_INIT:
546 error = config_init_component(cfdriver_ioconf_viomb,
547 cfattach_ioconf_viomb, cfdata_ioconf_viomb);
548 break;
549 case MODULE_CMD_FINI:
550 error = config_fini_component(cfdriver_ioconf_viomb,
551 cfattach_ioconf_viomb, cfdata_ioconf_viomb);
552 break;
553 default:
554 error = ENOTTY;
555 break;
556 }
557#endif
558
559 return error;
560}
561