/* $NetBSD: xbd_xenbus.c,v 1.75 2015/10/25 07:51:16 maxv Exp $ */

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains the xbd frontend code required for block-level
 * communications (similar to hard disks) between two Xen domains.
 *
 * The frontend is not supposed to receive spontaneous solicitations from the
 * backend. The protocol is therefore fairly simple and uses only one ring to
 * communicate with the backend: the frontend posts requests to the ring, then
 * asynchronously waits for their replies.
 *
 * xbd follows NetBSD's disk(9) convention. At any time, an LWP can schedule
 * an operation request for the device (be it open(), read(), write(), ...).
 * Calls are typically processed this way:
 * - initiate request: xbdread/write/open/ioctl/..
 * - depending on the operation, it is handled directly by the disk(9)
 *   subsystem or goes through physio(9) first.
 * - the request is ultimately processed by xbd_diskstart(), which prepares
 *   the xbd requests, posts them in the ring I/O queue, then signals the
 *   backend.
 *
 * When a response is available in the queue, the backend signals the frontend
 * via its event channel. This triggers xbd_handler(), which will link back
 * the response to its request through the request ID, and mark the I/O as
 * completed.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.75 2015/10/25 07:51:16 maxv Exp $");

#include "opt_xen.h"


#include <sys/param.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/stat.h>
#include <sys/vnode.h>

#include <dev/dkvar.h>

#include <uvm/uvm.h>

#include <xen/hypervisor.h>
#include <xen/evtchn.h>
#include <xen/granttables.h>
#include <xen/xen-public/io/blkif.h>
#include <xen/xen-public/io/protocols.h>

#include <xen/xenbus.h>
#include "locators.h"

#undef XBD_DEBUG
#ifdef XBD_DEBUG
#define DPRINTF(x) printf x;
#else
#define DPRINTF(x)
#endif

#define GRANT_INVALID_REF -1

#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)

#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */
#define XEN_BSIZE (1 << XEN_BSHIFT)

struct xbd_req {
	SLIST_ENTRY(xbd_req) req_next;
	uint16_t req_id; /* ID passed to backend */
	union {
		struct {
			grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
			int req_nr_segments; /* number of segments in this request */
			struct buf *req_bp; /* buffer associated with this request */
			void *req_data; /* pointer to the data buffer */
		} req_rw;
		struct {
			int s_error;
			volatile int s_done;
		} req_sync;
	} u;
};
#define req_gntref	u.req_rw.req_gntref
#define req_nr_segments	u.req_rw.req_nr_segments
#define req_bp		u.req_rw.req_bp
#define req_data	u.req_rw.req_data
#define req_sync	u.req_sync

struct xbd_xenbus_softc {
	struct dk_softc sc_dksc;	/* Must be first in this struct */
	struct xenbus_device *sc_xbusd;

	blkif_front_ring_t sc_ring;

	unsigned int sc_evtchn;

	grant_ref_t sc_ring_gntref;

	struct xbd_req sc_reqs[XBD_RING_SIZE];
	SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
	bool sc_xbdreq_wait; /* special waiting on xbd_req */

	int sc_backend_status; /* our status with backend */
#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

	int sc_shutdown;
#define BLKIF_SHUTDOWN_RUN    0 /* no shutdown */
#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */
#define BLKIF_SHUTDOWN_LOCAL  2 /* locally-initiated shutdown in progress */

	uint64_t sc_sectors; /* number of sectors for this device */
	u_long sc_secsize; /* sector size */
	uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
	u_long sc_info; /* VDISK_* */
	u_long sc_handle; /* from backend */
	int sc_cache_flush; /* backend supports BLKIF_OP_FLUSH_DISKCACHE */
};

#if 0
/* too big to be on stack */
static multicall_entry_t rq_mcl[XBD_RING_SIZE+1];
static paddr_t rq_pages[XBD_RING_SIZE];
#endif

static int  xbd_xenbus_match(device_t, cfdata_t, void *);
static void xbd_xenbus_attach(device_t, device_t, void *);
static int  xbd_xenbus_detach(device_t, int);

static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *);
static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);

static int  xbd_handler(void *);
static int  xbd_diskstart(device_t, struct buf *);
static void xbd_backend_changed(void *, XenbusState);
static void xbd_connect(struct xbd_xenbus_softc *);

static int  xbd_map_align(struct xbd_req *);
static void xbd_unmap_align(struct xbd_req *);

static void xbdminphys(struct buf *);

CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc),
    xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

dev_type_open(xbdopen);
dev_type_close(xbdclose);
dev_type_read(xbdread);
dev_type_write(xbdwrite);
dev_type_ioctl(xbdioctl);
dev_type_strategy(xbdstrategy);
dev_type_dump(xbddump);
dev_type_size(xbdsize);

const struct bdevsw xbd_bdevsw = {
	.d_open = xbdopen,
	.d_close = xbdclose,
	.d_strategy = xbdstrategy,
	.d_ioctl = xbdioctl,
	.d_dump = xbddump,
	.d_psize = xbdsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

const struct cdevsw xbd_cdevsw = {
	.d_open = xbdopen,
	.d_close = xbdclose,
	.d_read = xbdread,
	.d_write = xbdwrite,
	.d_ioctl = xbdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

extern struct cfdriver xbd_cd;

static struct dkdriver xbddkdriver = {
	.d_strategy = xbdstrategy,
	.d_minphys = xbdminphys,
	.d_open = xbdopen,
	.d_close = xbdclose,
	.d_diskstart = xbd_diskstart,
};

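/*
 * Autoconf glue: match any xenbus device of type "vbd" whose id is either
 * unconstrained or equal to the configured locator.
 */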
static int
xbd_xenbus_match(device_t parent, cfdata_t match, void *aux)
{
	struct xenbusdev_attach_args *xa = aux;

	if (strcmp(xa->xa_type, "vbd") != 0)
		return 0;

	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
		return 0;

	return 1;
}

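/*
 * Attach: initialize the dk(9)/disk(9) state, build the free request list,
 * allocate the shared ring page and let xbd_xenbus_resume() grant it to the
 * backend and publish ring-ref/event-channel via xenbus.
 */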
static void
xbd_xenbus_attach(device_t parent, device_t self, void *aux)
{
	struct xbd_xenbus_softc *sc = device_private(self);
	struct xenbusdev_attach_args *xa = aux;
	blkif_sring_t *ring;
	RING_IDX i;
#ifdef XBD_DEBUG
	char **dir, *val;
	int dir_n = 0;
	char id_str[20];
	int err;
#endif

	config_pending_incr(self);
	aprint_normal(": Xen Virtual Block Device Interface\n");

	dk_init(&sc->sc_dksc, self, DKTYPE_ESDI);
	disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver);

#ifdef XBD_DEBUG
	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
	err = xenbus_directory(NULL, "device/vbd", id_str, &dir_n, &dir);
	if (err) {
		aprint_error_dev(self, "xenbus_directory err %d\n", err);
	} else {
		printf("%s/\n", xa->xa_xbusd->xbusd_path);
		for (i = 0; i < dir_n; i++) {
			printf("\t/%s", dir[i]);
			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
			    dir[i], NULL, &val);
			if (err) {
				aprint_error_dev(self, "xenbus_read err %d\n",
				    err);
			} else {
				printf(" = %s\n", val);
				free(val, M_DEVBUF);
			}
		}
	}
#endif /* XBD_DEBUG */
	sc->sc_xbusd = xa->xa_xbusd;
	sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed;

	/* initialize free requests list */
	SLIST_INIT(&sc->sc_xbdreq_head);
	for (i = 0; i < XBD_RING_SIZE; i++) {
		sc->sc_reqs[i].req_id = i;
		SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i],
		    req_next);
	}

	sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
	sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;

	ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
	if (ring == NULL)
		panic("%s: can't alloc ring", device_xname(self));
	sc->sc_ring.sring = ring;

	/* resume shared structures and tell backend that we are ready */
	if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) {
		uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE,
		    UVM_KMF_WIRED);
		return;
	}

	if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume))
		aprint_error_dev(self, "couldn't establish power handler\n");

}

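/*
 * Detach: switch the backend to Closing, wait for outstanding I/O to drain
 * and for the backend to reach Closed, then tear down vnodes, wedges, the
 * disk(9) state, the event handler and the shared ring.
 */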
static int
xbd_xenbus_detach(device_t dev, int flags)
{
	struct xbd_xenbus_softc *sc = device_private(dev);
	int bmaj, cmaj, i, mn, rc, s;

	rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags);
	if (rc != 0)
		return rc;

	s = splbio();
	DPRINTF(("%s: xbd_detach\n", device_xname(dev)));
	if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) {
		sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL;
		/* wait for requests to complete */
		while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
		    sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
			tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach", hz/2);

		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
	}
	if ((flags & DETACH_FORCE) == 0) {
		/* xbd_xenbus_detach already in progress */
		wakeup(xbd_xenbus_detach);
		splx(s);
		return EALREADY;
	}
	while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend)
	    != XenbusStateClosed)
		tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach2", hz/2);
	splx(s);

	/* locate the major number */
	bmaj = bdevsw_lookup_major(&xbd_bdevsw);
	cmaj = cdevsw_lookup_major(&xbd_cdevsw);

	/* Nuke the vnodes for any open instances. */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		vdevgone(cmaj, mn, mn, VCHR);
	}
	if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) {
		/* Delete all of our wedges. */
		dkwedge_delall(&sc->sc_dksc.sc_dkdev);

		/* Kill off any queued buffers. */
		dk_drain(&sc->sc_dksc);
		bufq_free(sc->sc_dksc.sc_bufq);

		/* detach disk */
		disk_detach(&sc->sc_dksc.sc_dkdev);
		disk_destroy(&sc->sc_dksc.sc_dkdev);
		dk_detach(&sc->sc_dksc);
	}

	hypervisor_mask_event(sc->sc_evtchn);
	event_remove_handler(sc->sc_evtchn, &xbd_handler, sc);
	while (xengnt_status(sc->sc_ring_gntref)) {
		tsleep(xbd_xenbus_detach, PRIBIO, "xbd_ref", hz/2);
	}
	xengnt_revoke_access(sc->sc_ring_gntref);
	uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
	    PAGE_SIZE, UVM_KMF_WIRED);

	pmf_device_deregister(dev);

	return 0;
}

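/* Suspend: drain pending I/O, mask the event channel and remove its handler. */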
static bool
xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) {

	int s;
	struct xbd_xenbus_softc *sc;

	sc = device_private(dev);

	s = splbio();
	/* wait for requests to complete, then suspend device */
	while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
	    sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
		tsleep(xbd_xenbus_suspend, PRIBIO, "xbdsuspend", hz/2);

	hypervisor_mask_event(sc->sc_evtchn);
	sc->sc_backend_status = BLKIF_STATE_SUSPENDED;
	event_remove_handler(sc->sc_evtchn, xbd_handler, sc);

	splx(s);

	xenbus_device_suspend(sc->sc_xbusd);
	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);

	return true;
}

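/*
 * (Re)connect to the backend: reset the shared ring, grant it, allocate an
 * event channel, and advertise ring-ref, event-channel and protocol through
 * a xenbus transaction. Also called at attach time for the initial setup.
 */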
static bool
xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual)
{
	struct xbd_xenbus_softc *sc;
	struct xenbus_transaction *xbt;
	int error;
	blkif_sring_t *ring;
	paddr_t ma;
	const char *errmsg;

	sc = device_private(dev);

	if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
		/*
		 * Device was suspended, so ensure that access associated with
		 * the block I/O ring is revoked.
		 */
		xengnt_revoke_access(sc->sc_ring_gntref);
	}
	sc->sc_ring_gntref = GRANT_INVALID_REF;

	/* Initialize ring */
	ring = sc->sc_ring.sring;
	memset(ring, 0, PAGE_SIZE);
	SHARED_RING_INIT(ring);
	FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE);

	/*
	 * get MA address of the ring, and use it to set up the grant entry
	 * for the block device
	 */
	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma);
	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref);
	if (error)
		goto abort_resume;

	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
	if (error)
		goto abort_resume;

	aprint_verbose_dev(dev, "using event channel %d\n",
	    sc->sc_evtchn);
	event_set_handler(sc->sc_evtchn, &xbd_handler, sc,
	    IPL_BIO, device_xname(dev));

again:
	xbt = xenbus_transaction_start();
	if (xbt == NULL)
		return false;
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "ring-ref", "%u", sc->sc_ring_gntref);
	if (error) {
		errmsg = "writing ring-ref";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "event-channel", "%u", sc->sc_evtchn);
	if (error) {
		errmsg = "writing event channel";
		goto abort_transaction;
	}
	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
	    "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
	if (error) {
		errmsg = "writing protocol";
		goto abort_transaction;
	}
	error = xenbus_transaction_end(xbt, 0);
	if (error == EAGAIN)
		goto again;
	if (error != 0) {
		xenbus_dev_fatal(sc->sc_xbusd, error,
		    "completing transaction");
		return false;
	}

	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);

	if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
		/*
		 * device was suspended, softc structures are
		 * already initialized - we use a shortcut
		 */
		sc->sc_backend_status = BLKIF_STATE_CONNECTED;
		xenbus_device_resume(sc->sc_xbusd);
		hypervisor_enable_event(sc->sc_evtchn);
		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
	}

	return true;

abort_resume:
	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
	return false;

abort_transaction:
	xenbus_transaction_end(xbt, 1);
	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
	return false;
}

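/* Xenbus watch callback: react to state transitions announced by the backend. */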
static void xbd_backend_changed(void *arg, XenbusState new_state)
{
	struct xbd_xenbus_softc *sc = device_private((device_t)arg);
	struct disk_geom *dg;

	char buf[9];
	int s;
	DPRINTF(("%s: new backend state %d\n",
	    device_xname(sc->sc_dksc.sc_dev), new_state));

	switch (new_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
	case XenbusStateInitialised:
		break;
	case XenbusStateClosing:
		s = splbio();
		if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN)
			sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
		/* wait for requests to complete */
		while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
		    sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
			tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach", hz/2);
		splx(s);
		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
		break;
	case XenbusStateConnected:
		/*
		 * note that xbd_backend_changed() can only be called by
		 * the xenbus thread.
		 */

		if (sc->sc_backend_status == BLKIF_STATE_CONNECTED ||
		    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)
			/* already connected */
			return;

		xbd_connect(sc);
		sc->sc_shutdown = BLKIF_SHUTDOWN_RUN;
		hypervisor_enable_event(sc->sc_evtchn);

		sc->sc_xbdsize =
		    sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE;
		dg = &sc->sc_dksc.sc_dkdev.dk_geom;
		memset(dg, 0, sizeof(*dg));

		dg->dg_secperunit = sc->sc_xbdsize;
		dg->dg_secsize = DEV_BSIZE;
		dg->dg_ntracks = 1;
		// XXX: Ok to hard-code DEV_BSIZE?
		dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize);
		dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors;

		bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
		dk_attach(&sc->sc_dksc);
		disk_attach(&sc->sc_dksc.sc_dkdev);

		sc->sc_backend_status = BLKIF_STATE_CONNECTED;

		/* try to read the disklabel */
		dk_getdisklabel(&sc->sc_dksc, 0 /* XXX ? */);
		format_bytes(buf, sizeof(buf), sc->sc_sectors * sc->sc_secsize);
		aprint_verbose_dev(sc->sc_dksc.sc_dev,
		    "%s, %d bytes/sect x %" PRIu64 " sectors\n",
		    buf, (int)dg->dg_secsize, sc->sc_xbdsize);
		/* Discover wedges on this disk. */
		dkwedge_discover(&sc->sc_dksc.sc_dkdev);

		disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL);

		/* the disk should be working now */
		config_pending_decr(sc->sc_dksc.sc_dev);
		break;
	default:
		panic("bad backend state %d", new_state);
	}
}

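/*
 * Read the backend's xenstore entries (handle, size, sector size, info
 * flags, cache-flush support) and switch our xenbus state to Connected.
 */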
static void
xbd_connect(struct xbd_xenbus_softc *sc)
{
	int err;
	unsigned long long sectors;
	u_long cache_flush;

	err = xenbus_read_ul(NULL,
	    sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
	if (err)
		panic("%s: can't read number from %s/virtual-device\n",
		    device_xname(sc->sc_dksc.sc_dev),
		    sc->sc_xbusd->xbusd_otherend);
	err = xenbus_read_ull(NULL,
	    sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
	if (err)
		panic("%s: can't read number from %s/sectors\n",
		    device_xname(sc->sc_dksc.sc_dev),
		    sc->sc_xbusd->xbusd_otherend);
	sc->sc_sectors = sectors;

	err = xenbus_read_ul(NULL,
	    sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10);
	if (err)
		panic("%s: can't read number from %s/info\n",
		    device_xname(sc->sc_dksc.sc_dev),
		    sc->sc_xbusd->xbusd_otherend);
	err = xenbus_read_ul(NULL,
	    sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10);
	if (err)
		panic("%s: can't read number from %s/sector-size\n",
		    device_xname(sc->sc_dksc.sc_dev),
		    sc->sc_xbusd->xbusd_otherend);

	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-flush-cache", &cache_flush, 10);
	if (err)
		cache_flush = 0;
	if (cache_flush > 0)
		sc->sc_cache_flush = 1;
	else
		sc->sc_cache_flush = 0;

	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
}

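/*
 * Event-channel interrupt handler: consume responses from the I/O ring,
 * revoke the grants of completed segments, report errors, complete the
 * buffers through dk_done() and recycle the xbd requests.
 */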
static int
xbd_handler(void *arg)
{
	struct xbd_xenbus_softc *sc = arg;
	struct buf *bp;
	RING_IDX resp_prod, i;
	int more_to_do;
	int seg;

	DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev)));

	if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
		return 0;
again:
	resp_prod = sc->sc_ring.sring->rsp_prod;
	xen_rmb(); /* ensure we see replies up to resp_prod */
	for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
		blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
		struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
		bp = xbdreq->req_bp;
		DPRINTF(("xbd_handler(%p): b_bcount = %ld\n",
		    xbdreq->req_bp, (long)bp->b_bcount));
		if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) {
			xbdreq->req_sync.s_error = rep->status;
			xbdreq->req_sync.s_done = 1;
			wakeup(xbdreq);
			/* caller will free the req */
			continue;
		}
		for (seg = xbdreq->req_nr_segments - 1; seg >= 0; seg--) {
			if (__predict_false(
			    xengnt_status(xbdreq->req_gntref[seg]))) {
				aprint_verbose_dev(sc->sc_dksc.sc_dev,
				    "grant still used by backend\n");
				sc->sc_ring.rsp_cons = i;
				xbdreq->req_nr_segments = seg + 1;
				goto done;
			}
			xengnt_revoke_access(xbdreq->req_gntref[seg]);
			xbdreq->req_nr_segments--;
		}
		if (rep->operation != BLKIF_OP_READ &&
		    rep->operation != BLKIF_OP_WRITE) {
			aprint_error_dev(sc->sc_dksc.sc_dev,
			    "bad operation %d from backend\n", rep->operation);
			bp->b_error = EIO;
			bp->b_resid = bp->b_bcount;
			goto next;
		}
		if (rep->status != BLKIF_RSP_OKAY) {
			bp->b_error = EIO;
			bp->b_resid = bp->b_bcount;
			goto next;
		}
		/* b_resid was set in dk_start */
next:
		if (bp->b_data != xbdreq->req_data)
			xbd_unmap_align(xbdreq);

		dk_done(&sc->sc_dksc, bp);

		SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
	}
done:
	xen_rmb();
	sc->sc_ring.rsp_cons = i;

	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
	if (more_to_do)
		goto again;

	if (sc->sc_xbdreq_wait)
		wakeup(&sc->sc_xbdreq_wait);
	else
		dk_start(&sc->sc_dksc, NULL);
	return 1;
}

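/* Clamp transfers to what fits in one ring request before calling minphys(). */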
static void
xbdminphys(struct buf *bp)
{
	if (bp->b_bcount > XBD_MAX_XFER) {
		bp->b_bcount = XBD_MAX_XFER;
	}
	minphys(bp);
}

int
xbdopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct xbd_xenbus_softc *sc;

	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
	if (sc == NULL)
		return (ENXIO);
	if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY))
		return EROFS;

	DPRINTF(("xbdopen(0x%04x, %d)\n", dev, flags));
	return dk_open(&sc->sc_dksc, dev, flags, fmt, l);
}

int
xbdclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct xbd_xenbus_softc *sc;

	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));

	DPRINTF(("xbdclose(%d, %d)\n", dev, flags));
	return dk_close(&sc->sc_dksc, dev, flags, fmt, l);
}

void
xbdstrategy(struct buf *bp)
{
	struct xbd_xenbus_softc *sc;

	sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev));

	DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp,
	    (long)bp->b_bcount));

	if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
		bp->b_error = EIO;
		biodone(bp);
		return;
	}
	if (__predict_false((sc->sc_info & VDISK_READONLY) &&
	    (bp->b_flags & B_READ) == 0)) {
		bp->b_error = EROFS;
		biodone(bp);
		return;
	}

	dk_strategy(&sc->sc_dksc, bp);
	return;
}

int
xbdsize(dev_t dev)
{
	struct xbd_xenbus_softc *sc;

	DPRINTF(("xbdsize(%d)\n", dev));

	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
	if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN)
		return -1;
	return dk_size(&sc->sc_dksc, dev);
}

int
xbdread(dev_t dev, struct uio *uio, int flags)
{
	struct xbd_xenbus_softc *sc =
	    device_lookup_private(&xbd_cd, DISKUNIT(dev));
	struct dk_softc *dksc = &sc->sc_dksc;

	if (!DK_ATTACHED(dksc))
		return ENXIO;
	return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio);
}

int
xbdwrite(dev_t dev, struct uio *uio, int flags)
{
	struct xbd_xenbus_softc *sc =
	    device_lookup_private(&xbd_cd, DISKUNIT(dev));
	struct dk_softc *dksc = &sc->sc_dksc;

	if (!DK_ATTACHED(dksc))
		return ENXIO;
	if (__predict_false(sc->sc_info & VDISK_READONLY))
		return EROFS;
	return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio);
}

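/*
 * Ioctl entry point. DIOCCACHESYNC is implemented here as a synchronous
 * BLKIF_OP_FLUSH_DISKCACHE request; everything else is passed to dk_ioctl().
 */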
int
xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct xbd_xenbus_softc *sc =
	    device_lookup_private(&xbd_cd, DISKUNIT(dev));
	struct dk_softc *dksc;
	int error;
	int s;
	struct xbd_req *xbdreq;
	blkif_request_t *req;
	int notify;

	DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
	    dev, cmd, data, flag, l));
	dksc = &sc->sc_dksc;

	switch (cmd) {
	case DIOCSSTRATEGY:
		error = EOPNOTSUPP;
		break;
	case DIOCCACHESYNC:
		if (sc->sc_cache_flush <= 0) {
			if (sc->sc_cache_flush == 0) {
				aprint_error_dev(sc->sc_dksc.sc_dev,
				    "WARNING: cache flush not supported "
				    "by backend\n");
				sc->sc_cache_flush = -1;
			}
			return EOPNOTSUPP;
		}

		s = splbio();

		while (RING_FULL(&sc->sc_ring)) {
			sc->sc_xbdreq_wait = 1;
			tsleep(&sc->sc_xbdreq_wait, PRIBIO, "xbdreq", 0);
		}
		sc->sc_xbdreq_wait = 0;

		xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
		if (__predict_false(xbdreq == NULL)) {
			DPRINTF(("xbdioctl: no req\n"));
			error = ENOMEM;
		} else {
			SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
			req = RING_GET_REQUEST(&sc->sc_ring,
			    sc->sc_ring.req_prod_pvt);
			req->id = xbdreq->req_id;
			req->operation = BLKIF_OP_FLUSH_DISKCACHE;
			req->handle = sc->sc_handle;
			xbdreq->req_sync.s_done = 0;
			sc->sc_ring.req_prod_pvt++;
			RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring,
			    notify);
			if (notify)
				hypervisor_notify_via_evtchn(sc->sc_evtchn);
			/* request sent, now wait for completion */
			while (xbdreq->req_sync.s_done == 0) {
				tsleep(xbdreq, PRIBIO, "xbdsync", 0);
			}
			if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP)
				error = EOPNOTSUPP;
			else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY)
				error = 0;
			else
				error = EIO;
			SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
			    req_next);
		}
		splx(s);
		break;

	default:
		error = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return error;
}

int
xbddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	struct xbd_xenbus_softc *sc;

	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
	if (sc == NULL)
		return (ENXIO);

	DPRINTF(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va,
	    (unsigned long)size));
	return dk_dump(&sc->sc_dksc, dev, blkno, va, size);
}

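/*
 * dk(9) start routine: translate a struct buf into a blkif request, granting
 * the backend access to each data page, then push it on the ring and notify
 * the backend if needed. Returns EAGAIN to keep the buffer queued when the
 * ring or the request pool is exhausted.
 */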
static int
xbd_diskstart(device_t self, struct buf *bp)
{
	struct xbd_xenbus_softc *sc = device_private(self);
	struct xbd_req *xbdreq;
	blkif_request_t *req;
	size_t bcount, off;
	paddr_t ma;
	vaddr_t va;
	int nsects, nbytes, seg;
	int notify, error = 0;

	DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
	    bp, (long)bp->b_bcount));

	if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
		error = EIO;
		goto err;
	}

	if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
		/* invalid block number */
		error = EINVAL;
		goto err;
	}

	if (__predict_false(
	    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
		/* device is suspended, do not consume buffer */
		DPRINTF(("%s: (xbd_diskstart) device suspended\n",
		    sc->sc_dksc.sc_xname));
		error = EAGAIN;
		goto out;
	}

	if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
		DPRINTF(("xbd_diskstart: ring_full\n"));
		error = EAGAIN;
		goto out;
	}

	xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
	if (__predict_false(xbdreq == NULL)) {
		DPRINTF(("xbd_diskstart: no req\n"));
		error = EAGAIN;
		goto out;
	}

	xbdreq->req_bp = bp;
	xbdreq->req_data = bp->b_data;
	if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
		if (__predict_false(xbd_map_align(xbdreq) != 0)) {
			DPRINTF(("xbd_diskstart: no align\n"));
			error = EAGAIN;
			goto out;
		}
	}

	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
	req->id = xbdreq->req_id;
	req->operation =
	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
	req->sector_number = bp->b_rawblkno;
	req->handle = sc->sc_handle;

	va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
	off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
	bcount = bp->b_bcount;
	bp->b_resid = 0;
	for (seg = 0; bcount > 0;) {
		pmap_extract_ma(pmap_kernel(), va, &ma);
		KASSERT((ma & (XEN_BSIZE - 1)) == 0);
		if (bcount > PAGE_SIZE - off)
			nbytes = PAGE_SIZE - off;
		else
			nbytes = bcount;
		nsects = nbytes >> XEN_BSHIFT;
		req->seg[seg].first_sect = off >> XEN_BSHIFT;
		req->seg[seg].last_sect =
		    (off >> XEN_BSHIFT) + nsects - 1;
		KASSERT(req->seg[seg].first_sect <=
		    req->seg[seg].last_sect);
		KASSERT(req->seg[seg].last_sect < 8);
		if (__predict_false(xengnt_grant_access(
		    sc->sc_xbusd->xbusd_otherend_id, ma,
		    (bp->b_flags & B_READ) == 0,
		    &xbdreq->req_gntref[seg])))
			panic("xbd_diskstart: xengnt_grant_access"); /* XXX XXX !!! */
		req->seg[seg].gref = xbdreq->req_gntref[seg];
		seg++;
		KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		va += PAGE_SIZE;
		off = 0;
		bcount -= nbytes;
	}
	xbdreq->req_nr_segments = req->nr_segments = seg;
	sc->sc_ring.req_prod_pvt++;

out:
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
	if (notify)
		hypervisor_notify_via_evtchn(sc->sc_evtchn);
err:
	return error;
}

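/*
 * Bounce-buffer helpers for buffers that are not XEN_BSIZE-aligned: copy the
 * data through a temporary kmem allocation for the duration of the transfer.
 */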
static int
xbd_map_align(struct xbd_req *req)
{
	int s = splvm();
	int rc;

	rc = uvm_km_kmem_alloc(kmem_va_arena,
	    req->req_bp->b_bcount, (VM_NOSLEEP | VM_INSTANTFIT),
	    (vmem_addr_t *)&req->req_data);
	splx(s);
	if (__predict_false(rc != 0))
		return ENOMEM;
	if ((req->req_bp->b_flags & B_READ) == 0)
		memcpy(req->req_data, req->req_bp->b_data,
		    req->req_bp->b_bcount);
	return 0;
}

static void
xbd_unmap_align(struct xbd_req *req)
{
	int s;
	if (req->req_bp->b_flags & B_READ)
		memcpy(req->req_bp->b_data, req->req_data,
		    req->req_bp->b_bcount);
	s = splvm();
	uvm_km_kmem_free(kmem_va_arena, (vaddr_t)req->req_data,
	    req->req_bp->b_bcount);
	splx(s);
}