1/* $NetBSD: if_xennet_xenbus.c,v 1.68 2016/06/10 13:27:13 ozaki-r Exp $ */
2
3/*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28/*
29 * Copyright (c) 2004 Christian Limpach.
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
42 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
45 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
50 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52
53/*
54 * This file contains the xennet frontend code required for the network
55 * communication between two Xen domains.
56 * It resembles xbd, but is a little more complex as it must deal with two
57 * rings:
58 * - the TX ring, to transmit packets to the backend (inside => outside)
59 * - the RX ring, to receive packets from the backend (outside => inside)
60 *
61 * The principles are as follows.
62 *
63 * For TX:
64 * The purpose is to transmit packets to the outside. Everything starts in
65 * xennet_start() (the default output routine of xennet), which schedules a
66 * softint, xennet_softstart(). xennet_softstart() generates the requests
67 * associated with the queued TX mbufs (see altq(9)).
68 * The backend's responses are processed by xennet_tx_complete(), called either
69 * from:
70 * - xennet_start()
71 * - xennet_handler(), during an asynchronous event notification from the
72 *   backend (similar to an IRQ).
72 * (similar to an IRQ).
73 *
74 * For RX:
75 * The purpose is to process the packets received from the outside. RX buffers
76 * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
77 * attach. During pre-allocation, the frontend pushes requests into the I/O
78 * ring, in preparation for incoming packets from the backend.
79 * When RX packets need to be processed, the backend takes the requests
80 * previously offered by the frontend and pushes the associated responses into
81 * the I/O ring. When done, it notifies the frontend through an event
82 * notification, which asynchronously calls xennet_handler() in the frontend.
83 * xennet_handler() processes the responses, generates the associated mbuf, and
84 * passes it to the MI layer for further processing.
85 */
86
87#include <sys/cdefs.h>
88__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.68 2016/06/10 13:27:13 ozaki-r Exp $");
89
90#include "opt_xen.h"
91#include "opt_nfs_boot.h"
92
93#include <sys/param.h>
94#include <sys/device.h>
95#include <sys/conf.h>
96#include <sys/kernel.h>
97#include <sys/proc.h>
98#include <sys/systm.h>
99#include <sys/intr.h>
100#include <sys/rndsource.h>
101
102#include <net/if.h>
103#include <net/if_dl.h>
104#include <net/if_ether.h>
105#include <net/bpf.h>
106#include <net/bpfdesc.h>
107
108#if defined(NFS_BOOT_BOOTSTATIC)
109#include <sys/fstypes.h>
110#include <sys/mount.h>
111#include <sys/statvfs.h>
112#include <netinet/in.h>
113#include <nfs/rpcv2.h>
114#include <nfs/nfsproto.h>
115#include <nfs/nfs.h>
116#include <nfs/nfsmount.h>
117#include <nfs/nfsdiskless.h>
118#include <xen/if_xennetvar.h>
119#endif /* defined(NFS_BOOT_BOOTSTATIC) */
120
121#include <xen/xennet_checksum.h>
122
123#include <uvm/uvm.h>
124
125#include <xen/hypervisor.h>
126#include <xen/evtchn.h>
127#include <xen/granttables.h>
128#include <xen/xen-public/io/netif.h>
129#include <xen/xenpmap.h>
130
131#include <xen/xenbus.h>
132#include "locators.h"
133
134#undef XENNET_DEBUG_DUMP
135#undef XENNET_DEBUG
136#ifdef XENNET_DEBUG
137#define XEDB_FOLLOW 0x01
138#define XEDB_INIT 0x02
139#define XEDB_EVENT 0x04
140#define XEDB_MBUF 0x08
141#define XEDB_MEM 0x10
142int xennet_debug = 0xff;
143#define DPRINTF(x) if (xennet_debug) printf x;
144#define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
145#else
146#define DPRINTF(x)
147#define DPRINTFN(n,x)
148#endif
149
150#define GRANT_INVALID_REF -1 /* entry is free */
151
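/*
 * Number of slots in the shared TX and RX rings; each ring occupies a
 * single page, so the counts are derived from PAGE_SIZE.
 */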
152#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
153#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
154
155struct xennet_txreq {
156 SLIST_ENTRY(xennet_txreq) txreq_next;
157 uint16_t txreq_id; /* ID passed to backend */
158 grant_ref_t txreq_gntref; /* grant ref of this request */
159 struct mbuf *txreq_m; /* mbuf being transmitted */
160};
161
162struct xennet_rxreq {
163 SLIST_ENTRY(xennet_rxreq) rxreq_next;
164 uint16_t rxreq_id; /* ID passed to backend */
165 grant_ref_t rxreq_gntref; /* grant ref of this request */
166/* va/pa for this receive buf. ma will be provided by backend */
167 paddr_t rxreq_pa;
168 vaddr_t rxreq_va;
169 struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
170};
171
172struct xennet_xenbus_softc {
173 device_t sc_dev;
174 struct ethercom sc_ethercom;
175 uint8_t sc_enaddr[6];
176 struct xenbus_device *sc_xbusd;
177
178 netif_tx_front_ring_t sc_tx_ring;
179 netif_rx_front_ring_t sc_rx_ring;
180
181 unsigned int sc_evtchn;
182 void *sc_softintr;
183
184 grant_ref_t sc_tx_ring_gntref;
185 grant_ref_t sc_rx_ring_gntref;
186
187 kmutex_t sc_tx_lock; /* protects free TX list, below */
188 kmutex_t sc_rx_lock; /* protects free RX list, below */
189 struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
190 struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
191 SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
192 SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
193 int sc_free_rxreql; /* number of free receive request structs */
194
195 int sc_backend_status; /* our status with backend */
196#define BEST_CLOSED 0
197#define BEST_DISCONNECTED 1
198#define BEST_CONNECTED 2
199#define BEST_SUSPENDED 3
200 unsigned long sc_rx_feature;
201#define FEATURE_RX_FLIP 0
202#define FEATURE_RX_COPY 1
203 krndsource_t sc_rnd_source;
204};
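/*
 * Number of RX requests currently outstanding in the ring, i.e. produced
 * by the frontend but not yet answered by the backend.
 */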
205#define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
206 (sc)->sc_rx_ring.sring->rsp_prod)
207
208/* too big to be on stack */
209static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
210static u_long xennet_pages[NET_RX_RING_SIZE];
211
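/* Pool cache providing the page-sized RX buffers; shared by all xennet instances. */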
212static pool_cache_t if_xennetrxbuf_cache;
213static int if_xennetrxbuf_cache_inited=0;
214
215static int xennet_xenbus_match(device_t, cfdata_t, void *);
216static void xennet_xenbus_attach(device_t, device_t, void *);
217static int xennet_xenbus_detach(device_t, int);
218static void xennet_backend_changed(void *, XenbusState);
219
220static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
221static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
222static void xennet_tx_complete(struct xennet_xenbus_softc *);
223static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
224static void xennet_rx_free_req(struct xennet_rxreq *);
225static int xennet_handler(void *);
226static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
227#ifdef XENNET_DEBUG_DUMP
228static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229#endif
230
231static int xennet_init(struct ifnet *);
232static void xennet_stop(struct ifnet *, int);
233static void xennet_reset(struct xennet_xenbus_softc *);
234static void xennet_softstart(void *);
235static void xennet_start(struct ifnet *);
236static int xennet_ioctl(struct ifnet *, u_long, void *);
237static void xennet_watchdog(struct ifnet *);
238
239static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
240static bool xennet_xenbus_resume (device_t dev, const pmf_qual_t *);
241
242CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
243 xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
244
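/* Match "vif" xenbus devices, honouring a wired-down id locator if any. */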
245static int
246xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
247{
248 struct xenbusdev_attach_args *xa = aux;
249
250 if (strcmp(xa->xa_type, "vif") != 0)
251 return 0;
252
253 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
254 match->cf_loc[XENBUSCF_ID] != xa->xa_id)
255 return 0;
256
257 return 1;
258}
259
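/*
 * Attach a xennet instance: set up the free TX/RX request lists, allocate
 * RX buffers, read the MAC address from xenbus, attach the ifnet and
 * allocate the shared rings before resuming the device.
 */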
260static void
261xennet_xenbus_attach(device_t parent, device_t self, void *aux)
262{
263 struct xennet_xenbus_softc *sc = device_private(self);
264 struct xenbusdev_attach_args *xa = aux;
265 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
266 int err;
267 netif_tx_sring_t *tx_ring;
268 netif_rx_sring_t *rx_ring;
269 RING_IDX i;
270 char *val, *e, *p;
271 int s;
272 extern int ifqmaxlen; /* XXX */
273#ifdef XENNET_DEBUG
274 char **dir;
275 int dir_n = 0;
276 char id_str[20];
277#endif
278
279 aprint_normal(": Xen Virtual Network Interface\n");
280 sc->sc_dev = self;
281
282#ifdef XENNET_DEBUG
283 printf("path: %s\n", xa->xa_xbusd->xbusd_path);
284 snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
285 err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
286 if (err) {
287 aprint_error_dev(self, "xenbus_directory err %d\n", err);
288 } else {
289 printf("%s/\n", xa->xa_xbusd->xbusd_path);
290 for (i = 0; i < dir_n; i++) {
291 printf("\t/%s", dir[i]);
292 err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
293 dir[i], NULL, &val);
294 if (err) {
295 aprint_error_dev(self, "xenbus_read err %d\n",
296 err);
297 } else {
298 printf(" = %s\n", val);
299 free(val, M_DEVBUF);
300 }
301 }
302 }
303#endif /* XENNET_DEBUG */
304 sc->sc_xbusd = xa->xa_xbusd;
305 sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
306
307 /* xenbus ensures two devices can't be probed at the same time */
308 if (if_xennetrxbuf_cache_inited == 0) {
309 if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
310 "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
311 if_xennetrxbuf_cache_inited = 1;
312 }
313
314
315 /* initialize free TX and RX request lists */
316 mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
317 SLIST_INIT(&sc->sc_txreq_head);
318 for (i = 0; i < NET_TX_RING_SIZE; i++) {
319 sc->sc_txreqs[i].txreq_id = i;
320 SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
321 txreq_next);
322 }
323 mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
324 SLIST_INIT(&sc->sc_rxreq_head);
325 s = splvm();
326 for (i = 0; i < NET_RX_RING_SIZE; i++) {
327 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
328 rxreq->rxreq_id = i;
329 rxreq->rxreq_sc = sc;
330 rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
331 if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
332 if (rxreq->rxreq_va == 0)
333 break;
334 rxreq->rxreq_gntref = GRANT_INVALID_REF;
335 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
336 }
337 splx(s);
338 sc->sc_free_rxreql = i;
339 if (sc->sc_free_rxreql == 0) {
340 aprint_error_dev(self, "failed to allocate rx memory\n");
341 return;
342 }
343
344 /* read mac address */
345 err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
346 if (err) {
347 aprint_error_dev(self, "can't read mac address, err %d\n", err);
348 return;
349 }
350 for (i = 0, p = val; i < 6; i++) {
351 sc->sc_enaddr[i] = strtoul(p, &e, 16);
352 if ((e[0] == '\0' && i != 5) && e[0] != ':') {
353 aprint_error_dev(self,
354 "%s is not a valid mac address\n", val);
355 free(val, M_DEVBUF);
356 return;
357 }
358 p = &e[1];
359 }
360 free(val, M_DEVBUF);
361 aprint_normal_dev(self, "MAC address %s\n",
362 ether_sprintf(sc->sc_enaddr));
363 /* Initialize ifnet structure and attach interface */
364 strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
365 sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
366 ifp->if_softc = sc;
367 ifp->if_start = xennet_start;
368 ifp->if_ioctl = xennet_ioctl;
369 ifp->if_watchdog = xennet_watchdog;
370 ifp->if_init = xennet_init;
371 ifp->if_stop = xennet_stop;
372 ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
373 ifp->if_timer = 0;
374 ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
375 ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
376 IFQ_SET_READY(&ifp->if_snd);
377 if_attach(ifp);
378 ether_ifattach(ifp, sc->sc_enaddr);
379 sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
380 if (sc->sc_softintr == NULL)
381 panic("%s: can't establish soft interrupt",
382 device_xname(self));
383
384 /* alloc shared rings */
385 tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
386 UVM_KMF_WIRED);
387 rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
388 UVM_KMF_WIRED);
389 if (tx_ring == NULL || rx_ring == NULL)
390 panic("%s: can't alloc rings", device_xname(self));
391
392 sc->sc_tx_ring.sring = tx_ring;
393 sc->sc_rx_ring.sring = rx_ring;
394
395 /* resume shared structures and tell backend that we are ready */
396 if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
397 uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
398 UVM_KMF_WIRED);
399 uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
400 UVM_KMF_WIRED);
401 return;
402 }
403
404 rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
405 RND_TYPE_NET, RND_FLAG_DEFAULT);
406
407 if (!pmf_device_register(self, xennet_xenbus_suspend,
408 xennet_xenbus_resume))
409 aprint_error_dev(self, "couldn't establish power handler\n");
410 else
411 pmf_class_network_register(self, ifp);
412}
413
414static int
415xennet_xenbus_detach(device_t self, int flags)
416{
417 struct xennet_xenbus_softc *sc = device_private(self);
418 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
419 int s0, s1;
420 RING_IDX i;
421
422 DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
423 s0 = splnet();
424 xennet_stop(ifp, 1);
425 event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
426 /* wait for pending TX to complete, and collect pending RX packets */
427 xennet_handler(sc);
428 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
429 tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
430 xennet_handler(sc);
431 }
432 xennet_free_rx_buffer(sc);
433
434 s1 = splvm();
435 for (i = 0; i < NET_RX_RING_SIZE; i++) {
436 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
437 uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
438 UVM_KMF_WIRED);
439 }
440 splx(s1);
441
442 ether_ifdetach(ifp);
443 if_detach(ifp);
444
445 /* Unhook the entropy source. */
446 rnd_detach_source(&sc->sc_rnd_source);
447
448 while (xengnt_status(sc->sc_tx_ring_gntref)) {
449 tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
450 }
451 xengnt_revoke_access(sc->sc_tx_ring_gntref);
452 uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
453 UVM_KMF_WIRED);
454 while (xengnt_status(sc->sc_rx_ring_gntref)) {
455 tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
456 }
457 xengnt_revoke_access(sc->sc_rx_ring_gntref);
458 uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
459 UVM_KMF_WIRED);
460 softint_disestablish(sc->sc_softintr);
461 splx(s0);
462
463 pmf_device_deregister(self);
464
465 DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
466 return 0;
467}
468
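/*
 * (Re)initialize the shared TX/RX rings, grant them to the backend and
 * allocate a new event channel. Called both at attach time and when
 * resuming after a suspend.
 */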
469static bool
470xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
471{
472 struct xennet_xenbus_softc *sc = device_private(dev);
473 int error;
474 netif_tx_sring_t *tx_ring;
475 netif_rx_sring_t *rx_ring;
476 paddr_t ma;
477
478 /* invalidate the RX and TX rings */
479 if (sc->sc_backend_status == BEST_SUSPENDED) {
480 /*
481 * The device was suspended, so ensure that the grant accesses
482 * associated with the previous RX and TX rings are revoked.
483 */
484 xengnt_revoke_access(sc->sc_tx_ring_gntref);
485 xengnt_revoke_access(sc->sc_rx_ring_gntref);
486 }
487
488 sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
489 sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
490
491 tx_ring = sc->sc_tx_ring.sring;
492 rx_ring = sc->sc_rx_ring.sring;
493
494 /* Initialize rings */
495 memset(tx_ring, 0, PAGE_SIZE);
496 SHARED_RING_INIT(tx_ring);
497 FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
498
499 memset(rx_ring, 0, PAGE_SIZE);
500 SHARED_RING_INIT(rx_ring);
501 FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
502
503 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
504 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
505 if (error)
506 goto abort_resume;
507 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
508 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
509 if (error)
510 goto abort_resume;
511 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
512 if (error)
513 goto abort_resume;
514 aprint_verbose_dev(dev, "using event channel %d\n",
515 sc->sc_evtchn);
516 event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
517 IPL_NET, device_xname(dev));
518 return true;
519
520abort_resume:
521 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
522 return false;
523}
524
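/*
 * Negotiate the RX mode (copy vs. flip) with the backend and publish the
 * ring references and event channel through a xenbus transaction.
 */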
525static bool
526xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
527{
528 int error;
529 unsigned long rx_copy;
530 struct xenbus_transaction *xbt;
531 const char *errmsg;
532
533 error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
534 "feature-rx-copy", &rx_copy, 10);
535 if (error)
536 rx_copy = 0; /* default value if key is absent */
537
538 if (rx_copy == 1) {
539 aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
540 sc->sc_rx_feature = FEATURE_RX_COPY;
541 } else {
542 aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
543 sc->sc_rx_feature = FEATURE_RX_FLIP;
544 }
545
546again:
547 xbt = xenbus_transaction_start();
548 if (xbt == NULL)
549 return false;
550 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
551 "vifname", "%s", device_xname(sc->sc_dev));
552 if (error) {
553 errmsg = "vifname";
554 goto abort_transaction;
555 }
556 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
557 "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
558 if (error) {
559 errmsg = "writing tx ring-ref";
560 goto abort_transaction;
561 }
562 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
563 "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
564 if (error) {
565 errmsg = "writing rx ring-ref";
566 goto abort_transaction;
567 }
568 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
569 "request-rx-copy", "%lu", rx_copy);
570 if (error) {
571 errmsg = "writing request-rx-copy";
572 goto abort_transaction;
573 }
574 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
575 "feature-rx-notify", "%u", 1);
576 if (error) {
577 errmsg = "writing feature-rx-notify";
578 goto abort_transaction;
579 }
580 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
581 "event-channel", "%u", sc->sc_evtchn);
582 if (error) {
583 errmsg = "writing event channel";
584 goto abort_transaction;
585 }
586 error = xenbus_transaction_end(xbt, 0);
587 if (error == EAGAIN)
588 goto again;
589 if (error) {
590 xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
591 return false;
592 }
593 mutex_enter(&sc->sc_rx_lock);
594 xennet_alloc_rx_buffer(sc);
595 mutex_exit(&sc->sc_rx_lock);
596
597 if (sc->sc_backend_status == BEST_SUSPENDED) {
598 xenbus_device_resume(sc->sc_xbusd);
599 }
600
601 sc->sc_backend_status = BEST_CONNECTED;
602
603 return true;
604
605abort_transaction:
606 xenbus_transaction_end(xbt, 1);
607 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
608 return false;
609}
610
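/*
 * Wait for pending TX responses to drain, then detach from the event
 * channel and mark the device suspended. RX grants are deliberately left
 * in place for the backend.
 */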
611static bool
612xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
613{
614 int s;
615 struct xennet_xenbus_softc *sc = device_private(dev);
616
617 /*
618 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
619 * so we do not mask the event channel here.
620 */
621
622 s = splnet();
623 /* process any outstanding TX responses, then collect RX packets */
624 xennet_handler(sc);
625 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
626 tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
627 xennet_handler(sc);
628 }
629
630 /*
631 * dom0 may still use references to the grants we gave away
632 * earlier during RX buffer allocation. So we do not free RX buffers
633 * here, as dom0 does not expect the guest domain to suddenly revoke
634 * access to these grants.
635 */
636
637 sc->sc_backend_status = BEST_SUSPENDED;
638 event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
639
640 splx(s);
641
642 xenbus_device_suspend(sc->sc_xbusd);
643 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
644
645 return true;
646}
647
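/* Callback invoked whenever the backend changes its xenbus state. */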
648static void xennet_backend_changed(void *arg, XenbusState new_state)
649{
650 struct xennet_xenbus_softc *sc = device_private((device_t)arg);
651 DPRINTF(("%s: new backend state %d\n",
652 device_xname(sc->sc_dev), new_state));
653
654 switch (new_state) {
655 case XenbusStateInitialising:
656 case XenbusStateInitialised:
657 case XenbusStateConnected:
658 break;
659 case XenbusStateClosing:
660 sc->sc_backend_status = BEST_CLOSED;
661 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
662 break;
663 case XenbusStateInitWait:
664 if (sc->sc_backend_status == BEST_CONNECTED)
665 break;
666 if (xennet_talk_to_backend(sc))
667 xenbus_switch_state(sc->sc_xbusd, NULL,
668 XenbusStateConnected);
669 break;
670 case XenbusStateUnknown:
671 default:
672 panic("bad backend state %d", new_state);
673 }
674}
675
676/*
677 * Allocate RX buffers and put the associated request structures
678 * in the ring. This allows the backend to use them to communicate with the
679 * frontend when some data is destined to the frontend.
680 */
681
682static void
683xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
684{
685 RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
686 RING_IDX i;
687 struct xennet_rxreq *req;
688 struct xen_memory_reservation reservation;
689 int s, otherend_id, notify;
690
691 otherend_id = sc->sc_xbusd->xbusd_otherend_id;
692
693 KASSERT(mutex_owned(&sc->sc_rx_lock));
694 for (i = 0; sc->sc_free_rxreql != 0; i++) {
695 req = SLIST_FIRST(&sc->sc_rxreq_head);
696 KASSERT(req != NULL);
697 KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
698 RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
699 req->rxreq_id;
700
701 switch (sc->sc_rx_feature) {
702 case FEATURE_RX_COPY:
703 if (xengnt_grant_access(otherend_id,
704 xpmap_ptom_masked(req->rxreq_pa),
705 0, &req->rxreq_gntref) != 0) {
706 goto out_loop;
707 }
708 break;
709 case FEATURE_RX_FLIP:
710 if (xengnt_grant_transfer(otherend_id,
711 &req->rxreq_gntref) != 0) {
712 goto out_loop;
713 }
714 break;
715 default:
716 panic("%s: unsupported RX feature mode: %ld\n",
717 __func__, sc->sc_rx_feature);
718 }
719
720 RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
721 req->rxreq_gntref;
722
723 SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
724 sc->sc_free_rxreql--;
725
726 if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
727 /* unmap the page */
728 MULTI_update_va_mapping(&rx_mcl[i],
729 req->rxreq_va, 0, 0);
730 /*
731 * Remove this page from the pseudo-physical map before
732 * passing it back to Xen.
733 */
734 xennet_pages[i] =
735 xpmap_ptom(req->rxreq_pa) >> PAGE_SHIFT;
736 xpmap_ptom_unmap(req->rxreq_pa);
737 }
738 }
739
740out_loop:
741 if (i == 0) {
742 return;
743 }
744
745 if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
746 /* also make sure to flush all TLB entries */
747 rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
748 UVMF_TLB_FLUSH | UVMF_ALL;
749 /*
750 * We may have allocated buffers which have entries
751 * outstanding in the page update queue -- make sure we flush
752 * those first!
753 */
754 s = splvm();
755 xpq_flush_queue();
756 splx(s);
757 /* now decrease reservation */
758 set_xen_guest_handle(reservation.extent_start, xennet_pages);
759 reservation.nr_extents = i;
760 reservation.extent_order = 0;
761 reservation.address_bits = 0;
762 reservation.domid = DOMID_SELF;
763 rx_mcl[i].op = __HYPERVISOR_memory_op;
764 rx_mcl[i].args[0] = XENMEM_decrease_reservation;
765 rx_mcl[i].args[1] = (unsigned long)&reservation;
766 HYPERVISOR_multicall(rx_mcl, i+1);
767 if (__predict_false(rx_mcl[i].result != i)) {
768 panic("xennet_alloc_rx_buffer: "
769 "XENMEM_decrease_reservation");
770 }
771 }
772
773 sc->sc_rx_ring.req_prod_pvt = req_prod + i;
774 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
775 if (notify)
776 hypervisor_notify_via_evtchn(sc->sc_evtchn);
777 return;
778}
779
780/*
781 * Reclaim all RX buffers used by the I/O ring between the frontend and backend.
782 */
783static void
784xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
785{
786 paddr_t ma, pa;
787 vaddr_t va;
788 RING_IDX i;
789 mmu_update_t mmu[1];
790 multicall_entry_t mcl[2];
791
792 mutex_enter(&sc->sc_rx_lock);
793
794 DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
795 /* get back memory from RX ring */
796 for (i = 0; i < NET_RX_RING_SIZE; i++) {
797 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
798
799 if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
800 /*
801 * this req is still granted. Get back the page or
802 * allocate a new one, and remap it.
803 */
804 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
805 rxreq_next);
806 sc->sc_free_rxreql++;
807
808 switch (sc->sc_rx_feature) {
809 case FEATURE_RX_COPY:
810 xengnt_revoke_access(rxreq->rxreq_gntref);
811 rxreq->rxreq_gntref = GRANT_INVALID_REF;
812 break;
813 case FEATURE_RX_FLIP:
814 ma = xengnt_revoke_transfer(
815 rxreq->rxreq_gntref);
816 rxreq->rxreq_gntref = GRANT_INVALID_REF;
817 if (ma == 0) {
818 u_long pfn;
819 struct xen_memory_reservation xenres;
820 /*
821 * The transfer did not complete, so we lost the page.
822 * Get one from the hypervisor.
823 */
824 set_xen_guest_handle(
825 xenres.extent_start, &pfn);
826 xenres.nr_extents = 1;
827 xenres.extent_order = 0;
828 xenres.address_bits = 31;
829 xenres.domid = DOMID_SELF;
830 if (HYPERVISOR_memory_op(
831 XENMEM_increase_reservation, &xenres) < 0) {
832 panic("xennet_free_rx_buffer: "
833 "can't get memory back");
834 }
835 ma = pfn;
836 KASSERT(ma != 0);
837 }
838 pa = rxreq->rxreq_pa;
839 va = rxreq->rxreq_va;
840 /* remap the page */
841 mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
842 mmu[0].val = pa >> PAGE_SHIFT;
843 MULTI_update_va_mapping(&mcl[0], va,
844 (ma << PAGE_SHIFT) | PG_V | PG_KW,
845 UVMF_TLB_FLUSH|UVMF_ALL);
846 xpmap_ptom_map(pa, ptoa(ma));
847 mcl[1].op = __HYPERVISOR_mmu_update;
848 mcl[1].args[0] = (unsigned long)mmu;
849 mcl[1].args[1] = 1;
850 mcl[1].args[2] = 0;
851 mcl[1].args[3] = DOMID_SELF;
852 HYPERVISOR_multicall(mcl, 2);
853 break;
854 default:
855 panic("%s: unsupported RX feature mode: %ld\n",
856 __func__, sc->sc_rx_feature);
857 }
858 }
859
860 }
861 mutex_exit(&sc->sc_rx_lock);
862 DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
863}
864
865/*
866 * Clears a used RX request when its associated mbuf has been processed
867 */
868static void
869xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
870{
871 int s = splnet();
872 KASSERT(buf == m->m_ext.ext_buf);
873 KASSERT(arg == NULL);
874 KASSERT(m != NULL);
875 vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
876 pool_cache_put_paddr(if_xennetrxbuf_cache,
877 (void *)va, m->m_ext.ext_paddr);
878 pool_cache_put(mb_cache, m);
879 splx(s);
880};
881
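/*
 * Return an RX request to the free list; once enough requests have
 * accumulated, replenish the ring with fresh RX buffers.
 */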
882static void
883xennet_rx_free_req(struct xennet_rxreq *req)
884{
885 struct xennet_xenbus_softc *sc = req->rxreq_sc;
886
887 KASSERT(mutex_owned(&sc->sc_rx_lock));
888
889 /* put the RX request back on the list of free RX requests */
890 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
891 sc->sc_free_rxreql++;
892
893 /*
894 * If the ring needs more requests to be pushed in, allocate some
895 * RX buffers to catch up with the backend's consumption.
896 */
897 req->rxreq_gntref = GRANT_INVALID_REF;
898
899 if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
900 __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
901 xennet_alloc_rx_buffer(sc);
902 }
903}
904
905/*
906 * Process the responses associated with the TX mbufs previously sent
907 * through xennet_softstart().
908 * Called at splnet.
909 */
910static void
911xennet_tx_complete(struct xennet_xenbus_softc *sc)
912{
913 struct xennet_txreq *req;
914 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
915 RING_IDX resp_prod, i;
916
917 DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
918 sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
919
920again:
921 resp_prod = sc->sc_tx_ring.sring->rsp_prod;
922 xen_rmb();
923 mutex_enter(&sc->sc_tx_lock);
924 for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
925 req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
926 KASSERT(req->txreq_id ==
927 RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
928 if (__predict_false(xengnt_status(req->txreq_gntref))) {
929 aprint_verbose_dev(sc->sc_dev,
930 "grant still used by backend\n");
931 sc->sc_tx_ring.rsp_cons = i;
932 goto end;
933 }
934 if (__predict_false(
935 RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
936 NETIF_RSP_OKAY))
937 ifp->if_oerrors++;
938 else
939 ifp->if_opackets++;
940 xengnt_revoke_access(req->txreq_gntref);
941 m_freem(req->txreq_m);
942 SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
943 }
944 mutex_exit(&sc->sc_tx_lock);
945
946 sc->sc_tx_ring.rsp_cons = resp_prod;
947 /* set new event and check for race with rsp_cons update */
948 sc->sc_tx_ring.sring->rsp_event =
949 resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
950 ifp->if_timer = 0;
951 xen_wmb();
952 if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
953 goto again;
954end:
955 if (ifp->if_flags & IFF_OACTIVE) {
956 ifp->if_flags &= ~IFF_OACTIVE;
957 xennet_softstart(sc);
958 }
959}
960
961/*
962 * Xennet event handler.
963 * Get outstanding responses of TX packets, then collect all responses of
964 * pending RX packets.
965 * Called at splnet.
966 */
967static int
968xennet_handler(void *arg)
969{
970 struct xennet_xenbus_softc *sc = arg;
971 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
972 RING_IDX resp_prod, i;
973 struct xennet_rxreq *req;
974 paddr_t ma, pa;
975 vaddr_t va;
976 mmu_update_t mmu[1];
977 multicall_entry_t mcl[2];
978 struct mbuf *m;
979 void *pktp;
980 int more_to_do;
981
982 if (sc->sc_backend_status != BEST_CONNECTED)
983 return 1;
984
985 xennet_tx_complete(sc);
986
987 rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
988
989again:
990 DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
991 sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
992
993 mutex_enter(&sc->sc_rx_lock);
994 resp_prod = sc->sc_rx_ring.sring->rsp_prod;
995 xen_rmb(); /* ensure we see replies up to resp_prod */
996
997 for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
998 netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
999 req = &sc->sc_rxreqs[rx->id];
1000 KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1001 KASSERT(req->rxreq_id == rx->id);
1002
1003 ma = 0;
1004 switch (sc->sc_rx_feature) {
1005 case FEATURE_RX_COPY:
1006 xengnt_revoke_access(req->rxreq_gntref);
1007 break;
1008 case FEATURE_RX_FLIP:
1009 ma = xengnt_revoke_transfer(req->rxreq_gntref);
1010 if (ma == 0) {
1011 DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
1012 /*
1013 * The remote couldn't send us a packet.
1014 * We can't free this rxreq as no page will be mapped
1015 * here. Instead give it back immediately to the backend.
1016 */
1017 ifp->if_ierrors++;
1018 RING_GET_REQUEST(&sc->sc_rx_ring,
1019 sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
1020 RING_GET_REQUEST(&sc->sc_rx_ring,
1021 sc->sc_rx_ring.req_prod_pvt)->gref =
1022 req->rxreq_gntref;
1023 sc->sc_rx_ring.req_prod_pvt++;
1024 RING_PUSH_REQUESTS(&sc->sc_rx_ring);
1025 continue;
1026 }
1027 break;
1028 default:
1029 panic("%s: unsupported RX feature mode: %ld\n",
1030 __func__, sc->sc_rx_feature);
1031 }
1032
1033 pa = req->rxreq_pa;
1034 va = req->rxreq_va;
1035
1036 if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
1037 /* remap the page */
1038 mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
1039 mmu[0].val = pa >> PAGE_SHIFT;
1040 MULTI_update_va_mapping(&mcl[0], va,
1041 (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
1042 xpmap_ptom_map(pa, ptoa(ma));
1043 mcl[1].op = __HYPERVISOR_mmu_update;
1044 mcl[1].args[0] = (unsigned long)mmu;
1045 mcl[1].args[1] = 1;
1046 mcl[1].args[2] = 0;
1047 mcl[1].args[3] = DOMID_SELF;
1048 HYPERVISOR_multicall(mcl, 2);
1049 }
1050
1051 pktp = (void *)(va + rx->offset);
1052#ifdef XENNET_DEBUG_DUMP
1053 xennet_hex_dump(pktp, rx->status, "r", rx->id);
1054#endif
1055 if ((ifp->if_flags & IFF_PROMISC) == 0) {
1056 struct ether_header *eh = pktp;
1057 if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1058 memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1059 ETHER_ADDR_LEN) != 0) {
1060 DPRINTFN(XEDB_EVENT,
1061 ("xennet_handler bad dest\n"));
1062 /* packet not for us */
1063 xennet_rx_free_req(req);
1064 continue;
1065 }
1066 }
1067 MGETHDR(m, M_DONTWAIT, MT_DATA);
1068 if (__predict_false(m == NULL)) {
1069 printf("%s: rx no mbuf\n", ifp->if_xname);
1070 ifp->if_ierrors++;
1071 xennet_rx_free_req(req);
1072 continue;
1073 }
1074 MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1075
1076 m_set_rcvif(m, ifp);
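 /*
  * Replace the request's buffer with a fresh one; the current page is
  * handed over to the mbuf below (zero copy).
  */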
1077 req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
1078 if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
1079 if (__predict_false(req->rxreq_va == 0)) {
1080 printf("%s: rx no buf\n", ifp->if_xname);
1081 ifp->if_ierrors++;
1082 req->rxreq_va = va;
1083 req->rxreq_pa = pa;
1084 xennet_rx_free_req(req);
1085 m_freem(m);
1086 continue;
1087 }
1088 m->m_len = m->m_pkthdr.len = rx->status;
1089 MEXTADD(m, pktp, rx->status,
1090 M_DEVBUF, xennet_rx_mbuf_free, NULL);
1091 m->m_flags |= M_EXT_RW; /* we own the buffer */
1092 m->m_ext.ext_paddr = pa;
1093 if ((rx->flags & NETRXF_csum_blank) != 0) {
1094 xennet_checksum_fill(&m);
1095 if (m == NULL) {
1096 ifp->if_ierrors++;
1097 continue;
1098 }
1099 }
1100 /* freeing the req may overwrite *rx, so better to do it late */
1101 xennet_rx_free_req(req);
1102 /*
1103 * Pass packet to bpf if there is a listener.
1104 */
1105 bpf_mtap(ifp, m);
1106
1107 ifp->if_ipackets++;
1108
1109 /* Pass the packet up. */
1110 if_percpuq_enqueue(ifp->if_percpuq, m);
1111 }
1112 xen_rmb();
1113 sc->sc_rx_ring.rsp_cons = i;
1114 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1115 mutex_exit(&sc->sc_rx_lock);
1116
1117 if (more_to_do)
1118 goto again;
1119
1120 return 1;
1121}
1122
1123/*
1124 * The output routine of a xennet interface.
1125 * Called at splnet.
1126 */
1127void
1128xennet_start(struct ifnet *ifp)
1129{
1130 struct xennet_xenbus_softc *sc = ifp->if_softc;
1131
1132 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1133
1134 rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1135
1136 xennet_tx_complete(sc);
1137
1138 if (__predict_false(
1139 (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1140 return;
1141
1142 /*
1143 * The Xen communication channel is much more efficient if we can
1144 * schedule batches of packets for domain0. To achieve this, we
1145 * schedule a soft interrupt, and just return. This way, the network
1146 * stack will enqueue all pending mbufs in the interface's send queue
1147 * before the queue is processed by xennet_softstart().
1148 */
1149 softint_schedule(sc->sc_softintr);
1150 return;
1151}
1152
1153/*
1154 * Prepare mbufs for TX, and notify the backend when finished.
1155 * Called at splsoftnet.
1156 */
1157void
1158xennet_softstart(void *arg)
1159{
1160 struct xennet_xenbus_softc *sc = arg;
1161 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1162 struct mbuf *m, *new_m;
1163 netif_tx_request_t *txreq;
1164 RING_IDX req_prod;
1165 paddr_t pa, pa2;
1166 struct xennet_txreq *req;
1167 int notify;
1168 int do_notify = 0;
1169
1170 mutex_enter(&sc->sc_tx_lock);
1171 if (__predict_false(
1172 (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1173 mutex_exit(&sc->sc_tx_lock);
1174 return;
1175 }
1176
1177 req_prod = sc->sc_tx_ring.req_prod_pvt;
1178 while (/*CONSTCOND*/1) {
1179 uint16_t txflags;
1180
1181 req = SLIST_FIRST(&sc->sc_txreq_head);
1182 if (__predict_false(req == NULL)) {
1183 ifp->if_flags |= IFF_OACTIVE;
1184 break;
1185 }
1186 IFQ_POLL(&ifp->if_snd, m);
1187 if (m == NULL)
1188 break;
1189
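 /*
  * Find the physical address of the packet data; how to get it
  * depends on where the mbuf stores its data.
  */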
1190 switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1191 case M_EXT|M_EXT_CLUSTER:
1192 KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1193 pa = m->m_ext.ext_paddr +
1194 (m->m_data - m->m_ext.ext_buf);
1195 break;
1196 case 0:
1197 KASSERT(m->m_paddr != M_PADDR_INVALID);
1198 pa = m->m_paddr + M_BUFOFFSET(m) +
1199 (m->m_data - M_BUFADDR(m));
1200 break;
1201 default:
1202 if (__predict_false(
1203 !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1204 &pa))) {
1205 panic("xennet_start: no pa");
1206 }
1207 break;
1208 }
1209
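 /*
  * If the stack left the TCP/UDP checksum blank, tell the backend
  * that it still needs to be computed (NETTXF_csum_blank).
  */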
1210 if ((m->m_pkthdr.csum_flags &
1211 (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1212 txflags = NETTXF_csum_blank;
1213 } else {
1214 txflags = 0;
1215 }
1216
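 /*
  * The backend needs the packet in a single buffer that does not
  * cross a page boundary; if the mbuf chain is fragmented or
  * straddles a page, copy it into a fresh mbuf first.
  */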
1217 if (m->m_pkthdr.len != m->m_len ||
1218 (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1219
1220 MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1221 if (__predict_false(new_m == NULL)) {
1222 printf("%s: cannot allocate new mbuf\n",
1223 device_xname(sc->sc_dev));
1224 break;
1225 }
1226 if (m->m_pkthdr.len > MHLEN) {
1227 MCLGET(new_m, M_DONTWAIT);
1228 if (__predict_false(
1229 (new_m->m_flags & M_EXT) == 0)) {
1230 DPRINTF(("%s: no mbuf cluster\n",
1231 device_xname(sc->sc_dev)));
1232 m_freem(new_m);
1233 break;
1234 }
1235 }
1236
1237 m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1238 new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1239
1240 if ((new_m->m_flags & M_EXT) != 0) {
1241 pa = new_m->m_ext.ext_paddr;
1242 KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1243 KASSERT(pa != M_PADDR_INVALID);
1244 } else {
1245 pa = new_m->m_paddr;
1246 KASSERT(pa != M_PADDR_INVALID);
1247 KASSERT(new_m->m_data == M_BUFADDR(new_m));
1248 pa += M_BUFOFFSET(new_m);
1249 }
1250 if (__predict_false(xengnt_grant_access(
1251 sc->sc_xbusd->xbusd_otherend_id,
1252 xpmap_ptom_masked(pa),
1253 GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1254 m_freem(new_m);
1255 ifp->if_flags |= IFF_OACTIVE;
1256 break;
1257 }
1258 /* we will be able to send new_m */
1259 IFQ_DEQUEUE(&ifp->if_snd, m);
1260 m_freem(m);
1261 m = new_m;
1262 } else {
1263 if (__predict_false(xengnt_grant_access(
1264 sc->sc_xbusd->xbusd_otherend_id,
1265 xpmap_ptom_masked(pa),
1266 GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1267 ifp->if_flags |= IFF_OACTIVE;
1268 break;
1269 }
1270 /* we will be able to send m */
1271 IFQ_DEQUEUE(&ifp->if_snd, m);
1272 }
1273 MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1274
1275 KASSERT(((pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) == 0);
1276
1277 SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1278 req->txreq_m = m;
1279
1280 DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1281 "mbuf %p, buf %p/%p/%p, size %d\n",
1282 req->txreq_id, m, mtod(m, void *), (void *)pa,
1283 (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1284 pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1285 DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1286 (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1287#ifdef XENNET_DEBUG_DUMP
1288 xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1289 req->txreq_id);
1290#endif
1291
1292 txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1293 txreq->id = req->txreq_id;
1294 txreq->gref = req->txreq_gntref;
1295 txreq->offset = pa & ~PG_FRAME;
1296 txreq->size = m->m_pkthdr.len;
1297 txreq->flags = txflags;
1298
1299 req_prod++;
1300 sc->sc_tx_ring.req_prod_pvt = req_prod;
1301 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1302 if (notify)
1303 do_notify = 1;
1304
1305#ifdef XENNET_DEBUG
1306 DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1307 "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1308 (void *)*kvtopte(mtod(m, vaddr_t)),
1309 (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1310 (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1311 DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1312 " prod %d\n",
1313 txreq->id, txreq->gref, txreq->offset, txreq->size,
1314 txreq->flags, req_prod));
1315#endif
1316
1317 /*
1318 * Pass packet to bpf if there is a listener.
1319 */
1320 bpf_mtap(ifp, m);
1321 }
1322
1323 if (do_notify) {
1324 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1325 ifp->if_timer = 5;
1326 }
1327
1328 mutex_exit(&sc->sc_tx_lock);
1329
1330 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1331 device_xname(sc->sc_dev)));
1332}
1333
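/* ifnet ioctl handler: delegate to ether_ioctl(); ENETRESET needs no action here. */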
1334int
1335xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1336{
1337#ifdef XENNET_DEBUG
1338 struct xennet_xenbus_softc *sc = ifp->if_softc;
1339#endif
1340 int s, error = 0;
1341
1342 s = splnet();
1343
1344 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1345 device_xname(sc->sc_dev)));
1346 error = ether_ioctl(ifp, cmd, data);
1347 if (error == ENETRESET)
1348 error = 0;
1349 splx(s);
1350
1351 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1352 device_xname(sc->sc_dev), error));
1353
1354 return error;
1355}
1356
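/* ifnet watchdog: a TX request was not answered in time; just log it. */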
1357void
1358xennet_watchdog(struct ifnet *ifp)
1359{
1360 aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1361}
1362
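/* ifnet if_init handler: enable the event channel and mark the interface running. */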
1363int
1364xennet_init(struct ifnet *ifp)
1365{
1366 struct xennet_xenbus_softc *sc = ifp->if_softc;
1367 mutex_enter(&sc->sc_rx_lock);
1368
1369 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1370 device_xname(sc->sc_dev)));
1371
1372 if ((ifp->if_flags & IFF_RUNNING) == 0) {
1373 sc->sc_rx_ring.sring->rsp_event =
1374 sc->sc_rx_ring.rsp_cons + 1;
1375 hypervisor_enable_event(sc->sc_evtchn);
1376 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1377 xennet_reset(sc);
1378 }
1379 ifp->if_flags |= IFF_RUNNING;
1380 ifp->if_flags &= ~IFF_OACTIVE;
1381 ifp->if_timer = 0;
1382 mutex_exit(&sc->sc_rx_lock);
1383 return 0;
1384}
1385
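/* ifnet if_stop handler: mask the event channel and mark the interface down. */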
1386void
1387xennet_stop(struct ifnet *ifp, int disable)
1388{
1389 struct xennet_xenbus_softc *sc = ifp->if_softc;
1390
1391 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1392 hypervisor_mask_event(sc->sc_evtchn);
1393 xennet_reset(sc);
1394}
1395
1396void
1397xennet_reset(struct xennet_xenbus_softc *sc)
1398{
1399
1400 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1401 device_xname(sc->sc_dev)));
1402}
1403
1404#if defined(NFS_BOOT_BOOTSTATIC)
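/*
 * NFS root bootstrap helper: fill in the nfs_diskless structure from the
 * network parameters passed on the Xen command line.
 */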
1405int
1406xennet_bootstatic_callback(struct nfs_diskless *nd)
1407{
1408#if 0
1409 struct ifnet *ifp = nd->nd_ifp;
1410 struct xennet_xenbus_softc *sc =
1411 (struct xennet_xenbus_softc *)ifp->if_softc;
1412#endif
1413 int flags = 0;
1414 union xen_cmdline_parseinfo xcp;
1415 struct sockaddr_in *sin;
1416
1417 memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1418 xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1419 xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1420 xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1421
1422 if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1423 flags |= NFS_BOOT_HAS_SERVER;
1424 if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1425 flags |= NFS_BOOT_HAS_ROOTPATH;
1426 }
1427
1428 nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1429 nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1430 nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1431
1432 sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1433 memset((void *)sin, 0, sizeof(*sin));
1434 sin->sin_len = sizeof(*sin);
1435 sin->sin_family = AF_INET;
1436 sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1437
1438 if (nd->nd_myip.s_addr)
1439 flags |= NFS_BOOT_HAS_MYIP;
1440 if (nd->nd_gwip.s_addr)
1441 flags |= NFS_BOOT_HAS_GWIP;
1442 if (nd->nd_mask.s_addr)
1443 flags |= NFS_BOOT_HAS_MASK;
1444 if (sin->sin_addr.s_addr)
1445 flags |= NFS_BOOT_HAS_SERVADDR;
1446
1447 return flags;
1448}
1449#endif /* defined(NFS_BOOT_BOOTSTATIC) */
1450
1451#ifdef XENNET_DEBUG_DUMP
1452#define XCHR(x) hexdigits[(x) & 0xf]
1453static void
1454xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1455{
1456 size_t i, j;
1457
1458 printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1459 printf("00000000 ");
1460 for(i=0; i<len; i++) {
1461 printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1462 if ((i+1) % 16 == 8)
1463 printf(" ");
1464 if ((i+1) % 16 == 0) {
1465 printf(" %c", '|');
1466 for(j=0; j<16; j++)
1467 printf("%c", pkt[i-15+j]>=32 &&
1468 pkt[i-15+j]<127?pkt[i-15+j]:'.');
1469 printf("%c\n%c%c%c%c%c%c%c%c ", '|',
1470 XCHR((i+1)>>28), XCHR((i+1)>>24),
1471 XCHR((i+1)>>20), XCHR((i+1)>>16),
1472 XCHR((i+1)>>12), XCHR((i+1)>>8),
1473 XCHR((i+1)>>4), XCHR(i+1));
1474 }
1475 }
1476 printf("\n");
1477}
1478#undef XCHR
1479#endif
1480