1/* $NetBSD: xengnt.c,v 1.25 2012/10/24 13:07:46 royger Exp $ */
2
3/*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.25 2012/10/24 13:07:46 royger Exp $");
30
31#include <sys/types.h>
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/malloc.h>
35#include <sys/queue.h>
36#include <sys/extent.h>
37#include <sys/kernel.h>
38#include <sys/mutex.h>
39#include <uvm/uvm.h>
40
41#include <xen/hypervisor.h>
42#include <xen/xen.h>
43#include <xen/granttables.h>
44
/*
 * Debug tracing.  Uncomment the XENDEBUG definition below to make
 * DPRINTF() expand to a printf call; otherwise DPRINTF() expands to
 * nothing and its argument is not evaluated.  The argument must be a
 * complete, parenthesized printf argument list, e.g.
 * DPRINTF(("value %d\n", v)).
 */
/* #define XENDEBUG */
#ifndef XENDEBUG
#define DPRINTF(x)
#else
#define DPRINTF(x) printf x
#endif
51
/* Number of grant_entry_t slots that fit in one page of the grant table. */
#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_t))

/*
 * External tools reserve the first few grant table entries; references
 * up to and including this value are never put on the free list.
 */
#define NR_RESERVED_ENTRIES 8
56
57/* Current number of frames making up the grant table */
58int gnt_nr_grant_frames;
59/* Maximum number of frames that can make up the grant table */
60int gnt_max_grant_frames;
61
62/* table of free grant entries */
63grant_ref_t *gnt_entries;
64/* last free entry */
65int last_gnt_entry;
66/* empty entry in the list */
67#define XENGNT_NO_ENTRY 0xffffffff
68
69/* VM address of the grant table */
70grant_entry_t *grant_table;
71kmutex_t grant_lock;
72
73static grant_ref_t xengnt_get_entry(void);
74static void xengnt_free_entry(grant_ref_t);
75static int xengnt_more_entries(void);
76
77void
78xengnt_init(void)
79{
80 struct gnttab_query_size query;
81 int rc;
82 int nr_grant_entries;
83 int i;
84
85 query.dom = DOMID_SELF;
86 rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
87 if ((rc < 0) || (query.status != GNTST_okay))
88 gnt_max_grant_frames = 4; /* Legacy max number of frames */
89 else
90 gnt_max_grant_frames = query.max_nr_frames;
91 gnt_nr_grant_frames = 0;
92
93 nr_grant_entries =
94 gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;
95
96 grant_table = (void *)uvm_km_alloc(kernel_map,
97 gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
98 if (grant_table == NULL)
99 panic("xengnt_init() no VM space");
100 gnt_entries = malloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
101 M_DEVBUF, M_NOWAIT);
102 if (gnt_entries == NULL)
103 panic("xengnt_init() no space for bitmask");
104 for (i = 0; i <= nr_grant_entries; i++)
105 gnt_entries[i] = XENGNT_NO_ENTRY;
106
107 mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);
108
109 xengnt_resume();
110
111}
112
113/*
114 * Resume grant table state
115 */
116bool
117xengnt_resume(void)
118{
119 int previous_nr_grant_frames = gnt_nr_grant_frames;
120
121 last_gnt_entry = 0;
122 gnt_nr_grant_frames = 0;
123
124 mutex_enter(&grant_lock);
125 while (gnt_nr_grant_frames < previous_nr_grant_frames) {
126 if (xengnt_more_entries() != 0)
127 panic("xengnt_resume: can't restore grant frames");
128 }
129 mutex_exit(&grant_lock);
130 return true;
131}
132
133/*
134 * Suspend grant table state
135 */
136bool
137xengnt_suspend(void) {
138
139 int i;
140
141 mutex_enter(&grant_lock);
142 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
143
144 for (i = 0; i < last_gnt_entry; i++) {
145 /* invalidate all grant entries (necessary for resume) */
146 gnt_entries[i] = XENGNT_NO_ENTRY;
147 }
148
149 /* Remove virtual => machine mapping */
150 pmap_kremove((vaddr_t)grant_table, gnt_nr_grant_frames * PAGE_SIZE);
151 pmap_update(pmap_kernel());
152 mutex_exit(&grant_lock);
153 return true;
154}
155
156
157/*
158 * Add another page to the grant table
159 * Returns 0 on success, ENOMEM on failure
160 */
161static int
162xengnt_more_entries(void)
163{
164 gnttab_setup_table_t setup;
165 u_long *pages;
166 int nframes_new = gnt_nr_grant_frames + 1;
167 int i, start_gnt;
168 KASSERT(mutex_owned(&grant_lock));
169
170 if (gnt_nr_grant_frames == gnt_max_grant_frames)
171 return ENOMEM;
172
173 pages = malloc(nframes_new * sizeof(u_long), M_DEVBUF, M_NOWAIT);
174 if (pages == NULL)
175 return ENOMEM;
176
177 setup.dom = DOMID_SELF;
178 setup.nr_frames = nframes_new;
179 set_xen_guest_handle(setup.frame_list, pages);
180
181 /*
182 * setup the grant table, made of nframes_new frames
183 * and return the list of their virtual addresses
184 * in 'pages'
185 */
186 if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
187 panic("%s: setup table failed", __func__);
188 if (setup.status != GNTST_okay) {
189 aprint_error("%s: setup table returned %d\n",
190 __func__, setup.status);
191 free(pages, M_DEVBUF);
192 return ENOMEM;
193 }
194
195 DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
196 pages[gnt_nr_grant_frames],
197 (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));
198
199 /*
200 * map between grant_table addresses and the machine addresses of
201 * the grant table frames
202 */
203 pmap_kenter_ma(((vaddr_t)grant_table) + gnt_nr_grant_frames * PAGE_SIZE,
204 ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
205 VM_PROT_WRITE, 0);
206 pmap_update(pmap_kernel());
207
208 /*
209 * add the grant entries associated to the last grant table frame
210 * and mark them as free. Prevent using the first grants (from 0 to 8)
211 * since they are used by the tools.
212 */
213 start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
214 (NR_RESERVED_ENTRIES + 1) ?
215 (NR_RESERVED_ENTRIES + 1) :
216 (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
217 for (i = start_gnt;
218 i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
219 i++) {
220 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
221 gnt_entries[last_gnt_entry] = i;
222 last_gnt_entry++;
223 }
224 gnt_nr_grant_frames = nframes_new;
225 free(pages, M_DEVBUF);
226 return 0;
227}
228
229/*
230 * Returns a reference to the first free entry in grant table
231 */
232static grant_ref_t
233xengnt_get_entry(void)
234{
235 grant_ref_t entry;
236 static struct timeval xengnt_nonmemtime;
237 static const struct timeval xengnt_nonmemintvl = {5,0};
238
239 if (last_gnt_entry == 0) {
240 if (xengnt_more_entries()) {
241 if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
242 printf("xengnt_get_entry: out of grant "
243 "table entries\n");
244 return XENGNT_NO_ENTRY;
245 }
246 }
247 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
248 last_gnt_entry--;
249 entry = gnt_entries[last_gnt_entry];
250 gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
251 KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
252 KASSERT(last_gnt_entry >= 0);
253 KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
254 return entry;
255}
256
257/*
258 * Mark the grant table entry as free
259 */
260static void
261xengnt_free_entry(grant_ref_t entry)
262{
263 mutex_enter(&grant_lock);
264 KASSERT(entry > NR_RESERVED_ENTRIES);
265 KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
266 KASSERT(last_gnt_entry >= 0);
267 KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
268 gnt_entries[last_gnt_entry] = entry;
269 last_gnt_entry++;
270 mutex_exit(&grant_lock);
271}
272
273int
274xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
275{
276 mutex_enter(&grant_lock);
277
278 *entryp = xengnt_get_entry();
279 if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
280 mutex_exit(&grant_lock);
281 return ENOMEM;
282 }
283
284 grant_table[*entryp].frame = ma >> PAGE_SHIFT;
285 grant_table[*entryp].domid = dom;
286 /*
287 * ensure that the above values reach global visibility
288 * before permitting frame's access (done when we set flags)
289 */
290 xen_rmb();
291 grant_table[*entryp].flags =
292 GTF_permit_access | (ro ? GTF_readonly : 0);
293 mutex_exit(&grant_lock);
294 return 0;
295}
296
297void
298xengnt_revoke_access(grant_ref_t entry)
299{
300 uint16_t flags, nflags;
301
302 nflags = grant_table[entry].flags;
303
304 do {
305 if ((flags = nflags) & (GTF_reading|GTF_writing))
306 panic("xengnt_revoke_access: still in use");
307 nflags = xen_atomic_cmpxchg16(&grant_table[entry].flags,
308 flags, 0);
309 } while (nflags != flags);
310 xengnt_free_entry(entry);
311}
312
313int
314xengnt_grant_transfer(domid_t dom, grant_ref_t *entryp)
315{
316 mutex_enter(&grant_lock);
317
318 *entryp = xengnt_get_entry();
319 if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
320 mutex_exit(&grant_lock);
321 return ENOMEM;
322 }
323
324 grant_table[*entryp].frame = 0;
325 grant_table[*entryp].domid = dom;
326 /*
327 * ensure that the above values reach global visibility
328 * before permitting frame's transfer (done when we set flags)
329 */
330 xen_rmb();
331 grant_table[*entryp].flags = GTF_accept_transfer;
332 mutex_exit(&grant_lock);
333 return 0;
334}
335
336paddr_t
337xengnt_revoke_transfer(grant_ref_t entry)
338{
339 paddr_t page;
340 uint16_t flags;
341
342 /* if the transfer has not started, free the entry and return 0 */
343 while (!((flags = grant_table[entry].flags) & GTF_transfer_committed)) {
344 if (xen_atomic_cmpxchg16(&grant_table[entry].flags,
345 flags, 0) == flags ) {
346 xengnt_free_entry(entry);
347 return 0;
348 }
349 HYPERVISOR_yield();
350 }
351
352 /* If transfer in progress, wait for completion */
353 while (!((flags = grant_table[entry].flags) & GTF_transfer_completed))
354 HYPERVISOR_yield();
355
356 /* Read the frame number /after/ reading completion status. */
357 __insn_barrier();
358 page = grant_table[entry].frame;
359 if (page == 0)
360 printf("xengnt_revoke_transfer: guest sent pa 0\n");
361
362 xengnt_free_entry(entry);
363 return page;
364}
365
366int
367xengnt_status(grant_ref_t entry)
368{
369 return (grant_table[entry].flags & (GTF_reading|GTF_writing));
370}
371