/* $NetBSD: nouveau_subdev_vm_base.c,v 1.3 2015/10/22 22:39:46 jmcneill Exp $ */

/*
 * Copyright 2010 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nouveau_subdev_vm_base.c,v 1.3 2015/10/22 22:39:46 jmcneill Exp $");

#include <core/gpuobj.h>
#include <core/mm.h>

#include <subdev/fb.h>
#include <subdev/vm.h>

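/*
 * Map the memory regions described by node's region list into vma,
 * starting delta bytes into the virtual allocation.  Each region is
 * split at page-table boundaries so every vmm->map() call targets a
 * single page table.  Note the incoming delta is folded into the
 * starting PTE offset; the local is then reused to accumulate the
 * number of bytes mapped so far, which is passed through to vmm->map().
 */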
void
nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_mem *node)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_mm_node *r;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 pde = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max = 1 << (vmm->pgt_bits - bits);
	u32 end, len;

	delta = 0;
	list_for_each_entry(r, &node->regions, rl_entry) {
		u64 phys = (u64)r->offset << 12;
		u32 num = r->length >> bits;

		while (num) {
			struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];

			end = (pte + num);
			if (unlikely(end >= max))
				end = max;
			len = end - pte;

			vmm->map(vma, pgt, node, pte, len, phys, delta);

			num -= len;
			pte += len;
			if (unlikely(end >= max)) {
				phys += len << (bits + 12);
				pde++;
				pte = 0;
			}

			delta += (u64)len << vma->node->type;
		}
	}

	vmm->flush(vm);
}

#ifdef __NetBSD__

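/*
 * NetBSD path: map the bus_dma segments backing mem into vma, one GPU
 * page at a time, starting delta bytes into the virtual allocation and
 * covering length bytes.  Each segment must be a whole number of GPU
 * pages, which the KASSERT below enforces.
 */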
static void
nouveau_vm_map_dma(struct nouveau_vma *vma, u64 delta, u64 length,
    struct nouveau_mem *mem)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num = length >> vma->node->type;
	u32 pde = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max = 1 << (vmm->pgt_bits - bits);
	unsigned seg, pgoff;

	for (seg = 0; seg < mem->pages->dm_nsegs; seg++) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
		dma_addr_t addr = mem->pages->dm_segs[seg].ds_addr;

		KASSERT((mem->pages->dm_segs[seg].ds_len &
			NOUVEAU_GPU_PAGE_MASK) == 0);
		for (pgoff = 0; pgoff < mem->pages->dm_segs[seg].ds_len;
		    pgoff += NOUVEAU_GPU_PAGE_SIZE, addr += NOUVEAU_GPU_PAGE_SIZE) {

			vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
			num--;
			pte++;

			if (num == 0)
				goto finish;

			if (__predict_false(pte >= max)) {
				/*
				 * Crossed into the next page directory
				 * entry: continue in its page table, not
				 * the stale one fetched above.
				 */
				pde++;
				pte = 0;
				pgt = vm->pgt[pde].obj[big];
			}
		}
	}

finish:
	vmm->flush(vm);
}

#else

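/*
 * Linux path: map the scatter-gather table backing mem into vma,
 * handing pages to vmm->map_sg() one at a time.  A segment that fits
 * inside the current page table is mapped by the first loop; a segment
 * that crosses a page-table boundary is finished in the following table
 * by the second loop.
 */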
static void
nouveau_vm_map_sg_table(struct nouveau_vma *vma, u64 delta, u64 length,
    struct nouveau_mem *mem)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num = length >> vma->node->type;
	u32 pde = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max = 1 << (vmm->pgt_bits - bits);
	unsigned m, sglen;
	u32 end, len;
	int i;
	struct scatterlist *sg;

	for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
		sglen = sg_dma_len(sg) >> PAGE_SHIFT;

		end = pte + sglen;
		if (unlikely(end >= max))
			end = max;
		len = end - pte;

		for (m = 0; m < len; m++) {
			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);

			vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
			num--;
			pte++;

			if (num == 0)
				goto finish;
		}
		if (unlikely(end >= max)) {
			pde++;
			pte = 0;
			/*
			 * The remainder of this segment lands in the next
			 * page table, so refresh pgt before continuing.
			 */
			pgt = vm->pgt[pde].obj[big];
		}
		if (m < sglen) {
			for (; m < sglen; m++) {
				dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);

				vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
				num--;
				pte++;
				if (num == 0)
					goto finish;
			}
		}
	}
finish:
	vmm->flush(vm);
}

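/*
 * Map a flat array of DMA page addresses (mem->pages) into vma,
 * splitting the list at page-table boundaries.
 */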
static void
nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
    struct nouveau_mem *mem)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	dma_addr_t *list = mem->pages;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num = length >> vma->node->type;
	u32 pde = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max = 1 << (vmm->pgt_bits - bits);
	u32 end, len;

	while (num) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];

		end = (pte + num);
		if (unlikely(end >= max))
			end = max;
		len = end - pte;

		vmm->map_sg(vma, pgt, mem, pte, len, list);

		num -= len;
		pte += len;
		list += len;
		if (unlikely(end >= max)) {
			pde++;
			pte = 0;
		}
	}

	vmm->flush(vm);
}

#endif

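/*
 * Map all of node at the start of vma, choosing the backing-specific
 * path: bus_dma segments on NetBSD, an sg table or page list on Linux,
 * or otherwise the memory region list.
 */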
void
nouveau_vm_map(struct nouveau_vma *vma, struct nouveau_mem *node)
{
#ifdef __NetBSD__
	if (node->pages)
		nouveau_vm_map_dma(vma, 0, node->size << 12, node);
	else
		nouveau_vm_map_at(vma, 0, node);
#else
	if (node->sg)
		nouveau_vm_map_sg_table(vma, 0, node->size << 12, node);
	else
	if (node->pages)
		nouveau_vm_map_sg(vma, 0, node->size << 12, node);
	else
		nouveau_vm_map_at(vma, 0, node);
#endif
}

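/*
 * Unmap length bytes starting delta bytes into vma, clearing PTEs one
 * page table at a time.
 */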
void
nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64 length)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num = length >> vma->node->type;
	u32 pde = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max = 1 << (vmm->pgt_bits - bits);
	u32 end, len;

	while (num) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];

		end = (pte + num);
		if (unlikely(end >= max))
			end = max;
		len = end - pte;

		vmm->unmap(pgt, pte, len);

		num -= len;
		pte += len;
		if (unlikely(end >= max)) {
			pde++;
			pte = 0;
		}
	}

	vmm->flush(vm);
}

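/* Unmap the entire virtual allocation backing vma. */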
void
nouveau_vm_unmap(struct nouveau_vma *vma)
{
	nouveau_vm_unmap_at(vma, 0, (u64)vma->node->length << 12);
}

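/*
 * Drop one reference on each page table covering PDEs fpde..lpde,
 * freeing and unlinking from every PGD any table whose refcount hits
 * zero.  Called with the vmmgr mutex held; the mutex is dropped around
 * the final gpuobj unreference.
 */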
static void
nouveau_vm_unmap_pgt(struct nouveau_vm *vm, int big, u32 fpde, u32 lpde)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgd *vpgd;
	struct nouveau_vm_pgt *vpgt;
	struct nouveau_gpuobj *pgt;
	u32 pde;

	for (pde = fpde; pde <= lpde; pde++) {
		vpgt = &vm->pgt[pde - vm->fpde];
		if (--vpgt->refcount[big])
			continue;

		pgt = vpgt->obj[big];
		vpgt->obj[big] = NULL;

		list_for_each_entry(vpgd, &vm->pgd_list, head) {
			vmm->map_pgt(vpgd->obj, pde, vpgt->obj);
		}

		mutex_unlock(&nv_subdev(vmm)->mutex);
		nouveau_gpuobj_ref(NULL, &pgt);
		mutex_lock(&nv_subdev(vmm)->mutex);
	}
}

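/*
 * Allocate and install the page table for the given PDE.  The vmmgr
 * mutex is dropped around the allocation, so a racing caller may fill
 * the PDE first; in that case the fresh allocation is released and the
 * existing table's refcount is bumped instead.
 */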
static int
nouveau_vm_map_pgt(struct nouveau_vm *vm, u32 pde, u32 type)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde];
	struct nouveau_vm_pgd *vpgd;
	struct nouveau_gpuobj *pgt;
	int big = (type != vmm->spg_shift);
	u32 pgt_size;
	int ret;

	pgt_size = (1 << (vmm->pgt_bits + 12)) >> type;
	pgt_size *= 8;

	mutex_unlock(&nv_subdev(vmm)->mutex);
	ret = nouveau_gpuobj_new(nv_object(vm->vmm), NULL, pgt_size, 0x1000,
				 NVOBJ_FLAG_ZERO_ALLOC, &pgt);
	mutex_lock(&nv_subdev(vmm)->mutex);
	if (unlikely(ret))
		return ret;

	/* someone beat us to filling the PDE while we didn't have the lock */
	if (unlikely(vpgt->refcount[big]++)) {
		mutex_unlock(&nv_subdev(vmm)->mutex);
		nouveau_gpuobj_ref(NULL, &pgt);
		mutex_lock(&nv_subdev(vmm)->mutex);
		return 0;
	}

	vpgt->obj[big] = pgt;
	list_for_each_entry(vpgd, &vm->pgd_list, head) {
		vmm->map_pgt(vpgd->obj, pde, vpgt->obj);
	}

	return 0;
}

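/*
 * Allocate size bytes of virtual address space from vm, aligned for
 * page_shift pages, creating any page tables that do not yet cover the
 * allocation.  On success vma holds a reference on vm, its GPU virtual
 * offset, and the requested access flags.
 */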
int
nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift,
	       u32 access, struct nouveau_vma *vma)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	u32 align = (1 << page_shift) >> 12;
	u32 msize = size >> 12;
	u32 fpde, lpde, pde;
	int ret;

	mutex_lock(&nv_subdev(vmm)->mutex);
	ret = nouveau_mm_head(&vm->mm, page_shift, msize, msize, align,
			      &vma->node);
	if (unlikely(ret != 0)) {
		mutex_unlock(&nv_subdev(vmm)->mutex);
		return ret;
	}

	fpde = (vma->node->offset >> vmm->pgt_bits);
	lpde = (vma->node->offset + vma->node->length - 1) >> vmm->pgt_bits;

	for (pde = fpde; pde <= lpde; pde++) {
		struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde];
		int big = (vma->node->type != vmm->spg_shift);

		if (likely(vpgt->refcount[big])) {
			vpgt->refcount[big]++;
			continue;
		}

		ret = nouveau_vm_map_pgt(vm, pde, vma->node->type);
		if (ret) {
			if (pde != fpde)
				nouveau_vm_unmap_pgt(vm, big, fpde, pde - 1);
			nouveau_mm_free(&vm->mm, &vma->node);
			mutex_unlock(&nv_subdev(vmm)->mutex);
			return ret;
		}
	}
	mutex_unlock(&nv_subdev(vmm)->mutex);

	vma->vm = NULL;
	nouveau_vm_ref(vm, &vma->vm, NULL);
	vma->offset = (u64)vma->node->offset << 12;
	vma->access = access;
	return 0;
}

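/*
 * Release the address space held by vma: drop page-table references
 * across the covered PDE range, return the VA node to the allocator,
 * and drop vma's reference on the vm.
 */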
void
nouveau_vm_put(struct nouveau_vma *vma)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	u32 fpde, lpde;

	if (unlikely(vma->node == NULL))
		return;
	fpde = (vma->node->offset >> vmm->pgt_bits);
	lpde = (vma->node->offset + vma->node->length - 1) >> vmm->pgt_bits;

	mutex_lock(&nv_subdev(vmm)->mutex);
	nouveau_vm_unmap_pgt(vm, vma->node->type != vmm->spg_shift, fpde, lpde);
	nouveau_mm_free(&vm->mm, &vma->node);
	mutex_unlock(&nv_subdev(vmm)->mutex);

	nouveau_vm_ref(NULL, &vma->vm, NULL);
}

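/*
 * Create an address space covering [offset, offset + length), with the
 * allocatable region starting at mm_offset and managed in units of
 * block bytes.
 */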
int
nouveau_vm_create(struct nouveau_vmmgr *vmm, u64 offset, u64 length,
		  u64 mm_offset, u32 block, struct nouveau_vm **pvm)
{
	struct nouveau_vm *vm;
	u64 mm_length = (offset + length) - mm_offset;
	int ret;

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return -ENOMEM;

	INIT_LIST_HEAD(&vm->pgd_list);
	vm->vmm = vmm;
	kref_init(&vm->refcount);
	vm->fpde = offset >> (vmm->pgt_bits + 12);
	vm->lpde = (offset + length - 1) >> (vmm->pgt_bits + 12);

	vm->pgt = vzalloc((vm->lpde - vm->fpde + 1) * sizeof(*vm->pgt));
	if (!vm->pgt) {
		kfree(vm);
		return -ENOMEM;
	}

	ret = nouveau_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12,
			      block >> 12);
	if (ret) {
		vfree(vm->pgt);
		kfree(vm);
		return ret;
	}

	*pvm = vm;

	return 0;
}

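/* Create a new address space via the device's vmmgr backend. */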
int
nouveau_vm_new(struct nouveau_device *device, u64 offset, u64 length,
	       u64 mm_offset, struct nouveau_vm **pvm)
{
	struct nouveau_vmmgr *vmm = nouveau_vmmgr(device);
	return vmm->create(vmm, offset, length, mm_offset, pvm);
}

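/*
 * Attach a page directory to vm: point each of its PDEs at the vm's
 * existing page tables and remember it so later page-table changes are
 * mirrored into it.
 */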
static int
nouveau_vm_link(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgd *vpgd;
	int i;

	if (!pgd)
		return 0;

	vpgd = kzalloc(sizeof(*vpgd), GFP_KERNEL);
	if (!vpgd)
		return -ENOMEM;

	nouveau_gpuobj_ref(pgd, &vpgd->obj);

	mutex_lock(&nv_subdev(vmm)->mutex);
	for (i = vm->fpde; i <= vm->lpde; i++)
		vmm->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj);
	list_add(&vpgd->head, &vm->pgd_list);
	mutex_unlock(&nv_subdev(vmm)->mutex);
	return 0;
}

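/*
 * Detach a previously linked page directory from vm and drop the
 * reference taken on it by nouveau_vm_link().
 */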
static void
nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *mpgd)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgd *vpgd, *tmp;
	struct nouveau_gpuobj *pgd = NULL;

	if (!mpgd)
		return;

	mutex_lock(&nv_subdev(vmm)->mutex);
	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
		if (vpgd->obj == mpgd) {
			pgd = vpgd->obj;
			list_del(&vpgd->head);
			kfree(vpgd);
			break;
		}
	}
	mutex_unlock(&nv_subdev(vmm)->mutex);

	nouveau_gpuobj_ref(NULL, &pgd);
}

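/*
 * Final teardown once the last reference is gone: detach any remaining
 * page directories and free the allocator and page-table bookkeeping.
 */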
static void
nouveau_vm_del(struct kref *kref)
{
	struct nouveau_vm *vm = container_of(kref, typeof(*vm), refcount);
	struct nouveau_vm_pgd *vpgd, *tmp;

	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
		nouveau_vm_unlink(vm, vpgd->obj);
	}

	nouveau_mm_fini(&vm->mm);
	vfree(vm->pgt);
	kfree(vm);
}

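/*
 * Replace *ptr's vm reference with ref, linking/unlinking pgd (if any)
 * against the new and old address spaces respectively.
 */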
int
nouveau_vm_ref(struct nouveau_vm *ref, struct nouveau_vm **ptr,
	       struct nouveau_gpuobj *pgd)
{
	if (ref) {
		int ret = nouveau_vm_link(ref, pgd);
		if (ret)
			return ret;

		kref_get(&ref->refcount);
	}

	if (*ptr) {
		nouveau_vm_unlink(*ptr, pgd);
		kref_put(&(*ptr)->refcount, nouveau_vm_del);
	}

	*ptr = ref;
	return 0;
}