1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. |
3 | * Copyright 2008 Red Hat Inc. |
4 | * Copyright 2009 Jerome Glisse. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the "Software"), |
8 | * to deal in the Software without restriction, including without limitation |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | * and/or sell copies of the Software, and to permit persons to whom the |
11 | * Software is furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
22 | * OTHER DEALINGS IN THE SOFTWARE. |
23 | * |
24 | * Authors: Dave Airlie |
25 | * Alex Deucher |
26 | * Jerome Glisse |
27 | */ |
28 | #include <drm/drmP.h> |
29 | #include <drm/radeon_drm.h> |
30 | #include "radeon.h" |
31 | #include "radeon_trace.h" |
32 | |
33 | /* |
34 | * GPUVM |
35 | * GPUVM is similar to the legacy gart on older asics, however |
36 | * rather than there being a single global gart table |
37 | * for the entire GPU, there are multiple VM page tables active |
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
40 | * can be mapped as snooped (cached system pages) or unsnooped |
41 | * (uncached system pages). |
42 | * Each VM has an ID associated with it and there is a page table |
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
48 | * command buffers and a VMID is assigned. |
49 | * Cayman/Trinity support up to 8 active VMs at any given time; |
50 | * SI supports 16. |
51 | */ |
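
/*
 * Illustrative sketch (not driver code) of how a GPU virtual address is
 * decomposed under this two-level scheme; it mirrors the arithmetic used
 * in radeon_vm_bo_set_addr() and radeon_vm_update_ptes() below:
 *
 *	pfn     = va / RADEON_GPU_PAGE_SIZE;
 *	pde_idx = pfn >> RADEON_VM_BLOCK_SIZE;      (selects the page table)
 *	pte_idx = pfn & (RADEON_VM_PTE_COUNT - 1);  (selects the entry in it)
 *
 * The hardware then adds the low bits of the address to the page base
 * stored in that PTE.
 */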
52 | |
53 | /** |
 * radeon_vm_num_pdes - return the number of page directory entries
55 | * |
56 | * @rdev: radeon_device pointer |
57 | * |
58 | * Calculate the number of page directory entries (cayman+). |
59 | */ |
60 | static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) |
61 | { |
62 | return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; |
63 | } |
64 | |
65 | /** |
66 | * radeon_vm_directory_size - returns the size of the page directory in bytes |
67 | * |
68 | * @rdev: radeon_device pointer |
69 | * |
70 | * Calculate the size of the page directory in bytes (cayman+). |
71 | */ |
72 | static unsigned radeon_vm_directory_size(struct radeon_device *rdev) |
73 | { |
74 | return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); |
75 | } |
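
/*
 * Worked example (purely illustrative, the real values come from the ASIC
 * setup code): if vm_manager.max_pfn were 1 << 20 and RADEON_VM_BLOCK_SIZE
 * were 9, radeon_vm_num_pdes() would return (1 << 20) >> 9 = 2048 PDEs and
 * radeon_vm_directory_size() would return RADEON_GPU_PAGE_ALIGN(2048 * 8) =
 * 16384 bytes for the page directory.
 */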
76 | |
77 | /** |
78 | * radeon_vm_manager_init - init the vm manager |
79 | * |
80 | * @rdev: radeon_device pointer |
81 | * |
82 | * Init the vm manager (cayman+). |
83 | * Returns 0 for success, error for failure. |
84 | */ |
85 | int radeon_vm_manager_init(struct radeon_device *rdev) |
86 | { |
87 | int r; |
88 | |
89 | if (!rdev->vm_manager.enabled) { |
90 | r = radeon_asic_vm_init(rdev); |
91 | if (r) |
92 | return r; |
93 | |
94 | rdev->vm_manager.enabled = true; |
95 | } |
96 | return 0; |
97 | } |
98 | |
99 | /** |
100 | * radeon_vm_manager_fini - tear down the vm manager |
101 | * |
102 | * @rdev: radeon_device pointer |
103 | * |
104 | * Tear down the VM manager (cayman+). |
105 | */ |
106 | void radeon_vm_manager_fini(struct radeon_device *rdev) |
107 | { |
108 | int i; |
109 | |
110 | if (!rdev->vm_manager.enabled) |
111 | return; |
112 | |
113 | for (i = 0; i < RADEON_NUM_VM; ++i) |
114 | radeon_fence_unref(&rdev->vm_manager.active[i]); |
115 | radeon_asic_vm_fini(rdev); |
116 | rdev->vm_manager.enabled = false; |
117 | } |
118 | |
119 | /** |
120 | * radeon_vm_get_bos - add the vm BOs to a validation list |
121 | * |
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory and the page tables to the list of BOs to
 * validate for command submission (cayman+).
127 | */ |
128 | struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, |
129 | struct radeon_vm *vm, |
130 | struct list_head *head) |
131 | { |
132 | struct radeon_cs_reloc *list; |
133 | unsigned i, idx; |
134 | |
135 | list = kmalloc_array(vm->max_pde_used + 2, |
136 | sizeof(struct radeon_cs_reloc), GFP_KERNEL); |
137 | if (!list) |
138 | return NULL; |
139 | |
140 | /* add the vm page table to the list */ |
141 | list[0].gobj = NULL; |
142 | list[0].robj = vm->page_directory; |
143 | list[0].domain = RADEON_GEM_DOMAIN_VRAM; |
144 | list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; |
145 | list[0].tv.bo = &vm->page_directory->tbo; |
146 | list[0].tiling_flags = 0; |
147 | list[0].handle = 0; |
148 | list_add(&list[0].tv.head, head); |
149 | |
150 | for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { |
151 | if (!vm->page_tables[i].bo) |
152 | continue; |
153 | |
154 | list[idx].gobj = NULL; |
155 | list[idx].robj = vm->page_tables[i].bo; |
156 | list[idx].domain = RADEON_GEM_DOMAIN_VRAM; |
157 | list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; |
158 | list[idx].tv.bo = &list[idx].robj->tbo; |
159 | list[idx].tiling_flags = 0; |
160 | list[idx].handle = 0; |
161 | list_add(&list[idx++].tv.head, head); |
162 | } |
163 | |
164 | return list; |
165 | } |
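
/*
 * Minimal usage sketch (assumed caller context, not taken from this file):
 * the returned array must be kfree()d by the caller once the buffers have
 * been reserved and validated, or on error.
 *
 *	struct list_head head;
 *	struct radeon_cs_reloc *vm_bos;
 *
 *	INIT_LIST_HEAD(&head);
 *	vm_bos = radeon_vm_get_bos(rdev, vm, &head);
 *	if (!vm_bos)
 *		return -ENOMEM;
 *	... reserve and validate the BOs on the list ...
 *	kfree(vm_bos);
 */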
166 | |
167 | /** |
168 | * radeon_vm_grab_id - allocate the next free VMID |
169 | * |
170 | * @rdev: radeon_device pointer |
171 | * @vm: vm to allocate id for |
172 | * @ring: ring we want to submit job to |
173 | * |
174 | * Allocate an id for the vm (cayman+). |
175 | * Returns the fence we need to sync to (if any). |
176 | * |
177 | * Global and local mutex must be locked! |
178 | */ |
179 | struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, |
180 | struct radeon_vm *vm, int ring) |
181 | { |
182 | struct radeon_fence *best[RADEON_NUM_RINGS] = {}; |
183 | unsigned choices[2] = {}; |
184 | unsigned i; |
185 | |
186 | /* check if the id is still valid */ |
187 | if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) |
188 | return NULL; |
189 | |
	/* we definitely need to flush */
191 | radeon_fence_unref(&vm->last_flush); |
192 | |
193 | /* skip over VMID 0, since it is the system VM */ |
194 | for (i = 1; i < rdev->vm_manager.nvm; ++i) { |
195 | struct radeon_fence *fence = rdev->vm_manager.active[i]; |
196 | |
197 | if (fence == NULL) { |
198 | /* found a free one */ |
199 | vm->id = i; |
200 | trace_radeon_vm_grab_id(vm->id, ring); |
201 | return NULL; |
202 | } |
203 | |
204 | if (radeon_fence_is_earlier(fence, best[fence->ring])) { |
205 | best[fence->ring] = fence; |
206 | choices[fence->ring == ring ? 0 : 1] = i; |
207 | } |
208 | } |
209 | |
210 | for (i = 0; i < 2; ++i) { |
211 | if (choices[i]) { |
212 | vm->id = choices[i]; |
213 | trace_radeon_vm_grab_id(vm->id, ring); |
214 | return rdev->vm_manager.active[choices[i]]; |
215 | } |
216 | } |
217 | |
218 | /* should never happen */ |
219 | BUG(); |
220 | return NULL; |
221 | } |
222 | |
223 | /** |
224 | * radeon_vm_flush - hardware flush the vm |
225 | * |
226 | * @rdev: radeon_device pointer |
227 | * @vm: vm we want to flush |
228 | * @ring: ring to use for flush |
229 | * |
230 | * Flush the vm (cayman+). |
231 | * |
232 | * Global and local mutex must be locked! |
233 | */ |
234 | void radeon_vm_flush(struct radeon_device *rdev, |
235 | struct radeon_vm *vm, |
236 | int ring) |
237 | { |
238 | uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); |
239 | |
240 | /* if we can't remember our last VM flush then flush now! */ |
241 | /* XXX figure out why we have to flush all the time */ |
242 | if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) { |
243 | vm->pd_gpu_addr = pd_addr; |
244 | radeon_ring_vm_flush(rdev, ring, vm); |
245 | } |
246 | } |
247 | |
248 | /** |
249 | * radeon_vm_fence - remember fence for vm |
250 | * |
251 | * @rdev: radeon_device pointer |
252 | * @vm: vm we want to fence |
253 | * @fence: fence to remember |
254 | * |
255 | * Fence the vm (cayman+). |
256 | * Set the fence used to protect page table and id. |
257 | * |
258 | * Global and local mutex must be locked! |
259 | */ |
260 | void radeon_vm_fence(struct radeon_device *rdev, |
261 | struct radeon_vm *vm, |
262 | struct radeon_fence *fence) |
263 | { |
264 | radeon_fence_unref(&vm->fence); |
265 | vm->fence = radeon_fence_ref(fence); |
266 | |
267 | radeon_fence_unref(&rdev->vm_manager.active[vm->id]); |
268 | rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); |
269 | |
270 | radeon_fence_unref(&vm->last_id_use); |
271 | vm->last_id_use = radeon_fence_ref(fence); |
272 | |
273 | /* we just flushed the VM, remember that */ |
274 | if (!vm->last_flush) |
275 | vm->last_flush = radeon_fence_ref(fence); |
276 | } |
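
/*
 * Hedged sketch of how these helpers fit together for one command
 * submission (the actual sequence lives in the CS code, not here):
 *
 *	fence = radeon_vm_grab_id(rdev, vm, ring);
 *	... sync the ring to fence if it is non-NULL ...
 *	radeon_vm_flush(rdev, vm, ring);
 *	... emit the command buffer using vm->id ...
 *	radeon_vm_fence(rdev, vm, cs_fence);
 */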
277 | |
278 | /** |
279 | * radeon_vm_bo_find - find the bo_va for a specific vm & bo |
280 | * |
281 | * @vm: requested vm |
282 | * @bo: requested buffer object |
283 | * |
284 | * Find @bo inside the requested vm (cayman+). |
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
287 | * |
288 | * Object has to be reserved! |
289 | */ |
290 | struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, |
291 | struct radeon_bo *bo) |
292 | { |
293 | struct radeon_bo_va *bo_va; |
294 | |
295 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
296 | if (bo_va->vm == vm) { |
297 | return bo_va; |
298 | } |
299 | } |
300 | return NULL; |
301 | } |
302 | |
303 | /** |
304 | * radeon_vm_bo_add - add a bo to a specific vm |
305 | * |
306 | * @rdev: radeon_device pointer |
307 | * @vm: requested vm |
308 | * @bo: radeon buffer object |
309 | * |
310 | * Add @bo into the requested vm (cayman+). |
311 | * Add @bo to the list of bos associated with the vm |
312 | * Returns newly added bo_va or NULL for failure |
313 | * |
314 | * Object has to be reserved! |
315 | */ |
316 | struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, |
317 | struct radeon_vm *vm, |
318 | struct radeon_bo *bo) |
319 | { |
320 | struct radeon_bo_va *bo_va; |
321 | |
322 | bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
323 | if (bo_va == NULL) { |
324 | return NULL; |
325 | } |
326 | bo_va->vm = vm; |
327 | bo_va->bo = bo; |
328 | bo_va->soffset = 0; |
329 | bo_va->eoffset = 0; |
330 | bo_va->flags = 0; |
331 | bo_va->valid = false; |
332 | bo_va->ref_count = 1; |
333 | INIT_LIST_HEAD(&bo_va->bo_list); |
334 | INIT_LIST_HEAD(&bo_va->vm_list); |
335 | |
336 | mutex_lock(&vm->mutex); |
337 | list_add(&bo_va->vm_list, &vm->va); |
338 | list_add_tail(&bo_va->bo_list, &bo->va); |
339 | mutex_unlock(&vm->mutex); |
340 | |
341 | return bo_va; |
342 | } |
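
/*
 * Illustrative pairing of radeon_vm_bo_add() and radeon_vm_bo_set_addr()
 * (assumed caller context; error handling trimmed). The BO must be
 * reserved across both calls:
 *
 *	r = radeon_bo_reserve(bo, false);
 *	if (r)
 *		return r;
 *	bo_va = radeon_vm_bo_add(rdev, vm, bo);
 *	if (bo_va)
 *		r = radeon_vm_bo_set_addr(rdev, bo_va, va_offset,
 *					  RADEON_VM_PAGE_READABLE |
 *					  RADEON_VM_PAGE_WRITEABLE);
 *	radeon_bo_unreserve(bo);
 */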
343 | |
344 | /** |
345 | * radeon_vm_clear_bo - initially clear the page dir/table |
346 | * |
347 | * @rdev: radeon_device pointer |
 * @bo: bo to clear
 *
 * Clear the freshly allocated page directory or page table by filling
 * it with invalid entries (cayman+).
 * Returns 0 for success, error for failure.
349 | */ |
350 | static int radeon_vm_clear_bo(struct radeon_device *rdev, |
351 | struct radeon_bo *bo) |
352 | { |
353 | struct ttm_validate_buffer tv; |
354 | struct ww_acquire_ctx ticket; |
355 | struct list_head head; |
356 | struct radeon_ib ib; |
357 | unsigned entries; |
358 | uint64_t addr; |
359 | int r; |
360 | |
361 | memset(&tv, 0, sizeof(tv)); |
362 | tv.bo = &bo->tbo; |
363 | |
364 | INIT_LIST_HEAD(&head); |
365 | list_add(&tv.head, &head); |
366 | |
367 | r = ttm_eu_reserve_buffers(&ticket, &head); |
368 | if (r) |
369 | return r; |
370 | |
371 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); |
372 | if (r) |
373 | goto error; |
374 | |
375 | addr = radeon_bo_gpu_offset(bo); |
376 | entries = radeon_bo_size(bo) / 8; |
377 | |
378 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, |
379 | NULL, entries * 2 + 64); |
380 | if (r) |
381 | goto error; |
382 | |
383 | ib.length_dw = 0; |
384 | |
385 | radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); |
386 | |
387 | r = radeon_ib_schedule(rdev, &ib, NULL); |
388 | if (r) |
389 | goto error; |
390 | |
391 | ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); |
392 | radeon_ib_free(rdev, &ib); |
393 | |
394 | return 0; |
395 | |
396 | error: |
397 | ttm_eu_backoff_reservation(&ticket, &head); |
398 | return r; |
399 | } |
400 | |
401 | /** |
 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
403 | * |
404 | * @rdev: radeon_device pointer |
405 | * @bo_va: bo_va to store the address |
406 | * @soffset: requested offset of the buffer in the VM address space |
407 | * @flags: attributes of pages (read/write/valid/etc.) |
408 | * |
409 | * Set offset of @bo_va (cayman+). |
410 | * Validate and set the offset requested within the vm address space. |
411 | * Returns 0 for success, error for failure. |
412 | * |
413 | * Object has to be reserved! |
414 | */ |
415 | int radeon_vm_bo_set_addr(struct radeon_device *rdev, |
416 | struct radeon_bo_va *bo_va, |
417 | uint64_t soffset, |
418 | uint32_t flags) |
419 | { |
420 | uint64_t size = radeon_bo_size(bo_va->bo); |
421 | uint64_t eoffset, last_offset = 0; |
422 | struct radeon_vm *vm = bo_va->vm; |
423 | struct radeon_bo_va *tmp; |
424 | struct list_head *head; |
425 | unsigned last_pfn, pt_idx; |
426 | int r; |
427 | |
428 | if (soffset) { |
		/* make sure the object fits at this offset */
430 | eoffset = soffset + size; |
431 | if (soffset >= eoffset) { |
432 | return -EINVAL; |
433 | } |
434 | |
435 | last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; |
436 | if (last_pfn > rdev->vm_manager.max_pfn) { |
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
438 | last_pfn, rdev->vm_manager.max_pfn); |
439 | return -EINVAL; |
440 | } |
441 | |
442 | } else { |
443 | eoffset = last_pfn = 0; |
444 | } |
445 | |
446 | mutex_lock(&vm->mutex); |
447 | head = &vm->va; |
448 | last_offset = 0; |
449 | list_for_each_entry(tmp, &vm->va, vm_list) { |
450 | if (bo_va == tmp) { |
451 | /* skip over currently modified bo */ |
452 | continue; |
453 | } |
454 | |
455 | if (soffset >= last_offset && eoffset <= tmp->soffset) { |
456 | /* bo can be added before this one */ |
457 | break; |
458 | } |
459 | if (eoffset > tmp->soffset && soffset < tmp->eoffset) { |
460 | /* bo and tmp overlap, invalid offset */ |
			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
462 | bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, |
463 | (unsigned)tmp->soffset, (unsigned)tmp->eoffset); |
464 | mutex_unlock(&vm->mutex); |
465 | return -EINVAL; |
466 | } |
467 | last_offset = tmp->eoffset; |
468 | head = &tmp->vm_list; |
469 | } |
470 | |
471 | bo_va->soffset = soffset; |
472 | bo_va->eoffset = eoffset; |
473 | bo_va->flags = flags; |
474 | bo_va->valid = false; |
475 | list_move(&bo_va->vm_list, head); |
476 | |
477 | soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; |
478 | eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; |
479 | |
480 | if (eoffset > vm->max_pde_used) |
481 | vm->max_pde_used = eoffset; |
482 | |
483 | radeon_bo_unreserve(bo_va->bo); |
484 | |
485 | /* walk over the address space and allocate the page tables */ |
486 | for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { |
487 | struct radeon_bo *pt; |
488 | |
489 | if (vm->page_tables[pt_idx].bo) |
490 | continue; |
491 | |
492 | /* drop mutex to allocate and clear page table */ |
493 | mutex_unlock(&vm->mutex); |
494 | |
495 | r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, |
496 | RADEON_GPU_PAGE_SIZE, false, |
497 | RADEON_GEM_DOMAIN_VRAM, NULL, &pt); |
498 | if (r) |
499 | return r; |
500 | |
501 | r = radeon_vm_clear_bo(rdev, pt); |
502 | if (r) { |
503 | radeon_bo_unref(&pt); |
504 | radeon_bo_reserve(bo_va->bo, false); |
505 | return r; |
506 | } |
507 | |
		/* acquire mutex again */
509 | mutex_lock(&vm->mutex); |
510 | if (vm->page_tables[pt_idx].bo) { |
511 | /* someone else allocated the pt in the meantime */ |
512 | mutex_unlock(&vm->mutex); |
513 | radeon_bo_unref(&pt); |
514 | mutex_lock(&vm->mutex); |
515 | continue; |
516 | } |
517 | |
518 | vm->page_tables[pt_idx].addr = 0; |
519 | vm->page_tables[pt_idx].bo = pt; |
520 | } |
521 | |
522 | mutex_unlock(&vm->mutex); |
523 | return radeon_bo_reserve(bo_va->bo, false); |
524 | } |
525 | |
526 | /** |
527 | * radeon_vm_map_gart - get the physical address of a gart page |
528 | * |
529 | * @rdev: radeon_device pointer |
530 | * @addr: the unmapped addr |
531 | * |
532 | * Look up the physical address of the page that the pte resolves |
533 | * to (cayman+). |
534 | * Returns the physical address of the page. |
535 | */ |
536 | uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) |
537 | { |
538 | uint64_t result; |
539 | |
540 | /* page table offset */ |
541 | result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; |
542 | |
	/* in case cpu page size != gpu page size */
544 | result |= addr & (~PAGE_MASK); |
545 | |
546 | return result; |
547 | } |
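
/*
 * Example of the arithmetic above (illustrative values only, assuming a
 * 4 KiB CPU page size): for addr = 0x12345678 the helper returns
 * pages_addr[0x12345] | 0x678, i.e. the physical base of the backing
 * system page plus the offset of the GPU page within it.
 */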
548 | |
549 | /** |
550 | * radeon_vm_page_flags - translate page flags to what the hw uses |
551 | * |
 * @flags: flags coming from userspace
553 | * |
554 | * Translate the flags the userspace ABI uses to hw flags. |
555 | */ |
556 | static uint32_t radeon_vm_page_flags(uint32_t flags) |
557 | { |
558 | uint32_t hw_flags = 0; |
559 | hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; |
560 | hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; |
561 | hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; |
562 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
563 | hw_flags |= R600_PTE_SYSTEM; |
564 | hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; |
565 | } |
566 | return hw_flags; |
567 | } |
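
/*
 * For example (derived directly from the translation above): a mapping
 * requested with RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_READABLE |
 * RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED ends up as
 * R600_PTE_VALID | R600_PTE_READABLE | R600_PTE_SYSTEM | R600_PTE_SNOOPED
 * in the hardware PTE.
 */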
568 | |
569 | /** |
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
576 | * |
577 | * Allocates new page tables if necessary |
578 | * and updates the page directory (cayman+). |
579 | * Returns 0 for success, error for failure. |
580 | * |
581 | * Global and local mutex must be locked! |
582 | */ |
583 | int radeon_vm_update_page_directory(struct radeon_device *rdev, |
584 | struct radeon_vm *vm) |
585 | { |
586 | static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; |
587 | |
588 | struct radeon_bo *pd = vm->page_directory; |
589 | uint64_t pd_addr = radeon_bo_gpu_offset(pd); |
590 | uint64_t last_pde = ~0, last_pt = ~0; |
591 | unsigned count = 0, pt_idx, ndw; |
592 | struct radeon_ib ib; |
593 | int r; |
594 | |
595 | /* padding, etc. */ |
596 | ndw = 64; |
597 | |
598 | /* assume the worst case */ |
599 | ndw += vm->max_pde_used * 16; |
600 | |
601 | /* update too big for an IB */ |
602 | if (ndw > 0xfffff) |
603 | return -ENOMEM; |
604 | |
605 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
606 | if (r) |
607 | return r; |
608 | ib.length_dw = 0; |
609 | |
610 | /* walk over the address space and update the page directory */ |
611 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { |
612 | struct radeon_bo *bo = vm->page_tables[pt_idx].bo; |
613 | uint64_t pde, pt; |
614 | |
615 | if (bo == NULL) |
616 | continue; |
617 | |
618 | pt = radeon_bo_gpu_offset(bo); |
619 | if (vm->page_tables[pt_idx].addr == pt) |
620 | continue; |
621 | vm->page_tables[pt_idx].addr = pt; |
622 | |
623 | pde = pd_addr + pt_idx * 8; |
624 | if (((last_pde + 8 * count) != pde) || |
625 | ((last_pt + incr * count) != pt)) { |
626 | |
627 | if (count) { |
628 | radeon_asic_vm_set_page(rdev, &ib, last_pde, |
629 | last_pt, count, incr, |
630 | R600_PTE_VALID); |
631 | } |
632 | |
633 | count = 1; |
634 | last_pde = pde; |
635 | last_pt = pt; |
636 | } else { |
637 | ++count; |
638 | } |
639 | } |
640 | |
641 | if (count) |
642 | radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, |
643 | incr, R600_PTE_VALID); |
644 | |
645 | if (ib.length_dw != 0) { |
646 | radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj); |
647 | radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); |
648 | r = radeon_ib_schedule(rdev, &ib, NULL); |
649 | if (r) { |
650 | radeon_ib_free(rdev, &ib); |
651 | return r; |
652 | } |
653 | radeon_fence_unref(&vm->fence); |
654 | vm->fence = radeon_fence_ref(ib.fence); |
655 | radeon_fence_unref(&vm->last_flush); |
656 | } |
657 | radeon_ib_free(rdev, &ib); |
658 | |
659 | return 0; |
660 | } |
661 | |
662 | /** |
663 | * radeon_vm_update_ptes - make sure that page tables are valid |
664 | * |
665 | * @rdev: radeon_device pointer |
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
667 | * @start: start of GPU address range |
668 | * @end: end of GPU address range |
669 | * @dst: destination address to map to |
670 | * @flags: mapping flags |
671 | * |
672 | * Update the page tables in the range @start - @end (cayman+). |
673 | * |
674 | * Global and local mutex must be locked! |
675 | */ |
676 | static void radeon_vm_update_ptes(struct radeon_device *rdev, |
677 | struct radeon_vm *vm, |
678 | struct radeon_ib *ib, |
679 | uint64_t start, uint64_t end, |
680 | uint64_t dst, uint32_t flags) |
681 | { |
682 | static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; |
683 | |
684 | uint64_t last_pte = ~0, last_dst = ~0; |
685 | unsigned count = 0; |
686 | uint64_t addr; |
687 | |
688 | start = start / RADEON_GPU_PAGE_SIZE; |
689 | end = end / RADEON_GPU_PAGE_SIZE; |
690 | |
691 | /* walk over the address space and update the page tables */ |
692 | for (addr = start; addr < end; ) { |
693 | uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; |
694 | struct radeon_bo *pt = vm->page_tables[pt_idx].bo; |
695 | unsigned nptes; |
696 | uint64_t pte; |
697 | |
698 | radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj); |
699 | |
700 | if ((addr & ~mask) == (end & ~mask)) |
701 | nptes = end - addr; |
702 | else |
703 | nptes = RADEON_VM_PTE_COUNT - (addr & mask); |
704 | |
705 | pte = radeon_bo_gpu_offset(pt); |
706 | pte += (addr & mask) * 8; |
707 | |
708 | if ((last_pte + 8 * count) != pte) { |
709 | |
710 | if (count) { |
711 | radeon_asic_vm_set_page(rdev, ib, last_pte, |
712 | last_dst, count, |
713 | RADEON_GPU_PAGE_SIZE, |
714 | flags); |
715 | } |
716 | |
717 | count = nptes; |
718 | last_pte = pte; |
719 | last_dst = dst; |
720 | } else { |
721 | count += nptes; |
722 | } |
723 | |
724 | addr += nptes; |
725 | dst += nptes * RADEON_GPU_PAGE_SIZE; |
726 | } |
727 | |
728 | if (count) { |
729 | radeon_asic_vm_set_page(rdev, ib, last_pte, |
730 | last_dst, count, |
731 | RADEON_GPU_PAGE_SIZE, flags); |
732 | } |
733 | } |
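
/*
 * The run-length batching above can be pictured as follows (illustrative
 * numbers): if PTEs 0..511 of one page table and 0..511 of the next one
 * all map a contiguous destination, only two radeon_asic_vm_set_page()
 * calls are emitted, one per page table and each covering 512 entries,
 * instead of one call per GPU page.
 */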
734 | |
735 | /** |
736 | * radeon_vm_bo_update - map a bo into the vm page table |
737 | * |
738 | * @rdev: radeon_device pointer |
739 | * @vm: requested vm |
740 | * @bo: radeon buffer object |
 * @mem: ttm mem object, or NULL to clear the mapping
742 | * |
743 | * Fill in the page table entries for @bo (cayman+). |
744 | * Returns 0 for success, -EINVAL for failure. |
745 | * |
 * Object has to be reserved and the mutex must be locked!
747 | */ |
748 | int radeon_vm_bo_update(struct radeon_device *rdev, |
749 | struct radeon_vm *vm, |
750 | struct radeon_bo *bo, |
751 | struct ttm_mem_reg *mem) |
752 | { |
753 | struct radeon_ib ib; |
754 | struct radeon_bo_va *bo_va; |
755 | unsigned nptes, ndw; |
756 | uint64_t addr; |
757 | int r; |
758 | |
759 | bo_va = radeon_vm_bo_find(vm, bo); |
760 | if (bo_va == NULL) { |
		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
762 | return -EINVAL; |
763 | } |
764 | |
765 | if (!bo_va->soffset) { |
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
767 | bo, vm); |
768 | return -EINVAL; |
769 | } |
770 | |
771 | if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) |
772 | return 0; |
773 | |
774 | bo_va->flags &= ~RADEON_VM_PAGE_VALID; |
775 | bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; |
776 | if (mem) { |
777 | addr = mem->start << PAGE_SHIFT; |
778 | if (mem->mem_type != TTM_PL_SYSTEM) { |
779 | bo_va->flags |= RADEON_VM_PAGE_VALID; |
780 | bo_va->valid = true; |
781 | } |
782 | if (mem->mem_type == TTM_PL_TT) { |
783 | bo_va->flags |= RADEON_VM_PAGE_SYSTEM; |
784 | } else { |
785 | addr += rdev->vm_manager.vram_base_offset; |
786 | } |
787 | } else { |
788 | addr = 0; |
789 | bo_va->valid = false; |
790 | } |
791 | |
792 | trace_radeon_vm_bo_update(bo_va); |
793 | |
794 | nptes = radeon_bo_ngpu_pages(bo); |
795 | |
796 | /* padding, etc. */ |
797 | ndw = 64; |
798 | |
799 | if (RADEON_VM_BLOCK_SIZE > 11) |
800 | /* reserve space for one header for every 2k dwords */ |
801 | ndw += (nptes >> 11) * 4; |
802 | else |
803 | /* reserve space for one header for |
804 | every (1 << BLOCK_SIZE) entries */ |
805 | ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; |
806 | |
807 | /* reserve space for pte addresses */ |
808 | ndw += nptes * 2; |
809 | |
810 | /* update too big for an IB */ |
811 | if (ndw > 0xfffff) |
812 | return -ENOMEM; |
813 | |
814 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
815 | if (r) |
816 | return r; |
817 | ib.length_dw = 0; |
818 | |
819 | radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, |
820 | addr, radeon_vm_page_flags(bo_va->flags)); |
821 | |
822 | radeon_semaphore_sync_to(ib.semaphore, vm->fence); |
823 | r = radeon_ib_schedule(rdev, &ib, NULL); |
824 | if (r) { |
825 | radeon_ib_free(rdev, &ib); |
826 | return r; |
827 | } |
828 | radeon_fence_unref(&vm->fence); |
829 | vm->fence = radeon_fence_ref(ib.fence); |
830 | radeon_ib_free(rdev, &ib); |
831 | radeon_fence_unref(&vm->last_flush); |
832 | |
833 | return 0; |
834 | } |
835 | |
836 | /** |
 * radeon_vm_bo_rmv - remove a bo from a specific vm
838 | * |
839 | * @rdev: radeon_device pointer |
840 | * @bo_va: requested bo_va |
841 | * |
842 | * Remove @bo_va->bo from the requested vm (cayman+). |
843 | * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and |
844 | * remove the ptes for @bo_va in the page table. |
845 | * Returns 0 for success. |
846 | * |
 * Object has to be reserved!
848 | */ |
849 | int radeon_vm_bo_rmv(struct radeon_device *rdev, |
850 | struct radeon_bo_va *bo_va) |
851 | { |
852 | int r = 0; |
853 | |
854 | mutex_lock(&bo_va->vm->mutex); |
855 | if (bo_va->soffset) |
856 | r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); |
857 | |
858 | list_del(&bo_va->vm_list); |
859 | mutex_unlock(&bo_va->vm->mutex); |
860 | list_del(&bo_va->bo_list); |
861 | |
862 | kfree(bo_va); |
863 | return r; |
864 | } |
865 | |
866 | /** |
867 | * radeon_vm_bo_invalidate - mark the bo as invalid |
868 | * |
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
872 | * |
873 | * Mark @bo as invalid (cayman+). |
874 | */ |
875 | void radeon_vm_bo_invalidate(struct radeon_device *rdev, |
876 | struct radeon_bo *bo) |
877 | { |
878 | struct radeon_bo_va *bo_va; |
879 | |
880 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
881 | bo_va->valid = false; |
882 | } |
883 | } |
884 | |
885 | /** |
886 | * radeon_vm_init - initialize a vm instance |
887 | * |
888 | * @rdev: radeon_device pointer |
889 | * @vm: requested vm |
890 | * |
 * Init @vm fields and allocate the page directory (cayman+).
 * Returns 0 for success, error for failure.
892 | */ |
893 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
894 | { |
895 | unsigned pd_size, pd_entries, pts_size; |
896 | int r; |
897 | |
898 | vm->id = 0; |
899 | vm->fence = NULL; |
900 | vm->last_flush = NULL; |
901 | vm->last_id_use = NULL; |
902 | #ifdef __NetBSD__ |
903 | linux_mutex_init(&vm->mutex); |
904 | #else |
905 | mutex_init(&vm->mutex); |
906 | #endif |
907 | INIT_LIST_HEAD(&vm->va); |
908 | |
909 | pd_size = radeon_vm_directory_size(rdev); |
910 | pd_entries = radeon_vm_num_pdes(rdev); |
911 | |
912 | /* allocate page table array */ |
913 | pts_size = pd_entries * sizeof(struct radeon_vm_pt); |
914 | vm->page_tables = kzalloc(pts_size, GFP_KERNEL); |
915 | if (vm->page_tables == NULL) { |
		DRM_ERROR("Cannot allocate memory for page table array\n");
917 | return -ENOMEM; |
918 | } |
919 | |
920 | r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, |
921 | RADEON_GEM_DOMAIN_VRAM, NULL, |
922 | &vm->page_directory); |
923 | if (r) |
924 | return r; |
925 | |
926 | r = radeon_vm_clear_bo(rdev, vm->page_directory); |
927 | if (r) { |
928 | radeon_bo_unref(&vm->page_directory); |
929 | vm->page_directory = NULL; |
930 | return r; |
931 | } |
932 | |
933 | return 0; |
934 | } |
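
/*
 * Rough lifecycle of a per-file VM as implemented in this file (hedged
 * sketch; the real call sites are elsewhere in the driver):
 *
 *	radeon_vm_init(rdev, vm);
 *	... radeon_vm_bo_add() / radeon_vm_bo_set_addr() per mapping ...
 *	... radeon_vm_bo_update() as buffers move ...
 *	radeon_vm_fini(rdev, vm);
 */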
935 | |
936 | /** |
937 | * radeon_vm_fini - tear down a vm instance |
938 | * |
939 | * @rdev: radeon_device pointer |
940 | * @vm: requested vm |
941 | * |
942 | * Tear down @vm (cayman+). |
943 | * Unbind the VM and remove all bos from the vm bo list |
944 | */ |
945 | void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) |
946 | { |
947 | struct radeon_bo_va *bo_va, *tmp; |
948 | int i, r; |
949 | |
950 | if (!list_empty(&vm->va)) { |
		dev_err(rdev->dev, "still active bo inside vm\n");
952 | } |
953 | list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { |
954 | list_del_init(&bo_va->vm_list); |
955 | r = radeon_bo_reserve(bo_va->bo, false); |
956 | if (!r) { |
957 | list_del_init(&bo_va->bo_list); |
958 | radeon_bo_unreserve(bo_va->bo); |
959 | kfree(bo_va); |
960 | } |
961 | } |
962 | |
964 | for (i = 0; i < radeon_vm_num_pdes(rdev); i++) |
965 | radeon_bo_unref(&vm->page_tables[i].bo); |
966 | kfree(vm->page_tables); |
967 | |
968 | radeon_bo_unref(&vm->page_directory); |
969 | |
970 | radeon_fence_unref(&vm->fence); |
971 | radeon_fence_unref(&vm->last_flush); |
972 | radeon_fence_unref(&vm->last_id_use); |
973 | |
974 | #ifdef __NetBSD__ |
975 | linux_mutex_destroy(&vm->mutex); |
976 | #else |
977 | mutex_destroy(&vm->mutex); |
978 | #endif |
979 | } |
980 | |