1 | /* |
2 | * Copyright 2013 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: Alex Deucher |
23 | */ |
24 | #include <drm/drmP.h> |
25 | #include "radeon.h" |
26 | #include "radeon_asic.h" |
27 | #include "r600d.h" |
28 | |
29 | u32 r600_gpu_check_soft_reset(struct radeon_device *rdev); |
30 | |
31 | /* |
32 | * DMA |
33 | * Starting with R600, the GPU has an asynchronous |
34 | * DMA engine. The programming model is very similar |
35 | * to the 3D engine (ring buffer, IBs, etc.), but the |
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
38 | * It supports copying data, writing embedded data, |
39 | * solid fills, and a number of other things. It also |
40 | * has support for tiling/detiling of buffers. |
41 | */ |
42 | |
43 | /** |
44 | * r600_dma_get_rptr - get the current read pointer |
45 | * |
46 | * @rdev: radeon_device pointer |
47 | * @ring: radeon ring pointer |
48 | * |
49 | * Get the current rptr from the hardware (r6xx+). |
50 | */ |
51 | uint32_t r600_dma_get_rptr(struct radeon_device *rdev, |
52 | struct radeon_ring *ring) |
53 | { |
54 | u32 rptr; |
55 | |
56 | if (rdev->wb.enabled) |
57 | rptr = rdev->wb.wb[ring->rptr_offs/4]; |
58 | else |
59 | rptr = RREG32(DMA_RB_RPTR); |
60 | |
61 | return (rptr & 0x3fffc) >> 2; |
62 | } |
63 | |
64 | /** |
65 | * r600_dma_get_wptr - get the current write pointer |
66 | * |
67 | * @rdev: radeon_device pointer |
68 | * @ring: radeon ring pointer |
69 | * |
70 | * Get the current wptr from the hardware (r6xx+). |
71 | */ |
72 | uint32_t r600_dma_get_wptr(struct radeon_device *rdev, |
73 | struct radeon_ring *ring) |
74 | { |
75 | return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2; |
76 | } |
77 | |
78 | /** |
79 | * r600_dma_set_wptr - commit the write pointer |
80 | * |
81 | * @rdev: radeon_device pointer |
82 | * @ring: radeon ring pointer |
83 | * |
84 | * Write the wptr back to the hardware (r6xx+). |
85 | */ |
86 | void r600_dma_set_wptr(struct radeon_device *rdev, |
87 | struct radeon_ring *ring) |
88 | { |
89 | WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc); |
90 | } |
91 | |
92 | /** |
93 | * r600_dma_stop - stop the async dma engine |
94 | * |
95 | * @rdev: radeon_device pointer |
96 | * |
97 | * Stop the async dma engine (r6xx-evergreen). |
98 | */ |
99 | void r600_dma_stop(struct radeon_device *rdev) |
100 | { |
101 | u32 rb_cntl = RREG32(DMA_RB_CNTL); |
102 | |
103 | if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) |
104 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
105 | |
106 | rb_cntl &= ~DMA_RB_ENABLE; |
107 | WREG32(DMA_RB_CNTL, rb_cntl); |
108 | |
109 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; |
110 | } |
111 | |
112 | /** |
113 | * r600_dma_resume - setup and start the async dma engine |
114 | * |
115 | * @rdev: radeon_device pointer |
116 | * |
 * Set up the DMA ring buffer and enable it (r6xx-evergreen).
118 | * Returns 0 for success, error for failure. |
119 | */ |
120 | int r600_dma_resume(struct radeon_device *rdev) |
121 | { |
122 | struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; |
123 | u32 rb_cntl, dma_cntl, ib_cntl; |
124 | u32 rb_bufsz; |
125 | int r; |
126 | |
127 | /* Reset dma */ |
128 | if (rdev->family >= CHIP_RV770) |
129 | WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); |
130 | else |
131 | WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); |
132 | RREG32(SRBM_SOFT_RESET); |
133 | udelay(50); |
134 | WREG32(SRBM_SOFT_RESET, 0); |
135 | |
136 | WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); |
137 | WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); |
138 | |
139 | /* Set ring buffer size in dwords */ |
140 | rb_bufsz = order_base_2(ring->ring_size / 4); |
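	/* the ring size is log2 encoded in the size field of DMA_RB_CNTL */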
141 | rb_cntl = rb_bufsz << 1; |
142 | #ifdef __BIG_ENDIAN |
143 | rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; |
144 | #endif |
145 | WREG32(DMA_RB_CNTL, rb_cntl); |
146 | |
147 | /* Initialize the ring buffer's read and write pointers */ |
148 | WREG32(DMA_RB_RPTR, 0); |
149 | WREG32(DMA_RB_WPTR, 0); |
150 | |
151 | /* set the wb address whether it's enabled or not */ |
152 | WREG32(DMA_RB_RPTR_ADDR_HI, |
153 | upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF); |
154 | WREG32(DMA_RB_RPTR_ADDR_LO, |
155 | ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC)); |
156 | |
157 | if (rdev->wb.enabled) |
158 | rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; |
159 | |
160 | WREG32(DMA_RB_BASE, ring->gpu_addr >> 8); |
161 | |
162 | /* enable DMA IBs */ |
163 | ib_cntl = DMA_IB_ENABLE; |
164 | #ifdef __BIG_ENDIAN |
165 | ib_cntl |= DMA_IB_SWAP_ENABLE; |
166 | #endif |
167 | WREG32(DMA_IB_CNTL, ib_cntl); |
168 | |
169 | dma_cntl = RREG32(DMA_CNTL); |
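	/* mask the "DMA context empty" interrupt */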
170 | dma_cntl &= ~CTXEMPTY_INT_ENABLE; |
171 | WREG32(DMA_CNTL, dma_cntl); |
172 | |
173 | if (rdev->family >= CHIP_RV770) |
174 | WREG32(DMA_MODE, 1); |
175 | |
176 | ring->wptr = 0; |
177 | WREG32(DMA_RB_WPTR, ring->wptr << 2); |
178 | |
179 | WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); |
180 | |
181 | ring->ready = true; |
182 | |
183 | r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring); |
184 | if (r) { |
185 | ring->ready = false; |
186 | return r; |
187 | } |
188 | |
189 | if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) |
190 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); |
191 | |
192 | return 0; |
193 | } |
194 | |
195 | /** |
196 | * r600_dma_fini - tear down the async dma engine |
197 | * |
198 | * @rdev: radeon_device pointer |
199 | * |
200 | * Stop the async dma engine and free the ring (r6xx-evergreen). |
201 | */ |
202 | void r600_dma_fini(struct radeon_device *rdev) |
203 | { |
204 | r600_dma_stop(rdev); |
205 | radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); |
206 | } |
207 | |
208 | /** |
209 | * r600_dma_is_lockup - Check if the DMA engine is locked up |
210 | * |
211 | * @rdev: radeon_device pointer |
212 | * @ring: radeon_ring structure holding ring information |
213 | * |
214 | * Check if the async DMA engine is locked up. |
215 | * Returns true if the engine appears to be locked up, false if not. |
216 | */ |
217 | bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) |
218 | { |
219 | u32 reset_mask = r600_gpu_check_soft_reset(rdev); |
220 | |
221 | if (!(reset_mask & RADEON_RESET_DMA)) { |
222 | radeon_ring_lockup_update(rdev, ring); |
223 | return false; |
224 | } |
225 | return radeon_ring_test_lockup(rdev, ring); |
226 | } |
227 | |
228 | #ifdef __NetBSD__ |
229 | /* |
230 | * XXX Can't use bus_space here because this is all mapped through the |
231 | * radeon_bo abstraction. Can't assume we're x86 because this is |
232 | * AMD/ATI Radeon, not Intel. |
233 | */ |
234 | |
235 | # define __iomem volatile |
236 | # define readl fake_readl |
237 | # define writel fake_writel |
238 | |
239 | static inline uint32_t |
240 | fake_readl(const void __iomem *ptr) |
241 | { |
242 | uint32_t v; |
243 | |
244 | v = *(const uint32_t __iomem *)ptr; |
245 | membar_consumer(); |
246 | |
247 | return v; |
248 | } |
249 | |
250 | static inline void |
251 | fake_writel(uint32_t v, void __iomem *ptr) |
252 | { |
253 | |
254 | membar_producer(); |
255 | *(uint32_t __iomem *)ptr = v; |
256 | } |
257 | #endif |
258 | |
259 | /** |
260 | * r600_dma_ring_test - simple async dma engine test |
261 | * |
262 | * @rdev: radeon_device pointer |
263 | * @ring: radeon_ring structure holding ring information |
264 | * |
 * Test the DMA engine by using it to write a value
 * to memory (r6xx-SI).
267 | * Returns 0 for success, error for failure. |
268 | */ |
269 | int r600_dma_ring_test(struct radeon_device *rdev, |
270 | struct radeon_ring *ring) |
271 | { |
272 | unsigned i; |
273 | int r; |
274 | void __iomem *ptr = rdev->vram_scratch.ptr; |
275 | u32 tmp; |
276 | |
277 | if (!ptr) { |
		DRM_ERROR("invalid vram scratch pointer\n");
279 | return -EINVAL; |
280 | } |
281 | |
282 | tmp = 0xCAFEDEAD; |
283 | writel(tmp, ptr); |
284 | |
285 | r = radeon_ring_lock(rdev, ring, 4); |
286 | if (r) { |
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
288 | return r; |
289 | } |
290 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); |
291 | radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); |
292 | radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); |
293 | radeon_ring_write(ring, 0xDEADBEEF); |
294 | radeon_ring_unlock_commit(rdev, ring); |
295 | |
296 | for (i = 0; i < rdev->usec_timeout; i++) { |
297 | tmp = readl(ptr); |
298 | if (tmp == 0xDEADBEEF) |
299 | break; |
300 | DRM_UDELAY(1); |
301 | } |
302 | |
303 | if (i < rdev->usec_timeout) { |
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
305 | } else { |
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
307 | ring->idx, tmp); |
308 | r = -EINVAL; |
309 | } |
310 | return r; |
311 | } |
312 | |
313 | #ifdef __NetBSD__ |
314 | # undef __iomem |
315 | # undef fake_readl |
316 | # undef fake_writel |
317 | #endif |
318 | |
319 | /** |
320 | * r600_dma_fence_ring_emit - emit a fence on the DMA ring |
321 | * |
322 | * @rdev: radeon_device pointer |
323 | * @fence: radeon fence object |
324 | * |
 * Add a DMA fence packet to the ring to write the fence
 * seq number, followed by a DMA trap packet to generate
 * an interrupt if needed (r6xx-r7xx).
328 | */ |
329 | void r600_dma_fence_ring_emit(struct radeon_device *rdev, |
330 | struct radeon_fence *fence) |
331 | { |
332 | struct radeon_ring *ring = &rdev->ring[fence->ring]; |
333 | u64 addr = rdev->fence_drv[fence->ring].gpu_addr; |
334 | |
335 | /* write the fence */ |
336 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0)); |
337 | radeon_ring_write(ring, addr & 0xfffffffc); |
338 | radeon_ring_write(ring, (upper_32_bits(addr) & 0xff)); |
339 | radeon_ring_write(ring, lower_32_bits(fence->seq)); |
340 | /* generate an interrupt */ |
341 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0)); |
342 | } |
343 | |
344 | /** |
345 | * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring |
346 | * |
347 | * @rdev: radeon_device pointer |
348 | * @ring: radeon_ring structure holding ring information |
349 | * @semaphore: radeon semaphore object |
350 | * @emit_wait: wait or signal semaphore |
351 | * |
 * Add a DMA semaphore packet to the ring to wait on or signal
353 | * other rings (r6xx-SI). |
354 | */ |
355 | bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev, |
356 | struct radeon_ring *ring, |
357 | struct radeon_semaphore *semaphore, |
358 | bool emit_wait) |
359 | { |
360 | u64 addr = semaphore->gpu_addr; |
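	/* the 's' field in the packet header selects signal (1) or wait (0) */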
361 | u32 s = emit_wait ? 0 : 1; |
362 | |
363 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); |
364 | radeon_ring_write(ring, addr & 0xfffffffc); |
365 | radeon_ring_write(ring, upper_32_bits(addr) & 0xff); |
366 | |
367 | return true; |
368 | } |
369 | |
370 | #ifdef __NetBSD__ |
371 | # define __iomem volatile |
372 | # define readl fake_readl |
373 | # define writel fake_writel |
374 | #endif |
375 | |
376 | /** |
377 | * r600_dma_ib_test - test an IB on the DMA engine |
378 | * |
379 | * @rdev: radeon_device pointer |
380 | * @ring: radeon_ring structure holding ring information |
381 | * |
382 | * Test a simple IB in the DMA ring (r6xx-SI). |
383 | * Returns 0 on success, error on failure. |
384 | */ |
385 | int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) |
386 | { |
387 | struct radeon_ib ib; |
388 | unsigned i; |
389 | int r; |
390 | void __iomem *ptr = rdev->vram_scratch.ptr; |
391 | u32 tmp = 0; |
392 | |
393 | if (!ptr) { |
		DRM_ERROR("invalid vram scratch pointer\n");
395 | return -EINVAL; |
396 | } |
397 | |
398 | tmp = 0xCAFEDEAD; |
399 | writel(tmp, ptr); |
400 | |
401 | r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); |
402 | if (r) { |
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
404 | return r; |
405 | } |
406 | |
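	/* a single DMA write packet: store 0xDEADBEEF at the vram scratch address */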
407 | ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1); |
408 | ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc; |
409 | ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff; |
410 | ib.ptr[3] = 0xDEADBEEF; |
411 | ib.length_dw = 4; |
412 | |
413 | r = radeon_ib_schedule(rdev, &ib, NULL); |
414 | if (r) { |
415 | radeon_ib_free(rdev, &ib); |
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
417 | return r; |
418 | } |
419 | r = radeon_fence_wait(ib.fence, false); |
420 | if (r) { |
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
422 | return r; |
423 | } |
424 | for (i = 0; i < rdev->usec_timeout; i++) { |
425 | tmp = readl(ptr); |
426 | if (tmp == 0xDEADBEEF) |
427 | break; |
428 | DRM_UDELAY(1); |
429 | } |
430 | if (i < rdev->usec_timeout) { |
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
432 | } else { |
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
434 | r = -EINVAL; |
435 | } |
436 | radeon_ib_free(rdev, &ib); |
437 | return r; |
438 | } |
439 | |
440 | #ifdef __NetBSD__ |
441 | # undef __iomem |
442 | # undef fake_readl |
443 | # undef fake_writel |
444 | #endif |
445 | |
446 | /** |
447 | * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine |
448 | * |
449 | * @rdev: radeon_device pointer |
450 | * @ib: IB object to schedule |
451 | * |
452 | * Schedule an IB in the DMA ring (r6xx-r7xx). |
453 | */ |
454 | void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) |
455 | { |
456 | struct radeon_ring *ring = &rdev->ring[ib->ring]; |
457 | |
458 | if (rdev->wb.enabled) { |
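		/* compute where the rptr will point once this write packet,
		 * the NOP padding and the indirect buffer packet have been
		 * fetched, and store it at the next_rptr writeback location
		 */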
459 | u32 next_rptr = ring->wptr + 4; |
460 | while ((next_rptr & 7) != 5) |
461 | next_rptr++; |
462 | next_rptr += 3; |
463 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); |
464 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); |
465 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); |
466 | radeon_ring_write(ring, next_rptr); |
467 | } |
468 | |
469 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. |
470 | * Pad as necessary with NOPs. |
471 | */ |
472 | while ((ring->wptr & 7) != 5) |
473 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); |
474 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0)); |
475 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); |
	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
479 | |
480 | /** |
481 | * r600_copy_dma - copy pages using the DMA engine |
482 | * |
483 | * @rdev: radeon_device pointer |
484 | * @src_offset: src GPU address |
485 | * @dst_offset: dst GPU address |
486 | * @num_gpu_pages: number of GPU pages to xfer |
487 | * @fence: radeon fence object |
488 | * |
 * Copy GPU pages using the DMA engine (r6xx).
490 | * Used by the radeon ttm implementation to move pages if |
491 | * registered as the asic copy callback. |
492 | */ |
493 | int r600_copy_dma(struct radeon_device *rdev, |
494 | uint64_t src_offset, uint64_t dst_offset, |
495 | unsigned num_gpu_pages, |
496 | struct radeon_fence **fence) |
497 | { |
498 | struct radeon_semaphore *sem = NULL; |
499 | int ring_index = rdev->asic->copy.dma_ring_index; |
500 | struct radeon_ring *ring = &rdev->ring[ring_index]; |
501 | u32 size_in_dw, cur_size_in_dw; |
502 | int i, num_loops; |
503 | int r = 0; |
504 | |
505 | r = radeon_semaphore_create(rdev, &sem); |
506 | if (r) { |
		DRM_ERROR("radeon: moving bo (%d).\n", r);
508 | return r; |
509 | } |
510 | |
511 | size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; |
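	/* each copy packet moves at most 0xFFFE dwords; reserve 4 dwords per
	 * packet plus room for the semaphore sync and fence at the end */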
512 | num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); |
513 | r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); |
514 | if (r) { |
		DRM_ERROR("radeon: moving bo (%d).\n", r);
516 | radeon_semaphore_free(rdev, &sem, NULL); |
517 | return r; |
518 | } |
519 | |
520 | radeon_semaphore_sync_to(sem, *fence); |
521 | radeon_semaphore_sync_rings(rdev, sem, ring->idx); |
522 | |
523 | for (i = 0; i < num_loops; i++) { |
524 | cur_size_in_dw = size_in_dw; |
525 | if (cur_size_in_dw > 0xFFFE) |
526 | cur_size_in_dw = 0xFFFE; |
527 | size_in_dw -= cur_size_in_dw; |
528 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw)); |
529 | radeon_ring_write(ring, dst_offset & 0xfffffffc); |
530 | radeon_ring_write(ring, src_offset & 0xfffffffc); |
531 | radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) | |
532 | (upper_32_bits(src_offset) & 0xff))); |
533 | src_offset += cur_size_in_dw * 4; |
534 | dst_offset += cur_size_in_dw * 4; |
535 | } |
536 | |
537 | r = radeon_fence_emit(rdev, fence, ring->idx); |
538 | if (r) { |
539 | radeon_ring_unlock_undo(rdev, ring); |
540 | radeon_semaphore_free(rdev, &sem, NULL); |
541 | return r; |
542 | } |
543 | |
544 | radeon_ring_unlock_commit(rdev, ring); |
545 | radeon_semaphore_free(rdev, &sem, *fence); |
546 | |
547 | return r; |
548 | } |
549 | |