1/*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <drm/drmP.h>
25#include "radeon.h"
26#include "radeon_asic.h"
27#include "r600d.h"
28
/*
 * Used by r600_dma_is_lockup() to obtain the soft-reset mask.  No
 * definition appears in this file, so it is expected to be provided by
 * another part of the driver.
 */
u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);
30
/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 */
42
43/**
44 * r600_dma_get_rptr - get the current read pointer
45 *
46 * @rdev: radeon_device pointer
47 * @ring: radeon ring pointer
48 *
49 * Get the current rptr from the hardware (r6xx+).
50 */
51uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
52 struct radeon_ring *ring)
53{
54 u32 rptr;
55
56 if (rdev->wb.enabled)
57 rptr = rdev->wb.wb[ring->rptr_offs/4];
58 else
59 rptr = RREG32(DMA_RB_RPTR);
60
61 return (rptr & 0x3fffc) >> 2;
62}
63
64/**
65 * r600_dma_get_wptr - get the current write pointer
66 *
67 * @rdev: radeon_device pointer
68 * @ring: radeon ring pointer
69 *
70 * Get the current wptr from the hardware (r6xx+).
71 */
72uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
73 struct radeon_ring *ring)
74{
75 return (RREG32(DMA_RB_WPTR) & 0x3fffc) >> 2;
76}
77
78/**
79 * r600_dma_set_wptr - commit the write pointer
80 *
81 * @rdev: radeon_device pointer
82 * @ring: radeon ring pointer
83 *
84 * Write the wptr back to the hardware (r6xx+).
85 */
86void r600_dma_set_wptr(struct radeon_device *rdev,
87 struct radeon_ring *ring)
88{
89 WREG32(DMA_RB_WPTR, (ring->wptr << 2) & 0x3fffc);
90}
91
92/**
93 * r600_dma_stop - stop the async dma engine
94 *
95 * @rdev: radeon_device pointer
96 *
97 * Stop the async dma engine (r6xx-evergreen).
98 */
99void r600_dma_stop(struct radeon_device *rdev)
100{
101 u32 rb_cntl = RREG32(DMA_RB_CNTL);
102
103 if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
104 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
105
106 rb_cntl &= ~DMA_RB_ENABLE;
107 WREG32(DMA_RB_CNTL, rb_cntl);
108
109 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
110}
111
112/**
113 * r600_dma_resume - setup and start the async dma engine
114 *
115 * @rdev: radeon_device pointer
116 *
117 * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
118 * Returns 0 for success, error for failure.
119 */
120int r600_dma_resume(struct radeon_device *rdev)
121{
122 struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
123 u32 rb_cntl, dma_cntl, ib_cntl;
124 u32 rb_bufsz;
125 int r;
126
127 /* Reset dma */
128 if (rdev->family >= CHIP_RV770)
129 WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
130 else
131 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
132 RREG32(SRBM_SOFT_RESET);
133 udelay(50);
134 WREG32(SRBM_SOFT_RESET, 0);
135
136 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
137 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
138
139 /* Set ring buffer size in dwords */
140 rb_bufsz = order_base_2(ring->ring_size / 4);
141 rb_cntl = rb_bufsz << 1;
142#ifdef __BIG_ENDIAN
143 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
144#endif
145 WREG32(DMA_RB_CNTL, rb_cntl);
146
147 /* Initialize the ring buffer's read and write pointers */
148 WREG32(DMA_RB_RPTR, 0);
149 WREG32(DMA_RB_WPTR, 0);
150
151 /* set the wb address whether it's enabled or not */
152 WREG32(DMA_RB_RPTR_ADDR_HI,
153 upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
154 WREG32(DMA_RB_RPTR_ADDR_LO,
155 ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
156
157 if (rdev->wb.enabled)
158 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
159
160 WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
161
162 /* enable DMA IBs */
163 ib_cntl = DMA_IB_ENABLE;
164#ifdef __BIG_ENDIAN
165 ib_cntl |= DMA_IB_SWAP_ENABLE;
166#endif
167 WREG32(DMA_IB_CNTL, ib_cntl);
168
169 dma_cntl = RREG32(DMA_CNTL);
170 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
171 WREG32(DMA_CNTL, dma_cntl);
172
173 if (rdev->family >= CHIP_RV770)
174 WREG32(DMA_MODE, 1);
175
176 ring->wptr = 0;
177 WREG32(DMA_RB_WPTR, ring->wptr << 2);
178
179 WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
180
181 ring->ready = true;
182
183 r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
184 if (r) {
185 ring->ready = false;
186 return r;
187 }
188
189 if (rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX)
190 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
191
192 return 0;
193}
194
195/**
196 * r600_dma_fini - tear down the async dma engine
197 *
198 * @rdev: radeon_device pointer
199 *
200 * Stop the async dma engine and free the ring (r6xx-evergreen).
201 */
202void r600_dma_fini(struct radeon_device *rdev)
203{
204 r600_dma_stop(rdev);
205 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
206}
207
208/**
209 * r600_dma_is_lockup - Check if the DMA engine is locked up
210 *
211 * @rdev: radeon_device pointer
212 * @ring: radeon_ring structure holding ring information
213 *
214 * Check if the async DMA engine is locked up.
215 * Returns true if the engine appears to be locked up, false if not.
216 */
217bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
218{
219 u32 reset_mask = r600_gpu_check_soft_reset(rdev);
220
221 if (!(reset_mask & RADEON_RESET_DMA)) {
222 radeon_ring_lockup_update(rdev, ring);
223 return false;
224 }
225 return radeon_ring_test_lockup(rdev, ring);
226}
227
#ifdef __NetBSD__
/*
 * XXX bus_space cannot be used here because this memory is mapped
 * through the radeon_bo abstraction.  x86 cannot be assumed either:
 * this is AMD/ATI Radeon, not Intel.
 */

# define __iomem volatile
# define readl fake_readl
# define writel fake_writel

/* 32-bit volatile load from ptr, followed by membar_consumer(). */
static inline uint32_t
fake_readl(const void __iomem *ptr)
{
	const uint32_t __iomem *src = ptr;
	uint32_t val = *src;

	membar_consumer();
	return val;
}

/* membar_producer(), followed by a 32-bit volatile store of v to ptr. */
static inline void
fake_writel(uint32_t v, void __iomem *ptr)
{
	uint32_t __iomem *dst = ptr;

	membar_producer();
	*dst = v;
}
#endif
258
259/**
260 * r600_dma_ring_test - simple async dma engine test
261 *
262 * @rdev: radeon_device pointer
263 * @ring: radeon_ring structure holding ring information
264 *
265 * Test the DMA engine by writing using it to write an
266 * value to memory. (r6xx-SI).
267 * Returns 0 for success, error for failure.
268 */
269int r600_dma_ring_test(struct radeon_device *rdev,
270 struct radeon_ring *ring)
271{
272 unsigned i;
273 int r;
274 void __iomem *ptr = rdev->vram_scratch.ptr;
275 u32 tmp;
276
277 if (!ptr) {
278 DRM_ERROR("invalid vram scratch pointer\n");
279 return -EINVAL;
280 }
281
282 tmp = 0xCAFEDEAD;
283 writel(tmp, ptr);
284
285 r = radeon_ring_lock(rdev, ring, 4);
286 if (r) {
287 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
288 return r;
289 }
290 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
291 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
292 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
293 radeon_ring_write(ring, 0xDEADBEEF);
294 radeon_ring_unlock_commit(rdev, ring);
295
296 for (i = 0; i < rdev->usec_timeout; i++) {
297 tmp = readl(ptr);
298 if (tmp == 0xDEADBEEF)
299 break;
300 DRM_UDELAY(1);
301 }
302
303 if (i < rdev->usec_timeout) {
304 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
305 } else {
306 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
307 ring->idx, tmp);
308 r = -EINVAL;
309 }
310 return r;
311}
312
#ifdef __NetBSD__
/*
 * Drop the __iomem/readl/writel overrides that were set up for
 * r600_dma_ring_test().  The macros to remove are readl and writel;
 * fake_readl and fake_writel are functions, not macros.
 */
# undef __iomem
# undef readl
# undef writel
#endif
318
319/**
320 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
321 *
322 * @rdev: radeon_device pointer
323 * @fence: radeon fence object
324 *
325 * Add a DMA fence packet to the ring to write
326 * the fence seq number and DMA trap packet to generate
327 * an interrupt if needed (r6xx-r7xx).
328 */
329void r600_dma_fence_ring_emit(struct radeon_device *rdev,
330 struct radeon_fence *fence)
331{
332 struct radeon_ring *ring = &rdev->ring[fence->ring];
333 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
334
335 /* write the fence */
336 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
337 radeon_ring_write(ring, addr & 0xfffffffc);
338 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
339 radeon_ring_write(ring, lower_32_bits(fence->seq));
340 /* generate an interrupt */
341 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
342}
343
344/**
345 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
346 *
347 * @rdev: radeon_device pointer
348 * @ring: radeon_ring structure holding ring information
349 * @semaphore: radeon semaphore object
350 * @emit_wait: wait or signal semaphore
351 *
352 * Add a DMA semaphore packet to the ring wait on or signal
353 * other rings (r6xx-SI).
354 */
355bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
356 struct radeon_ring *ring,
357 struct radeon_semaphore *semaphore,
358 bool emit_wait)
359{
360 u64 addr = semaphore->gpu_addr;
361 u32 s = emit_wait ? 0 : 1;
362
363 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
364 radeon_ring_write(ring, addr & 0xfffffffc);
365 radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
366
367 return true;
368}
369
#ifdef __NetBSD__
/*
 * Route __iomem/readl/writel to the fake_readl()/fake_writel() helpers
 * for r600_dma_ib_test() below, which reads and writes the VRAM scratch
 * pointer directly.
 */
# define __iomem volatile
# define readl fake_readl
# define writel fake_writel
#endif
375
376/**
377 * r600_dma_ib_test - test an IB on the DMA engine
378 *
379 * @rdev: radeon_device pointer
380 * @ring: radeon_ring structure holding ring information
381 *
382 * Test a simple IB in the DMA ring (r6xx-SI).
383 * Returns 0 on success, error on failure.
384 */
385int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
386{
387 struct radeon_ib ib;
388 unsigned i;
389 int r;
390 void __iomem *ptr = rdev->vram_scratch.ptr;
391 u32 tmp = 0;
392
393 if (!ptr) {
394 DRM_ERROR("invalid vram scratch pointer\n");
395 return -EINVAL;
396 }
397
398 tmp = 0xCAFEDEAD;
399 writel(tmp, ptr);
400
401 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
402 if (r) {
403 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
404 return r;
405 }
406
407 ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
408 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
409 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
410 ib.ptr[3] = 0xDEADBEEF;
411 ib.length_dw = 4;
412
413 r = radeon_ib_schedule(rdev, &ib, NULL);
414 if (r) {
415 radeon_ib_free(rdev, &ib);
416 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
417 return r;
418 }
419 r = radeon_fence_wait(ib.fence, false);
420 if (r) {
421 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
422 return r;
423 }
424 for (i = 0; i < rdev->usec_timeout; i++) {
425 tmp = readl(ptr);
426 if (tmp == 0xDEADBEEF)
427 break;
428 DRM_UDELAY(1);
429 }
430 if (i < rdev->usec_timeout) {
431 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
432 } else {
433 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
434 r = -EINVAL;
435 }
436 radeon_ib_free(rdev, &ib);
437 return r;
438}
439
#ifdef __NetBSD__
/*
 * Drop the __iomem/readl/writel overrides that were set up for
 * r600_dma_ib_test() so they do not leak into the rest of the file.
 * The macros to remove are readl and writel; fake_readl and
 * fake_writel are functions, not macros.
 */
# undef __iomem
# undef readl
# undef writel
#endif
445
446/**
447 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
448 *
449 * @rdev: radeon_device pointer
450 * @ib: IB object to schedule
451 *
452 * Schedule an IB in the DMA ring (r6xx-r7xx).
453 */
454void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
455{
456 struct radeon_ring *ring = &rdev->ring[ib->ring];
457
458 if (rdev->wb.enabled) {
459 u32 next_rptr = ring->wptr + 4;
460 while ((next_rptr & 7) != 5)
461 next_rptr++;
462 next_rptr += 3;
463 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
464 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
465 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
466 radeon_ring_write(ring, next_rptr);
467 }
468
469 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
470 * Pad as necessary with NOPs.
471 */
472 while ((ring->wptr & 7) != 5)
473 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
474 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
475 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
476 radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
477
478}
479
480/**
481 * r600_copy_dma - copy pages using the DMA engine
482 *
483 * @rdev: radeon_device pointer
484 * @src_offset: src GPU address
485 * @dst_offset: dst GPU address
486 * @num_gpu_pages: number of GPU pages to xfer
487 * @fence: radeon fence object
488 *
489 * Copy GPU paging using the DMA engine (r6xx).
490 * Used by the radeon ttm implementation to move pages if
491 * registered as the asic copy callback.
492 */
493int r600_copy_dma(struct radeon_device *rdev,
494 uint64_t src_offset, uint64_t dst_offset,
495 unsigned num_gpu_pages,
496 struct radeon_fence **fence)
497{
498 struct radeon_semaphore *sem = NULL;
499 int ring_index = rdev->asic->copy.dma_ring_index;
500 struct radeon_ring *ring = &rdev->ring[ring_index];
501 u32 size_in_dw, cur_size_in_dw;
502 int i, num_loops;
503 int r = 0;
504
505 r = radeon_semaphore_create(rdev, &sem);
506 if (r) {
507 DRM_ERROR("radeon: moving bo (%d).\n", r);
508 return r;
509 }
510
511 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
512 num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
513 r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
514 if (r) {
515 DRM_ERROR("radeon: moving bo (%d).\n", r);
516 radeon_semaphore_free(rdev, &sem, NULL);
517 return r;
518 }
519
520 radeon_semaphore_sync_to(sem, *fence);
521 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
522
523 for (i = 0; i < num_loops; i++) {
524 cur_size_in_dw = size_in_dw;
525 if (cur_size_in_dw > 0xFFFE)
526 cur_size_in_dw = 0xFFFE;
527 size_in_dw -= cur_size_in_dw;
528 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
529 radeon_ring_write(ring, dst_offset & 0xfffffffc);
530 radeon_ring_write(ring, src_offset & 0xfffffffc);
531 radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
532 (upper_32_bits(src_offset) & 0xff)));
533 src_offset += cur_size_in_dw * 4;
534 dst_offset += cur_size_in_dw * 4;
535 }
536
537 r = radeon_fence_emit(rdev, fence, ring->idx);
538 if (r) {
539 radeon_ring_unlock_undo(rdev, ring);
540 radeon_semaphore_free(rdev, &sem, NULL);
541 return r;
542 }
543
544 radeon_ring_unlock_commit(rdev, ring);
545 radeon_semaphore_free(rdev, &sem, *fence);
546
547 return r;
548}
549