1 | /* |
2 | * Copyright 2010 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: Alex Deucher |
23 | */ |
24 | #include <drm/drmP.h> |
25 | #include "radeon.h" |
26 | #include "radeon_asic.h" |
27 | #include "radeon_trace.h" |
28 | #include "nid.h" |
29 | |
30 | u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev); |
31 | |
32 | /* |
33 | * DMA |
34 | * Starting with R600, the GPU has an asynchronous |
35 | * DMA engine. The programming model is very similar |
36 | * to the 3D engine (ring buffer, IBs, etc.), but the |
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
39 | * It supports copying data, writing embedded data, |
40 | * solid fills, and a number of other things. It also |
41 | * has support for tiling/detiling of buffers. |
42 | * Cayman and newer support two asynchronous DMA engines. |
43 | */ |
44 | |
45 | /** |
46 | * cayman_dma_get_rptr - get the current read pointer |
47 | * |
48 | * @rdev: radeon_device pointer |
49 | * @ring: radeon ring pointer |
50 | * |
51 | * Get the current rptr from the hardware (cayman+). |
52 | */ |
53 | uint32_t cayman_dma_get_rptr(struct radeon_device *rdev, |
54 | struct radeon_ring *ring) |
55 | { |
56 | u32 rptr, reg; |
57 | |
58 | if (rdev->wb.enabled) { |
59 | rptr = rdev->wb.wb[ring->rptr_offs/4]; |
60 | } else { |
61 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) |
62 | reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET; |
63 | else |
64 | reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET; |
65 | |
66 | rptr = RREG32(reg); |
67 | } |
68 | |
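	/* the rptr is stored as a byte offset; convert it to dwords */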
69 | return (rptr & 0x3fffc) >> 2; |
70 | } |
71 | |
72 | /** |
73 | * cayman_dma_get_wptr - get the current write pointer |
74 | * |
75 | * @rdev: radeon_device pointer |
76 | * @ring: radeon ring pointer |
77 | * |
78 | * Get the current wptr from the hardware (cayman+). |
79 | */ |
80 | uint32_t cayman_dma_get_wptr(struct radeon_device *rdev, |
81 | struct radeon_ring *ring) |
82 | { |
83 | u32 reg; |
84 | |
85 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) |
86 | reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; |
87 | else |
88 | reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; |
89 | |
90 | return (RREG32(reg) & 0x3fffc) >> 2; |
91 | } |
92 | |
93 | /** |
94 | * cayman_dma_set_wptr - commit the write pointer |
95 | * |
96 | * @rdev: radeon_device pointer |
97 | * @ring: radeon ring pointer |
98 | * |
99 | * Write the wptr back to the hardware (cayman+). |
100 | */ |
101 | void cayman_dma_set_wptr(struct radeon_device *rdev, |
102 | struct radeon_ring *ring) |
103 | { |
104 | u32 reg; |
105 | |
106 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) |
107 | reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; |
108 | else |
109 | reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; |
110 | |
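	/* convert the dword wptr into a byte offset before writing it back */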
111 | WREG32(reg, (ring->wptr << 2) & 0x3fffc); |
112 | } |
113 | |
114 | /** |
115 | * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine |
116 | * |
117 | * @rdev: radeon_device pointer |
118 | * @ib: IB object to schedule |
119 | * |
120 | * Schedule an IB in the DMA ring (cayman-SI). |
121 | */ |
122 | void cayman_dma_ring_ib_execute(struct radeon_device *rdev, |
123 | struct radeon_ib *ib) |
124 | { |
125 | struct radeon_ring *ring = &rdev->ring[ib->ring]; |
126 | |
127 | if (rdev->wb.enabled) { |
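		/* Predict where the rptr will point once the engine has
		 * consumed the IB packet: skip this 4 DW WRITE packet, pad
		 * up to the start of the 8 DW aligned IB packet, then add
		 * the 3 DW IB packet itself.
		 */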
128 | u32 next_rptr = ring->wptr + 4; |
129 | while ((next_rptr & 7) != 5) |
130 | next_rptr++; |
131 | next_rptr += 3; |
132 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); |
133 | radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); |
134 | radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); |
135 | radeon_ring_write(ring, next_rptr); |
136 | } |
137 | |
138 | /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. |
139 | * Pad as necessary with NOPs. |
140 | */ |
141 | while ((ring->wptr & 7) != 5) |
142 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); |
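	/* The IB base address must be 32-byte aligned; the IB length in
	 * dwords is packed above the upper bits of the 40-bit GPU address.
	 */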
143 | radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); |
144 | radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); |
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
148 | |
149 | /** |
150 | * cayman_dma_stop - stop the async dma engines |
151 | * |
152 | * @rdev: radeon_device pointer |
153 | * |
154 | * Stop the async dma engines (cayman-SI). |
155 | */ |
156 | void cayman_dma_stop(struct radeon_device *rdev) |
157 | { |
158 | u32 rb_cntl; |
159 | |
160 | if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || |
161 | (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) |
162 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); |
163 | |
164 | /* dma0 */ |
165 | rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); |
166 | rb_cntl &= ~DMA_RB_ENABLE; |
167 | WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); |
168 | |
169 | /* dma1 */ |
170 | rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); |
171 | rb_cntl &= ~DMA_RB_ENABLE; |
172 | WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); |
173 | |
174 | rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; |
175 | rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; |
176 | } |
177 | |
178 | /** |
179 | * cayman_dma_resume - setup and start the async dma engines |
180 | * |
181 | * @rdev: radeon_device pointer |
182 | * |
 * Set up the DMA ring buffers and enable them (cayman-SI).
184 | * Returns 0 for success, error for failure. |
185 | */ |
186 | int cayman_dma_resume(struct radeon_device *rdev) |
187 | { |
188 | struct radeon_ring *ring; |
189 | u32 rb_cntl, dma_cntl, ib_cntl; |
190 | u32 rb_bufsz; |
191 | u32 reg_offset, wb_offset; |
192 | int i, r; |
193 | |
194 | /* Reset dma */ |
195 | WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); |
196 | RREG32(SRBM_SOFT_RESET); |
197 | udelay(50); |
198 | WREG32(SRBM_SOFT_RESET, 0); |
199 | |
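	/* set up both async DMA engines (DMA0 and DMA1) */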
200 | for (i = 0; i < 2; i++) { |
201 | if (i == 0) { |
202 | ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; |
203 | reg_offset = DMA0_REGISTER_OFFSET; |
204 | wb_offset = R600_WB_DMA_RPTR_OFFSET; |
205 | } else { |
206 | ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; |
207 | reg_offset = DMA1_REGISTER_OFFSET; |
208 | wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; |
209 | } |
210 | |
211 | WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); |
212 | WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); |
213 | |
214 | /* Set ring buffer size in dwords */ |
215 | rb_bufsz = order_base_2(ring->ring_size / 4); |
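		/* the log2 ring size (in dwords) goes in the bits above the RB enable bit */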
216 | rb_cntl = rb_bufsz << 1; |
217 | #ifdef __BIG_ENDIAN |
218 | rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; |
219 | #endif |
220 | WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); |
221 | |
222 | /* Initialize the ring buffer's read and write pointers */ |
223 | WREG32(DMA_RB_RPTR + reg_offset, 0); |
224 | WREG32(DMA_RB_WPTR + reg_offset, 0); |
225 | |
226 | /* set the wb address whether it's enabled or not */ |
227 | WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, |
228 | upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); |
229 | WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, |
230 | ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); |
231 | |
232 | if (rdev->wb.enabled) |
233 | rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; |
234 | |
235 | WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); |
236 | |
237 | /* enable DMA IBs */ |
238 | ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; |
239 | #ifdef __BIG_ENDIAN |
240 | ib_cntl |= DMA_IB_SWAP_ENABLE; |
241 | #endif |
242 | WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); |
243 | |
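		/* disable the DMA context empty interrupt */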
244 | dma_cntl = RREG32(DMA_CNTL + reg_offset); |
245 | dma_cntl &= ~CTXEMPTY_INT_ENABLE; |
246 | WREG32(DMA_CNTL + reg_offset, dma_cntl); |
247 | |
248 | ring->wptr = 0; |
249 | WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); |
250 | |
251 | WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); |
252 | |
253 | ring->ready = true; |
254 | |
255 | r = radeon_ring_test(rdev, ring->idx, ring); |
256 | if (r) { |
257 | ring->ready = false; |
258 | return r; |
259 | } |
260 | } |
261 | |
262 | if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || |
263 | (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) |
264 | radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); |
265 | |
266 | return 0; |
267 | } |
268 | |
269 | /** |
270 | * cayman_dma_fini - tear down the async dma engines |
271 | * |
272 | * @rdev: radeon_device pointer |
273 | * |
274 | * Stop the async dma engines and free the rings (cayman-SI). |
275 | */ |
276 | void cayman_dma_fini(struct radeon_device *rdev) |
277 | { |
278 | cayman_dma_stop(rdev); |
279 | radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); |
280 | radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); |
281 | } |
282 | |
283 | /** |
284 | * cayman_dma_is_lockup - Check if the DMA engine is locked up |
285 | * |
286 | * @rdev: radeon_device pointer |
287 | * @ring: radeon_ring structure holding ring information |
288 | * |
289 | * Check if the async DMA engine is locked up. |
290 | * Returns true if the engine appears to be locked up, false if not. |
291 | */ |
292 | bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) |
293 | { |
294 | u32 reset_mask = cayman_gpu_check_soft_reset(rdev); |
295 | u32 mask; |
296 | |
297 | if (ring->idx == R600_RING_TYPE_DMA_INDEX) |
298 | mask = RADEON_RESET_DMA; |
299 | else |
300 | mask = RADEON_RESET_DMA1; |
301 | |
302 | if (!(reset_mask & mask)) { |
303 | radeon_ring_lockup_update(rdev, ring); |
304 | return false; |
305 | } |
306 | return radeon_ring_test_lockup(rdev, ring); |
307 | } |
308 | |
309 | /** |
310 | * cayman_dma_vm_set_page - update the page tables using the DMA |
311 | * |
312 | * @rdev: radeon_device pointer |
313 | * @ib: indirect buffer to fill with commands |
314 | * @pe: addr of the page entry |
315 | * @addr: dst addr to write into pe |
316 | * @count: number of page entries to update |
317 | * @incr: increase next addr by incr bytes |
318 | * @flags: hw access flags |
319 | * |
320 | * Update the page tables using the DMA (cayman/TN). |
321 | */ |
322 | void cayman_dma_vm_set_page(struct radeon_device *rdev, |
323 | struct radeon_ib *ib, |
324 | uint64_t pe, |
325 | uint64_t addr, unsigned count, |
326 | uint32_t incr, uint32_t flags) |
327 | { |
328 | uint64_t value; |
329 | unsigned ndw; |
330 | |
331 | trace_radeon_vm_set_page(pe, addr, count, incr, flags); |
332 | |
333 | if ((flags & R600_PTE_SYSTEM) || (count == 1)) { |
334 | while (count) { |
335 | ndw = count * 2; |
336 | if (ndw > 0xFFFFE) |
337 | ndw = 0xFFFFE; |
338 | |
			/* for non-physically contiguous pages (system), or a single entry */
340 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); |
341 | ib->ptr[ib->length_dw++] = pe; |
342 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; |
343 | for (; ndw > 0; ndw -= 2, --count, pe += 8) { |
344 | if (flags & R600_PTE_SYSTEM) { |
345 | value = radeon_vm_map_gart(rdev, addr); |
346 | value &= 0xFFFFFFFFFFFFF000ULL; |
347 | } else if (flags & R600_PTE_VALID) { |
348 | value = addr; |
349 | } else { |
350 | value = 0; |
351 | } |
352 | addr += incr; |
353 | value |= flags; |
354 | ib->ptr[ib->length_dw++] = value; |
355 | ib->ptr[ib->length_dw++] = upper_32_bits(value); |
356 | } |
357 | } |
358 | } else { |
359 | while (count) { |
360 | ndw = count * 2; |
361 | if (ndw > 0xFFFFE) |
362 | ndw = 0xFFFFE; |
363 | |
364 | if (flags & R600_PTE_VALID) |
365 | value = addr; |
366 | else |
367 | value = 0; |
368 | /* for physically contiguous pages (vram) */ |
369 | ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); |
370 | ib->ptr[ib->length_dw++] = pe; /* dst addr */ |
371 | ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; |
372 | ib->ptr[ib->length_dw++] = flags; /* mask */ |
373 | ib->ptr[ib->length_dw++] = 0; |
374 | ib->ptr[ib->length_dw++] = value; /* value */ |
375 | ib->ptr[ib->length_dw++] = upper_32_bits(value); |
376 | ib->ptr[ib->length_dw++] = incr; /* increment size */ |
377 | ib->ptr[ib->length_dw++] = 0; |
378 | pe += ndw * 4; |
379 | addr += (ndw / 2) * incr; |
380 | count -= ndw / 2; |
381 | } |
382 | } |
383 | while (ib->length_dw & 0x7) |
384 | ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); |
385 | } |
386 | |
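/**
 * cayman_dma_vm_flush - flush the VM TLB using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index
 * @vm: radeon_vm pointer
 *
 * Update the page table base address and request a TLB flush for
 * the given VM context via SRBM writes on the DMA ring (cayman/TN).
 */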
387 | void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) |
388 | { |
389 | struct radeon_ring *ring = &rdev->ring[ridx]; |
390 | |
391 | if (vm == NULL) |
392 | return; |
393 | |
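	/* update the page directory base address for this VM context */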
394 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); |
395 | radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); |
396 | radeon_ring_write(ring, vm->pd_gpu_addr >> 12); |
397 | |
398 | /* flush hdp cache */ |
399 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); |
400 | radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); |
401 | radeon_ring_write(ring, 1); |
402 | |
403 | /* bits 0-7 are the VM contexts0-7 */ |
404 | radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); |
405 | radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); |
406 | radeon_ring_write(ring, 1 << vm->id); |
407 | } |
408 | |
409 | |