1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <linux/slab.h>
26#include <linux/module.h>
27#include "drmP.h"
28#include "radeon.h"
29#include "radeon_asic.h"
30#include "cikd.h"
31#include "atom.h"
32#include "cik_blit_shaders.h"
33#include "radeon_ucode.h"
34#include "clearstate_ci.h"
35
36MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
46MODULE_FIRMWARE("radeon/HAWAII_me.bin");
47MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
48MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
49MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
50MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
51MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
52MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
53MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
54MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
60MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
61MODULE_FIRMWARE("radeon/KABINI_me.bin");
62MODULE_FIRMWARE("radeon/KABINI_ce.bin");
63MODULE_FIRMWARE("radeon/KABINI_mec.bin");
64MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
65MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
66MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
67MODULE_FIRMWARE("radeon/MULLINS_me.bin");
68MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
69MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
70MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
71MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
72
73extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74extern void r600_ih_ring_fini(struct radeon_device *rdev);
75extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
76extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
77extern bool evergreen_is_display_hung(struct radeon_device *rdev);
78extern void sumo_rlc_fini(struct radeon_device *rdev);
79extern int sumo_rlc_init(struct radeon_device *rdev);
80extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
81extern void si_rlc_reset(struct radeon_device *rdev);
82extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
83extern int cik_sdma_resume(struct radeon_device *rdev);
84extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
85extern void cik_sdma_fini(struct radeon_device *rdev);
86extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
87static void cik_rlc_stop(struct radeon_device *rdev);
88static void cik_pcie_gen3_enable(struct radeon_device *rdev);
89static void cik_program_aspm(struct radeon_device *rdev);
90static void cik_init_pg(struct radeon_device *rdev);
91static void cik_init_cg(struct radeon_device *rdev);
92static void cik_fini_pg(struct radeon_device *rdev);
93static void cik_fini_cg(struct radeon_device *rdev);
94static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
95 bool enable);
96
97/* get temperature in millidegrees */
98int ci_get_temp(struct radeon_device *rdev)
99{
100 u32 temp;
101 int actual_temp = 0;
102
103 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
104 CTF_TEMP_SHIFT;
105
106 if (temp & 0x200)
107 actual_temp = 255;
108 else
109 actual_temp = temp & 0x1ff;
110
111 actual_temp = actual_temp * 1000;
112
113 return actual_temp;
114}
115
116/* get temperature in millidegrees */
117int kv_get_temp(struct radeon_device *rdev)
118{
119 u32 temp;
120 int actual_temp = 0;
121
122 temp = RREG32_SMC(0xC0300E0C);
123
124 if (temp)
125 actual_temp = (temp / 8) - 49;
126 else
127 actual_temp = 0;
128
129 actual_temp = actual_temp * 1000;
130
131 return actual_temp;
132}
133
134/*
135 * Indirect registers accessor
136 */
137u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
138{
139 unsigned long flags;
140 u32 r;
141
142 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
143 WREG32(PCIE_INDEX, reg);
144 (void)RREG32(PCIE_INDEX);
145 r = RREG32(PCIE_DATA);
146 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
147 return r;
148}
149
150void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
151{
152 unsigned long flags;
153
154 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
155 WREG32(PCIE_INDEX, reg);
156 (void)RREG32(PCIE_INDEX);
157 WREG32(PCIE_DATA, v);
158 (void)RREG32(PCIE_DATA);
159 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
160}
161
162static const u32 spectre_rlc_save_restore_register_list[] =
163{
164 (0x0e00 << 16) | (0xc12c >> 2),
165 0x00000000,
166 (0x0e00 << 16) | (0xc140 >> 2),
167 0x00000000,
168 (0x0e00 << 16) | (0xc150 >> 2),
169 0x00000000,
170 (0x0e00 << 16) | (0xc15c >> 2),
171 0x00000000,
172 (0x0e00 << 16) | (0xc168 >> 2),
173 0x00000000,
174 (0x0e00 << 16) | (0xc170 >> 2),
175 0x00000000,
176 (0x0e00 << 16) | (0xc178 >> 2),
177 0x00000000,
178 (0x0e00 << 16) | (0xc204 >> 2),
179 0x00000000,
180 (0x0e00 << 16) | (0xc2b4 >> 2),
181 0x00000000,
182 (0x0e00 << 16) | (0xc2b8 >> 2),
183 0x00000000,
184 (0x0e00 << 16) | (0xc2bc >> 2),
185 0x00000000,
186 (0x0e00 << 16) | (0xc2c0 >> 2),
187 0x00000000,
188 (0x0e00 << 16) | (0x8228 >> 2),
189 0x00000000,
190 (0x0e00 << 16) | (0x829c >> 2),
191 0x00000000,
192 (0x0e00 << 16) | (0x869c >> 2),
193 0x00000000,
194 (0x0600 << 16) | (0x98f4 >> 2),
195 0x00000000,
196 (0x0e00 << 16) | (0x98f8 >> 2),
197 0x00000000,
198 (0x0e00 << 16) | (0x9900 >> 2),
199 0x00000000,
200 (0x0e00 << 16) | (0xc260 >> 2),
201 0x00000000,
202 (0x0e00 << 16) | (0x90e8 >> 2),
203 0x00000000,
204 (0x0e00 << 16) | (0x3c000 >> 2),
205 0x00000000,
206 (0x0e00 << 16) | (0x3c00c >> 2),
207 0x00000000,
208 (0x0e00 << 16) | (0x8c1c >> 2),
209 0x00000000,
210 (0x0e00 << 16) | (0x9700 >> 2),
211 0x00000000,
212 (0x0e00 << 16) | (0xcd20 >> 2),
213 0x00000000,
214 (0x4e00 << 16) | (0xcd20 >> 2),
215 0x00000000,
216 (0x5e00 << 16) | (0xcd20 >> 2),
217 0x00000000,
218 (0x6e00 << 16) | (0xcd20 >> 2),
219 0x00000000,
220 (0x7e00 << 16) | (0xcd20 >> 2),
221 0x00000000,
222 (0x8e00 << 16) | (0xcd20 >> 2),
223 0x00000000,
224 (0x9e00 << 16) | (0xcd20 >> 2),
225 0x00000000,
226 (0xae00 << 16) | (0xcd20 >> 2),
227 0x00000000,
228 (0xbe00 << 16) | (0xcd20 >> 2),
229 0x00000000,
230 (0x0e00 << 16) | (0x89bc >> 2),
231 0x00000000,
232 (0x0e00 << 16) | (0x8900 >> 2),
233 0x00000000,
234 0x3,
235 (0x0e00 << 16) | (0xc130 >> 2),
236 0x00000000,
237 (0x0e00 << 16) | (0xc134 >> 2),
238 0x00000000,
239 (0x0e00 << 16) | (0xc1fc >> 2),
240 0x00000000,
241 (0x0e00 << 16) | (0xc208 >> 2),
242 0x00000000,
243 (0x0e00 << 16) | (0xc264 >> 2),
244 0x00000000,
245 (0x0e00 << 16) | (0xc268 >> 2),
246 0x00000000,
247 (0x0e00 << 16) | (0xc26c >> 2),
248 0x00000000,
249 (0x0e00 << 16) | (0xc270 >> 2),
250 0x00000000,
251 (0x0e00 << 16) | (0xc274 >> 2),
252 0x00000000,
253 (0x0e00 << 16) | (0xc278 >> 2),
254 0x00000000,
255 (0x0e00 << 16) | (0xc27c >> 2),
256 0x00000000,
257 (0x0e00 << 16) | (0xc280 >> 2),
258 0x00000000,
259 (0x0e00 << 16) | (0xc284 >> 2),
260 0x00000000,
261 (0x0e00 << 16) | (0xc288 >> 2),
262 0x00000000,
263 (0x0e00 << 16) | (0xc28c >> 2),
264 0x00000000,
265 (0x0e00 << 16) | (0xc290 >> 2),
266 0x00000000,
267 (0x0e00 << 16) | (0xc294 >> 2),
268 0x00000000,
269 (0x0e00 << 16) | (0xc298 >> 2),
270 0x00000000,
271 (0x0e00 << 16) | (0xc29c >> 2),
272 0x00000000,
273 (0x0e00 << 16) | (0xc2a0 >> 2),
274 0x00000000,
275 (0x0e00 << 16) | (0xc2a4 >> 2),
276 0x00000000,
277 (0x0e00 << 16) | (0xc2a8 >> 2),
278 0x00000000,
279 (0x0e00 << 16) | (0xc2ac >> 2),
280 0x00000000,
281 (0x0e00 << 16) | (0xc2b0 >> 2),
282 0x00000000,
283 (0x0e00 << 16) | (0x301d0 >> 2),
284 0x00000000,
285 (0x0e00 << 16) | (0x30238 >> 2),
286 0x00000000,
287 (0x0e00 << 16) | (0x30250 >> 2),
288 0x00000000,
289 (0x0e00 << 16) | (0x30254 >> 2),
290 0x00000000,
291 (0x0e00 << 16) | (0x30258 >> 2),
292 0x00000000,
293 (0x0e00 << 16) | (0x3025c >> 2),
294 0x00000000,
295 (0x4e00 << 16) | (0xc900 >> 2),
296 0x00000000,
297 (0x5e00 << 16) | (0xc900 >> 2),
298 0x00000000,
299 (0x6e00 << 16) | (0xc900 >> 2),
300 0x00000000,
301 (0x7e00 << 16) | (0xc900 >> 2),
302 0x00000000,
303 (0x8e00 << 16) | (0xc900 >> 2),
304 0x00000000,
305 (0x9e00 << 16) | (0xc900 >> 2),
306 0x00000000,
307 (0xae00 << 16) | (0xc900 >> 2),
308 0x00000000,
309 (0xbe00 << 16) | (0xc900 >> 2),
310 0x00000000,
311 (0x4e00 << 16) | (0xc904 >> 2),
312 0x00000000,
313 (0x5e00 << 16) | (0xc904 >> 2),
314 0x00000000,
315 (0x6e00 << 16) | (0xc904 >> 2),
316 0x00000000,
317 (0x7e00 << 16) | (0xc904 >> 2),
318 0x00000000,
319 (0x8e00 << 16) | (0xc904 >> 2),
320 0x00000000,
321 (0x9e00 << 16) | (0xc904 >> 2),
322 0x00000000,
323 (0xae00 << 16) | (0xc904 >> 2),
324 0x00000000,
325 (0xbe00 << 16) | (0xc904 >> 2),
326 0x00000000,
327 (0x4e00 << 16) | (0xc908 >> 2),
328 0x00000000,
329 (0x5e00 << 16) | (0xc908 >> 2),
330 0x00000000,
331 (0x6e00 << 16) | (0xc908 >> 2),
332 0x00000000,
333 (0x7e00 << 16) | (0xc908 >> 2),
334 0x00000000,
335 (0x8e00 << 16) | (0xc908 >> 2),
336 0x00000000,
337 (0x9e00 << 16) | (0xc908 >> 2),
338 0x00000000,
339 (0xae00 << 16) | (0xc908 >> 2),
340 0x00000000,
341 (0xbe00 << 16) | (0xc908 >> 2),
342 0x00000000,
343 (0x4e00 << 16) | (0xc90c >> 2),
344 0x00000000,
345 (0x5e00 << 16) | (0xc90c >> 2),
346 0x00000000,
347 (0x6e00 << 16) | (0xc90c >> 2),
348 0x00000000,
349 (0x7e00 << 16) | (0xc90c >> 2),
350 0x00000000,
351 (0x8e00 << 16) | (0xc90c >> 2),
352 0x00000000,
353 (0x9e00 << 16) | (0xc90c >> 2),
354 0x00000000,
355 (0xae00 << 16) | (0xc90c >> 2),
356 0x00000000,
357 (0xbe00 << 16) | (0xc90c >> 2),
358 0x00000000,
359 (0x4e00 << 16) | (0xc910 >> 2),
360 0x00000000,
361 (0x5e00 << 16) | (0xc910 >> 2),
362 0x00000000,
363 (0x6e00 << 16) | (0xc910 >> 2),
364 0x00000000,
365 (0x7e00 << 16) | (0xc910 >> 2),
366 0x00000000,
367 (0x8e00 << 16) | (0xc910 >> 2),
368 0x00000000,
369 (0x9e00 << 16) | (0xc910 >> 2),
370 0x00000000,
371 (0xae00 << 16) | (0xc910 >> 2),
372 0x00000000,
373 (0xbe00 << 16) | (0xc910 >> 2),
374 0x00000000,
375 (0x0e00 << 16) | (0xc99c >> 2),
376 0x00000000,
377 (0x0e00 << 16) | (0x9834 >> 2),
378 0x00000000,
379 (0x0000 << 16) | (0x30f00 >> 2),
380 0x00000000,
381 (0x0001 << 16) | (0x30f00 >> 2),
382 0x00000000,
383 (0x0000 << 16) | (0x30f04 >> 2),
384 0x00000000,
385 (0x0001 << 16) | (0x30f04 >> 2),
386 0x00000000,
387 (0x0000 << 16) | (0x30f08 >> 2),
388 0x00000000,
389 (0x0001 << 16) | (0x30f08 >> 2),
390 0x00000000,
391 (0x0000 << 16) | (0x30f0c >> 2),
392 0x00000000,
393 (0x0001 << 16) | (0x30f0c >> 2),
394 0x00000000,
395 (0x0600 << 16) | (0x9b7c >> 2),
396 0x00000000,
397 (0x0e00 << 16) | (0x8a14 >> 2),
398 0x00000000,
399 (0x0e00 << 16) | (0x8a18 >> 2),
400 0x00000000,
401 (0x0600 << 16) | (0x30a00 >> 2),
402 0x00000000,
403 (0x0e00 << 16) | (0x8bf0 >> 2),
404 0x00000000,
405 (0x0e00 << 16) | (0x8bcc >> 2),
406 0x00000000,
407 (0x0e00 << 16) | (0x8b24 >> 2),
408 0x00000000,
409 (0x0e00 << 16) | (0x30a04 >> 2),
410 0x00000000,
411 (0x0600 << 16) | (0x30a10 >> 2),
412 0x00000000,
413 (0x0600 << 16) | (0x30a14 >> 2),
414 0x00000000,
415 (0x0600 << 16) | (0x30a18 >> 2),
416 0x00000000,
417 (0x0600 << 16) | (0x30a2c >> 2),
418 0x00000000,
419 (0x0e00 << 16) | (0xc700 >> 2),
420 0x00000000,
421 (0x0e00 << 16) | (0xc704 >> 2),
422 0x00000000,
423 (0x0e00 << 16) | (0xc708 >> 2),
424 0x00000000,
425 (0x0e00 << 16) | (0xc768 >> 2),
426 0x00000000,
427 (0x0400 << 16) | (0xc770 >> 2),
428 0x00000000,
429 (0x0400 << 16) | (0xc774 >> 2),
430 0x00000000,
431 (0x0400 << 16) | (0xc778 >> 2),
432 0x00000000,
433 (0x0400 << 16) | (0xc77c >> 2),
434 0x00000000,
435 (0x0400 << 16) | (0xc780 >> 2),
436 0x00000000,
437 (0x0400 << 16) | (0xc784 >> 2),
438 0x00000000,
439 (0x0400 << 16) | (0xc788 >> 2),
440 0x00000000,
441 (0x0400 << 16) | (0xc78c >> 2),
442 0x00000000,
443 (0x0400 << 16) | (0xc798 >> 2),
444 0x00000000,
445 (0x0400 << 16) | (0xc79c >> 2),
446 0x00000000,
447 (0x0400 << 16) | (0xc7a0 >> 2),
448 0x00000000,
449 (0x0400 << 16) | (0xc7a4 >> 2),
450 0x00000000,
451 (0x0400 << 16) | (0xc7a8 >> 2),
452 0x00000000,
453 (0x0400 << 16) | (0xc7ac >> 2),
454 0x00000000,
455 (0x0400 << 16) | (0xc7b0 >> 2),
456 0x00000000,
457 (0x0400 << 16) | (0xc7b4 >> 2),
458 0x00000000,
459 (0x0e00 << 16) | (0x9100 >> 2),
460 0x00000000,
461 (0x0e00 << 16) | (0x3c010 >> 2),
462 0x00000000,
463 (0x0e00 << 16) | (0x92a8 >> 2),
464 0x00000000,
465 (0x0e00 << 16) | (0x92ac >> 2),
466 0x00000000,
467 (0x0e00 << 16) | (0x92b4 >> 2),
468 0x00000000,
469 (0x0e00 << 16) | (0x92b8 >> 2),
470 0x00000000,
471 (0x0e00 << 16) | (0x92bc >> 2),
472 0x00000000,
473 (0x0e00 << 16) | (0x92c0 >> 2),
474 0x00000000,
475 (0x0e00 << 16) | (0x92c4 >> 2),
476 0x00000000,
477 (0x0e00 << 16) | (0x92c8 >> 2),
478 0x00000000,
479 (0x0e00 << 16) | (0x92cc >> 2),
480 0x00000000,
481 (0x0e00 << 16) | (0x92d0 >> 2),
482 0x00000000,
483 (0x0e00 << 16) | (0x8c00 >> 2),
484 0x00000000,
485 (0x0e00 << 16) | (0x8c04 >> 2),
486 0x00000000,
487 (0x0e00 << 16) | (0x8c20 >> 2),
488 0x00000000,
489 (0x0e00 << 16) | (0x8c38 >> 2),
490 0x00000000,
491 (0x0e00 << 16) | (0x8c3c >> 2),
492 0x00000000,
493 (0x0e00 << 16) | (0xae00 >> 2),
494 0x00000000,
495 (0x0e00 << 16) | (0x9604 >> 2),
496 0x00000000,
497 (0x0e00 << 16) | (0xac08 >> 2),
498 0x00000000,
499 (0x0e00 << 16) | (0xac0c >> 2),
500 0x00000000,
501 (0x0e00 << 16) | (0xac10 >> 2),
502 0x00000000,
503 (0x0e00 << 16) | (0xac14 >> 2),
504 0x00000000,
505 (0x0e00 << 16) | (0xac58 >> 2),
506 0x00000000,
507 (0x0e00 << 16) | (0xac68 >> 2),
508 0x00000000,
509 (0x0e00 << 16) | (0xac6c >> 2),
510 0x00000000,
511 (0x0e00 << 16) | (0xac70 >> 2),
512 0x00000000,
513 (0x0e00 << 16) | (0xac74 >> 2),
514 0x00000000,
515 (0x0e00 << 16) | (0xac78 >> 2),
516 0x00000000,
517 (0x0e00 << 16) | (0xac7c >> 2),
518 0x00000000,
519 (0x0e00 << 16) | (0xac80 >> 2),
520 0x00000000,
521 (0x0e00 << 16) | (0xac84 >> 2),
522 0x00000000,
523 (0x0e00 << 16) | (0xac88 >> 2),
524 0x00000000,
525 (0x0e00 << 16) | (0xac8c >> 2),
526 0x00000000,
527 (0x0e00 << 16) | (0x970c >> 2),
528 0x00000000,
529 (0x0e00 << 16) | (0x9714 >> 2),
530 0x00000000,
531 (0x0e00 << 16) | (0x9718 >> 2),
532 0x00000000,
533 (0x0e00 << 16) | (0x971c >> 2),
534 0x00000000,
535 (0x0e00 << 16) | (0x31068 >> 2),
536 0x00000000,
537 (0x4e00 << 16) | (0x31068 >> 2),
538 0x00000000,
539 (0x5e00 << 16) | (0x31068 >> 2),
540 0x00000000,
541 (0x6e00 << 16) | (0x31068 >> 2),
542 0x00000000,
543 (0x7e00 << 16) | (0x31068 >> 2),
544 0x00000000,
545 (0x8e00 << 16) | (0x31068 >> 2),
546 0x00000000,
547 (0x9e00 << 16) | (0x31068 >> 2),
548 0x00000000,
549 (0xae00 << 16) | (0x31068 >> 2),
550 0x00000000,
551 (0xbe00 << 16) | (0x31068 >> 2),
552 0x00000000,
553 (0x0e00 << 16) | (0xcd10 >> 2),
554 0x00000000,
555 (0x0e00 << 16) | (0xcd14 >> 2),
556 0x00000000,
557 (0x0e00 << 16) | (0x88b0 >> 2),
558 0x00000000,
559 (0x0e00 << 16) | (0x88b4 >> 2),
560 0x00000000,
561 (0x0e00 << 16) | (0x88b8 >> 2),
562 0x00000000,
563 (0x0e00 << 16) | (0x88bc >> 2),
564 0x00000000,
565 (0x0400 << 16) | (0x89c0 >> 2),
566 0x00000000,
567 (0x0e00 << 16) | (0x88c4 >> 2),
568 0x00000000,
569 (0x0e00 << 16) | (0x88c8 >> 2),
570 0x00000000,
571 (0x0e00 << 16) | (0x88d0 >> 2),
572 0x00000000,
573 (0x0e00 << 16) | (0x88d4 >> 2),
574 0x00000000,
575 (0x0e00 << 16) | (0x88d8 >> 2),
576 0x00000000,
577 (0x0e00 << 16) | (0x8980 >> 2),
578 0x00000000,
579 (0x0e00 << 16) | (0x30938 >> 2),
580 0x00000000,
581 (0x0e00 << 16) | (0x3093c >> 2),
582 0x00000000,
583 (0x0e00 << 16) | (0x30940 >> 2),
584 0x00000000,
585 (0x0e00 << 16) | (0x89a0 >> 2),
586 0x00000000,
587 (0x0e00 << 16) | (0x30900 >> 2),
588 0x00000000,
589 (0x0e00 << 16) | (0x30904 >> 2),
590 0x00000000,
591 (0x0e00 << 16) | (0x89b4 >> 2),
592 0x00000000,
593 (0x0e00 << 16) | (0x3c210 >> 2),
594 0x00000000,
595 (0x0e00 << 16) | (0x3c214 >> 2),
596 0x00000000,
597 (0x0e00 << 16) | (0x3c218 >> 2),
598 0x00000000,
599 (0x0e00 << 16) | (0x8904 >> 2),
600 0x00000000,
601 0x5,
602 (0x0e00 << 16) | (0x8c28 >> 2),
603 (0x0e00 << 16) | (0x8c2c >> 2),
604 (0x0e00 << 16) | (0x8c30 >> 2),
605 (0x0e00 << 16) | (0x8c34 >> 2),
606 (0x0e00 << 16) | (0x9600 >> 2),
607};
608
609static const u32 kalindi_rlc_save_restore_register_list[] =
610{
611 (0x0e00 << 16) | (0xc12c >> 2),
612 0x00000000,
613 (0x0e00 << 16) | (0xc140 >> 2),
614 0x00000000,
615 (0x0e00 << 16) | (0xc150 >> 2),
616 0x00000000,
617 (0x0e00 << 16) | (0xc15c >> 2),
618 0x00000000,
619 (0x0e00 << 16) | (0xc168 >> 2),
620 0x00000000,
621 (0x0e00 << 16) | (0xc170 >> 2),
622 0x00000000,
623 (0x0e00 << 16) | (0xc204 >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0xc2b4 >> 2),
626 0x00000000,
627 (0x0e00 << 16) | (0xc2b8 >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0xc2bc >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0xc2c0 >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0x8228 >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0x829c >> 2),
636 0x00000000,
637 (0x0e00 << 16) | (0x869c >> 2),
638 0x00000000,
639 (0x0600 << 16) | (0x98f4 >> 2),
640 0x00000000,
641 (0x0e00 << 16) | (0x98f8 >> 2),
642 0x00000000,
643 (0x0e00 << 16) | (0x9900 >> 2),
644 0x00000000,
645 (0x0e00 << 16) | (0xc260 >> 2),
646 0x00000000,
647 (0x0e00 << 16) | (0x90e8 >> 2),
648 0x00000000,
649 (0x0e00 << 16) | (0x3c000 >> 2),
650 0x00000000,
651 (0x0e00 << 16) | (0x3c00c >> 2),
652 0x00000000,
653 (0x0e00 << 16) | (0x8c1c >> 2),
654 0x00000000,
655 (0x0e00 << 16) | (0x9700 >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0xcd20 >> 2),
658 0x00000000,
659 (0x4e00 << 16) | (0xcd20 >> 2),
660 0x00000000,
661 (0x5e00 << 16) | (0xcd20 >> 2),
662 0x00000000,
663 (0x6e00 << 16) | (0xcd20 >> 2),
664 0x00000000,
665 (0x7e00 << 16) | (0xcd20 >> 2),
666 0x00000000,
667 (0x0e00 << 16) | (0x89bc >> 2),
668 0x00000000,
669 (0x0e00 << 16) | (0x8900 >> 2),
670 0x00000000,
671 0x3,
672 (0x0e00 << 16) | (0xc130 >> 2),
673 0x00000000,
674 (0x0e00 << 16) | (0xc134 >> 2),
675 0x00000000,
676 (0x0e00 << 16) | (0xc1fc >> 2),
677 0x00000000,
678 (0x0e00 << 16) | (0xc208 >> 2),
679 0x00000000,
680 (0x0e00 << 16) | (0xc264 >> 2),
681 0x00000000,
682 (0x0e00 << 16) | (0xc268 >> 2),
683 0x00000000,
684 (0x0e00 << 16) | (0xc26c >> 2),
685 0x00000000,
686 (0x0e00 << 16) | (0xc270 >> 2),
687 0x00000000,
688 (0x0e00 << 16) | (0xc274 >> 2),
689 0x00000000,
690 (0x0e00 << 16) | (0xc28c >> 2),
691 0x00000000,
692 (0x0e00 << 16) | (0xc290 >> 2),
693 0x00000000,
694 (0x0e00 << 16) | (0xc294 >> 2),
695 0x00000000,
696 (0x0e00 << 16) | (0xc298 >> 2),
697 0x00000000,
698 (0x0e00 << 16) | (0xc2a0 >> 2),
699 0x00000000,
700 (0x0e00 << 16) | (0xc2a4 >> 2),
701 0x00000000,
702 (0x0e00 << 16) | (0xc2a8 >> 2),
703 0x00000000,
704 (0x0e00 << 16) | (0xc2ac >> 2),
705 0x00000000,
706 (0x0e00 << 16) | (0x301d0 >> 2),
707 0x00000000,
708 (0x0e00 << 16) | (0x30238 >> 2),
709 0x00000000,
710 (0x0e00 << 16) | (0x30250 >> 2),
711 0x00000000,
712 (0x0e00 << 16) | (0x30254 >> 2),
713 0x00000000,
714 (0x0e00 << 16) | (0x30258 >> 2),
715 0x00000000,
716 (0x0e00 << 16) | (0x3025c >> 2),
717 0x00000000,
718 (0x4e00 << 16) | (0xc900 >> 2),
719 0x00000000,
720 (0x5e00 << 16) | (0xc900 >> 2),
721 0x00000000,
722 (0x6e00 << 16) | (0xc900 >> 2),
723 0x00000000,
724 (0x7e00 << 16) | (0xc900 >> 2),
725 0x00000000,
726 (0x4e00 << 16) | (0xc904 >> 2),
727 0x00000000,
728 (0x5e00 << 16) | (0xc904 >> 2),
729 0x00000000,
730 (0x6e00 << 16) | (0xc904 >> 2),
731 0x00000000,
732 (0x7e00 << 16) | (0xc904 >> 2),
733 0x00000000,
734 (0x4e00 << 16) | (0xc908 >> 2),
735 0x00000000,
736 (0x5e00 << 16) | (0xc908 >> 2),
737 0x00000000,
738 (0x6e00 << 16) | (0xc908 >> 2),
739 0x00000000,
740 (0x7e00 << 16) | (0xc908 >> 2),
741 0x00000000,
742 (0x4e00 << 16) | (0xc90c >> 2),
743 0x00000000,
744 (0x5e00 << 16) | (0xc90c >> 2),
745 0x00000000,
746 (0x6e00 << 16) | (0xc90c >> 2),
747 0x00000000,
748 (0x7e00 << 16) | (0xc90c >> 2),
749 0x00000000,
750 (0x4e00 << 16) | (0xc910 >> 2),
751 0x00000000,
752 (0x5e00 << 16) | (0xc910 >> 2),
753 0x00000000,
754 (0x6e00 << 16) | (0xc910 >> 2),
755 0x00000000,
756 (0x7e00 << 16) | (0xc910 >> 2),
757 0x00000000,
758 (0x0e00 << 16) | (0xc99c >> 2),
759 0x00000000,
760 (0x0e00 << 16) | (0x9834 >> 2),
761 0x00000000,
762 (0x0000 << 16) | (0x30f00 >> 2),
763 0x00000000,
764 (0x0000 << 16) | (0x30f04 >> 2),
765 0x00000000,
766 (0x0000 << 16) | (0x30f08 >> 2),
767 0x00000000,
768 (0x0000 << 16) | (0x30f0c >> 2),
769 0x00000000,
770 (0x0600 << 16) | (0x9b7c >> 2),
771 0x00000000,
772 (0x0e00 << 16) | (0x8a14 >> 2),
773 0x00000000,
774 (0x0e00 << 16) | (0x8a18 >> 2),
775 0x00000000,
776 (0x0600 << 16) | (0x30a00 >> 2),
777 0x00000000,
778 (0x0e00 << 16) | (0x8bf0 >> 2),
779 0x00000000,
780 (0x0e00 << 16) | (0x8bcc >> 2),
781 0x00000000,
782 (0x0e00 << 16) | (0x8b24 >> 2),
783 0x00000000,
784 (0x0e00 << 16) | (0x30a04 >> 2),
785 0x00000000,
786 (0x0600 << 16) | (0x30a10 >> 2),
787 0x00000000,
788 (0x0600 << 16) | (0x30a14 >> 2),
789 0x00000000,
790 (0x0600 << 16) | (0x30a18 >> 2),
791 0x00000000,
792 (0x0600 << 16) | (0x30a2c >> 2),
793 0x00000000,
794 (0x0e00 << 16) | (0xc700 >> 2),
795 0x00000000,
796 (0x0e00 << 16) | (0xc704 >> 2),
797 0x00000000,
798 (0x0e00 << 16) | (0xc708 >> 2),
799 0x00000000,
800 (0x0e00 << 16) | (0xc768 >> 2),
801 0x00000000,
802 (0x0400 << 16) | (0xc770 >> 2),
803 0x00000000,
804 (0x0400 << 16) | (0xc774 >> 2),
805 0x00000000,
806 (0x0400 << 16) | (0xc798 >> 2),
807 0x00000000,
808 (0x0400 << 16) | (0xc79c >> 2),
809 0x00000000,
810 (0x0e00 << 16) | (0x9100 >> 2),
811 0x00000000,
812 (0x0e00 << 16) | (0x3c010 >> 2),
813 0x00000000,
814 (0x0e00 << 16) | (0x8c00 >> 2),
815 0x00000000,
816 (0x0e00 << 16) | (0x8c04 >> 2),
817 0x00000000,
818 (0x0e00 << 16) | (0x8c20 >> 2),
819 0x00000000,
820 (0x0e00 << 16) | (0x8c38 >> 2),
821 0x00000000,
822 (0x0e00 << 16) | (0x8c3c >> 2),
823 0x00000000,
824 (0x0e00 << 16) | (0xae00 >> 2),
825 0x00000000,
826 (0x0e00 << 16) | (0x9604 >> 2),
827 0x00000000,
828 (0x0e00 << 16) | (0xac08 >> 2),
829 0x00000000,
830 (0x0e00 << 16) | (0xac0c >> 2),
831 0x00000000,
832 (0x0e00 << 16) | (0xac10 >> 2),
833 0x00000000,
834 (0x0e00 << 16) | (0xac14 >> 2),
835 0x00000000,
836 (0x0e00 << 16) | (0xac58 >> 2),
837 0x00000000,
838 (0x0e00 << 16) | (0xac68 >> 2),
839 0x00000000,
840 (0x0e00 << 16) | (0xac6c >> 2),
841 0x00000000,
842 (0x0e00 << 16) | (0xac70 >> 2),
843 0x00000000,
844 (0x0e00 << 16) | (0xac74 >> 2),
845 0x00000000,
846 (0x0e00 << 16) | (0xac78 >> 2),
847 0x00000000,
848 (0x0e00 << 16) | (0xac7c >> 2),
849 0x00000000,
850 (0x0e00 << 16) | (0xac80 >> 2),
851 0x00000000,
852 (0x0e00 << 16) | (0xac84 >> 2),
853 0x00000000,
854 (0x0e00 << 16) | (0xac88 >> 2),
855 0x00000000,
856 (0x0e00 << 16) | (0xac8c >> 2),
857 0x00000000,
858 (0x0e00 << 16) | (0x970c >> 2),
859 0x00000000,
860 (0x0e00 << 16) | (0x9714 >> 2),
861 0x00000000,
862 (0x0e00 << 16) | (0x9718 >> 2),
863 0x00000000,
864 (0x0e00 << 16) | (0x971c >> 2),
865 0x00000000,
866 (0x0e00 << 16) | (0x31068 >> 2),
867 0x00000000,
868 (0x4e00 << 16) | (0x31068 >> 2),
869 0x00000000,
870 (0x5e00 << 16) | (0x31068 >> 2),
871 0x00000000,
872 (0x6e00 << 16) | (0x31068 >> 2),
873 0x00000000,
874 (0x7e00 << 16) | (0x31068 >> 2),
875 0x00000000,
876 (0x0e00 << 16) | (0xcd10 >> 2),
877 0x00000000,
878 (0x0e00 << 16) | (0xcd14 >> 2),
879 0x00000000,
880 (0x0e00 << 16) | (0x88b0 >> 2),
881 0x00000000,
882 (0x0e00 << 16) | (0x88b4 >> 2),
883 0x00000000,
884 (0x0e00 << 16) | (0x88b8 >> 2),
885 0x00000000,
886 (0x0e00 << 16) | (0x88bc >> 2),
887 0x00000000,
888 (0x0400 << 16) | (0x89c0 >> 2),
889 0x00000000,
890 (0x0e00 << 16) | (0x88c4 >> 2),
891 0x00000000,
892 (0x0e00 << 16) | (0x88c8 >> 2),
893 0x00000000,
894 (0x0e00 << 16) | (0x88d0 >> 2),
895 0x00000000,
896 (0x0e00 << 16) | (0x88d4 >> 2),
897 0x00000000,
898 (0x0e00 << 16) | (0x88d8 >> 2),
899 0x00000000,
900 (0x0e00 << 16) | (0x8980 >> 2),
901 0x00000000,
902 (0x0e00 << 16) | (0x30938 >> 2),
903 0x00000000,
904 (0x0e00 << 16) | (0x3093c >> 2),
905 0x00000000,
906 (0x0e00 << 16) | (0x30940 >> 2),
907 0x00000000,
908 (0x0e00 << 16) | (0x89a0 >> 2),
909 0x00000000,
910 (0x0e00 << 16) | (0x30900 >> 2),
911 0x00000000,
912 (0x0e00 << 16) | (0x30904 >> 2),
913 0x00000000,
914 (0x0e00 << 16) | (0x89b4 >> 2),
915 0x00000000,
916 (0x0e00 << 16) | (0x3e1fc >> 2),
917 0x00000000,
918 (0x0e00 << 16) | (0x3c210 >> 2),
919 0x00000000,
920 (0x0e00 << 16) | (0x3c214 >> 2),
921 0x00000000,
922 (0x0e00 << 16) | (0x3c218 >> 2),
923 0x00000000,
924 (0x0e00 << 16) | (0x8904 >> 2),
925 0x00000000,
926 0x5,
927 (0x0e00 << 16) | (0x8c28 >> 2),
928 (0x0e00 << 16) | (0x8c2c >> 2),
929 (0x0e00 << 16) | (0x8c30 >> 2),
930 (0x0e00 << 16) | (0x8c34 >> 2),
931 (0x0e00 << 16) | (0x9600 >> 2),
932};
933
934static const u32 bonaire_golden_spm_registers[] =
935{
936 0x30800, 0xe0ffffff, 0xe0000000
937};
938
939static const u32 bonaire_golden_common_registers[] =
940{
941 0xc770, 0xffffffff, 0x00000800,
942 0xc774, 0xffffffff, 0x00000800,
943 0xc798, 0xffffffff, 0x00007fbf,
944 0xc79c, 0xffffffff, 0x00007faf
945};
946
947static const u32 bonaire_golden_registers[] =
948{
949 0x3354, 0x00000333, 0x00000333,
950 0x3350, 0x000c0fc0, 0x00040200,
951 0x9a10, 0x00010000, 0x00058208,
952 0x3c000, 0xffff1fff, 0x00140000,
953 0x3c200, 0xfdfc0fff, 0x00000100,
954 0x3c234, 0x40000000, 0x40000200,
955 0x9830, 0xffffffff, 0x00000000,
956 0x9834, 0xf00fffff, 0x00000400,
957 0x9838, 0x0002021c, 0x00020200,
958 0xc78, 0x00000080, 0x00000000,
959 0x5bb0, 0x000000f0, 0x00000070,
960 0x5bc0, 0xf0311fff, 0x80300000,
961 0x98f8, 0x73773777, 0x12010001,
962 0x350c, 0x00810000, 0x408af000,
963 0x7030, 0x31000111, 0x00000011,
964 0x2f48, 0x73773777, 0x12010001,
965 0x220c, 0x00007fb6, 0x0021a1b1,
966 0x2210, 0x00007fb6, 0x002021b1,
967 0x2180, 0x00007fb6, 0x00002191,
968 0x2218, 0x00007fb6, 0x002121b1,
969 0x221c, 0x00007fb6, 0x002021b1,
970 0x21dc, 0x00007fb6, 0x00002191,
971 0x21e0, 0x00007fb6, 0x00002191,
972 0x3628, 0x0000003f, 0x0000000a,
973 0x362c, 0x0000003f, 0x0000000a,
974 0x2ae4, 0x00073ffe, 0x000022a2,
975 0x240c, 0x000007ff, 0x00000000,
976 0x8a14, 0xf000003f, 0x00000007,
977 0x8bf0, 0x00002001, 0x00000001,
978 0x8b24, 0xffffffff, 0x00ffffff,
979 0x30a04, 0x0000ff0f, 0x00000000,
980 0x28a4c, 0x07ffffff, 0x06000000,
981 0x4d8, 0x00000fff, 0x00000100,
982 0x3e78, 0x00000001, 0x00000002,
983 0x9100, 0x03000000, 0x0362c688,
984 0x8c00, 0x000000ff, 0x00000001,
985 0xe40, 0x00001fff, 0x00001fff,
986 0x9060, 0x0000007f, 0x00000020,
987 0x9508, 0x00010000, 0x00010000,
988 0xac14, 0x000003ff, 0x000000f3,
989 0xac0c, 0xffffffff, 0x00001032
990};
991
992static const u32 bonaire_mgcg_cgcg_init[] =
993{
994 0xc420, 0xffffffff, 0xfffffffc,
995 0x30800, 0xffffffff, 0xe0000000,
996 0x3c2a0, 0xffffffff, 0x00000100,
997 0x3c208, 0xffffffff, 0x00000100,
998 0x3c2c0, 0xffffffff, 0xc0000100,
999 0x3c2c8, 0xffffffff, 0xc0000100,
1000 0x3c2c4, 0xffffffff, 0xc0000100,
1001 0x55e4, 0xffffffff, 0x00600100,
1002 0x3c280, 0xffffffff, 0x00000100,
1003 0x3c214, 0xffffffff, 0x06000100,
1004 0x3c220, 0xffffffff, 0x00000100,
1005 0x3c218, 0xffffffff, 0x06000100,
1006 0x3c204, 0xffffffff, 0x00000100,
1007 0x3c2e0, 0xffffffff, 0x00000100,
1008 0x3c224, 0xffffffff, 0x00000100,
1009 0x3c200, 0xffffffff, 0x00000100,
1010 0x3c230, 0xffffffff, 0x00000100,
1011 0x3c234, 0xffffffff, 0x00000100,
1012 0x3c250, 0xffffffff, 0x00000100,
1013 0x3c254, 0xffffffff, 0x00000100,
1014 0x3c258, 0xffffffff, 0x00000100,
1015 0x3c25c, 0xffffffff, 0x00000100,
1016 0x3c260, 0xffffffff, 0x00000100,
1017 0x3c27c, 0xffffffff, 0x00000100,
1018 0x3c278, 0xffffffff, 0x00000100,
1019 0x3c210, 0xffffffff, 0x06000100,
1020 0x3c290, 0xffffffff, 0x00000100,
1021 0x3c274, 0xffffffff, 0x00000100,
1022 0x3c2b4, 0xffffffff, 0x00000100,
1023 0x3c2b0, 0xffffffff, 0x00000100,
1024 0x3c270, 0xffffffff, 0x00000100,
1025 0x30800, 0xffffffff, 0xe0000000,
1026 0x3c020, 0xffffffff, 0x00010000,
1027 0x3c024, 0xffffffff, 0x00030002,
1028 0x3c028, 0xffffffff, 0x00040007,
1029 0x3c02c, 0xffffffff, 0x00060005,
1030 0x3c030, 0xffffffff, 0x00090008,
1031 0x3c034, 0xffffffff, 0x00010000,
1032 0x3c038, 0xffffffff, 0x00030002,
1033 0x3c03c, 0xffffffff, 0x00040007,
1034 0x3c040, 0xffffffff, 0x00060005,
1035 0x3c044, 0xffffffff, 0x00090008,
1036 0x3c048, 0xffffffff, 0x00010000,
1037 0x3c04c, 0xffffffff, 0x00030002,
1038 0x3c050, 0xffffffff, 0x00040007,
1039 0x3c054, 0xffffffff, 0x00060005,
1040 0x3c058, 0xffffffff, 0x00090008,
1041 0x3c05c, 0xffffffff, 0x00010000,
1042 0x3c060, 0xffffffff, 0x00030002,
1043 0x3c064, 0xffffffff, 0x00040007,
1044 0x3c068, 0xffffffff, 0x00060005,
1045 0x3c06c, 0xffffffff, 0x00090008,
1046 0x3c070, 0xffffffff, 0x00010000,
1047 0x3c074, 0xffffffff, 0x00030002,
1048 0x3c078, 0xffffffff, 0x00040007,
1049 0x3c07c, 0xffffffff, 0x00060005,
1050 0x3c080, 0xffffffff, 0x00090008,
1051 0x3c084, 0xffffffff, 0x00010000,
1052 0x3c088, 0xffffffff, 0x00030002,
1053 0x3c08c, 0xffffffff, 0x00040007,
1054 0x3c090, 0xffffffff, 0x00060005,
1055 0x3c094, 0xffffffff, 0x00090008,
1056 0x3c098, 0xffffffff, 0x00010000,
1057 0x3c09c, 0xffffffff, 0x00030002,
1058 0x3c0a0, 0xffffffff, 0x00040007,
1059 0x3c0a4, 0xffffffff, 0x00060005,
1060 0x3c0a8, 0xffffffff, 0x00090008,
1061 0x3c000, 0xffffffff, 0x96e00200,
1062 0x8708, 0xffffffff, 0x00900100,
1063 0xc424, 0xffffffff, 0x0020003f,
1064 0x38, 0xffffffff, 0x0140001c,
1065 0x3c, 0x000f0000, 0x000f0000,
1066 0x220, 0xffffffff, 0xC060000C,
1067 0x224, 0xc0000fff, 0x00000100,
1068 0xf90, 0xffffffff, 0x00000100,
1069 0xf98, 0x00000101, 0x00000000,
1070 0x20a8, 0xffffffff, 0x00000104,
1071 0x55e4, 0xff000fff, 0x00000100,
1072 0x30cc, 0xc0000fff, 0x00000104,
1073 0xc1e4, 0x00000001, 0x00000001,
1074 0xd00c, 0xff000ff0, 0x00000100,
1075 0xd80c, 0xff000ff0, 0x00000100
1076};
1077
1078static const u32 spectre_golden_spm_registers[] =
1079{
1080 0x30800, 0xe0ffffff, 0xe0000000
1081};
1082
1083static const u32 spectre_golden_common_registers[] =
1084{
1085 0xc770, 0xffffffff, 0x00000800,
1086 0xc774, 0xffffffff, 0x00000800,
1087 0xc798, 0xffffffff, 0x00007fbf,
1088 0xc79c, 0xffffffff, 0x00007faf
1089};
1090
1091static const u32 spectre_golden_registers[] =
1092{
1093 0x3c000, 0xffff1fff, 0x96940200,
1094 0x3c00c, 0xffff0001, 0xff000000,
1095 0x3c200, 0xfffc0fff, 0x00000100,
1096 0x6ed8, 0x00010101, 0x00010000,
1097 0x9834, 0xf00fffff, 0x00000400,
1098 0x9838, 0xfffffffc, 0x00020200,
1099 0x5bb0, 0x000000f0, 0x00000070,
1100 0x5bc0, 0xf0311fff, 0x80300000,
1101 0x98f8, 0x73773777, 0x12010001,
1102 0x9b7c, 0x00ff0000, 0x00fc0000,
1103 0x2f48, 0x73773777, 0x12010001,
1104 0x8a14, 0xf000003f, 0x00000007,
1105 0x8b24, 0xffffffff, 0x00ffffff,
1106 0x28350, 0x3f3f3fff, 0x00000082,
1107 0x28354, 0x0000003f, 0x00000000,
1108 0x3e78, 0x00000001, 0x00000002,
1109 0x913c, 0xffff03df, 0x00000004,
1110 0xc768, 0x00000008, 0x00000008,
1111 0x8c00, 0x000008ff, 0x00000800,
1112 0x9508, 0x00010000, 0x00010000,
1113 0xac0c, 0xffffffff, 0x54763210,
1114 0x214f8, 0x01ff01ff, 0x00000002,
1115 0x21498, 0x007ff800, 0x00200000,
1116 0x2015c, 0xffffffff, 0x00000f40,
1117 0x30934, 0xffffffff, 0x00000001
1118};
1119
1120static const u32 spectre_mgcg_cgcg_init[] =
1121{
1122 0xc420, 0xffffffff, 0xfffffffc,
1123 0x30800, 0xffffffff, 0xe0000000,
1124 0x3c2a0, 0xffffffff, 0x00000100,
1125 0x3c208, 0xffffffff, 0x00000100,
1126 0x3c2c0, 0xffffffff, 0x00000100,
1127 0x3c2c8, 0xffffffff, 0x00000100,
1128 0x3c2c4, 0xffffffff, 0x00000100,
1129 0x55e4, 0xffffffff, 0x00600100,
1130 0x3c280, 0xffffffff, 0x00000100,
1131 0x3c214, 0xffffffff, 0x06000100,
1132 0x3c220, 0xffffffff, 0x00000100,
1133 0x3c218, 0xffffffff, 0x06000100,
1134 0x3c204, 0xffffffff, 0x00000100,
1135 0x3c2e0, 0xffffffff, 0x00000100,
1136 0x3c224, 0xffffffff, 0x00000100,
1137 0x3c200, 0xffffffff, 0x00000100,
1138 0x3c230, 0xffffffff, 0x00000100,
1139 0x3c234, 0xffffffff, 0x00000100,
1140 0x3c250, 0xffffffff, 0x00000100,
1141 0x3c254, 0xffffffff, 0x00000100,
1142 0x3c258, 0xffffffff, 0x00000100,
1143 0x3c25c, 0xffffffff, 0x00000100,
1144 0x3c260, 0xffffffff, 0x00000100,
1145 0x3c27c, 0xffffffff, 0x00000100,
1146 0x3c278, 0xffffffff, 0x00000100,
1147 0x3c210, 0xffffffff, 0x06000100,
1148 0x3c290, 0xffffffff, 0x00000100,
1149 0x3c274, 0xffffffff, 0x00000100,
1150 0x3c2b4, 0xffffffff, 0x00000100,
1151 0x3c2b0, 0xffffffff, 0x00000100,
1152 0x3c270, 0xffffffff, 0x00000100,
1153 0x30800, 0xffffffff, 0xe0000000,
1154 0x3c020, 0xffffffff, 0x00010000,
1155 0x3c024, 0xffffffff, 0x00030002,
1156 0x3c028, 0xffffffff, 0x00040007,
1157 0x3c02c, 0xffffffff, 0x00060005,
1158 0x3c030, 0xffffffff, 0x00090008,
1159 0x3c034, 0xffffffff, 0x00010000,
1160 0x3c038, 0xffffffff, 0x00030002,
1161 0x3c03c, 0xffffffff, 0x00040007,
1162 0x3c040, 0xffffffff, 0x00060005,
1163 0x3c044, 0xffffffff, 0x00090008,
1164 0x3c048, 0xffffffff, 0x00010000,
1165 0x3c04c, 0xffffffff, 0x00030002,
1166 0x3c050, 0xffffffff, 0x00040007,
1167 0x3c054, 0xffffffff, 0x00060005,
1168 0x3c058, 0xffffffff, 0x00090008,
1169 0x3c05c, 0xffffffff, 0x00010000,
1170 0x3c060, 0xffffffff, 0x00030002,
1171 0x3c064, 0xffffffff, 0x00040007,
1172 0x3c068, 0xffffffff, 0x00060005,
1173 0x3c06c, 0xffffffff, 0x00090008,
1174 0x3c070, 0xffffffff, 0x00010000,
1175 0x3c074, 0xffffffff, 0x00030002,
1176 0x3c078, 0xffffffff, 0x00040007,
1177 0x3c07c, 0xffffffff, 0x00060005,
1178 0x3c080, 0xffffffff, 0x00090008,
1179 0x3c084, 0xffffffff, 0x00010000,
1180 0x3c088, 0xffffffff, 0x00030002,
1181 0x3c08c, 0xffffffff, 0x00040007,
1182 0x3c090, 0xffffffff, 0x00060005,
1183 0x3c094, 0xffffffff, 0x00090008,
1184 0x3c098, 0xffffffff, 0x00010000,
1185 0x3c09c, 0xffffffff, 0x00030002,
1186 0x3c0a0, 0xffffffff, 0x00040007,
1187 0x3c0a4, 0xffffffff, 0x00060005,
1188 0x3c0a8, 0xffffffff, 0x00090008,
1189 0x3c0ac, 0xffffffff, 0x00010000,
1190 0x3c0b0, 0xffffffff, 0x00030002,
1191 0x3c0b4, 0xffffffff, 0x00040007,
1192 0x3c0b8, 0xffffffff, 0x00060005,
1193 0x3c0bc, 0xffffffff, 0x00090008,
1194 0x3c000, 0xffffffff, 0x96e00200,
1195 0x8708, 0xffffffff, 0x00900100,
1196 0xc424, 0xffffffff, 0x0020003f,
1197 0x38, 0xffffffff, 0x0140001c,
1198 0x3c, 0x000f0000, 0x000f0000,
1199 0x220, 0xffffffff, 0xC060000C,
1200 0x224, 0xc0000fff, 0x00000100,
1201 0xf90, 0xffffffff, 0x00000100,
1202 0xf98, 0x00000101, 0x00000000,
1203 0x20a8, 0xffffffff, 0x00000104,
1204 0x55e4, 0xff000fff, 0x00000100,
1205 0x30cc, 0xc0000fff, 0x00000104,
1206 0xc1e4, 0x00000001, 0x00000001,
1207 0xd00c, 0xff000ff0, 0x00000100,
1208 0xd80c, 0xff000ff0, 0x00000100
1209};
1210
1211static const u32 kalindi_golden_spm_registers[] =
1212{
1213 0x30800, 0xe0ffffff, 0xe0000000
1214};
1215
1216static const u32 kalindi_golden_common_registers[] =
1217{
1218 0xc770, 0xffffffff, 0x00000800,
1219 0xc774, 0xffffffff, 0x00000800,
1220 0xc798, 0xffffffff, 0x00007fbf,
1221 0xc79c, 0xffffffff, 0x00007faf
1222};
1223
1224static const u32 kalindi_golden_registers[] =
1225{
1226 0x3c000, 0xffffdfff, 0x6e944040,
1227 0x55e4, 0xff607fff, 0xfc000100,
1228 0x3c220, 0xff000fff, 0x00000100,
1229 0x3c224, 0xff000fff, 0x00000100,
1230 0x3c200, 0xfffc0fff, 0x00000100,
1231 0x6ed8, 0x00010101, 0x00010000,
1232 0x9830, 0xffffffff, 0x00000000,
1233 0x9834, 0xf00fffff, 0x00000400,
1234 0x5bb0, 0x000000f0, 0x00000070,
1235 0x5bc0, 0xf0311fff, 0x80300000,
1236 0x98f8, 0x73773777, 0x12010001,
1237 0x98fc, 0xffffffff, 0x00000010,
1238 0x9b7c, 0x00ff0000, 0x00fc0000,
1239 0x8030, 0x00001f0f, 0x0000100a,
1240 0x2f48, 0x73773777, 0x12010001,
1241 0x2408, 0x000fffff, 0x000c007f,
1242 0x8a14, 0xf000003f, 0x00000007,
1243 0x8b24, 0x3fff3fff, 0x00ffcfff,
1244 0x30a04, 0x0000ff0f, 0x00000000,
1245 0x28a4c, 0x07ffffff, 0x06000000,
1246 0x4d8, 0x00000fff, 0x00000100,
1247 0x3e78, 0x00000001, 0x00000002,
1248 0xc768, 0x00000008, 0x00000008,
1249 0x8c00, 0x000000ff, 0x00000003,
1250 0x214f8, 0x01ff01ff, 0x00000002,
1251 0x21498, 0x007ff800, 0x00200000,
1252 0x2015c, 0xffffffff, 0x00000f40,
1253 0x88c4, 0x001f3ae3, 0x00000082,
1254 0x88d4, 0x0000001f, 0x00000010,
1255 0x30934, 0xffffffff, 0x00000000
1256};
1257
1258static const u32 kalindi_mgcg_cgcg_init[] =
1259{
1260 0xc420, 0xffffffff, 0xfffffffc,
1261 0x30800, 0xffffffff, 0xe0000000,
1262 0x3c2a0, 0xffffffff, 0x00000100,
1263 0x3c208, 0xffffffff, 0x00000100,
1264 0x3c2c0, 0xffffffff, 0x00000100,
1265 0x3c2c8, 0xffffffff, 0x00000100,
1266 0x3c2c4, 0xffffffff, 0x00000100,
1267 0x55e4, 0xffffffff, 0x00600100,
1268 0x3c280, 0xffffffff, 0x00000100,
1269 0x3c214, 0xffffffff, 0x06000100,
1270 0x3c220, 0xffffffff, 0x00000100,
1271 0x3c218, 0xffffffff, 0x06000100,
1272 0x3c204, 0xffffffff, 0x00000100,
1273 0x3c2e0, 0xffffffff, 0x00000100,
1274 0x3c224, 0xffffffff, 0x00000100,
1275 0x3c200, 0xffffffff, 0x00000100,
1276 0x3c230, 0xffffffff, 0x00000100,
1277 0x3c234, 0xffffffff, 0x00000100,
1278 0x3c250, 0xffffffff, 0x00000100,
1279 0x3c254, 0xffffffff, 0x00000100,
1280 0x3c258, 0xffffffff, 0x00000100,
1281 0x3c25c, 0xffffffff, 0x00000100,
1282 0x3c260, 0xffffffff, 0x00000100,
1283 0x3c27c, 0xffffffff, 0x00000100,
1284 0x3c278, 0xffffffff, 0x00000100,
1285 0x3c210, 0xffffffff, 0x06000100,
1286 0x3c290, 0xffffffff, 0x00000100,
1287 0x3c274, 0xffffffff, 0x00000100,
1288 0x3c2b4, 0xffffffff, 0x00000100,
1289 0x3c2b0, 0xffffffff, 0x00000100,
1290 0x3c270, 0xffffffff, 0x00000100,
1291 0x30800, 0xffffffff, 0xe0000000,
1292 0x3c020, 0xffffffff, 0x00010000,
1293 0x3c024, 0xffffffff, 0x00030002,
1294 0x3c028, 0xffffffff, 0x00040007,
1295 0x3c02c, 0xffffffff, 0x00060005,
1296 0x3c030, 0xffffffff, 0x00090008,
1297 0x3c034, 0xffffffff, 0x00010000,
1298 0x3c038, 0xffffffff, 0x00030002,
1299 0x3c03c, 0xffffffff, 0x00040007,
1300 0x3c040, 0xffffffff, 0x00060005,
1301 0x3c044, 0xffffffff, 0x00090008,
1302 0x3c000, 0xffffffff, 0x96e00200,
1303 0x8708, 0xffffffff, 0x00900100,
1304 0xc424, 0xffffffff, 0x0020003f,
1305 0x38, 0xffffffff, 0x0140001c,
1306 0x3c, 0x000f0000, 0x000f0000,
1307 0x220, 0xffffffff, 0xC060000C,
1308 0x224, 0xc0000fff, 0x00000100,
1309 0x20a8, 0xffffffff, 0x00000104,
1310 0x55e4, 0xff000fff, 0x00000100,
1311 0x30cc, 0xc0000fff, 0x00000104,
1312 0xc1e4, 0x00000001, 0x00000001,
1313 0xd00c, 0xff000ff0, 0x00000100,
1314 0xd80c, 0xff000ff0, 0x00000100
1315};
1316
1317static const u32 hawaii_golden_spm_registers[] =
1318{
1319 0x30800, 0xe0ffffff, 0xe0000000
1320};
1321
1322static const u32 hawaii_golden_common_registers[] =
1323{
1324 0x30800, 0xffffffff, 0xe0000000,
1325 0x28350, 0xffffffff, 0x3a00161a,
1326 0x28354, 0xffffffff, 0x0000002e,
1327 0x9a10, 0xffffffff, 0x00018208,
1328 0x98f8, 0xffffffff, 0x12011003
1329};
1330
1331static const u32 hawaii_golden_registers[] =
1332{
1333 0x3354, 0x00000333, 0x00000333,
1334 0x9a10, 0x00010000, 0x00058208,
1335 0x9830, 0xffffffff, 0x00000000,
1336 0x9834, 0xf00fffff, 0x00000400,
1337 0x9838, 0x0002021c, 0x00020200,
1338 0xc78, 0x00000080, 0x00000000,
1339 0x5bb0, 0x000000f0, 0x00000070,
1340 0x5bc0, 0xf0311fff, 0x80300000,
1341 0x350c, 0x00810000, 0x408af000,
1342 0x7030, 0x31000111, 0x00000011,
1343 0x2f48, 0x73773777, 0x12010001,
1344 0x2120, 0x0000007f, 0x0000001b,
1345 0x21dc, 0x00007fb6, 0x00002191,
1346 0x3628, 0x0000003f, 0x0000000a,
1347 0x362c, 0x0000003f, 0x0000000a,
1348 0x2ae4, 0x00073ffe, 0x000022a2,
1349 0x240c, 0x000007ff, 0x00000000,
1350 0x8bf0, 0x00002001, 0x00000001,
1351 0x8b24, 0xffffffff, 0x00ffffff,
1352 0x30a04, 0x0000ff0f, 0x00000000,
1353 0x28a4c, 0x07ffffff, 0x06000000,
1354 0x3e78, 0x00000001, 0x00000002,
1355 0xc768, 0x00000008, 0x00000008,
1356 0xc770, 0x00000f00, 0x00000800,
1357 0xc774, 0x00000f00, 0x00000800,
1358 0xc798, 0x00ffffff, 0x00ff7fbf,
1359 0xc79c, 0x00ffffff, 0x00ff7faf,
1360 0x8c00, 0x000000ff, 0x00000800,
1361 0xe40, 0x00001fff, 0x00001fff,
1362 0x9060, 0x0000007f, 0x00000020,
1363 0x9508, 0x00010000, 0x00010000,
1364 0xae00, 0x00100000, 0x000ff07c,
1365 0xac14, 0x000003ff, 0x0000000f,
1366 0xac10, 0xffffffff, 0x7564fdec,
1367 0xac0c, 0xffffffff, 0x3120b9a8,
1368 0xac08, 0x20000000, 0x0f9c0000
1369};
1370
1371static const u32 hawaii_mgcg_cgcg_init[] =
1372{
1373 0xc420, 0xffffffff, 0xfffffffd,
1374 0x30800, 0xffffffff, 0xe0000000,
1375 0x3c2a0, 0xffffffff, 0x00000100,
1376 0x3c208, 0xffffffff, 0x00000100,
1377 0x3c2c0, 0xffffffff, 0x00000100,
1378 0x3c2c8, 0xffffffff, 0x00000100,
1379 0x3c2c4, 0xffffffff, 0x00000100,
1380 0x55e4, 0xffffffff, 0x00200100,
1381 0x3c280, 0xffffffff, 0x00000100,
1382 0x3c214, 0xffffffff, 0x06000100,
1383 0x3c220, 0xffffffff, 0x00000100,
1384 0x3c218, 0xffffffff, 0x06000100,
1385 0x3c204, 0xffffffff, 0x00000100,
1386 0x3c2e0, 0xffffffff, 0x00000100,
1387 0x3c224, 0xffffffff, 0x00000100,
1388 0x3c200, 0xffffffff, 0x00000100,
1389 0x3c230, 0xffffffff, 0x00000100,
1390 0x3c234, 0xffffffff, 0x00000100,
1391 0x3c250, 0xffffffff, 0x00000100,
1392 0x3c254, 0xffffffff, 0x00000100,
1393 0x3c258, 0xffffffff, 0x00000100,
1394 0x3c25c, 0xffffffff, 0x00000100,
1395 0x3c260, 0xffffffff, 0x00000100,
1396 0x3c27c, 0xffffffff, 0x00000100,
1397 0x3c278, 0xffffffff, 0x00000100,
1398 0x3c210, 0xffffffff, 0x06000100,
1399 0x3c290, 0xffffffff, 0x00000100,
1400 0x3c274, 0xffffffff, 0x00000100,
1401 0x3c2b4, 0xffffffff, 0x00000100,
1402 0x3c2b0, 0xffffffff, 0x00000100,
1403 0x3c270, 0xffffffff, 0x00000100,
1404 0x30800, 0xffffffff, 0xe0000000,
1405 0x3c020, 0xffffffff, 0x00010000,
1406 0x3c024, 0xffffffff, 0x00030002,
1407 0x3c028, 0xffffffff, 0x00040007,
1408 0x3c02c, 0xffffffff, 0x00060005,
1409 0x3c030, 0xffffffff, 0x00090008,
1410 0x3c034, 0xffffffff, 0x00010000,
1411 0x3c038, 0xffffffff, 0x00030002,
1412 0x3c03c, 0xffffffff, 0x00040007,
1413 0x3c040, 0xffffffff, 0x00060005,
1414 0x3c044, 0xffffffff, 0x00090008,
1415 0x3c048, 0xffffffff, 0x00010000,
1416 0x3c04c, 0xffffffff, 0x00030002,
1417 0x3c050, 0xffffffff, 0x00040007,
1418 0x3c054, 0xffffffff, 0x00060005,
1419 0x3c058, 0xffffffff, 0x00090008,
1420 0x3c05c, 0xffffffff, 0x00010000,
1421 0x3c060, 0xffffffff, 0x00030002,
1422 0x3c064, 0xffffffff, 0x00040007,
1423 0x3c068, 0xffffffff, 0x00060005,
1424 0x3c06c, 0xffffffff, 0x00090008,
1425 0x3c070, 0xffffffff, 0x00010000,
1426 0x3c074, 0xffffffff, 0x00030002,
1427 0x3c078, 0xffffffff, 0x00040007,
1428 0x3c07c, 0xffffffff, 0x00060005,
1429 0x3c080, 0xffffffff, 0x00090008,
1430 0x3c084, 0xffffffff, 0x00010000,
1431 0x3c088, 0xffffffff, 0x00030002,
1432 0x3c08c, 0xffffffff, 0x00040007,
1433 0x3c090, 0xffffffff, 0x00060005,
1434 0x3c094, 0xffffffff, 0x00090008,
1435 0x3c098, 0xffffffff, 0x00010000,
1436 0x3c09c, 0xffffffff, 0x00030002,
1437 0x3c0a0, 0xffffffff, 0x00040007,
1438 0x3c0a4, 0xffffffff, 0x00060005,
1439 0x3c0a8, 0xffffffff, 0x00090008,
1440 0x3c0ac, 0xffffffff, 0x00010000,
1441 0x3c0b0, 0xffffffff, 0x00030002,
1442 0x3c0b4, 0xffffffff, 0x00040007,
1443 0x3c0b8, 0xffffffff, 0x00060005,
1444 0x3c0bc, 0xffffffff, 0x00090008,
1445 0x3c0c0, 0xffffffff, 0x00010000,
1446 0x3c0c4, 0xffffffff, 0x00030002,
1447 0x3c0c8, 0xffffffff, 0x00040007,
1448 0x3c0cc, 0xffffffff, 0x00060005,
1449 0x3c0d0, 0xffffffff, 0x00090008,
1450 0x3c0d4, 0xffffffff, 0x00010000,
1451 0x3c0d8, 0xffffffff, 0x00030002,
1452 0x3c0dc, 0xffffffff, 0x00040007,
1453 0x3c0e0, 0xffffffff, 0x00060005,
1454 0x3c0e4, 0xffffffff, 0x00090008,
1455 0x3c0e8, 0xffffffff, 0x00010000,
1456 0x3c0ec, 0xffffffff, 0x00030002,
1457 0x3c0f0, 0xffffffff, 0x00040007,
1458 0x3c0f4, 0xffffffff, 0x00060005,
1459 0x3c0f8, 0xffffffff, 0x00090008,
1460 0xc318, 0xffffffff, 0x00020200,
1461 0x3350, 0xffffffff, 0x00000200,
1462 0x15c0, 0xffffffff, 0x00000400,
1463 0x55e8, 0xffffffff, 0x00000000,
1464 0x2f50, 0xffffffff, 0x00000902,
1465 0x3c000, 0xffffffff, 0x96940200,
1466 0x8708, 0xffffffff, 0x00900100,
1467 0xc424, 0xffffffff, 0x0020003f,
1468 0x38, 0xffffffff, 0x0140001c,
1469 0x3c, 0x000f0000, 0x000f0000,
1470 0x220, 0xffffffff, 0xc060000c,
1471 0x224, 0xc0000fff, 0x00000100,
1472 0xf90, 0xffffffff, 0x00000100,
1473 0xf98, 0x00000101, 0x00000000,
1474 0x20a8, 0xffffffff, 0x00000104,
1475 0x55e4, 0xff000fff, 0x00000100,
1476 0x30cc, 0xc0000fff, 0x00000104,
1477 0xc1e4, 0x00000001, 0x00000001,
1478 0xd00c, 0xff000ff0, 0x00000100,
1479 0xd80c, 0xff000ff0, 0x00000100
1480};
1481
1482static const u32 godavari_golden_registers[] =
1483{
1484 0x55e4, 0xff607fff, 0xfc000100,
1485 0x6ed8, 0x00010101, 0x00010000,
1486 0x9830, 0xffffffff, 0x00000000,
1487 0x98302, 0xf00fffff, 0x00000400,
1488 0x6130, 0xffffffff, 0x00010000,
1489 0x5bb0, 0x000000f0, 0x00000070,
1490 0x5bc0, 0xf0311fff, 0x80300000,
1491 0x98f8, 0x73773777, 0x12010001,
1492 0x98fc, 0xffffffff, 0x00000010,
1493 0x8030, 0x00001f0f, 0x0000100a,
1494 0x2f48, 0x73773777, 0x12010001,
1495 0x2408, 0x000fffff, 0x000c007f,
1496 0x8a14, 0xf000003f, 0x00000007,
1497 0x8b24, 0xffffffff, 0x00ff0fff,
1498 0x30a04, 0x0000ff0f, 0x00000000,
1499 0x28a4c, 0x07ffffff, 0x06000000,
1500 0x4d8, 0x00000fff, 0x00000100,
1501 0xd014, 0x00010000, 0x00810001,
1502 0xd814, 0x00010000, 0x00810001,
1503 0x3e78, 0x00000001, 0x00000002,
1504 0xc768, 0x00000008, 0x00000008,
1505 0xc770, 0x00000f00, 0x00000800,
1506 0xc774, 0x00000f00, 0x00000800,
1507 0xc798, 0x00ffffff, 0x00ff7fbf,
1508 0xc79c, 0x00ffffff, 0x00ff7faf,
1509 0x8c00, 0x000000ff, 0x00000001,
1510 0x214f8, 0x01ff01ff, 0x00000002,
1511 0x21498, 0x007ff800, 0x00200000,
1512 0x2015c, 0xffffffff, 0x00000f40,
1513 0x88c4, 0x001f3ae3, 0x00000082,
1514 0x88d4, 0x0000001f, 0x00000010,
1515 0x30934, 0xffffffff, 0x00000000
1516};
1517
1518
1519static void cik_init_golden_registers(struct radeon_device *rdev)
1520{
1521 switch (rdev->family) {
1522 case CHIP_BONAIRE:
1523 radeon_program_register_sequence(rdev,
1524 bonaire_mgcg_cgcg_init,
1525 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1526 radeon_program_register_sequence(rdev,
1527 bonaire_golden_registers,
1528 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1529 radeon_program_register_sequence(rdev,
1530 bonaire_golden_common_registers,
1531 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1532 radeon_program_register_sequence(rdev,
1533 bonaire_golden_spm_registers,
1534 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1535 break;
1536 case CHIP_KABINI:
1537 radeon_program_register_sequence(rdev,
1538 kalindi_mgcg_cgcg_init,
1539 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1540 radeon_program_register_sequence(rdev,
1541 kalindi_golden_registers,
1542 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1543 radeon_program_register_sequence(rdev,
1544 kalindi_golden_common_registers,
1545 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1546 radeon_program_register_sequence(rdev,
1547 kalindi_golden_spm_registers,
1548 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1549 break;
1550 case CHIP_MULLINS:
1551 radeon_program_register_sequence(rdev,
1552 kalindi_mgcg_cgcg_init,
1553 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1554 radeon_program_register_sequence(rdev,
1555 godavari_golden_registers,
1556 (const u32)ARRAY_SIZE(godavari_golden_registers));
1557 radeon_program_register_sequence(rdev,
1558 kalindi_golden_common_registers,
1559 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1560 radeon_program_register_sequence(rdev,
1561 kalindi_golden_spm_registers,
1562 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1563 break;
1564 case CHIP_KAVERI:
1565 radeon_program_register_sequence(rdev,
1566 spectre_mgcg_cgcg_init,
1567 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1568 radeon_program_register_sequence(rdev,
1569 spectre_golden_registers,
1570 (const u32)ARRAY_SIZE(spectre_golden_registers));
1571 radeon_program_register_sequence(rdev,
1572 spectre_golden_common_registers,
1573 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1574 radeon_program_register_sequence(rdev,
1575 spectre_golden_spm_registers,
1576 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1577 break;
1578 case CHIP_HAWAII:
1579 radeon_program_register_sequence(rdev,
1580 hawaii_mgcg_cgcg_init,
1581 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1582 radeon_program_register_sequence(rdev,
1583 hawaii_golden_registers,
1584 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1585 radeon_program_register_sequence(rdev,
1586 hawaii_golden_common_registers,
1587 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1588 radeon_program_register_sequence(rdev,
1589 hawaii_golden_spm_registers,
1590 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1591 break;
1592 default:
1593 break;
1594 }
1595}
1596
1597/**
1598 * cik_get_xclk - get the xclk
1599 *
1600 * @rdev: radeon_device pointer
1601 *
1602 * Returns the reference clock used by the gfx engine
1603 * (CIK).
1604 */
1605u32 cik_get_xclk(struct radeon_device *rdev)
1606{
1607 u32 reference_clock = rdev->clock.spll.reference_freq;
1608
1609 if (rdev->flags & RADEON_IS_IGP) {
1610 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1611 return reference_clock / 2;
1612 } else {
1613 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1614 return reference_clock / 4;
1615 }
1616 return reference_clock;
1617}
1618
1619/**
1620 * cik_mm_rdoorbell - read a doorbell dword
1621 *
1622 * @rdev: radeon_device pointer
1623 * @index: doorbell index
1624 *
1625 * Returns the value in the doorbell aperture at the
1626 * requested doorbell index (CIK).
1627 */
1628u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1629{
1630 if (index < rdev->doorbell.num_doorbells) {
1631#ifdef __NetBSD__
1632 return bus_space_read_4(rdev->doorbell.bst, rdev->doorbell.bsh,
1633 index*4);
1634#else
1635 return readl(rdev->doorbell.ptr + index);
1636#endif
1637 } else {
1638 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1639 return 0;
1640 }
1641}
1642
1643/**
1644 * cik_mm_wdoorbell - write a doorbell dword
1645 *
1646 * @rdev: radeon_device pointer
1647 * @index: doorbell index
1648 * @v: value to write
1649 *
1650 * Writes @v to the doorbell aperture at the
1651 * requested doorbell index (CIK).
1652 */
1653void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1654{
1655 if (index < rdev->doorbell.num_doorbells) {
1656#ifdef __NetBSD__
1657 bus_space_write_4(rdev->doorbell.bst, rdev->doorbell.bsh,
1658 index*4, v);
1659#else
1660 writel(v, rdev->doorbell.ptr + index);
1661#endif
1662 } else {
1663 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1664 }
1665}
1666
1667#define BONAIRE_IO_MC_REGS_SIZE 36
1668
1669static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1670{
1671 {0x00000070, 0x04400000},
1672 {0x00000071, 0x80c01803},
1673 {0x00000072, 0x00004004},
1674 {0x00000073, 0x00000100},
1675 {0x00000074, 0x00ff0000},
1676 {0x00000075, 0x34000000},
1677 {0x00000076, 0x08000014},
1678 {0x00000077, 0x00cc08ec},
1679 {0x00000078, 0x00000400},
1680 {0x00000079, 0x00000000},
1681 {0x0000007a, 0x04090000},
1682 {0x0000007c, 0x00000000},
1683 {0x0000007e, 0x4408a8e8},
1684 {0x0000007f, 0x00000304},
1685 {0x00000080, 0x00000000},
1686 {0x00000082, 0x00000001},
1687 {0x00000083, 0x00000002},
1688 {0x00000084, 0xf3e4f400},
1689 {0x00000085, 0x052024e3},
1690 {0x00000087, 0x00000000},
1691 {0x00000088, 0x01000000},
1692 {0x0000008a, 0x1c0a0000},
1693 {0x0000008b, 0xff010000},
1694 {0x0000008d, 0xffffefff},
1695 {0x0000008e, 0xfff3efff},
1696 {0x0000008f, 0xfff3efbf},
1697 {0x00000092, 0xf7ffffff},
1698 {0x00000093, 0xffffff7f},
1699 {0x00000095, 0x00101101},
1700 {0x00000096, 0x00000fff},
1701 {0x00000097, 0x00116fff},
1702 {0x00000098, 0x60010000},
1703 {0x00000099, 0x10010000},
1704 {0x0000009a, 0x00006000},
1705 {0x0000009b, 0x00001000},
1706 {0x0000009f, 0x00b48000}
1707};
1708
1709#define HAWAII_IO_MC_REGS_SIZE 22
1710
1711static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1712{
1713 {0x0000007d, 0x40000000},
1714 {0x0000007e, 0x40180304},
1715 {0x0000007f, 0x0000ff00},
1716 {0x00000081, 0x00000000},
1717 {0x00000083, 0x00000800},
1718 {0x00000086, 0x00000000},
1719 {0x00000087, 0x00000100},
1720 {0x00000088, 0x00020100},
1721 {0x00000089, 0x00000000},
1722 {0x0000008b, 0x00040000},
1723 {0x0000008c, 0x00000100},
1724 {0x0000008e, 0xff010000},
1725 {0x00000090, 0xffffefff},
1726 {0x00000091, 0xfff3efff},
1727 {0x00000092, 0xfff3efbf},
1728 {0x00000093, 0xf7ffffff},
1729 {0x00000094, 0xffffff7f},
1730 {0x00000095, 0x00000fff},
1731 {0x00000096, 0x00116fff},
1732 {0x00000097, 0x60010000},
1733 {0x00000098, 0x10010000},
1734 {0x0000009f, 0x00c79000}
1735};
1736
1737
1738/**
1739 * cik_srbm_select - select specific register instances
1740 *
1741 * @rdev: radeon_device pointer
1742 * @me: selected ME (micro engine)
1743 * @pipe: pipe
1744 * @queue: queue
1745 * @vmid: VMID
1746 *
1747 * Switches the currently active registers instances. Some
1748 * registers are instanced per VMID, others are instanced per
1749 * me/pipe/queue combination.
1750 */
1751static void cik_srbm_select(struct radeon_device *rdev,
1752 u32 me, u32 pipe, u32 queue, u32 vmid)
1753{
1754 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1755 MEID(me & 0x3) |
1756 VMID(vmid & 0xf) |
1757 QUEUEID(queue & 0x7));
1758 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1759}
1760
1761/* ucode loading */
1762/**
1763 * ci_mc_load_microcode - load MC ucode into the hw
1764 *
1765 * @rdev: radeon_device pointer
1766 *
1767 * Load the GDDR MC ucode into the hw (CIK).
1768 * Returns 0 on success, error on failure.
1769 */
1770int ci_mc_load_microcode(struct radeon_device *rdev)
1771{
1772 const __be32 *fw_data;
1773 u32 running, blackout = 0;
1774 const u32 *io_mc_regs;
1775 int i, regs_size, ucode_size;
1776
1777 if (!rdev->mc_fw)
1778 return -EINVAL;
1779
1780 ucode_size = rdev->mc_fw->size / 4;
1781
1782 switch (rdev->family) {
1783 case CHIP_BONAIRE:
1784 io_mc_regs = &bonaire_io_mc_regs[0][0];
1785 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1786 break;
1787 case CHIP_HAWAII:
1788 io_mc_regs = &hawaii_io_mc_regs[0][0];
1789 regs_size = HAWAII_IO_MC_REGS_SIZE;
1790 break;
1791 default:
1792 return -EINVAL;
1793 }
1794
1795 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1796
1797 if (running == 0) {
1798 if (running) {
1799 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1800 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1801 }
1802
1803 /* reset the engine and set to writable */
1804 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1805 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1806
1807 /* load mc io regs */
1808 for (i = 0; i < regs_size; i++) {
1809 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1810 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1811 }
1812 /* load the MC ucode */
1813 fw_data = (const __be32 *)rdev->mc_fw->data;
1814 for (i = 0; i < ucode_size; i++)
1815 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1816
1817 /* put the engine back into the active state */
1818 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1819 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1820 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1821
1822 /* wait for training to complete */
1823 for (i = 0; i < rdev->usec_timeout; i++) {
1824 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1825 break;
1826 udelay(1);
1827 }
1828 for (i = 0; i < rdev->usec_timeout; i++) {
1829 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1830 break;
1831 udelay(1);
1832 }
1833
1834 if (running)
1835 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1836 }
1837
1838 return 0;
1839}
1840
1841/**
1842 * cik_init_microcode - load ucode images from disk
1843 *
1844 * @rdev: radeon_device pointer
1845 *
1846 * Use the firmware interface to load the ucode images into
1847 * the driver (not loaded into hw).
1848 * Returns 0 on success, error on failure.
1849 */
1850static int cik_init_microcode(struct radeon_device *rdev)
1851{
1852 const char *chip_name;
1853 size_t pfp_req_size, me_req_size, ce_req_size,
1854 mec_req_size, rlc_req_size, mc_req_size = 0,
1855 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1856 char fw_name[30];
1857 int err;
1858
1859 DRM_DEBUG("\n");
1860
1861 switch (rdev->family) {
1862 case CHIP_BONAIRE:
1863 chip_name = "BONAIRE";
1864 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1865 me_req_size = CIK_ME_UCODE_SIZE * 4;
1866 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1867 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1868 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1869 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1870 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1871 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1872#ifdef __NetBSD__ /* XXX ALIGN means something else. */
1873 smc_req_size = round_up(BONAIRE_SMC_UCODE_SIZE, 4);
1874#else
1875 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1876#endif
1877 break;
1878 case CHIP_HAWAII:
1879 chip_name = "HAWAII";
1880 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1881 me_req_size = CIK_ME_UCODE_SIZE * 4;
1882 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1883 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1884 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1885 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1886 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1887 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1888#ifdef __NetBSD__ /* XXX ALIGN means something else. */
1889 smc_req_size = round_up(HAWAII_SMC_UCODE_SIZE, 4);
1890#else
1891 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1892#endif
1893 break;
1894 case CHIP_KAVERI:
1895 chip_name = "KAVERI";
1896 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1897 me_req_size = CIK_ME_UCODE_SIZE * 4;
1898 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1899 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1900 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1901 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1902 break;
1903 case CHIP_KABINI:
1904 chip_name = "KABINI";
1905 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1906 me_req_size = CIK_ME_UCODE_SIZE * 4;
1907 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1908 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1909 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1910 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1911 break;
1912 case CHIP_MULLINS:
1913 chip_name = "MULLINS";
1914 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1915 me_req_size = CIK_ME_UCODE_SIZE * 4;
1916 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1917 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1918 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1919 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1920 break;
1921 default: BUG();
1922 }
1923
1924 DRM_INFO("Loading %s Microcode\n", chip_name);
1925
1926 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1927 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1928 if (err)
1929 goto out;
1930 if (rdev->pfp_fw->size != pfp_req_size) {
1931 printk(KERN_ERR
1932 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1933 rdev->pfp_fw->size, fw_name);
1934 err = -EINVAL;
1935 goto out;
1936 }
1937
1938 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1939 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1940 if (err)
1941 goto out;
1942 if (rdev->me_fw->size != me_req_size) {
1943 printk(KERN_ERR
1944 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1945 rdev->me_fw->size, fw_name);
1946 err = -EINVAL;
1947 }
1948
1949 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1950 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1951 if (err)
1952 goto out;
1953 if (rdev->ce_fw->size != ce_req_size) {
1954 printk(KERN_ERR
1955 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1956 rdev->ce_fw->size, fw_name);
1957 err = -EINVAL;
1958 }
1959
1960 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1961 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1962 if (err)
1963 goto out;
1964 if (rdev->mec_fw->size != mec_req_size) {
1965 printk(KERN_ERR
1966 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1967 rdev->mec_fw->size, fw_name);
1968 err = -EINVAL;
1969 }
1970
1971 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1972 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1973 if (err)
1974 goto out;
1975 if (rdev->rlc_fw->size != rlc_req_size) {
1976 printk(KERN_ERR
1977 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1978 rdev->rlc_fw->size, fw_name);
1979 err = -EINVAL;
1980 }
1981
1982 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1983 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1984 if (err)
1985 goto out;
1986 if (rdev->sdma_fw->size != sdma_req_size) {
1987 printk(KERN_ERR
1988 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1989 rdev->sdma_fw->size, fw_name);
1990 err = -EINVAL;
1991 }
1992
1993 /* No SMC, MC ucode on APUs */
1994 if (!(rdev->flags & RADEON_IS_IGP)) {
1995 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1996 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1997 if (err) {
1998 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1999 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2000 if (err)
2001 goto out;
2002 }
2003 if ((rdev->mc_fw->size != mc_req_size) &&
2004 (rdev->mc_fw->size != mc2_req_size)){
2005 printk(KERN_ERR
2006 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2007 rdev->mc_fw->size, fw_name);
2008 err = -EINVAL;
2009 }
2010 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2011
2012 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2013 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2014 if (err) {
2015 printk(KERN_ERR
2016 "smc: error loading firmware \"%s\"\n",
2017 fw_name);
2018 release_firmware(rdev->smc_fw);
2019 rdev->smc_fw = NULL;
2020 err = 0;
2021 } else if (rdev->smc_fw->size != smc_req_size) {
2022 printk(KERN_ERR
2023 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2024 rdev->smc_fw->size, fw_name);
2025 err = -EINVAL;
2026 }
2027 }
2028
2029out:
2030 if (err) {
2031 if (err != -EINVAL)
2032 printk(KERN_ERR
2033 "cik_cp: Failed to load firmware \"%s\"\n",
2034 fw_name);
2035 release_firmware(rdev->pfp_fw);
2036 rdev->pfp_fw = NULL;
2037 release_firmware(rdev->me_fw);
2038 rdev->me_fw = NULL;
2039 release_firmware(rdev->ce_fw);
2040 rdev->ce_fw = NULL;
2041 release_firmware(rdev->rlc_fw);
2042 rdev->rlc_fw = NULL;
2043 release_firmware(rdev->mc_fw);
2044 rdev->mc_fw = NULL;
2045 release_firmware(rdev->smc_fw);
2046 rdev->smc_fw = NULL;
2047 }
2048 return err;
2049}
2050
2051/*
2052 * Core functions
2053 */
2054/**
2055 * cik_tiling_mode_table_init - init the hw tiling table
2056 *
2057 * @rdev: radeon_device pointer
2058 *
2059 * Starting with SI, the tiling setup is done globally in a
2060 * set of 32 tiling modes. Rather than selecting each set of
2061 * parameters per surface as on older asics, we just select
2062 * which index in the tiling table we want to use, and the
2063 * surface uses those parameters (CIK).
2064 */
2065static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2066{
2067 const u32 num_tile_mode_states = 32;
2068 const u32 num_secondary_tile_mode_states = 16;
2069 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2070 u32 num_pipe_configs;
2071 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2072 rdev->config.cik.max_shader_engines;
2073
2074 switch (rdev->config.cik.mem_row_size_in_kb) {
2075 case 1:
2076 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2077 break;
2078 case 2:
2079 default:
2080 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2081 break;
2082 case 4:
2083 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2084 break;
2085 }
2086
2087 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2088 if (num_pipe_configs > 8)
2089 num_pipe_configs = 16;
2090
2091 if (num_pipe_configs == 16) {
2092 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2093 switch (reg_offset) {
2094 case 0:
2095 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2097 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2098 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2099 break;
2100 case 1:
2101 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2103 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2104 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2105 break;
2106 case 2:
2107 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2108 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2109 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2111 break;
2112 case 3:
2113 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2115 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2116 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2117 break;
2118 case 4:
2119 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2121 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2122 TILE_SPLIT(split_equal_to_row_size));
2123 break;
2124 case 5:
2125 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2126 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2128 break;
2129 case 6:
2130 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2131 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2132 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2134 break;
2135 case 7:
2136 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2137 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2138 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2139 TILE_SPLIT(split_equal_to_row_size));
2140 break;
2141 case 8:
2142 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2143 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2144 break;
2145 case 9:
2146 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2147 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2149 break;
2150 case 10:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155 break;
2156 case 11:
2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 break;
2162 case 12:
2163 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2164 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2165 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 break;
2168 case 13:
2169 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2170 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2171 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2172 break;
2173 case 14:
2174 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2175 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2176 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178 break;
2179 case 16:
2180 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2182 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 break;
2185 case 17:
2186 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2188 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 break;
2191 case 27:
2192 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2195 break;
2196 case 28:
2197 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2198 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2199 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201 break;
2202 case 29:
2203 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207 break;
2208 case 30:
2209 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2210 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2213 break;
2214 default:
2215 gb_tile_moden = 0;
2216 break;
2217 }
2218 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2219 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2220 }
2221 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2222 switch (reg_offset) {
2223 case 0:
2224 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2227 NUM_BANKS(ADDR_SURF_16_BANK));
2228 break;
2229 case 1:
2230 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233 NUM_BANKS(ADDR_SURF_16_BANK));
2234 break;
2235 case 2:
2236 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2239 NUM_BANKS(ADDR_SURF_16_BANK));
2240 break;
2241 case 3:
2242 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2245 NUM_BANKS(ADDR_SURF_16_BANK));
2246 break;
2247 case 4:
2248 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2251 NUM_BANKS(ADDR_SURF_8_BANK));
2252 break;
2253 case 5:
2254 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2257 NUM_BANKS(ADDR_SURF_4_BANK));
2258 break;
2259 case 6:
2260 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2263 NUM_BANKS(ADDR_SURF_2_BANK));
2264 break;
2265 case 8:
2266 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 NUM_BANKS(ADDR_SURF_16_BANK));
2270 break;
2271 case 9:
2272 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 NUM_BANKS(ADDR_SURF_16_BANK));
2276 break;
2277 case 10:
2278 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281 NUM_BANKS(ADDR_SURF_16_BANK));
2282 break;
2283 case 11:
2284 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2287 NUM_BANKS(ADDR_SURF_8_BANK));
2288 break;
2289 case 12:
2290 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2293 NUM_BANKS(ADDR_SURF_4_BANK));
2294 break;
2295 case 13:
2296 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2299 NUM_BANKS(ADDR_SURF_2_BANK));
2300 break;
2301 case 14:
2302 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2305 NUM_BANKS(ADDR_SURF_2_BANK));
2306 break;
2307 default:
2308 gb_tile_moden = 0;
2309 break;
2310 }
2311 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2312 }
2313 } else if (num_pipe_configs == 8) {
2314 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2315 switch (reg_offset) {
2316 case 0:
2317 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2319 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2321 break;
2322 case 1:
2323 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2325 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2326 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2327 break;
2328 case 2:
2329 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2331 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2333 break;
2334 case 3:
2335 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2338 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2339 break;
2340 case 4:
2341 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2344 TILE_SPLIT(split_equal_to_row_size));
2345 break;
2346 case 5:
2347 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350 break;
2351 case 6:
2352 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2354 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2355 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2356 break;
2357 case 7:
2358 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2360 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2361 TILE_SPLIT(split_equal_to_row_size));
2362 break;
2363 case 8:
2364 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2365 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2366 break;
2367 case 9:
2368 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2371 break;
2372 case 10:
2373 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377 break;
2378 case 11:
2379 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2380 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 break;
2384 case 12:
2385 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2387 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 break;
2390 case 13:
2391 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2392 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2393 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2394 break;
2395 case 14:
2396 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2398 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400 break;
2401 case 16:
2402 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2404 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 break;
2407 case 17:
2408 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 break;
2413 case 27:
2414 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2416 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2417 break;
2418 case 28:
2419 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2421 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423 break;
2424 case 29:
2425 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 break;
2430 case 30:
2431 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435 break;
2436 default:
2437 gb_tile_moden = 0;
2438 break;
2439 }
2440 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2441 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2442 }
2443 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2444 switch (reg_offset) {
2445 case 0:
2446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2449 NUM_BANKS(ADDR_SURF_16_BANK));
2450 break;
2451 case 1:
2452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455 NUM_BANKS(ADDR_SURF_16_BANK));
2456 break;
2457 case 2:
2458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 NUM_BANKS(ADDR_SURF_16_BANK));
2462 break;
2463 case 3:
2464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467 NUM_BANKS(ADDR_SURF_16_BANK));
2468 break;
2469 case 4:
2470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 NUM_BANKS(ADDR_SURF_8_BANK));
2474 break;
2475 case 5:
2476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479 NUM_BANKS(ADDR_SURF_4_BANK));
2480 break;
2481 case 6:
2482 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 NUM_BANKS(ADDR_SURF_2_BANK));
2486 break;
2487 case 8:
2488 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2491 NUM_BANKS(ADDR_SURF_16_BANK));
2492 break;
2493 case 9:
2494 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2497 NUM_BANKS(ADDR_SURF_16_BANK));
2498 break;
2499 case 10:
2500 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2503 NUM_BANKS(ADDR_SURF_16_BANK));
2504 break;
2505 case 11:
2506 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2509 NUM_BANKS(ADDR_SURF_16_BANK));
2510 break;
2511 case 12:
2512 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515 NUM_BANKS(ADDR_SURF_8_BANK));
2516 break;
2517 case 13:
2518 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2521 NUM_BANKS(ADDR_SURF_4_BANK));
2522 break;
2523 case 14:
2524 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2527 NUM_BANKS(ADDR_SURF_2_BANK));
2528 break;
2529 default:
2530 gb_tile_moden = 0;
2531 break;
2532 }
2533 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2534 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2535 }
2536 } else if (num_pipe_configs == 4) {
2537 if (num_rbs == 4) {
2538 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2539 switch (reg_offset) {
2540 case 0:
2541 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2545 break;
2546 case 1:
2547 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2551 break;
2552 case 2:
2553 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2557 break;
2558 case 3:
2559 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2563 break;
2564 case 4:
2565 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2568 TILE_SPLIT(split_equal_to_row_size));
2569 break;
2570 case 5:
2571 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2572 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2574 break;
2575 case 6:
2576 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2579 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580 break;
2581 case 7:
2582 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2585 TILE_SPLIT(split_equal_to_row_size));
2586 break;
2587 case 8:
2588 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2589 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2590 break;
2591 case 9:
2592 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2595 break;
2596 case 10:
2597 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2599 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2601 break;
2602 case 11:
2603 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2605 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607 break;
2608 case 12:
2609 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2610 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2611 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613 break;
2614 case 13:
2615 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2618 break;
2619 case 14:
2620 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2622 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624 break;
2625 case 16:
2626 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2628 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 break;
2631 case 17:
2632 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636 break;
2637 case 27:
2638 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2639 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2641 break;
2642 case 28:
2643 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2644 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 break;
2648 case 29:
2649 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2650 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2651 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2652 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2653 break;
2654 case 30:
2655 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659 break;
2660 default:
2661 gb_tile_moden = 0;
2662 break;
2663 }
2664 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2665 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2666 }
2667 } else if (num_rbs < 4) {
2668 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2669 switch (reg_offset) {
2670 case 0:
2671 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2675 break;
2676 case 1:
2677 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2679 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2680 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2681 break;
2682 case 2:
2683 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2685 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2687 break;
2688 case 3:
2689 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2692 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2693 break;
2694 case 4:
2695 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698 TILE_SPLIT(split_equal_to_row_size));
2699 break;
2700 case 5:
2701 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 break;
2705 case 6:
2706 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2708 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2709 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2710 break;
2711 case 7:
2712 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2714 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 TILE_SPLIT(split_equal_to_row_size));
2716 break;
2717 case 8:
2718 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2719 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2720 break;
2721 case 9:
2722 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2723 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2725 break;
2726 case 10:
2727 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2729 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 break;
2732 case 11:
2733 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737 break;
2738 case 12:
2739 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2741 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743 break;
2744 case 13:
2745 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2746 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2748 break;
2749 case 14:
2750 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2752 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2754 break;
2755 case 16:
2756 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2757 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760 break;
2761 case 17:
2762 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2763 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2764 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2766 break;
2767 case 27:
2768 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2771 break;
2772 case 28:
2773 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2774 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2775 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 break;
2778 case 29:
2779 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2781 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783 break;
2784 case 30:
2785 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2786 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2787 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2789 break;
2790 default:
2791 gb_tile_moden = 0;
2792 break;
2793 }
2794 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2795 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2796 }
2797 }
2798 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2799 switch (reg_offset) {
2800 case 0:
2801 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2804 NUM_BANKS(ADDR_SURF_16_BANK));
2805 break;
2806 case 1:
2807 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810 NUM_BANKS(ADDR_SURF_16_BANK));
2811 break;
2812 case 2:
2813 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816 NUM_BANKS(ADDR_SURF_16_BANK));
2817 break;
2818 case 3:
2819 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2822 NUM_BANKS(ADDR_SURF_16_BANK));
2823 break;
2824 case 4:
2825 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 NUM_BANKS(ADDR_SURF_16_BANK));
2829 break;
2830 case 5:
2831 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2834 NUM_BANKS(ADDR_SURF_8_BANK));
2835 break;
2836 case 6:
2837 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840 NUM_BANKS(ADDR_SURF_4_BANK));
2841 break;
2842 case 8:
2843 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846 NUM_BANKS(ADDR_SURF_16_BANK));
2847 break;
2848 case 9:
2849 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 NUM_BANKS(ADDR_SURF_16_BANK));
2853 break;
2854 case 10:
2855 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2857 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2858 NUM_BANKS(ADDR_SURF_16_BANK));
2859 break;
2860 case 11:
2861 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864 NUM_BANKS(ADDR_SURF_16_BANK));
2865 break;
2866 case 12:
2867 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2870 NUM_BANKS(ADDR_SURF_16_BANK));
2871 break;
2872 case 13:
2873 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2876 NUM_BANKS(ADDR_SURF_8_BANK));
2877 break;
2878 case 14:
2879 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2882 NUM_BANKS(ADDR_SURF_4_BANK));
2883 break;
2884 default:
2885 gb_tile_moden = 0;
2886 break;
2887 }
2888 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2889 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2890 }
2891 } else if (num_pipe_configs == 2) {
2892 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2893 switch (reg_offset) {
2894 case 0:
2895 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897 PIPE_CONFIG(ADDR_SURF_P2) |
2898 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2899 break;
2900 case 1:
2901 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2903 PIPE_CONFIG(ADDR_SURF_P2) |
2904 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2905 break;
2906 case 2:
2907 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2909 PIPE_CONFIG(ADDR_SURF_P2) |
2910 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2911 break;
2912 case 3:
2913 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 PIPE_CONFIG(ADDR_SURF_P2) |
2916 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2917 break;
2918 case 4:
2919 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921 PIPE_CONFIG(ADDR_SURF_P2) |
2922 TILE_SPLIT(split_equal_to_row_size));
2923 break;
2924 case 5:
2925 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 PIPE_CONFIG(ADDR_SURF_P2) |
2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 break;
2929 case 6:
2930 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2931 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2932 PIPE_CONFIG(ADDR_SURF_P2) |
2933 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2934 break;
2935 case 7:
2936 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2938 PIPE_CONFIG(ADDR_SURF_P2) |
2939 TILE_SPLIT(split_equal_to_row_size));
2940 break;
2941 case 8:
2942 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2943 PIPE_CONFIG(ADDR_SURF_P2);
2944 break;
2945 case 9:
2946 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2947 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948 PIPE_CONFIG(ADDR_SURF_P2));
2949 break;
2950 case 10:
2951 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2953 PIPE_CONFIG(ADDR_SURF_P2) |
2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 break;
2956 case 11:
2957 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2959 PIPE_CONFIG(ADDR_SURF_P2) |
2960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961 break;
2962 case 12:
2963 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2964 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2965 PIPE_CONFIG(ADDR_SURF_P2) |
2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 break;
2968 case 13:
2969 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2970 PIPE_CONFIG(ADDR_SURF_P2) |
2971 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2972 break;
2973 case 14:
2974 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2975 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976 PIPE_CONFIG(ADDR_SURF_P2) |
2977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2978 break;
2979 case 16:
2980 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2982 PIPE_CONFIG(ADDR_SURF_P2) |
2983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2984 break;
2985 case 17:
2986 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2988 PIPE_CONFIG(ADDR_SURF_P2) |
2989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2990 break;
2991 case 27:
2992 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2993 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994 PIPE_CONFIG(ADDR_SURF_P2));
2995 break;
2996 case 28:
2997 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2999 PIPE_CONFIG(ADDR_SURF_P2) |
3000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001 break;
3002 case 29:
3003 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3004 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3005 PIPE_CONFIG(ADDR_SURF_P2) |
3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3007 break;
3008 case 30:
3009 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3011 PIPE_CONFIG(ADDR_SURF_P2) |
3012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013 break;
3014 default:
3015 gb_tile_moden = 0;
3016 break;
3017 }
3018 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3019 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3020 }
3021 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3022 switch (reg_offset) {
3023 case 0:
3024 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 NUM_BANKS(ADDR_SURF_16_BANK));
3028 break;
3029 case 1:
3030 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3031 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033 NUM_BANKS(ADDR_SURF_16_BANK));
3034 break;
3035 case 2:
3036 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 NUM_BANKS(ADDR_SURF_16_BANK));
3040 break;
3041 case 3:
3042 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3044 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3045 NUM_BANKS(ADDR_SURF_16_BANK));
3046 break;
3047 case 4:
3048 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3051 NUM_BANKS(ADDR_SURF_16_BANK));
3052 break;
3053 case 5:
3054 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 NUM_BANKS(ADDR_SURF_16_BANK));
3058 break;
3059 case 6:
3060 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063 NUM_BANKS(ADDR_SURF_8_BANK));
3064 break;
3065 case 8:
3066 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3068 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069 NUM_BANKS(ADDR_SURF_16_BANK));
3070 break;
3071 case 9:
3072 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3073 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3074 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3075 NUM_BANKS(ADDR_SURF_16_BANK));
3076 break;
3077 case 10:
3078 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3081 NUM_BANKS(ADDR_SURF_16_BANK));
3082 break;
3083 case 11:
3084 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3087 NUM_BANKS(ADDR_SURF_16_BANK));
3088 break;
3089 case 12:
3090 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3092 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3093 NUM_BANKS(ADDR_SURF_16_BANK));
3094 break;
3095 case 13:
3096 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3098 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3099 NUM_BANKS(ADDR_SURF_16_BANK));
3100 break;
3101 case 14:
3102 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 NUM_BANKS(ADDR_SURF_8_BANK));
3106 break;
3107 default:
3108 gb_tile_moden = 0;
3109 break;
3110 }
3111 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3112 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3113 }
3114 } else
3115 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3116}
3117
3118/**
3119 * cik_select_se_sh - select which SE, SH to address
3120 *
3121 * @rdev: radeon_device pointer
3122 * @se_num: shader engine to address
3123 * @sh_num: sh block to address
3124 *
3125 * Select which SE, SH combinations to address. Certain
3126 * registers are instanced per SE or SH. 0xffffffff means
3127 * broadcast to all SEs or SHs (CIK).
3128 */
3129static void cik_select_se_sh(struct radeon_device *rdev,
3130 u32 se_num, u32 sh_num)
3131{
3132 u32 data = INSTANCE_BROADCAST_WRITES;
3133
3134 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3135 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3136 else if (se_num == 0xffffffff)
3137 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3138 else if (sh_num == 0xffffffff)
3139 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3140 else
3141 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3142 WREG32(GRBM_GFX_INDEX, data);
3143}
3144
3145/**
3146 * cik_create_bitmask - create a bitmask
3147 *
3148 * @bit_width: length of the mask
3149 *
3150 * create a variable length bit mask (CIK).
3151 * Returns the bitmask.
3152 */
3153static u32 cik_create_bitmask(u32 bit_width)
3154{
3155 u32 i, mask = 0;
3156
3157 for (i = 0; i < bit_width; i++) {
3158 mask <<= 1;
3159 mask |= 1;
3160 }
3161 return mask;
3162}
3163
3164/**
3165 * cik_get_rb_disabled - computes the mask of disabled RBs
3166 *
3167 * @rdev: radeon_device pointer
3168 * @max_rb_num: max RBs (render backends) for the asic
3169 * @se_num: number of SEs (shader engines) for the asic
3170 * @sh_per_se: number of SH blocks per SE for the asic
3171 *
3172 * Calculates the bitmask of disabled RBs (CIK).
3173 * Returns the disabled RB bitmask.
3174 */
3175static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3176 u32 max_rb_num_per_se,
3177 u32 sh_per_se)
3178{
3179 u32 data, mask;
3180
3181 data = RREG32(CC_RB_BACKEND_DISABLE);
3182 if (data & 1)
3183 data &= BACKEND_DISABLE_MASK;
3184 else
3185 data = 0;
3186 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3187
3188 data >>= BACKEND_DISABLE_SHIFT;
3189
3190 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3191
3192 return data & mask;
3193}
3194
3195/**
3196 * cik_setup_rb - setup the RBs on the asic
3197 *
3198 * @rdev: radeon_device pointer
3199 * @se_num: number of SEs (shader engines) for the asic
3200 * @sh_per_se: number of SH blocks per SE for the asic
3201 * @max_rb_num: max RBs (render backends) for the asic
3202 *
3203 * Configures per-SE/SH RB registers (CIK).
3204 */
3205static void cik_setup_rb(struct radeon_device *rdev,
3206 u32 se_num, u32 sh_per_se,
3207 u32 max_rb_num_per_se)
3208{
3209 int i, j;
3210 u32 data, mask;
3211 u32 disabled_rbs = 0;
3212 u32 enabled_rbs = 0;
3213
3214 for (i = 0; i < se_num; i++) {
3215 for (j = 0; j < sh_per_se; j++) {
3216 cik_select_se_sh(rdev, i, j);
3217 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3218 if (rdev->family == CHIP_HAWAII)
3219 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3220 else
3221 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3222 }
3223 }
3224 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3225
3226 mask = 1;
3227 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3228 if (!(disabled_rbs & mask))
3229 enabled_rbs |= mask;
3230 mask <<= 1;
3231 }
3232
3233 rdev->config.cik.backend_enable_mask = enabled_rbs;
3234
3235 for (i = 0; i < se_num; i++) {
3236 cik_select_se_sh(rdev, i, 0xffffffff);
3237 data = 0;
3238 for (j = 0; j < sh_per_se; j++) {
3239 switch (enabled_rbs & 3) {
3240 case 0:
3241 if (j == 0)
3242 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3243 else
3244 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3245 break;
3246 case 1:
3247 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3248 break;
3249 case 2:
3250 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3251 break;
3252 case 3:
3253 default:
3254 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3255 break;
3256 }
3257 enabled_rbs >>= 2;
3258 }
3259 WREG32(PA_SC_RASTER_CONFIG, data);
3260 }
3261 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3262}
3263
3264/**
3265 * cik_gpu_init - setup the 3D engine
3266 *
3267 * @rdev: radeon_device pointer
3268 *
3269 * Configures the 3D engine and tiling configuration
3270 * registers so that the 3D engine is usable.
3271 */
3272static void cik_gpu_init(struct radeon_device *rdev)
3273{
3274 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3275 u32 mc_shared_chmap __unused, mc_arb_ramcfg;
3276 u32 hdp_host_path_cntl;
3277 u32 tmp;
3278 int i, j;
3279
3280 switch (rdev->family) {
3281 case CHIP_BONAIRE:
3282 rdev->config.cik.max_shader_engines = 2;
3283 rdev->config.cik.max_tile_pipes = 4;
3284 rdev->config.cik.max_cu_per_sh = 7;
3285 rdev->config.cik.max_sh_per_se = 1;
3286 rdev->config.cik.max_backends_per_se = 2;
3287 rdev->config.cik.max_texture_channel_caches = 4;
3288 rdev->config.cik.max_gprs = 256;
3289 rdev->config.cik.max_gs_threads = 32;
3290 rdev->config.cik.max_hw_contexts = 8;
3291
3292 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3293 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3294 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3295 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3296 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3297 break;
3298 case CHIP_HAWAII:
3299 rdev->config.cik.max_shader_engines = 4;
3300 rdev->config.cik.max_tile_pipes = 16;
3301 rdev->config.cik.max_cu_per_sh = 11;
3302 rdev->config.cik.max_sh_per_se = 1;
3303 rdev->config.cik.max_backends_per_se = 4;
3304 rdev->config.cik.max_texture_channel_caches = 16;
3305 rdev->config.cik.max_gprs = 256;
3306 rdev->config.cik.max_gs_threads = 32;
3307 rdev->config.cik.max_hw_contexts = 8;
3308
3309 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3310 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3311 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3312 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3313 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3314 break;
3315 case CHIP_KAVERI:
3316 rdev->config.cik.max_shader_engines = 1;
3317 rdev->config.cik.max_tile_pipes = 4;
3318 if ((rdev->pdev->device == 0x1304) ||
3319 (rdev->pdev->device == 0x1305) ||
3320 (rdev->pdev->device == 0x130C) ||
3321 (rdev->pdev->device == 0x130F) ||
3322 (rdev->pdev->device == 0x1310) ||
3323 (rdev->pdev->device == 0x1311) ||
3324 (rdev->pdev->device == 0x131C)) {
3325 rdev->config.cik.max_cu_per_sh = 8;
3326 rdev->config.cik.max_backends_per_se = 2;
3327 } else if ((rdev->pdev->device == 0x1309) ||
3328 (rdev->pdev->device == 0x130A) ||
3329 (rdev->pdev->device == 0x130D) ||
3330 (rdev->pdev->device == 0x1313) ||
3331 (rdev->pdev->device == 0x131D)) {
3332 rdev->config.cik.max_cu_per_sh = 6;
3333 rdev->config.cik.max_backends_per_se = 2;
3334 } else if ((rdev->pdev->device == 0x1306) ||
3335 (rdev->pdev->device == 0x1307) ||
3336 (rdev->pdev->device == 0x130B) ||
3337 (rdev->pdev->device == 0x130E) ||
3338 (rdev->pdev->device == 0x1315) ||
3339 (rdev->pdev->device == 0x131B)) {
3340 rdev->config.cik.max_cu_per_sh = 4;
3341 rdev->config.cik.max_backends_per_se = 1;
3342 } else {
3343 rdev->config.cik.max_cu_per_sh = 3;
3344 rdev->config.cik.max_backends_per_se = 1;
3345 }
3346 rdev->config.cik.max_sh_per_se = 1;
3347 rdev->config.cik.max_texture_channel_caches = 4;
3348 rdev->config.cik.max_gprs = 256;
3349 rdev->config.cik.max_gs_threads = 16;
3350 rdev->config.cik.max_hw_contexts = 8;
3351
3352 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3353 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3354 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3355 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3356 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3357 break;
3358 case CHIP_KABINI:
3359 case CHIP_MULLINS:
3360 default:
3361 rdev->config.cik.max_shader_engines = 1;
3362 rdev->config.cik.max_tile_pipes = 2;
3363 rdev->config.cik.max_cu_per_sh = 2;
3364 rdev->config.cik.max_sh_per_se = 1;
3365 rdev->config.cik.max_backends_per_se = 1;
3366 rdev->config.cik.max_texture_channel_caches = 2;
3367 rdev->config.cik.max_gprs = 256;
3368 rdev->config.cik.max_gs_threads = 16;
3369 rdev->config.cik.max_hw_contexts = 8;
3370
3371 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3372 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3373 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3374 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3375 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3376 break;
3377 }
3378
3379 /* Initialize HDP */
3380 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3381 WREG32((0x2c14 + j), 0x00000000);
3382 WREG32((0x2c18 + j), 0x00000000);
3383 WREG32((0x2c1c + j), 0x00000000);
3384 WREG32((0x2c20 + j), 0x00000000);
3385 WREG32((0x2c24 + j), 0x00000000);
3386 }
3387
3388 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3389
3390 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3391
3392 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3393 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3394
3395 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3396 rdev->config.cik.mem_max_burst_length_bytes = 256;
3397 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3398 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3399 if (rdev->config.cik.mem_row_size_in_kb > 4)
3400 rdev->config.cik.mem_row_size_in_kb = 4;
3401 /* XXX use MC settings? */
3402 rdev->config.cik.shader_engine_tile_size = 32;
3403 rdev->config.cik.num_gpus = 1;
3404 rdev->config.cik.multi_gpu_tile_size = 64;
3405
3406 /* fix up row size */
3407 gb_addr_config &= ~ROW_SIZE_MASK;
3408 switch (rdev->config.cik.mem_row_size_in_kb) {
3409 case 1:
3410 default:
3411 gb_addr_config |= ROW_SIZE(0);
3412 break;
3413 case 2:
3414 gb_addr_config |= ROW_SIZE(1);
3415 break;
3416 case 4:
3417 gb_addr_config |= ROW_SIZE(2);
3418 break;
3419 }
3420
3421 /* setup tiling info dword. gb_addr_config is not adequate since it does
3422 * not have bank info, so create a custom tiling dword.
3423 * bits 3:0 num_pipes
3424 * bits 7:4 num_banks
3425 * bits 11:8 group_size
3426 * bits 15:12 row_size
3427 */
3428 rdev->config.cik.tile_config = 0;
3429 switch (rdev->config.cik.num_tile_pipes) {
3430 case 1:
3431 rdev->config.cik.tile_config |= (0 << 0);
3432 break;
3433 case 2:
3434 rdev->config.cik.tile_config |= (1 << 0);
3435 break;
3436 case 4:
3437 rdev->config.cik.tile_config |= (2 << 0);
3438 break;
3439 case 8:
3440 default:
3441 /* XXX what about 12? */
3442 rdev->config.cik.tile_config |= (3 << 0);
3443 break;
3444 }
3445 rdev->config.cik.tile_config |=
3446 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3447 rdev->config.cik.tile_config |=
3448 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3449 rdev->config.cik.tile_config |=
3450 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3451
3452 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3453 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3454 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3455 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3456 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3457 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3458 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3459 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3460
3461 cik_tiling_mode_table_init(rdev);
3462
3463 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3464 rdev->config.cik.max_sh_per_se,
3465 rdev->config.cik.max_backends_per_se);
3466
3467 /* set HW defaults for 3D engine */
3468 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3469
3470 WREG32(SX_DEBUG_1, 0x20);
3471
3472 WREG32(TA_CNTL_AUX, 0x00010000);
3473
3474 tmp = RREG32(SPI_CONFIG_CNTL);
3475 tmp |= 0x03000000;
3476 WREG32(SPI_CONFIG_CNTL, tmp);
3477
3478 WREG32(SQ_CONFIG, 1);
3479
3480 WREG32(DB_DEBUG, 0);
3481
3482 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3483 tmp |= 0x00000400;
3484 WREG32(DB_DEBUG2, tmp);
3485
3486 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3487 tmp |= 0x00020200;
3488 WREG32(DB_DEBUG3, tmp);
3489
3490 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3491 tmp |= 0x00018208;
3492 WREG32(CB_HW_CONTROL, tmp);
3493
3494 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3495
3496 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3497 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3498 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3499 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3500
3501 WREG32(VGT_NUM_INSTANCES, 1);
3502
3503 WREG32(CP_PERFMON_CNTL, 0);
3504
3505 WREG32(SQ_CONFIG, 0);
3506
3507 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3508 FORCE_EOV_MAX_REZ_CNT(255)));
3509
3510 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3511 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3512
3513 WREG32(VGT_GS_VERTEX_REUSE, 16);
3514 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3515
3516 tmp = RREG32(HDP_MISC_CNTL);
3517 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3518 WREG32(HDP_MISC_CNTL, tmp);
3519
3520 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3521 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3522
3523 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3524 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3525
3526 udelay(50);
3527}
3528
3529/*
3530 * GPU scratch registers helpers function.
3531 */
3532/**
3533 * cik_scratch_init - setup driver info for CP scratch regs
3534 *
3535 * @rdev: radeon_device pointer
3536 *
3537 * Set up the number and offset of the CP scratch registers.
3538 * NOTE: use of CP scratch registers is a legacy inferface and
3539 * is not used by default on newer asics (r6xx+). On newer asics,
3540 * memory buffers are used for fences rather than scratch regs.
3541 */
3542static void cik_scratch_init(struct radeon_device *rdev)
3543{
3544 int i;
3545
3546 rdev->scratch.num_reg = 7;
3547 rdev->scratch.reg_base = SCRATCH_REG0;
3548 for (i = 0; i < rdev->scratch.num_reg; i++) {
3549 rdev->scratch.free[i] = true;
3550 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3551 }
3552}
3553
3554/**
3555 * cik_ring_test - basic gfx ring test
3556 *
3557 * @rdev: radeon_device pointer
3558 * @ring: radeon_ring structure holding ring information
3559 *
3560 * Allocate a scratch register and write to it using the gfx ring (CIK).
3561 * Provides a basic gfx ring test to verify that the ring is working.
3562 * Used by cik_cp_gfx_resume();
3563 * Returns 0 on success, error on failure.
3564 */
3565int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3566{
3567 uint32_t scratch;
3568 uint32_t tmp = 0;
3569 unsigned i;
3570 int r;
3571
3572 r = radeon_scratch_get(rdev, &scratch);
3573 if (r) {
3574 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3575 return r;
3576 }
3577 WREG32(scratch, 0xCAFEDEAD);
3578 r = radeon_ring_lock(rdev, ring, 3);
3579 if (r) {
3580 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3581 radeon_scratch_free(rdev, scratch);
3582 return r;
3583 }
3584 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3585 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3586 radeon_ring_write(ring, 0xDEADBEEF);
3587 radeon_ring_unlock_commit(rdev, ring);
3588
3589 for (i = 0; i < rdev->usec_timeout; i++) {
3590 tmp = RREG32(scratch);
3591 if (tmp == 0xDEADBEEF)
3592 break;
3593 DRM_UDELAY(1);
3594 }
3595 if (i < rdev->usec_timeout) {
3596 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3597 } else {
3598 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3599 ring->idx, scratch, tmp);
3600 r = -EINVAL;
3601 }
3602 radeon_scratch_free(rdev, scratch);
3603 return r;
3604}
3605
3606/**
3607 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3608 *
3609 * @rdev: radeon_device pointer
3610 * @ridx: radeon ring index
3611 *
3612 * Emits an hdp flush on the cp.
3613 */
3614static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3615 int ridx)
3616{
3617 struct radeon_ring *ring = &rdev->ring[ridx];
3618 u32 ref_and_mask;
3619
3620 switch (ring->idx) {
3621 case CAYMAN_RING_TYPE_CP1_INDEX:
3622 case CAYMAN_RING_TYPE_CP2_INDEX:
3623 default:
3624 switch (ring->me) {
3625 case 0:
3626 ref_and_mask = CP2 << ring->pipe;
3627 break;
3628 case 1:
3629 ref_and_mask = CP6 << ring->pipe;
3630 break;
3631 default:
3632 return;
3633 }
3634 break;
3635 case RADEON_RING_TYPE_GFX_INDEX:
3636 ref_and_mask = CP0;
3637 break;
3638 }
3639
3640 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3641 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3642 WAIT_REG_MEM_FUNCTION(3) | /* == */
3643 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3644 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3645 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3646 radeon_ring_write(ring, ref_and_mask);
3647 radeon_ring_write(ring, ref_and_mask);
3648 radeon_ring_write(ring, 0x20); /* poll interval */
3649}
3650
3651/**
3652 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3653 *
3654 * @rdev: radeon_device pointer
3655 * @fence: radeon fence object
3656 *
3657 * Emits a fence sequnce number on the gfx ring and flushes
3658 * GPU caches.
3659 */
3660void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3661 struct radeon_fence *fence)
3662{
3663 struct radeon_ring *ring = &rdev->ring[fence->ring];
3664 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3665
3666 /* EVENT_WRITE_EOP - flush caches, send int */
3667 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3668 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3669 EOP_TC_ACTION_EN |
3670 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3671 EVENT_INDEX(5)));
3672 radeon_ring_write(ring, addr & 0xfffffffc);
3673 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3674 radeon_ring_write(ring, fence->seq);
3675 radeon_ring_write(ring, 0);
3676 /* HDP flush */
3677 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3678}
3679
3680/**
3681 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3682 *
3683 * @rdev: radeon_device pointer
3684 * @fence: radeon fence object
3685 *
3686 * Emits a fence sequnce number on the compute ring and flushes
3687 * GPU caches.
3688 */
3689void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3690 struct radeon_fence *fence)
3691{
3692 struct radeon_ring *ring = &rdev->ring[fence->ring];
3693 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3694
3695 /* RELEASE_MEM - flush caches, send int */
3696 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3697 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3698 EOP_TC_ACTION_EN |
3699 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3700 EVENT_INDEX(5)));
3701 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3702 radeon_ring_write(ring, addr & 0xfffffffc);
3703 radeon_ring_write(ring, upper_32_bits(addr));
3704 radeon_ring_write(ring, fence->seq);
3705 radeon_ring_write(ring, 0);
3706 /* HDP flush */
3707 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3708}
3709
3710bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3711 struct radeon_ring *ring,
3712 struct radeon_semaphore *semaphore,
3713 bool emit_wait)
3714{
3715 uint64_t addr = semaphore->gpu_addr;
3716 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3717
3718 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3719 radeon_ring_write(ring, addr & 0xffffffff);
3720 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3721
3722 return true;
3723}
3724
3725/**
3726 * cik_copy_cpdma - copy pages using the CP DMA engine
3727 *
3728 * @rdev: radeon_device pointer
3729 * @src_offset: src GPU address
3730 * @dst_offset: dst GPU address
3731 * @num_gpu_pages: number of GPU pages to xfer
3732 * @fence: radeon fence object
3733 *
3734 * Copy GPU paging using the CP DMA engine (CIK+).
3735 * Used by the radeon ttm implementation to move pages if
3736 * registered as the asic copy callback.
3737 */
3738int cik_copy_cpdma(struct radeon_device *rdev,
3739 uint64_t src_offset, uint64_t dst_offset,
3740 unsigned num_gpu_pages,
3741 struct radeon_fence **fence)
3742{
3743 struct radeon_semaphore *sem = NULL;
3744 int ring_index = rdev->asic->copy.blit_ring_index;
3745 struct radeon_ring *ring = &rdev->ring[ring_index];
3746 u32 size_in_bytes, cur_size_in_bytes, control;
3747 int i, num_loops;
3748 int r = 0;
3749
3750 r = radeon_semaphore_create(rdev, &sem);
3751 if (r) {
3752 DRM_ERROR("radeon: moving bo (%d).\n", r);
3753 return r;
3754 }
3755
3756 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3757 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3758 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3759 if (r) {
3760 DRM_ERROR("radeon: moving bo (%d).\n", r);
3761 radeon_semaphore_free(rdev, &sem, NULL);
3762 return r;
3763 }
3764
3765 radeon_semaphore_sync_to(sem, *fence);
3766 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3767
3768 for (i = 0; i < num_loops; i++) {
3769 cur_size_in_bytes = size_in_bytes;
3770 if (cur_size_in_bytes > 0x1fffff)
3771 cur_size_in_bytes = 0x1fffff;
3772 size_in_bytes -= cur_size_in_bytes;
3773 control = 0;
3774 if (size_in_bytes == 0)
3775 control |= PACKET3_DMA_DATA_CP_SYNC;
3776 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3777 radeon_ring_write(ring, control);
3778 radeon_ring_write(ring, lower_32_bits(src_offset));
3779 radeon_ring_write(ring, upper_32_bits(src_offset));
3780 radeon_ring_write(ring, lower_32_bits(dst_offset));
3781 radeon_ring_write(ring, upper_32_bits(dst_offset));
3782 radeon_ring_write(ring, cur_size_in_bytes);
3783 src_offset += cur_size_in_bytes;
3784 dst_offset += cur_size_in_bytes;
3785 }
3786
3787 r = radeon_fence_emit(rdev, fence, ring->idx);
3788 if (r) {
3789 radeon_ring_unlock_undo(rdev, ring);
3790 radeon_semaphore_free(rdev, &sem, NULL);
3791 return r;
3792 }
3793
3794 radeon_ring_unlock_commit(rdev, ring);
3795 radeon_semaphore_free(rdev, &sem, *fence);
3796
3797 return r;
3798}
3799
3800/*
3801 * IB stuff
3802 */
3803/**
3804 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3805 *
3806 * @rdev: radeon_device pointer
3807 * @ib: radeon indirect buffer object
3808 *
3809 * Emits an DE (drawing engine) or CE (constant engine) IB
3810 * on the gfx ring. IBs are usually generated by userspace
3811 * acceleration drivers and submitted to the kernel for
3812 * sheduling on the ring. This function schedules the IB
3813 * on the gfx ring for execution by the GPU.
3814 */
3815void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3816{
3817 struct radeon_ring *ring = &rdev->ring[ib->ring];
3818 u32 header, control = INDIRECT_BUFFER_VALID;
3819
3820 if (ib->is_const_ib) {
3821 /* set switch buffer packet before const IB */
3822 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3823 radeon_ring_write(ring, 0);
3824
3825 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3826 } else {
3827 u32 next_rptr;
3828 if (ring->rptr_save_reg) {
3829 next_rptr = ring->wptr + 3 + 4;
3830 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3831 radeon_ring_write(ring, ((ring->rptr_save_reg -
3832 PACKET3_SET_UCONFIG_REG_START) >> 2));
3833 radeon_ring_write(ring, next_rptr);
3834 } else if (rdev->wb.enabled) {
3835 next_rptr = ring->wptr + 5 + 4;
3836 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3837 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3838 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3839 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3840 radeon_ring_write(ring, next_rptr);
3841 }
3842
3843 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3844 }
3845
3846 control |= ib->length_dw |
3847 (ib->vm ? (ib->vm->id << 24) : 0);
3848
3849 radeon_ring_write(ring, header);
3850 radeon_ring_write(ring,
3851#ifdef __BIG_ENDIAN
3852 (2 << 0) |
3853#endif
3854 (ib->gpu_addr & 0xFFFFFFFC));
3855 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3856 radeon_ring_write(ring, control);
3857}
3858
3859/**
3860 * cik_ib_test - basic gfx ring IB test
3861 *
3862 * @rdev: radeon_device pointer
3863 * @ring: radeon_ring structure holding ring information
3864 *
3865 * Allocate an IB and execute it on the gfx ring (CIK).
3866 * Provides a basic gfx ring test to verify that IBs are working.
3867 * Returns 0 on success, error on failure.
3868 */
3869int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3870{
3871 struct radeon_ib ib;
3872 uint32_t scratch;
3873 uint32_t tmp = 0;
3874 unsigned i;
3875 int r;
3876
3877 r = radeon_scratch_get(rdev, &scratch);
3878 if (r) {
3879 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3880 return r;
3881 }
3882 WREG32(scratch, 0xCAFEDEAD);
3883 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3884 if (r) {
3885 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3886 radeon_scratch_free(rdev, scratch);
3887 return r;
3888 }
3889 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3890 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3891 ib.ptr[2] = 0xDEADBEEF;
3892 ib.length_dw = 3;
3893 r = radeon_ib_schedule(rdev, &ib, NULL);
3894 if (r) {
3895 radeon_scratch_free(rdev, scratch);
3896 radeon_ib_free(rdev, &ib);
3897 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3898 return r;
3899 }
3900 r = radeon_fence_wait(ib.fence, false);
3901 if (r) {
3902 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3903 radeon_scratch_free(rdev, scratch);
3904 radeon_ib_free(rdev, &ib);
3905 return r;
3906 }
3907 for (i = 0; i < rdev->usec_timeout; i++) {
3908 tmp = RREG32(scratch);
3909 if (tmp == 0xDEADBEEF)
3910 break;
3911 DRM_UDELAY(1);
3912 }
3913 if (i < rdev->usec_timeout) {
3914 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3915 } else {
3916 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3917 scratch, tmp);
3918 r = -EINVAL;
3919 }
3920 radeon_scratch_free(rdev, scratch);
3921 radeon_ib_free(rdev, &ib);
3922 return r;
3923}
3924
3925/*
3926 * CP.
3927 * On CIK, gfx and compute now have independant command processors.
3928 *
3929 * GFX
3930 * Gfx consists of a single ring and can process both gfx jobs and
3931 * compute jobs. The gfx CP consists of three microengines (ME):
3932 * PFP - Pre-Fetch Parser
3933 * ME - Micro Engine
3934 * CE - Constant Engine
3935 * The PFP and ME make up what is considered the Drawing Engine (DE).
3936 * The CE is an asynchronous engine used for updating buffer desciptors
3937 * used by the DE so that they can be loaded into cache in parallel
3938 * while the DE is processing state update packets.
3939 *
3940 * Compute
3941 * The compute CP consists of two microengines (ME):
3942 * MEC1 - Compute MicroEngine 1
3943 * MEC2 - Compute MicroEngine 2
3944 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3945 * The queues are exposed to userspace and are programmed directly
3946 * by the compute runtime.
3947 */
3948/**
3949 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3950 *
3951 * @rdev: radeon_device pointer
3952 * @enable: enable or disable the MEs
3953 *
3954 * Halts or unhalts the gfx MEs.
3955 */
3956static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3957{
3958 if (enable)
3959 WREG32(CP_ME_CNTL, 0);
3960 else {
3961 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3962 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3963 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3964 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3965 }
3966 udelay(50);
3967}
3968
3969/**
3970 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3971 *
3972 * @rdev: radeon_device pointer
3973 *
3974 * Loads the gfx PFP, ME, and CE ucode.
3975 * Returns 0 for success, -EINVAL if the ucode is not available.
3976 */
3977static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3978{
3979 const __be32 *fw_data;
3980 int i;
3981
3982 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3983 return -EINVAL;
3984
3985 cik_cp_gfx_enable(rdev, false);
3986
3987 /* PFP */
3988 fw_data = (const __be32 *)rdev->pfp_fw->data;
3989 WREG32(CP_PFP_UCODE_ADDR, 0);
3990 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3991 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3992 WREG32(CP_PFP_UCODE_ADDR, 0);
3993
3994 /* CE */
3995 fw_data = (const __be32 *)rdev->ce_fw->data;
3996 WREG32(CP_CE_UCODE_ADDR, 0);
3997 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3998 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3999 WREG32(CP_CE_UCODE_ADDR, 0);
4000
4001 /* ME */
4002 fw_data = (const __be32 *)rdev->me_fw->data;
4003 WREG32(CP_ME_RAM_WADDR, 0);
4004 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4005 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4006 WREG32(CP_ME_RAM_WADDR, 0);
4007
4008 WREG32(CP_PFP_UCODE_ADDR, 0);
4009 WREG32(CP_CE_UCODE_ADDR, 0);
4010 WREG32(CP_ME_RAM_WADDR, 0);
4011 WREG32(CP_ME_RAM_RADDR, 0);
4012 return 0;
4013}
4014
4015/**
4016 * cik_cp_gfx_start - start the gfx ring
4017 *
4018 * @rdev: radeon_device pointer
4019 *
4020 * Enables the ring and loads the clear state context and other
4021 * packets required to init the ring.
4022 * Returns 0 for success, error for failure.
4023 */
4024static int cik_cp_gfx_start(struct radeon_device *rdev)
4025{
4026 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4027 int r, i;
4028
4029 /* init the CP */
4030 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4031 WREG32(CP_ENDIAN_SWAP, 0);
4032 WREG32(CP_DEVICE_ID, 1);
4033
4034 cik_cp_gfx_enable(rdev, true);
4035
4036 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4037 if (r) {
4038 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4039 return r;
4040 }
4041
4042 /* init the CE partitions. CE only used for gfx on CIK */
4043 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4044 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4045 radeon_ring_write(ring, 0xc000);
4046 radeon_ring_write(ring, 0xc000);
4047
4048 /* setup clear context state */
4049 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4050 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4051
4052 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4053 radeon_ring_write(ring, 0x80000000);
4054 radeon_ring_write(ring, 0x80000000);
4055
4056 for (i = 0; i < cik_default_size; i++)
4057 radeon_ring_write(ring, cik_default_state[i]);
4058
4059 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4060 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4061
4062 /* set clear context state */
4063 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4064 radeon_ring_write(ring, 0);
4065
4066 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4067 radeon_ring_write(ring, 0x00000316);
4068 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4069 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4070
4071 radeon_ring_unlock_commit(rdev, ring);
4072
4073 return 0;
4074}
4075
4076/**
4077 * cik_cp_gfx_fini - stop the gfx ring
4078 *
4079 * @rdev: radeon_device pointer
4080 *
4081 * Stop the gfx ring and tear down the driver ring
4082 * info.
4083 */
4084static void cik_cp_gfx_fini(struct radeon_device *rdev)
4085{
4086 cik_cp_gfx_enable(rdev, false);
4087 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4088}
4089
4090/**
4091 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4092 *
4093 * @rdev: radeon_device pointer
4094 *
4095 * Program the location and size of the gfx ring buffer
4096 * and test it to make sure it's working.
4097 * Returns 0 for success, error for failure.
4098 */
4099static int cik_cp_gfx_resume(struct radeon_device *rdev)
4100{
4101 struct radeon_ring *ring;
4102 u32 tmp;
4103 u32 rb_bufsz;
4104 u64 rb_addr;
4105 int r;
4106
4107 WREG32(CP_SEM_WAIT_TIMER, 0x0);
4108 if (rdev->family != CHIP_HAWAII)
4109 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4110
4111 /* Set the write pointer delay */
4112 WREG32(CP_RB_WPTR_DELAY, 0);
4113
4114 /* set the RB to use vmid 0 */
4115 WREG32(CP_RB_VMID, 0);
4116
4117 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4118
4119 /* ring 0 - compute and gfx */
4120 /* Set ring buffer size */
4121 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4122 rb_bufsz = order_base_2(ring->ring_size / 8);
4123 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4124#ifdef __BIG_ENDIAN
4125 tmp |= BUF_SWAP_32BIT;
4126#endif
4127 WREG32(CP_RB0_CNTL, tmp);
4128
4129 /* Initialize the ring buffer's read and write pointers */
4130 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4131 ring->wptr = 0;
4132 WREG32(CP_RB0_WPTR, ring->wptr);
4133
4134 /* set the wb address wether it's enabled or not */
4135 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4136 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4137
4138 /* scratch register shadowing is no longer supported */
4139 WREG32(SCRATCH_UMSK, 0);
4140
4141 if (!rdev->wb.enabled)
4142 tmp |= RB_NO_UPDATE;
4143
4144 mdelay(1);
4145 WREG32(CP_RB0_CNTL, tmp);
4146
4147 rb_addr = ring->gpu_addr >> 8;
4148 WREG32(CP_RB0_BASE, rb_addr);
4149 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4150
4151 /* start the ring */
4152 cik_cp_gfx_start(rdev);
4153 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4154 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4155 if (r) {
4156 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4157 return r;
4158 }
4159
4160 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4161 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4162
4163 return 0;
4164}
4165
4166u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4167 struct radeon_ring *ring)
4168{
4169 u32 rptr;
4170
4171 if (rdev->wb.enabled)
4172 rptr = rdev->wb.wb[ring->rptr_offs/4];
4173 else
4174 rptr = RREG32(CP_RB0_RPTR);
4175
4176 return rptr;
4177}
4178
4179u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4180 struct radeon_ring *ring)
4181{
4182 u32 wptr;
4183
4184 wptr = RREG32(CP_RB0_WPTR);
4185
4186 return wptr;
4187}
4188
4189void cik_gfx_set_wptr(struct radeon_device *rdev,
4190 struct radeon_ring *ring)
4191{
4192 WREG32(CP_RB0_WPTR, ring->wptr);
4193 (void)RREG32(CP_RB0_WPTR);
4194}
4195
4196u32 cik_compute_get_rptr(struct radeon_device *rdev,
4197 struct radeon_ring *ring)
4198{
4199 u32 rptr;
4200
4201 if (rdev->wb.enabled) {
4202 rptr = rdev->wb.wb[ring->rptr_offs/4];
4203 } else {
4204 mutex_lock(&rdev->srbm_mutex);
4205 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4206 rptr = RREG32(CP_HQD_PQ_RPTR);
4207 cik_srbm_select(rdev, 0, 0, 0, 0);
4208 mutex_unlock(&rdev->srbm_mutex);
4209 }
4210
4211 return rptr;
4212}
4213
4214u32 cik_compute_get_wptr(struct radeon_device *rdev,
4215 struct radeon_ring *ring)
4216{
4217 u32 wptr;
4218
4219 if (rdev->wb.enabled) {
4220 /* XXX check if swapping is necessary on BE */
4221 wptr = rdev->wb.wb[ring->wptr_offs/4];
4222 } else {
4223 mutex_lock(&rdev->srbm_mutex);
4224 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4225 wptr = RREG32(CP_HQD_PQ_WPTR);
4226 cik_srbm_select(rdev, 0, 0, 0, 0);
4227 mutex_unlock(&rdev->srbm_mutex);
4228 }
4229
4230 return wptr;
4231}
4232
4233void cik_compute_set_wptr(struct radeon_device *rdev,
4234 struct radeon_ring *ring)
4235{
4236 /* XXX check if swapping is necessary on BE */
4237 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4238 WDOORBELL32(ring->doorbell_index, ring->wptr);
4239}
4240
4241/**
4242 * cik_cp_compute_enable - enable/disable the compute CP MEs
4243 *
4244 * @rdev: radeon_device pointer
4245 * @enable: enable or disable the MEs
4246 *
4247 * Halts or unhalts the compute MEs.
4248 */
4249static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4250{
4251 if (enable)
4252 WREG32(CP_MEC_CNTL, 0);
4253 else {
4254 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4255 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4256 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4257 }
4258 udelay(50);
4259}
4260
4261/**
4262 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4263 *
4264 * @rdev: radeon_device pointer
4265 *
4266 * Loads the compute MEC1&2 ucode.
4267 * Returns 0 for success, -EINVAL if the ucode is not available.
4268 */
4269static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4270{
4271 const __be32 *fw_data;
4272 int i;
4273
4274 if (!rdev->mec_fw)
4275 return -EINVAL;
4276
4277 cik_cp_compute_enable(rdev, false);
4278
4279 /* MEC1 */
4280 fw_data = (const __be32 *)rdev->mec_fw->data;
4281 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4283 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4284 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4285
4286 if (rdev->family == CHIP_KAVERI) {
4287 /* MEC2 */
4288 fw_data = (const __be32 *)rdev->mec_fw->data;
4289 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4290 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4291 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4292 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4293 }
4294
4295 return 0;
4296}
4297
4298/**
4299 * cik_cp_compute_start - start the compute queues
4300 *
4301 * @rdev: radeon_device pointer
4302 *
4303 * Enable the compute queues.
4304 * Returns 0 for success, error for failure.
4305 */
4306static int cik_cp_compute_start(struct radeon_device *rdev)
4307{
4308 cik_cp_compute_enable(rdev, true);
4309
4310 return 0;
4311}
4312
4313/**
4314 * cik_cp_compute_fini - stop the compute queues
4315 *
4316 * @rdev: radeon_device pointer
4317 *
4318 * Stop the compute queues and tear down the driver queue
4319 * info.
4320 */
4321static void cik_cp_compute_fini(struct radeon_device *rdev)
4322{
4323 int i, idx, r;
4324
4325 cik_cp_compute_enable(rdev, false);
4326
4327 for (i = 0; i < 2; i++) {
4328 if (i == 0)
4329 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4330 else
4331 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4332
4333 if (rdev->ring[idx].mqd_obj) {
4334 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4335 if (unlikely(r != 0))
4336 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4337
4338 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4339 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4340
4341 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4342 rdev->ring[idx].mqd_obj = NULL;
4343 }
4344 }
4345}
4346
4347static void cik_mec_fini(struct radeon_device *rdev)
4348{
4349 int r;
4350
4351 if (rdev->mec.hpd_eop_obj) {
4352 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4353 if (unlikely(r != 0))
4354 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4355 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4356 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4357
4358 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4359 rdev->mec.hpd_eop_obj = NULL;
4360 }
4361}
4362
4363#define MEC_HPD_SIZE 2048
4364
4365static int cik_mec_init(struct radeon_device *rdev)
4366{
4367 int r;
4368 u32 *hpd;
4369
4370 /*
4371 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4372 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4373 */
4374 if (rdev->family == CHIP_KAVERI)
4375 rdev->mec.num_mec = 2;
4376 else
4377 rdev->mec.num_mec = 1;
4378 rdev->mec.num_pipe = 4;
4379 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4380
4381 if (rdev->mec.hpd_eop_obj == NULL) {
4382 r = radeon_bo_create(rdev,
4383 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4384 PAGE_SIZE, true,
4385 RADEON_GEM_DOMAIN_GTT, NULL,
4386 &rdev->mec.hpd_eop_obj);
4387 if (r) {
4388 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4389 return r;
4390 }
4391 }
4392
4393 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4394 if (unlikely(r != 0)) {
4395 cik_mec_fini(rdev);
4396 return r;
4397 }
4398 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4399 &rdev->mec.hpd_eop_gpu_addr);
4400 if (r) {
4401 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4402 cik_mec_fini(rdev);
4403 return r;
4404 }
4405 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4406 if (r) {
4407 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4408 cik_mec_fini(rdev);
4409 return r;
4410 }
4411
4412 /* clear memory. Not sure if this is required or not */
4413 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4414
4415 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4416 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4417
4418 return 0;
4419}
4420
4421struct hqd_registers
4422{
4423 u32 cp_mqd_base_addr;
4424 u32 cp_mqd_base_addr_hi;
4425 u32 cp_hqd_active;
4426 u32 cp_hqd_vmid;
4427 u32 cp_hqd_persistent_state;
4428 u32 cp_hqd_pipe_priority;
4429 u32 cp_hqd_queue_priority;
4430 u32 cp_hqd_quantum;
4431 u32 cp_hqd_pq_base;
4432 u32 cp_hqd_pq_base_hi;
4433 u32 cp_hqd_pq_rptr;
4434 u32 cp_hqd_pq_rptr_report_addr;
4435 u32 cp_hqd_pq_rptr_report_addr_hi;
4436 u32 cp_hqd_pq_wptr_poll_addr;
4437 u32 cp_hqd_pq_wptr_poll_addr_hi;
4438 u32 cp_hqd_pq_doorbell_control;
4439 u32 cp_hqd_pq_wptr;
4440 u32 cp_hqd_pq_control;
4441 u32 cp_hqd_ib_base_addr;
4442 u32 cp_hqd_ib_base_addr_hi;
4443 u32 cp_hqd_ib_rptr;
4444 u32 cp_hqd_ib_control;
4445 u32 cp_hqd_iq_timer;
4446 u32 cp_hqd_iq_rptr;
4447 u32 cp_hqd_dequeue_request;
4448 u32 cp_hqd_dma_offload;
4449 u32 cp_hqd_sema_cmd;
4450 u32 cp_hqd_msg_type;
4451 u32 cp_hqd_atomic0_preop_lo;
4452 u32 cp_hqd_atomic0_preop_hi;
4453 u32 cp_hqd_atomic1_preop_lo;
4454 u32 cp_hqd_atomic1_preop_hi;
4455 u32 cp_hqd_hq_scheduler0;
4456 u32 cp_hqd_hq_scheduler1;
4457 u32 cp_mqd_control;
4458};
4459
4460struct bonaire_mqd
4461{
4462 u32 header;
4463 u32 dispatch_initiator;
4464 u32 dimensions[3];
4465 u32 start_idx[3];
4466 u32 num_threads[3];
4467 u32 pipeline_stat_enable;
4468 u32 perf_counter_enable;
4469 u32 pgm[2];
4470 u32 tba[2];
4471 u32 tma[2];
4472 u32 pgm_rsrc[2];
4473 u32 vmid;
4474 u32 resource_limits;
4475 u32 static_thread_mgmt01[2];
4476 u32 tmp_ring_size;
4477 u32 static_thread_mgmt23[2];
4478 u32 restart[3];
4479 u32 thread_trace_enable;
4480 u32 reserved1;
4481 u32 user_data[16];
4482 u32 vgtcs_invoke_count[2];
4483 struct hqd_registers queue_state;
4484 u32 dequeue_cntr;
4485 u32 interrupt_queue[64];
4486};
4487
4488/**
4489 * cik_cp_compute_resume - setup the compute queue registers
4490 *
4491 * @rdev: radeon_device pointer
4492 *
4493 * Program the compute queues and test them to make sure they
4494 * are working.
4495 * Returns 0 for success, error for failure.
4496 */
4497static int cik_cp_compute_resume(struct radeon_device *rdev)
4498{
4499 int r, i, idx;
4500 u32 tmp;
4501 bool use_doorbell = true;
4502 u64 hqd_gpu_addr;
4503 u64 mqd_gpu_addr;
4504 u64 eop_gpu_addr;
4505 u64 wb_gpu_addr;
4506 u32 *buf;
4507 struct bonaire_mqd *mqd;
4508
4509 r = cik_cp_compute_start(rdev);
4510 if (r)
4511 return r;
4512
4513 /* fix up chicken bits */
4514 tmp = RREG32(CP_CPF_DEBUG);
4515 tmp |= (1 << 23);
4516 WREG32(CP_CPF_DEBUG, tmp);
4517
4518 /* init the pipes */
4519 mutex_lock(&rdev->srbm_mutex);
4520 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4521 int me = (i < 4) ? 1 : 2;
4522 int pipe = (i < 4) ? i : (i - 4);
4523
4524 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4525
4526 cik_srbm_select(rdev, me, pipe, 0, 0);
4527
4528 /* write the EOP addr */
4529 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4530 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4531
4532 /* set the VMID assigned */
4533 WREG32(CP_HPD_EOP_VMID, 0);
4534
4535 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4536 tmp = RREG32(CP_HPD_EOP_CONTROL);
4537 tmp &= ~EOP_SIZE_MASK;
4538 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4539 WREG32(CP_HPD_EOP_CONTROL, tmp);
4540 }
4541 cik_srbm_select(rdev, 0, 0, 0, 0);
4542 mutex_unlock(&rdev->srbm_mutex);
4543
4544 /* init the queues. Just two for now. */
4545 for (i = 0; i < 2; i++) {
4546 if (i == 0)
4547 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4548 else
4549 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4550
4551 if (rdev->ring[idx].mqd_obj == NULL) {
4552 r = radeon_bo_create(rdev,
4553 sizeof(struct bonaire_mqd),
4554 PAGE_SIZE, true,
4555 RADEON_GEM_DOMAIN_GTT, NULL,
4556 &rdev->ring[idx].mqd_obj);
4557 if (r) {
4558 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4559 return r;
4560 }
4561 }
4562
4563 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4564 if (unlikely(r != 0)) {
4565 cik_cp_compute_fini(rdev);
4566 return r;
4567 }
4568 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4569 &mqd_gpu_addr);
4570 if (r) {
4571 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4572 cik_cp_compute_fini(rdev);
4573 return r;
4574 }
4575 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4576 if (r) {
4577 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4578 cik_cp_compute_fini(rdev);
4579 return r;
4580 }
4581
4582 /* init the mqd struct */
4583 memset(buf, 0, sizeof(struct bonaire_mqd));
4584
4585 mqd = (struct bonaire_mqd *)buf;
4586 mqd->header = 0xC0310800;
4587 mqd->static_thread_mgmt01[0] = 0xffffffff;
4588 mqd->static_thread_mgmt01[1] = 0xffffffff;
4589 mqd->static_thread_mgmt23[0] = 0xffffffff;
4590 mqd->static_thread_mgmt23[1] = 0xffffffff;
4591
4592 mutex_lock(&rdev->srbm_mutex);
4593 cik_srbm_select(rdev, rdev->ring[idx].me,
4594 rdev->ring[idx].pipe,
4595 rdev->ring[idx].queue, 0);
4596
4597 /* disable wptr polling */
4598 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4599 tmp &= ~WPTR_POLL_EN;
4600 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4601
4602 /* enable doorbell? */
4603 mqd->queue_state.cp_hqd_pq_doorbell_control =
4604 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4605 if (use_doorbell)
4606 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4607 else
4608 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4609 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4610 mqd->queue_state.cp_hqd_pq_doorbell_control);
4611
4612 /* disable the queue if it's active */
4613 mqd->queue_state.cp_hqd_dequeue_request = 0;
4614 mqd->queue_state.cp_hqd_pq_rptr = 0;
4615 mqd->queue_state.cp_hqd_pq_wptr= 0;
4616 if (RREG32(CP_HQD_ACTIVE) & 1) {
4617 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4618 for (i = 0; i < rdev->usec_timeout; i++) {
4619 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4620 break;
4621 udelay(1);
4622 }
4623 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4624 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4625 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4626 }
4627
4628 /* set the pointer to the MQD */
4629 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4630 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4631 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4632 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4633 /* set MQD vmid to 0 */
4634 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4635 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4636 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4637
4638 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4639 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4640 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4641 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4642 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4643 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4644
4645 /* set up the HQD, this is similar to CP_RB0_CNTL */
4646 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4647 mqd->queue_state.cp_hqd_pq_control &=
4648 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4649
4650 mqd->queue_state.cp_hqd_pq_control |=
4651 order_base_2(rdev->ring[idx].ring_size / 8);
4652 mqd->queue_state.cp_hqd_pq_control |=
4653 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4654#ifdef __BIG_ENDIAN
4655 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4656#endif
4657 mqd->queue_state.cp_hqd_pq_control &=
4658 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4659 mqd->queue_state.cp_hqd_pq_control |=
4660 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4661 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4662
4663 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4664 if (i == 0)
4665 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4666 else
4667 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4668 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4669 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4670 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4671 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4672 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4673
4674 /* set the wb address wether it's enabled or not */
4675 if (i == 0)
4676 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4677 else
4678 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4679 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4680 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4681 upper_32_bits(wb_gpu_addr) & 0xffff;
4682 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4683 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4684 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4685 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4686
4687 /* enable the doorbell if requested */
4688 if (use_doorbell) {
4689 mqd->queue_state.cp_hqd_pq_doorbell_control =
4690 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4691 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4692 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4693 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4694 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4695 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4696 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4697
4698 } else {
4699 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4700 }
4701 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4702 mqd->queue_state.cp_hqd_pq_doorbell_control);
4703
4704 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4705 rdev->ring[idx].wptr = 0;
4706 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4707 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4708 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4709
4710 /* set the vmid for the queue */
4711 mqd->queue_state.cp_hqd_vmid = 0;
4712 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4713
4714 /* activate the queue */
4715 mqd->queue_state.cp_hqd_active = 1;
4716 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4717
4718 cik_srbm_select(rdev, 0, 0, 0, 0);
4719 mutex_unlock(&rdev->srbm_mutex);
4720
4721 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4722 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4723
4724 rdev->ring[idx].ready = true;
4725 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4726 if (r)
4727 rdev->ring[idx].ready = false;
4728 }
4729
4730 return 0;
4731}
4732
4733static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4734{
4735 cik_cp_gfx_enable(rdev, enable);
4736 cik_cp_compute_enable(rdev, enable);
4737}
4738
4739static int cik_cp_load_microcode(struct radeon_device *rdev)
4740{
4741 int r;
4742
4743 r = cik_cp_gfx_load_microcode(rdev);
4744 if (r)
4745 return r;
4746 r = cik_cp_compute_load_microcode(rdev);
4747 if (r)
4748 return r;
4749
4750 return 0;
4751}
4752
4753static void cik_cp_fini(struct radeon_device *rdev)
4754{
4755 cik_cp_gfx_fini(rdev);
4756 cik_cp_compute_fini(rdev);
4757}
4758
4759static int cik_cp_resume(struct radeon_device *rdev)
4760{
4761 int r;
4762
4763 cik_enable_gui_idle_interrupt(rdev, false);
4764
4765 r = cik_cp_load_microcode(rdev);
4766 if (r)
4767 return r;
4768
4769 r = cik_cp_gfx_resume(rdev);
4770 if (r)
4771 return r;
4772 r = cik_cp_compute_resume(rdev);
4773 if (r)
4774 return r;
4775
4776 cik_enable_gui_idle_interrupt(rdev, true);
4777
4778 return 0;
4779}
4780
4781static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4782{
4783 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4784 RREG32(GRBM_STATUS));
4785 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4786 RREG32(GRBM_STATUS2));
4787 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4788 RREG32(GRBM_STATUS_SE0));
4789 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4790 RREG32(GRBM_STATUS_SE1));
4791 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4792 RREG32(GRBM_STATUS_SE2));
4793 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4794 RREG32(GRBM_STATUS_SE3));
4795 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4796 RREG32(SRBM_STATUS));
4797 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4798 RREG32(SRBM_STATUS2));
4799 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4800 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4801 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4802 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4803 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4804 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4805 RREG32(CP_STALLED_STAT1));
4806 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4807 RREG32(CP_STALLED_STAT2));
4808 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4809 RREG32(CP_STALLED_STAT3));
4810 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4811 RREG32(CP_CPF_BUSY_STAT));
4812 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4813 RREG32(CP_CPF_STALLED_STAT1));
4814 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4815 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4816 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4817 RREG32(CP_CPC_STALLED_STAT1));
4818 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4819}
4820
4821/**
4822 * cik_gpu_check_soft_reset - check which blocks are busy
4823 *
4824 * @rdev: radeon_device pointer
4825 *
4826 * Check which blocks are busy and return the relevant reset
4827 * mask to be used by cik_gpu_soft_reset().
4828 * Returns a mask of the blocks to be reset.
4829 */
4830u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4831{
4832 u32 reset_mask = 0;
4833 u32 tmp;
4834
4835 /* GRBM_STATUS */
4836 tmp = RREG32(GRBM_STATUS);
4837 if (tmp & (PA_BUSY | SC_BUSY |
4838 BCI_BUSY | SX_BUSY |
4839 TA_BUSY | VGT_BUSY |
4840 DB_BUSY | CB_BUSY |
4841 GDS_BUSY | SPI_BUSY |
4842 IA_BUSY | IA_BUSY_NO_DMA))
4843 reset_mask |= RADEON_RESET_GFX;
4844
4845 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4846 reset_mask |= RADEON_RESET_CP;
4847
4848 /* GRBM_STATUS2 */
4849 tmp = RREG32(GRBM_STATUS2);
4850 if (tmp & RLC_BUSY)
4851 reset_mask |= RADEON_RESET_RLC;
4852
4853 /* SDMA0_STATUS_REG */
4854 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4855 if (!(tmp & SDMA_IDLE))
4856 reset_mask |= RADEON_RESET_DMA;
4857
4858 /* SDMA1_STATUS_REG */
4859 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4860 if (!(tmp & SDMA_IDLE))
4861 reset_mask |= RADEON_RESET_DMA1;
4862
4863 /* SRBM_STATUS2 */
4864 tmp = RREG32(SRBM_STATUS2);
4865 if (tmp & SDMA_BUSY)
4866 reset_mask |= RADEON_RESET_DMA;
4867
4868 if (tmp & SDMA1_BUSY)
4869 reset_mask |= RADEON_RESET_DMA1;
4870
4871 /* SRBM_STATUS */
4872 tmp = RREG32(SRBM_STATUS);
4873
4874 if (tmp & IH_BUSY)
4875 reset_mask |= RADEON_RESET_IH;
4876
4877 if (tmp & SEM_BUSY)
4878 reset_mask |= RADEON_RESET_SEM;
4879
4880 if (tmp & GRBM_RQ_PENDING)
4881 reset_mask |= RADEON_RESET_GRBM;
4882
4883 if (tmp & VMC_BUSY)
4884 reset_mask |= RADEON_RESET_VMC;
4885
4886 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4887 MCC_BUSY | MCD_BUSY))
4888 reset_mask |= RADEON_RESET_MC;
4889
4890 if (evergreen_is_display_hung(rdev))
4891 reset_mask |= RADEON_RESET_DISPLAY;
4892
4893 /* Skip MC reset as it's mostly likely not hung, just busy */
4894 if (reset_mask & RADEON_RESET_MC) {
4895 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4896 reset_mask &= ~RADEON_RESET_MC;
4897 }
4898
4899 return reset_mask;
4900}
4901
4902/**
4903 * cik_gpu_soft_reset - soft reset GPU
4904 *
4905 * @rdev: radeon_device pointer
4906 * @reset_mask: mask of which blocks to reset
4907 *
4908 * Soft reset the blocks specified in @reset_mask.
4909 */
4910static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4911{
4912 struct evergreen_mc_save save;
4913 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4914 u32 tmp;
4915
4916 if (reset_mask == 0)
4917 return;
4918
4919 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4920
4921 cik_print_gpu_status_regs(rdev);
4922 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4923 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4924 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4925 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4926
4927 /* disable CG/PG */
4928 cik_fini_pg(rdev);
4929 cik_fini_cg(rdev);
4930
4931 /* stop the rlc */
4932 cik_rlc_stop(rdev);
4933
4934 /* Disable GFX parsing/prefetching */
4935 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4936
4937 /* Disable MEC parsing/prefetching */
4938 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4939
4940 if (reset_mask & RADEON_RESET_DMA) {
4941 /* sdma0 */
4942 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4943 tmp |= SDMA_HALT;
4944 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4945 }
4946 if (reset_mask & RADEON_RESET_DMA1) {
4947 /* sdma1 */
4948 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4949 tmp |= SDMA_HALT;
4950 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4951 }
4952
4953 evergreen_mc_stop(rdev, &save);
4954 if (evergreen_mc_wait_for_idle(rdev)) {
4955 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4956 }
4957
4958 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4959 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4960
4961 if (reset_mask & RADEON_RESET_CP) {
4962 grbm_soft_reset |= SOFT_RESET_CP;
4963
4964 srbm_soft_reset |= SOFT_RESET_GRBM;
4965 }
4966
4967 if (reset_mask & RADEON_RESET_DMA)
4968 srbm_soft_reset |= SOFT_RESET_SDMA;
4969
4970 if (reset_mask & RADEON_RESET_DMA1)
4971 srbm_soft_reset |= SOFT_RESET_SDMA1;
4972
4973 if (reset_mask & RADEON_RESET_DISPLAY)
4974 srbm_soft_reset |= SOFT_RESET_DC;
4975
4976 if (reset_mask & RADEON_RESET_RLC)
4977 grbm_soft_reset |= SOFT_RESET_RLC;
4978
4979 if (reset_mask & RADEON_RESET_SEM)
4980 srbm_soft_reset |= SOFT_RESET_SEM;
4981
4982 if (reset_mask & RADEON_RESET_IH)
4983 srbm_soft_reset |= SOFT_RESET_IH;
4984
4985 if (reset_mask & RADEON_RESET_GRBM)
4986 srbm_soft_reset |= SOFT_RESET_GRBM;
4987
4988 if (reset_mask & RADEON_RESET_VMC)
4989 srbm_soft_reset |= SOFT_RESET_VMC;
4990
4991 if (!(rdev->flags & RADEON_IS_IGP)) {
4992 if (reset_mask & RADEON_RESET_MC)
4993 srbm_soft_reset |= SOFT_RESET_MC;
4994 }
4995
4996 if (grbm_soft_reset) {
4997 tmp = RREG32(GRBM_SOFT_RESET);
4998 tmp |= grbm_soft_reset;
4999 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5000 WREG32(GRBM_SOFT_RESET, tmp);
5001 tmp = RREG32(GRBM_SOFT_RESET);
5002
5003 udelay(50);
5004
5005 tmp &= ~grbm_soft_reset;
5006 WREG32(GRBM_SOFT_RESET, tmp);
5007 tmp = RREG32(GRBM_SOFT_RESET);
5008 }
5009
5010 if (srbm_soft_reset) {
5011 tmp = RREG32(SRBM_SOFT_RESET);
5012 tmp |= srbm_soft_reset;
5013 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5014 WREG32(SRBM_SOFT_RESET, tmp);
5015 tmp = RREG32(SRBM_SOFT_RESET);
5016
5017 udelay(50);
5018
5019 tmp &= ~srbm_soft_reset;
5020 WREG32(SRBM_SOFT_RESET, tmp);
5021 tmp = RREG32(SRBM_SOFT_RESET);
5022 }
5023
5024 /* Wait a little for things to settle down */
5025 udelay(50);
5026
5027 evergreen_mc_resume(rdev, &save);
5028 udelay(50);
5029
5030 cik_print_gpu_status_regs(rdev);
5031}
5032
5033struct kv_reset_save_regs {
5034 u32 gmcon_reng_execute;
5035 u32 gmcon_misc;
5036 u32 gmcon_misc3;
5037};
5038
5039static void kv_save_regs_for_reset(struct radeon_device *rdev,
5040 struct kv_reset_save_regs *save)
5041{
5042 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5043 save->gmcon_misc = RREG32(GMCON_MISC);
5044 save->gmcon_misc3 = RREG32(GMCON_MISC3);
5045
5046 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5047 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5048 STCTRL_STUTTER_EN));
5049}
5050
5051static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5052 struct kv_reset_save_regs *save)
5053{
5054 int i;
5055
5056 WREG32(GMCON_PGFSM_WRITE, 0);
5057 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5058
5059 for (i = 0; i < 5; i++)
5060 WREG32(GMCON_PGFSM_WRITE, 0);
5061
5062 WREG32(GMCON_PGFSM_WRITE, 0);
5063 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5064
5065 for (i = 0; i < 5; i++)
5066 WREG32(GMCON_PGFSM_WRITE, 0);
5067
5068 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5069 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5070
5071 for (i = 0; i < 5; i++)
5072 WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5075 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5076
5077 for (i = 0; i < 5; i++)
5078 WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5081 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5082
5083 for (i = 0; i < 5; i++)
5084 WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086 WREG32(GMCON_PGFSM_WRITE, 0);
5087 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5088
5089 for (i = 0; i < 5; i++)
5090 WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5093 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5094
5095 for (i = 0; i < 5; i++)
5096 WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5099 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5100
5101 for (i = 0; i < 5; i++)
5102 WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5105 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5106
5107 for (i = 0; i < 5; i++)
5108 WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5111 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5112
5113 for (i = 0; i < 5; i++)
5114 WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5117 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5118
5119 WREG32(GMCON_MISC3, save->gmcon_misc3);
5120 WREG32(GMCON_MISC, save->gmcon_misc);
5121 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5122}
5123
5124static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5125{
5126 struct evergreen_mc_save save;
5127 struct kv_reset_save_regs kv_save = { 0 };
5128 u32 tmp, i;
5129
5130 dev_info(rdev->dev, "GPU pci config reset\n");
5131
5132 /* disable dpm? */
5133
5134 /* disable cg/pg */
5135 cik_fini_pg(rdev);
5136 cik_fini_cg(rdev);
5137
5138 /* Disable GFX parsing/prefetching */
5139 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5140
5141 /* Disable MEC parsing/prefetching */
5142 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5143
5144 /* sdma0 */
5145 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5146 tmp |= SDMA_HALT;
5147 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5148 /* sdma1 */
5149 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5150 tmp |= SDMA_HALT;
5151 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5152 /* XXX other engines? */
5153
5154 /* halt the rlc, disable cp internal ints */
5155 cik_rlc_stop(rdev);
5156
5157 udelay(50);
5158
5159 /* disable mem access */
5160 evergreen_mc_stop(rdev, &save);
5161 if (evergreen_mc_wait_for_idle(rdev)) {
5162 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5163 }
5164
5165 if (rdev->flags & RADEON_IS_IGP)
5166 kv_save_regs_for_reset(rdev, &kv_save);
5167
5168 /* disable BM */
5169 pci_clear_master(rdev->pdev);
5170 /* reset */
5171 radeon_pci_config_reset(rdev);
5172
5173 udelay(100);
5174
5175 /* wait for asic to come out of reset */
5176 for (i = 0; i < rdev->usec_timeout; i++) {
5177 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5178 break;
5179 udelay(1);
5180 }
5181
5182 /* does asic init need to be run first??? */
5183 if (rdev->flags & RADEON_IS_IGP)
5184 kv_restore_regs_for_reset(rdev, &kv_save);
5185}
5186
5187/**
5188 * cik_asic_reset - soft reset GPU
5189 *
5190 * @rdev: radeon_device pointer
5191 *
5192 * Look up which blocks are hung and attempt
5193 * to reset them.
5194 * Returns 0 for success.
5195 */
5196int cik_asic_reset(struct radeon_device *rdev)
5197{
5198 u32 reset_mask;
5199
5200 reset_mask = cik_gpu_check_soft_reset(rdev);
5201
5202 if (reset_mask)
5203 r600_set_bios_scratch_engine_hung(rdev, true);
5204
5205 /* try soft reset */
5206 cik_gpu_soft_reset(rdev, reset_mask);
5207
5208 reset_mask = cik_gpu_check_soft_reset(rdev);
5209
5210 /* try pci config reset */
5211 if (reset_mask && radeon_hard_reset)
5212 cik_gpu_pci_config_reset(rdev);
5213
5214 reset_mask = cik_gpu_check_soft_reset(rdev);
5215
5216 if (!reset_mask)
5217 r600_set_bios_scratch_engine_hung(rdev, false);
5218
5219 return 0;
5220}
5221
5222/**
5223 * cik_gfx_is_lockup - check if the 3D engine is locked up
5224 *
5225 * @rdev: radeon_device pointer
5226 * @ring: radeon_ring structure holding ring information
5227 *
5228 * Check if the 3D engine is locked up (CIK).
5229 * Returns true if the engine is locked, false if not.
5230 */
5231bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5232{
5233 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5234
5235 if (!(reset_mask & (RADEON_RESET_GFX |
5236 RADEON_RESET_COMPUTE |
5237 RADEON_RESET_CP))) {
5238 radeon_ring_lockup_update(rdev, ring);
5239 return false;
5240 }
5241 return radeon_ring_test_lockup(rdev, ring);
5242}
5243
5244/* MC */
5245/**
5246 * cik_mc_program - program the GPU memory controller
5247 *
5248 * @rdev: radeon_device pointer
5249 *
5250 * Set the location of vram, gart, and AGP in the GPU's
5251 * physical address space (CIK).
5252 */
5253static void cik_mc_program(struct radeon_device *rdev)
5254{
5255 struct evergreen_mc_save save;
5256 u32 tmp;
5257 int i, j;
5258
5259 /* Initialize HDP */
5260 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5261 WREG32((0x2c14 + j), 0x00000000);
5262 WREG32((0x2c18 + j), 0x00000000);
5263 WREG32((0x2c1c + j), 0x00000000);
5264 WREG32((0x2c20 + j), 0x00000000);
5265 WREG32((0x2c24 + j), 0x00000000);
5266 }
5267 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5268
5269 evergreen_mc_stop(rdev, &save);
5270 if (radeon_mc_wait_for_idle(rdev)) {
5271 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5272 }
5273 /* Lockout access through VGA aperture*/
5274 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5275 /* Update configuration */
5276 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5277 rdev->mc.vram_start >> 12);
5278 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5279 rdev->mc.vram_end >> 12);
5280 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5281 rdev->vram_scratch.gpu_addr >> 12);
5282 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5283 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5284 WREG32(MC_VM_FB_LOCATION, tmp);
5285 /* XXX double check these! */
5286 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5287 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5288 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5289 WREG32(MC_VM_AGP_BASE, 0);
5290 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5291 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5292 if (radeon_mc_wait_for_idle(rdev)) {
5293 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5294 }
5295 evergreen_mc_resume(rdev, &save);
5296 /* we need to own VRAM, so turn off the VGA renderer here
5297 * to stop it overwriting our objects */
5298 rv515_vga_render_disable(rdev);
5299}
5300
5301/**
5302 * cik_mc_init - initialize the memory controller driver params
5303 *
5304 * @rdev: radeon_device pointer
5305 *
5306 * Look up the amount of vram, vram width, and decide how to place
5307 * vram and gart within the GPU's physical address space (CIK).
5308 * Returns 0 for success.
5309 */
5310static int cik_mc_init(struct radeon_device *rdev)
5311{
5312 u32 tmp;
5313 int chansize, numchan;
5314
5315 /* Get VRAM informations */
5316 rdev->mc.vram_is_ddr = true;
5317 tmp = RREG32(MC_ARB_RAMCFG);
5318 if (tmp & CHANSIZE_MASK) {
5319 chansize = 64;
5320 } else {
5321 chansize = 32;
5322 }
5323 tmp = RREG32(MC_SHARED_CHMAP);
5324 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5325 case 0:
5326 default:
5327 numchan = 1;
5328 break;
5329 case 1:
5330 numchan = 2;
5331 break;
5332 case 2:
5333 numchan = 4;
5334 break;
5335 case 3:
5336 numchan = 8;
5337 break;
5338 case 4:
5339 numchan = 3;
5340 break;
5341 case 5:
5342 numchan = 6;
5343 break;
5344 case 6:
5345 numchan = 10;
5346 break;
5347 case 7:
5348 numchan = 12;
5349 break;
5350 case 8:
5351 numchan = 16;
5352 break;
5353 }
5354 rdev->mc.vram_width = numchan * chansize;
5355 /* Could aper size report 0 ? */
5356 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5357 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5358 /* size in MB on si */
5359 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5360 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5361 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5362 si_vram_gtt_location(rdev, &rdev->mc);
5363 radeon_update_bandwidth_info(rdev);
5364
5365 return 0;
5366}
5367
5368/*
5369 * GART
5370 * VMID 0 is the physical GPU addresses as used by the kernel.
5371 * VMIDs 1-15 are used for userspace clients and are handled
5372 * by the radeon vm/hsa code.
5373 */
5374/**
5375 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5376 *
5377 * @rdev: radeon_device pointer
5378 *
5379 * Flush the TLB for the VMID 0 page table (CIK).
5380 */
5381void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5382{
5383 /* flush hdp cache */
5384 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5385
5386 /* bits 0-15 are the VM contexts0-15 */
5387 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5388}
5389
5390/**
5391 * cik_pcie_gart_enable - gart enable
5392 *
5393 * @rdev: radeon_device pointer
5394 *
5395 * This sets up the TLBs, programs the page tables for VMID0,
5396 * sets up the hw for VMIDs 1-15 which are allocated on
5397 * demand, and sets up the global locations for the LDS, GDS,
5398 * and GPUVM for FSA64 clients (CIK).
5399 * Returns 0 for success, errors for failure.
5400 */
5401static int cik_pcie_gart_enable(struct radeon_device *rdev)
5402{
5403 int r, i;
5404
5405 if (rdev->gart.robj == NULL) {
5406 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5407 return -EINVAL;
5408 }
5409 r = radeon_gart_table_vram_pin(rdev);
5410 if (r)
5411 return r;
5412 radeon_gart_restore(rdev);
5413 /* Setup TLB control */
5414 WREG32(MC_VM_MX_L1_TLB_CNTL,
5415 (0xA << 7) |
5416 ENABLE_L1_TLB |
5417 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5418 ENABLE_ADVANCED_DRIVER_MODEL |
5419 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5420 /* Setup L2 cache */
5421 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5422 ENABLE_L2_FRAGMENT_PROCESSING |
5423 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5424 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5425 EFFECTIVE_L2_QUEUE_SIZE(7) |
5426 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5427 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5428 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5429 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5430 /* setup context0 */
5431 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5432 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5433 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5434 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5435 (u32)(rdev->dummy_page.addr >> 12));
5436 WREG32(VM_CONTEXT0_CNTL2, 0);
5437 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5438 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5439
5440 WREG32(0x15D4, 0);
5441 WREG32(0x15D8, 0);
5442 WREG32(0x15DC, 0);
5443
5444 /* empty context1-15 */
5445 /* FIXME start with 4G, once using 2 level pt switch to full
5446 * vm size space
5447 */
5448 /* set vm size, must be a multiple of 4 */
5449 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5450 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5451 for (i = 1; i < 16; i++) {
5452 if (i < 8)
5453 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5454 rdev->gart.table_addr >> 12);
5455 else
5456 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5457 rdev->gart.table_addr >> 12);
5458 }
5459
5460 /* enable context1-15 */
5461 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5462 (u32)(rdev->dummy_page.addr >> 12));
5463 WREG32(VM_CONTEXT1_CNTL2, 4);
5464 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5465 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5466 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5467 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5468 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5469 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5470 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5471 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5472 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5473 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5474 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5475 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5476 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5477
5478 if (rdev->family == CHIP_KAVERI) {
5479 u32 tmp = RREG32(CHUB_CONTROL);
5480 tmp &= ~BYPASS_VM;
5481 WREG32(CHUB_CONTROL, tmp);
5482 }
5483
5484 /* XXX SH_MEM regs */
5485 /* where to put LDS, scratch, GPUVM in FSA64 space */
5486 mutex_lock(&rdev->srbm_mutex);
5487 for (i = 0; i < 16; i++) {
5488 cik_srbm_select(rdev, 0, 0, 0, i);
5489 /* CP and shaders */
5490 WREG32(SH_MEM_CONFIG, 0);
5491 WREG32(SH_MEM_APE1_BASE, 1);
5492 WREG32(SH_MEM_APE1_LIMIT, 0);
5493 WREG32(SH_MEM_BASES, 0);
5494 /* SDMA GFX */
5495 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5496 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5497 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5498 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5499 /* XXX SDMA RLC - todo */
5500 }
5501 cik_srbm_select(rdev, 0, 0, 0, 0);
5502 mutex_unlock(&rdev->srbm_mutex);
5503
5504 cik_pcie_gart_tlb_flush(rdev);
5505 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5506 (unsigned)(rdev->mc.gtt_size >> 20),
5507 (unsigned long long)rdev->gart.table_addr);
5508 rdev->gart.ready = true;
5509 return 0;
5510}
5511
5512/**
5513 * cik_pcie_gart_disable - gart disable
5514 *
5515 * @rdev: radeon_device pointer
5516 *
5517 * This disables all VM page table (CIK).
5518 */
5519static void cik_pcie_gart_disable(struct radeon_device *rdev)
5520{
5521 /* Disable all tables */
5522 WREG32(VM_CONTEXT0_CNTL, 0);
5523 WREG32(VM_CONTEXT1_CNTL, 0);
5524 /* Setup TLB control */
5525 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5526 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5527 /* Setup L2 cache */
5528 WREG32(VM_L2_CNTL,
5529 ENABLE_L2_FRAGMENT_PROCESSING |
5530 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5531 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5532 EFFECTIVE_L2_QUEUE_SIZE(7) |
5533 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5534 WREG32(VM_L2_CNTL2, 0);
5535 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5536 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5537 radeon_gart_table_vram_unpin(rdev);
5538}
5539
5540/**
5541 * cik_pcie_gart_fini - vm fini callback
5542 *
5543 * @rdev: radeon_device pointer
5544 *
5545 * Tears down the driver GART/VM setup (CIK).
5546 */
5547static void cik_pcie_gart_fini(struct radeon_device *rdev)
5548{
5549 cik_pcie_gart_disable(rdev);
5550 radeon_gart_table_vram_free(rdev);
5551 radeon_gart_fini(rdev);
5552}
5553
5554/* vm parser */
5555/**
5556 * cik_ib_parse - vm ib_parse callback
5557 *
5558 * @rdev: radeon_device pointer
5559 * @ib: indirect buffer pointer
5560 *
5561 * CIK uses hw IB checking so this is a nop (CIK).
5562 */
5563int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5564{
5565 return 0;
5566}
5567
5568/*
5569 * vm
5570 * VMID 0 is the physical GPU addresses as used by the kernel.
5571 * VMIDs 1-15 are used for userspace clients and are handled
5572 * by the radeon vm/hsa code.
5573 */
5574/**
5575 * cik_vm_init - cik vm init callback
5576 *
5577 * @rdev: radeon_device pointer
5578 *
5579 * Inits cik specific vm parameters (number of VMs, base of vram for
5580 * VMIDs 1-15) (CIK).
5581 * Returns 0 for success.
5582 */
5583int cik_vm_init(struct radeon_device *rdev)
5584{
5585 /* number of VMs */
5586 rdev->vm_manager.nvm = 16;
5587 /* base offset of vram pages */
5588 if (rdev->flags & RADEON_IS_IGP) {
5589 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5590 tmp <<= 22;
5591 rdev->vm_manager.vram_base_offset = tmp;
5592 } else
5593 rdev->vm_manager.vram_base_offset = 0;
5594
5595 return 0;
5596}
5597
5598/**
5599 * cik_vm_fini - cik vm fini callback
5600 *
5601 * @rdev: radeon_device pointer
5602 *
5603 * Tear down any asic specific VM setup (CIK).
5604 */
5605void cik_vm_fini(struct radeon_device *rdev)
5606{
5607}
5608
5609/**
5610 * cik_vm_decode_fault - print human readable fault info
5611 *
5612 * @rdev: radeon_device pointer
5613 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5614 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5615 *
5616 * Print human readable fault information (CIK).
5617 */
5618static void cik_vm_decode_fault(struct radeon_device *rdev,
5619 u32 status, u32 addr, u32 mc_client)
5620{
5621 u32 mc_id;
5622 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5623 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5624 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5625 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5626
5627 if (rdev->family == CHIP_HAWAII)
5628 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5629 else
5630 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5631
5632 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5633 protections, vmid, addr,
5634 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5635 block, mc_client, mc_id);
5636}
5637
5638/**
5639 * cik_vm_flush - cik vm flush using the CP
5640 *
5641 * @rdev: radeon_device pointer
5642 *
5643 * Update the page table base and flush the VM TLB
5644 * using the CP (CIK).
5645 */
5646void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5647{
5648 struct radeon_ring *ring = &rdev->ring[ridx];
5649
5650 if (vm == NULL)
5651 return;
5652
5653 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5654 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5655 WRITE_DATA_DST_SEL(0)));
5656 if (vm->id < 8) {
5657 radeon_ring_write(ring,
5658 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5659 } else {
5660 radeon_ring_write(ring,
5661 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5662 }
5663 radeon_ring_write(ring, 0);
5664 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5665
5666 /* update SH_MEM_* regs */
5667 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5668 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5669 WRITE_DATA_DST_SEL(0)));
5670 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5671 radeon_ring_write(ring, 0);
5672 radeon_ring_write(ring, VMID(vm->id));
5673
5674 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5675 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5676 WRITE_DATA_DST_SEL(0)));
5677 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5678 radeon_ring_write(ring, 0);
5679
5680 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5681 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5682 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5683 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5684
5685 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5686 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5687 WRITE_DATA_DST_SEL(0)));
5688 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5689 radeon_ring_write(ring, 0);
5690 radeon_ring_write(ring, VMID(0));
5691
5692 /* HDP flush */
5693 cik_hdp_flush_cp_ring_emit(rdev, ridx);
5694
5695 /* bits 0-15 are the VM contexts0-15 */
5696 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5698 WRITE_DATA_DST_SEL(0)));
5699 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5700 radeon_ring_write(ring, 0);
5701 radeon_ring_write(ring, 1 << vm->id);
5702
5703 /* compute doesn't have PFP */
5704 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5705 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5706 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5707 radeon_ring_write(ring, 0x0);
5708 }
5709}
5710
5711/*
5712 * RLC
5713 * The RLC is a multi-purpose microengine that handles a
5714 * variety of functions, the most important of which is
5715 * the interrupt controller.
5716 */
5717static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5718 bool enable)
5719{
5720 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5721
5722 if (enable)
5723 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5724 else
5725 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5726 WREG32(CP_INT_CNTL_RING0, tmp);
5727}
5728
5729static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5730{
5731 u32 tmp;
5732
5733 tmp = RREG32(RLC_LB_CNTL);
5734 if (enable)
5735 tmp |= LOAD_BALANCE_ENABLE;
5736 else
5737 tmp &= ~LOAD_BALANCE_ENABLE;
5738 WREG32(RLC_LB_CNTL, tmp);
5739}
5740
5741static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5742{
5743 u32 i, j, k;
5744 u32 mask;
5745
5746 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5747 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5748 cik_select_se_sh(rdev, i, j);
5749 for (k = 0; k < rdev->usec_timeout; k++) {
5750 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5751 break;
5752 udelay(1);
5753 }
5754 }
5755 }
5756 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5757
5758 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5759 for (k = 0; k < rdev->usec_timeout; k++) {
5760 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5761 break;
5762 udelay(1);
5763 }
5764}
5765
5766static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5767{
5768 u32 tmp;
5769
5770 tmp = RREG32(RLC_CNTL);
5771 if (tmp != rlc)
5772 WREG32(RLC_CNTL, rlc);
5773}
5774
5775static u32 cik_halt_rlc(struct radeon_device *rdev)
5776{
5777 u32 data, orig;
5778
5779 orig = data = RREG32(RLC_CNTL);
5780
5781 if (data & RLC_ENABLE) {
5782 u32 i;
5783
5784 data &= ~RLC_ENABLE;
5785 WREG32(RLC_CNTL, data);
5786
5787 for (i = 0; i < rdev->usec_timeout; i++) {
5788 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5789 break;
5790 udelay(1);
5791 }
5792
5793 cik_wait_for_rlc_serdes(rdev);
5794 }
5795
5796 return orig;
5797}
5798
5799void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5800{
5801 u32 tmp, i, mask;
5802
5803 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5804 WREG32(RLC_GPR_REG2, tmp);
5805
5806 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5807 for (i = 0; i < rdev->usec_timeout; i++) {
5808 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5809 break;
5810 udelay(1);
5811 }
5812
5813 for (i = 0; i < rdev->usec_timeout; i++) {
5814 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5815 break;
5816 udelay(1);
5817 }
5818}
5819
5820void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5821{
5822 u32 tmp;
5823
5824 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5825 WREG32(RLC_GPR_REG2, tmp);
5826}
5827
5828/**
5829 * cik_rlc_stop - stop the RLC ME
5830 *
5831 * @rdev: radeon_device pointer
5832 *
5833 * Halt the RLC ME (MicroEngine) (CIK).
5834 */
5835static void cik_rlc_stop(struct radeon_device *rdev)
5836{
5837 WREG32(RLC_CNTL, 0);
5838
5839 cik_enable_gui_idle_interrupt(rdev, false);
5840
5841 cik_wait_for_rlc_serdes(rdev);
5842}
5843
5844/**
5845 * cik_rlc_start - start the RLC ME
5846 *
5847 * @rdev: radeon_device pointer
5848 *
5849 * Unhalt the RLC ME (MicroEngine) (CIK).
5850 */
5851static void cik_rlc_start(struct radeon_device *rdev)
5852{
5853 WREG32(RLC_CNTL, RLC_ENABLE);
5854
5855 cik_enable_gui_idle_interrupt(rdev, true);
5856
5857 udelay(50);
5858}
5859
5860/**
5861 * cik_rlc_resume - setup the RLC hw
5862 *
5863 * @rdev: radeon_device pointer
5864 *
5865 * Initialize the RLC registers, load the ucode,
5866 * and start the RLC (CIK).
5867 * Returns 0 for success, -EINVAL if the ucode is not available.
5868 */
5869static int cik_rlc_resume(struct radeon_device *rdev)
5870{
5871 u32 i, size, tmp;
5872 const __be32 *fw_data;
5873
5874 if (!rdev->rlc_fw)
5875 return -EINVAL;
5876
5877 switch (rdev->family) {
5878 case CHIP_BONAIRE:
5879 case CHIP_HAWAII:
5880 default:
5881 size = BONAIRE_RLC_UCODE_SIZE;
5882 break;
5883 case CHIP_KAVERI:
5884 size = KV_RLC_UCODE_SIZE;
5885 break;
5886 case CHIP_KABINI:
5887 size = KB_RLC_UCODE_SIZE;
5888 break;
5889 case CHIP_MULLINS:
5890 size = ML_RLC_UCODE_SIZE;
5891 break;
5892 }
5893
5894 cik_rlc_stop(rdev);
5895
5896 /* disable CG */
5897 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5898 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5899
5900 si_rlc_reset(rdev);
5901
5902 cik_init_pg(rdev);
5903
5904 cik_init_cg(rdev);
5905
5906 WREG32(RLC_LB_CNTR_INIT, 0);
5907 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5908
5909 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5910 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5911 WREG32(RLC_LB_PARAMS, 0x00600408);
5912 WREG32(RLC_LB_CNTL, 0x80000004);
5913
5914 WREG32(RLC_MC_CNTL, 0);
5915 WREG32(RLC_UCODE_CNTL, 0);
5916
5917 fw_data = (const __be32 *)rdev->rlc_fw->data;
5918 WREG32(RLC_GPM_UCODE_ADDR, 0);
5919 for (i = 0; i < size; i++)
5920 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5921 WREG32(RLC_GPM_UCODE_ADDR, 0);
5922
5923 /* XXX - find out what chips support lbpw */
5924 cik_enable_lbpw(rdev, false);
5925
5926 if (rdev->family == CHIP_BONAIRE)
5927 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5928
5929 cik_rlc_start(rdev);
5930
5931 return 0;
5932}
5933
5934static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5935{
5936 u32 data, orig, tmp, tmp2;
5937
5938 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5939
5940 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5941 cik_enable_gui_idle_interrupt(rdev, true);
5942
5943 tmp = cik_halt_rlc(rdev);
5944
5945 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5946 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5947 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5948 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5949 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5950
5951 cik_update_rlc(rdev, tmp);
5952
5953 data |= CGCG_EN | CGLS_EN;
5954 } else {
5955 cik_enable_gui_idle_interrupt(rdev, false);
5956
5957 RREG32(CB_CGTT_SCLK_CTRL);
5958 RREG32(CB_CGTT_SCLK_CTRL);
5959 RREG32(CB_CGTT_SCLK_CTRL);
5960 RREG32(CB_CGTT_SCLK_CTRL);
5961
5962 data &= ~(CGCG_EN | CGLS_EN);
5963 }
5964
5965 if (orig != data)
5966 WREG32(RLC_CGCG_CGLS_CTRL, data);
5967
5968}
5969
5970static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5971{
5972 u32 data, orig, tmp = 0;
5973
5974 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5975 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5976 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5977 orig = data = RREG32(CP_MEM_SLP_CNTL);
5978 data |= CP_MEM_LS_EN;
5979 if (orig != data)
5980 WREG32(CP_MEM_SLP_CNTL, data);
5981 }
5982 }
5983
5984 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5985 data &= 0xfffffffd;
5986 if (orig != data)
5987 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5988
5989 tmp = cik_halt_rlc(rdev);
5990
5991 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5992 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5993 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5994 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5995 WREG32(RLC_SERDES_WR_CTRL, data);
5996
5997 cik_update_rlc(rdev, tmp);
5998
5999 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6000 orig = data = RREG32(CGTS_SM_CTRL_REG);
6001 data &= ~SM_MODE_MASK;
6002 data |= SM_MODE(0x2);
6003 data |= SM_MODE_ENABLE;
6004 data &= ~CGTS_OVERRIDE;
6005 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6006 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6007 data &= ~CGTS_LS_OVERRIDE;
6008 data &= ~ON_MONITOR_ADD_MASK;
6009 data |= ON_MONITOR_ADD_EN;
6010 data |= ON_MONITOR_ADD(0x96);
6011 if (orig != data)
6012 WREG32(CGTS_SM_CTRL_REG, data);
6013 }
6014 } else {
6015 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6016 data |= 0x00000002;
6017 if (orig != data)
6018 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6019
6020 data = RREG32(RLC_MEM_SLP_CNTL);
6021 if (data & RLC_MEM_LS_EN) {
6022 data &= ~RLC_MEM_LS_EN;
6023 WREG32(RLC_MEM_SLP_CNTL, data);
6024 }
6025
6026 data = RREG32(CP_MEM_SLP_CNTL);
6027 if (data & CP_MEM_LS_EN) {
6028 data &= ~CP_MEM_LS_EN;
6029 WREG32(CP_MEM_SLP_CNTL, data);
6030 }
6031
6032 orig = data = RREG32(CGTS_SM_CTRL_REG);
6033 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6034 if (orig != data)
6035 WREG32(CGTS_SM_CTRL_REG, data);
6036
6037 tmp = cik_halt_rlc(rdev);
6038
6039 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6040 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6041 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6042 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6043 WREG32(RLC_SERDES_WR_CTRL, data);
6044
6045 cik_update_rlc(rdev, tmp);
6046 }
6047}
6048
6049static const u32 mc_cg_registers[] =
6050{
6051 MC_HUB_MISC_HUB_CG,
6052 MC_HUB_MISC_SIP_CG,
6053 MC_HUB_MISC_VM_CG,
6054 MC_XPB_CLK_GAT,
6055 ATC_MISC_CG,
6056 MC_CITF_MISC_WR_CG,
6057 MC_CITF_MISC_RD_CG,
6058 MC_CITF_MISC_VM_CG,
6059 VM_L2_CG,
6060};
6061
6062static void cik_enable_mc_ls(struct radeon_device *rdev,
6063 bool enable)
6064{
6065 int i;
6066 u32 orig, data;
6067
6068 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6069 orig = data = RREG32(mc_cg_registers[i]);
6070 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6071 data |= MC_LS_ENABLE;
6072 else
6073 data &= ~MC_LS_ENABLE;
6074 if (data != orig)
6075 WREG32(mc_cg_registers[i], data);
6076 }
6077}
6078
6079static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6080 bool enable)
6081{
6082 int i;
6083 u32 orig, data;
6084
6085 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6086 orig = data = RREG32(mc_cg_registers[i]);
6087 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6088 data |= MC_CG_ENABLE;
6089 else
6090 data &= ~MC_CG_ENABLE;
6091 if (data != orig)
6092 WREG32(mc_cg_registers[i], data);
6093 }
6094}
6095
6096static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6097 bool enable)
6098{
6099 u32 orig, data;
6100
6101 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6102 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6103 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6104 } else {
6105 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6106 data |= 0xff000000;
6107 if (data != orig)
6108 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6109
6110 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6111 data |= 0xff000000;
6112 if (data != orig)
6113 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6114 }
6115}
6116
6117static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6118 bool enable)
6119{
6120 u32 orig, data;
6121
6122 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6123 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6124 data |= 0x100;
6125 if (orig != data)
6126 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6127
6128 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6129 data |= 0x100;
6130 if (orig != data)
6131 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6132 } else {
6133 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6134 data &= ~0x100;
6135 if (orig != data)
6136 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6137
6138 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6139 data &= ~0x100;
6140 if (orig != data)
6141 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6142 }
6143}
6144
6145static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6146 bool enable)
6147{
6148 u32 orig, data;
6149
6150 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6151 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6152 data = 0xfff;
6153 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6154
6155 orig = data = RREG32(UVD_CGC_CTRL);
6156 data |= DCM;
6157 if (orig != data)
6158 WREG32(UVD_CGC_CTRL, data);
6159 } else {
6160 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6161 data &= ~0xfff;
6162 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6163
6164 orig = data = RREG32(UVD_CGC_CTRL);
6165 data &= ~DCM;
6166 if (orig != data)
6167 WREG32(UVD_CGC_CTRL, data);
6168 }
6169}
6170
6171static void cik_enable_bif_mgls(struct radeon_device *rdev,
6172 bool enable)
6173{
6174 u32 orig, data;
6175
6176 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6177
6178 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6179 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6180 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6181 else
6182 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6183 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6184
6185 if (orig != data)
6186 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6187}
6188
6189static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6190 bool enable)
6191{
6192 u32 orig, data;
6193
6194 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6195
6196 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6197 data &= ~CLOCK_GATING_DIS;
6198 else
6199 data |= CLOCK_GATING_DIS;
6200
6201 if (orig != data)
6202 WREG32(HDP_HOST_PATH_CNTL, data);
6203}
6204
6205static void cik_enable_hdp_ls(struct radeon_device *rdev,
6206 bool enable)
6207{
6208 u32 orig, data;
6209
6210 orig = data = RREG32(HDP_MEM_POWER_LS);
6211
6212 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6213 data |= HDP_LS_ENABLE;
6214 else
6215 data &= ~HDP_LS_ENABLE;
6216
6217 if (orig != data)
6218 WREG32(HDP_MEM_POWER_LS, data);
6219}
6220
6221void cik_update_cg(struct radeon_device *rdev,
6222 u32 block, bool enable)
6223{
6224
6225 if (block & RADEON_CG_BLOCK_GFX) {
6226 cik_enable_gui_idle_interrupt(rdev, false);
6227 /* order matters! */
6228 if (enable) {
6229 cik_enable_mgcg(rdev, true);
6230 cik_enable_cgcg(rdev, true);
6231 } else {
6232 cik_enable_cgcg(rdev, false);
6233 cik_enable_mgcg(rdev, false);
6234 }
6235 cik_enable_gui_idle_interrupt(rdev, true);
6236 }
6237
6238 if (block & RADEON_CG_BLOCK_MC) {
6239 if (!(rdev->flags & RADEON_IS_IGP)) {
6240 cik_enable_mc_mgcg(rdev, enable);
6241 cik_enable_mc_ls(rdev, enable);
6242 }
6243 }
6244
6245 if (block & RADEON_CG_BLOCK_SDMA) {
6246 cik_enable_sdma_mgcg(rdev, enable);
6247 cik_enable_sdma_mgls(rdev, enable);
6248 }
6249
6250 if (block & RADEON_CG_BLOCK_BIF) {
6251 cik_enable_bif_mgls(rdev, enable);
6252 }
6253
6254 if (block & RADEON_CG_BLOCK_UVD) {
6255 if (rdev->has_uvd)
6256 cik_enable_uvd_mgcg(rdev, enable);
6257 }
6258
6259 if (block & RADEON_CG_BLOCK_HDP) {
6260 cik_enable_hdp_mgcg(rdev, enable);
6261 cik_enable_hdp_ls(rdev, enable);
6262 }
6263
6264 if (block & RADEON_CG_BLOCK_VCE) {
6265 vce_v2_0_enable_mgcg(rdev, enable);
6266 }
6267}
6268
6269static void cik_init_cg(struct radeon_device *rdev)
6270{
6271
6272 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6273
6274 if (rdev->has_uvd)
6275 si_init_uvd_internal_cg(rdev);
6276
6277 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6278 RADEON_CG_BLOCK_SDMA |
6279 RADEON_CG_BLOCK_BIF |
6280 RADEON_CG_BLOCK_UVD |
6281 RADEON_CG_BLOCK_HDP), true);
6282}
6283
6284static void cik_fini_cg(struct radeon_device *rdev)
6285{
6286 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6287 RADEON_CG_BLOCK_SDMA |
6288 RADEON_CG_BLOCK_BIF |
6289 RADEON_CG_BLOCK_UVD |
6290 RADEON_CG_BLOCK_HDP), false);
6291
6292 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6293}
6294
6295static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6296 bool enable)
6297{
6298 u32 data, orig;
6299
6300 orig = data = RREG32(RLC_PG_CNTL);
6301 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6302 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6303 else
6304 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6305 if (orig != data)
6306 WREG32(RLC_PG_CNTL, data);
6307}
6308
6309static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6310 bool enable)
6311{
6312 u32 data, orig;
6313
6314 orig = data = RREG32(RLC_PG_CNTL);
6315 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6316 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6317 else
6318 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6319 if (orig != data)
6320 WREG32(RLC_PG_CNTL, data);
6321}
6322
6323static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6324{
6325 u32 data, orig;
6326
6327 orig = data = RREG32(RLC_PG_CNTL);
6328 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6329 data &= ~DISABLE_CP_PG;
6330 else
6331 data |= DISABLE_CP_PG;
6332 if (orig != data)
6333 WREG32(RLC_PG_CNTL, data);
6334}
6335
6336static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6337{
6338 u32 data, orig;
6339
6340 orig = data = RREG32(RLC_PG_CNTL);
6341 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6342 data &= ~DISABLE_GDS_PG;
6343 else
6344 data |= DISABLE_GDS_PG;
6345 if (orig != data)
6346 WREG32(RLC_PG_CNTL, data);
6347}
6348
6349#define CP_ME_TABLE_SIZE 96
6350#define CP_ME_TABLE_OFFSET 2048
6351#define CP_MEC_TABLE_OFFSET 4096
6352
6353void cik_init_cp_pg_table(struct radeon_device *rdev)
6354{
6355 const __be32 *fw_data;
6356 volatile u32 *dst_ptr;
6357 int me, i, max_me = 4;
6358 u32 bo_offset = 0;
6359 u32 table_offset;
6360
6361 if (rdev->family == CHIP_KAVERI)
6362 max_me = 5;
6363
6364 if (rdev->rlc.cp_table_ptr == NULL)
6365 return;
6366
6367 /* write the cp table buffer */
6368 dst_ptr = rdev->rlc.cp_table_ptr;
6369 for (me = 0; me < max_me; me++) {
6370 if (me == 0) {
6371 fw_data = (const __be32 *)rdev->ce_fw->data;
6372 table_offset = CP_ME_TABLE_OFFSET;
6373 } else if (me == 1) {
6374 fw_data = (const __be32 *)rdev->pfp_fw->data;
6375 table_offset = CP_ME_TABLE_OFFSET;
6376 } else if (me == 2) {
6377 fw_data = (const __be32 *)rdev->me_fw->data;
6378 table_offset = CP_ME_TABLE_OFFSET;
6379 } else {
6380 fw_data = (const __be32 *)rdev->mec_fw->data;
6381 table_offset = CP_MEC_TABLE_OFFSET;
6382 }
6383
6384 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6385 dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6386 }
6387 bo_offset += CP_ME_TABLE_SIZE;
6388 }
6389}
6390
6391static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6392 bool enable)
6393{
6394 u32 data, orig;
6395
6396 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6397 orig = data = RREG32(RLC_PG_CNTL);
6398 data |= GFX_PG_ENABLE;
6399 if (orig != data)
6400 WREG32(RLC_PG_CNTL, data);
6401
6402 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6403 data |= AUTO_PG_EN;
6404 if (orig != data)
6405 WREG32(RLC_AUTO_PG_CTRL, data);
6406 } else {
6407 orig = data = RREG32(RLC_PG_CNTL);
6408 data &= ~GFX_PG_ENABLE;
6409 if (orig != data)
6410 WREG32(RLC_PG_CNTL, data);
6411
6412 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6413 data &= ~AUTO_PG_EN;
6414 if (orig != data)
6415 WREG32(RLC_AUTO_PG_CTRL, data);
6416
6417 data = RREG32(DB_RENDER_CONTROL);
6418 }
6419}
6420
6421static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6422{
6423 u32 mask = 0, tmp, tmp1;
6424 int i;
6425
6426 cik_select_se_sh(rdev, se, sh);
6427 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6428 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6429 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6430
6431 tmp &= 0xffff0000;
6432
6433 tmp |= tmp1;
6434 tmp >>= 16;
6435
6436 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6437 mask <<= 1;
6438 mask |= 1;
6439 }
6440
6441 return (~tmp) & mask;
6442}
6443
6444static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6445{
6446 u32 i, j, k, active_cu_number = 0;
6447 u32 mask, counter, cu_bitmap;
6448 u32 tmp = 0;
6449
6450 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6451 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6452 mask = 1;
6453 cu_bitmap = 0;
6454 counter = 0;
6455 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6456 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6457 if (counter < 2)
6458 cu_bitmap |= mask;
6459 counter ++;
6460 }
6461 mask <<= 1;
6462 }
6463
6464 active_cu_number += counter;
6465 tmp |= (cu_bitmap << (i * 16 + j * 8));
6466 }
6467 }
6468
6469 WREG32(RLC_PG_AO_CU_MASK, tmp);
6470
6471 tmp = RREG32(RLC_MAX_PG_CU);
6472 tmp &= ~MAX_PU_CU_MASK;
6473 tmp |= MAX_PU_CU(active_cu_number);
6474 WREG32(RLC_MAX_PG_CU, tmp);
6475}
6476
6477static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6478 bool enable)
6479{
6480 u32 data, orig;
6481
6482 orig = data = RREG32(RLC_PG_CNTL);
6483 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6484 data |= STATIC_PER_CU_PG_ENABLE;
6485 else
6486 data &= ~STATIC_PER_CU_PG_ENABLE;
6487 if (orig != data)
6488 WREG32(RLC_PG_CNTL, data);
6489}
6490
6491static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6492 bool enable)
6493{
6494 u32 data, orig;
6495
6496 orig = data = RREG32(RLC_PG_CNTL);
6497 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6498 data |= DYN_PER_CU_PG_ENABLE;
6499 else
6500 data &= ~DYN_PER_CU_PG_ENABLE;
6501 if (orig != data)
6502 WREG32(RLC_PG_CNTL, data);
6503}
6504
6505#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6506#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6507
6508static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6509{
6510 u32 data, orig;
6511 u32 i;
6512
6513 if (rdev->rlc.cs_data) {
6514 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6515 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6516 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6517 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6518 } else {
6519 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6520 for (i = 0; i < 3; i++)
6521 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6522 }
6523 if (rdev->rlc.reg_list) {
6524 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6525 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6526 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6527 }
6528
6529 orig = data = RREG32(RLC_PG_CNTL);
6530 data |= GFX_PG_SRC;
6531 if (orig != data)
6532 WREG32(RLC_PG_CNTL, data);
6533
6534 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6535 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6536
6537 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6538 data &= ~IDLE_POLL_COUNT_MASK;
6539 data |= IDLE_POLL_COUNT(0x60);
6540 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6541
6542 data = 0x10101010;
6543 WREG32(RLC_PG_DELAY, data);
6544
6545 data = RREG32(RLC_PG_DELAY_2);
6546 data &= ~0xff;
6547 data |= 0x3;
6548 WREG32(RLC_PG_DELAY_2, data);
6549
6550 data = RREG32(RLC_AUTO_PG_CTRL);
6551 data &= ~GRBM_REG_SGIT_MASK;
6552 data |= GRBM_REG_SGIT(0x700);
6553 WREG32(RLC_AUTO_PG_CTRL, data);
6554
6555}
6556
6557static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6558{
6559 cik_enable_gfx_cgpg(rdev, enable);
6560 cik_enable_gfx_static_mgpg(rdev, enable);
6561 cik_enable_gfx_dynamic_mgpg(rdev, enable);
6562}
6563
6564u32 cik_get_csb_size(struct radeon_device *rdev)
6565{
6566 u32 count = 0;
6567 const struct cs_section_def *sect = NULL;
6568 const struct cs_extent_def *ext = NULL;
6569
6570 if (rdev->rlc.cs_data == NULL)
6571 return 0;
6572
6573 /* begin clear state */
6574 count += 2;
6575 /* context control state */
6576 count += 3;
6577
6578 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6579 for (ext = sect->section; ext->extent != NULL; ++ext) {
6580 if (sect->id == SECT_CONTEXT)
6581 count += 2 + ext->reg_count;
6582 else
6583 return 0;
6584 }
6585 }
6586 /* pa_sc_raster_config/pa_sc_raster_config1 */
6587 count += 4;
6588 /* end clear state */
6589 count += 2;
6590 /* clear state */
6591 count += 2;
6592
6593 return count;
6594}
6595
6596void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6597{
6598 u32 count = 0, i;
6599 const struct cs_section_def *sect = NULL;
6600 const struct cs_extent_def *ext = NULL;
6601
6602 if (rdev->rlc.cs_data == NULL)
6603 return;
6604 if (buffer == NULL)
6605 return;
6606
6607 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6608 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6609
6610 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6611 buffer[count++] = cpu_to_le32(0x80000000);
6612 buffer[count++] = cpu_to_le32(0x80000000);
6613
6614 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6615 for (ext = sect->section; ext->extent != NULL; ++ext) {
6616 if (sect->id == SECT_CONTEXT) {
6617 buffer[count++] =
6618 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6619 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6620 for (i = 0; i < ext->reg_count; i++)
6621 buffer[count++] = cpu_to_le32(ext->extent[i]);
6622 } else {
6623 return;
6624 }
6625 }
6626 }
6627
6628 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6629 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6630 switch (rdev->family) {
6631 case CHIP_BONAIRE:
6632 buffer[count++] = cpu_to_le32(0x16000012);
6633 buffer[count++] = cpu_to_le32(0x00000000);
6634 break;
6635 case CHIP_KAVERI:
6636 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6637 buffer[count++] = cpu_to_le32(0x00000000);
6638 break;
6639 case CHIP_KABINI:
6640 case CHIP_MULLINS:
6641 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6642 buffer[count++] = cpu_to_le32(0x00000000);
6643 break;
6644 case CHIP_HAWAII:
6645 buffer[count++] = cpu_to_le32(0x3a00161a);
6646 buffer[count++] = cpu_to_le32(0x0000002e);
6647 break;
6648 default:
6649 buffer[count++] = cpu_to_le32(0x00000000);
6650 buffer[count++] = cpu_to_le32(0x00000000);
6651 break;
6652 }
6653
6654 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6655 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6656
6657 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6658 buffer[count++] = cpu_to_le32(0);
6659}
6660
6661static void cik_init_pg(struct radeon_device *rdev)
6662{
6663 if (rdev->pg_flags) {
6664 cik_enable_sck_slowdown_on_pu(rdev, true);
6665 cik_enable_sck_slowdown_on_pd(rdev, true);
6666 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6667 cik_init_gfx_cgpg(rdev);
6668 cik_enable_cp_pg(rdev, true);
6669 cik_enable_gds_pg(rdev, true);
6670 }
6671 cik_init_ao_cu_mask(rdev);
6672 cik_update_gfx_pg(rdev, true);
6673 }
6674}
6675
6676static void cik_fini_pg(struct radeon_device *rdev)
6677{
6678 if (rdev->pg_flags) {
6679 cik_update_gfx_pg(rdev, false);
6680 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6681 cik_enable_cp_pg(rdev, false);
6682 cik_enable_gds_pg(rdev, false);
6683 }
6684 }
6685}
6686
6687/*
6688 * Interrupts
6689 * Starting with r6xx, interrupts are handled via a ring buffer.
6690 * Ring buffers are areas of GPU accessible memory that the GPU
6691 * writes interrupt vectors into and the host reads vectors out of.
6692 * There is a rptr (read pointer) that determines where the
6693 * host is currently reading, and a wptr (write pointer)
6694 * which determines where the GPU has written. When the
6695 * pointers are equal, the ring is idle. When the GPU
6696 * writes vectors to the ring buffer, it increments the
6697 * wptr. When there is an interrupt, the host then starts
6698 * fetching commands and processing them until the pointers are
6699 * equal again at which point it updates the rptr.
6700 */
6701
6702/**
6703 * cik_enable_interrupts - Enable the interrupt ring buffer
6704 *
6705 * @rdev: radeon_device pointer
6706 *
6707 * Enable the interrupt ring buffer (CIK).
6708 */
6709static void cik_enable_interrupts(struct radeon_device *rdev)
6710{
6711 u32 ih_cntl = RREG32(IH_CNTL);
6712 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6713
6714 ih_cntl |= ENABLE_INTR;
6715 ih_rb_cntl |= IH_RB_ENABLE;
6716 WREG32(IH_CNTL, ih_cntl);
6717 WREG32(IH_RB_CNTL, ih_rb_cntl);
6718 rdev->ih.enabled = true;
6719}
6720
6721/**
6722 * cik_disable_interrupts - Disable the interrupt ring buffer
6723 *
6724 * @rdev: radeon_device pointer
6725 *
6726 * Disable the interrupt ring buffer (CIK).
6727 */
6728static void cik_disable_interrupts(struct radeon_device *rdev)
6729{
6730 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6731 u32 ih_cntl = RREG32(IH_CNTL);
6732
6733 ih_rb_cntl &= ~IH_RB_ENABLE;
6734 ih_cntl &= ~ENABLE_INTR;
6735 WREG32(IH_RB_CNTL, ih_rb_cntl);
6736 WREG32(IH_CNTL, ih_cntl);
6737 /* set rptr, wptr to 0 */
6738 WREG32(IH_RB_RPTR, 0);
6739 WREG32(IH_RB_WPTR, 0);
6740 rdev->ih.enabled = false;
6741 rdev->ih.rptr = 0;
6742}
6743
6744/**
6745 * cik_disable_interrupt_state - Disable all interrupt sources
6746 *
6747 * @rdev: radeon_device pointer
6748 *
6749 * Clear all interrupt enable bits used by the driver (CIK).
6750 */
6751static void cik_disable_interrupt_state(struct radeon_device *rdev)
6752{
6753 u32 tmp;
6754
6755 /* gfx ring */
6756 tmp = RREG32(CP_INT_CNTL_RING0) &
6757 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6758 WREG32(CP_INT_CNTL_RING0, tmp);
6759 /* sdma */
6760 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6761 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6762 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6763 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6764 /* compute queues */
6765 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6766 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6767 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6768 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6769 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6770 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6771 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6772 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6773 /* grbm */
6774 WREG32(GRBM_INT_CNTL, 0);
6775 /* vline/vblank, etc. */
6776 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6777 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6778 if (rdev->num_crtc >= 4) {
6779 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6780 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6781 }
6782 if (rdev->num_crtc >= 6) {
6783 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6784 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6785 }
6786 /* pflip */
6787 if (rdev->num_crtc >= 2) {
6788 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6789 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6790 }
6791 if (rdev->num_crtc >= 4) {
6792 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6793 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6794 }
6795 if (rdev->num_crtc >= 6) {
6796 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6797 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6798 }
6799
6800 /* dac hotplug */
6801 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6802
6803 /* digital hotplug */
6804 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6805 WREG32(DC_HPD1_INT_CONTROL, tmp);
6806 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6807 WREG32(DC_HPD2_INT_CONTROL, tmp);
6808 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6809 WREG32(DC_HPD3_INT_CONTROL, tmp);
6810 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6811 WREG32(DC_HPD4_INT_CONTROL, tmp);
6812 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6813 WREG32(DC_HPD5_INT_CONTROL, tmp);
6814 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6815 WREG32(DC_HPD6_INT_CONTROL, tmp);
6816
6817}
6818
6819/**
6820 * cik_irq_init - init and enable the interrupt ring
6821 *
6822 * @rdev: radeon_device pointer
6823 *
6824 * Allocate a ring buffer for the interrupt controller,
6825 * enable the RLC, disable interrupts, enable the IH
6826 * ring buffer and enable it (CIK).
6827 * Called at device load and reume.
6828 * Returns 0 for success, errors for failure.
6829 */
6830static int cik_irq_init(struct radeon_device *rdev)
6831{
6832 int ret = 0;
6833 int rb_bufsz;
6834 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6835
6836 /* allocate ring */
6837 ret = r600_ih_ring_alloc(rdev);
6838 if (ret)
6839 return ret;
6840
6841 /* disable irqs */
6842 cik_disable_interrupts(rdev);
6843
6844 /* init rlc */
6845 ret = cik_rlc_resume(rdev);
6846 if (ret) {
6847 r600_ih_ring_fini(rdev);
6848 return ret;
6849 }
6850
6851 /* setup interrupt control */
6852 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6853 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6854 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6855 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6856 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6857 */
6858 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6859 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6860 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6861 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6862
6863 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6864 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6865
6866 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6867 IH_WPTR_OVERFLOW_CLEAR |
6868 (rb_bufsz << 1));
6869
6870 if (rdev->wb.enabled)
6871 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6872
6873 /* set the writeback address whether it's enabled or not */
6874 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6875 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6876
6877 WREG32(IH_RB_CNTL, ih_rb_cntl);
6878
6879 /* set rptr, wptr to 0 */
6880 WREG32(IH_RB_RPTR, 0);
6881 WREG32(IH_RB_WPTR, 0);
6882
6883 /* Default settings for IH_CNTL (disabled at first) */
6884 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6885 /* RPTR_REARM only works if msi's are enabled */
6886 if (rdev->msi_enabled)
6887 ih_cntl |= RPTR_REARM;
6888 WREG32(IH_CNTL, ih_cntl);
6889
6890 /* force the active interrupt state to all disabled */
6891 cik_disable_interrupt_state(rdev);
6892
6893 pci_set_master(rdev->pdev);
6894
6895 /* enable irqs */
6896 cik_enable_interrupts(rdev);
6897
6898 return ret;
6899}
6900
6901/**
6902 * cik_irq_set - enable/disable interrupt sources
6903 *
6904 * @rdev: radeon_device pointer
6905 *
6906 * Enable interrupt sources on the GPU (vblanks, hpd,
6907 * etc.) (CIK).
6908 * Returns 0 for success, errors for failure.
6909 */
6910int cik_irq_set(struct radeon_device *rdev)
6911{
6912 u32 cp_int_cntl;
6913 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6914 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6915 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6916 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6917 u32 grbm_int_cntl = 0;
6918 u32 dma_cntl, dma_cntl1;
6919 u32 thermal_int;
6920
6921 if (!rdev->irq.installed) {
6922 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6923 return -EINVAL;
6924 }
6925 /* don't enable anything if the ih is disabled */
6926 if (!rdev->ih.enabled) {
6927 cik_disable_interrupts(rdev);
6928 /* force the active interrupt state to all disabled */
6929 cik_disable_interrupt_state(rdev);
6930 return 0;
6931 }
6932
6933 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6934 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6935 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6936
6937 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6938 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6939 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6940 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6941 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6942 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6943
6944 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6945 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6946
6947 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6948 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6949 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6950 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6951 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6952 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6953 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6954 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6955
6956 if (rdev->flags & RADEON_IS_IGP)
6957 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6958 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6959 else
6960 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6961 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6962
6963 /* enable CP interrupts on all rings */
6964 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6965 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6966 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6967 }
6968 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6969 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6970 DRM_DEBUG("si_irq_set: sw int cp1\n");
6971 if (ring->me == 1) {
6972 switch (ring->pipe) {
6973 case 0:
6974 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6975 break;
6976 case 1:
6977 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6978 break;
6979 case 2:
6980 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6981 break;
6982 case 3:
6983 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6984 break;
6985 default:
6986 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6987 break;
6988 }
6989 } else if (ring->me == 2) {
6990 switch (ring->pipe) {
6991 case 0:
6992 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6993 break;
6994 case 1:
6995 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6996 break;
6997 case 2:
6998 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6999 break;
7000 case 3:
7001 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7002 break;
7003 default:
7004 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7005 break;
7006 }
7007 } else {
7008 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7009 }
7010 }
7011 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7012 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7013 DRM_DEBUG("si_irq_set: sw int cp2\n");
7014 if (ring->me == 1) {
7015 switch (ring->pipe) {
7016 case 0:
7017 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7018 break;
7019 case 1:
7020 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7021 break;
7022 case 2:
7023 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7024 break;
7025 case 3:
7026 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7027 break;
7028 default:
7029 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7030 break;
7031 }
7032 } else if (ring->me == 2) {
7033 switch (ring->pipe) {
7034 case 0:
7035 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7036 break;
7037 case 1:
7038 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7039 break;
7040 case 2:
7041 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7042 break;
7043 case 3:
7044 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7045 break;
7046 default:
7047 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7048 break;
7049 }
7050 } else {
7051 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7052 }
7053 }
7054
7055 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7056 DRM_DEBUG("cik_irq_set: sw int dma\n");
7057 dma_cntl |= TRAP_ENABLE;
7058 }
7059
7060 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7061 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7062 dma_cntl1 |= TRAP_ENABLE;
7063 }
7064
7065 if (rdev->irq.crtc_vblank_int[0] ||
7066 atomic_read(&rdev->irq.pflip[0])) {
7067 DRM_DEBUG("cik_irq_set: vblank 0\n");
7068 crtc1 |= VBLANK_INTERRUPT_MASK;
7069 }
7070 if (rdev->irq.crtc_vblank_int[1] ||
7071 atomic_read(&rdev->irq.pflip[1])) {
7072 DRM_DEBUG("cik_irq_set: vblank 1\n");
7073 crtc2 |= VBLANK_INTERRUPT_MASK;
7074 }
7075 if (rdev->irq.crtc_vblank_int[2] ||
7076 atomic_read(&rdev->irq.pflip[2])) {
7077 DRM_DEBUG("cik_irq_set: vblank 2\n");
7078 crtc3 |= VBLANK_INTERRUPT_MASK;
7079 }
7080 if (rdev->irq.crtc_vblank_int[3] ||
7081 atomic_read(&rdev->irq.pflip[3])) {
7082 DRM_DEBUG("cik_irq_set: vblank 3\n");
7083 crtc4 |= VBLANK_INTERRUPT_MASK;
7084 }
7085 if (rdev->irq.crtc_vblank_int[4] ||
7086 atomic_read(&rdev->irq.pflip[4])) {
7087 DRM_DEBUG("cik_irq_set: vblank 4\n");
7088 crtc5 |= VBLANK_INTERRUPT_MASK;
7089 }
7090 if (rdev->irq.crtc_vblank_int[5] ||
7091 atomic_read(&rdev->irq.pflip[5])) {
7092 DRM_DEBUG("cik_irq_set: vblank 5\n");
7093 crtc6 |= VBLANK_INTERRUPT_MASK;
7094 }
7095 if (rdev->irq.hpd[0]) {
7096 DRM_DEBUG("cik_irq_set: hpd 1\n");
7097 hpd1 |= DC_HPDx_INT_EN;
7098 }
7099 if (rdev->irq.hpd[1]) {
7100 DRM_DEBUG("cik_irq_set: hpd 2\n");
7101 hpd2 |= DC_HPDx_INT_EN;
7102 }
7103 if (rdev->irq.hpd[2]) {
7104 DRM_DEBUG("cik_irq_set: hpd 3\n");
7105 hpd3 |= DC_HPDx_INT_EN;
7106 }
7107 if (rdev->irq.hpd[3]) {
7108 DRM_DEBUG("cik_irq_set: hpd 4\n");
7109 hpd4 |= DC_HPDx_INT_EN;
7110 }
7111 if (rdev->irq.hpd[4]) {
7112 DRM_DEBUG("cik_irq_set: hpd 5\n");
7113 hpd5 |= DC_HPDx_INT_EN;
7114 }
7115 if (rdev->irq.hpd[5]) {
7116 DRM_DEBUG("cik_irq_set: hpd 6\n");
7117 hpd6 |= DC_HPDx_INT_EN;
7118 }
7119
7120 if (rdev->irq.dpm_thermal) {
7121 DRM_DEBUG("dpm thermal\n");
7122 if (rdev->flags & RADEON_IS_IGP)
7123 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7124 else
7125 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7126 }
7127
7128 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7129
7130 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7131 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7132
7133 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7134 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7135 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7136 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7137 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7138 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7139 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7140 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7141
7142 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7143
7144 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7145 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7146 if (rdev->num_crtc >= 4) {
7147 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7148 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7149 }
7150 if (rdev->num_crtc >= 6) {
7151 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7152 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7153 }
7154
7155 if (rdev->num_crtc >= 2) {
7156 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7157 GRPH_PFLIP_INT_MASK);
7158 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7159 GRPH_PFLIP_INT_MASK);
7160 }
7161 if (rdev->num_crtc >= 4) {
7162 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7163 GRPH_PFLIP_INT_MASK);
7164 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7165 GRPH_PFLIP_INT_MASK);
7166 }
7167 if (rdev->num_crtc >= 6) {
7168 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7169 GRPH_PFLIP_INT_MASK);
7170 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7171 GRPH_PFLIP_INT_MASK);
7172 }
7173
7174 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7175 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7176 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7177 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7178 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7179 WREG32(DC_HPD6_INT_CONTROL, hpd6);
7180
7181 if (rdev->flags & RADEON_IS_IGP)
7182 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7183 else
7184 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7185
7186 return 0;
7187}
7188
7189/**
7190 * cik_irq_ack - ack interrupt sources
7191 *
7192 * @rdev: radeon_device pointer
7193 *
7194 * Ack interrupt sources on the GPU (vblanks, hpd,
7195 * etc.) (CIK). Certain interrupts sources are sw
7196 * generated and do not require an explicit ack.
7197 */
7198static inline void cik_irq_ack(struct radeon_device *rdev)
7199{
7200 u32 tmp;
7201
7202 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7203 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7204 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7205 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7206 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7207 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7208 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7209
7210 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7211 EVERGREEN_CRTC0_REGISTER_OFFSET);
7212 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7213 EVERGREEN_CRTC1_REGISTER_OFFSET);
7214 if (rdev->num_crtc >= 4) {
7215 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7216 EVERGREEN_CRTC2_REGISTER_OFFSET);
7217 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7218 EVERGREEN_CRTC3_REGISTER_OFFSET);
7219 }
7220 if (rdev->num_crtc >= 6) {
7221 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7222 EVERGREEN_CRTC4_REGISTER_OFFSET);
7223 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7224 EVERGREEN_CRTC5_REGISTER_OFFSET);
7225 }
7226
7227 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7228 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7229 GRPH_PFLIP_INT_CLEAR);
7230 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7231 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7232 GRPH_PFLIP_INT_CLEAR);
7233 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7234 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7235 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7236 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7237 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7238 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7239 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7240 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7241
7242 if (rdev->num_crtc >= 4) {
7243 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7244 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7245 GRPH_PFLIP_INT_CLEAR);
7246 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7247 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7248 GRPH_PFLIP_INT_CLEAR);
7249 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7250 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7251 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7252 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7253 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7254 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7255 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7256 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7257 }
7258
7259 if (rdev->num_crtc >= 6) {
7260 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7261 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7262 GRPH_PFLIP_INT_CLEAR);
7263 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7264 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7265 GRPH_PFLIP_INT_CLEAR);
7266 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7267 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7268 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7269 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7270 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7271 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7272 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7273 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7274 }
7275
7276 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7277 tmp = RREG32(DC_HPD1_INT_CONTROL);
7278 tmp |= DC_HPDx_INT_ACK;
7279 WREG32(DC_HPD1_INT_CONTROL, tmp);
7280 }
7281 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7282 tmp = RREG32(DC_HPD2_INT_CONTROL);
7283 tmp |= DC_HPDx_INT_ACK;
7284 WREG32(DC_HPD2_INT_CONTROL, tmp);
7285 }
7286 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7287 tmp = RREG32(DC_HPD3_INT_CONTROL);
7288 tmp |= DC_HPDx_INT_ACK;
7289 WREG32(DC_HPD3_INT_CONTROL, tmp);
7290 }
7291 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7292 tmp = RREG32(DC_HPD4_INT_CONTROL);
7293 tmp |= DC_HPDx_INT_ACK;
7294 WREG32(DC_HPD4_INT_CONTROL, tmp);
7295 }
7296 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7297 tmp = RREG32(DC_HPD5_INT_CONTROL);
7298 tmp |= DC_HPDx_INT_ACK;
7299 WREG32(DC_HPD5_INT_CONTROL, tmp);
7300 }
7301 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7302 tmp = RREG32(DC_HPD5_INT_CONTROL);
7303 tmp |= DC_HPDx_INT_ACK;
7304 WREG32(DC_HPD6_INT_CONTROL, tmp);
7305 }
7306}
7307
7308/**
7309 * cik_irq_disable - disable interrupts
7310 *
7311 * @rdev: radeon_device pointer
7312 *
7313 * Disable interrupts on the hw (CIK).
7314 */
7315static void cik_irq_disable(struct radeon_device *rdev)
7316{
7317 cik_disable_interrupts(rdev);
7318 /* Wait and acknowledge irq */
7319 mdelay(1);
7320 cik_irq_ack(rdev);
7321 cik_disable_interrupt_state(rdev);
7322}
7323
7324/**
7325 * cik_irq_disable - disable interrupts for suspend
7326 *
7327 * @rdev: radeon_device pointer
7328 *
7329 * Disable interrupts and stop the RLC (CIK).
7330 * Used for suspend.
7331 */
7332static void cik_irq_suspend(struct radeon_device *rdev)
7333{
7334 cik_irq_disable(rdev);
7335 cik_rlc_stop(rdev);
7336}
7337
7338/**
7339 * cik_irq_fini - tear down interrupt support
7340 *
7341 * @rdev: radeon_device pointer
7342 *
7343 * Disable interrupts on the hw and free the IH ring
7344 * buffer (CIK).
7345 * Used for driver unload.
7346 */
7347static void cik_irq_fini(struct radeon_device *rdev)
7348{
7349 cik_irq_suspend(rdev);
7350 r600_ih_ring_fini(rdev);
7351}
7352
7353/**
7354 * cik_get_ih_wptr - get the IH ring buffer wptr
7355 *
7356 * @rdev: radeon_device pointer
7357 *
7358 * Get the IH ring buffer wptr from either the register
7359 * or the writeback memory buffer (CIK). Also check for
7360 * ring buffer overflow and deal with it.
7361 * Used by cik_irq_process().
7362 * Returns the value of the wptr.
7363 */
7364static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7365{
7366 u32 wptr, tmp;
7367
7368 if (rdev->wb.enabled)
7369 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7370 else
7371 wptr = RREG32(IH_RB_WPTR);
7372
7373 if (wptr & RB_OVERFLOW) {
7374 /* When a ring buffer overflow happen start parsing interrupt
7375 * from the last not overwritten vector (wptr + 16). Hopefully
7376 * this should allow us to catchup.
7377 */
7378 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7379 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7380 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7381 tmp = RREG32(IH_RB_CNTL);
7382 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7383 WREG32(IH_RB_CNTL, tmp);
7384 }
7385 return (wptr & rdev->ih.ptr_mask);
7386}
7387
7388/* CIK IV Ring
7389 * Each IV ring entry is 128 bits:
7390 * [7:0] - interrupt source id
7391 * [31:8] - reserved
7392 * [59:32] - interrupt source data
7393 * [63:60] - reserved
7394 * [71:64] - RINGID
7395 * CP:
7396 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7397 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7398 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7399 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7400 * PIPE_ID - ME0 0=3D
7401 * - ME1&2 compute dispatcher (4 pipes each)
7402 * SDMA:
7403 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7404 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7405 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7406 * [79:72] - VMID
7407 * [95:80] - PASID
7408 * [127:96] - reserved
7409 */
7410/**
7411 * cik_irq_process - interrupt handler
7412 *
7413 * @rdev: radeon_device pointer
7414 *
7415 * Interrupt hander (CIK). Walk the IH ring,
7416 * ack interrupts and schedule work to handle
7417 * interrupt events.
7418 * Returns irq process return code.
7419 */
7420int cik_irq_process(struct radeon_device *rdev)
7421{
7422 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7423 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7424 u32 wptr;
7425 u32 rptr;
7426 u32 src_id, src_data, ring_id;
7427 u8 me_id, pipe_id, queue_id;
7428 u32 ring_index;
7429 bool queue_hotplug = false;
7430 bool queue_reset = false;
7431 u32 addr, status, mc_client;
7432 bool queue_thermal = false;
7433
7434 if (!rdev->ih.enabled || rdev->shutdown)
7435 return IRQ_NONE;
7436
7437 wptr = cik_get_ih_wptr(rdev);
7438
7439restart_ih:
7440 /* is somebody else already processing irqs? */
7441 if (atomic_xchg(&rdev->ih.lock, 1))
7442 return IRQ_NONE;
7443
7444 rptr = rdev->ih.rptr;
7445 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7446
7447 /* Order reading of wptr vs. reading of IH ring data */
7448 rmb();
7449
7450 /* display interrupts */
7451 cik_irq_ack(rdev);
7452
7453 while (rptr != wptr) {
7454 /* wptr/rptr are in bytes! */
7455 ring_index = rptr / 4;
7456 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7457 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7458 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7459
7460 switch (src_id) {
7461 case 1: /* D1 vblank/vline */
7462 switch (src_data) {
7463 case 0: /* D1 vblank */
7464 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7465 if (rdev->irq.crtc_vblank_int[0]) {
7466 drm_handle_vblank(rdev->ddev, 0);
7467#ifdef __NetBSD__
7468 spin_lock(&rdev->irq.vblank_lock);
7469 rdev->pm.vblank_sync = true;
7470 DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7471 spin_unlock(&rdev->irq.vblank_lock);
7472#else
7473 rdev->pm.vblank_sync = true;
7474 wake_up(&rdev->irq.vblank_queue);
7475#endif
7476 }
7477 if (atomic_read(&rdev->irq.pflip[0]))
7478 radeon_crtc_handle_flip(rdev, 0);
7479 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7480 DRM_DEBUG("IH: D1 vblank\n");
7481 }
7482 break;
7483 case 1: /* D1 vline */
7484 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7485 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7486 DRM_DEBUG("IH: D1 vline\n");
7487 }
7488 break;
7489 default:
7490 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7491 break;
7492 }
7493 break;
7494 case 2: /* D2 vblank/vline */
7495 switch (src_data) {
7496 case 0: /* D2 vblank */
7497 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7498 if (rdev->irq.crtc_vblank_int[1]) {
7499 drm_handle_vblank(rdev->ddev, 1);
7500#ifdef __NetBSD__
7501 spin_lock(&rdev->irq.vblank_lock);
7502 rdev->pm.vblank_sync = true;
7503 DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7504 spin_unlock(&rdev->irq.vblank_lock);
7505#else
7506 rdev->pm.vblank_sync = true;
7507 wake_up(&rdev->irq.vblank_queue);
7508#endif
7509 }
7510 if (atomic_read(&rdev->irq.pflip[1]))
7511 radeon_crtc_handle_flip(rdev, 1);
7512 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7513 DRM_DEBUG("IH: D2 vblank\n");
7514 }
7515 break;
7516 case 1: /* D2 vline */
7517 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7518 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7519 DRM_DEBUG("IH: D2 vline\n");
7520 }
7521 break;
7522 default:
7523 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7524 break;
7525 }
7526 break;
7527 case 3: /* D3 vblank/vline */
7528 switch (src_data) {
7529 case 0: /* D3 vblank */
7530 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7531 if (rdev->irq.crtc_vblank_int[2]) {
7532 drm_handle_vblank(rdev->ddev, 2);
7533#ifdef __NetBSD__
7534 spin_lock(&rdev->irq.vblank_lock);
7535 rdev->pm.vblank_sync = true;
7536 DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7537 spin_unlock(&rdev->irq.vblank_lock);
7538#else
7539 rdev->pm.vblank_sync = true;
7540 wake_up(&rdev->irq.vblank_queue);
7541#endif
7542 }
7543 if (atomic_read(&rdev->irq.pflip[2]))
7544 radeon_crtc_handle_flip(rdev, 2);
7545 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7546 DRM_DEBUG("IH: D3 vblank\n");
7547 }
7548 break;
7549 case 1: /* D3 vline */
7550 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7551 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7552 DRM_DEBUG("IH: D3 vline\n");
7553 }
7554 break;
7555 default:
7556 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7557 break;
7558 }
7559 break;
7560 case 4: /* D4 vblank/vline */
7561 switch (src_data) {
7562 case 0: /* D4 vblank */
7563 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7564 if (rdev->irq.crtc_vblank_int[3]) {
7565 drm_handle_vblank(rdev->ddev, 3);
7566#ifdef __NetBSD__
7567 spin_lock(&rdev->irq.vblank_lock);
7568 rdev->pm.vblank_sync = true;
7569 DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7570 spin_unlock(&rdev->irq.vblank_lock);
7571#else
7572 rdev->pm.vblank_sync = true;
7573 wake_up(&rdev->irq.vblank_queue);
7574#endif
7575 }
7576 if (atomic_read(&rdev->irq.pflip[3]))
7577 radeon_crtc_handle_flip(rdev, 3);
7578 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7579 DRM_DEBUG("IH: D4 vblank\n");
7580 }
7581 break;
7582 case 1: /* D4 vline */
7583 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7584 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7585 DRM_DEBUG("IH: D4 vline\n");
7586 }
7587 break;
7588 default:
7589 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7590 break;
7591 }
7592 break;
7593 case 5: /* D5 vblank/vline */
7594 switch (src_data) {
7595 case 0: /* D5 vblank */
7596 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7597 if (rdev->irq.crtc_vblank_int[4]) {
7598 drm_handle_vblank(rdev->ddev, 4);
7599#ifdef __NetBSD__
7600 spin_lock(&rdev->irq.vblank_lock);
7601 rdev->pm.vblank_sync = true;
7602 DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7603 spin_unlock(&rdev->irq.vblank_lock);
7604#else
7605 rdev->pm.vblank_sync = true;
7606 wake_up(&rdev->irq.vblank_queue);
7607#endif
7608 }
7609 if (atomic_read(&rdev->irq.pflip[4]))
7610 radeon_crtc_handle_flip(rdev, 4);
7611 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7612 DRM_DEBUG("IH: D5 vblank\n");
7613 }
7614 break;
7615 case 1: /* D5 vline */
7616 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7617 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7618 DRM_DEBUG("IH: D5 vline\n");
7619 }
7620 break;
7621 default:
7622 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623 break;
7624 }
7625 break;
7626 case 6: /* D6 vblank/vline */
7627 switch (src_data) {
7628 case 0: /* D6 vblank */
7629 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7630 if (rdev->irq.crtc_vblank_int[5]) {
7631 drm_handle_vblank(rdev->ddev, 5);
7632#ifdef __NetBSD__
7633 spin_lock(&rdev->irq.vblank_lock);
7634 rdev->pm.vblank_sync = true;
7635 DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7636 spin_unlock(&rdev->irq.vblank_lock);
7637#else
7638 rdev->pm.vblank_sync = true;
7639 wake_up(&rdev->irq.vblank_queue);
7640#endif
7641 }
7642 if (atomic_read(&rdev->irq.pflip[5]))
7643 radeon_crtc_handle_flip(rdev, 5);
7644 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7645 DRM_DEBUG("IH: D6 vblank\n");
7646 }
7647 break;
7648 case 1: /* D6 vline */
7649 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7650 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7651 DRM_DEBUG("IH: D6 vline\n");
7652 }
7653 break;
7654 default:
7655 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7656 break;
7657 }
7658 break;
7659 case 8: /* D1 page flip */
7660 case 10: /* D2 page flip */
7661 case 12: /* D3 page flip */
7662 case 14: /* D4 page flip */
7663 case 16: /* D5 page flip */
7664 case 18: /* D6 page flip */
7665 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7666 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7667 break;
7668 case 42: /* HPD hotplug */
7669 switch (src_data) {
7670 case 0:
7671 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7672 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7673 queue_hotplug = true;
7674 DRM_DEBUG("IH: HPD1\n");
7675 }
7676 break;
7677 case 1:
7678 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7679 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7680 queue_hotplug = true;
7681 DRM_DEBUG("IH: HPD2\n");
7682 }
7683 break;
7684 case 2:
7685 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7686 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7687 queue_hotplug = true;
7688 DRM_DEBUG("IH: HPD3\n");
7689 }
7690 break;
7691 case 3:
7692 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7693 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7694 queue_hotplug = true;
7695 DRM_DEBUG("IH: HPD4\n");
7696 }
7697 break;
7698 case 4:
7699 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7700 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7701 queue_hotplug = true;
7702 DRM_DEBUG("IH: HPD5\n");
7703 }
7704 break;
7705 case 5:
7706 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7707 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7708 queue_hotplug = true;
7709 DRM_DEBUG("IH: HPD6\n");
7710 }
7711 break;
7712 default:
7713 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7714 break;
7715 }
7716 break;
7717 case 124: /* UVD */
7718 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7719 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7720 break;
7721 case 146:
7722 case 147:
7723 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7724 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7725 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7726 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7727 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7728 addr);
7729 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7730 status);
7731 cik_vm_decode_fault(rdev, status, addr, mc_client);
7732 /* reset addr and status */
7733 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7734 break;
7735 case 167: /* VCE */
7736 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7737 switch (src_data) {
7738 case 0:
7739 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7740 break;
7741 case 1:
7742 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7743 break;
7744 default:
7745 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7746 break;
7747 }
7748 break;
7749 case 176: /* GFX RB CP_INT */
7750 case 177: /* GFX IB CP_INT */
7751 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7752 break;
7753 case 181: /* CP EOP event */
7754 DRM_DEBUG("IH: CP EOP\n");
7755 /* XXX check the bitfield order! */
7756 me_id = (ring_id & 0x60) >> 5;
7757 pipe_id = (ring_id & 0x18) >> 3;
7758 queue_id = (ring_id & 0x7) >> 0;
7759 switch (me_id) {
7760 case 0:
7761 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7762 break;
7763 case 1:
7764 case 2:
7765 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7766 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7767 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7768 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7769 break;
7770 }
7771 break;
7772 case 184: /* CP Privileged reg access */
7773 DRM_ERROR("Illegal register access in command stream\n");
7774 /* XXX check the bitfield order! */
7775 me_id = (ring_id & 0x60) >> 5;
7776 pipe_id = (ring_id & 0x18) >> 3;
7777 queue_id = (ring_id & 0x7) >> 0;
7778 switch (me_id) {
7779 case 0:
7780 /* This results in a full GPU reset, but all we need to do is soft
7781 * reset the CP for gfx
7782 */
7783 queue_reset = true;
7784 break;
7785 case 1:
7786 /* XXX compute */
7787 queue_reset = true;
7788 break;
7789 case 2:
7790 /* XXX compute */
7791 queue_reset = true;
7792 break;
7793 }
7794 break;
7795 case 185: /* CP Privileged inst */
7796 DRM_ERROR("Illegal instruction in command stream\n");
7797 /* XXX check the bitfield order! */
7798 me_id = (ring_id & 0x60) >> 5;
7799 pipe_id = (ring_id & 0x18) >> 3;
7800 queue_id = (ring_id & 0x7) >> 0;
7801 switch (me_id) {
7802 case 0:
7803 /* This results in a full GPU reset, but all we need to do is soft
7804 * reset the CP for gfx
7805 */
7806 queue_reset = true;
7807 break;
7808 case 1:
7809 /* XXX compute */
7810 queue_reset = true;
7811 break;
7812 case 2:
7813 /* XXX compute */
7814 queue_reset = true;
7815 break;
7816 }
7817 break;
7818 case 224: /* SDMA trap event */
7819 /* XXX check the bitfield order! */
7820 me_id = (ring_id & 0x3) >> 0;
7821 queue_id = (ring_id & 0xc) >> 2;
7822 DRM_DEBUG("IH: SDMA trap\n");
7823 switch (me_id) {
7824 case 0:
7825 switch (queue_id) {
7826 case 0:
7827 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7828 break;
7829 case 1:
7830 /* XXX compute */
7831 break;
7832 case 2:
7833 /* XXX compute */
7834 break;
7835 }
7836 break;
7837 case 1:
7838 switch (queue_id) {
7839 case 0:
7840 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7841 break;
7842 case 1:
7843 /* XXX compute */
7844 break;
7845 case 2:
7846 /* XXX compute */
7847 break;
7848 }
7849 break;
7850 }
7851 break;
7852 case 230: /* thermal low to high */
7853 DRM_DEBUG("IH: thermal low to high\n");
7854 rdev->pm.dpm.thermal.high_to_low = false;
7855 queue_thermal = true;
7856 break;
7857 case 231: /* thermal high to low */
7858 DRM_DEBUG("IH: thermal high to low\n");
7859 rdev->pm.dpm.thermal.high_to_low = true;
7860 queue_thermal = true;
7861 break;
7862 case 233: /* GUI IDLE */
7863 DRM_DEBUG("IH: GUI idle\n");
7864 break;
7865 case 241: /* SDMA Privileged inst */
7866 case 247: /* SDMA Privileged inst */
7867 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7868 /* XXX check the bitfield order! */
7869 me_id = (ring_id & 0x3) >> 0;
7870 queue_id = (ring_id & 0xc) >> 2;
7871 switch (me_id) {
7872 case 0:
7873 switch (queue_id) {
7874 case 0:
7875 queue_reset = true;
7876 break;
7877 case 1:
7878 /* XXX compute */
7879 queue_reset = true;
7880 break;
7881 case 2:
7882 /* XXX compute */
7883 queue_reset = true;
7884 break;
7885 }
7886 break;
7887 case 1:
7888 switch (queue_id) {
7889 case 0:
7890 queue_reset = true;
7891 break;
7892 case 1:
7893 /* XXX compute */
7894 queue_reset = true;
7895 break;
7896 case 2:
7897 /* XXX compute */
7898 queue_reset = true;
7899 break;
7900 }
7901 break;
7902 }
7903 break;
7904 default:
7905 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7906 break;
7907 }
7908
7909 /* wptr/rptr are in bytes! */
7910 rptr += 16;
7911 rptr &= rdev->ih.ptr_mask;
7912 }
7913 if (queue_hotplug)
7914 schedule_work(&rdev->hotplug_work);
7915 if (queue_reset)
7916 schedule_work(&rdev->reset_work);
7917 if (queue_thermal)
7918 schedule_work(&rdev->pm.dpm.thermal.work);
7919 rdev->ih.rptr = rptr;
7920 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7921 atomic_set(&rdev->ih.lock, 0);
7922
7923 /* make sure wptr hasn't changed while processing */
7924 wptr = cik_get_ih_wptr(rdev);
7925 if (wptr != rptr)
7926 goto restart_ih;
7927
7928 return IRQ_HANDLED;
7929}
7930
7931/*
7932 * startup/shutdown callbacks
7933 */
7934/**
7935 * cik_startup - program the asic to a functional state
7936 *
7937 * @rdev: radeon_device pointer
7938 *
7939 * Programs the asic to a functional state (CIK).
7940 * Called by cik_init() and cik_resume().
7941 * Returns 0 for success, error for failure.
7942 */
7943static int cik_startup(struct radeon_device *rdev)
7944{
7945 struct radeon_ring *ring;
7946 int r;
7947
7948 /* enable pcie gen2/3 link */
7949 cik_pcie_gen3_enable(rdev);
7950 /* enable aspm */
7951 cik_program_aspm(rdev);
7952
7953 /* scratch needs to be initialized before MC */
7954 r = r600_vram_scratch_init(rdev);
7955 if (r)
7956 return r;
7957
7958 cik_mc_program(rdev);
7959
7960 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7961 r = ci_mc_load_microcode(rdev);
7962 if (r) {
7963 DRM_ERROR("Failed to load MC firmware!\n");
7964 return r;
7965 }
7966 }
7967
7968 r = cik_pcie_gart_enable(rdev);
7969 if (r)
7970 return r;
7971 cik_gpu_init(rdev);
7972
7973 /* allocate rlc buffers */
7974 if (rdev->flags & RADEON_IS_IGP) {
7975 if (rdev->family == CHIP_KAVERI) {
7976 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7977 rdev->rlc.reg_list_size =
7978 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7979 } else {
7980 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7981 rdev->rlc.reg_list_size =
7982 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7983 }
7984 }
7985 rdev->rlc.cs_data = ci_cs_data;
7986 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7987 r = sumo_rlc_init(rdev);
7988 if (r) {
7989 DRM_ERROR("Failed to init rlc BOs!\n");
7990 return r;
7991 }
7992
7993 /* allocate wb buffer */
7994 r = radeon_wb_init(rdev);
7995 if (r)
7996 return r;
7997
7998 /* allocate mec buffers */
7999 r = cik_mec_init(rdev);
8000 if (r) {
8001 DRM_ERROR("Failed to init MEC BOs!\n");
8002 return r;
8003 }
8004
8005 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8006 if (r) {
8007 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8008 return r;
8009 }
8010
8011 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8012 if (r) {
8013 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8014 return r;
8015 }
8016
8017 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8018 if (r) {
8019 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8020 return r;
8021 }
8022
8023 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8024 if (r) {
8025 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8026 return r;
8027 }
8028
8029 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8030 if (r) {
8031 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8032 return r;
8033 }
8034
8035 r = radeon_uvd_resume(rdev);
8036 if (!r) {
8037 r = uvd_v4_2_resume(rdev);
8038 if (!r) {
8039 r = radeon_fence_driver_start_ring(rdev,
8040 R600_RING_TYPE_UVD_INDEX);
8041 if (r)
8042 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8043 }
8044 }
8045 if (r)
8046 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8047
8048 r = radeon_vce_resume(rdev);
8049 if (!r) {
8050 r = vce_v2_0_resume(rdev);
8051 if (!r)
8052 r = radeon_fence_driver_start_ring(rdev,
8053 TN_RING_TYPE_VCE1_INDEX);
8054 if (!r)
8055 r = radeon_fence_driver_start_ring(rdev,
8056 TN_RING_TYPE_VCE2_INDEX);
8057 }
8058 if (r) {
8059 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8060 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8061 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8062 }
8063
8064 /* Enable IRQ */
8065 if (!rdev->irq.installed) {
8066 r = radeon_irq_kms_init(rdev);
8067 if (r)
8068 return r;
8069 }
8070
8071 r = cik_irq_init(rdev);
8072 if (r) {
8073 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8074 radeon_irq_kms_fini(rdev);
8075 return r;
8076 }
8077 cik_irq_set(rdev);
8078
8079 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8080 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8081 PACKET3(PACKET3_NOP, 0x3FFF));
8082 if (r)
8083 return r;
8084
8085 /* set up the compute queues */
8086 /* type-2 packets are deprecated on MEC, use type-3 instead */
8087 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8088 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8089 PACKET3(PACKET3_NOP, 0x3FFF));
8090 if (r)
8091 return r;
8092 ring->me = 1; /* first MEC */
8093 ring->pipe = 0; /* first pipe */
8094 ring->queue = 0; /* first queue */
8095 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8096
8097 /* type-2 packets are deprecated on MEC, use type-3 instead */
8098 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8099 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8100 PACKET3(PACKET3_NOP, 0x3FFF));
8101 if (r)
8102 return r;
8103 /* dGPU only have 1 MEC */
8104 ring->me = 1; /* first MEC */
8105 ring->pipe = 0; /* first pipe */
8106 ring->queue = 1; /* second queue */
8107 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8108
8109 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8110 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8111 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8112 if (r)
8113 return r;
8114
8115 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8116 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8117 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8118 if (r)
8119 return r;
8120
8121 r = cik_cp_resume(rdev);
8122 if (r)
8123 return r;
8124
8125 r = cik_sdma_resume(rdev);
8126 if (r)
8127 return r;
8128
8129 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8130 if (ring->ring_size) {
8131 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8132 RADEON_CP_PACKET2);
8133 if (!r)
8134 r = uvd_v1_0_init(rdev);
8135 if (r)
8136 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8137 }
8138
8139 r = -ENOENT;
8140
8141 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8142 if (ring->ring_size)
8143 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8144 VCE_CMD_NO_OP);
8145
8146 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8147 if (ring->ring_size)
8148 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8149 VCE_CMD_NO_OP);
8150
8151 if (!r)
8152 r = vce_v1_0_init(rdev);
8153 else if (r != -ENOENT)
8154 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8155
8156 r = radeon_ib_pool_init(rdev);
8157 if (r) {
8158 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8159 return r;
8160 }
8161
8162 r = radeon_vm_manager_init(rdev);
8163 if (r) {
8164 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8165 return r;
8166 }
8167
8168 r = dce6_audio_init(rdev);
8169 if (r)
8170 return r;
8171
8172 return 0;
8173}
8174
8175/**
8176 * cik_resume - resume the asic to a functional state
8177 *
8178 * @rdev: radeon_device pointer
8179 *
8180 * Programs the asic to a functional state (CIK).
8181 * Called at resume.
8182 * Returns 0 for success, error for failure.
8183 */
8184int cik_resume(struct radeon_device *rdev)
8185{
8186 int r;
8187
8188 /* post card */
8189 atom_asic_init(rdev->mode_info.atom_context);
8190
8191 /* init golden registers */
8192 cik_init_golden_registers(rdev);
8193
8194 if (rdev->pm.pm_method == PM_METHOD_DPM)
8195 radeon_pm_resume(rdev);
8196
8197 rdev->accel_working = true;
8198 r = cik_startup(rdev);
8199 if (r) {
8200 DRM_ERROR("cik startup failed on resume\n");
8201 rdev->accel_working = false;
8202 return r;
8203 }
8204
8205 return r;
8206
8207}
8208
8209/**
8210 * cik_suspend - suspend the asic
8211 *
8212 * @rdev: radeon_device pointer
8213 *
8214 * Bring the chip into a state suitable for suspend (CIK).
8215 * Called at suspend.
8216 * Returns 0 for success.
8217 */
8218int cik_suspend(struct radeon_device *rdev)
8219{
8220 radeon_pm_suspend(rdev);
8221 dce6_audio_fini(rdev);
8222 radeon_vm_manager_fini(rdev);
8223 cik_cp_enable(rdev, false);
8224 cik_sdma_enable(rdev, false);
8225 uvd_v1_0_fini(rdev);
8226 radeon_uvd_suspend(rdev);
8227 radeon_vce_suspend(rdev);
8228 cik_fini_pg(rdev);
8229 cik_fini_cg(rdev);
8230 cik_irq_suspend(rdev);
8231 radeon_wb_disable(rdev);
8232 cik_pcie_gart_disable(rdev);
8233 return 0;
8234}
8235
8236/* Plan is to move initialization in that function and use
8237 * helper function so that radeon_device_init pretty much
8238 * do nothing more than calling asic specific function. This
8239 * should also allow to remove a bunch of callback function
8240 * like vram_info.
8241 */
8242/**
8243 * cik_init - asic specific driver and hw init
8244 *
8245 * @rdev: radeon_device pointer
8246 *
8247 * Setup asic specific driver variables and program the hw
8248 * to a functional state (CIK).
8249 * Called at driver startup.
8250 * Returns 0 for success, errors for failure.
8251 */
8252int cik_init(struct radeon_device *rdev)
8253{
8254 struct radeon_ring *ring;
8255 int r;
8256
8257 /* Read BIOS */
8258 if (!radeon_get_bios(rdev)) {
8259 if (ASIC_IS_AVIVO(rdev))
8260 return -EINVAL;
8261 }
8262 /* Must be an ATOMBIOS */
8263 if (!rdev->is_atom_bios) {
8264 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8265 return -EINVAL;
8266 }
8267 r = radeon_atombios_init(rdev);
8268 if (r)
8269 return r;
8270
8271 /* Post card if necessary */
8272 if (!radeon_card_posted(rdev)) {
8273 if (!rdev->bios) {
8274 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8275 return -EINVAL;
8276 }
8277 DRM_INFO("GPU not posted. posting now...\n");
8278 atom_asic_init(rdev->mode_info.atom_context);
8279 }
8280 /* init golden registers */
8281 cik_init_golden_registers(rdev);
8282 /* Initialize scratch registers */
8283 cik_scratch_init(rdev);
8284 /* Initialize surface registers */
8285 radeon_surface_init(rdev);
8286 /* Initialize clocks */
8287 radeon_get_clock_info(rdev->ddev);
8288
8289 /* Fence driver */
8290 r = radeon_fence_driver_init(rdev);
8291 if (r)
8292 return r;
8293
8294 /* initialize memory controller */
8295 r = cik_mc_init(rdev);
8296 if (r)
8297 return r;
8298 /* Memory manager */
8299 r = radeon_bo_init(rdev);
8300 if (r)
8301 return r;
8302
8303 if (rdev->flags & RADEON_IS_IGP) {
8304 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8305 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8306 r = cik_init_microcode(rdev);
8307 if (r) {
8308 DRM_ERROR("Failed to load firmware!\n");
8309 return r;
8310 }
8311 }
8312 } else {
8313 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8314 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8315 !rdev->mc_fw) {
8316 r = cik_init_microcode(rdev);
8317 if (r) {
8318 DRM_ERROR("Failed to load firmware!\n");
8319 return r;
8320 }
8321 }
8322 }
8323
8324 /* Initialize power management */
8325 radeon_pm_init(rdev);
8326
8327 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8328 ring->ring_obj = NULL;
8329 r600_ring_init(rdev, ring, 1024 * 1024);
8330
8331 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8332 ring->ring_obj = NULL;
8333 r600_ring_init(rdev, ring, 1024 * 1024);
8334 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8335 if (r)
8336 return r;
8337
8338 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8339 ring->ring_obj = NULL;
8340 r600_ring_init(rdev, ring, 1024 * 1024);
8341 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8342 if (r)
8343 return r;
8344
8345 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8346 ring->ring_obj = NULL;
8347 r600_ring_init(rdev, ring, 256 * 1024);
8348
8349 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8350 ring->ring_obj = NULL;
8351 r600_ring_init(rdev, ring, 256 * 1024);
8352
8353 r = radeon_uvd_init(rdev);
8354 if (!r) {
8355 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8356 ring->ring_obj = NULL;
8357 r600_ring_init(rdev, ring, 4096);
8358 }
8359
8360 r = radeon_vce_init(rdev);
8361 if (!r) {
8362 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8363 ring->ring_obj = NULL;
8364 r600_ring_init(rdev, ring, 4096);
8365
8366 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8367 ring->ring_obj = NULL;
8368 r600_ring_init(rdev, ring, 4096);
8369 }
8370
8371 rdev->ih.ring_obj = NULL;
8372 r600_ih_ring_init(rdev, 64 * 1024);
8373
8374 r = r600_pcie_gart_init(rdev);
8375 if (r)
8376 return r;
8377
8378 rdev->accel_working = true;
8379 r = cik_startup(rdev);
8380 if (r) {
8381 dev_err(rdev->dev, "disabling GPU acceleration\n");
8382 cik_cp_fini(rdev);
8383 cik_sdma_fini(rdev);
8384 cik_irq_fini(rdev);
8385 sumo_rlc_fini(rdev);
8386 cik_mec_fini(rdev);
8387 radeon_wb_fini(rdev);
8388 radeon_ib_pool_fini(rdev);
8389 radeon_vm_manager_fini(rdev);
8390 radeon_irq_kms_fini(rdev);
8391 cik_pcie_gart_fini(rdev);
8392 rdev->accel_working = false;
8393 }
8394
8395 /* Don't start up if the MC ucode is missing.
8396 * The default clocks and voltages before the MC ucode
8397 * is loaded are not suffient for advanced operations.
8398 */
8399 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8400 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8401 return -EINVAL;
8402 }
8403
8404 return 0;
8405}
8406
8407/**
8408 * cik_fini - asic specific driver and hw fini
8409 *
8410 * @rdev: radeon_device pointer
8411 *
8412 * Tear down the asic specific driver variables and program the hw
8413 * to an idle state (CIK).
8414 * Called at driver unload.
8415 */
8416void cik_fini(struct radeon_device *rdev)
8417{
8418 radeon_pm_fini(rdev);
8419 cik_cp_fini(rdev);
8420 cik_sdma_fini(rdev);
8421 cik_fini_pg(rdev);
8422 cik_fini_cg(rdev);
8423 cik_irq_fini(rdev);
8424 sumo_rlc_fini(rdev);
8425 cik_mec_fini(rdev);
8426 radeon_wb_fini(rdev);
8427 radeon_vm_manager_fini(rdev);
8428 radeon_ib_pool_fini(rdev);
8429 radeon_irq_kms_fini(rdev);
8430 uvd_v1_0_fini(rdev);
8431 radeon_uvd_fini(rdev);
8432 radeon_vce_fini(rdev);
8433 cik_pcie_gart_fini(rdev);
8434 r600_vram_scratch_fini(rdev);
8435 radeon_gem_fini(rdev);
8436 radeon_fence_driver_fini(rdev);
8437 radeon_bo_fini(rdev);
8438 radeon_atombios_fini(rdev);
8439 kfree(rdev->bios);
8440 rdev->bios = NULL;
8441}
8442
8443void dce8_program_fmt(struct drm_encoder *encoder)
8444{
8445 struct drm_device *dev = encoder->dev;
8446 struct radeon_device *rdev = dev->dev_private;
8447 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8448 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8449 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8450 int bpc = 0;
8451 u32 tmp = 0;
8452 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8453
8454 if (connector) {
8455 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8456 bpc = radeon_get_monitor_bpc(connector);
8457 dither = radeon_connector->dither;
8458 }
8459
8460 /* LVDS/eDP FMT is set up by atom */
8461 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8462 return;
8463
8464 /* not needed for analog */
8465 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8466 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8467 return;
8468
8469 if (bpc == 0)
8470 return;
8471
8472 switch (bpc) {
8473 case 6:
8474 if (dither == RADEON_FMT_DITHER_ENABLE)
8475 /* XXX sort out optimal dither settings */
8476 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8477 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8478 else
8479 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8480 break;
8481 case 8:
8482 if (dither == RADEON_FMT_DITHER_ENABLE)
8483 /* XXX sort out optimal dither settings */
8484 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8485 FMT_RGB_RANDOM_ENABLE |
8486 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8487 else
8488 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8489 break;
8490 case 10:
8491 if (dither == RADEON_FMT_DITHER_ENABLE)
8492 /* XXX sort out optimal dither settings */
8493 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8494 FMT_RGB_RANDOM_ENABLE |
8495 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8496 else
8497 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8498 break;
8499 default:
8500 /* not needed */
8501 break;
8502 }
8503
8504 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8505}
8506
8507/* display watermark setup */
8508/**
8509 * dce8_line_buffer_adjust - Set up the line buffer
8510 *
8511 * @rdev: radeon_device pointer
8512 * @radeon_crtc: the selected display controller
8513 * @mode: the current display mode on the selected display
8514 * controller
8515 *
8516 * Setup up the line buffer allocation for
8517 * the selected display controller (CIK).
8518 * Returns the line buffer size in pixels.
8519 */
8520static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8521 struct radeon_crtc *radeon_crtc,
8522 struct drm_display_mode *mode)
8523{
8524 u32 tmp, buffer_alloc, i;
8525 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8526 /*
8527 * Line Buffer Setup
8528 * There are 6 line buffers, one for each display controllers.
8529 * There are 3 partitions per LB. Select the number of partitions
8530 * to enable based on the display width. For display widths larger
8531 * than 4096, you need use to use 2 display controllers and combine
8532 * them using the stereo blender.
8533 */
8534 if (radeon_crtc->base.enabled && mode) {
8535 if (mode->crtc_hdisplay < 1920) {
8536 tmp = 1;
8537 buffer_alloc = 2;
8538 } else if (mode->crtc_hdisplay < 2560) {
8539 tmp = 2;
8540 buffer_alloc = 2;
8541 } else if (mode->crtc_hdisplay < 4096) {
8542 tmp = 0;
8543 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8544 } else {
8545 DRM_DEBUG_KMS("Mode too big for LB!\n");
8546 tmp = 0;
8547 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8548 }
8549 } else {
8550 tmp = 1;
8551 buffer_alloc = 0;
8552 }
8553
8554 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8555 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8556
8557 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8558 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8559 for (i = 0; i < rdev->usec_timeout; i++) {
8560 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8561 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8562 break;
8563 udelay(1);
8564 }
8565
8566 if (radeon_crtc->base.enabled && mode) {
8567 switch (tmp) {
8568 case 0:
8569 default:
8570 return 4096 * 2;
8571 case 1:
8572 return 1920 * 2;
8573 case 2:
8574 return 2560 * 2;
8575 }
8576 }
8577
8578 /* controller not enabled, so no lb used */
8579 return 0;
8580}
8581
8582/**
8583 * cik_get_number_of_dram_channels - get the number of dram channels
8584 *
8585 * @rdev: radeon_device pointer
8586 *
8587 * Look up the number of video ram channels (CIK).
8588 * Used for display watermark bandwidth calculations
8589 * Returns the number of dram channels
8590 */
8591static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8592{
8593 u32 tmp = RREG32(MC_SHARED_CHMAP);
8594
8595 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8596 case 0:
8597 default:
8598 return 1;
8599 case 1:
8600 return 2;
8601 case 2:
8602 return 4;
8603 case 3:
8604 return 8;
8605 case 4:
8606 return 3;
8607 case 5:
8608 return 6;
8609 case 6:
8610 return 10;
8611 case 7:
8612 return 12;
8613 case 8:
8614 return 16;
8615 }
8616}
8617
8618struct dce8_wm_params {
8619 u32 dram_channels; /* number of dram channels */
8620 u32 yclk; /* bandwidth per dram data pin in kHz */
8621 u32 sclk; /* engine clock in kHz */
8622 u32 disp_clk; /* display clock in kHz */
8623 u32 src_width; /* viewport width */
8624 u32 active_time; /* active display time in ns */
8625 u32 blank_time; /* blank time in ns */
8626 bool interlaced; /* mode is interlaced */
8627 fixed20_12 vsc; /* vertical scale ratio */
8628 u32 num_heads; /* number of active crtcs */
8629 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8630 u32 lb_size; /* line buffer allocated to pipe */
8631 u32 vtaps; /* vertical scaler taps */
8632};
8633
8634/**
8635 * dce8_dram_bandwidth - get the dram bandwidth
8636 *
8637 * @wm: watermark calculation data
8638 *
8639 * Calculate the raw dram bandwidth (CIK).
8640 * Used for display watermark bandwidth calculations
8641 * Returns the dram bandwidth in MBytes/s
8642 */
8643static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8644{
8645 /* Calculate raw DRAM Bandwidth */
8646 fixed20_12 dram_efficiency; /* 0.7 */
8647 fixed20_12 yclk, dram_channels, bandwidth;
8648 fixed20_12 a;
8649
8650 a.full = dfixed_const(1000);
8651 yclk.full = dfixed_const(wm->yclk);
8652 yclk.full = dfixed_div(yclk, a);
8653 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8654 a.full = dfixed_const(10);
8655 dram_efficiency.full = dfixed_const(7);
8656 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8657 bandwidth.full = dfixed_mul(dram_channels, yclk);
8658 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8659
8660 return dfixed_trunc(bandwidth);
8661}
8662
8663/**
8664 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8665 *
8666 * @wm: watermark calculation data
8667 *
8668 * Calculate the dram bandwidth used for display (CIK).
8669 * Used for display watermark bandwidth calculations
8670 * Returns the dram bandwidth for display in MBytes/s
8671 */
8672static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8673{
8674 /* Calculate DRAM Bandwidth and the part allocated to display. */
8675 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8676 fixed20_12 yclk, dram_channels, bandwidth;
8677 fixed20_12 a;
8678
8679 a.full = dfixed_const(1000);
8680 yclk.full = dfixed_const(wm->yclk);
8681 yclk.full = dfixed_div(yclk, a);
8682 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8683 a.full = dfixed_const(10);
8684 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8685 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8686 bandwidth.full = dfixed_mul(dram_channels, yclk);
8687 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8688
8689 return dfixed_trunc(bandwidth);
8690}
8691
8692/**
8693 * dce8_data_return_bandwidth - get the data return bandwidth
8694 *
8695 * @wm: watermark calculation data
8696 *
8697 * Calculate the data return bandwidth used for display (CIK).
8698 * Used for display watermark bandwidth calculations
8699 * Returns the data return bandwidth in MBytes/s
8700 */
8701static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8702{
8703 /* Calculate the display Data return Bandwidth */
8704 fixed20_12 return_efficiency; /* 0.8 */
8705 fixed20_12 sclk, bandwidth;
8706 fixed20_12 a;
8707
8708 a.full = dfixed_const(1000);
8709 sclk.full = dfixed_const(wm->sclk);
8710 sclk.full = dfixed_div(sclk, a);
8711 a.full = dfixed_const(10);
8712 return_efficiency.full = dfixed_const(8);
8713 return_efficiency.full = dfixed_div(return_efficiency, a);
8714 a.full = dfixed_const(32);
8715 bandwidth.full = dfixed_mul(a, sclk);
8716 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8717
8718 return dfixed_trunc(bandwidth);
8719}
8720
8721/**
8722 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8723 *
8724 * @wm: watermark calculation data
8725 *
8726 * Calculate the dmif bandwidth used for display (CIK).
8727 * Used for display watermark bandwidth calculations
8728 * Returns the dmif bandwidth in MBytes/s
8729 */
8730static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8731{
8732 /* Calculate the DMIF Request Bandwidth */
8733 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8734 fixed20_12 disp_clk, bandwidth;
8735 fixed20_12 a, b;
8736
8737 a.full = dfixed_const(1000);
8738 disp_clk.full = dfixed_const(wm->disp_clk);
8739 disp_clk.full = dfixed_div(disp_clk, a);
8740 a.full = dfixed_const(32);
8741 b.full = dfixed_mul(a, disp_clk);
8742
8743 a.full = dfixed_const(10);
8744 disp_clk_request_efficiency.full = dfixed_const(8);
8745 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8746
8747 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8748
8749 return dfixed_trunc(bandwidth);
8750}
8751
8752/**
8753 * dce8_available_bandwidth - get the min available bandwidth
8754 *
8755 * @wm: watermark calculation data
8756 *
8757 * Calculate the min available bandwidth used for display (CIK).
8758 * Used for display watermark bandwidth calculations
8759 * Returns the min available bandwidth in MBytes/s
8760 */
8761static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8762{
8763 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8764 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8765 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8766 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8767
8768 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8769}
8770
8771/**
8772 * dce8_average_bandwidth - get the average available bandwidth
8773 *
8774 * @wm: watermark calculation data
8775 *
8776 * Calculate the average available bandwidth used for display (CIK).
8777 * Used for display watermark bandwidth calculations
8778 * Returns the average available bandwidth in MBytes/s
8779 */
8780static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8781{
8782 /* Calculate the display mode Average Bandwidth
8783 * DisplayMode should contain the source and destination dimensions,
8784 * timing, etc.
8785 */
8786 fixed20_12 bpp;
8787 fixed20_12 line_time;
8788 fixed20_12 src_width;
8789 fixed20_12 bandwidth;
8790 fixed20_12 a;
8791
8792 a.full = dfixed_const(1000);
8793 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8794 line_time.full = dfixed_div(line_time, a);
8795 bpp.full = dfixed_const(wm->bytes_per_pixel);
8796 src_width.full = dfixed_const(wm->src_width);
8797 bandwidth.full = dfixed_mul(src_width, bpp);
8798 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8799 bandwidth.full = dfixed_div(bandwidth, line_time);
8800
8801 return dfixed_trunc(bandwidth);
8802}
8803
8804/**
8805 * dce8_latency_watermark - get the latency watermark
8806 *
8807 * @wm: watermark calculation data
8808 *
8809 * Calculate the latency watermark (CIK).
8810 * Used for display watermark bandwidth calculations
8811 * Returns the latency watermark in ns
8812 */
8813static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8814{
8815 /* First calculate the latency in ns */
8816 u32 mc_latency = 2000; /* 2000 ns. */
8817 u32 available_bandwidth = dce8_available_bandwidth(wm);
8818 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8819 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8820 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8821 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8822 (wm->num_heads * cursor_line_pair_return_time);
8823 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8824 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8825 u32 tmp, dmif_size = 12288;
8826 fixed20_12 a, b, c;
8827
8828 if (wm->num_heads == 0)
8829 return 0;
8830
8831 a.full = dfixed_const(2);
8832 b.full = dfixed_const(1);
8833 if ((wm->vsc.full > a.full) ||
8834 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8835 (wm->vtaps >= 5) ||
8836 ((wm->vsc.full >= a.full) && wm->interlaced))
8837 max_src_lines_per_dst_line = 4;
8838 else
8839 max_src_lines_per_dst_line = 2;
8840
8841 a.full = dfixed_const(available_bandwidth);
8842 b.full = dfixed_const(wm->num_heads);
8843 a.full = dfixed_div(a, b);
8844
8845 b.full = dfixed_const(mc_latency + 512);
8846 c.full = dfixed_const(wm->disp_clk);
8847 b.full = dfixed_div(b, c);
8848
8849 c.full = dfixed_const(dmif_size);
8850 b.full = dfixed_div(c, b);
8851
8852 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8853
8854 b.full = dfixed_const(1000);
8855 c.full = dfixed_const(wm->disp_clk);
8856 b.full = dfixed_div(c, b);
8857 c.full = dfixed_const(wm->bytes_per_pixel);
8858 b.full = dfixed_mul(b, c);
8859
8860 lb_fill_bw = min(tmp, dfixed_trunc(b));
8861
8862 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8863 b.full = dfixed_const(1000);
8864 c.full = dfixed_const(lb_fill_bw);
8865 b.full = dfixed_div(c, b);
8866 a.full = dfixed_div(a, b);
8867 line_fill_time = dfixed_trunc(a);
8868
8869 if (line_fill_time < wm->active_time)
8870 return latency;
8871 else
8872 return latency + (line_fill_time - wm->active_time);
8873
8874}
8875
8876/**
8877 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8878 * average and available dram bandwidth
8879 *
8880 * @wm: watermark calculation data
8881 *
8882 * Check if the display average bandwidth fits in the display
8883 * dram bandwidth (CIK).
8884 * Used for display watermark bandwidth calculations
8885 * Returns true if the display fits, false if not.
8886 */
8887static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8888{
8889 if (dce8_average_bandwidth(wm) <=
8890 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8891 return true;
8892 else
8893 return false;
8894}
8895
8896/**
8897 * dce8_average_bandwidth_vs_available_bandwidth - check
8898 * average and available bandwidth
8899 *
8900 * @wm: watermark calculation data
8901 *
8902 * Check if the display average bandwidth fits in the display
8903 * available bandwidth (CIK).
8904 * Used for display watermark bandwidth calculations
8905 * Returns true if the display fits, false if not.
8906 */
8907static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8908{
8909 if (dce8_average_bandwidth(wm) <=
8910 (dce8_available_bandwidth(wm) / wm->num_heads))
8911 return true;
8912 else
8913 return false;
8914}
8915
8916/**
8917 * dce8_check_latency_hiding - check latency hiding
8918 *
8919 * @wm: watermark calculation data
8920 *
8921 * Check latency hiding (CIK).
8922 * Used for display watermark bandwidth calculations
8923 * Returns true if the display fits, false if not.
8924 */
8925static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8926{
8927 u32 lb_partitions = wm->lb_size / wm->src_width;
8928 u32 line_time = wm->active_time + wm->blank_time;
8929 u32 latency_tolerant_lines;
8930 u32 latency_hiding;
8931 fixed20_12 a;
8932
8933 a.full = dfixed_const(1);
8934 if (wm->vsc.full > a.full)
8935 latency_tolerant_lines = 1;
8936 else {
8937 if (lb_partitions <= (wm->vtaps + 1))
8938 latency_tolerant_lines = 1;
8939 else
8940 latency_tolerant_lines = 2;
8941 }
8942
8943 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8944
8945 if (dce8_latency_watermark(wm) <= latency_hiding)
8946 return true;
8947 else
8948 return false;
8949}
8950
8951/**
8952 * dce8_program_watermarks - program display watermarks
8953 *
8954 * @rdev: radeon_device pointer
8955 * @radeon_crtc: the selected display controller
8956 * @lb_size: line buffer size
8957 * @num_heads: number of display controllers in use
8958 *
8959 * Calculate and program the display watermarks for the
8960 * selected display controller (CIK).
8961 */
8962static void dce8_program_watermarks(struct radeon_device *rdev,
8963 struct radeon_crtc *radeon_crtc,
8964 u32 lb_size, u32 num_heads)
8965{
8966 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8967 struct dce8_wm_params wm_low, wm_high;
8968 u32 pixel_period;
8969 u32 line_time = 0;
8970 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8971 u32 tmp, wm_mask;
8972
8973 if (radeon_crtc->base.enabled && num_heads && mode) {
8974 pixel_period = 1000000 / (u32)mode->clock;
8975 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8976
8977 /* watermark for high clocks */
8978 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8979 rdev->pm.dpm_enabled) {
8980 wm_high.yclk =
8981 radeon_dpm_get_mclk(rdev, false) * 10;
8982 wm_high.sclk =
8983 radeon_dpm_get_sclk(rdev, false) * 10;
8984 } else {
8985 wm_high.yclk = rdev->pm.current_mclk * 10;
8986 wm_high.sclk = rdev->pm.current_sclk * 10;
8987 }
8988
8989 wm_high.disp_clk = mode->clock;
8990 wm_high.src_width = mode->crtc_hdisplay;
8991 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8992 wm_high.blank_time = line_time - wm_high.active_time;
8993 wm_high.interlaced = false;
8994 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8995 wm_high.interlaced = true;
8996 wm_high.vsc = radeon_crtc->vsc;
8997 wm_high.vtaps = 1;
8998 if (radeon_crtc->rmx_type != RMX_OFF)
8999 wm_high.vtaps = 2;
9000 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9001 wm_high.lb_size = lb_size;
9002 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9003 wm_high.num_heads = num_heads;
9004
9005 /* set for high clocks */
9006 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9007
9008 /* possibly force display priority to high */
9009 /* should really do this at mode validation time... */
9010 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9011 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9012 !dce8_check_latency_hiding(&wm_high) ||
9013 (rdev->disp_priority == 2)) {
9014 DRM_DEBUG_KMS("force priority to high\n");
9015 }
9016
9017 /* watermark for low clocks */
9018 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9019 rdev->pm.dpm_enabled) {
9020 wm_low.yclk =
9021 radeon_dpm_get_mclk(rdev, true) * 10;
9022 wm_low.sclk =
9023 radeon_dpm_get_sclk(rdev, true) * 10;
9024 } else {
9025 wm_low.yclk = rdev->pm.current_mclk * 10;
9026 wm_low.sclk = rdev->pm.current_sclk * 10;
9027 }
9028
9029 wm_low.disp_clk = mode->clock;
9030 wm_low.src_width = mode->crtc_hdisplay;
9031 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9032 wm_low.blank_time = line_time - wm_low.active_time;
9033 wm_low.interlaced = false;
9034 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9035 wm_low.interlaced = true;
9036 wm_low.vsc = radeon_crtc->vsc;
9037 wm_low.vtaps = 1;
9038 if (radeon_crtc->rmx_type != RMX_OFF)
9039 wm_low.vtaps = 2;
9040 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9041 wm_low.lb_size = lb_size;
9042 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9043 wm_low.num_heads = num_heads;
9044
9045 /* set for low clocks */
9046 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9047
9048 /* possibly force display priority to high */
9049 /* should really do this at mode validation time... */
9050 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9051 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9052 !dce8_check_latency_hiding(&wm_low) ||
9053 (rdev->disp_priority == 2)) {
9054 DRM_DEBUG_KMS("force priority to high\n");
9055 }
9056 }
9057
9058 /* select wm A */
9059 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9060 tmp = wm_mask;
9061 tmp &= ~LATENCY_WATERMARK_MASK(3);
9062 tmp |= LATENCY_WATERMARK_MASK(1);
9063 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9064 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9065 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9066 LATENCY_HIGH_WATERMARK(line_time)));
9067 /* select wm B */
9068 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9069 tmp &= ~LATENCY_WATERMARK_MASK(3);
9070 tmp |= LATENCY_WATERMARK_MASK(2);
9071 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9072 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9073 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9074 LATENCY_HIGH_WATERMARK(line_time)));
9075 /* restore original selection */
9076 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9077
9078 /* save values for DPM */
9079 radeon_crtc->line_time = line_time;
9080 radeon_crtc->wm_high = latency_watermark_a;
9081 radeon_crtc->wm_low = latency_watermark_b;
9082}
9083
9084/**
9085 * dce8_bandwidth_update - program display watermarks
9086 *
9087 * @rdev: radeon_device pointer
9088 *
9089 * Calculate and program the display watermarks and line
9090 * buffer allocation (CIK).
9091 */
9092void dce8_bandwidth_update(struct radeon_device *rdev)
9093{
9094 struct drm_display_mode *mode = NULL;
9095 u32 num_heads = 0, lb_size;
9096 int i;
9097
9098 radeon_update_display_priority(rdev);
9099
9100 for (i = 0; i < rdev->num_crtc; i++) {
9101 if (rdev->mode_info.crtcs[i]->base.enabled)
9102 num_heads++;
9103 }
9104 for (i = 0; i < rdev->num_crtc; i++) {
9105 mode = &rdev->mode_info.crtcs[i]->base.mode;
9106 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9107 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9108 }
9109}
9110
9111/**
9112 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9113 *
9114 * @rdev: radeon_device pointer
9115 *
9116 * Fetches a GPU clock counter snapshot (SI).
9117 * Returns the 64 bit clock counter snapshot.
9118 */
9119uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9120{
9121 uint64_t clock;
9122
9123 mutex_lock(&rdev->gpu_clock_mutex);
9124 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9125 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9126 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9127 mutex_unlock(&rdev->gpu_clock_mutex);
9128 return clock;
9129}
9130
9131static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9132 u32 cntl_reg, u32 status_reg)
9133{
9134 int r, i;
9135 struct atom_clock_dividers dividers;
9136 uint32_t tmp;
9137
9138 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9139 clock, false, &dividers);
9140 if (r)
9141 return r;
9142
9143 tmp = RREG32_SMC(cntl_reg);
9144 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9145 tmp |= dividers.post_divider;
9146 WREG32_SMC(cntl_reg, tmp);
9147
9148 for (i = 0; i < 100; i++) {
9149 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9150 break;
9151 mdelay(10);
9152 }
9153 if (i == 100)
9154 return -ETIMEDOUT;
9155
9156 return 0;
9157}
9158
9159int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9160{
9161 int r = 0;
9162
9163 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9164 if (r)
9165 return r;
9166
9167 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9168 return r;
9169}
9170
9171int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9172{
9173 int r, i;
9174 struct atom_clock_dividers dividers;
9175 u32 tmp;
9176
9177 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9178 ecclk, false, &dividers);
9179 if (r)
9180 return r;
9181
9182 for (i = 0; i < 100; i++) {
9183 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9184 break;
9185 mdelay(10);
9186 }
9187 if (i == 100)
9188 return -ETIMEDOUT;
9189
9190 tmp = RREG32_SMC(CG_ECLK_CNTL);
9191 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9192 tmp |= dividers.post_divider;
9193 WREG32_SMC(CG_ECLK_CNTL, tmp);
9194
9195 for (i = 0; i < 100; i++) {
9196 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9197 break;
9198 mdelay(10);
9199 }
9200 if (i == 100)
9201 return -ETIMEDOUT;
9202
9203 return 0;
9204}
9205
9206static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9207{
9208#ifndef __NetBSD__ /* XXX radeon pcie */
9209 struct pci_dev *root = rdev->pdev->bus->self;
9210 int bridge_pos, gpu_pos;
9211 u32 speed_cntl, mask, current_data_rate;
9212 int ret, i;
9213 u16 tmp16;
9214
9215 if (radeon_pcie_gen2 == 0)
9216 return;
9217
9218 if (rdev->flags & RADEON_IS_IGP)
9219 return;
9220
9221 if (!(rdev->flags & RADEON_IS_PCIE))
9222 return;
9223
9224 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9225 if (ret != 0)
9226 return;
9227
9228 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9229 return;
9230
9231 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9232 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9233 LC_CURRENT_DATA_RATE_SHIFT;
9234 if (mask & DRM_PCIE_SPEED_80) {
9235 if (current_data_rate == 2) {
9236 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9237 return;
9238 }
9239 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9240 } else if (mask & DRM_PCIE_SPEED_50) {
9241 if (current_data_rate == 1) {
9242 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9243 return;
9244 }
9245 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9246 }
9247
9248 bridge_pos = pci_pcie_cap(root);
9249 if (!bridge_pos)
9250 return;
9251
9252 gpu_pos = pci_pcie_cap(rdev->pdev);
9253 if (!gpu_pos)
9254 return;
9255
9256 if (mask & DRM_PCIE_SPEED_80) {
9257 /* re-try equalization if gen3 is not already enabled */
9258 if (current_data_rate != 2) {
9259 u16 bridge_cfg, gpu_cfg;
9260 u16 bridge_cfg2, gpu_cfg2;
9261 u32 max_lw, current_lw, tmp;
9262
9263 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9264 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9265
9266 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9267 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9268
9269 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9270 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9271
9272 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9273 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9274 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9275
9276 if (current_lw < max_lw) {
9277 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9278 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9279 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9280 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9281 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9282 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9283 }
9284 }
9285
9286 for (i = 0; i < 10; i++) {
9287 /* check status */
9288 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9289 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9290 break;
9291
9292 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9293 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9294
9295 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9296 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9297
9298 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9299 tmp |= LC_SET_QUIESCE;
9300 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9301
9302 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9303 tmp |= LC_REDO_EQ;
9304 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9305
9306 mdelay(100);
9307
9308 /* linkctl */
9309 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9310 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9311 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9312 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9313
9314 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9315 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9316 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9317 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9318
9319 /* linkctl2 */
9320 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9321 tmp16 &= ~((1 << 4) | (7 << 9));
9322 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9323 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9324
9325 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9326 tmp16 &= ~((1 << 4) | (7 << 9));
9327 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9328 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9329
9330 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9331 tmp &= ~LC_SET_QUIESCE;
9332 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9333 }
9334 }
9335 }
9336
9337 /* set the link speed */
9338 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9339 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9340 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9341
9342 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9343 tmp16 &= ~0xf;
9344 if (mask & DRM_PCIE_SPEED_80)
9345 tmp16 |= 3; /* gen3 */
9346 else if (mask & DRM_PCIE_SPEED_50)
9347 tmp16 |= 2; /* gen2 */
9348 else
9349 tmp16 |= 1; /* gen1 */
9350 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9351
9352 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9353 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9354 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9355
9356 for (i = 0; i < rdev->usec_timeout; i++) {
9357 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9358 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9359 break;
9360 udelay(1);
9361 }
9362#endif
9363}
9364
9365static void cik_program_aspm(struct radeon_device *rdev)
9366{
9367 u32 data, orig;
9368 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9369 bool disable_clkreq = false;
9370
9371 if (radeon_aspm == 0)
9372 return;
9373
9374 /* XXX double check IGPs */
9375 if (rdev->flags & RADEON_IS_IGP)
9376 return;
9377
9378 if (!(rdev->flags & RADEON_IS_PCIE))
9379 return;
9380
9381 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9382 data &= ~LC_XMIT_N_FTS_MASK;
9383 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9384 if (orig != data)
9385 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9386
9387 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9388 data |= LC_GO_TO_RECOVERY;
9389 if (orig != data)
9390 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9391
9392 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9393 data |= P_IGNORE_EDB_ERR;
9394 if (orig != data)
9395 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9396
9397 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9398 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9399 data |= LC_PMI_TO_L1_DIS;
9400 if (!disable_l0s)
9401 data |= LC_L0S_INACTIVITY(7);
9402
9403 if (!disable_l1) {
9404 data |= LC_L1_INACTIVITY(7);
9405 data &= ~LC_PMI_TO_L1_DIS;
9406 if (orig != data)
9407 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9408
9409 if (!disable_plloff_in_l1) {
9410 bool clk_req_support;
9411
9412 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9413 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9414 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9415 if (orig != data)
9416 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9417
9418 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9419 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9420 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9421 if (orig != data)
9422 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9423
9424 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9425 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9426 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9427 if (orig != data)
9428 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9429
9430 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9431 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9432 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9433 if (orig != data)
9434 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9435
9436 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9437 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9438 data |= LC_DYN_LANES_PWR_STATE(3);
9439 if (orig != data)
9440 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9441
9442 if (!disable_clkreq) {
9443#ifndef __NetBSD__ /* XXX radeon pcie */
9444 struct pci_dev *root = rdev->pdev->bus->self;
9445 u32 lnkcap;
9446#endif
9447
9448 clk_req_support = false;
9449#ifndef __NetBSD__ /* XXX radeon pcie */
9450 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9451 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9452 clk_req_support = true;
9453#endif
9454 } else {
9455 clk_req_support = false;
9456 }
9457
9458 if (clk_req_support) {
9459 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9460 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9461 if (orig != data)
9462 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9463
9464 orig = data = RREG32_SMC(THM_CLK_CNTL);
9465 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9466 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9467 if (orig != data)
9468 WREG32_SMC(THM_CLK_CNTL, data);
9469
9470 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9471 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9472 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9473 if (orig != data)
9474 WREG32_SMC(MISC_CLK_CTRL, data);
9475
9476 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9477 data &= ~BCLK_AS_XCLK;
9478 if (orig != data)
9479 WREG32_SMC(CG_CLKPIN_CNTL, data);
9480
9481 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9482 data &= ~FORCE_BIF_REFCLK_EN;
9483 if (orig != data)
9484 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9485
9486 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9487 data &= ~MPLL_CLKOUT_SEL_MASK;
9488 data |= MPLL_CLKOUT_SEL(4);
9489 if (orig != data)
9490 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9491 }
9492 }
9493 } else {
9494 if (orig != data)
9495 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9496 }
9497
9498 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9499 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9500 if (orig != data)
9501 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9502
9503 if (!disable_l0s) {
9504 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9505 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9506 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9507 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9508 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9509 data &= ~LC_L0S_INACTIVITY_MASK;
9510 if (orig != data)
9511 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9512 }
9513 }
9514 }
9515}
9516