Merge branch 'drm-next-3.16' of git://people.freedesktop.org/~agd5f/linux into drm...
author Dave Airlie <airlied@redhat.com>
Tue, 10 Jun 2014 03:09:01 +0000 (13:09 +1000)
committer Dave Airlie <airlied@redhat.com>
Tue, 10 Jun 2014 03:09:01 +0000 (13:09 +1000)
Some additional radeon patches for 3.16 now that -fixes has been merged.

- GART fix for all r6xx+ ASICs
- Add VM tuning module parameters (vm_size, vm_block_size)
- Miscellaneous fixes

* 'drm-next-3.16' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: Move fb update from radeon_flip_work_func to radeon_crtc_page_flip
  drm/radeon/dpm: powertune updates for SI
  Revert "drm/radeon: use variable UVD clocks"
  drm/radeon: add query for number of active CUs
  drm/radeon: add debugfs file to trigger GPU reset
  drm/radeon: make vm_block_size a module parameter
  drm/radeon: make VM size a module parameter (v2)
  drm/radeon: rename alt_domain to allowed_domains
  drm/radeon: use the SDMA on for buffer moves on CIK again
  drm/radeon: remove range check from *_gart_set_page
  drm/radeon: stop poisoning the GART TLB
  drm/radeon: hdmi deep color modes must obey clock limit of sink.
  drm/edid: Store all supported hdmi deep color modes in drm_display_info
  drm/radeon: add missing vce init case for hawaii
  drm/radeon: use lower_32_bits where appropriate

34 files changed:
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_sdma.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r300.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_fence.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/radeon_mode.h
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/radeon/radeon_uvd.c
drivers/gpu/drm/radeon/radeon_vce.c
drivers/gpu/drm/radeon/radeon_vm.c
drivers/gpu/drm/radeon/rs400.c
drivers/gpu/drm/radeon/rs600.c
drivers/gpu/drm/radeon/rv770.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/si_dma.c
drivers/gpu/drm/radeon/si_dpm.c
drivers/gpu/drm/radeon/uvd_v2_2.c
include/drm/drm_crtc.h
include/uapi/drm/radeon_drm.h

index 7be2178..dfa9769 100644 (file)
@@ -3471,18 +3471,21 @@ static bool drm_assign_hdmi_deep_color_info(struct edid *edid,
 
                        if (hdmi[6] & DRM_EDID_HDMI_DC_30) {
                                dc_bpc = 10;
+                               info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_30;
                                DRM_DEBUG("%s: HDMI sink does deep color 30.\n",
                                                  connector->name);
                        }
 
                        if (hdmi[6] & DRM_EDID_HDMI_DC_36) {
                                dc_bpc = 12;
+                               info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_36;
                                DRM_DEBUG("%s: HDMI sink does deep color 36.\n",
                                                  connector->name);
                        }
 
                        if (hdmi[6] & DRM_EDID_HDMI_DC_48) {
                                dc_bpc = 16;
+                               info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_48;
                                DRM_DEBUG("%s: HDMI sink does deep color 48.\n",
                                                  connector->name);
                        }
index 76c30f2..26c12a3 100644 (file)
@@ -962,6 +962,9 @@ static bool atombios_crtc_prepare_pll(struct drm_crtc *crtc, struct drm_display_
                struct radeon_connector_atom_dig *dig_connector =
                        radeon_connector->con_priv;
                int dp_clock;
+
+               /* Assign mode clock for hdmi deep color max clock limit check */
+               radeon_connector->pixelclock_for_modeset = mode->clock;
                radeon_crtc->bpc = radeon_get_monitor_bpc(connector);
 
                switch (encoder_mode) {
index 69a00d6..dcd4518 100644 (file)
@@ -80,6 +80,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern void si_rlc_reset(struct radeon_device *rdev);
 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
@@ -3257,7 +3258,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 hdp_host_path_cntl;
        u32 tmp;
-       int i, j;
+       int i, j, k;
 
        switch (rdev->family) {
        case CHIP_BONAIRE:
@@ -3446,6 +3447,15 @@ static void cik_gpu_init(struct radeon_device *rdev)
                     rdev->config.cik.max_sh_per_se,
                     rdev->config.cik.max_backends_per_se);
 
+       for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+                       for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
+                               rdev->config.cik.active_cus +=
+                                       hweight32(cik_get_cu_active_bitmap(rdev, i, j));
+                       }
+               }
+       }
+
        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
 
@@ -3698,7 +3708,7 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev,
        unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
 
        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
-       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
 
        return true;
@@ -3818,7 +3828,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-                       radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+                       radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                        radeon_ring_write(ring, next_rptr);
                }
 
@@ -5446,7 +5456,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-                               PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) |
+                               PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
index 1347162..8e9d0f1 100644 (file)
@@ -141,7 +141,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
                next_rptr += 4;
                radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
                radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                radeon_ring_write(ring, 1); /* number of DWs to follow */
                radeon_ring_write(ring, next_rptr);
        }
@@ -151,7 +151,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
                radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
        radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
-       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+       radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
        radeon_ring_write(ring, ib->length_dw);
 
 }
@@ -203,8 +203,8 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
 
        /* write the fence */
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
-       radeon_ring_write(ring, addr & 0xffffffff);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+       radeon_ring_write(ring, lower_32_bits(addr));
+       radeon_ring_write(ring, upper_32_bits(addr));
        radeon_ring_write(ring, fence->seq);
        /* generate an interrupt */
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
@@ -233,7 +233,7 @@ bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
 
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
        radeon_ring_write(ring, addr & 0xfffffff8);
-       radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+       radeon_ring_write(ring, upper_32_bits(addr));
 
        return true;
 }
@@ -551,10 +551,10 @@ int cik_copy_dma(struct radeon_device *rdev,
                radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
                radeon_ring_write(ring, cur_size_in_bytes);
                radeon_ring_write(ring, 0); /* src/dst endian swap */
-               radeon_ring_write(ring, src_offset & 0xffffffff);
-               radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
-               radeon_ring_write(ring, dst_offset & 0xffffffff);
-               radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
+               radeon_ring_write(ring, lower_32_bits(src_offset));
+               radeon_ring_write(ring, upper_32_bits(src_offset));
+               radeon_ring_write(ring, lower_32_bits(dst_offset));
+               radeon_ring_write(ring, upper_32_bits(dst_offset));
                src_offset += cur_size_in_bytes;
                dst_offset += cur_size_in_bytes;
        }
@@ -605,7 +605,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
        }
        radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
        radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
+       radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
        radeon_ring_write(ring, 1); /* number of DWs to follow */
        radeon_ring_write(ring, 0xDEADBEEF);
        radeon_ring_unlock_commit(rdev, ring);
@@ -660,7 +660,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 
        ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
        ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
+       ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr);
        ib.ptr[3] = 1;
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;
@@ -752,9 +752,9 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev,
                        ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
                        ib->ptr[ib->length_dw++] = bytes;
                        ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-                       ib->ptr[ib->length_dw++] = src & 0xffffffff;
+                       ib->ptr[ib->length_dw++] = lower_32_bits(src);
                        ib->ptr[ib->length_dw++] = upper_32_bits(src);
-                       ib->ptr[ib->length_dw++] = pe & 0xffffffff;
+                       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
                        ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 
                        pe += bytes;
index 653eff8..e2f6052 100644 (file)
@@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
                        disabled_rb_mask &= ~(1 << i);
        }
 
+       for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
+               u32 simd_disable_bitmap;
+
+               WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+               simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds;
+               tmp <<= 16;
+               tmp |= simd_disable_bitmap;
+       }
+       rdev->config.evergreen.active_simds = hweight32(~tmp);
+
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
index 1d3209f..5a33ca6 100644 (file)
@@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev)
                        disabled_rb_mask &= ~(1 << i);
        }
 
+       for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
+               u32 simd_disable_bitmap;
+
+               WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+               simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+               simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
+               tmp <<= 16;
+               tmp |= simd_disable_bitmap;
+       }
+       rdev->config.cayman.active_simds = hweight32(~tmp);
+
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
@@ -1268,7 +1280,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-                               PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) |
+                               PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
@@ -1346,7 +1358,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
-       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
index ad99813..1544efc 100644 (file)
@@ -682,15 +682,11 @@ void r100_pci_gart_disable(struct radeon_device *rdev)
        WREG32(RADEON_AIC_HI_ADDR, 0);
 }
 
-int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
+                           uint64_t addr)
 {
        u32 *gtt = rdev->gart.ptr;
-
-       if (i < 0 || i > rdev->gart.num_gpu_pages) {
-               return -EINVAL;
-       }
        gtt[i] = cpu_to_le32(lower_32_bits(addr));
-       return 0;
 }
 
 void r100_pci_gart_fini(struct radeon_device *rdev)
index 206caf9..3c21d77 100644 (file)
@@ -72,13 +72,11 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)
 
-int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+                             uint64_t addr)
 {
        void __iomem *ptr = rdev->gart.ptr;
 
-       if (i < 0 || i > rdev->gart.num_gpu_pages) {
-               return -EINVAL;
-       }
        addr = (lower_32_bits(addr) >> 8) |
               ((upper_32_bits(addr) & 0xff) << 24) |
               R300_PTE_WRITEABLE | R300_PTE_READABLE;
@@ -86,7 +84,6 @@ int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
         * on powerpc without HW swappers, it'll get swapped on way
         * into VRAM - so no need for cpu_to_le32 on VRAM tables */
        writel(addr, ((void __iomem *)ptr) + (i * 4));
-       return 0;
 }
 
 int rv370_pcie_gart_init(struct radeon_device *rdev)
index c758812..c66952d 100644 (file)
@@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev)
        if (tmp < rdev->config.r600.max_simds) {
                rdev->config.r600.max_simds = tmp;
        }
+       tmp = rdev->config.r600.max_simds -
+               r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
+       rdev->config.r600.active_simds = tmp;
 
        disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
        tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
@@ -2724,7 +2727,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
                /* EVENT_WRITE_EOP - flush caches, send int */
                radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
                radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
-               radeon_ring_write(ring, addr & 0xffffffff);
+               radeon_ring_write(ring, lower_32_bits(addr));
                radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
                radeon_ring_write(ring, fence->seq);
                radeon_ring_write(ring, 0);
@@ -2763,7 +2766,7 @@ bool r600_semaphore_ring_emit(struct radeon_device *rdev,
                sel |= PACKET3_SEM_WAIT_ON_SIGNAL;
 
        radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
-       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
 
        return true;
@@ -2824,9 +2827,9 @@ int r600_copy_cpdma(struct radeon_device *rdev,
                if (size_in_bytes == 0)
                        tmp |= PACKET3_CP_DMA_CP_SYNC;
                radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4));
-               radeon_ring_write(ring, src_offset & 0xffffffff);
+               radeon_ring_write(ring, lower_32_bits(src_offset));
                radeon_ring_write(ring, tmp);
-               radeon_ring_write(ring, dst_offset & 0xffffffff);
+               radeon_ring_write(ring, lower_32_bits(dst_offset));
                radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
                radeon_ring_write(ring, cur_size_in_bytes);
                src_offset += cur_size_in_bytes;
index 7501ba3..4b0bbf8 100644 (file)
@@ -100,6 +100,8 @@ extern int radeon_dpm;
 extern int radeon_aspm;
 extern int radeon_runtime_pm;
 extern int radeon_hard_reset;
+extern int radeon_vm_size;
+extern int radeon_vm_block_size;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
@@ -837,13 +839,8 @@ struct radeon_mec {
 /* maximum number of VMIDs */
 #define RADEON_NUM_VM  16
 
-/* defines number of bits in page table versus page directory,
- * a page is 4KB so we have 12 bits offset, 9 bits in the page
- * table and the remaining 19 bits are in the page directory */
-#define RADEON_VM_BLOCK_SIZE   9
-
 /* number of entries in page table */
-#define RADEON_VM_PTE_COUNT (1 << RADEON_VM_BLOCK_SIZE)
+#define RADEON_VM_PTE_COUNT (1 << radeon_vm_block_size)
 
 /* PTBs (Page Table Blocks) need to be aligned to 32K */
 #define RADEON_VM_PTB_ALIGN_SIZE   32768
@@ -997,8 +994,8 @@ struct radeon_cs_reloc {
        struct radeon_bo                *robj;
        struct ttm_validate_buffer      tv;
        uint64_t                        gpu_offset;
-       unsigned                        domain;
-       unsigned                        alt_domain;
+       unsigned                        prefered_domains;
+       unsigned                        allowed_domains;
        uint32_t                        tiling_flags;
        uint32_t                        handle;
 };
@@ -1782,7 +1779,8 @@ struct radeon_asic {
        /* gart */
        struct {
                void (*tlb_flush)(struct radeon_device *rdev);
-               int (*set_page)(struct radeon_device *rdev, int i, uint64_t addr);
+               void (*set_page)(struct radeon_device *rdev, unsigned i,
+                                uint64_t addr);
        } gart;
        struct {
                int (*init)(struct radeon_device *rdev);
@@ -1934,6 +1932,7 @@ struct r600_asic {
        unsigned                tiling_group_size;
        unsigned                tile_config;
        unsigned                backend_map;
+       unsigned                active_simds;
 };
 
 struct rv770_asic {
@@ -1959,6 +1958,7 @@ struct rv770_asic {
        unsigned                tiling_group_size;
        unsigned                tile_config;
        unsigned                backend_map;
+       unsigned                active_simds;
 };
 
 struct evergreen_asic {
@@ -1985,6 +1985,7 @@ struct evergreen_asic {
        unsigned tiling_group_size;
        unsigned tile_config;
        unsigned backend_map;
+       unsigned active_simds;
 };
 
 struct cayman_asic {
@@ -2023,6 +2024,7 @@ struct cayman_asic {
        unsigned multi_gpu_tile_size;
 
        unsigned tile_config;
+       unsigned active_simds;
 };
 
 struct si_asic {
@@ -2053,6 +2055,7 @@ struct si_asic {
 
        unsigned tile_config;
        uint32_t tile_mode_array[32];
+       uint32_t active_cus;
 };
 
 struct cik_asic {
@@ -2084,6 +2087,7 @@ struct cik_asic {
        unsigned tile_config;
        uint32_t tile_mode_array[32];
        uint32_t macrotile_mode_array[16];
+       uint32_t active_cus;
 };
 
 union radeon_asic_config {
index 34ea53d..34b9aa9 100644 (file)
@@ -2029,8 +2029,8 @@ static struct radeon_asic ci_asic = {
                .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
                .dma = &cik_copy_dma,
                .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
-               .copy = &cik_copy_cpdma,
-               .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+               .copy = &cik_copy_dma,
+               .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
        },
        .surface = {
                .set_reg = r600_set_surface_reg,
index 0eab015..01e7c0a 100644 (file)
@@ -67,7 +67,8 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
-int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
+                           uint64_t addr);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -171,7 +172,8 @@ extern void r300_fence_ring_emit(struct radeon_device *rdev,
                                struct radeon_fence *fence);
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
-extern int rv370_pcie_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
+                                    uint64_t addr);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@@ -206,7 +208,8 @@ extern void rs400_fini(struct radeon_device *rdev);
 extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
-int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
+                        uint64_t addr);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@@ -229,7 +232,8 @@ int rs600_irq_process(struct radeon_device *rdev);
 void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
-int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr);
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
+                        uint64_t addr);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
index 4522f7d..933c5c3 100644 (file)
@@ -101,6 +101,7 @@ int radeon_get_monitor_bpc(struct drm_connector *connector)
        struct radeon_connector *radeon_connector = to_radeon_connector(connector);
        struct radeon_connector_atom_dig *dig_connector;
        int bpc = 8;
+       int mode_clock, max_tmds_clock;
 
        switch (connector->connector_type) {
        case DRM_MODE_CONNECTOR_DVII:
@@ -166,6 +167,36 @@ int radeon_get_monitor_bpc(struct drm_connector *connector)
                                          connector->name, bpc);
                        bpc = 12;
                }
+
+               /* Any defined maximum tmds clock limit we must not exceed? */
+               if (connector->max_tmds_clock > 0) {
+                       /* mode_clock is clock in kHz for mode to be modeset on this connector */
+                       mode_clock = radeon_connector->pixelclock_for_modeset;
+
+                       /* Maximum allowable input clock in kHz */
+                       max_tmds_clock = connector->max_tmds_clock * 1000;
+
+                       DRM_DEBUG("%s: hdmi mode dotclock %d kHz, max tmds input clock %d kHz.\n",
+                                         connector->name, mode_clock, max_tmds_clock);
+
+                       /* Check if bpc is within clock limit. Try to degrade gracefully otherwise */
+                       if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) {
+                               if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) &&
+                                       (mode_clock * 5/4 <= max_tmds_clock))
+                                       bpc = 10;
+                               else
+                                       bpc = 8;
+
+                               DRM_DEBUG("%s: HDMI deep color 12 bpc exceeds max tmds clock. Using %d bpc.\n",
+                                                 connector->name, bpc);
+                       }
+
+                       if ((bpc == 10) && (mode_clock * 5/4 > max_tmds_clock)) {
+                               bpc = 8;
+                               DRM_DEBUG("%s: HDMI deep color 10 bpc exceeds max tmds clock. Using %d bpc.\n",
+                                                 connector->name, bpc);
+                       }
+               }
        }
 
        DRM_DEBUG("%s: Display bpc=%d, returned bpc=%d\n",
index 41ecf8a..71a1434 100644 (file)
@@ -140,10 +140,10 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
                        /* TODO: is this still needed for NI+ ? */
-                       p->relocs[i].domain =
+                       p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;
 
-                       p->relocs[i].alt_domain =
+                       p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;
 
                        /* prioritize this over any other relocation */
@@ -158,10 +158,10 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
                                return -EINVAL;
                        }
 
-                       p->relocs[i].domain = domain;
+                       p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
-                       p->relocs[i].alt_domain = domain;
+                       p->relocs[i].allowed_domains = domain;
                }
 
                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
index 31565de..03686fa 100644 (file)
@@ -1052,6 +1052,43 @@ static void radeon_check_arguments(struct radeon_device *rdev)
                radeon_agpmode = 0;
                break;
        }
+
+       if (!radeon_check_pot_argument(radeon_vm_size)) {
+               dev_warn(rdev->dev, "VM size (%d) must be a power of 2\n",
+                        radeon_vm_size);
+               radeon_vm_size = 4096;
+       }
+
+       if (radeon_vm_size < 4) {
+               dev_warn(rdev->dev, "VM size (%d) too small, min is 4MB\n",
+                        radeon_vm_size);
+               radeon_vm_size = 4096;
+       }
+
+       /*
+        * Max GPUVM size for Cayman, SI and CI is 40 bits.
+        */
+       if (radeon_vm_size > 1024*1024) {
+               dev_warn(rdev->dev, "VM size (%d) too large, max is 1TB\n",
+                        radeon_vm_size);
+               radeon_vm_size = 4096;
+       }
+
+       /* defines number of bits in page table versus page directory,
+        * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+        * page table and the remaining bits are in the page directory */
+       if (radeon_vm_block_size < 9) {
+               dev_warn(rdev->dev, "VM page table size (%d) too small\n",
+                        radeon_vm_block_size);
+               radeon_vm_block_size = 9;
+       }
+
+       if (radeon_vm_block_size > 24 ||
+           radeon_vm_size < (1ull << radeon_vm_block_size)) {
+               dev_warn(rdev->dev, "VM page table size (%d) too large\n",
+                        radeon_vm_block_size);
+               radeon_vm_block_size = 9;
+       }
 }
 
 /**
@@ -1197,17 +1234,16 @@ int radeon_device_init(struct radeon_device *rdev,
        if (r)
                return r;
 
+       radeon_check_arguments(rdev);
        /* Adjust VM size here.
-        * Currently set to 4GB ((1 << 20) 4k pages).
-        * Max GPUVM size for cayman and SI is 40 bits.
+        * Max GPUVM size for cayman+ is 40 bits.
         */
-       rdev->vm_manager.max_pfn = 1 << 20;
+       rdev->vm_manager.max_pfn = radeon_vm_size << 8;
 
        /* Set asic functions */
        r = radeon_asic_init(rdev);
        if (r)
                return r;
-       radeon_check_arguments(rdev);
 
        /* all of the newer IGP chips have an internal gart
         * However some rs4xx report as AGP, so remove that here.
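
For reference, the new defaults reproduce the old hard-coded VM geometry; a small
self-contained sketch (not part of the patch) of the arithmetic:

/* Sketch: how the new module parameters translate into VM geometry. */
static unsigned int radeon_vm_geometry_example(void)
{
	unsigned int vm_size = 4096;		/* MB, default of radeon_vm_size */
	unsigned int block_size = 9;		/* bits, default of radeon_vm_block_size */
	unsigned int max_pfn = vm_size << 8;	/* 4096 MB / 4 KB pages = 1 << 20 */
	unsigned int num_pdes = max_pfn >> block_size;	/* 2048 page directory entries */
	unsigned int ptes_per_pt = 1 << block_size;	/* 512 PTEs per page table */

	return num_pdes * ptes_per_pt == max_pfn;	/* geometry stays consistent */
}
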
index a4e725c..5ed6170 100644 (file)
@@ -462,9 +462,6 @@ static void radeon_flip_work_func(struct work_struct *__work)
        /* We borrow the event spin lock for protecting flip_work */
        spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
-       /* update crtc fb */
-       crtc->primary->fb = fb;
-
        /* set the proper interrupt */
        radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
 
@@ -539,6 +536,9 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
        }
        radeon_crtc->flip_work = work;
 
+       /* update crtc fb */
+       crtc->primary->fb = fb;
+
        spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 
        queue_work(radeon_crtc->flip_queue, &work->flip_work);
index 15447a4..6e30174 100644 (file)
  *   2.37.0 - allow GS ring setup on r6xx/r7xx
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
+ *   2.39.0 - Add INFO query for number of active CUs
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       38
+#define KMS_DRIVER_MINOR       39
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -172,6 +173,8 @@ int radeon_dpm = -1;
 int radeon_aspm = -1;
 int radeon_runtime_pm = -1;
 int radeon_hard_reset = 0;
+int radeon_vm_size = 4096;
+int radeon_vm_block_size = 9;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -239,6 +242,12 @@ module_param_named(runpm, radeon_runtime_pm, int, 0444);
 MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
 module_param_named(hard_reset, radeon_hard_reset, int, 0444);
 
+MODULE_PARM_DESC(vm_size, "VM address space size in megabytes (default 4GB)");
+module_param_named(vm_size, radeon_vm_size, int, 0444);
+
+MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default 9)");
+module_param_named(vm_block_size, radeon_vm_block_size, int, 0444);
+
 static struct pci_device_id pciidlist[] = {
        radeon_PCI_IDS
 };
index a77b1c1..9137870 100644 (file)
@@ -819,15 +819,35 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
        return 0;
 }
 
+/**
+ * radeon_debugfs_gpu_reset - manually trigger a gpu reset
+ *
+ * Manually trigger a gpu reset at the next fence wait.
+ */
+static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
+{
+       struct drm_info_node *node = (struct drm_info_node *) m->private;
+       struct drm_device *dev = node->minor->dev;
+       struct radeon_device *rdev = dev->dev_private;
+
+       down_read(&rdev->exclusive_lock);
+       seq_printf(m, "%d\n", rdev->needs_reset);
+       rdev->needs_reset = true;
+       up_read(&rdev->exclusive_lock);
+
+       return 0;
+}
+
 static struct drm_info_list radeon_debugfs_fence_list[] = {
        {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
+       {"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
 };
 #endif
 
 int radeon_debugfs_fence_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
-       return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
+       return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
 #else
        return 0;
 #endif
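
The new file appears under the standard DRM debugfs directory for the card;
reading it prints the current needs_reset value and arms a reset at the next
fence wait. A minimal userspace sketch (the card index and debugfs mount point
are assumptions):

#include <stdio.h>

int main(void)
{
	char buf[8];
	/* Reading the file both reports needs_reset and sets it. */
	FILE *f = fopen("/sys/kernel/debug/dri/0/radeon_gpu_reset", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("needs_reset was %s", buf);
	fclose(f);
	return 0;
}
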
index f071737..35d9318 100644 (file)
@@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                value_size = sizeof(uint64_t);
                value64 = atomic64_read(&rdev->gtt_usage);
                break;
+       case RADEON_INFO_ACTIVE_CU_COUNT:
+               if (rdev->family >= CHIP_BONAIRE)
+                       *value = rdev->config.cik.active_cus;
+               else if (rdev->family >= CHIP_TAHITI)
+                       *value = rdev->config.si.active_cus;
+               else if (rdev->family >= CHIP_CAYMAN)
+                       *value = rdev->config.cayman.active_simds;
+               else if (rdev->family >= CHIP_CEDAR)
+                       *value = rdev->config.evergreen.active_simds;
+               else if (rdev->family >= CHIP_RV770)
+                       *value = rdev->config.rv770.active_simds;
+               else if (rdev->family >= CHIP_R600)
+                       *value = rdev->config.r600.active_simds;
+               else
+                       *value = 1;
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
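
Userspace reaches this through the existing RADEON_INFO ioctl with the new
request id (RADEON_INFO_ACTIVE_CU_COUNT, 0x20, added to the uapi header at the
end of this series). A hedged sketch assuming the drm uapi headers are available
as <drm/radeon_drm.h>; the device path is an assumption:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/radeon_drm.h>

int main(void)
{
	struct drm_radeon_info info = {0};
	uint32_t cus = 0;
	int fd = open("/dev/dri/card0", O_RDWR);

	if (fd < 0)
		return 1;
	info.request = RADEON_INFO_ACTIVE_CU_COUNT;
	info.value = (uintptr_t)&cus;	/* kernel copies the count back here */
	if (ioctl(fd, DRM_IOCTL_RADEON_INFO, &info) == 0)
		printf("active CUs/SIMDs: %u\n", cus);
	close(fd);
	return 0;
}
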
index ea72ad8..ad0e4b8 100644 (file)
@@ -506,6 +506,7 @@ struct radeon_connector {
        struct radeon_i2c_chan *router_bus;
        enum radeon_connector_audio audio;
        enum radeon_connector_dither dither;
+       int pixelclock_for_modeset;
 };
 
 struct radeon_framebuffer {
index 2918087..6c717b2 100644 (file)
@@ -446,7 +446,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
        list_for_each_entry(lobj, head, tv.head) {
                bo = lobj->robj;
                if (!bo->pin_count) {
-                       u32 domain = lobj->domain;
+                       u32 domain = lobj->prefered_domains;
                        u32 current_domain =
                                radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
 
@@ -458,7 +458,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
                         * into account. We don't want to disallow buffer moves
                         * completely.
                         */
-                       if ((lobj->alt_domain & current_domain) != 0 &&
+                       if ((lobj->allowed_domains & current_domain) != 0 &&
                            (domain & current_domain) == 0 && /* will be moved */
                            bytes_moved > bytes_moved_threshold) {
                                /* don't move it */
@@ -476,8 +476,9 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
                                       initial_bytes_moved;
 
                        if (unlikely(r)) {
-                               if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
-                                       domain = lobj->alt_domain;
+                               if (r != -ERESTARTSYS &&
+                                   domain != lobj->allowed_domains) {
+                                       domain = lobj->allowed_domains;
                                        goto retry;
                                }
                                ttm_eu_backoff_reservation(ticket, head);
index 2bdae61..12c663e 100644 (file)
@@ -984,6 +984,8 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
                if (enable) {
                        mutex_lock(&rdev->pm.mutex);
                        rdev->pm.dpm.uvd_active = true;
+                       /* disable this for now */
+#if 0
                        if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0))
                                dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD;
                        else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0))
@@ -993,6 +995,7 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
                        else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2))
                                dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2;
                        else
+#endif
                                dpm_state = POWER_STATE_TYPE_INTERNAL_UVD;
                        rdev->pm.dpm.state = dpm_state;
                        mutex_unlock(&rdev->pm.mutex);
index 1b65ae2..a4ad270 100644 (file)
@@ -812,7 +812,8 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
                    (rdev->pm.dpm.hd != hd)) {
                        rdev->pm.dpm.sd = sd;
                        rdev->pm.dpm.hd = hd;
-                       streams_changed = true;
+                       /* disable this for now */
+                       /*streams_changed = true;*/
                }
        }
 
index 3971d96..aa21c31 100644 (file)
@@ -66,6 +66,7 @@ int radeon_vce_init(struct radeon_device *rdev)
        case CHIP_BONAIRE:
        case CHIP_KAVERI:
        case CHIP_KABINI:
+       case CHIP_HAWAII:
        case CHIP_MULLINS:
                fw_name = FIRMWARE_BONAIRE;
                break;
index a72e9c8..899d912 100644 (file)
@@ -59,7 +59,7 @@
  */
 static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
 {
-       return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
+       return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
 }
 
 /**
@@ -140,8 +140,8 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
        /* add the vm page table to the list */
        list[0].gobj = NULL;
        list[0].robj = vm->page_directory;
-       list[0].domain = RADEON_GEM_DOMAIN_VRAM;
-       list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+       list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
+       list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
        list[0].tv.bo = &vm->page_directory->tbo;
        list[0].tiling_flags = 0;
        list[0].handle = 0;
@@ -153,8 +153,8 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
 
                list[idx].gobj = NULL;
                list[idx].robj = vm->page_tables[i].bo;
-               list[idx].domain = RADEON_GEM_DOMAIN_VRAM;
-               list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+               list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
+               list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
                list[idx].tv.bo = &list[idx].robj->tbo;
                list[idx].tiling_flags = 0;
                list[idx].handle = 0;
@@ -474,8 +474,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
        bo_va->valid = false;
        list_move(&bo_va->vm_list, head);
 
-       soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-       eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+       soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+       eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+
+       BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
 
        if (eoffset > vm->max_pde_used)
                vm->max_pde_used = eoffset;
@@ -583,10 +585,9 @@ static uint32_t radeon_vm_page_flags(uint32_t flags)
 int radeon_vm_update_page_directory(struct radeon_device *rdev,
                                    struct radeon_vm *vm)
 {
-       static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
-
        struct radeon_bo *pd = vm->page_directory;
        uint64_t pd_addr = radeon_bo_gpu_offset(pd);
+       uint32_t incr = RADEON_VM_PTE_COUNT * 8;
        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0, pt_idx, ndw;
        struct radeon_ib ib;
@@ -757,8 +758,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
                                  uint64_t start, uint64_t end,
                                  uint64_t dst, uint32_t flags)
 {
-       static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
-
+       uint64_t mask = RADEON_VM_PTE_COUNT - 1;
        uint64_t last_pte = ~0, last_dst = ~0;
        unsigned count = 0;
        uint64_t addr;
@@ -768,7 +768,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 
        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
-               uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
+               uint64_t pt_idx = addr >> radeon_vm_block_size;
                struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
                unsigned nptes;
                uint64_t pte;
@@ -873,13 +873,13 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
        /* padding, etc. */
        ndw = 64;
 
-       if (RADEON_VM_BLOCK_SIZE > 11)
+       if (radeon_vm_block_size > 11)
                /* reserve space for one header for every 2k dwords */
                ndw += (nptes >> 11) * 4;
        else
                /* reserve space for one header for
                    every (1 << BLOCK_SIZE) entries */
-               ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
+               ndw += (nptes >> radeon_vm_block_size) * 4;
 
        /* reserve space for pte addresses */
        ndw += nptes * 2;
index 130d5cc..a0f96de 100644 (file)
@@ -212,21 +212,16 @@ void rs400_gart_fini(struct radeon_device *rdev)
 #define RS400_PTE_WRITEABLE (1 << 2)
 #define RS400_PTE_READABLE  (1 << 3)
 
-int rs400_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
 {
        uint32_t entry;
        u32 *gtt = rdev->gart.ptr;
 
-       if (i < 0 || i > rdev->gart.num_gpu_pages) {
-               return -EINVAL;
-       }
-
        entry = (lower_32_bits(addr) & PAGE_MASK) |
                ((upper_32_bits(addr) & 0xff) << 4) |
                RS400_PTE_WRITEABLE | RS400_PTE_READABLE;
        entry = cpu_to_le32(entry);
        gtt[i] = entry;
-       return 0;
 }
 
 int rs400_mc_wait_for_idle(struct radeon_device *rdev)
index 0a8be63..d1a35cb 100644 (file)
@@ -626,17 +626,16 @@ static void rs600_gart_fini(struct radeon_device *rdev)
        radeon_gart_table_vram_free(rdev);
 }
 
-int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t addr)
 {
        void __iomem *ptr = (void *)rdev->gart.ptr;
 
-       if (i < 0 || i > rdev->gart.num_gpu_pages) {
-               return -EINVAL;
-       }
        addr = addr & 0xFFFFFFFFFFFFF000ULL;
-       addr |= R600_PTE_GART;
+       if (addr == rdev->dummy_page.addr)
+               addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED;
+       else
+               addr |= R600_PTE_GART;
        writeq(addr, ptr + (i * 8));
-       return 0;
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
index 97b7766..da8703d 100644 (file)
@@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
        if (tmp < rdev->config.rv770.max_simds) {
                rdev->config.rv770.max_simds = tmp;
        }
+       tmp = rdev->config.rv770.max_simds -
+               r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
+       rdev->config.rv770.active_simds = tmp;
 
        switch (rdev->config.rv770.max_tile_pipes) {
        case 1:
index d64ef91..730cee2 100644 (file)
@@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 
+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
@@ -2900,7 +2901,7 @@ static void si_gpu_init(struct radeon_device *rdev)
        u32 sx_debug_1;
        u32 hdp_host_path_cntl;
        u32 tmp;
-       int i, j;
+       int i, j, k;
 
        switch (rdev->family) {
        case CHIP_TAHITI:
@@ -3098,6 +3099,14 @@ static void si_gpu_init(struct radeon_device *rdev)
                     rdev->config.si.max_sh_per_se,
                     rdev->config.si.max_cu_per_sh);
 
+       for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+               for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
+                       for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
+                               rdev->config.si.active_cus +=
+                                       hweight32(si_get_cu_active_bitmap(rdev, i, j));
+                       }
+               }
+       }
 
        /* set HW defaults for 3D engine */
        WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
@@ -3186,7 +3195,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
-       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
@@ -3219,7 +3228,7 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
                        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                        radeon_ring_write(ring, (1 << 8));
                        radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-                       radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+                       radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
                        radeon_ring_write(ring, next_rptr);
                }
 
@@ -4095,7 +4104,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
-                               PAGE_TABLE_BLOCK_SIZE(RADEON_VM_BLOCK_SIZE - 9) |
+                               PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
index 9a660f8..e24c94b 100644 (file)
@@ -88,8 +88,8 @@ void si_dma_vm_set_page(struct radeon_device *rdev,
 
                        ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
                                                              1, 0, 0, bytes);
-                       ib->ptr[ib->length_dw++] = pe & 0xffffffff;
-                       ib->ptr[ib->length_dw++] = src & 0xffffffff;
+                       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+                       ib->ptr[ib->length_dw++] = lower_32_bits(src);
                        ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
                        ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
 
@@ -220,8 +220,8 @@ int si_copy_dma(struct radeon_device *rdev,
                        cur_size_in_bytes = 0xFFFFF;
                size_in_bytes -= cur_size_in_bytes;
                radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
-               radeon_ring_write(ring, dst_offset & 0xffffffff);
-               radeon_ring_write(ring, src_offset & 0xffffffff);
+               radeon_ring_write(ring, lower_32_bits(dst_offset));
+               radeon_ring_write(ring, lower_32_bits(src_offset));
                radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
                radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
                src_offset += cur_size_in_bytes;
index 9a3567b..5891886 100644 (file)
@@ -1948,6 +1948,10 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
                        si_pi->cac_weights = cac_weights_cape_verde_pro;
                        si_pi->dte_data = dte_data_cape_verde;
                        break;
+               case 0x682C:
+                       si_pi->cac_weights = cac_weights_cape_verde_pro;
+                       si_pi->dte_data = dte_data_sun_xt;
+                       break;
                case 0x6825:
                case 0x6827:
                        si_pi->cac_weights = cac_weights_heathrow;
@@ -1971,10 +1975,9 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
                        si_pi->dte_data = dte_data_venus_xt;
                        break;
                case 0x6823:
-                       si_pi->cac_weights = cac_weights_chelsea_pro;
-                       si_pi->dte_data = dte_data_venus_pro;
-                       break;
                case 0x682B:
+               case 0x6822:
+               case 0x682A:
                        si_pi->cac_weights = cac_weights_chelsea_pro;
                        si_pi->dte_data = dte_data_venus_pro;
                        break;
@@ -1988,6 +1991,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
                case 0x6601:
                case 0x6621:
                case 0x6603:
+               case 0x6605:
                        si_pi->cac_weights = cac_weights_mars_pro;
                        si_pi->lcac_config = lcac_mars_pro;
                        si_pi->cac_override = cac_override_oland;
@@ -1998,6 +2002,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
                case 0x6600:
                case 0x6606:
                case 0x6620:
+               case 0x6604:
                        si_pi->cac_weights = cac_weights_mars_xt;
                        si_pi->lcac_config = lcac_mars_pro;
                        si_pi->cac_override = cac_override_oland;
@@ -2006,6 +2011,8 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
                        update_dte_from_pl2 = true;
                        break;
                case 0x6611:
+               case 0x6613:
+               case 0x6608:
                        si_pi->cac_weights = cac_weights_oland_pro;
                        si_pi->lcac_config = lcac_mars_pro;
                        si_pi->cac_override = cac_override_oland;
index d177100..8bfdadd 100644 (file)
@@ -45,7 +45,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev,
        radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
-       radeon_ring_write(ring, addr & 0xffffffff);
+       radeon_ring_write(ring, lower_32_bits(addr));
        radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
        radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
        radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
index a7fac56..251b75e 100644 (file)
@@ -121,6 +121,9 @@ struct drm_display_info {
        enum subpixel_order subpixel_order;
        u32 color_formats;
 
+       /* Mask of supported hdmi deep color modes */
+       u8 edid_hdmi_dc_modes;
+
        u8 cea_rev;
 };
 
index aefa2f6..1cc0b61 100644 (file)
@@ -1007,7 +1007,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_NUM_BYTES_MOVED    0x1d
 #define RADEON_INFO_VRAM_USAGE         0x1e
 #define RADEON_INFO_GTT_USAGE          0x1f
-
+#define RADEON_INFO_ACTIVE_CU_COUNT    0x20
 
 struct drm_radeon_info {
        uint32_t                request;