Merge tag 'drm-for-v4.9' of git://people.freedesktop.org/~airlied/linux
[cascardo/linux.git] / drivers / gpu / drm / i915 / i915_gem_render_state.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Mika Kuoppala <mika.kuoppala@intel.com>
25  *
26  */
27
28 #include "i915_drv.h"
29 #include "intel_renderstate.h"
30
31 struct render_state {
32         const struct intel_renderstate_rodata *rodata;
33         struct i915_vma *vma;
34         u32 aux_batch_size;
35         u32 aux_batch_offset;
36 };
37
38 static const struct intel_renderstate_rodata *
39 render_state_get_rodata(const struct drm_i915_gem_request *req)
40 {
41         switch (INTEL_GEN(req->i915)) {
42         case 6:
43                 return &gen6_null_state;
44         case 7:
45                 return &gen7_null_state;
46         case 8:
47                 return &gen8_null_state;
48         case 9:
49                 return &gen9_null_state;
50         }
51
52         return NULL;
53 }
54
/*
 * Append a single dword to the auxiliary batch and advance the index.
 *
 * The only validation done here is a page overflow check before the dword
 * is stored. That is sufficient because the null state generator builds
 * the final batch in two passes, commands and state separately. Once the
 * sizes of both are known it compacts them by relocating the state right
 * after the commands, taking care of alignment, so there should be
 * sufficient space below them for adding new commands.
 *
 * On overflow the macro warns, sets 'ret' to -ENOSPC and jumps to the
 * 'err_out' label, so the enclosing function must provide both.
 * Note that (i) is evaluated more than once.
 */
#define OUT_BATCH(batch, i, val)					\
	do {								\
		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {		\
			ret = -ENOSPC;					\
			goto err_out;					\
		}							\
		(batch)[(i)++] = (val);					\
	} while (0)
72
73 static int render_state_setup(struct render_state *so)
74 {
75         struct drm_device *dev = so->vma->vm->dev;
76         const struct intel_renderstate_rodata *rodata = so->rodata;
77         const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
78         unsigned int i = 0, reloc_index = 0;
79         struct page *page;
80         u32 *d;
81         int ret;
82
83         ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
84         if (ret)
85                 return ret;
86
87         page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
88         d = kmap(page);
89
90         while (i < rodata->batch_items) {
91                 u32 s = rodata->batch[i];
92
93                 if (i * 4  == rodata->reloc[reloc_index]) {
94                         u64 r = s + so->vma->node.start;
95                         s = lower_32_bits(r);
96                         if (has_64bit_reloc) {
97                                 if (i + 1 >= rodata->batch_items ||
98                                     rodata->batch[i + 1] != 0) {
99                                         ret = -EINVAL;
100                                         goto err_out;
101                                 }
102
103                                 d[i++] = s;
104                                 s = upper_32_bits(r);
105                         }
106
107                         reloc_index++;
108                 }
109
110                 d[i++] = s;
111         }
112
113         while (i % CACHELINE_DWORDS)
114                 OUT_BATCH(d, i, MI_NOOP);
115
116         so->aux_batch_offset = i * sizeof(u32);
117
118         if (HAS_POOLED_EU(dev)) {
119                 /*
120                  * We always program 3x6 pool config but depending upon which
121                  * subslice is disabled HW drops down to appropriate config
122                  * shown below.
123                  *
124                  * In the below table 2x6 config always refers to
125                  * fused-down version, native 2x6 is not available and can
126                  * be ignored
127                  *
128                  * SNo  subslices config                eu pool configuration
129                  * -----------------------------------------------------------
130                  * 1    3 subslices enabled (3x6)  -    0x00777000  (9+9)
131                  * 2    ss0 disabled (2x6)         -    0x00777000  (3+9)
132                  * 3    ss1 disabled (2x6)         -    0x00770000  (6+6)
133                  * 4    ss2 disabled (2x6)         -    0x00007000  (9+3)
134                  */
135                 u32 eu_pool_config = 0x00777000;
136
137                 OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
138                 OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
139                 OUT_BATCH(d, i, eu_pool_config);
140                 OUT_BATCH(d, i, 0);
141                 OUT_BATCH(d, i, 0);
142                 OUT_BATCH(d, i, 0);
143         }
144
145         OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
146         so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
147
148         /*
149          * Since we are sending length, we need to strictly conform to
150          * all requirements. For Gen2 this must be a multiple of 8.
151          */
152         so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
153
154         kunmap(page);
155
156         ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
157         if (ret)
158                 return ret;
159
160         if (rodata->reloc[reloc_index] != -1) {
161                 DRM_ERROR("only %d relocs resolved\n", reloc_index);
162                 return -EINVAL;
163         }
164
165         return 0;
166
167 err_out:
168         kunmap(page);
169         return ret;
170 }
171
172 #undef OUT_BATCH
173
174 int i915_gem_render_state_init(struct drm_i915_gem_request *req)
175 {
176         struct render_state so;
177         struct drm_i915_gem_object *obj;
178         int ret;
179
180         if (WARN_ON(req->engine->id != RCS))
181                 return -ENOENT;
182
183         so.rodata = render_state_get_rodata(req);
184         if (!so.rodata)
185                 return 0;
186
187         if (so.rodata->batch_items * 4 > 4096)
188                 return -EINVAL;
189
190         obj = i915_gem_object_create(&req->i915->drm, 4096);
191         if (IS_ERR(obj))
192                 return PTR_ERR(obj);
193
194         so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
195         if (IS_ERR(so.vma)) {
196                 ret = PTR_ERR(so.vma);
197                 goto err_obj;
198         }
199
200         ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
201         if (ret)
202                 goto err_obj;
203
204         ret = render_state_setup(&so);
205         if (ret)
206                 goto err_unpin;
207
208         ret = req->engine->emit_bb_start(req, so.vma->node.start,
209                                          so.rodata->batch_items * 4,
210                                          I915_DISPATCH_SECURE);
211         if (ret)
212                 goto err_unpin;
213
214         if (so.aux_batch_size > 8) {
215                 ret = req->engine->emit_bb_start(req,
216                                                  (so.vma->node.start +
217                                                   so.aux_batch_offset),
218                                                  so.aux_batch_size,
219                                                  I915_DISPATCH_SECURE);
220                 if (ret)
221                         goto err_unpin;
222         }
223
224         i915_vma_move_to_active(so.vma, req, 0);
225 err_unpin:
226         i915_vma_unpin(so.vma);
227 err_obj:
228         i915_gem_object_put(obj);
229         return ret;
230 }