OpenTTD Source  20241121-master-g67a0fccfad
viewport_sprite_sorter_sse4.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of OpenTTD.
3  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6  */
7 
10 #ifdef WITH_SSE
11 
12 #include "stdafx.h"
13 #include "cpu.h"
14 #include "smmintrin.h"
15 #include "viewport_sprite_sorter.h"
16 #include <forward_list>
17 #include <stack>
18 
19 #include "safeguards.h"
20 
21 #ifdef POINTER_IS_64BIT
22  static_assert((sizeof(ParentSpriteToDraw) % 16) == 0);
23 # define LOAD_128 _mm_load_si128
24 #else
25 # define LOAD_128 _mm_loadu_si128
26 #endif
27 
28 GNU_TARGET("sse4.1")
29 void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
30 {
31  if (psdv->size() < 2) return;
32 
33  const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
34 
35  /* We rely on sprites being, for the most part, already ordered.
36  * So we don't need to move many of them and can keep track of their
37  * order efficiently by using stack. We always move sprites to the front
38  * of the current position, i.e. to the top of the stack.
39  * Also use special constants to indicate sorting state without
40  * adding extra fields to ParentSpriteToDraw structure.
41  */
42  const uint32_t ORDER_COMPARED = UINT32_MAX; // Sprite was compared but we still need to compare the ones preceding it
43  const uint32_t ORDER_RETURNED = UINT32_MAX - 1; // Mark sorted sprite in case there are other occurrences of it in the stack
44  std::stack<ParentSpriteToDraw *> sprite_order;
45  uint32_t next_order = 0;
46 
47  std::forward_list<std::pair<int64_t, ParentSpriteToDraw *>> sprite_list; // We store sprites in a list sorted by xmin+ymin
48 
49  /* Initialize sprite list and order. */
50  for (auto p = psdv->rbegin(); p != psdv->rend(); p++) {
51  sprite_list.emplace_front((*p)->xmin + (*p)->ymin, *p);
52  sprite_order.push(*p);
53  (*p)->order = next_order++;
54  }
55 
56  sprite_list.sort();
57 
58  std::vector<ParentSpriteToDraw *> preceding; // Temporarily stores sprites that precede current and their position in the list
59  auto preceding_prev = sprite_list.begin(); // Store iterator in case we need to delete a single preciding sprite
60  auto out = psdv->begin(); // Iterator to output sorted sprites
61 
62  while (!sprite_order.empty()) {
63 
64  auto s = sprite_order.top();
65  sprite_order.pop();
66 
67  /* Sprite is already sorted, ignore it. */
68  if (s->order == ORDER_RETURNED) continue;
69 
70  /* Sprite was already compared, just need to output it. */
71  if (s->order == ORDER_COMPARED) {
72  *(out++) = s;
73  s->order = ORDER_RETURNED;
74  continue;
75  }
76 
77  preceding.clear();
78 
79  /* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax
80  * So by iterating sprites with xmin + ymin <= s->xmax + s->ymax
81  * we get all we need and some more that we filter out later.
82  * We don't include zmin into the sum as there are usually more neighbors on x and y than z
83  * so including it will actually increase the amount of false positives.
84  * Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax)
85  * to ensure that we iterate the current sprite as we need to remove it from the list.
86  */
87  auto ssum = std::max(s->xmax, s->xmin) + std::max(s->ymax, s->ymin);
88  auto prev = sprite_list.before_begin();
89  auto x = sprite_list.begin();
90  while (x != sprite_list.end() && ((*x).first <= ssum)) {
91  auto p = (*x).second;
92  if (p == s) {
93  /* We found the current sprite, remove it and move on. */
94  x = sprite_list.erase_after(prev);
95  continue;
96  }
97 
98  auto p_prev = prev;
99  prev = x++;
100 
101  /* Check that p->xmin <= s->xmax && p->ymin <= s->ymax && p->zmin <= s->zmax */
102  __m128i s_max = LOAD_128((__m128i*) &s->xmax);
103  __m128i p_min = LOAD_128((__m128i*) &p->xmin);
104  __m128i r1 = _mm_cmplt_epi32(s_max, p_min);
105  if (!_mm_testz_si128(mask_ptest, r1))
106  continue;
107 
108  /* Check if sprites overlap, i.e.
109  * s->xmin <= p->xmax && s->ymin <= p->ymax && s->zmin <= p->zmax
110  */
111  __m128i s_min = LOAD_128((__m128i*) &s->xmin);
112  __m128i p_max = LOAD_128((__m128i*) &p->xmax);
113  __m128i r2 = _mm_cmplt_epi32(p_max, s_min);
114  if (_mm_testz_si128(mask_ptest, r2)) {
115  /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
116  * the screen and with higher Z elevation, are drawn in front.
117  * Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
118  * i.e. X=(left+right)/2, etc.
119  * However, since we only care about order, don't actually divide / 2
120  */
121  if (s->xmin + s->xmax + s->ymin + s->ymax + s->zmin + s->zmax <=
122  p->xmin + p->xmax + p->ymin + p->ymax + p->zmin + p->zmax) {
123  continue;
124  }
125  }
126 
127  preceding.push_back(p);
128  preceding_prev = p_prev;
129  }
130 
131  if (preceding.empty()) {
132  /* No preceding sprites, add current one to the output */
133  *(out++) = s;
134  s->order = ORDER_RETURNED;
135  continue;
136  }
137 
138  /* Optimization for the case when we only have 1 sprite to move. */
139  if (preceding.size() == 1) {
140  auto p = preceding[0];
141  /* We can only output the preceding sprite if there can't be any other sprites preceding it. */
142  if (p->xmax <= s->xmax && p->ymax <= s->ymax && p->zmax <= s->zmax) {
143  p->order = ORDER_RETURNED;
144  s->order = ORDER_RETURNED;
145  sprite_list.erase_after(preceding_prev);
146  *(out++) = p;
147  *(out++) = s;
148  continue;
149  }
150  }
151 
152  /* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */
153  std::sort(preceding.begin(), preceding.end(), [](const ParentSpriteToDraw *a, const ParentSpriteToDraw *b) {
154  return a->order > b->order;
155  });
156 
157  s->order = ORDER_COMPARED;
158  sprite_order.push(s); // Still need to output so push it back for now
159 
160  for (auto p: preceding) {
161  p->order = next_order++;
162  sprite_order.push(p);
163  }
164  }
165 }
166 
167 
172 bool ViewportSortParentSpritesSSE41Checker()
173 {
174  return HasCPUIDFlag(1, 2, 19);
175 }
176 
177 #endif /* WITH_SSE */
bool HasCPUIDFlag(uint type, uint index, uint bit)
Check whether the current CPU has the given flag.
Definition: cpu.cpp:84
Functions related to CPU specific instructions.
A number of safeguards to prevent using unsafe methods.
Definition of base types and functions in a cross-platform compatible way.
Parent sprite that should be drawn.
Types related to sprite sorting.