18#define INTERNAL_LINKAGE static
/**
 * Store a 32-bit value in the lowest 32-bit lane of an SSE register.
 * The remaining lanes of \a into are left untouched.
 * @param value Value to place in bits 0..31.
 * @param[in,out] into Register to modify.
 * @note NOTE(review): the braces and the preprocessor lines were missing from the
 *       listing this was recovered from. The guard is reconstructed from the
 *       `#if (SSE_VERSION >= 4)` that survives in LoadUint64 — confirm.
 */
inline void InsertFirstUint32(const uint32_t value, __m128i &into)
{
#if (SSE_VERSION >= 4)
	into = _mm_insert_epi32(into, value, 0);
#else
	/* No 32-bit insert available: write the two 16-bit halves instead. */
	into = _mm_insert_epi16(into, value, 0);
	into = _mm_insert_epi16(into, value >> 16, 1);
#endif
}
/**
 * Store a 32-bit value in the second 32-bit lane (bits 32..63) of an SSE register.
 * The remaining lanes of \a into are left untouched.
 * @param value Value to place in bits 32..63.
 * @param[in,out] into Register to modify.
 * @note NOTE(review): the braces and the preprocessor lines were missing from the
 *       listing this was recovered from. The guard is reconstructed from the
 *       `#if (SSE_VERSION >= 4)` that survives in LoadUint64 — confirm.
 */
inline void InsertSecondUint32(const uint32_t value, __m128i &into)
{
#if (SSE_VERSION >= 4)
	into = _mm_insert_epi32(into, value, 1);
#else
	/* No 32-bit insert available: write 16-bit lanes 2 and 3 instead. */
	into = _mm_insert_epi16(into, value, 2);
	into = _mm_insert_epi16(into, value >> 16, 3);
#endif
}
inline void LoadUint64(
const uint64_t value, __m128i &into)
47#ifdef POINTER_IS_64BIT
48 into = _mm_cvtsi64_si128(value);
50 #if (SSE_VERSION >= 4)
51 into = _mm_cvtsi32_si128(value);
52 InsertSecondUint32(value >> 32, into);
54 (*(um128i*) &into).m128i_u64[0] = value;
/**
 * Narrow 16-bit lanes back to bytes without saturating them.
 * @param from Register holding the 16-bit lanes to pack.
 * @param mask For SSE2 an AND mask applied before packing, so the unsigned-saturating
 *             pack cannot clamp. Otherwise the byte-shuffle control that does the packing.
 * @return The packed bytes.
 * @note NOTE(review): the listing this was recovered from showed both return
 *       statements back to back with no conditional. The `SSE_VERSION == 2` guard is
 *       reconstructed from the mask selection in the drawing code (clear-high-byte
 *       mask for SSE2, pack control mask otherwise) — confirm.
 */
inline __m128i PackUnsaturated(__m128i from, const __m128i &mask)
{
#if (SSE_VERSION == 2)
	from = _mm_and_si128(from, mask);
	return _mm_packus_epi16(from, from);
#else
	return _mm_shuffle_epi8(from, mask);
#endif
}
/**
 * Spread the fourth 16-bit lane of each pixel over that pixel's other lanes.
 * @param from Two pixels, each as four 16-bit lanes.
 * @param mask For SSE2 the bits to clear after the word shuffles. Otherwise the
 *             byte-shuffle control that performs the distribution.
 * @return Register with lane 3 of each pixel replicated as described.
 * @note NOTE(review): the listing this was recovered from showed both return
 *       statements back to back with no conditional. The `SSE_VERSION == 2` guard is
 *       reconstructed from the mask selection in the drawing code (alpha AND mask
 *       for SSE2, alpha control mask otherwise) — confirm.
 */
inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask)
{
#if (SSE_VERSION == 2)
	/* Immediate 0x3F: lanes 0..2 take lane 3, lane 3 takes lane 0; low half first, then high half. */
	__m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F);
	alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F);
	/* Clear every bit that is set in the mask. */
	return _mm_andnot_si128(mask, alphaAB);
#else
	return _mm_shuffle_epi8(from, mask);
#endif
}
inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst,
const __m128i &distribution_mask,
const __m128i &pack_mask,
const __m128i &alpha_mask)
85 __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
86 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
88 __m128i alphaMaskAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128());
89 __m128i alphaAB = _mm_sub_epi16(srcAB, alphaMaskAB);
90 alphaAB = DistributeAlpha(alphaAB, distribution_mask);
92 srcAB = _mm_sub_epi16(srcAB, dstAB);
93 srcAB = _mm_mullo_epi16(srcAB, alphaAB);
94 srcAB = _mm_srli_epi16(srcAB, 8);
95 srcAB = _mm_add_epi16(srcAB, dstAB);
97 alphaMaskAB = _mm_and_si128(alphaMaskAB, alpha_mask);
98 srcAB = _mm_or_si128(srcAB, alphaMaskAB);
100 return PackUnsaturated(srcAB, pack_mask);
inline __m128i DarkenTwoPixels(__m128i src, __m128i dst,
const __m128i &distribution_mask,
const __m128i &tr_nom_base)
109 __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
110 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
111 __m128i alphaAB = DistributeAlpha(srcAB, distribution_mask);
112 alphaAB = _mm_srli_epi16(alphaAB, 2);
113 __m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
114 dstAB = _mm_mullo_epi16(dstAB, nom);
115 dstAB = _mm_srli_epi16(dstAB, 8);
116 return _mm_packus_epi16(dstAB, dstAB);
/*
 * NOTE(review): headerless fragment of a brightness-adjusting routine. Its
 * signature, the declarations of `ret` and `c128`, and whatever stood on the
 * lines skipped by the embedded numbering are not present in this listing.
 */
/* Spread blue, green and red over three 16-bit fields of one 64-bit word. */
123 uint64_t c16 = colour.b | (uint64_t) colour.g << 16 | (uint64_t) colour.r << 32;
125 uint64_t c16_ob = c16;
/* Keep the low 9 bits of every 16-bit field. */
127 c16 &= 0x01FF01FF01FFULL;
/* For each field whose bit 15 is set in c16_ob, keep the low 8 bits of the matching c16 field; the other fields become 0. */
130 c16_ob = (((c16_ob >> (8 + 7)) & 0x0100010001ULL) * 0xFF) & c16;
/* Half of the sum of the three per-channel values computed above. */
131 const uint ob = ((uint16_t) c16_ob + (uint16_t) (c16_ob >> 16) + (uint16_t) (c16_ob >> 32)) / 2;
/* Remember the top byte of the packed colour; it is OR-ed back unchanged at the end. */
133 const uint32_t alpha32 = colour.data & 0xFF000000;
135 LoadUint64(c16, ret);
/* Copy `ob` into 16-bit lanes 0..2 (immediate 0xC0 leaves lane 3 as it was). */
137 __m128i ob128 = _mm_cvtsi32_si128(ob);
138 ob128 = _mm_shufflelo_epi16(ob128, 0xC0);
139 __m128i white = OVERBRIGHT_VALUE_MASK;
/* Per 16-bit lane: ret = ((white - c128) * ob128 >> 8) + c128, the subtraction saturating at 0. */
141 ret = _mm_subs_epu16(white, c128);
142 ret = _mm_mullo_epi16(ret, ob128);
143 ret = _mm_srli_epi16(ret, 8);
144 ret = _mm_add_epi16(ret, c128);
/* Narrow back to bytes with unsigned saturation and put the saved top byte back. */
147 ret = _mm_packus_epi16(ret, ret);
148 return alpha32 | _mm_cvtsi128_si32(ret);
/**
 * Adjust the brightness of a colour.
 * @param colour Colour to adjust.
 * @param brightness Brightness to apply.
 * @return \a colour itself when \a brightness equals DEFAULT_BRIGHTNESS.
 * NOTE(review): the braces and whatever handles any other brightness value are
 * not present in this listing.
 */
inline Colour AdjustBrightneSSE(Colour colour, uint8_t brightness)
/* Shortcut: nothing to do at the default brightness. */
158 if (brightness == DEFAULT_BRIGHTNESS)
return colour;
/*
 * Brightness adjustment for the two pixels in the low 64 bits of `from`.
 * NOTE(review): the braces and whatever stood on the lines skipped by the
 * embedded numbering are missing from this listing. Both parameters are marked
 * [[maybe_unused]], which suggests a build variant in which the body does not
 * use them — confirm.
 */
inline __m128i AdjustBrightnessOfTwoPixels([[maybe_unused]] __m128i from, [[maybe_unused]] uint32_t brightness)
/* Keep only the high byte of each 16-bit half of `brightness`, then add DEFAULT_BRIGHTNESS. */
173 brightness &= 0xFF00FF00;
174 brightness += DEFAULT_BRIGHTNESS;
/* Widen the pixel bytes to 16-bit lanes and multiply them by the shuffled brightness. */
176 __m128i colAB = _mm_unpacklo_epi8(from, _mm_setzero_si128());
177 __m128i briAB = _mm_cvtsi32_si128(brightness);
178 briAB = _mm_shuffle_epi8(briAB, BRIGHTNESS_LOW_CONTROL_MASK);
179 colAB = _mm_mullo_epi16(colAB, briAB);
/* Bit 15 of each product goes to colAB_ob (presumably the overbright indicator); the product >> 7 stays in colAB. */
180 __m128i colAB_ob = _mm_srli_epi16(colAB, 8 + 7);
181 colAB = _mm_srli_epi16(colAB, 7);
187 colAB = _mm_and_si128(colAB, BRIGHTNESS_DIV_CLEANER);
188 colAB_ob = _mm_and_si128(colAB_ob, OVERBRIGHT_PRESENCE_MASK);
189 colAB_ob = _mm_mullo_epi16(colAB_ob, OVERBRIGHT_VALUE_MASK);
190 colAB_ob = _mm_and_si128(colAB_ob, colAB);
/* Two horizontal adds accumulate neighbouring lanes; the sum is then halved and shuffled through OVERBRIGHT_CONTROL_MASK. */
191 __m128i obAB = _mm_hadd_epi16(_mm_hadd_epi16(colAB_ob, _mm_setzero_si128()), _mm_setzero_si128());
193 obAB = _mm_srli_epi16(obAB, 1);
194 obAB = _mm_shuffle_epi8(obAB, OVERBRIGHT_CONTROL_MASK);
/* Per 16-bit lane: retAB = ((OVERBRIGHT_VALUE_MASK - colAB) * obAB >> 8) + colAB, the subtraction saturating at 0. */
195 __m128i retAB = OVERBRIGHT_VALUE_MASK;
196 retAB = _mm_subs_epu16(retAB, colAB);
197 retAB = _mm_mullo_epi16(retAB, obAB);
198 retAB = _mm_srli_epi16(retAB, 8);
199 retAB = _mm_add_epi16(retAB, colAB);
/* Narrow back to bytes with unsigned saturation. */
201 return _mm_packus_epi16(retAB, retAB);
205#if FULL_ANIMATION == 0
/*
 * NOTE(review): what follows is a fragment of a template function body. The
 * function's own signature line, all closing braces, the labels separating the
 * per-mode loops and a number of statements are not present in this listing.
 * The calls of the form Draw<mode, read_mode, bt_last, translucent>(bp, zoom)
 * elsewhere in this file suggest this is that Draw template — confirm.
 */
214template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last,
bool translucent>
216#if (SSE_VERSION == 2)
218#elif (SSE_VERSION == 3)
220#elif (SSE_VERSION == 4)
/* Set up the remap table, the first destination line and the source (map-value and RGBA) lines from the blitter parameters. */
224 const uint8_t *
const remap = bp->
225 Colour *dst_line = (Colour *) bp->
dst + bp->
top * bp->
pitch + bp->
226 int effective_width = bp->
229 const SpriteData *
const sd = (
const SpriteData *) bp->
230 const SpriteInfo *
const si = &sd->infos[zoom];
231 const MapValue *src_mv_line = (
const MapValue *) &sd->data[si->mv_offset] + bp->
skip_top * si->sprite_width;
232 const Colour *src_rgba_line = (
const Colour *) ((
const uint8_t *) &sd->data[si->sprite_offset] + bp->
skip_top * si->sprite_line_size);
234 if (read_mode != RM_WITH_MARGIN) {
238 const MapValue *src_mv = src_mv_line;
/* Constant masks handed to AlphaBlendTwoPixels / DarkenTwoPixels; which ones are used depends on SSE_VERSION. */
241 const __m128i alpha_and = ALPHA_AND_MASK;
242 #define ALPHA_BLEND_PARAM_3 alpha_and
243#if (SSE_VERSION == 2)
244 const __m128i clear_hi = CLEAR_HIGH_BYTE_MASK;
245 #define ALPHA_BLEND_PARAM_1 alpha_and
246 #define ALPHA_BLEND_PARAM_2 clear_hi
247 #define DARKEN_PARAM_1 tr_nom_base
248 #define DARKEN_PARAM_2 tr_nom_base
250 const __m128i a_cm = ALPHA_CONTROL_MASK;
251 const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
252 #define ALPHA_BLEND_PARAM_1 a_cm
253 #define ALPHA_BLEND_PARAM_2 pack_low_cm
254 #define DARKEN_PARAM_1 a_cm
255 #define DARKEN_PARAM_2 tr_nom_base
257 const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
/* One iteration per output line, counting bp->height down to zero. */
259 for (
int y = bp->
height; y != 0; y--) {
/* Pixel data starts META_LENGTH entries into the RGBA line. */
260 Colour *dst = dst_line;
261 const Colour *src = src_rgba_line + META_LENGTH;
/* Margin mode: entries 0 and 1 of the RGBA line are read as per-line values (presumably the transparent margins — confirm). Entry 0 advances src and dst, and both entries can shrink the effective width. */
264 if (read_mode == RM_WITH_MARGIN) {
265 assert(bt_last == BT_NONE);
266 src += src_rgba_line[0].data;
267 dst += src_rgba_line[0].data;
269 const int width_diff = si->sprite_width - bp->
270 effective_width = bp->
width - (int) src_rgba_line[0].data;
271 const int delta_diff = (int) src_rgba_line[1].data - width_diff;
272 const int new_width = effective_width - delta_diff;
273 effective_width = delta_diff > 0 ? new_width : effective_width;
274 if (effective_width <= 0)
goto next_line;
/* Plain copy of every source pixel whose alpha is non-zero. */
280 for (uint x = (uint) effective_width; x > 0; x--) {
281 if (src->a) *dst = *src;
/* Alpha-blend two pixels per iteration. */
288 for (uint x = (uint) effective_width / 2; x > 0; x--) {
289 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
290 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
291 _mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
/* Blend the one remaining pixel: checked on the width for BT_NONE, always for BT_ODD. */
296 if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
297 __m128i srcABCD = _mm_cvtsi32_si128(src->data);
298 __m128i dstABCD = _mm_cvtsi32_si128(dst->data);
299 dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
/* Remapping loop, two pixels per iteration; the map values of both pixels are read as one 32-bit word (mvX2). */
304#if (SSE_VERSION >= 3)
305 for (uint x = (uint) effective_width / 2; x > 0; x--) {
306 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
307 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
308 uint32_t mvX2 = *((uint32_t *)
const_cast<MapValue *
/* The low byte of either 16-bit half is non-zero: presumably at least one pixel carries a remap index and needs the palette lookup — confirm against the MapValue layout. */
311 if (mvX2 & 0x00FF00FF) {
312 #define CMOV_REMAP(m_colour, m_colour_init, m_src, m_m) \
314 Colour m_colour = m_colour_init; \
316 const Colour srcm = (Colour) (m_src); \
317 const uint m = (uint8_t) (m_m); \
318 const uint r = remap[m]; \
319 const Colour cmap = (this->LookupColourInPalette(r).data & 0x00FFFFFF) | (srcm.data & 0xFF000000); \
320 m_colour = r == 0 ? m_colour : cmap; \
321 m_colour = m != 0 ? m_colour : srcm; \
323#ifdef POINTER_IS_64BIT
324 uint64_t srcs = _mm_cvtsi128_si64(srcABCD);
325 uint64_t remapped_src = 0;
326 CMOV_REMAP(c0, 0, srcs, mvX2);
327 remapped_src = c0.data;
328 CMOV_REMAP(c1, 0, srcs >> 32, mvX2 >> 16);
329 remapped_src |= (uint64_t) c1.data << 32;
330 srcABCD = _mm_cvtsi64_si128(remapped_src);
332 Colour remapped_src[2];
333 CMOV_REMAP(c0, 0, _mm_cvtsi128_si32(srcABCD), mvX2);
334 remapped_src[0] = c0.data;
335 CMOV_REMAP(c1, 0, src[1], mvX2 >> 16);
336 remapped_src[1] = c1.data;
337 srcABCD = _mm_loadl_epi64((__m128i*) &remapped_src);
/* Skip the brightness adjustment when the high byte of both 16-bit halves is 0x80. */
340 if ((mvX2 & 0xFF00FF00) != 0x80008000) srcABCD = AdjustBrightnessOfTwoPixels(srcABCD, mvX2);
344 _mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
350 if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
352 for (uint x = (uint) effective_width; x > 0; x--) {
/* Single pixel: look the map index up in the remap table, then adjust the palette colour by the pixel's brightness value. */
357 const uint r = remap[src_mv->m];
359 Colour remapped_colour = AdjustBrightneSSE(this->LookupColourInPalette(r), src_mv->v);
361 *dst = remapped_colour;
363 remapped_colour.a = src->a;
364 srcABCD = _mm_cvtsi32_si128(remapped_colour.data);
365 goto bmcr_alpha_blend_single;
369 srcABCD = _mm_cvtsi32_si128(src->data);
372 __m128i dstABCD = _mm_cvtsi32_si128(dst->data);
375 dst->data = _mm_cvtsi128_si32(srcABCD);
377#if (SSE_VERSION == 2)
/* Darkening loop: two pixels per iteration over bp->width, followed by the odd trailing pixel. */
387 for (uint x = (uint) bp->
width / 2; x > 0; x--) {
388 __m128i srcABCD = _mm_loadl_epi64((
const __m128i*) src);
389 __m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
390 _mm_storel_epi64((__m128i *) dst, DarkenTwoPixels(srcABCD, dstABCD, DARKEN_PARAM_1, DARKEN_PARAM_2));
395 if ((bt_last == BT_NONE && bp->
width & 1) || bt_last == BT_ODD) {
396 __m128i srcABCD = _mm_cvtsi32_si128(src->data);
397 __m128i dstABCD = _mm_cvtsi32_si128(dst->data);
398 dst->data = _mm_cvtsi128_si32(DarkenTwoPixels(srcABCD, dstABCD, DARKEN_PARAM_1, DARKEN_PARAM_2));
404 for (uint x = (uint) bp->
width; x > 0; x--) {
415 for (uint x = (uint) bp->
width; x > 0; x--) {
/* Pixels without a map index become a grey level computed by MakeDark; the others go through the remap table. */
416 if (src_mv->m == 0) {
418 uint8_t g = MakeDark(src->r, src->g, src->b);
419 *dst = ComposeColourRGBA(g, g, g, src->a, *dst);
422 uint r = remap[src_mv->m];
423 if (r != 0) *dst = ComposeColourPANoCheck(AdjustBrightness(this->LookupColourInPalette(r), src_mv->v), src->a, *dst);
432 for (uint x = (uint) bp->
width; x > 0; x--) {
/* Destination pixels are set to Colour(0, 0, 0); the condition guarding the store, if any, is not in this listing. */
434 *dst = Colour(0, 0, 0);
/* Advance the source RGBA line by sprite_line_size bytes and step the destination line. */
445 src_rgba_line = (
const Colour*) ((
const uint8_t*) src_rgba_line + si->sprite_line_size);
446 dst_line += bp->
/*
 * NOTE(review): fragment of the routine that picks a Draw<> instantiation. Its
 * signature, the switch statements and the closing braces are not present in
 * this listing.
 */
458#if (SSE_VERSION == 2)
460#elif (SSE_VERSION == 3)
462#elif (SSE_VERSION == 4)
/* The parity of the width selects the even or odd specialisation. */
470 const BlockType bt_last = (BlockType) (bp->
width & 1);
/* NOTE(review): as listed, `default:` runs straight into `case BT_ODD:`; check the real source for the return/break that should separate them. */
472 default: Draw<BlitterMode::Normal, RM_WITH_SKIP, BT_EVEN, true>(bp, zoom);
473 case BT_ODD: Draw<BlitterMode::Normal, RM_WITH_SKIP, BT_ODD, true>(bp, zoom);
/* Margin read mode: the `translucent` template argument is true only when the sprite is flagged SF_TRANSLUCENT. */
476 if (((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags & SF_TRANSLUCENT) {
477 Draw<BlitterMode::Normal, RM_WITH_MARGIN, BT_NONE, true>(bp, zoom);
479 Draw<BlitterMode::Normal, RM_WITH_MARGIN, BT_NONE, false>(bp, zoom);
/* Sprites flagged SF_NO_REMAP are drawn through the normal-mode path instead of being remapped. */
486 if (((
const Blitter_32bppSSE_Base::SpriteData *) bp->
sprite)->flags & SF_NO_REMAP)
goto bm_normal;
488 Draw<BlitterMode::ColourRemap, RM_WITH_SKIP, BT_NONE, true>(bp, zoom);
490 Draw<BlitterMode::ColourRemap, RM_WITH_MARGIN, BT_NONE, true>(bp, zoom);
The modes of blitting we can do.
@ Transparent
Perform transparency darkening remapping.
@ CrashRemap
Perform a crash remapping.
@ BlackRemap
Perform remapping to a completely blackened sprite.
@ TransparentRemap
Perform transparency colour remapping.
@ ColourRemap
Perform a colour remapping.
Colour ReallyAdjustBrightness(Colour colour, int brightness)
Adjust brightness of colour.
uint8_t GetNearestColourIndex(uint8_t r, uint8_t g, uint8_t b)
Get nearest colour palette index from an RGB colour.
Parameters related to blitting.
int skip_top
How many pixels of the source to skip at the top (based on the zoom of dst).
void * dst
Destination buffer.
int left
The left offset in the 'dst' in pixels to start drawing.
int pitch
The pitch of the destination buffer.
int skip_left
How many pixels of the source to skip on the left (based on the zoom of dst).
int height
The height in pixels that needs to be drawn to dst.
const uint8_t * remap
XXX – Temporary storage for remap array.
int width
The width in pixels that needs to be drawn to dst.
const void * sprite
Pointer to the sprite, in whatever form the encoder stored it.
int top
The top offset in the 'dst' in pixels to start drawing.
All zoom levels we know.