diff --git a/src/video/SDL_blit_A_avx2.c b/src/video/SDL_blit_A_avx2.c index ccb65f783e..d2a241874d 100644 --- a/src/video/SDL_blit_A_avx2.c +++ b/src/video/SDL_blit_A_avx2.c @@ -7,16 +7,13 @@ #include "SDL_blit.h" #include "SDL_blit_A_sse4_1.h" -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("avx2"))) -#endif /** * Using the AVX2 instruction set, blit eight pixels with alpha blending * @param src A pointer to four 32-bit pixels of ARGB format to blit into dst * @param dst A pointer to four 32-bit pixels of ARGB format to retain visual data for while alpha blending * @return A 128-bit wide vector of four alpha-blended pixels in ARGB format */ -__m128i MixRGBA_AVX2(__m128i src, __m128i dst) { +__m128i SDL_TARGETING("avx2") MixRGBA_AVX2(__m128i src, __m128i dst) { __m256i src_color = _mm256_cvtepu8_epi16(src); __m256i dst_color = _mm256_cvtepu8_epi16(dst); const __m256i SHUFFLE_ALPHA = _mm256_set_epi8( @@ -43,10 +40,7 @@ __m128i MixRGBA_AVX2(__m128i src, __m128i dst) { return _mm_add_epi8(mix, dst); } -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("avx2"))) -#endif -void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) +void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) { int width = info->dst_w; int height = info->dst_h; @@ -62,7 +56,7 @@ void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) while (height--) { /* Process 4-wide chunks of source color data that may be in wrong format */ for (int i = 0; i < chunks; i += 1) { - __m128i c_src = convertPixelFormatsx4(_mm_loadu_si128((__m128i*) (src + i * 16)), srcfmt); + __m128i c_src = AlignPixelToSDL_PixelFormat_x4(_mm_loadu_si128((__m128i *) (src + i * 16)), srcfmt); _mm_store_si128((__m128i*)(buf + i * 16), c_src); } @@ -82,7 +76,7 @@ void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) Uint32 *src_ptr = ((Uint32*)(src + (offset * 4))); Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4))); __m128i c_src = _mm_loadu_si64(src_ptr); - c_src = convertPixelFormatsx4(c_src, srcfmt); + c_src = AlignPixelToSDL_PixelFormat_x4(c_src, srcfmt); __m128i c_dst = _mm_loadu_si64(dst_ptr); __m128i c_mix = MixRGBA_SSE4_1(c_src, c_dst); _mm_storeu_si64(dst_ptr, c_mix); @@ -92,7 +86,7 @@ void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info) if (remaining_pixels == 1) { Uint32 *src_ptr = ((Uint32*)(src + (offset * 4))); Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4))); - Uint32 pixel = convertPixelFormat(*src_ptr, srcfmt); + Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt); /* Old GCC has bad or no _mm_loadu_si32 */ #if defined(__GNUC__) && (__GNUC__ < 11) __m128i c_src = _mm_set_epi32(0, 0, 0, pixel); diff --git a/src/video/SDL_blit_A_avx2.h b/src/video/SDL_blit_A_avx2.h index c3fc7b1117..61eab95424 100644 --- a/src/video/SDL_blit_A_avx2.h +++ b/src/video/SDL_blit_A_avx2.h @@ -1,7 +1,4 @@ #ifndef SDL_SDL_BLIT_A_AVX2_H #define SDL_SDL_BLIT_A_AVX2_H -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("avx2"))) -#endif -void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info); +void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info); #endif //SDL_SDL_BLIT_A_AVX2_H diff --git a/src/video/SDL_blit_A_sse4_1.c b/src/video/SDL_blit_A_sse4_1.c index 3cc852e5da..2135c87001 100644 --- a/src/video/SDL_blit_A_sse4_1.c +++ b/src/video/SDL_blit_A_sse4_1.c @@ -6,16 +6,13 @@ #include "SDL_blit.h" -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("sse4.1"))) -#endif /** * Using the SSE4.1 instruction set, blit four pixels with alpha blending * @param src A pointer to two 32-bit pixels of ARGB format to blit into dst * @param dst A pointer to two 32-bit pixels of ARGB format to retain visual data for while alpha blending * @return A 128-bit wide vector of two alpha-blended pixels in ARGB format */ -__m128i MixRGBA_SSE4_1(__m128i src, __m128i dst) { +__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst) { __m128i src_color = _mm_cvtepu8_epi16(src); __m128i dst_color = _mm_cvtepu8_epi16(dst); /** @@ -36,7 +33,7 @@ __m128i MixRGBA_SSE4_1(__m128i src, __m128i dst) { return _mm_add_epi8(reduced, dst); } -Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) { +Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) { Uint8 a = (color >> srcFormat->Ashift) & 0xFF; Uint8 r = (color >> srcFormat->Rshift) & 0xFF; Uint8 g = (color >> srcFormat->Gshift) & 0xFF; @@ -45,13 +42,10 @@ Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) { return (a << 24) | (r << 16) | (g << 8) | b; } -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("sse4.1"))) -#endif /* * This helper function converts arbitrary pixel format data into ARGB form with a 4 pixel-wide shuffle */ -__m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat) { +__m128i SDL_TARGETING("sse4.1") AlignPixelToSDL_PixelFormat_x4(__m128i colors, const SDL_PixelFormat* srcFormat) { // Create shuffle masks based on the source SDL_PixelFormat to ARGB __m128i srcShuffleMask = _mm_set_epi8( srcFormat->Ashift / 8 + 12, srcFormat->Rshift / 8 + 12, srcFormat->Gshift / 8 + 12, srcFormat->Bshift / 8 + 12, @@ -64,10 +58,7 @@ __m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat) return _mm_shuffle_epi8(colors, srcShuffleMask); } -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("sse4.1"))) -#endif -void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { +void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { int width = info->dst_w; int height = info->dst_h; Uint8 *src = info->src; @@ -83,7 +74,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { /* Process 4-wide chunks of source color data that may be in wrong format into buffer */ for (int i = 0; i < chunks; i += 1) { __m128i colors = _mm_loadu_si128((__m128i*)(src + i * 16)); - _mm_storeu_si128((__m128i*)(buffer + i * 16), convertPixelFormatsx4(colors, srcfmt)); + _mm_storeu_si128((__m128i*)(buffer + i * 16), AlignPixelToSDL_PixelFormat_x4(colors, srcfmt)); } /* Alpha-blend in 2-wide chunks from buffer into destination */ @@ -102,7 +93,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { Uint32 *src_ptr = ((Uint32*)(src + (offset * 4))); Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4))); __m128i c_src = _mm_loadu_si64(src_ptr); - c_src = convertPixelFormatsx4(c_src, srcfmt); + c_src = AlignPixelToSDL_PixelFormat_x4(c_src, srcfmt); __m128i c_dst = _mm_loadu_si64(dst_ptr); __m128i c_mix = MixRGBA_SSE4_1(c_src, c_dst); _mm_storeu_si64(dst_ptr, c_mix); @@ -112,7 +103,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) { if (remaining_pixels == 1) { Uint32 *src_ptr = ((Uint32*)(src + (offset * 4))); Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4))); - Uint32 pixel = convertPixelFormat(*src_ptr, srcfmt); + Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt); /* Old GCC has bad or no _mm_loadu_si32 */ #if defined(__GNUC__) && (__GNUC__ < 11) __m128i c_src = _mm_set_epi32(0, 0, 0, pixel); diff --git a/src/video/SDL_blit_A_sse4_1.h b/src/video/SDL_blit_A_sse4_1.h index 47be0dd582..f26196efc6 100644 --- a/src/video/SDL_blit_A_sse4_1.h +++ b/src/video/SDL_blit_A_sse4_1.h @@ -2,22 +2,13 @@ #define SDL_SDL_BLIT_A_SSE4_1_H #ifdef SDL_SSE4_1_INTRINSICS -Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat); +Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat); -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("sse4.1"))) -#endif -__m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat); +__m128i SDL_TARGETING("sse4.1") AlignPixelToSDL_PixelFormat_x4(__m128i colors, const SDL_PixelFormat* srcFormat); -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("sse4.1"))) -#endif -__m128i MixRGBA_SSE4_1(__m128i src, __m128i dst); +__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst); -#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__)) -__attribute__((target("sse4.1"))) -#endif -void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo *info); +void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo *info); #endif