Clean up API surface to use attribute macros and rename convertPixels

This commit is contained in:
Isaac Aronson 2023-09-06 17:13:41 -05:00 committed by Sam Lantinga
parent bac318fc27
commit e8cba442c5
4 changed files with 17 additions and 44 deletions

View File

@ -7,16 +7,13 @@
#include "SDL_blit.h"
#include "SDL_blit_A_sse4_1.h"
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("avx2")))
#endif
/**
* Using the AVX2 instruction set, blit four pixels with alpha blending
* @param src A 128-bit wide vector of four 32-bit pixels of ARGB format to blit into dst
* @param dst A 128-bit wide vector of four 32-bit pixels of ARGB format to retain visual data for while alpha blending
* @return A 128-bit wide vector of four alpha-blended pixels in ARGB format
*/
__m128i MixRGBA_AVX2(__m128i src, __m128i dst) {
__m128i SDL_TARGETING("avx2") MixRGBA_AVX2(__m128i src, __m128i dst) {
__m256i src_color = _mm256_cvtepu8_epi16(src);
__m256i dst_color = _mm256_cvtepu8_epi16(dst);
const __m256i SHUFFLE_ALPHA = _mm256_set_epi8(
@ -43,10 +40,7 @@ __m128i MixRGBA_AVX2(__m128i src, __m128i dst) {
return _mm_add_epi8(mix, dst);
}
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("avx2")))
#endif
void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info)
void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info)
{
int width = info->dst_w;
int height = info->dst_h;
@ -62,7 +56,7 @@ void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info)
while (height--) {
/* Process 4-wide chunks of source color data that may be in wrong format */
for (int i = 0; i < chunks; i += 1) {
__m128i c_src = convertPixelFormatsx4(_mm_loadu_si128((__m128i*) (src + i * 16)), srcfmt);
__m128i c_src = AlignPixelToSDL_PixelFormat_x4(_mm_loadu_si128((__m128i *) (src + i * 16)), srcfmt);
_mm_store_si128((__m128i*)(buf + i * 16), c_src);
}
@ -82,7 +76,7 @@ void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info)
Uint32 *src_ptr = ((Uint32*)(src + (offset * 4)));
Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4)));
__m128i c_src = _mm_loadu_si64(src_ptr);
c_src = convertPixelFormatsx4(c_src, srcfmt);
c_src = AlignPixelToSDL_PixelFormat_x4(c_src, srcfmt);
__m128i c_dst = _mm_loadu_si64(dst_ptr);
__m128i c_mix = MixRGBA_SSE4_1(c_src, c_dst);
_mm_storeu_si64(dst_ptr, c_mix);
@ -92,7 +86,7 @@ void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info)
if (remaining_pixels == 1) {
Uint32 *src_ptr = ((Uint32*)(src + (offset * 4)));
Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4)));
Uint32 pixel = convertPixelFormat(*src_ptr, srcfmt);
Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt);
/* Old GCC has bad or no _mm_loadu_si32 */
#if defined(__GNUC__) && (__GNUC__ < 11)
__m128i c_src = _mm_set_epi32(0, 0, 0, pixel);

View File

@ -1,7 +1,4 @@
#ifndef SDL_SDL_BLIT_A_AVX2_H
#define SDL_SDL_BLIT_A_AVX2_H
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("avx2")))
#endif
void BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info);
void SDL_TARGETING("avx2") BlitNtoNPixelAlpha_AVX2(SDL_BlitInfo *info);
#endif //SDL_SDL_BLIT_A_AVX2_H

View File

@ -6,16 +6,13 @@
#include "SDL_blit.h"
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("sse4.1")))
#endif
/**
* Using the SSE4.1 instruction set, blit two pixels with alpha blending
* @param src A 128-bit wide vector whose low 64 bits hold two 32-bit pixels of ARGB format to blit into dst
* @param dst A 128-bit wide vector whose low 64 bits hold two 32-bit pixels of ARGB format to retain visual data for while alpha blending
* @return A 128-bit wide vector of two alpha-blended pixels in ARGB format
*/
__m128i MixRGBA_SSE4_1(__m128i src, __m128i dst) {
__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst) {
__m128i src_color = _mm_cvtepu8_epi16(src);
__m128i dst_color = _mm_cvtepu8_epi16(dst);
/**
@ -36,7 +33,7 @@ __m128i MixRGBA_SSE4_1(__m128i src, __m128i dst) {
return _mm_add_epi8(reduced, dst);
}
Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) {
Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) {
Uint8 a = (color >> srcFormat->Ashift) & 0xFF;
Uint8 r = (color >> srcFormat->Rshift) & 0xFF;
Uint8 g = (color >> srcFormat->Gshift) & 0xFF;
@ -45,13 +42,10 @@ Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) {
return (a << 24) | (r << 16) | (g << 8) | b;
}
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("sse4.1")))
#endif
/*
* This helper function converts arbitrary pixel format data into ARGB form with a 4 pixel-wide shuffle
*/
__m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat) {
__m128i SDL_TARGETING("sse4.1") AlignPixelToSDL_PixelFormat_x4(__m128i colors, const SDL_PixelFormat* srcFormat) {
// Create shuffle masks based on the source SDL_PixelFormat to ARGB
__m128i srcShuffleMask = _mm_set_epi8(
srcFormat->Ashift / 8 + 12, srcFormat->Rshift / 8 + 12, srcFormat->Gshift / 8 + 12, srcFormat->Bshift / 8 + 12,
@ -64,10 +58,7 @@ __m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat)
return _mm_shuffle_epi8(colors, srcShuffleMask);
}
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("sse4.1")))
#endif
void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
int width = info->dst_w;
int height = info->dst_h;
Uint8 *src = info->src;
@ -83,7 +74,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
/* Process 4-wide chunks of source color data that may be in wrong format into buffer */
for (int i = 0; i < chunks; i += 1) {
__m128i colors = _mm_loadu_si128((__m128i*)(src + i * 16));
_mm_storeu_si128((__m128i*)(buffer + i * 16), convertPixelFormatsx4(colors, srcfmt));
_mm_storeu_si128((__m128i*)(buffer + i * 16), AlignPixelToSDL_PixelFormat_x4(colors, srcfmt));
}
/* Alpha-blend in 2-wide chunks from buffer into destination */
@ -102,7 +93,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
Uint32 *src_ptr = ((Uint32*)(src + (offset * 4)));
Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4)));
__m128i c_src = _mm_loadu_si64(src_ptr);
c_src = convertPixelFormatsx4(c_src, srcfmt);
c_src = AlignPixelToSDL_PixelFormat_x4(c_src, srcfmt);
__m128i c_dst = _mm_loadu_si64(dst_ptr);
__m128i c_mix = MixRGBA_SSE4_1(c_src, c_dst);
_mm_storeu_si64(dst_ptr, c_mix);
@ -112,7 +103,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
if (remaining_pixels == 1) {
Uint32 *src_ptr = ((Uint32*)(src + (offset * 4)));
Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4)));
Uint32 pixel = convertPixelFormat(*src_ptr, srcfmt);
Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt);
/* Old GCC has bad or no _mm_loadu_si32 */
#if defined(__GNUC__) && (__GNUC__ < 11)
__m128i c_src = _mm_set_epi32(0, 0, 0, pixel);

View File

@ -2,22 +2,13 @@
#define SDL_SDL_BLIT_A_SSE4_1_H
#ifdef SDL_SSE4_1_INTRINSICS
Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat);
Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat);
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("sse4.1")))
#endif
__m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat);
__m128i SDL_TARGETING("sse4.1") AlignPixelToSDL_PixelFormat_x4(__m128i colors, const SDL_PixelFormat* srcFormat);
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("sse4.1")))
#endif
__m128i MixRGBA_SSE4_1(__m128i src, __m128i dst);
__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst);
#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
__attribute__((target("sse4.1")))
#endif
void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo *info);
void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo *info);
#endif