diff --git a/src/video/yuv2rgb/yuv_rgb.c b/src/video/yuv2rgb/yuv_rgb.c index 30c4c0c4c8..bf4002014a 100644 --- a/src/video/yuv2rgb/yuv_rgb.c +++ b/src/video/yuv2rgb/yuv_rgb.c @@ -241,6 +241,7 @@ void rgb24_yuv420_std( #ifdef SDL_SSE2_INTRINSICS +/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. #define SSE_FUNCTION_NAME yuv420_rgb565_sse #define STD_FUNCTION_NAME yuv420_rgb565_std #define YUV_FORMAT YUV_FORMAT_420 @@ -248,12 +249,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv420_rgb565_sseu -#define STD_FUNCTION_NAME yuv420_rgb565_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_RGB565 -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv420_rgb24_sse #define STD_FUNCTION_NAME yuv420_rgb24_std #define YUV_FORMAT YUV_FORMAT_420 @@ -261,12 +256,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv420_rgb24_sseu -#define STD_FUNCTION_NAME yuv420_rgb24_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_RGB24 -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv420_rgba_sse #define STD_FUNCTION_NAME yuv420_rgba_std #define YUV_FORMAT YUV_FORMAT_420 @@ -274,12 +263,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv420_rgba_sseu -#define STD_FUNCTION_NAME yuv420_rgba_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_RGBA -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv420_bgra_sse #define STD_FUNCTION_NAME yuv420_bgra_std #define YUV_FORMAT YUV_FORMAT_420 @@ -287,12 +270,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv420_bgra_sseu -#define STD_FUNCTION_NAME yuv420_bgra_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_BGRA -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv420_argb_sse #define STD_FUNCTION_NAME yuv420_argb_std #define YUV_FORMAT YUV_FORMAT_420 @@ -300,12 +277,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv420_argb_sseu -#define STD_FUNCTION_NAME yuv420_argb_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_ARGB -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv420_abgr_sse #define STD_FUNCTION_NAME yuv420_abgr_std #define YUV_FORMAT YUV_FORMAT_420 @@ -313,12 +284,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv420_abgr_sseu -#define STD_FUNCTION_NAME yuv420_abgr_std -#define YUV_FORMAT YUV_FORMAT_420 -#define RGB_FORMAT RGB_FORMAT_ABGR -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv422_rgb565_sse #define STD_FUNCTION_NAME yuv422_rgb565_std #define YUV_FORMAT YUV_FORMAT_422 @@ -326,12 +291,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv422_rgb565_sseu -#define STD_FUNCTION_NAME yuv422_rgb565_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_RGB565 -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv422_rgb24_sse #define STD_FUNCTION_NAME yuv422_rgb24_std #define YUV_FORMAT YUV_FORMAT_422 @@ -339,12 +298,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv422_rgb24_sseu -#define STD_FUNCTION_NAME yuv422_rgb24_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_RGB24 -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv422_rgba_sse #define STD_FUNCTION_NAME yuv422_rgba_std #define YUV_FORMAT YUV_FORMAT_422 @@ -352,12 +305,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv422_rgba_sseu -#define STD_FUNCTION_NAME yuv422_rgba_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_RGBA -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv422_bgra_sse #define STD_FUNCTION_NAME yuv422_bgra_std #define YUV_FORMAT YUV_FORMAT_422 @@ -365,12 +312,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv422_bgra_sseu -#define STD_FUNCTION_NAME yuv422_bgra_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_BGRA -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv422_argb_sse #define STD_FUNCTION_NAME yuv422_argb_std #define YUV_FORMAT YUV_FORMAT_422 @@ -378,12 +319,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv422_argb_sseu -#define STD_FUNCTION_NAME yuv422_argb_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_ARGB -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuv422_abgr_sse #define STD_FUNCTION_NAME yuv422_abgr_std #define YUV_FORMAT YUV_FORMAT_422 @@ -391,12 +326,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuv422_abgr_sseu -#define STD_FUNCTION_NAME yuv422_abgr_std -#define YUV_FORMAT YUV_FORMAT_422 -#define RGB_FORMAT RGB_FORMAT_ABGR -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuvnv12_rgb565_sse #define STD_FUNCTION_NAME yuvnv12_rgb565_std #define YUV_FORMAT YUV_FORMAT_NV12 @@ -404,12 +333,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu -#define STD_FUNCTION_NAME yuvnv12_rgb565_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_RGB565 -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuvnv12_rgb24_sse #define STD_FUNCTION_NAME yuvnv12_rgb24_std #define YUV_FORMAT YUV_FORMAT_NV12 @@ -417,12 +340,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu -#define STD_FUNCTION_NAME yuvnv12_rgb24_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_RGB24 -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuvnv12_rgba_sse #define STD_FUNCTION_NAME yuvnv12_rgba_std #define YUV_FORMAT YUV_FORMAT_NV12 @@ -430,12 +347,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu -#define STD_FUNCTION_NAME yuvnv12_rgba_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_RGBA -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuvnv12_bgra_sse #define STD_FUNCTION_NAME yuvnv12_bgra_std #define YUV_FORMAT YUV_FORMAT_NV12 @@ -443,12 +354,6 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu -#define STD_FUNCTION_NAME yuvnv12_bgra_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_BGRA -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuvnv12_argb_sse #define STD_FUNCTION_NAME yuvnv12_argb_std #define YUV_FORMAT YUV_FORMAT_NV12 @@ -456,18 +361,115 @@ void rgb24_yuv420_std( #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" -#define SSE_FUNCTION_NAME yuvnv12_argb_sseu -#define STD_FUNCTION_NAME yuvnv12_argb_std -#define YUV_FORMAT YUV_FORMAT_NV12 -#define RGB_FORMAT RGB_FORMAT_ARGB -#include "yuv_rgb_sse_func.h" - #define SSE_FUNCTION_NAME yuvnv12_abgr_sse #define STD_FUNCTION_NAME yuvnv12_abgr_std #define YUV_FORMAT YUV_FORMAT_NV12 #define RGB_FORMAT RGB_FORMAT_ABGR #define SSE_ALIGNED #include "yuv_rgb_sse_func.h" +*/ + +#define SSE_FUNCTION_NAME yuv420_rgb565_sseu +#define STD_FUNCTION_NAME yuv420_rgb565_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_rgb24_sseu +#define STD_FUNCTION_NAME yuv420_rgb24_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_rgba_sseu +#define STD_FUNCTION_NAME yuv420_rgba_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_bgra_sseu +#define STD_FUNCTION_NAME yuv420_bgra_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_argb_sseu +#define STD_FUNCTION_NAME yuv420_argb_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv420_abgr_sseu +#define STD_FUNCTION_NAME yuv420_abgr_std +#define YUV_FORMAT YUV_FORMAT_420 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgb565_sseu +#define STD_FUNCTION_NAME yuv422_rgb565_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgb24_sseu +#define STD_FUNCTION_NAME yuv422_rgb24_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_rgba_sseu +#define STD_FUNCTION_NAME yuv422_rgba_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_bgra_sseu +#define STD_FUNCTION_NAME yuv422_bgra_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_argb_sseu +#define STD_FUNCTION_NAME yuv422_argb_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuv422_abgr_sseu +#define STD_FUNCTION_NAME yuv422_abgr_std +#define YUV_FORMAT YUV_FORMAT_422 +#define RGB_FORMAT RGB_FORMAT_ABGR +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu +#define STD_FUNCTION_NAME yuvnv12_rgb565_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB565 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu +#define STD_FUNCTION_NAME yuvnv12_rgb24_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGB24 +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu +#define STD_FUNCTION_NAME yuvnv12_rgba_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_RGBA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu +#define STD_FUNCTION_NAME yuvnv12_bgra_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_BGRA +#include "yuv_rgb_sse_func.h" + +#define SSE_FUNCTION_NAME yuvnv12_argb_sseu +#define STD_FUNCTION_NAME yuvnv12_argb_std +#define YUV_FORMAT YUV_FORMAT_NV12 +#define RGB_FORMAT RGB_FORMAT_ARGB +#include "yuv_rgb_sse_func.h" #define SSE_FUNCTION_NAME yuvnv12_abgr_sseu #define STD_FUNCTION_NAME yuvnv12_abgr_std @@ -476,6 +478,7 @@ void rgb24_yuv420_std( #include "yuv_rgb_sse_func.h" +/* SDL doesn't use these atm and compiling them adds seconds onto the build. --ryan. #define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \ R1 = _mm_unpacklo_epi8(RGB1, RGB4); \ R2 = _mm_unpackhi_epi8(RGB1, RGB4); \ @@ -515,7 +518,9 @@ V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \ V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \ V = _mm_add_epi16(V, _mm_set1_epi16(128<