gentoo-ebuilds/dev-lang/orc/files/orc-0.4.40-avx.patch
Sam James e82087ca0b
dev-lang/orc: backport AVX backend fix
Closes: https://bugs.gentoo.org/948164
Signed-off-by: Sam James <sam@gentoo.org>
2025-01-16 19:25:41 +00:00

77 lines
3.2 KiB
Diff

Gentoo bug: https://bugs.gentoo.org/948164
Upstream issue: https://gitlab.freedesktop.org/gstreamer/orc/-/issues/82
Upstream commit (backported below): https://gitlab.freedesktop.org/gstreamer/orc/-/commit/8e48a61e27f4d3e60bf2e3e7873fd61363db6ff8
From 8e48a61e27f4d3e60bf2e3e7873fd61363db6ff8 Mon Sep 17 00:00:00 2001
From: "L. E. Segovia" <amy@centricular.com>
Date: Wed, 15 Jan 2025 22:20:14 +0000
Subject: [PATCH] avx: Fix sqrtps encoding, it's an unary operator
Fixes #82
Part-of: <https://gitlab.freedesktop.org/gstreamer/orc/-/merge_requests/213>
---
orc/orcavx.h | 4 ++--
orc/orcprogram-c.c | 1 +
orc/orcrules-avx.c | 2 +-
testsuite/test.orc | 8 ++++++++
4 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/orc/orcavx.h b/orc/orcavx.h
index f564b63f..ca95bd02 100644
--- a/orc/orcavx.h
+++ b/orc/orcavx.h
@@ -224,8 +224,8 @@ ORC_API void orc_avx_restore_mxcsr (OrcCompiler *compiler);
#define orc_avx_emit_mulps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_mulps, 32, s1, s2, d, ORC_X86_AVX_VEX256_PREFIX)
#define orc_avx_sse_emit_divps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_divps, 32, s1, s2, d, ORC_X86_AVX_VEX128_PREFIX)
#define orc_avx_emit_divps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_divps, 32, s1, s2, d, ORC_X86_AVX_VEX256_PREFIX)
-#define orc_avx_sse_emit_sqrtps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_sqrtps, 32, s1, s2, d, ORC_X86_AVX_VEX128_PREFIX)
-#define orc_avx_emit_sqrtps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_sqrtps, 32, s1, s2, d, ORC_X86_AVX_VEX256_PREFIX)
+#define orc_avx_sse_emit_sqrtps(p,s1,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_sqrtps, 32, s1, 0, d, ORC_X86_AVX_VEX128_PREFIX)
+#define orc_avx_emit_sqrtps(p,s1,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_sqrtps, 32, s1, 0, d, ORC_X86_AVX_VEX256_PREFIX)
#define orc_avx_sse_emit_andps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_andps, 32, s1, s2, d, ORC_X86_AVX_VEX128_PREFIX)
#define orc_avx_emit_andps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_andps, 32, s1, s2, d, ORC_X86_AVX_VEX256_PREFIX)
#define orc_avx_sse_emit_orps(p,s1,s2,d) orc_vex_emit_cpuinsn_size(p, ORC_X86_orps, 32, s1, s2, d, ORC_X86_AVX_VEX128_PREFIX)
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index 49e0b73b..1c9ff7cf 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -106,6 +106,7 @@ orc_target_c_get_asm_preamble (void)
{
return "\n"
"/* begin Orc C target preamble */\n"
+ "#include <math.h>\n"
"#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))\n"
"#define ORC_ABS(a) ((a)<0 ? -(a) : (a))\n"
"#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))\n"
diff --git a/orc/orcrules-avx.c b/orc/orcrules-avx.c
index 66925982..5cffe145 100644
--- a/orc/orcrules-avx.c
+++ b/orc/orcrules-avx.c
@@ -2678,7 +2678,7 @@ BINARY (addf, addps)
BINARY (subf, subps)
BINARY (mulf, mulps)
BINARY (divf, divps)
-BINARY (sqrtf, sqrtps)
+UNARY (sqrtf, sqrtps)
BINARY (orf, orps)
BINARY (andf, andps)
diff --git a/testsuite/test.orc b/testsuite/test.orc
index 3e9c5790..9ff53236 100644
--- a/testsuite/test.orc
+++ b/testsuite/test.orc
@@ -2806,3 +2806,11 @@ x4 addb argb, x, c128
mulslq t1, d1, p1
shrsq t1, t1, 27
convql d1, t1
+
+.function sqrt_nx
+.dest 4 dst float
+.source 4 src float
+.floatparam 4 k
+.temp 4 tmp
+sqrtf tmp, src
+mulf dst, tmp, k
--
GitLab