diff options
Diffstat (limited to 'pkgs/development/libraries/mesa/fix-rounding.patch')
-rw-r--r-- | pkgs/development/libraries/mesa/fix-rounding.patch | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/pkgs/development/libraries/mesa/fix-rounding.patch b/pkgs/development/libraries/mesa/fix-rounding.patch new file mode 100644 index 000000000000..f81760a6708a --- /dev/null +++ b/pkgs/development/libraries/mesa/fix-rounding.patch @@ -0,0 +1,357 @@ +From c25ae5d27b114e23d5734f846002df1a05759658 Mon Sep 17 00:00:00 2001 +From: Roland Scheidegger <sroland@vmware.com> +Date: Thu, 31 Jan 2013 19:27:49 +0000 +Subject: gallivm: fix issues with trunc/round/floor/ceil with no arch rounding + +The emulation of these if there's no rounding instruction available +is a bit more complicated than what the code did. +In particular, doing fp-to-int/int-to-fp will not work if the exponent +is large enough (and with NaNs, Infs). Hence such values need to be filtered +out and the original value returned in this case (which fortunately should +always be exact). This comes at the expense of performance (if your cpu +doesn't support rounding instructions). +Furthermore, floor/ifloor/ceil/iceil were affected by precision issues for +values near negative (for floor) or positive (for ceil) zero, fix that as well +(fixing this issue might not actually be slower except for ceil/iceil if the +type is not signed which is probably rare - note iceil has no callers left +in any case). + +Also add some new rounding test values in lp_test_arit to actually test +for that stuff (which previously would have failed without sse41). + +This fixes https://bugs.freedesktop.org/show_bug.cgi?id=59701. +--- +diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c +index b4e9f23..ec05026 100644 +--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c ++++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c +@@ -1590,12 +1590,37 @@ lp_build_trunc(struct lp_build_context *bld, + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_TRUNCATE); + } + else { +- LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); +- LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); +- LLVMValueRef res; +- res = LLVMBuildFPToSI(builder, a, int_vec_type, ""); +- res = LLVMBuildSIToFP(builder, res, vec_type, ""); +- return res; ++ const struct lp_type type = bld->type; ++ struct lp_type inttype; ++ struct lp_build_context intbld; ++ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); ++ LLVMValueRef trunc, res, anosign, mask; ++ LLVMTypeRef int_vec_type = bld->int_vec_type; ++ LLVMTypeRef vec_type = bld->vec_type; ++ ++ assert(type.width == 32); /* might want to handle doubles at some point */ ++ ++ inttype = type; ++ inttype.floating = 0; ++ lp_build_context_init(&intbld, bld->gallivm, inttype); ++ ++ /* round by truncation */ ++ trunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); ++ res = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc"); ++ ++ /* mask out sign bit */ ++ anosign = lp_build_abs(bld, a); ++ /* ++ * mask out all values if anosign > 2^24 ++ * This should work both for large ints (all rounding is no-op for them ++ * because such floats are always exact) as well as special cases like ++ * NaNs, Infs (taking advantage of the fact they use max exponent). ++ * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) ++ */ ++ anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); ++ cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); ++ mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); ++ return lp_build_select(bld, mask, a, res); + } + } + +@@ -1620,11 +1645,36 @@ lp_build_round(struct lp_build_context *bld, + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST); + } + else { +- LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); +- LLVMValueRef res; ++ const struct lp_type type = bld->type; ++ struct lp_type inttype; ++ struct lp_build_context intbld; ++ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); ++ LLVMValueRef res, anosign, mask; ++ LLVMTypeRef int_vec_type = bld->int_vec_type; ++ LLVMTypeRef vec_type = bld->vec_type; ++ ++ assert(type.width == 32); /* might want to handle doubles at some point */ ++ ++ inttype = type; ++ inttype.floating = 0; ++ lp_build_context_init(&intbld, bld->gallivm, inttype); ++ + res = lp_build_iround(bld, a); + res = LLVMBuildSIToFP(builder, res, vec_type, ""); +- return res; ++ ++ /* mask out sign bit */ ++ anosign = lp_build_abs(bld, a); ++ /* ++ * mask out all values if anosign > 2^24 ++ * This should work both for large ints (all rounding is no-op for them ++ * because such floats are always exact) as well as special cases like ++ * NaNs, Infs (taking advantage of the fact they use max exponent). ++ * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) ++ */ ++ anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); ++ cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); ++ mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); ++ return lp_build_select(bld, mask, a, res); + } + } + +@@ -1648,11 +1698,52 @@ lp_build_floor(struct lp_build_context *bld, + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_FLOOR); + } + else { +- LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); +- LLVMValueRef res; +- res = lp_build_ifloor(bld, a); +- res = LLVMBuildSIToFP(builder, res, vec_type, ""); +- return res; ++ const struct lp_type type = bld->type; ++ struct lp_type inttype; ++ struct lp_build_context intbld; ++ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); ++ LLVMValueRef trunc, res, anosign, mask; ++ LLVMTypeRef int_vec_type = bld->int_vec_type; ++ LLVMTypeRef vec_type = bld->vec_type; ++ ++ assert(type.width == 32); /* might want to handle doubles at some point */ ++ ++ inttype = type; ++ inttype.floating = 0; ++ lp_build_context_init(&intbld, bld->gallivm, inttype); ++ ++ /* round by truncation */ ++ trunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); ++ res = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc"); ++ ++ if (type.sign) { ++ LLVMValueRef tmp; ++ ++ /* ++ * fix values if rounding is wrong (for non-special cases) ++ * - this is the case if trunc > a ++ */ ++ mask = lp_build_cmp(bld, PIPE_FUNC_GREATER, res, a); ++ /* tmp = trunc > a ? 1.0 : 0.0 */ ++ tmp = LLVMBuildBitCast(builder, bld->one, int_vec_type, ""); ++ tmp = lp_build_and(&intbld, mask, tmp); ++ tmp = LLVMBuildBitCast(builder, tmp, vec_type, ""); ++ res = lp_build_sub(bld, res, tmp); ++ } ++ ++ /* mask out sign bit */ ++ anosign = lp_build_abs(bld, a); ++ /* ++ * mask out all values if anosign > 2^24 ++ * This should work both for large ints (all rounding is no-op for them ++ * because such floats are always exact) as well as special cases like ++ * NaNs, Infs (taking advantage of the fact they use max exponent). ++ * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) ++ */ ++ anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); ++ cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); ++ mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); ++ return lp_build_select(bld, mask, a, res); + } + } + +@@ -1676,11 +1767,48 @@ lp_build_ceil(struct lp_build_context *bld, + return lp_build_round_arch(bld, a, LP_BUILD_ROUND_CEIL); + } + else { +- LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); +- LLVMValueRef res; +- res = lp_build_iceil(bld, a); +- res = LLVMBuildSIToFP(builder, res, vec_type, ""); +- return res; ++ const struct lp_type type = bld->type; ++ struct lp_type inttype; ++ struct lp_build_context intbld; ++ LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24); ++ LLVMValueRef trunc, res, anosign, mask, tmp; ++ LLVMTypeRef int_vec_type = bld->int_vec_type; ++ LLVMTypeRef vec_type = bld->vec_type; ++ ++ assert(type.width == 32); /* might want to handle doubles at some point */ ++ ++ inttype = type; ++ inttype.floating = 0; ++ lp_build_context_init(&intbld, bld->gallivm, inttype); ++ ++ /* round by truncation */ ++ trunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); ++ trunc = LLVMBuildSIToFP(builder, trunc, vec_type, "ceil.trunc"); ++ ++ /* ++ * fix values if rounding is wrong (for non-special cases) ++ * - this is the case if trunc < a ++ */ ++ mask = lp_build_cmp(bld, PIPE_FUNC_LESS, trunc, a); ++ /* tmp = trunc < a ? 1.0 : 0.0 */ ++ tmp = LLVMBuildBitCast(builder, bld->one, int_vec_type, ""); ++ tmp = lp_build_and(&intbld, mask, tmp); ++ tmp = LLVMBuildBitCast(builder, tmp, vec_type, ""); ++ res = lp_build_add(bld, trunc, tmp); ++ ++ /* mask out sign bit */ ++ anosign = lp_build_abs(bld, a); ++ /* ++ * mask out all values if anosign > 2^24 ++ * This should work both for large ints (all rounding is no-op for them ++ * because such floats are always exact) as well as special cases like ++ * NaNs, Infs (taking advantage of the fact they use max exponent). ++ * (2^24 is arbitrary anything between 2^24 and 2^31 should work.) ++ */ ++ anosign = LLVMBuildBitCast(builder, anosign, int_vec_type, ""); ++ cmpval = LLVMBuildBitCast(builder, cmpval, int_vec_type, ""); ++ mask = lp_build_cmp(&intbld, PIPE_FUNC_GREATER, anosign, cmpval); ++ return lp_build_select(bld, mask, a, res); + } + } + +@@ -1826,32 +1954,30 @@ lp_build_ifloor(struct lp_build_context *bld, + res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_FLOOR); + } + else { +- /* Take the sign bit and add it to 1 constant */ +- LLVMTypeRef vec_type = bld->vec_type; +- unsigned mantissa = lp_mantissa(type); +- LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, +- (unsigned long long)1 << (type.width - 1)); +- LLVMValueRef sign; +- LLVMValueRef offset; ++ struct lp_type inttype; ++ struct lp_build_context intbld; ++ LLVMValueRef trunc, itrunc, mask; + +- /* sign = a < 0 ? ~0 : 0 */ +- sign = LLVMBuildBitCast(builder, a, int_vec_type, ""); +- sign = LLVMBuildAnd(builder, sign, mask, ""); +- sign = LLVMBuildAShr(builder, sign, +- lp_build_const_int_vec(bld->gallivm, type, +- type.width - 1), +- "ifloor.sign"); ++ assert(type.floating); ++ assert(lp_check_value(type, a)); + +- /* offset = -0.99999(9)f */ +- offset = lp_build_const_vec(bld->gallivm, type, +- -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); +- offset = LLVMConstBitCast(offset, int_vec_type); ++ inttype = type; ++ inttype.floating = 0; ++ lp_build_context_init(&intbld, bld->gallivm, inttype); + +- /* offset = a < 0 ? offset : 0.0f */ +- offset = LLVMBuildAnd(builder, offset, sign, ""); +- offset = LLVMBuildBitCast(builder, offset, vec_type, "ifloor.offset"); ++ /* round by truncation */ ++ itrunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); ++ trunc = LLVMBuildSIToFP(builder, itrunc, bld->vec_type, "ifloor.trunc"); + +- res = LLVMBuildFAdd(builder, res, offset, "ifloor.res"); ++ /* ++ * fix values if rounding is wrong (for non-special cases) ++ * - this is the case if trunc > a ++ * The results of doing this with NaNs, very large values etc. ++ * are undefined but this seems to be the case anyway. ++ */ ++ mask = lp_build_cmp(bld, PIPE_FUNC_GREATER, trunc, a); ++ /* cheapie minus one with mask since the mask is minus one / zero */ ++ return lp_build_add(&intbld, itrunc, mask); + } + } + +@@ -1883,35 +2009,30 @@ lp_build_iceil(struct lp_build_context *bld, + res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_CEIL); + } + else { +- LLVMTypeRef vec_type = bld->vec_type; +- unsigned mantissa = lp_mantissa(type); +- LLVMValueRef offset; ++ struct lp_type inttype; ++ struct lp_build_context intbld; ++ LLVMValueRef trunc, itrunc, mask; + +- /* offset = 0.99999(9)f */ +- offset = lp_build_const_vec(bld->gallivm, type, +- (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa)); ++ assert(type.floating); ++ assert(lp_check_value(type, a)); + +- if (type.sign) { +- LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, +- (unsigned long long)1 << (type.width - 1)); +- LLVMValueRef sign; ++ inttype = type; ++ inttype.floating = 0; ++ lp_build_context_init(&intbld, bld->gallivm, inttype); + +- /* sign = a < 0 ? 0 : ~0 */ +- sign = LLVMBuildBitCast(builder, a, int_vec_type, ""); +- sign = LLVMBuildAnd(builder, sign, mask, ""); +- sign = LLVMBuildAShr(builder, sign, +- lp_build_const_int_vec(bld->gallivm, type, +- type.width - 1), +- "iceil.sign"); +- sign = LLVMBuildNot(builder, sign, "iceil.not"); +- +- /* offset = a < 0 ? 0.0 : offset */ +- offset = LLVMConstBitCast(offset, int_vec_type); +- offset = LLVMBuildAnd(builder, offset, sign, ""); +- offset = LLVMBuildBitCast(builder, offset, vec_type, "iceil.offset"); +- } ++ /* round by truncation */ ++ itrunc = LLVMBuildFPToSI(builder, a, int_vec_type, ""); ++ trunc = LLVMBuildSIToFP(builder, itrunc, bld->vec_type, "iceil.trunc"); + +- res = LLVMBuildFAdd(builder, a, offset, "iceil.res"); ++ /* ++ * fix values if rounding is wrong (for non-special cases) ++ * - this is the case if trunc < a ++ * The results of doing this with NaNs, very large values etc. ++ * are undefined but this seems to be the case anyway. ++ */ ++ mask = lp_build_cmp(bld, PIPE_FUNC_LESS, trunc, a); ++ /* cheapie plus one with mask since the mask is minus one / zero */ ++ return lp_build_sub(&intbld, itrunc, mask); + } + + /* round to nearest (toward zero) */ +diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c +index 99928b8..f14e4b3 100644 +--- a/src/gallium/drivers/llvmpipe/lp_test_arit.c ++++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c +@@ -207,6 +207,18 @@ const float round_values[] = { + -10.0, -1, 0.0, 12.0, + -1.49, -0.25, 1.25, 2.51, + -0.99, -0.01, 0.01, 0.99, ++ 1.401298464324817e-45f, // smallest denormal ++ -1.401298464324817e-45f, ++ 1.62981451e-08f, ++ -1.62981451e-08f, ++ 1.62981451e15f, // large number not representable as 32bit int ++ -1.62981451e15f, ++ FLT_EPSILON, ++ -FLT_EPSILON, ++ 1.0f - 0.5f*FLT_EPSILON, ++ -1.0f + FLT_EPSILON, ++ FLT_MAX, ++ -FLT_MAX + }; + + static float fractf(float x) +-- +cgit v0.9.0.2-2-gbebe |