Bad codegen after commit 3aae916 #92217

dyung · 2024-05-15T05:14:44Z

We have an internal test that recently started producing incorrect output which I bisected back to commit 3aae916.

Consider the following code:

static void init(unsigned char pred, volatile void *data, unsigned size) {
  unsigned char *bytes = (unsigned char *)data;
  for (unsigned i = 0; i != size; ++i) {
    bytes[i] = pred + i;
  }
}
#define INIT(PRED, VAR) init(PRED, &VAR, sizeof(VAR))

#include <x86intrin.h>

int main(int argc, char *argv[])
{
      __m256 id7702;
      INIT(243, id7702);
    __m256 id7701 = _mm256_sqrt_ps(id7702);
  volatile int id7700 = _mm256_movemask_ps(id7701);
  //printf("id7700:%x\n", id7700);
  return id7700;
}

If compiled with optimizations and -mavx and the resulting binary is run, the return value changes after 3aae916:

$ ~/src/upstream/3aae916ff7fe9d0953aa63b2ba1d0e871f6f76fc-linux/bin/clang -O2 -mavx repro.cpp -o repro.bad.out
$ ~/src/upstream/f658d84e01bcdd49e27dc9ef80e1a6cc5f9417fe-linux/bin/clang -O2 -mavx repro.cpp -o repro.good.out
$ ./repro.good.out 
$ echo $?
7
$ ./repro.bad.out 
$ echo $?
0

You can also see the difference on godbolt comparing with 18.1.0:
https://godbolt.org/z/KPbK4jcGc

The text was updated successfully, but these errors were encountered:

dtcxzyw · 2024-05-15T06:20:57Z

Bisected to EarlyCSE: https://godbolt.org/z/ejMhYfGzs
Before:

define dso_local i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 {
entry:
  %id7700 = alloca i32, align 4
  %id7702.sroa.0.0.vec.insert = insertelement <32 x i8> undef, i8 -13, i32 0
  %id7702.sroa.0.1.vec.insert = insertelement <32 x i8> %id7702.sroa.0.0.vec.insert, i8 -12, i32 1
  %id7702.sroa.0.2.vec.insert = insertelement <32 x i8> %id7702.sroa.0.1.vec.insert, i8 -11, i32 2
  %id7702.sroa.0.3.vec.insert = insertelement <32 x i8> %id7702.sroa.0.2.vec.insert, i8 -10, i32 3
  %id7702.sroa.0.4.vec.insert = insertelement <32 x i8> %id7702.sroa.0.3.vec.insert, i8 -9, i32 4
  %id7702.sroa.0.5.vec.insert = insertelement <32 x i8> %id7702.sroa.0.4.vec.insert, i8 -8, i32 5
  %id7702.sroa.0.6.vec.insert = insertelement <32 x i8> %id7702.sroa.0.5.vec.insert, i8 -7, i32 6
  %id7702.sroa.0.7.vec.insert = insertelement <32 x i8> %id7702.sroa.0.6.vec.insert, i8 -6, i32 7
  %id7702.sroa.0.8.vec.insert = insertelement <32 x i8> %id7702.sroa.0.7.vec.insert, i8 -5, i32 8
  %id7702.sroa.0.9.vec.insert = insertelement <32 x i8> %id7702.sroa.0.8.vec.insert, i8 -4, i32 9
  %id7702.sroa.0.10.vec.insert = insertelement <32 x i8> %id7702.sroa.0.9.vec.insert, i8 -3, i32 10
  %id7702.sroa.0.11.vec.insert = insertelement <32 x i8> %id7702.sroa.0.10.vec.insert, i8 -2, i32 11
  %id7702.sroa.0.12.vec.insert = insertelement <32 x i8> %id7702.sroa.0.11.vec.insert, i8 -1, i32 12
  %id7702.sroa.0.13.vec.insert = insertelement <32 x i8> %id7702.sroa.0.12.vec.insert, i8 0, i32 13
  %id7702.sroa.0.14.vec.insert = insertelement <32 x i8> %id7702.sroa.0.13.vec.insert, i8 1, i32 14
  %id7702.sroa.0.15.vec.insert = insertelement <32 x i8> %id7702.sroa.0.14.vec.insert, i8 2, i32 15
  %id7702.sroa.0.16.vec.insert = insertelement <32 x i8> %id7702.sroa.0.15.vec.insert, i8 3, i32 16
  %id7702.sroa.0.17.vec.insert = insertelement <32 x i8> %id7702.sroa.0.16.vec.insert, i8 4, i32 17
  %id7702.sroa.0.18.vec.insert = insertelement <32 x i8> %id7702.sroa.0.17.vec.insert, i8 5, i32 18
  %id7702.sroa.0.19.vec.insert = insertelement <32 x i8> %id7702.sroa.0.18.vec.insert, i8 6, i32 19
  %id7702.sroa.0.20.vec.insert = insertelement <32 x i8> %id7702.sroa.0.19.vec.insert, i8 7, i32 20
  %id7702.sroa.0.21.vec.insert = insertelement <32 x i8> %id7702.sroa.0.20.vec.insert, i8 8, i32 21
  %id7702.sroa.0.22.vec.insert = insertelement <32 x i8> %id7702.sroa.0.21.vec.insert, i8 9, i32 22
  %id7702.sroa.0.23.vec.insert = insertelement <32 x i8> %id7702.sroa.0.22.vec.insert, i8 10, i32 23
  %id7702.sroa.0.24.vec.insert = insertelement <32 x i8> %id7702.sroa.0.23.vec.insert, i8 11, i32 24
  %id7702.sroa.0.25.vec.insert = insertelement <32 x i8> %id7702.sroa.0.24.vec.insert, i8 12, i32 25
  %id7702.sroa.0.26.vec.insert = insertelement <32 x i8> %id7702.sroa.0.25.vec.insert, i8 13, i32 26
  %id7702.sroa.0.27.vec.insert = insertelement <32 x i8> %id7702.sroa.0.26.vec.insert, i8 14, i32 27
  %id7702.sroa.0.28.vec.insert = insertelement <32 x i8> %id7702.sroa.0.27.vec.insert, i8 15, i32 28
  %id7702.sroa.0.29.vec.insert = insertelement <32 x i8> %id7702.sroa.0.28.vec.insert, i8 16, i32 29
  %id7702.sroa.0.30.vec.insert = insertelement <32 x i8> %id7702.sroa.0.29.vec.insert, i8 17, i32 30
  %id7702.sroa.0.31.vec.insert = insertelement <32 x i8> %id7702.sroa.0.30.vec.insert, i8 18, i32 31
  %0 = bitcast <32 x i8> %id7702.sroa.0.31.vec.insert to <8 x float>
  %1 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %0)
  call void @llvm.lifetime.start.p0(i64 4, ptr %id7700)
  %2 = bitcast <8 x float> %1 to <8 x i32>
  %3 = icmp slt <8 x i32> %2, zeroinitializer
  %4 = bitcast <8 x i1> %3 to i8
  %5 = zext i8 %4 to i32
  store volatile i32 %5, ptr %id7700, align 4, !tbaa !5
  %id7700.0.id7700.0. = load volatile i32, ptr %id7700, align 4, !tbaa !5
  call void @llvm.lifetime.end.p0(i64 4, ptr %id7700)
  ret i32 %id7700.0.id7700.0.
}

After:

; Function Attrs: nounwind uwtable
define dso_local i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 {
entry:
  %id7700 = alloca i32, align 4
  %0 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> <float 0xC6DEBE9E60000000, float 0xC75F3F1EE0000000, float 0xC7DFBF9F60000000, float 0x3840201FE0000000, float 0x38C0A08060000000, float 0x39412100E0000000, float 0x39C1A18160000000, float 0x3A422201E0000000>)
  call void @llvm.lifetime.start.p0(i64 4, ptr %id7700)
  %1 = bitcast <8 x float> %0 to <8 x i32>
  store volatile i32 0, ptr %id7700, align 4, !tbaa !5
  %id7700.0.id7700.0. = load volatile i32, ptr %id7700, align 4, !tbaa !5
  call void @llvm.lifetime.end.p0(i64 4, ptr %id7700)
  ret i32 %id7700.0.id7700.0.
}

computeKnownFPClass assumes that the square root of a value is always non-negative.

dtcxzyw · 2024-05-15T06:41:09Z

computeKnownFPClass assumes that the square root of a value is always non-negative.

Any thoughts? @arsenm

C23 Standard (N3096) says:

7.12.7.10 The sqrt functions
The sqrt functions compute the nonnegative square root of x. A domain error occurs if the argument is less than zero

dtcxzyw · 2024-05-15T06:47:36Z

See 7.2.1 Treatment of error conditions:

For all functions, a domain error occurs if and only if an input argument is outside the domain over which the mathematical function is defined. The description of each function lists any re- quired domain errors; an implementation may define additional domain errors, provided that such errors are consistent with the mathematical definition of the function.282) Whether a sig-naling NaN input causes a domain error is implementation-defined. On a domain error, the function returns an implementation-defined value; if the integer expression math_errhandling
& MATH_ERRNO is nonzero, the integer expression errno acquires the value EDOM; if the integer expressionmath_errhandling & MATH_ERREXCEPTisnonzero,the"invalid"floating-pointexcep- tion is raised.

dtcxzyw · 2024-05-15T06:48:21Z

cc @andykaylor @jcranmer-intel

arsenm · 2024-05-15T06:50:24Z

computeKnownFPClass assumes that the square root of a value is always non-negative.

Any thoughts? @arsenm

It is always non-negative, except for -0

dtcxzyw · 2024-05-15T06:53:18Z

computeKnownFPClass assumes that the square root of a value is always non-negative.

Any thoughts? @arsenm

It is always non-negative, except for -0

See

llvm-project/llvm/lib/Analysis/ValueTracking.cpp

Lines 4930 to 4938 in 45726c1

    
           // If the input denormal mode could be PreserveSign, a negative 
        
           // subnormal input could produce a negative zero output. 
        
           const Function *F = II->getFunction(); 
        
           if (Q.IIQ.hasNoSignedZeros(II) || 
        
               (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) { 
        
             Known.knownNot(fcNegZero); 
        
             if (KnownSrc.isKnownNeverNaN()) 
        
               Known.signBitMustBeZero(); 
        
           }

arsenm · 2024-05-15T07:06:59Z

See 7.2.1 Treatment of error conditions:

The C definitions are unhelpful. It returns a NaN, as per IEEE

dtcxzyw · 2024-05-15T07:23:59Z

See 7.2.1 Treatment of error conditions:

The C definitions are unhelpful. It returns a NaN, as per IEEE

IEEE Std 754-2019 IEEE Standard for Floating-Point Arithmetic:

6.3 The sign bit 6.3.0
When either an input or result is a NaN, this standard does not interpret the sign of a NaN.
...
Except that squareRoot(−0) shall be −0, every numeric squareRoot result shall have a positive sign.

I will post a fix.

…92510) According to IEEE Std 754-2019, `sqrt` returns nan when the input is negative (except for -0). In this case, we cannot make assumptions about sign bit of the result. Fixes #92217

github-actions bot added the new issue label May 15, 2024

dyung mentioned this issue May 15, 2024

Reland "[ValueTracking] Compute knownbits from known fp classes" #92084

Merged

dtcxzyw self-assigned this May 15, 2024

dtcxzyw added miscompilation llvm:optimizations and removed new issue labels May 15, 2024

dtcxzyw added the floating-point Floating-point math label May 15, 2024

This was referenced May 15, 2024

False negative with the sign bit of nans AliveToolkit/alive2#1037

Open

[ValueTracking] Fix incorrect inferrence about the signbit of sqrt #92510

Merged

dtcxzyw closed this as completed in #92510 May 20, 2024

EugeneZelenko added llvm:analysis and removed llvm:optimizations labels May 20, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Bad codegen after commit 3aae916 #92217

Bad codegen after commit 3aae916 #92217

dyung commented May 15, 2024

dtcxzyw commented May 15, 2024

dtcxzyw commented May 15, 2024 •

edited

dtcxzyw commented May 15, 2024

dtcxzyw commented May 15, 2024

arsenm commented May 15, 2024

dtcxzyw commented May 15, 2024

arsenm commented May 15, 2024

dtcxzyw commented May 15, 2024

Bad codegen after commit 3aae916 #92217

Bad codegen after commit 3aae916 #92217

Comments

dyung commented May 15, 2024

dtcxzyw commented May 15, 2024

dtcxzyw commented May 15, 2024 • edited

dtcxzyw commented May 15, 2024

dtcxzyw commented May 15, 2024

arsenm commented May 15, 2024

dtcxzyw commented May 15, 2024

arsenm commented May 15, 2024

dtcxzyw commented May 15, 2024

dtcxzyw commented May 15, 2024 •

edited