Compiling with -O{1,2,3} breaks custom rule that works with -O0

My usual preface: apologies if I have missed relevant docs or existing/past issues on this.

I have some C code that uses a custom forward-mode rule. Compiled with `-O0`, that rule gets triggered. But if I compile with any higher optimization level, the custom rule does not get triggered (which I investigate using a simple print statement). Here is an MWE:
```c

#include<stdlib.h>
#include<stdio.h>
#include<math.h>

/* Marker globals passed as arguments to __enzyme_fwddiff (see test() below,
 * where enzyme_dup precedes the pair (v, dv) and enzyme_const precedes x).
 * NOTE(review): presumably Enzyme recognizes these by name to decide how the
 * following argument is treated -- confirm against the Enzyme documentation.
 * enzyme_dupnoneed is declared but not used anywhere in this example. */
int enzyme_const, enzyme_dup, enzyme_dupnoneed;
/* Variadic entry point called in test(); no definition appears in this file,
 * presumably it is supplied/rewritten by the Enzyme plugin at compile time. */
double __enzyme_fwddiff(void*, ...);

/*
 * Out-parameter absolute value: reads *t and stores fabs(*t) into *at.
 * This is the primal function listed first in
 * __enzyme_register_derivative_ipabsterm.
 */
static void ipabsterm(double* at, double* t){
  const double magnitude = fabs(*t);
  *at = magnitude;
}

/*
 * Custom forward-mode rule registered for ipabsterm (second entry of
 * __enzyme_register_derivative_ipabsterm).
 *
 * at / d_at : output slot and its tangent slot; both are overwritten.
 * t  / d_t  : input value and its tangent; only read here.
 *
 * Prints "Hi!" on every call, which the surrounding report uses to detect
 * whether the rule was actually invoked.
 *
 * NOTE(review): both outputs are set to fmax(*t, *d_t), which is not the
 * analytic derivative of fabs -- presumably a deliberate placeholder for the
 * reproduction; confirm before reusing this rule.
 */
static void derivative_ipabsterm(double* at, double* d_at,
                                 double*  t, double* d_t){
  printf("Hi!\n");
  // The second fmax re-reads *t and *d_t after *at has been written.
  *at   = fmax(*t, *d_t);
  *d_at = fmax(*t, *d_t);
}

/*
 * Two-entry table pairing the primal function (ipabsterm) with its custom
 * derivative (derivative_ipabsterm), in that order.
 * NOTE(review): presumably Enzyme discovers this global through the
 * "__enzyme_register_derivative" name prefix -- confirm against the Enzyme
 * custom-derivative documentation.
 */
void* __enzyme_register_derivative_ipabsterm[] = {
  (void*)ipabsterm,
  (void*)derivative_ipabsterm,
};

/*
 * By-value wrapper around ipabsterm: passes the address of its argument and of
 * a local result slot, then returns whatever ipabsterm stored there.
 * The function carries the optnone attribute, so the compiler does not
 * optimize its body.
 */
double __attribute__((optnone)) absterm(double t){
  double result;
  ipabsterm(&result, &t);
  return result;
}

/*
 * Multiply-add helper: returns x*y + z.
 * The product and the sum are kept in one expression, exactly as a plain
 * "x*y + z" would be written.
 */
double _fma(double x, double y, double z){
  const double result = x*y + z;
  return result;
}

/*
 * Evaluates the polynomial
 *     coefs[0] + coefs[1]*x + ... + coefs[len-1]*x^(len-1)
 * with Horner's scheme, starting from the highest-order coefficient.
 *
 * x     : evaluation point.
 * coefs : coefficients in ascending order of power; only read.
 * len   : number of coefficients.
 *
 * Returns the polynomial value. A constant polynomial (len == 1) yields
 * coefs[0]; an empty one (len <= 0) yields 0.0 without touching coefs.
 */
double horner(double x, double* coefs, int len) {
  // Nothing to evaluate; also avoids reading coefs[len-1] out of bounds.
  if (len <= 0) {
    return 0.0;
  }
  // Seed the accumulator with the leading coefficient so that a single
  // coefficient is returned as-is when the loop body never runs.
  double acc = coefs[len-1];
  for (int k = len-1; k > 0; k--) {
    acc = coefs[k-1] + x*acc;
  }
  return acc;
}

/*
 * Numerical approximation evaluated at _x; judging by the name and the
 * sqrt(2*pi) * x^(x - 1/2) * exp(-x) form of the large-argument branch this is
 * presumably the gamma function -- confirm against the reference it was
 * ported from.
 *
 * Structure, as written below:
 *   - _x < 0     : work with x = -_x and keep s = sin(pi*_x) * x for the final
 *                  division; returns NAN when sin(pi*_x) is exactly 0.
 *   - non-finite : x is returned unchanged.
 *   - x > 11.5   : closed-form expression times a degree-9 polynomial in 1/x.
 *   - otherwise  : x is shifted into [2, 3) while a correction factor z is
 *                  accumulated, then a ratio of two polynomials in (x - 2)
 *                  is used.
 */
double gamma(double _x) {

  const double sq2pi = sqrt(2*M_PI);

	double x = _x;
	// s is assigned only when _x < 0 and is read only on the _x < 0 return paths.
	double s;
	if(x < 0) {
		s = sin(M_PI * _x);
		if(s == 0) return NAN;
		x = -x;
		s *= x;
	}
	if(!isfinite(x)) return x;

	if(x > 11.5) {
		double w = 1/x; 
		// Coefficients in ascending powers of w = 1/x (horner treats coefs[0]
		// as the constant term).
		double coefs[10] = {1.0,
			8.333333333333331800504e-2, 3.472222222230075327854e-3, -2.681327161876304418288e-3, -2.294719747873185405699e-4,
			7.840334842744753003862e-4, 6.989332260623193171870e-5, -5.950237554056330156018e-4, -2.363848809501759061727e-5,
			7.147391378143610789273e-4
		};
		w = horner(w, coefs, 10);
		// v = x^(x/2 - 1/4), so v*v = x^(x - 1/2); the power is split in two
		// factors and exp(x) divides only one of them.
		double muladd = _fma(0.5, x, -0.25);
		double v = pow(x, muladd);
		double res = sq2pi * v * (v / exp(x)) * w;

		if(_x < 0) {
			return M_PI / (res * s);
		} else {
			return res;
		}
	}
	// Numerator (P) and denominator (Q) coefficients, ascending powers of (x - 2).
	double P[8] = {
		1.000000000000000000009e0, 8.378004301573126728826e-1, 3.629515436640239168939e-1, 1.113062816019361559013e-1,
		2.385363243461108252554e-2, 4.092666828394035500949e-3, 4.542931960608009155600e-4, 4.212760487471622013093e-5
	};
	double Q[9] = {
		9.999999999999999999908e-1, 4.150160950588455434583e-1, -2.243510905670329164562e-1, -4.633887671244534213831e-2,
		2.773706565840072979165e-2, -7.955933682494738320586e-4, -1.237799246653152231188e-3, 2.346584059160635244282e-4,
		-1.397148517476170440917e-5
	};

	// Step x down (multiplying z) or up (dividing z) by 1 until 2 <= x < 3.
	double z = 1.0;
	while(x >= 3.0) {
		x -= 1.0;
		z *= x;
	}
	while(x < 2.0) {
		z /= x;
		x += 1.0;
	}

	x -= 2.0;
	double p = horner(x, P, 8);
	double q = horner(x, Q, 9);
	// Negative inputs divide pi by (s * z*p/q); non-negative inputs return z*p/q.
	return _x < 0 ? M_PI * q / (s * z * p) : z * p / q;
}

/*
 * Truncated power series in x*x combining two running sums, one built from
 * factors 1/(k - v) and one from factors 1/(k + v). Judging by the name this
 * is presumably the small-argument series for the modified Bessel function
 * K_v(x) -- confirm against the reference implementation.
 *
 * v : order-like parameter (the argument differentiated in test()).
 * x : evaluation point.
 *
 * At most 49 terms (k = 1..49) are accumulated; the loop exits early once
 * absterm() of the freshly updated (k - v) term is <= 2.220446049250313e-16.
 * That absterm() call is what reaches ipabsterm, the function with the
 * registered custom derivative.
 */
double besselk_power_series(double v, double x) {
  const double tol       = 2.220446049250313e-16;
  const double gam_v     = gamma(v);
  // presumably Gamma(-v) obtained from gam_v via a reflection identity
  const double gam_neg_v = M_PI/(sin(-M_PI*fabs(v))*gam_v*v);
  const double x_sq      = x*x;

  double sum_minus  = 0.0;
  double sum_plus   = 0.0;
  double term_minus = 1.0;
  double term_plus  = 1.0;

  int k = 1;
  while (k < 50) {
    const double fourk = 4*k;
    // Add the current terms first, then advance both terms to index k.
    sum_minus  += term_minus;
    sum_plus   += term_plus;
    term_minus *= x_sq/(fourk*(k-v));
    term_plus  *= x_sq/(fourk*(k+v));
    if (absterm(term_minus) <= tol) {
      break;
    }
    k++;
  }

  const double half_x_pow_v = pow(x/2, v);
  const double combined     = gam_v*sum_minus + half_x_pow_v*half_x_pow_v*gam_neg_v*sum_plus;
  return combined/(2*half_x_pow_v);
}

/*
 * Calls __enzyme_fwddiff on besselk_power_series with v tagged enzyme_dup
 * (paired with a tangent of 1.0) and x tagged enzyme_const, and returns the
 * value that call produces -- presumably the derivative with respect to v.
 */
double test(double v, double x) {
	const double seed_v = 1.0;
	return __enzyme_fwddiff((void*) besselk_power_series,
	                        enzyme_dup, v, seed_v,
	                        enzyme_const, x);
}

/*
 * Driver: evaluates test(1.02, 1.51) and prints the result in scientific
 * notation with 16 digits after the decimal point. Command-line arguments
 * are not used.
 */
int main(int argc, char** argv) {
  const double result = test(1.02, 1.51);
  printf("%1.16e\n", result);
  return 0;
}
```
I compile this with
```bash
clang mwe.c -fplugin=/usr/lib/ClangEnzyme-18.so -O0 -Rpass=.* -lm -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -Wall -pedantic -o mwe
```
on Linux with `clang 18`. When the compiled code actually hits the rule, `./mwe` prints a handful of `Hi!` lines before giving the solution.

I have the `-Rpass=.*` flag to see all the compiler optimizations that get done, and depending on a few small tweaks I sometimes get something like
```
libmatern.c:[...]: remark: Cannot use provided custom derivative pass [-Rpass=enzyme]
```
for `ipabsterm`. But I'm really having trouble figuring out which compiler optimization is breaking things. As you can see from the `optnone` attribute I've put on the wrapper function `absterm`, I was suspicious that the function was getting inlined and the loop vectorized, which might have been breaking things. But nothing I have tried has fixed it.

Any thoughts or suggestions you have would be greatly appreciated! Thanks so much in advance.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Compiling with -O{1,2,3} breaks custom rule that works with -O0 #1994

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Compiling with -O{1,2,3} breaks custom rule that works with -O0 #1994

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions