My usual preface: apologies if I have missed relevant docs or existing/past issues on this.
I have some C code that uses a custom forward-mode rule. When compiled with `-O0`, the rule gets triggered. But if I compile with any higher optimization level, the custom rule does not get triggered (which I check with a simple print statement inside the rule). Here is an MWE:
#include<stdlib.h>
#include<stdio.h>
#include<math.h>
/*
 * Marker globals for Enzyme. In test() below, enzyme_dup is passed immediately
 * before the (v, dv) pair and enzyme_const immediately before x in the
 * __enzyme_fwddiff call. enzyme_dupnoneed is declared but not used in this file.
 * NOTE(review): presumably Enzyme recognizes these by name as argument
 * activity annotations -- confirm against the Enzyme documentation.
 */
int enzyme_const, enzyme_dup, enzyme_dupnoneed;
/*
 * Variadic forward-mode entry point. The first argument is the function to
 * differentiate (cast to void*); the remaining arguments are the marker /
 * value sequence described above. No definition is given in this file.
 */
double __enzyme_fwddiff(void*, ...);
/*
 * In-place style absolute value: reads the double pointed to by t and stores
 * its absolute value in the double pointed to by at.
 * This is the primal function listed first in
 * __enzyme_register_derivative_ipabsterm.
 */
static void ipabsterm(double* at, double* t){
    const double input = *t;
    *at = fabs(input);
}
/*
 * Custom forward-mode rule paired with ipabsterm in
 * __enzyme_register_derivative_ipabsterm.
 *
 * at, t     : the same pointers ipabsterm receives.
 * d_at, d_t : companion pointers for at and t.
 *             NOTE(review): presumably the tangent (shadow) values supplied
 *             by Enzyme -- confirm against the Enzyme documentation.
 *
 * Prints "Hi!" on every call so that one can see from the program output
 * whether the rule was actually invoked.
 *
 * NOTE(review): both outputs are set to fmax(*t, *d_t). This is not the
 * analytic derivative of fabs (which would be sign(*t) * *d_t), and *at is
 * not fabs(*t) either. Presumably this is a deliberate placeholder for the
 * MWE -- confirm before reusing the rule elsewhere.
 */
static void derivative_ipabsterm(double* at, double* d_at,
double* t, double* d_t){
/* Diagnostic marker: shows that the custom rule ran. */
printf("Hi!\n");
/* *t and *d_t are re-read for the second assignment, after *at was written. */
*at = fmax(*t, *d_t);
*d_at = fmax(*t, *d_t);
}
/*
 * Two-entry table pairing the primal function (ipabsterm) with its custom
 * forward-mode rule (derivative_ipabsterm), in that order.
 * NOTE(review): presumably Enzyme discovers this table through the
 * __enzyme_register_derivative_ name prefix -- confirm against the Enzyme
 * custom-derivative documentation.
 */
void* __enzyme_register_derivative_ipabsterm[] = {
(void*)ipabsterm,
(void*)derivative_ipabsterm,
};
/*
 * By-value wrapper around ipabsterm: passes t by address, lets ipabsterm
 * write the result into a local, and returns that local.
 *
 * The optnone attribute is the author's attempt to keep the optimizer away
 * from this call (per the accompanying text, the suspicion was that inlining
 * and loop vectorization prevented the custom rule from being used).
 */
double __attribute__((optnone)) absterm(double t){
double at;
/* ipabsterm stores its result through the first pointer, so at is set here. */
ipabsterm(&at, &t);
return at;
}
/*
 * Multiply-add helper: returns x*y + z, written as an ordinary expression.
 *
 * NOTE(review): this is not a call to fma() from <math.h>; whether the
 * product and sum are fused into a single rounding depends on the compiler's
 * floating-point contraction setting -- confirm if bit-exact results matter.
 * NOTE(review): identifiers beginning with an underscore are reserved at file
 * scope in C; consider renaming if this leaves the MWE.
 */
double _fma(double x, double y, double z){
return x*y + z;
}
/*
 * Evaluate a polynomial at x using Horner's scheme.
 *
 * x     : evaluation point.
 * coefs : coefficients in ascending order of power, so the result is
 *         coefs[0] + coefs[1]*x + ... + coefs[len-1]*x^(len-1).
 * len   : number of coefficients.
 *
 * Returns the polynomial value. A constant polynomial (len == 1) yields
 * coefs[0]; an empty coefficient list (len <= 0) yields 0.0 and coefs is not
 * read at all.
 */
double horner(double x, double* coefs, int len) {
    /* Nothing to evaluate; also avoids reading coefs[-1]. */
    if (len <= 0) {
        return 0.0;
    }
    /* Seed with the highest-order coefficient so that len == 1 returns coefs[0]. */
    double acc = coefs[len-1];
    for (int k = len-2; k >= 0; k--) {
        acc = coefs[k] + x*acc;
    }
    return acc;
}
/*
 * Gamma function for a double argument.
 *
 * Negative arguments: s = sin(pi * _x) is computed first; if it is exactly 0
 * the function returns NAN. Otherwise the computation continues with
 * x = -_x, and s (scaled by x) is used in the final quotient.
 * Non-finite x is returned unchanged.
 * x > 11.5: a 10-term polynomial in 1/x is combined with pow and exp
 * (this looks like a Stirling-type expansion).
 * Otherwise: x is shifted into [2, 3) by repeated multiplication / division,
 * and a ratio of two polynomials in (x - 2) is evaluated.
 *
 * NOTE(review): some C libraries also declare a function named gamma in
 * <math.h>; confirm that this definition does not clash on the target
 * platform.
 */
double gamma(double _x) {
    const int negative = _x < 0;
    double x = _x;
    double s;  /* assigned and read only when the argument is negative */

    if (negative) {
        s = sin(M_PI * _x);
        if (s == 0) {
            return NAN;
        }
        x = -x;
        s *= x;
    }

    if (!isfinite(x)) {
        return x;
    }

    if (x > 11.5) {
        const double sq2pi = sqrt(2*M_PI);
        double coefs[10] = {
            1.0,
            8.333333333333331800504e-2,
            3.472222222230075327854e-3,
            -2.681327161876304418288e-3,
            -2.294719747873185405699e-4,
            7.840334842744753003862e-4,
            6.989332260623193171870e-5,
            -5.950237554056330156018e-4,
            -2.363848809501759061727e-5,
            7.147391378143610789273e-4
        };
        const double series = horner(1/x, coefs, 10);
        const double exponent = _fma(0.5, x, -0.25);
        const double v = pow(x, exponent);
        /* v is applied twice, once divided by exp(x), instead of forming v*v. */
        const double res = sq2pi * v * (v / exp(x)) * series;
        return negative ? M_PI / (res * s) : res;
    }

    double P[8] = {
        1.000000000000000000009e0,
        8.378004301573126728826e-1,
        3.629515436640239168939e-1,
        1.113062816019361559013e-1,
        2.385363243461108252554e-2,
        4.092666828394035500949e-3,
        4.542931960608009155600e-4,
        4.212760487471622013093e-5
    };
    double Q[9] = {
        9.999999999999999999908e-1,
        4.150160950588455434583e-1,
        -2.243510905670329164562e-1,
        -4.633887671244534213831e-2,
        2.773706565840072979165e-2,
        -7.955933682494738320586e-4,
        -1.237799246653152231188e-3,
        2.346584059160635244282e-4,
        -1.397148517476170440917e-5
    };

    /* z accumulates the factors picked up while moving x into [2, 3). */
    double z = 1.0;
    for (; x >= 3.0; z *= x) {
        x -= 1.0;
    }
    for (; x < 2.0; x += 1.0) {
        z /= x;
    }
    x -= 2.0;

    const double p = horner(x, P, 8);
    const double q = horner(x, Q, 9);
    if (negative) {
        return M_PI * q / (s * z * p);
    }
    return z * p / q;
}
/*
 * Power-series evaluation used as the differentiation target in test().
 *
 * v : order; x : argument.
 *
 * Two series are accumulated side by side, one with factors (k - v) and one
 * with factors (k + v), for at most 49 terms. The loop stops early once
 * absterm() of the (k - v) term is at most 2.220446049250313e-16; the
 * (k + v) term is not part of that check. The two sums are then combined
 * using gamma(v), a second gamma-like factor obtained from gamma(v) via a
 * sine quotient, and pow(x/2, v).
 * NOTE(review): from the function name this is presumably the modified
 * Bessel function K_v(x) -- confirm against a reference implementation.
 */
double besselk_power_series(double v, double x) {
    const double gam = gamma(v);
    /* Derived from gam through a sine quotient rather than a second gamma() call. */
    const double ngam = M_PI/(sin(-M_PI*fabs(v))*gam*v);
    const double xx = x*x;

    double sum_minus = 0.0;
    double sum_plus = 0.0;
    double term_minus = 1.0;
    double term_plus = 1.0;

    for (int k = 1; k < 50; k++) {
        const double fourk = 4*k;
        sum_minus += term_minus;
        sum_plus += term_plus;
        term_minus *= xx/(fourk*(k-v));
        term_plus *= xx/(fourk*(k+v));
        /* Convergence test goes through absterm so the custom rule is involved. */
        if (absterm(term_minus) <= 2.220446049250313e-16) {
            break;
        }
    }

    const double xpv = pow(x/2, v);
    const double s = gam*sum_minus + xpv*xpv*ngam*sum_plus;
    return s/(2*xpv);
}
/*
 * Forward-mode derivative of besselk_power_series via __enzyme_fwddiff.
 * v is passed after enzyme_dup together with a seed of 1.0, and x is passed
 * after enzyme_const. Returns whatever __enzyme_fwddiff returns.
 */
double test(double v, double x) {
    const double seed = 1.0;
    return __enzyme_fwddiff((void*) besselk_power_series,
                            enzyme_dup, v, seed,
                            enzyme_const, x);
}
/*
 * Driver: evaluates test(1.02, 1.51) and prints the result in scientific
 * notation with 16 digits after the decimal point. The command-line
 * arguments are not used.
 */
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;
    const double derivative = test(1.02, 1.51);
    printf("%1.16e\n", derivative);
    return 0;
}
Which I compile with
clang mwe.c -fplugin=/usr/lib/ClangEnzyme-18.so -O0 -Rpass=.* -lm -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -Wall -pedantic -o mwe
on Linux with clang 18. When the compiled code actually hits the rule, `./mwe` prints a handful of `Hi!` lines before giving the solution.
I pass the `-Rpass=.*` flag to see all the compiler optimizations that are performed, and depending on a few small tweaks I sometimes get a remark like
libmatern.c:[...]: remark: Cannot use provided custom derivative pass [-Rpass=enzyme]
for `ipabsterm`. But I'm really having trouble figuring out which compiler optimization is breaking things. As you can see from the `optnone` attribute on `absterm` (the wrapper that calls `ipabsterm`), I suspected that the function was being inlined and the loop vectorized, and that this was breaking things. But nothing I have tried has fixed it.
Any thoughts or suggestions you have would be greatly appreciated! Thanks so much in advance.
My usual preface: apologies if I have missed relevant docs or existing/past issues on this.
I have some C code that uses a custom forward-mode rule. Compiled with `-O0`, that rule gets triggered. But if I compile with any higher optimization level, the custom rule does not get triggered (which I investigate using a simple print statement). Here is an MWE (see the listing above):
Which I compile with
clang mwe.c -fplugin=/usr/lib/ClangEnzyme-18.so -O0 -Rpass=.* -lm -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -Wall -pedantic -o mwe
on linux with clang 18. When the compiled code actually hits the rule, `./mwe` will print a handful of `Hi!` lines before giving the solution.
I have the `-Rpass=.*` flag to see all the compiler optimizations that get done, and depending on a few small tweaks I sometimes get something like
libmatern.c:[...]: remark: Cannot use provided custom derivative pass [-Rpass=enzyme]
for `ipabsterm`. But I'm really having trouble figuring out what compiler optimization is breaking things. As you can see with the attribute I've put for that function, I was suspicious that the function was getting inlined and the loop vectorized, which maybe was breaking things. But nothing I have tried has fixed it.
Any thoughts or suggestions you have would be greatly appreciated! Thanks so much in advance.