A neural network with an erf output activation can occasionally produce predictions well outside the range [-1, 1]:
import random as rd

import neural_tangents as nt
import numpy as np
from jax import random
from neural_tangents import stax
# Reproduction script: build the kernel of a ReLU network whose final
# activation is erf, fit random +/-1 labels with the NNGP predictor, and print
# the extreme test predictions to show they can leave the interval [-1, 1].

NUM_SAMPLES = 100   # number of training points (and of test points)
NUM_FEATURES = 10   # input dimension of every point

# Three Dense+ReLU stages followed by a Dense layer with an erf activation.
init_fn, apply_fn, kernel_fn = stax.serial(
    stax.Dense(1),
    stax.Relu(),
    stax.Dense(1),
    stax.Relu(),
    stax.Dense(1),
    stax.Relu(),
    stax.Dense(1),
    stax.Erf(),
)

# Independent standard-normal train and test inputs from a fixed JAX key.
key1, key2 = random.split(random.PRNGKey(777))
x1 = random.normal(key1, (NUM_SAMPLES, NUM_FEATURES))
x2 = random.normal(key2, (NUM_SAMPLES, NUM_FEATURES))
x_train, x_test = x1, x2

# Random +/-1 labels. A dedicated seeded generator keeps the run repeatable
# without touching the global state of the stdlib `random` module.
label_rng = rd.Random(777)
y_train = [label_rng.choice([-1, 1]) for _ in range(NUM_SAMPLES)]
y_train = np.array(y_train)[:, np.newaxis]  # column vector: (NUM_SAMPLES, 1)

predict_fn = nt.predict.gradient_descent_mse_ensemble(kernel_fn, x_train, y_train)
y_test_nngp = predict_fn(x_test=x_test, get="nngp")

# NOTE(review): presumably the "nngp" prediction is a kernel-regression
# posterior mean that is not itself passed through erf, so nothing clamps it
# to [-1, 1] -- confirm against the neural_tangents documentation.
# The values quoted below come from the originally reported run, whose labels
# were drawn from an unseeded generator; exact numbers will differ.
print(y_test_nngp.max())  # e.g. 1.6560178
print(y_test_nngp.min())  # e.g. -2.244388