PufferAI · davidhpark · Aug 12, 2025 · Aug 12, 2025
diff --git a/pufferlib/config/ocean/bitflip.ini b/pufferlib/config/ocean/bitflip.ini
@@ -0,0 +1,14 @@
+[base]
+package = ocean
+env_name = puffer_bitflip
+policy_name = Policy
+rnn_name = Recurrent
+
+[env]
+num_envs = 4096
+
+[train]
+total_timesteps = 20_000_000
+gamma = 0.95
+learning_rate = 0.05
+minibatch_size = 32768
diff --git a/pufferlib/ocean/bitflip/binding.c b/pufferlib/ocean/bitflip/binding.c
@@ -0,0 +1,17 @@
+#include "bitflip.h"
+
+#define Env BitFlip
+#include "../env_binding.h"
+
+// Copy the per-environment settings supplied through kwargs into the C env.
+// Only "size" (the number of bits per observation row) is read; `args` is
+// unused. Always returns 0.
+static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
+  int requested_size = unpack(kwargs, "size");
+  env->size = requested_size;
+  return 0;
+}
+
+static int my_log(PyObject *dict, Log *log) { // Hands each Log metric to assign_to_dict under its field name
+  assign_to_dict(dict, "perf", log->perf); // presumably stores the value in the Python dict; helper is defined outside this file
+  assign_to_dict(dict, "score", log->score);
+  assign_to_dict(dict, "episode_return", log->episode_return);
+  assign_to_dict(dict, "episode_length", log->episode_length);
+  return 0; // Always returns 0; log->n is not exported here
+}
diff --git a/pufferlib/ocean/bitflip/bitflip.c b/pufferlib/ocean/bitflip/bitflip.c
@@ -0,0 +1,33 @@
+#include "bitflip.h"
+
+// Interactive demo for a single BitFlip environment rendered with raylib.
+// Hold LEFT_SHIFT to play by hand (LEFT/RIGHT move the cursor, UP flips the
+// bit under it, no arrow key means NOOP); otherwise a random action is taken
+// every frame. Returns EXIT_FAILURE if a buffer could not be allocated.
+int main(void) {
+  BitFlip env = {.size = 5};
+  // Three rows of `size` cells each; see c_reset/c_step for the layout.
+  env.observations = calloc((size_t)env.size * 3, sizeof *env.observations);
+  env.actions = calloc(1, sizeof *env.actions);
+  env.rewards = calloc(1, sizeof *env.rewards);
+  env.terminals = calloc(1, sizeof *env.terminals);
+
+  int status = EXIT_FAILURE;
+  // c_reset writes through these buffers immediately, so never run the
+  // environment after a failed allocation.
+  if (env.observations && env.actions && env.rewards && env.terminals) {
+    c_reset(&env);
+    c_render(&env);
+    while (!WindowShouldClose()) {
+      if (IsKeyDown(KEY_LEFT_SHIFT)) {
+        env.actions[0] = NOOP;
+        if (IsKeyDown(KEY_LEFT)) {
+          env.actions[0] = LEFT;
+        }
+        if (IsKeyDown(KEY_RIGHT)) {
+          env.actions[0] = RIGHT;
+        }
+        if (IsKeyDown(KEY_UP)) {
+          env.actions[0] = FLIP;
+        }
+      } else {
+        env.actions[0] = rand() % 4;
+      }
+      c_step(&env);
+      c_render(&env);
+    }
+    // Shut the window down while the environment is still fully valid.
+    c_close(&env);
+    status = EXIT_SUCCESS;
+  }
+
+  // free(NULL) is a no-op, so this is safe after a partial allocation.
+  free(env.observations);
+  free(env.actions);
+  free(env.rewards);
+  free(env.terminals);
+  return status;
+}
diff --git a/pufferlib/ocean/bitflip/bitflip.h b/pufferlib/ocean/bitflip/bitflip.h
@@ -0,0 +1,152 @@
+#include "raylib.h"
+#include <stdlib.h>
+#include <string.h>
+
+// Render palette. Declared `static` so that every translation unit including
+// this header gets a private copy rather than another external definition,
+// and initialized with plain brace lists, which are valid constant
+// initializers in ISO C (a file-scope compound literal is only accepted as a
+// compiler extension).
+static const Color PUFF_RED = {187, 0, 0, 255};
+static const Color PUFF_CYAN = {0, 187, 187, 255};
+// NOTE(review): alpha is 241 here while every other color uses 255 — confirm.
+static const Color PUFF_WHITE = {241, 241, 241, 241};
+static const Color PUFF_BACKGROUND = {6, 24, 24, 255};
+
+// Action codes read from env->actions[0] by c_step.
+enum { NOOP = 0, LEFT = 1, RIGHT = 2, FLIP = 3 };
+
+// Values stored in the target and current-bit rows of the observation buffer.
+enum { OFF = 0, ON = 1 };
+// Values stored in the cursor row of the observation buffer.
+enum { EMPTY = 0, CURSOR = 3 };
+
+typedef struct { // Running totals over finished episodes; add_log() folds in one episode per call
+  float perf;  // Recommended 0-1 normalized perf metric; here +1 per episode whose final reward was positive
+  float score; // Recommended unnormalized perf metric; here the sum of final-step rewards (+1 or -1)
+  float episode_return; // Sum of agent rewards over episode; only the final step is ever non-zero in this env
+  float episode_length; // Number of steps of agent episode, accumulated from env->tick
+  // Any extra fields you add here may be exported to Python in binding.c
+  float n; // Episodes accumulated so far (incremented by add_log); required as the last field
+} Log;
+
+typedef struct { // State of one BitFlip environment
+  Log log; // Episode statistics accumulated by add_log()
+  unsigned char *observations; // 3 * size cells: [0,size) target bits, [size,2*size) current bits, [2*size,3*size) cursor row
+  int *actions; // actions[0] is the action consumed by c_step (NOOP/LEFT/RIGHT/FLIP)
+  float *rewards; // rewards[0] is written by c_step: 0 normally, +1 on solve, -1 on failure
+  unsigned char *terminals; // terminals[0] is set to 1 on the step that ends an episode
+  int size; // Number of bits per row
+  int pos; // Cursor index into observations, inside the cursor row
+  int n_correct; // Count of current bits that equal their target bit
+  int tick; // Steps taken in the current episode
+} BitFlip;
+
+// Fold the episode that just ended into the running totals in env->log.
+// Reads the final reward from env->rewards[0] and the episode length from
+// env->tick, so it has to run before c_reset clears the tick counter.
+void add_log(BitFlip *env) {
+  Log *totals = &env->log;
+  const float final_reward = env->rewards[0];
+  const int solved = final_reward > 0;
+
+  totals->n += 1.0f;
+  totals->episode_length += env->tick;
+  totals->episode_return += final_reward;
+  totals->score += final_reward;
+  totals->perf += solved ? 1.0f : 0.0f;
+}
+
+// Begin a new episode: wipe all three observation rows, draw a fresh random
+// target row, recount the matching bits, and park the cursor in the middle of
+// the cursor row.
+void c_reset(BitFlip *env) {
+  const int width = env->size;
+  unsigned char *target = env->observations;
+
+  memset(env->observations, OFF, width * 3 * sizeof(char));
+
+  // The first target bit is always ON while the current row starts all OFF,
+  // so a freshly reset board is never already solved.
+  target[0] = ON;
+
+  int matching = 0;
+  for (int bit = 1; bit < width; ++bit) {
+    if (rand() % 2 == 1) {
+      target[bit] = ON;
+    } else {
+      // An OFF target already agrees with the cleared current row.
+      target[bit] = OFF;
+      matching++;
+    }
+  }
+  env->n_correct = matching;
+
+  env->tick = 0;
+  env->pos = 2 * width + (width - 1) / 2;
+  env->observations[env->pos] = CURSOR;
+}
+
+// Finish the current episode with the given final reward: flag the terminal,
+// record the episode in the log, then reset for the next one.
+static void end_episode(BitFlip *env, float final_reward) {
+  env->rewards[0] = final_reward;
+  env->terminals[0] = 1;
+  add_log(env);
+  c_reset(env);
+}
+
+// Advance the environment by one action taken from env->actions[0].
+// LEFT/RIGHT move the cursor, FLIP toggles the current bit under it, anything
+// else leaves the board alone. The episode ends with reward -1 when the step
+// budget (12 * size) is reached or the cursor leaves its row, and with
+// reward +1 once every current bit matches its target.
+void c_step(BitFlip *env) {
+  const int width = env->size;
+  const int action = env->actions[0];
+
+  env->tick++;
+  env->rewards[0] = 0.0;
+  env->terminals[0] = 0;
+
+  // Lift the cursor marker; it is put back once the new position is known to
+  // be on the board.
+  env->observations[env->pos] = EMPTY;
+  if (action == RIGHT) {
+    env->pos++;
+  } else if (action == LEFT) {
+    env->pos--;
+  }
+
+  const int timed_out = (env->tick == 12 * width);
+  const int off_board = (env->pos < 2 * width) || (env->pos >= 3 * width);
+  if (timed_out || off_board) {
+    end_episode(env, -1.0);
+    return;
+  }
+
+  env->observations[env->pos] = CURSOR;
+
+  if (action == FLIP) {
+    unsigned char *cells = env->observations;
+    const int current_at = env->pos - width;
+    const int target_at = env->pos - 2 * width;
+
+    cells[current_at] ^= 1;
+    // A flip always changes whether this bit agrees with its target.
+    env->n_correct += (cells[current_at] == cells[target_at]) ? 1 : -1;
+  }
+
+  if (env->n_correct == width) {
+    end_episode(env, 1.0);
+  }
+}
+
+// Draw the board as a grid of 64-pixel squares, three rows tall: lit target
+// and current bits in cyan, the cursor cell in red. The window is created on
+// first use, and pressing ESC exits the process.
+void c_render(BitFlip *env) {
+  const int cell_px = 64;
+  const int width = env->size;
+
+  if (!IsWindowReady()) {
+    InitWindow(cell_px * width, cell_px * 3, "PufferLib BitFlip");
+    SetTargetFPS(5);
+  }
+
+  if (IsKeyDown(KEY_ESCAPE)) {
+    exit(0);
+  }
+
+  BeginDrawing();
+  ClearBackground(PUFF_BACKGROUND);
+
+  for (int row = 0; row < 3; row++) {
+    // Rows 0 and 1 hold bits; row 2 holds the cursor marker.
+    const int is_cursor_row = (row == 2);
+    for (int col = 0; col < width; col++) {
+      const int cell = env->observations[row * width + col];
+      if (cell == (is_cursor_row ? EMPTY : OFF)) {
+        continue;
+      }
+      DrawRectangle(col * cell_px, row * cell_px, cell_px, cell_px,
+                    is_cursor_row ? PUFF_RED : PUFF_CYAN);
+    }
+  }
+
+  EndDrawing();
+}
+
+// Tear down the raylib window if one was opened; `env` itself is not touched.
+void c_close(BitFlip *env) {
+  if (!IsWindowReady()) {
+    return;
+  }
+  CloseWindow();
+}
diff --git a/pufferlib/ocean/bitflip/bitflip.py b/pufferlib/ocean/bitflip/bitflip.py
@@ -0,0 +1,77 @@
+"""A simple sample environment. Use this as a template for your own envs."""
+
+import gymnasium
+import numpy as np
+
+import pufferlib
+from pufferlib.ocean.bitflip import binding
+
+
+class BitFlip(pufferlib.PufferEnv):
+    def __init__(
+        self, num_envs=1, render_mode=None, log_interval=128, size=5, buf=None, seed=0
+    ):
+        self.single_observation_space = gymnasium.spaces.Box(
+            low=0, high=1, shape=(size * 3,), dtype=np.uint8
+        )
+        self.single_action_space = gymnasium.spaces.Discrete(4)
+        self.render_mode = render_mode
+        self.num_agents = num_envs
+        self.log_interval = log_interval
+
+        super().__init__(buf)
+        self.c_envs = binding.vec_init(
+            self.observations,
+            self.actions,
+            self.rewards,
+            self.terminals,
+            self.truncations,
+            num_envs,
+            seed,
+            size=size,
+        )
+
+    def reset(self, seed=0):
+        binding.vec_reset(self.c_envs, seed)
+        self.tick = 0
+        return self.observations, []
+
+    def step(self, actions):
+        self.tick += 1
+
+        self.actions[:] = actions
+        binding.vec_step(self.c_envs)
+
+        info = []
+        if self.tick % self.log_interval == 0:
+            info.append(binding.vec_log(self.c_envs))
+
+        return (self.observations, self.rewards, self.terminals, self.truncations, info)
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+
+if __name__ == "__main__":
+    N = 4096
+
+    env = BitFlip(num_envs=N)
+    env.reset()
+    steps = 0
+
+    CACHE = 1024
+    actions = np.random.randint(0, 4, (CACHE, N))
+
+    i = 0
+    import time
+
+    start = time.time()
+    while time.time() - start < 10:
+        env.step(actions[i % CACHE])
+        steps += N
+        i += 1
+
+    print("BitFlip SPS:", int(steps / (time.time() - start)))
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
@@ -118,6 +118,7 @@ def make_multiagent(buf=None, **kwargs):
 
 MAKE_FUNCTIONS = {
     'battle': 'Battle',
+    'bitflip': 'BitFlip',
     'breakout': 'Breakout',
     'blastar': 'Blastar',
     'convert': 'Convert',