Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pufferlib/config/ocean/bitflip.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[base]
package = ocean
env_name = puffer_bitflip
policy_name = Policy
rnn_name = Recurrent

[env]
num_envs = 4096

[train]
total_timesteps = 20_000_000
gamma = 0.95
learning_rate = 0.05
minibatch_size = 32768
17 changes: 17 additions & 0 deletions pufferlib/ocean/bitflip/binding.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "bitflip.h"

#define Env BitFlip
#include "../env_binding.h"

// Per-environment init hook for the shared binding code: copies the "size"
// keyword argument into the environment. Always returns 0.
// NOTE(review): presumably called once per env from env_binding.h — confirm.
static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
    (void)args; // positional arguments are not consulted here

    int board_size = unpack(kwargs, "size");
    env->size = board_size;
    return 0;
}

// Copies the Log fields into `dict` under their Python-facing key names.
// Keys are emitted in the order perf, score, episode_return, episode_length.
// Always returns 0.
static int my_log(PyObject *dict, Log *log) {
    const struct {
        char *key;
        float value;
    } entries[] = {
        {"perf", log->perf},
        {"score", log->score},
        {"episode_return", log->episode_return},
        {"episode_length", log->episode_length},
    };

    for (size_t i = 0; i < sizeof entries / sizeof entries[0]; i++) {
        assign_to_dict(dict, entries[i].key, entries[i].value);
    }
    return 0;
}
33 changes: 33 additions & 0 deletions pufferlib/ocean/bitflip/bitflip.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "bitflip.h"

// Standalone interactive demo for the BitFlip environment.
//
// Allocates the four buffers the environment writes through, then steps and
// renders until the window is closed. While LEFT SHIFT is held the arrow keys
// drive the agent (LEFT / RIGHT move, UP flips, nothing pressed = NOOP);
// otherwise a uniformly random action is taken each step.
//
// Returns EXIT_FAILURE if any buffer cannot be allocated, EXIT_SUCCESS
// otherwise.
int main(void) {
    // Number of distinct action codes (NOOP, LEFT, RIGHT, FLIP).
    enum { NUM_ACTIONS = 4 };

    BitFlip env = {.size = 5};

    // Three rows of `size` cells each; see c_reset / c_step for the layout.
    env.observations = calloc((size_t)env.size * 3, sizeof *env.observations);
    env.actions = calloc(1, sizeof *env.actions);
    env.rewards = calloc(1, sizeof *env.rewards);
    env.terminals = calloc(1, sizeof *env.terminals);

    // c_reset writes through these pointers immediately, so bail out before
    // touching the environment if any allocation failed.
    if (!env.observations || !env.actions || !env.rewards || !env.terminals) {
        free(env.observations);
        free(env.actions);
        free(env.rewards);
        free(env.terminals);
        return EXIT_FAILURE;
    }

    c_reset(&env);
    c_render(&env);
    while (!WindowShouldClose()) {
        if (IsKeyDown(KEY_LEFT_SHIFT)) {
            // Human control: later checks win if several keys are held.
            env.actions[0] = NOOP;
            if (IsKeyDown(KEY_LEFT)) {
                env.actions[0] = LEFT;
            }
            if (IsKeyDown(KEY_RIGHT)) {
                env.actions[0] = RIGHT;
            }
            if (IsKeyDown(KEY_UP)) {
                env.actions[0] = FLIP;
            }
        } else {
            env.actions[0] = rand() % NUM_ACTIONS;
        }
        c_step(&env);
        c_render(&env);
    }

    free(env.observations);
    free(env.actions);
    free(env.rewards);
    free(env.terminals);
    c_close(&env);
    return EXIT_SUCCESS;
}
152 changes: 152 additions & 0 deletions pufferlib/ocean/bitflip/bitflip.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#include "raylib.h"
#include <stdlib.h>
#include <string.h>

// Render palette used by c_render: PUFF_CYAN fills ON cells in the first two
// rows, PUFF_RED fills the non-empty cell in the third (cursor) row, and
// PUFF_BACKGROUND clears the frame.
const Color PUFF_RED = (Color){187, 0, 0, 255};
const Color PUFF_CYAN = (Color){0, 187, 187, 255};
// NOTE(review): alpha is 241 here while the other colors use 255 — confirm
// this is intended.
const Color PUFF_WHITE = (Color){241, 241, 241, 241};
const Color PUFF_BACKGROUND = (Color){6, 24, 24, 255};

// Action codes read from env->actions[0] in c_step.
const unsigned char NOOP = 0;
const unsigned char LEFT = 1;
const unsigned char RIGHT = 2;
const unsigned char FLIP = 3;

// Cell values stored in the observation buffer: OFF/ON for the first two rows
// (bit values), EMPTY/CURSOR for the third row (cursor position marker).
const unsigned char OFF = 0;
const unsigned char ON = 1;
const unsigned char EMPTY = 0;
const unsigned char CURSOR = 3;

// Running totals accumulated by add_log each time an episode ends; every
// field is a sum over the episodes logged so far and `n` counts them.
// NOTE(review): presumably the binding code averages these by `n` before
// exporting — confirm in env_binding.h.
typedef struct {
    float perf; // Recommended 0-1 normalized single real number perf metric
    float score; // Recommended unnormalized single real number perf metric
    float episode_return; // Recommended metric: sum of agent rewards over episode
    float episode_length; // Recommended metric: number of steps of agent episode
    // Any extra fields you add here may be exported to Python in binding.c
    float n; // Required as the last field
} Log;

// State of one BitFlip environment. The four buffers are supplied by the
// caller (see main in bitflip.c) and are read and written by c_reset / c_step.
typedef struct {
    Log log; // episode statistics accumulated by add_log
    // 3 * size cells laid out as three rows of `size`:
    //   row 0 = target bits, row 1 = current bits, row 2 = cursor marker.
    unsigned char *observations;
    int *actions; // actions[0] holds the action for the next c_step
    float *rewards; // rewards[0] is written on every c_step
    unsigned char *terminals; // terminals[0] is set to 1 when an episode ends
    int size; // number of columns (bits) per row
    int pos; // index into observations of the cursor cell (inside row 2)
    int n_correct; // number of columns where row 1 currently equals row 0
    int tick; // steps taken in the current episode
} BitFlip;

// Folds the episode that just ended into the running log totals.
// Reads the final reward from env->rewards[0]: a positive reward counts as a
// success for `perf`, and the reward itself is added to both `score` and
// `episode_return`. `episode_length` grows by the episode's tick count and
// `n` by one.
void add_log(BitFlip *env) {
    Log *totals = &env->log;
    const float final_reward = env->rewards[0];

    totals->perf += (final_reward > 0) ? 1.0f : 0.0f;
    totals->score += final_reward;
    totals->episode_return += final_reward;
    totals->episode_length += env->tick;
    totals->n += 1;
}

// Starts a new episode.
//
// Clears all three observation rows, then fills row 0 (the target): column 0
// is always ON and columns 1..size-1 are random. Row 1 (the current bits) is
// left all OFF, so n_correct is the number of target columns that came up
// OFF; column 0 therefore always starts out mismatched. The cursor is placed
// in the middle column of row 2 and the tick counter is zeroed.
void c_reset(BitFlip *env) {
    const int width = env->size;
    unsigned char *target = env->observations;

    memset(env->observations, OFF, width * 3 * sizeof(char));

    target[0] = ON;
    int matches = 0;
    for (int col = 1; col < width; col++) {
        if (rand() % 2 == 1) {
            target[col] = ON;
        } else {
            // Target bit is OFF, which already agrees with the cleared row 1.
            target[col] = OFF;
            matches++;
        }
    }
    env->n_correct = matches;

    env->pos = 2 * width + (width - 1) / 2;
    env->observations[env->pos] = CURSOR;
    env->tick = 0;
}

// Advances the environment by one step using env->actions[0].
//
// Order of events:
//   1. Move the cursor (LEFT / RIGHT); NOOP and FLIP leave it in place.
//   2. If the tick count reaches 12 * size or the cursor left row 2, end the
//      episode with reward -1, log it, and reset. This check runs before the
//      flip, so a FLIP on the timeout step is not applied.
//   3. On FLIP, toggle the row-1 bit above the cursor and update n_correct.
//   4. If every column matches the target, end the episode with reward +1,
//      log it, and reset.
// On a non-terminal step the reward is 0 and terminals[0] is 0.
void c_step(BitFlip *env) {
    const int width = env->size;
    const int action = env->actions[0];

    env->tick++;
    env->terminals[0] = 0;
    env->rewards[0] = 0.0;

    // Lift the cursor marker off its old cell, then shift by -1 / 0 / +1.
    env->observations[env->pos] = EMPTY;
    env->pos += (action == RIGHT) - (action == LEFT);

    const int timed_out = (env->tick == 12 * width);
    const int off_row = (env->pos < 2 * width) || (env->pos >= width * 3);
    if (timed_out || off_row) {
        env->rewards[0] = -1.0;
        env->terminals[0] = 1;
        add_log(env);
        c_reset(env);
        return;
    }

    env->observations[env->pos] = CURSOR;

    if (action == FLIP) {
        // Cells directly above the cursor: row 1 (current) and row 0 (target).
        unsigned char *current = &env->observations[env->pos - width];
        const unsigned char *target = &env->observations[env->pos - 2 * width];

        *current ^= 1;
        env->n_correct += (*current == *target) ? 1 : -1;
    }

    if (env->n_correct != width) {
        return;
    }

    env->rewards[0] = 1.0;
    env->terminals[0] = 1;
    add_log(env);
    c_reset(env);
}

// Draws the observation buffer as a 3-row grid of 64-pixel squares.
//
// The window is created on first use (5 FPS target). Pressing ESCAPE exits
// the process. Rows 0 and 1 draw every non-OFF cell in cyan; row 2 draws every
// non-EMPTY cell (the cursor) in red. Cells are drawn in buffer order.
void c_render(BitFlip *env) {
    const int cell_px = 64;
    const int width = env->size;

    if (!IsWindowReady()) {
        InitWindow(cell_px * width, cell_px * 3, "PufferLib BitFlip");
        SetTargetFPS(5);
    }

    if (IsKeyDown(KEY_ESCAPE)) {
        exit(0);
    }

    BeginDrawing();
    ClearBackground(PUFF_BACKGROUND);

    for (int row = 0; row < 3; row++) {
        const int is_cursor_row = (row == 2);
        for (int col = 0; col < width; col++) {
            const int value = env->observations[row * width + col];
            const int blank = is_cursor_row ? EMPTY : OFF;
            if (value == blank) {
                continue;
            }
            if (is_cursor_row) {
                DrawRectangle(col * cell_px, row * cell_px, cell_px, cell_px,
                              PUFF_RED);
            } else {
                DrawRectangle(col * cell_px, row * cell_px, cell_px, cell_px,
                              PUFF_CYAN);
            }
        }
    }

    EndDrawing();
}

// Closes the render window if one is open. The environment itself is not
// touched; its buffers are owned and freed by the caller.
void c_close(BitFlip *env) {
    (void)env;

    if (!IsWindowReady()) {
        return;
    }
    CloseWindow();
}
77 changes: 77 additions & 0 deletions pufferlib/ocean/bitflip/bitflip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""A simple sample environment. Use this as a template for your own envs."""

import gymnasium
import numpy as np

import pufferlib
from pufferlib.ocean.bitflip import binding


class BitFlip(pufferlib.PufferEnv):
    """Vectorized BitFlip environment backed by the C implementation.

    Each environment exposes a flat uint8 observation of length ``3 * size``,
    laid out by the C code as three rows of ``size`` cells: the target bits,
    the current bits, and a cursor row. There are four discrete actions
    (no-op, move left, move right, flip).

    Args:
        num_envs: Number of environments stepped together.
        render_mode: Stored on the instance; not otherwise used here.
        log_interval: Number of ``step`` calls between log exports.
        size: Number of bits (columns) per row.
        buf: Optional buffer object forwarded to ``pufferlib.PufferEnv``.
        seed: Seed forwarded to ``binding.vec_init``.
    """

    def __init__(
        self, num_envs=1, render_mode=None, log_interval=128, size=5, buf=None, seed=0
    ):
        # high=3 because the C side marks the cursor cell with the value 3
        # (CURSOR in bitflip.h); bit cells only ever hold 0 or 1.
        self.single_observation_space = gymnasium.spaces.Box(
            low=0, high=3, shape=(size * 3,), dtype=np.uint8
        )
        self.single_action_space = gymnasium.spaces.Discrete(4)
        self.render_mode = render_mode
        self.num_agents = num_envs
        self.log_interval = log_interval
        # Initialized here so step() works even if reset() was not called first.
        self.tick = 0

        super().__init__(buf)
        self.c_envs = binding.vec_init(
            self.observations,
            self.actions,
            self.rewards,
            self.terminals,
            self.truncations,
            num_envs,
            seed,
            size=size,
        )

    def reset(self, seed=0):
        """Reset every environment and return ``(observations, infos)``."""
        binding.vec_reset(self.c_envs, seed)
        self.tick = 0
        return self.observations, []

    def step(self, actions):
        """Apply ``actions`` to all environments and advance one step.

        Returns:
            ``(observations, rewards, terminals, truncations, info)`` where
            ``info`` holds one log entry from ``binding.vec_log`` every
            ``log_interval`` steps and is empty otherwise.
        """
        self.tick += 1

        self.actions[:] = actions
        binding.vec_step(self.c_envs)

        info = []
        if self.tick % self.log_interval == 0:
            info.append(binding.vec_log(self.c_envs))

        return (self.observations, self.rewards, self.terminals, self.truncations, info)

    def render(self):
        """Render environment 0."""
        binding.vec_render(self.c_envs, 0)

    def close(self):
        """Release the C-side environments."""
        binding.vec_close(self.c_envs)


if __name__ == "__main__":
    import time

    # Throughput benchmark: step N environments with pre-generated random
    # actions for roughly ten seconds and report steps per second.
    N = 4096
    CACHE = 1024

    env = BitFlip(num_envs=N)
    env.reset()

    # Pre-sample CACHE batches of actions so sampling cost stays out of the loop.
    actions = np.random.randint(0, 4, (CACHE, N))

    batches = 0
    start = time.time()
    while time.time() - start < 10:
        env.step(actions[batches % CACHE])
        batches += 1

    steps = batches * N
    print("BitFlip SPS:", int(steps / (time.time() - start)))
1 change: 1 addition & 0 deletions pufferlib/ocean/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def make_multiagent(buf=None, **kwargs):

MAKE_FUNCTIONS = {
'battle': 'Battle',
'bitflip': 'BitFlip',
'breakout': 'Breakout',
'blastar': 'Blastar',
'convert': 'Convert',
Expand Down