Skip to content

[tests] Automate Jupyter notebook tests using nbmake. #454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ examples-pip-install:
cd examples && python setup.py install

examples-test: examples-pip-install
cd examples && pytest --no-success-flaky-report --benchmark-disable -n auto --durations=5 . --cov=compiler_gym --cov-report=xml:$(COV_REPORT) $(PYTEST_ARGS)
cd examples && pytest --nbmake --no-success-flaky-report --benchmark-disable -n auto --durations=5 . --cov=compiler_gym --cov-report=xml:$(COV_REPORT) $(PYTEST_ARGS)

# Note we export $CI=1 so that the tests always run as if within the CI
# environment. This is to ensure that the reported coverage matches that of
Expand Down
13 changes: 10 additions & 3 deletions examples/getting-started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
"id": "AidRbcu8Pwxh"
},
"source": [
"<a href=\"https://colab.research.google.com/github/facebookresearch/CompilerGym/blob/stable/examples/getting-started.ipynb\">\n",
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open in Colab\" height=\"20\">\n",
"</a>\n",
"\n",
"# CompilerGym Getting Started\n",
"\n",
"CompilerGym is a toolkit for applying reinforcement learning to compiler optimization tasks. This document provides a short walkthrough of the key concepts, using the codesize reduction task of a production-grade compiler as an example. It will take about 20 minutes to work through. Lets get started!"
Expand Down Expand Up @@ -535,10 +539,13 @@
"name": "CompilerGym Getting Started.ipynb",
"provenance": []
},
"execution": {
"timeout": 900
},
"kernelspec": {
"display_name": "Python (compiler_gym)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "compiler_gym"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -550,7 +557,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.8.11"
}
},
"nbformat": 4,
Expand Down
5 changes: 2 additions & 3 deletions examples/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ dgl==0.6.1
geneticalgorithm>=1.0.2
hydra-core==1.1.0
keras==2.6.0
matplotlib>=3.3.0
matplotlib>=3.5.0
nevergrad>=0.4.3
numpy~=1.19.2 # Pin version for tensorflow.
opentuner>=0.8.5
pandas>=1.1.5
ray[default,rllib]==1.8.0
ray[default,rllib]==1.9.0
submitit>=1.2.0
submitit>=1.2.0
tensorflow==2.6.1
Expand Down
98 changes: 58 additions & 40 deletions examples/rllib.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
"id": "gsrdt9HooN9K"
},
"source": [
"<a href=\"https://colab.research.google.com/github/facebookresearch/CompilerGym/blob/stable/examples/rllib.ipynb\">\n",
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open in Colab\" height=\"20\">\n",
"</a>\n",
"\n",
"# Using CompilerGym environments with RLlib\n",
"\n",
"In this notebook we will use [RLlib](https://docs.ray.io/en/master/rllib.html) to train an agent for CompilerGym's [LLVM environment](https://facebookresearch.github.io/CompilerGym/llvm/index.html). RLlib is a popular library for scalable reinforcement learning, built on [Ray](https://docs.ray.io/en/master/index.html). It provides distributed implementations of several standard reinforcement learning algorithms.\n",
Expand Down Expand Up @@ -178,17 +182,17 @@
"from itertools import islice\n",
"\n",
"with make_env() as env:\n",
" # The two datasets we will be using:\n",
" npb = env.datasets[\"npb-v0\"]\n",
" chstone = env.datasets[\"chstone-v0\"]\n",
" # The two datasets we will be using:\n",
" npb = env.datasets[\"npb-v0\"]\n",
" chstone = env.datasets[\"chstone-v0\"]\n",
"\n",
" # Each dataset has a `benchmarks()` method that returns an iterator over the\n",
"    # benchmarks within the dataset. Here we will use iterator slicing to grab a \n",
" # handful of benchmarks for training and validation.\n",
" train_benchmarks = list(islice(npb.benchmarks(), 55))\n",
" train_benchmarks, val_benchmarks = train_benchmarks[:50], train_benchmarks[50:]\n",
" # We will use the entire chstone-v0 dataset for testing.\n",
" test_benchmarks = list(chstone.benchmarks())\n",
" # Each dataset has a `benchmarks()` method that returns an iterator over the\n",
"  # benchmarks within the dataset. Here we will use iterator slicing to grab a \n",
" # handful of benchmarks for training and validation.\n",
" train_benchmarks = list(islice(npb.benchmarks(), 55))\n",
" train_benchmarks, val_benchmarks = train_benchmarks[:50], train_benchmarks[50:]\n",
" # We will use the entire chstone-v0 dataset for testing.\n",
" test_benchmarks = list(chstone.benchmarks())\n",
"\n",
"print(\"Number of benchmarks for training:\", len(train_benchmarks))\n",
"print(\"Number of benchmarks for validation:\", len(val_benchmarks))\n",
Expand Down Expand Up @@ -217,11 +221,11 @@
"from compiler_gym.wrappers import CycleOverBenchmarks\n",
"\n",
"def make_training_env(*args) -> compiler_gym.envs.CompilerEnv:\n",
" \"\"\"Make a reinforcement learning environment that cycles over the\n",
" set of training benchmarks in use.\n",
" \"\"\"\n",
" del args # Unused env_config argument passed by ray\n",
" return CycleOverBenchmarks(make_env(), train_benchmarks)\n",
" \"\"\"Make a reinforcement learning environment that cycles over the\n",
" set of training benchmarks in use.\n",
" \"\"\"\n",
" del args # Unused env_config argument passed by ray\n",
" return CycleOverBenchmarks(make_env(), train_benchmarks)\n",
"\n",
"tune.register_env(\"compiler_gym\", make_training_env)"
]
Expand All @@ -241,12 +245,12 @@
"# Lets cycle through a few calls to reset() to demonstrate that this environment\n",
"# selects a new benchmark for each episode.\n",
"with make_training_env() as env:\n",
" env.reset()\n",
" print(env.benchmark)\n",
" env.reset()\n",
" print(env.benchmark)\n",
" env.reset()\n",
" print(env.benchmark)"
" env.reset()\n",
" print(env.benchmark)\n",
" env.reset()\n",
" print(env.benchmark)\n",
" env.reset()\n",
" print(env.benchmark)"
]
},
{
Expand Down Expand Up @@ -278,7 +282,7 @@
"\n",
"# (Re)Start the ray runtime.\n",
"if ray.is_initialized():\n",
" ray.shutdown()\n",
" ray.shutdown()\n",
"ray.init(include_dashboard=False, ignore_reinit_error=True)\n",
"\n",
"tune.register_env(\"compiler_gym\", make_training_env)\n",
Expand Down Expand Up @@ -366,18 +370,18 @@
"# performance on a set of benchmarks.\n",
"\n",
"def run_agent_on_benchmarks(benchmarks):\n",
" \"\"\"Run agent on a list of benchmarks and return a list of cumulative rewards.\"\"\"\n",
" with make_env() as env:\n",
" \"\"\"Run agent on a list of benchmarks and return a list of cumulative rewards.\"\"\"\n",
" rewards = []\n",
" for i, benchmark in enumerate(benchmarks, start=1):\n",
" observation, done = env.reset(benchmark=benchmark), False\n",
" while not done:\n",
" action = agent.compute_action(observation)\n",
" observation, _, done, _ = env.step(action)\n",
" rewards.append(env.episode_reward)\n",
" print(f\"[{i}/{len(benchmarks)}] {env.state}\")\n",
" with make_env() as env:\n",
" for i, benchmark in enumerate(benchmarks, start=1):\n",
" observation, done = env.reset(benchmark=benchmark), False\n",
" while not done:\n",
" action = agent.compute_action(observation)\n",
" observation, _, done, _ = env.step(action)\n",
" rewards.append(env.episode_reward)\n",
" print(f\"[{i}/{len(benchmarks)}] {env.state}\")\n",
"\n",
" return rewards\n",
" return rewards\n",
"\n",
"# Evaluate agent performance on the validation set.\n",
"val_rewards = run_agent_on_benchmarks(val_benchmarks)"
Expand Down Expand Up @@ -413,14 +417,15 @@
"outputs": [],
"source": [
"# Finally lets plot our results to see how we did!\n",
"%matplotlib inline\n",
"from matplotlib import pyplot as plt\n",
"\n",
"def plot_results(x, y, name, ax):\n",
" plt.sca(ax)\n",
" plt.bar(range(len(y)), y)\n",
" plt.ylabel(\"Reward (higher is better)\")\n",
" plt.xticks(range(len(x)), x, rotation = 90)\n",
" plt.title(f\"Performance on {name} set\")\n",
" plt.sca(ax)\n",
" plt.bar(range(len(y)), y)\n",
" plt.ylabel(\"Reward (higher is better)\")\n",
" plt.xticks(range(len(x)), x, rotation = 90)\n",
" plt.title(f\"Performance on {name} set\")\n",
"\n",
"fig, (ax1, ax2) = plt.subplots(1, 2)\n",
"fig.set_size_inches(13, 3)\n",
Expand All @@ -446,14 +451,27 @@
"provenance": [],
"toc_visible": true
},
"execution": {
"timeout": 900
},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 1
}
1 change: 1 addition & 0 deletions tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
flaky==3.7.0
nbmake==0.10
psutil==5.8.0 # Implicit dependency of pytest-xdist
pytest==6.2.5
pytest-benchmark==3.4.1
Expand Down