124 changes: 124 additions & 0 deletions generate_dataset.py
@@ -0,0 +1,124 @@
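"""Generate the stage 1 dataset for the change point detection experiment.

Writes one YAML config per ordered pair of distributions (with the change
point at CP_LOCATION) plus one no-change-point config per distribution, then
generates NUM_OF_SAMPLES samples for each config with ScipyDatasetGenerator.
"""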
import csv
from pathlib import Path

import yaml

from CPDShell.generator.generator import ScipyDatasetGenerator
from CPDShell.generator.saver import DatasetSaver

# Every sample is SAMPLE_SIZE points long; configs with a change point switch
# distributions at CP_LOCATION, the midpoint of the sample.
SAMPLE_SIZE = 500
CP_LOCATION = 250

# Number of samples to generate per configuration.
NUM_OF_SAMPLES = 1000

# Output root and config file name, used consistently below.
DIR_PATH = "experiment/stage_1"
CONFIG_NAME = "config.yaml"

WORKING_DIR = Path()  # current working directory


class VerboseSafeDumper(yaml.SafeDumper):
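    """SafeDumper that never emits YAML anchors/aliases, so repeated
    parameter dicts are written out in full in every config."""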
def ignore_aliases(self, data):
return True


def distribution_template(distribution_type, length, parameters):
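    """Build one distribution entry in the format the generator config expects."""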
return {"type": distribution_type, "length": length, "parameters": parameters}


def add_distribution(distributions, distribution_type, parameters, length=CP_LOCATION):
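    """Register a distribution segment; by default it spans up to the change point."""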
distributions[distribution_type] = distribution_template(distribution_type, length, parameters)


# Distributions for the segment before the change point.
distributions_left = {}
add_distribution(distributions_left, "normal", {"mean": 0.0, "variance": 1.0})
add_distribution(distributions_left, "exponential", {"rate": 1.0})
add_distribution(distributions_left, "uniform", {"min": 0.0, "max": 1.0})
add_distribution(distributions_left, "weibull", {"shape": 1.0, "scale": 0.5})
add_distribution(distributions_left, "beta", {"alpha": 0.5, "beta": 0.5})

# Distributions for the segment after the change point.
distributions_right = {}
add_distribution(distributions_right, "normal", {"mean": 10.0, "variance": 5.0})
add_distribution(distributions_right, "exponential", {"rate": 5.0})
add_distribution(distributions_right, "uniform", {"min": 1.0, "max": 4.0})
add_distribution(distributions_right, "weibull", {"shape": 1.0, "scale": 5.0})
add_distribution(distributions_right, "beta", {"alpha": 5.0, "beta": 5.0})

# Full-length distributions for the configs without a change point.
distributions_no_cp = {}
add_distribution(distributions_no_cp, "normal", {"mean": 0.0, "variance": 1.0}, SAMPLE_SIZE)
add_distribution(distributions_no_cp, "exponential", {"rate": 1.0}, SAMPLE_SIZE)
add_distribution(distributions_no_cp, "uniform", {"min": 0.0, "max": 1.0}, SAMPLE_SIZE)
add_distribution(distributions_no_cp, "weibull", {"shape": 1.0, "scale": 0.5}, SAMPLE_SIZE)
add_distribution(distributions_no_cp, "beta", {"alpha": 0.5, "beta": 0.5}, SAMPLE_SIZE)


def generate_configs(distributions):
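    """Write a YAML config for every ordered pair of distributions and for
    every single no-change-point distribution; return all config names."""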
    distributions_l, distributions_r, distributions_without_cp = distributions
    distribution_types = list(distributions_l)

names = []
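    # One config per ordered pair: the left distribution runs up to the
    # change point, the right one after it.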
for distribution_type_l in distribution_types:
for distribution_type_r in distribution_types:
name = distribution_type_l + "-" + distribution_type_r
names.append(name)

config = [
{
"name": name,
"distributions": [distributions_l[distribution_type_l], distributions_r[distribution_type_r]],
}
]

            (WORKING_DIR / DIR_PATH / name).mkdir(parents=True, exist_ok=True)

            with open(WORKING_DIR / DIR_PATH / name / CONFIG_NAME, "w") as outfile:
                yaml.dump(config, outfile, default_flow_style=False, sort_keys=False, Dumper=VerboseSafeDumper)

    # One config per distribution without a change point: a single segment
    # spanning the whole sample.
    for name in distribution_types:
        names.append(name)

config = [
{
"name": name,
"distributions": [
distributions_without_cp[name],
],
}
]

        (WORKING_DIR / DIR_PATH / name).mkdir(parents=True, exist_ok=True)

        with open(WORKING_DIR / DIR_PATH / name / CONFIG_NAME, "w") as outfile:
            yaml.dump(config, outfile, default_flow_style=False, sort_keys=False, Dumper=VerboseSafeDumper)

return names


(WORKING_DIR / DIR_PATH).mkdir(parents=True, exist_ok=True)
generated_names = generate_configs((distributions_left, distributions_right, distributions_no_cp))

# Record every config name and its sample count in a CSV index.
with open(WORKING_DIR / DIR_PATH / "experiment_description", "w", newline="") as f:
    writer = csv.writer(f)

    writer.writerow(["name", "samples_num"])
    samples_description = [[name, str(NUM_OF_SAMPLES)] for name in generated_names]
    writer.writerows(samples_description)

# Generate NUM_OF_SAMPLES samples for every config, each in its own directory.
for generated_name in generated_names:
    config_dir = WORKING_DIR / DIR_PATH / generated_name
    config_dir.mkdir(parents=True, exist_ok=True)

    for sample_num in range(NUM_OF_SAMPLES):
        print(f"Name: {generated_name}. Sample num: {sample_num}")
        sample_dir = config_dir / f"sample_{sample_num}"
        sample_dir.mkdir(parents=True, exist_ok=True)

        saver = DatasetSaver(sample_dir, True)
        ScipyDatasetGenerator().generate_datasets(config_dir / CONFIG_NAME, saver)

        # Keep only the raw data: drop the plot the generator renders, if any.
        (sample_dir / generated_name / "sample.png").unlink(missing_ok=True)