Skip to content

Commit

Permalink
cleanup & updates
Browse files Browse the repository at this point in the history
  • Loading branch information
gcie committed Jun 9, 2021
1 parent ad2d578 commit 859b01e
Show file tree
Hide file tree
Showing 171 changed files with 2,532 additions and 660,955 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
**/__pycache__
models
tmp
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{
}
5 changes: 5 additions & 0 deletions LibriSpeech-Transcriptions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SIMI experiments

## Setup

GitHub does not allow pushing files larger than 100 MB, so after cloning you need to run the `prepare.py` script from the `LibriSpeech-Transcriptions` folder.
1 change: 0 additions & 1 deletion LibriSpeech-Transcriptions/conf/m1.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
name: 'm1'
extend_length:
- 3
- 4
Expand Down
12 changes: 12 additions & 0 deletions LibriSpeech-Transcriptions/conf/m2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
extend_length:
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
replace_prob: 0.1
replace_length:
- 1
12 changes: 12 additions & 0 deletions LibriSpeech-Transcriptions/conf/m3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
extend_length:
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
replace_prob: 0.0
replace_length:
- 0
2,703 changes: 0 additions & 2,703 deletions LibriSpeech-Transcriptions/dev-clean.txt

This file was deleted.

2,864 changes: 0 additions & 2,864 deletions LibriSpeech-Transcriptions/dev-other.txt

This file was deleted.

31 changes: 17 additions & 14 deletions LibriSpeech-Transcriptions/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import random
import os

random.seed(290956)

DATASETS = ['dev-clean', 'dev-other', 'test-clean', 'test-other', 'train-clean-100', 'train-clean-360', 'train-other-500']

Expand Down Expand Up @@ -39,6 +38,7 @@ def load_dataset(name):
return dataset, dictionary

def process(config_file_name):
random.seed(290956)
config = None
with open(Path('conf') / config_file_name) as config_file:
config = yaml.full_load(config_file)
Expand All @@ -51,29 +51,32 @@ def process(config_file_name):
replace_test = lambda: random.uniform(0, 1) < config['replace_prob']
replace_length = lambda: random.choice(config['replace_length'])

print("Preparing train-full-960...", flush=True)
if not os.path.exists('train-full-960.txt'):
print("Preparing train-full-960...", flush=True)

with open('train-full-960.txt', 'w', encoding='utf8') as out:
for name in ['train-clean-100', 'train-clean-360', 'train-other-500']:
for line in open(name + '.txt', 'r', encoding='utf8'):
out.write(line)
with open('train-full-960.txt', 'w', encoding='utf8') as out:
for name in ['train-clean-100', 'train-clean-360', 'train-other-500']:
for line in open(name + '.txt', 'r', encoding='utf8'):
out.write(line)

print("Preparing dataset modifications", flush=True)
for dataset_name in DATASETS:
dataset, dictionary = load_dataset(dataset_name)
out_file = open(outPath / (dataset_name + '.txt'), 'w', encoding='utf8')
print(f"Processing dataset {dataset_name}", flush=True)
modify(dataset, dictionary, extend_range, replace_test, replace_length, out_file)
if not os.path.exists(outPath / (dataset_name + '.txt')):
print(f"Processing dataset {dataset_name}", flush=True)
with open(outPath / (dataset_name + '.txt'), 'w', encoding='utf8') as out_file:
modify(dataset, dictionary, extend_range, replace_test, replace_length, out_file)


with open(outPath / 'train-full-960.txt', 'w', encoding='utf8') as out:
for name in ['train-clean-100', 'train-clean-360', 'train-other-500']:
for line in open(outPath / (name + '.txt'), 'r', encoding='utf8'):
out.write(line)
if not os.path.exists(outPath / 'train-full-960.txt'):
with open(outPath / 'train-full-960.txt', 'w', encoding='utf8') as out:
for name in ['train-clean-100', 'train-clean-360', 'train-other-500']:
for line in open(outPath / (name + '.txt'), 'r', encoding='utf8'):
out.write(line)

def main():
    """Run `process` on every `.yaml` config found in `./conf`.

    Entries are visited in whatever order `os.listdir` returns them, and the
    name of each config is printed before it is handed to `process`.
    """
    yaml_configs = (entry for entry in os.listdir('./conf') if entry.endswith('.yaml'))
    for config_name in yaml_configs:
        print(f"Processing {config_name}", flush=True)
        process(config_name)

if __name__ == '__main__':
Expand Down
2,620 changes: 0 additions & 2,620 deletions LibriSpeech-Transcriptions/test-clean.txt

This file was deleted.

2,938 changes: 0 additions & 2,938 deletions LibriSpeech-Transcriptions/test-other.txt

This file was deleted.

28,535 changes: 0 additions & 28,535 deletions LibriSpeech-Transcriptions/train-clean-100.txt

This file was deleted.

103,975 changes: 0 additions & 103,975 deletions LibriSpeech-Transcriptions/train-clean-360.txt

This file was deleted.

275,155 changes: 0 additions & 275,155 deletions LibriSpeech-Transcriptions/train-other-500.txt

This file was deleted.

Empty file added __init__.py
Empty file.
3 changes: 3 additions & 0 deletions env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

ROOTPATH = '/pio/scratch/1/i290956/zs2021/simi'
TRANSCRIPTIONS_DIR = 'LibriSpeech-Transcriptions'
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 859b01e

Please sign in to comment.