-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplay_image.py
64 lines (48 loc) · 1.84 KB
/
play_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from pydub import AudioSegment
import numpy as np
from PIL import Image
import sounddevice as sd
from playsound import playsound
import json
import argparse
import os
def read_metadata_from_image(image_path: str):
    """Return the JSON metadata embedded in an image, or ``None`` if absent.

    The image's ``info`` mapping is searched for a ``"metadata"`` entry,
    whose value is parsed as JSON.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        The decoded JSON value, or ``None`` (after printing a notice) when
        the image has no ``"metadata"`` entry.

    Raises:
        json.JSONDecodeError: If the entry exists but is not valid JSON.
    """
    # Use the image as a context manager so the underlying file handle is
    # released as soon as the info mapping has been read, instead of leaking.
    with Image.open(image_path) as image:
        metadata_str = image.info.get("metadata")

    if metadata_str is None:
        print("No metadata found.")
        return None
    return json.loads(metadata_str)
def convert_image_to_audio(image_path: str):
    """Decode the audio stored in an image, save it as MP3 and play it.

    Pixel values are mapped from the 0..255 range back to the
    ``min``..``max`` range given by the image's embedded metadata, reshaped
    to ``padded_shape``, trimmed to the first ``original_shape[0]`` entries
    along the leading axis and interpreted as 16-bit PCM samples. The result
    is exported next to the image (same path with an ``.mp3`` extension) and
    then played back.

    Args:
        image_path: Path of the image to decode.

    Raises:
        ValueError: If the image has no embedded metadata.
    """
    # Load metadata; without it the pixel data cannot be interpreted, so fail
    # with a clear message rather than a TypeError on ``None[...]`` below.
    metadata = read_metadata_from_image(image_path)
    if metadata is None:
        raise ValueError(
            f"Cannot decode {image_path!r}: image has no embedded metadata"
        )

    # Load image; copying into an array inside the ``with`` lets the file
    # handle be closed immediately afterwards.
    with Image.open(image_path) as image:
        image_data = np.array(image).astype(np.float32)

    # Denormalize data
    denormalized_data = (
        image_data / 255 * (metadata["max"] - metadata["min"]) + metadata["min"]
    )

    # Reshape data to original padded shape
    denormalized_padded_data = denormalized_data.reshape(metadata["padded_shape"])

    # Remove padding
    decoded_data = denormalized_padded_data[: metadata["original_shape"][0]]

    # Round to the nearest integer before the cast: a bare float -> int16
    # cast truncates toward zero, which biases every sample toward silence.
    pcm_samples = np.rint(decoded_data).astype(np.int16)

    # Create audio segment from raw data
    decoded_audio = AudioSegment(
        pcm_samples.tobytes(),
        frame_rate=metadata["sample_rate"],
        sample_width=2,  # int16 implies 2 bytes per sample
        channels=metadata["channels"],
    )

    # Export decoded audio to file
    output_audio_path = os.path.splitext(image_path)[0] + ".mp3"
    decoded_audio.export(output_audio_path, format="mp3")
    print(f"Audio saved to {output_audio_path}")

    playsound(output_audio_path)
if __name__ == "__main__":
    # Script entry point: take the image path from the command line and hand
    # it to the decoder, which rebuilds and plays the audio.
    cli = argparse.ArgumentParser(description="Play audio from a png file")
    cli.add_argument(
        "--file",
        required=True,
        type=str,
        help="Path to the .png file",
    )
    options = cli.parse_args()
    convert_image_to_audio(image_path=options.file)