-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
87 lines (66 loc) · 2.42 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydub import AudioSegment
import numpy as np
from PIL import Image
from PIL.PngImagePlugin import PngInfo
import io
import base64
import json
# Origins permitted to make cross-origin requests to this API
# (two localhost ports: 3000 and 5173).
origins = ["http://localhost:3000", "http://localhost:5173"]

# Application instance; the route decorators below register against it.
app = FastAPI()

# CORS policy: only the origins listed above, with credentials allowed and
# no restriction on HTTP methods or request headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
def convert_audio_to_image(file: UploadFile):
    """Encode an uploaded MP3 as a base64 PNG whose pixels are the audio samples.

    The decoded samples are min-max scaled to the range 0-255, zero-padded up
    to the next perfect-square length and reshaped into a square 8-bit
    greyscale image. The values needed to undo the scaling and the padding
    are stored as a JSON string in a PNG text chunk named ``metadata``.

    Args:
        file: Upload whose underlying ``file`` object yields the audio bytes.
            The bytes are always decoded as MP3.

    Returns:
        The PNG file contents, base64-encoded, as a UTF-8 string.

    Raises:
        ValueError: If the decoded audio contains no samples.
    """
    # Decode the whole upload from memory.
    audio = AudioSegment.from_file(io.BytesIO(file.file.read()), format="mp3")

    # Work in float64 so the scaling below is true division, not integer math.
    raw_data = np.array(audio.get_array_of_samples()).astype(np.float64)
    if raw_data.size == 0:
        # min()/max() on an empty array would raise an opaque reduction
        # error; fail with a message that names the actual problem instead.
        raise ValueError("audio contains no samples; cannot build an image")

    # Compute the extremes once; they drive the scaling and are also stored
    # in the metadata so the scaling can be reversed.
    lowest = raw_data.min()
    highest = raw_data.max()
    value_range = highest - lowest

    if value_range == 0:
        # Constant signal (e.g. digital silence): scaling would divide by
        # zero and cast NaN to uint8. Every sample equals ``lowest``, so an
        # all-zero image plus the stored min/max loses no information.
        normalized_data = np.zeros(raw_data.shape, dtype=np.uint8)
    else:
        normalized_data = ((raw_data - lowest) / value_range * 255).astype(np.uint8)

    # Smallest square that can hold every sample.
    sample_count = len(normalized_data)
    side_len = int(np.ceil(np.sqrt(sample_count)))

    # Zero-fill the tail so the data reshapes cleanly into side_len x side_len.
    padded_data = np.concatenate(
        (normalized_data, np.zeros(side_len**2 - sample_count, dtype=np.uint8))
    )
    image_data = padded_data.reshape((side_len, side_len))

    # "L" = 8-bit single-channel greyscale.
    image = Image.fromarray(image_data, mode="L")

    metadata = {
        "sample_rate": audio.frame_rate,
        "channels": audio.channels,
        "original_shape": raw_data.shape,  # shape of the sample array before padding
        "padded_shape": padded_data.shape,  # flat length after padding
        "max": float(highest),
        "min": float(lowest),
    }

    # Embed the metadata as a PNG text chunk so it travels with the image.
    png_info = PngInfo()
    png_info.add_text("metadata", json.dumps(metadata))

    buffered = io.BytesIO()
    image.save(buffered, format="PNG", pnginfo=png_info)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    """Convert an uploaded audio file to an image and return it as JSON.

    Args:
        file: The uploaded file (required); it is handed unchanged to
            ``convert_audio_to_image``.

    Returns:
        On success, a JSON response ``{"image": <converter result>}``.
        If the upload cannot be decoded or converted, a 400 JSON response
        ``{"detail": <message>}``.
    """
    # Local import keeps this handler self-contained; pydub raises this
    # error when the audio bytes cannot be decoded.
    from pydub.exceptions import CouldntDecodeError

    try:
        image = convert_audio_to_image(file)
    except (CouldntDecodeError, ValueError):
        # Undecodable or empty audio is a client error, not a server fault,
        # so answer with 400 instead of letting it become a 500. The message
        # is deliberately generic so decoder output is not sent to clients.
        return JSONResponse(
            status_code=400,
            content={"detail": "Uploaded file could not be converted: invalid or empty audio."},
        )

    return JSONResponse(content={"image": image})
if __name__ == "__main__":
    # Script entry point: when this file is executed directly, serve the
    # app with uvicorn. The import is local so uvicorn is only loaded here.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )