Skip to content

Commit b420b94

Browse files
committed
audio capture in unity, code clean up
1 parent 0aff80b commit b420b94

File tree

8 files changed

+107
-89
lines changed

8 files changed

+107
-89
lines changed

Assets/Project/Scripts/Player/Actions/HandInteractions.cs

+73-8
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
using System.IO.MemoryMappedFiles;
44
using System.IO;
55
using UnityEngine;
6+
using System.IO.Pipes;
7+
using System;
68

79
public class HandInteractions : MonoBehaviour
810
{
@@ -26,10 +28,19 @@ public class HandInteractions : MonoBehaviour
2628
bool CooldownDrink = false;
2729

2830
private string word;
31+
private bool isRecording = false;
32+
33+
private int recordingTime = 2; // Set the recording time in seconds
34+
2935

3036
private AudioSource pickUpItemSound;
3137
private AudioSource putToInventorySound;
3238
private AudioSource drinkSound;
39+
private AudioSource castingSound;
40+
private AudioClip recordedClip;
41+
42+
public float timeToFadeOutPopUp = 1;
43+
public float timeOfFadingOutPopUp = 0.007f;
3344

3445

3546
private void Awake() //get necessary components
@@ -183,14 +194,12 @@ void CastSpell() //cast spell with SpellCasting class
183194
{
184195
if (GameSettings.useSpeech && !PlayerParams.Variables.uiActive) //if using speach then microphone starting to record
185196
{
186-
MemoryMappedFile mmf_word = MemoryMappedFile.OpenExisting("whisper_run");
187-
MemoryMappedViewStream stream_word = mmf_word.CreateViewStream();
188-
BinaryWriter write_word = new BinaryWriter(stream_word);
189-
string noneString = "ok";
190-
byte[] noneBytes = System.Text.Encoding.UTF8.GetBytes(noneString);
191-
write_word.Write(noneBytes, 0, noneBytes.Length);
192-
193-
StartCoroutine(PlayerParams.Controllers.spellCasting.WaitForSpell());
197+
198+
recordedClip = Microphone.Start(null, false, recordingTime, 16000);
199+
200+
// Wait for the specified recording time
201+
StartCoroutine(WaitForRecording());
202+
194203
}
195204
else if (!PlayerParams.Variables.uiActive) //open spells menu if using speech is off
196205
{
@@ -199,6 +208,62 @@ void CastSpell() //cast spell with SpellCasting class
199208
}
200209
}
201210

211+
/// <summary>
/// Coroutine that runs while the microphone is recording a spoken spell.
/// Shows the "Cast a Spell." pop-up and plays the casting sound, waits
/// <c>recordingTime</c> seconds, stops the microphone, writes the recorded
/// clip as 16-bit PCM into the "magehand_whisper_audio" memory-mapped file,
/// sets the "magehand_whisper_run" flag to "ok" and starts
/// <c>SpellCasting.WaitForSpell</c>.
/// </summary>
/// <returns>Enumerator to be driven by <c>StartCoroutine</c>.</returns>
IEnumerator WaitForRecording()
{
    isRecording = true;

    // PopUp cast spell
    Debug.Log("Whisper listening");
    FindObjectOfType<HUD>().SpawnPopUp("", "Cast a Spell.", timeToFadeOutPopUp, timeOfFadingOutPopUp);
    castingSound = FindObjectOfType<SoundManager>().CreateAudioSource(SoundManager.Sound.SFX_CastingSpell);
    castingSound.Play();

    // Wait for the specified recording time
    yield return new WaitForSeconds(recordingTime);

    // The try/finally starts after the yield so the flag is always cleared,
    // even when the shared memory is missing or the write fails.
    try
    {
        Microphone.End(null);

        byte[] audioData = ConvertAudioClipToByteArray(recordedClip);

        // Dispose the mapping, view and writer deterministically (same pattern as
        // SpellCasting.WriteToMemoryMappedFile) instead of leaking one handle per cast.
        using (MemoryMappedFile mmf_audio = MemoryMappedFile.OpenExisting("magehand_whisper_audio"))
        using (MemoryMappedViewStream stream_audio = mmf_audio.CreateViewStream())
        using (BinaryWriter write_audio = new BinaryWriter(stream_audio))
        {
            write_audio.Write(audioData, 0, audioData.Length);
        }

        // Raise the run flag only after the audio has been written.
        PlayerParams.Controllers.spellCasting.WriteToMemoryMappedFile("magehand_whisper_run", "ok");

        StartCoroutine(PlayerParams.Controllers.spellCasting.WaitForSpell());
    }
    finally
    {
        isRecording = false;
    }
}
240+
241+
/// <summary>
/// Converts the float samples of an <see cref="AudioClip"/> into signed
/// 16-bit PCM bytes (two bytes per sample, platform byte order).
/// </summary>
/// <param name="clip">Clip whose sample data is read with <c>GetData</c>.</param>
/// <returns>A byte array of length <c>clip.samples * 2</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="clip"/> is null.</exception>
byte[] ConvertAudioClipToByteArray(AudioClip clip)
{
    if (clip == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException below.
        throw new ArgumentNullException(nameof(clip));
    }

    // NOTE(review): the buffer is sized from clip.samples only; this presumably
    // assumes a single-channel clip — confirm against the microphone setup.
    var samples = new float[clip.samples];
    clip.GetData(samples, 0);

    const float rescaleFactor = 32767f; // to convert float to Int16

    var pcm = new short[samples.Length];
    for (int i = 0; i < samples.Length; i++)
    {
        // Clamp first so an out-of-range sample saturates instead of wrapping
        // around when it is narrowed to a 16-bit integer.
        float clamped = Math.Max(-1f, Math.Min(1f, samples[i]));
        pcm[i] = (short)(clamped * rescaleFactor);
    }

    // One bulk copy replaces a BitConverter.GetBytes allocation per sample;
    // the byte order is the same native order BitConverter produced.
    var bytesData = new byte[pcm.Length * sizeof(short)];
    Buffer.BlockCopy(pcm, 0, bytesData, 0, bytesData.Length);

    return bytesData;
}
266+
202267
void PutDownObject() //put object down to inventory or if in hand is spell then some special interaction
203268
{
204269
CooldownPutDown = true;

Assets/Project/Scripts/Player/Actions/SpellCasting.cs

+30-34
Original file line numberDiff line numberDiff line change
@@ -313,64 +313,60 @@ static string NormalizeTranscribedTextToDisplay(string input)
313313
return cleanedString;
314314
}
315315

316+
/// <summary>
/// Opens an existing named memory-mapped file and writes the UTF-8 bytes of
/// <paramref name="data"/> at the start of its view.
/// </summary>
/// <param name="mapName">Name of the existing memory-mapped file to open.</param>
/// <param name="data">Text to encode as UTF-8 and write.</param>
public void WriteToMemoryMappedFile(string mapName, string data)
{
    using (var mappedFile = MemoryMappedFile.OpenExisting(mapName))
    {
        using (var viewStream = mappedFile.CreateViewStream())
        {
            using (var output = new BinaryWriter(viewStream))
            {
                byte[] payload = System.Text.Encoding.UTF8.GetBytes(data);
                output.Write(payload);
            }
        }
    }
}
326+
327+
/// <summary>
/// Opens an existing named memory-mapped file and reads up to
/// <paramref name="bytesNumber"/> bytes from the start of its view.
/// </summary>
/// <param name="mapName">Name of the existing memory-mapped file to open.</param>
/// <param name="bytesNumber">Number of bytes requested from the view.</param>
/// <param name="frame">Receives the bytes returned by <c>BinaryReader.ReadBytes</c>.</param>
public void ReadFromMemoryMappedFile(string mapName, int bytesNumber, out byte[] frame)
{
    using (var mappedFile = MemoryMappedFile.OpenExisting(mapName))
    {
        using (var viewStream = mappedFile.CreateViewStream())
        {
            using (var input = new BinaryReader(viewStream))
            {
                byte[] buffer = input.ReadBytes(bytesNumber);
                frame = buffer;
            }
        }
    }
}
336+
316337
public IEnumerator WaitForSpell()
317338
{
318339
if (isListening)
319340
{
320-
Debug.Log("Whisper is currently listening - preventing new cast");
341+
Debug.Log("Whisper is currently transcrabing - preventing new cast");
321342
yield break;
322343
}
323344

324-
Debug.Log("Whisper listening");
325-
FindObjectOfType<HUD>().SpawnPopUp("", "Cast a Spell.", timeToFadeOutPopUp, timeOfFadingOutPopUp);
326-
castingSound = FindObjectOfType<SoundManager>().CreateAudioSource(SoundManager.Sound.SFX_CastingSpell);
327-
castingSound.Play();
328-
329345
isListening = true;
330346

331-
MemoryMappedFile mmf_delete = MemoryMappedFile.OpenExisting("whisper");
332-
MemoryMappedViewStream stream_delete = mmf_delete.CreateViewStream();
333-
BinaryWriter write_delete = new BinaryWriter(stream_delete);
334-
335-
string noneString = "None";
336-
byte[] noneBytes = System.Text.Encoding.UTF8.GetBytes(noneString);
337-
write_delete.Write(noneBytes, 0, noneBytes.Length);
347+
WriteToMemoryMappedFile("magehand_whisper_text", "None");
338348

339-
string word = "None";
340349
string okString = "ok";
341350

342351
while (okString == "ok")
343352
{
344-
MemoryMappedFile mmf_gesture = MemoryMappedFile.OpenExisting("whisper_run");
345-
MemoryMappedViewStream stream_gesture = mmf_gesture.CreateViewStream();
346-
BinaryReader reader_gesture = new BinaryReader(stream_gesture);
347-
byte[] frameGesture = reader_gesture.ReadBytes(2);
348-
353+
byte[] frameGesture;
354+
ReadFromMemoryMappedFile("magehand_whisper_run", 2, out frameGesture);
349355
okString = System.Text.Encoding.UTF8.GetString(frameGesture, 0, 2);
350356
//Debug.Log(word);
351357
yield return new WaitForFixedUpdate();
352358
}
353359

354-
MemoryMappedFile mmf_word = MemoryMappedFile.OpenExisting("whisper");
355-
MemoryMappedViewStream stream_word = mmf_word.CreateViewStream();
356-
BinaryReader read_word = new BinaryReader(stream_word);
357-
358-
byte[] frame = read_word.ReadBytes(10);
360+
byte[] frameWord;
361+
ReadFromMemoryMappedFile("magehand_whisper_text", 10, out frameWord);
359362

360-
word = System.Text.Encoding.UTF8.GetString(frame, 0, 10).Split(";")[0];
363+
string word = System.Text.Encoding.UTF8.GetString(frameWord).Split(";")[0];
361364
Debug.Log("Whisper transcribed word: " + word);
365+
362366
FindObjectOfType<HUD>().SpawnPopUp("", "Casting word:<br>" + word, timeToFadeOutPopUp, timeOfFadingOutPopUp, false);
363367
Destroy(castingSound);
364368

365-
366-
MemoryMappedFile mmf_run = MemoryMappedFile.OpenExisting("whisper_run");
367-
MemoryMappedViewStream stream_run = mmf_run.CreateViewStream();
368-
BinaryWriter write_run = new BinaryWriter(stream_run);
369-
370-
string runString = "no";
371-
byte[] runBytes = System.Text.Encoding.UTF8.GetBytes(runString);
372-
write_run.Write(runBytes, 0, runBytes.Length);
373-
369+
WriteToMemoryMappedFile("magehand_whisper_run", "no");
374370

375371
CastSpellFromName(word);
376372

Assets/StreamingAssets/Whisper/_internal/models/models--Systran--faster-whisper-tiny/blobs.meta

-8
This file was deleted.

Assets/StreamingAssets/Whisper/_internal/pyaudio.meta

-8
This file was deleted.
Binary file not shown.

Assets/StreamingAssets/Whisper/_internal/pyaudio/_portaudio.cp311-win_amd64.pyd.meta

-7
This file was deleted.
-12.6 KB
Binary file not shown.

PythonScripts/stream.py

+4-24
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,10 @@
33
import sys
44

55
import numpy as np
6-
import pyaudio
76
from faster_whisper import WhisperModel
87
from multiprocessing.shared_memory import SharedMemory
98

109

11-
STEP_IN_SEC: int = 5
12-
LENGHT_IN_SEC: int = 2
13-
NB_CHANNELS = 1
14-
RATE = 16000
15-
CHUNK = RATE
16-
1710
WHISPER_LANGUAGE = "en"
1811
WHISPER_THREADS = 2
1912

@@ -24,32 +17,19 @@
2417

2518
whisper = WhisperModel("tiny", device="cpu", compute_type="int8", cpu_threads=WHISPER_THREADS, download_root=task_file_path, local_files_only=True)
2619

27-
shared_mem_whisper = SharedMemory(name='whisper', create=True, size=15)
28-
shared_mem_run = SharedMemory(name='whisper_run', create=True, size=2)
20+
shared_mem_whisper = SharedMemory(name='magehand_whisper_text', create=True, size=15)
21+
shared_mem_run = SharedMemory(name='magehand_whisper_run', create=True, size=2)
22+
shared_mem_audio = SharedMemory(name='magehand_whisper_audio', create=True, size=64000)
2923

3024
run = 'no'
3125
shared_mem_run.buf[:2] = bytearray(run.encode('utf-8'))
3226

3327
running = 'None' + ';' + ('a' * (9 - len('None')))
3428
shared_mem_whisper.buf[:len(running)] = bytearray(running.encode('utf-8'))
3529

36-
audio = pyaudio.PyAudio()
37-
stream = audio.open(
38-
format=pyaudio.paInt16,
39-
channels=NB_CHANNELS,
40-
rate=RATE,
41-
input=True,
42-
frames_per_buffer=CHUNK,
43-
)
44-
4530
while True:
4631
if shared_mem_run.buf[:2].tobytes().decode('utf-8') == "ok":
47-
48-
audio_data = b""
49-
for _ in range(STEP_IN_SEC):
50-
chunk = stream.read(RATE)
51-
audio_data += chunk
52-
32+
audio_data = shared_mem_audio.buf[:64000].tobytes()
5333
# The shared buffer holds signed 16-bit PCM (the Unity side scales floats by 32767),
# so divide by 32768 to bring samples back into [-1.0, 1.0); dividing by 255
# left them roughly 128x too large.
audio_data_array: np.ndarray = np.frombuffer(audio_data, np.int16).astype(np.float32) / 32768.0
5434

5535
segments, _ = whisper.transcribe(audio_data_array,

0 commit comments

Comments
 (0)