M.E.R.A./audio2face_streaming_utils.py at main · Jinash-Rouniyar/M.E.R.A. · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133


import sys
import grpc
import time
import numpy as np
import soundfile

import audio2face_pb2
import audio2face_pb2_grpc


def push_audio_track(url, audio_data, samplerate, instance_name):
    """
    This function pushes the whole audio track at once via PushAudioRequest()
    PushAudioRequest parameters:
     * audio_data: bytes, containing audio data for the whole track, where each sample is encoded as 4 bytes (float32)
     * samplerate: sampling rate for the audio data
     * instance_name: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data
     * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished
    The request is passed to PushAudio()
    """

    block_until_playback_is_finished = True  # ADJUST
    with grpc.insecure_channel(url) as channel:
        stub = audio2face_pb2_grpc.Audio2FaceStub(channel)
        request = audio2face_pb2.PushAudioRequest()
        request.audio_data = audio_data.astype(np.float32).tobytes()
        request.samplerate = samplerate
        request.instance_name = instance_name
        request.block_until_playback_is_finished = block_until_playback_is_finished
        print("Sending audio data...")
        response = stub.PushAudio(request)
        if response.success:
            print("SUCCESS")
        else:
            print(f"ERROR: {response.message}")
    print("Closed channel")


def push_audio_track_stream(url, audio_data, samplerate, instance_name):
    """
    This function pushes audio chunks sequentially via PushAudioStreamRequest()
    The function emulates the stream of chunks, generated by splitting input audio track.
    But in a real application such stream of chunks may be aquired from some other streaming source.
    The first message must contain start_marker field, containing only meta information (without audio data):
     * samplerate: sampling rate for the audio data
     * instance_name: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data
     * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished (after the last message)
    Second and other messages must contain audio_data field:
     * audio_data: bytes, containing audio data for an audio chunk, where each sample is encoded as 4 bytes (float32)
    All messages are packed into a Python generator and passed to PushAudioStream()
    """

    chunk_size = samplerate // 10  # ADJUST
    sleep_between_chunks = 0.04  # ADJUST
    block_until_playback_is_finished = True  # ADJUST

    with grpc.insecure_channel(url) as channel:
        print("Channel creadted")
        stub = audio2face_pb2_grpc.Audio2FaceStub(channel)

        def make_generator():
            start_marker = audio2face_pb2.PushAudioRequestStart(
                samplerate=samplerate,
                instance_name=instance_name,
                block_until_playback_is_finished=block_until_playback_is_finished,
            )
            # At first, we send a message with start_marker
            yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker)
            # Then we send messages with audio_data
            for i in range(len(audio_data) // chunk_size + 1):
                time.sleep(sleep_between_chunks)
                chunk = audio_data[i * chunk_size : i * chunk_size + chunk_size]
                yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes())

        request_generator = make_generator()
        print("Sending audio data...")
        response = stub.PushAudioStream(request_generator)
        if response.success:
            print("SUCCESS")
        else:
            print(f"ERROR: {response.message}")
    print("Channel closed")


def main():
    """
    This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests.
    There two options:
     * Send the whole track at once using PushAudioRequest()
     * Send the audio chunks seuqntially in a stream using PushAudioStreamRequest()
    For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file.
    But in a real application such stream of chunks may be aquired from some other streaming source:
     * streaming audio via internet, streaming Text-To-Speech, etc
    gRPC protocol details could be find in audio2face.proto
    """

    if len(sys.argv) < 3:
        print("Format: python test_client.py PATH_TO_WAV INSTANCE_NAME")
        return

    # Sleep time emulates long latency of the request
    sleep_time = 2.0  # ADJUST

    # URL of the Audio2Face Streaming Audio Player server (where A2F App is running)
    url = "localhost:50051"  # ADJUST

    # Local input WAV file path
    audio_fpath = sys.argv[1]

    # Prim path of the Audio2Face Streaming Audio Player on the stage (were to push the audio data)
    instance_name = sys.argv[2]

    data, samplerate = soundfile.read(audio_fpath, dtype="float32")

    # Only Mono audio is supported
    if len(data.shape) > 1:
        data = np.average(data, axis=1)

    print(f"Sleeping for {sleep_time} seconds")
    time.sleep(sleep_time)

    if 0:  # ADJUST
        # Push the whole audio track at once
        push_audio_track(url, data, samplerate, instance_name)
    else:
        # Emulate audio stream and push audio chunks sequentially
        push_audio_track_stream(url, data, samplerate, instance_name)


if __name__ == "__main__":
    main()