Skip to content
430 changes: 430 additions & 0 deletions DATA_INTEGRITY.md

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions RealtimeSTT/audio_recorder_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ def __init__(self,
autostart_server: bool = True,
output_wav_file: str = None,
faster_whisper_vad_filter: bool = False,

# Data integrity verification
enable_data_verification: bool = False,
):

# Set instance variables from constructor parameters
Expand Down Expand Up @@ -255,6 +258,9 @@ def __init__(self,
self.data_url = data_url
self.autostart_server = autostart_server
self.output_wav_file = output_wav_file

# Data integrity verification
self.enable_data_verification = enable_data_verification

# Instance variables
self.muted = False
Expand Down Expand Up @@ -343,6 +349,12 @@ def text(self, on_transcription_finished=None):
print(f"Error in AudioToTextRecorderClient.text(): {e}")
return ""

def calculate_checksum(self, audio_data):
    """Return a 32-bit additive checksum over 16-bit audio samples.

    The buffer is interpreted as signed 16-bit little-endian integers,
    the samples are summed, and the total is reduced to its low 32 bits.

    Args:
        audio_data: Bytes-like object holding the raw samples. Its length
            must be a multiple of 2 bytes.

    Returns:
        int: The checksum as a non-negative integer in [0, 0xFFFFFFFF].

    Raises:
        ValueError: If the buffer length is not a multiple of 2 bytes
            (raised by ``np.frombuffer``).
    """
    # Pin the byte order explicitly: plain np.int16 is native-endian, so
    # the same bytes would yield a different checksum on a big-endian host.
    # NOTE(review): assumes the audio is little-endian 16-bit PCM -- confirm
    # against whatever computes the checksum on the receiving side.
    samples = np.frombuffer(audio_data, dtype="<i2")
    # Use an explicit 64-bit accumulator so its width does not depend on the
    # platform default; masking the Python int maps negative totals into
    # unsigned 32-bit space.
    return int(samples.sum(dtype=np.int64)) & 0xFFFFFFFF

def feed_audio(self, chunk, audio_meta_data, original_sample_rate=16000):
# Start with the base metadata
metadata = {"sampleRate": original_sample_rate}
Expand All @@ -354,6 +366,13 @@ def feed_audio(self, chunk, audio_meta_data, original_sample_rate=16000):
metadata["server_sent_to_stt_formatted"] = format_timestamp_ns(server_sent_to_stt_ns)

metadata.update(audio_meta_data)

# Add verification data if server_sent_to_stt is present (enables verification)
if "server_sent_to_stt" in audio_meta_data:
audio_array = np.frombuffer(chunk, dtype=np.int16)
metadata["dataLength"] = len(audio_array)
metadata["checksum"] = self.calculate_checksum(chunk)
metadata["timestamp"] = int(time.time() * 1000)

# Convert metadata to JSON and prepare the message
metadata_json = json.dumps(metadata)
Expand Down Expand Up @@ -629,6 +648,15 @@ def record_and_send_audio(self):

if self.recording_start.is_set():
metadata = {"sampleRate": self.audio_input.device_sample_rate}

# Add verification data if enabled
if self.enable_data_verification:
audio_array = np.frombuffer(audio_data, dtype=np.int16)
metadata["dataLength"] = len(audio_array)
metadata["checksum"] = self.calculate_checksum(audio_data)
metadata["timestamp"] = int(time.time() * 1000)
metadata["server_sent_to_stt"] = True

metadata_json = json.dumps(metadata)
metadata_length = len(metadata_json)
message = struct.pack('<I', metadata_length) + metadata_json.encode('utf-8') + audio_data
Expand Down
3 changes: 3 additions & 0 deletions RealtimeSTT_server/stt_cli_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def main():
help="Minimum text similarity for hard break (default: 0.99)")
parser.add_argument("--min-chars", type=int, default=15,
help="Minimum characters for hard break (default: 15)")
parser.add_argument("--verify-data", action="store_true",
help="Enable data integrity verification (default: False)")

args = parser.parse_args()

Expand Down Expand Up @@ -216,6 +218,7 @@ def sentence_end(text: str):
use_microphone=True,
input_device_index=args.input_device, # Pass input device index
output_wav_file = args.write or None,
enable_data_verification=args.verify_data, # Enable data verification if requested
)

# Process command-line parameters
Expand Down
166 changes: 146 additions & 20 deletions RealtimeSTT_server/stt_server.py

Large diffs are not rendered by default.

Loading