@@ -179,7 +179,10 @@ def main():
179179 print (f" ⏱️ { current_sentence [0 ]['start_time' ]:.1f} s - { current_sentence [- 1 ]['end_time' ]:.1f} s" )
180180 print ()
181181
182- sentences .append ({"words" : current_sentence })
182+ sentences .append ({
183+ "image" : None ,
184+ "words" : current_sentence
185+ })
183186 current_sentence = []
184187 sentence_words = []
185188
@@ -188,7 +191,10 @@ def main():
188191 sentence_text = " " .join (sentence_words )
189192 print (f"\n ✅ Final sentence { len (sentences ) + 1 } : \" { sentence_text } \" " )
190193 print (f" ⏱️ { current_sentence [0 ]['start_time' ]:.1f} s - { current_sentence [- 1 ]['end_time' ]:.1f} s" )
191- sentences .append ({"words" : current_sentence })
194+ sentences .append ({
195+ "image" : None ,
196+ "words" : current_sentence
197+ })
192198
193199 # Calculate total song length from Whisper transcription
194200 total_song_length = max (segment ['end' ] for segment in result ['segments' ]) if result ['segments' ] else 0.0
@@ -223,7 +229,9 @@ def main():
223229
224230 # Add sentences with unquoted keys
225231 for i , sentence in enumerate (sentences ):
226- js_content += " {\n words: [\n "
232+ js_content += " {\n "
233+ js_content += f" image: { json .dumps (sentence ['image' ])} ,\n "
234+ js_content += " words: [\n "
227235 for j , word in enumerate (sentence ['words' ]):
228236 # Use JSON encoding for safe JavaScript string literals
229237 escaped_text = json .dumps (word ['text' ]) # This properly escapes quotes, brackets, etc.
@@ -236,7 +244,8 @@ def main():
236244 if j < len (sentence ['words' ]) - 1 :
237245 js_content += ","
238246 js_content += "\n "
239- js_content += " ]\n }"
247+ js_content += " ]\n "
248+ js_content += " }"
240249 if i < len (sentences ) - 1 :
241250 js_content += ","
242251 js_content += "\n "
0 commit comments