|
| 1 | +package org.telegram.messenger; |
| 2 | + |
| 3 | +import android.media.MediaCodec; |
| 4 | +import android.media.MediaExtractor; |
| 5 | +import android.media.MediaFormat; |
| 6 | +import android.media.MediaMuxer; |
| 7 | +import android.text.TextUtils; |
| 8 | +import android.util.Base64; |
| 9 | + |
| 10 | +import com.google.gson.Gson; |
| 11 | +import com.google.gson.annotations.Expose; |
| 12 | +import com.google.gson.annotations.SerializedName; |
| 13 | + |
| 14 | +import org.telegram.ui.Components.BulletinFactory; |
| 15 | +import org.telegram.ui.Components.AlertsCreator; |
| 16 | +import org.telegram.ui.LaunchActivity; |
| 17 | +import org.telegram.ui.ActionBar.BaseFragment; |
| 18 | + |
| 19 | +import java.io.BufferedReader; |
| 20 | +import java.io.File; |
| 21 | +import java.io.IOException; |
| 22 | +import java.io.InputStreamReader; |
| 23 | +import java.io.OutputStream; |
| 24 | +import java.net.HttpURLConnection; |
| 25 | +import java.net.URL; |
| 26 | +import java.nio.ByteBuffer; |
| 27 | +import java.nio.file.Files; |
| 28 | +import java.util.List; |
| 29 | +import java.util.concurrent.ExecutorService; |
| 30 | +import java.util.concurrent.Executors; |
| 31 | +import java.util.function.BiConsumer; |
| 32 | + |
| 33 | +public class CloudflareSTT { |
| 34 | + private static final Gson gson = new Gson(); |
| 35 | + private static final ExecutorService executorService = Executors.newCachedThreadPool(); |
| 36 | + |
| 37 | + public static boolean isConfigured() { |
| 38 | + return SharedConfig.cfEnableStt && !TextUtils.isEmpty(SharedConfig.cfAccountID) && !TextUtils.isEmpty(SharedConfig.cfApiToken); |
| 39 | + } |
| 40 | + |
| 41 | + public static void showErrorDialog(Exception e) { |
| 42 | + var fragment = LaunchActivity.getSafeLastFragment(); |
| 43 | + var message = e.getLocalizedMessage(); |
| 44 | + if (fragment == null || !BulletinFactory.canShowBulletin(fragment) || message == null) { |
| 45 | + return; |
| 46 | + } |
| 47 | + if (message.length() > 45) { |
| 48 | + AlertsCreator.showSimpleAlert(fragment, LocaleController.getString("ErrorOccurred", R.string.ErrorOccurred), e.getMessage()); |
| 49 | + } else { |
| 50 | + BulletinFactory.of(fragment).createErrorBulletin(message).show(); |
| 51 | + } |
| 52 | + } |
| 53 | + |
| 54 | + private static void extractAudio(String inputFilePath, String outputFilePath) throws IOException { |
| 55 | + var extractor = new MediaExtractor(); |
| 56 | + MediaMuxer muxer = null; |
| 57 | + try { |
| 58 | + extractor.setDataSource(inputFilePath); |
| 59 | + |
| 60 | + MediaFormat audioFormat = null; |
| 61 | + int audioTrackIndex = -1; |
| 62 | + for (int i = 0; i < extractor.getTrackCount(); i++) { |
| 63 | + var format = extractor.getTrackFormat(i); |
| 64 | + var mime = format.getString(MediaFormat.KEY_MIME); |
| 65 | + if (mime != null && mime.startsWith("audio/")) { |
| 66 | + audioFormat = format; |
| 67 | + audioTrackIndex = i; |
| 68 | + break; |
| 69 | + } |
| 70 | + } |
| 71 | + |
| 72 | + if (audioFormat == null) { |
| 73 | + throw new IOException("No audio track found in " + inputFilePath); |
| 74 | + } |
| 75 | + |
| 76 | + muxer = new MediaMuxer(outputFilePath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4); |
| 77 | + var trackIndex = muxer.addTrack(audioFormat); |
| 78 | + muxer.start(); |
| 79 | + |
| 80 | + extractor.selectTrack(audioTrackIndex); |
| 81 | + |
| 82 | + var bufferInfo = new MediaCodec.BufferInfo(); |
| 83 | + var buffer = ByteBuffer.allocate(65536); |
| 84 | + |
| 85 | + while (true) { |
| 86 | + var sampleSize = extractor.readSampleData(buffer, 0); |
| 87 | + if (sampleSize < 0) { |
| 88 | + break; |
| 89 | + } |
| 90 | + |
| 91 | + bufferInfo.offset = 0; |
| 92 | + bufferInfo.size = sampleSize; |
| 93 | + bufferInfo.presentationTimeUs = extractor.getSampleTime(); |
| 94 | + bufferInfo.flags = 0; |
| 95 | + |
| 96 | + muxer.writeSampleData(trackIndex, buffer, bufferInfo); |
| 97 | + extractor.advance(); |
| 98 | + } |
| 99 | + |
| 100 | + muxer.stop(); |
| 101 | + } finally { |
| 102 | + if (muxer != null) { |
| 103 | + muxer.release(); |
| 104 | + } |
| 105 | + extractor.release(); |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + public static void requestWorkersAi(String path, boolean video, BiConsumer<String, Exception> callback) { |
| 110 | + if (!isConfigured()) { |
| 111 | + callback.accept(null, new Exception(LocaleController.getString("CloudflareCredentialsNotSet", R.string.CloudflareCredentialsNotSet))); |
| 112 | + return; |
| 113 | + } |
| 114 | + executorService.submit(() -> { |
| 115 | + File audioPath; |
| 116 | + if (video) { |
| 117 | + var audioFile = new File(path + ".m4a"); |
| 118 | + try { |
| 119 | + extractAudio(path, audioFile.getAbsolutePath()); |
| 120 | + } catch (IOException e) { |
| 121 | + FileLog.e(e); |
| 122 | + callback.accept(null, e); |
| 123 | + return; |
| 124 | + } |
| 125 | + audioPath = audioFile; |
| 126 | + } else { |
| 127 | + audioPath = new File(path); |
| 128 | + } |
| 129 | + byte[] audio; |
| 130 | + try { |
| 131 | + audio = Files.readAllBytes(audioPath.toPath()); |
| 132 | + } catch (IOException e) { |
| 133 | + callback.accept(null, e); |
| 134 | + return; |
| 135 | + } |
| 136 | + |
| 137 | + var payload = new WhisperRequest(); |
| 138 | + payload.audio = Base64.encodeToString(audio, Base64.NO_WRAP); |
| 139 | + payload.vadFilter = false; |
| 140 | + |
| 141 | + try { |
| 142 | + URL url = new URL("https://api.cloudflare.com/client/v4/accounts/" + SharedConfig.cfAccountID + "/ai/run/@cf/openai/whisper-large-v3-turbo"); |
| 143 | + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); |
| 144 | + conn.setRequestMethod("POST"); |
| 145 | + conn.setRequestProperty("Authorization", "Bearer " + SharedConfig.cfApiToken); |
| 146 | + conn.setRequestProperty("Content-Type", "application/json"); |
| 147 | + conn.setConnectTimeout(120000); |
| 148 | + conn.setReadTimeout(120000); |
| 149 | + conn.setDoOutput(true); |
| 150 | + |
| 151 | + String jsonInputString = gson.toJson(payload); |
| 152 | + try (OutputStream os = conn.getOutputStream()) { |
| 153 | + byte[] input = jsonInputString.getBytes("utf-8"); |
| 154 | + os.write(input, 0, input.length); |
| 155 | + } |
| 156 | + |
| 157 | + int code = conn.getResponseCode(); |
| 158 | + BufferedReader br = new BufferedReader(new InputStreamReader( |
| 159 | + code >= 200 && code < 300 ? conn.getInputStream() : conn.getErrorStream(), "utf-8")); |
| 160 | + StringBuilder response = new StringBuilder(); |
| 161 | + String responseLine; |
| 162 | + while ((responseLine = br.readLine()) != null) { |
| 163 | + response.append(responseLine.trim()); |
| 164 | + } |
| 165 | + |
| 166 | + var whisperResponse = gson.fromJson(response.toString(), WhisperResponse.class); |
| 167 | + if (whisperResponse.success != null && whisperResponse.success && whisperResponse.result != null) { |
| 168 | + callback.accept(whisperResponse.result.text, null); |
| 169 | + } else { |
| 170 | + var errors = whisperResponse.errors; |
| 171 | + if (errors != null && !errors.isEmpty()) { |
| 172 | + callback.accept(null, new Exception(errors.size() == 1 ? errors.get(0).message : errors.toString())); |
| 173 | + } else { |
| 174 | + callback.accept(null, new Exception("Unknown error from Cloudflare: " + code)); |
| 175 | + } |
| 176 | + } |
| 177 | + } catch (Exception e) { |
| 178 | + callback.accept(null, e); |
| 179 | + } |
| 180 | + }); |
| 181 | + } |
| 182 | + |
| 183 | + public static class WhisperRequest { |
| 184 | + @SerializedName("audio") |
| 185 | + @Expose |
| 186 | + public String audio; |
| 187 | + @SerializedName("vad_filter") |
| 188 | + @Expose |
| 189 | + public Boolean vadFilter; |
| 190 | + } |
| 191 | + |
| 192 | + public static class Result { |
| 193 | + @SerializedName("text") |
| 194 | + @Expose |
| 195 | + public String text; |
| 196 | + } |
| 197 | + |
| 198 | + public static class WhisperResponse { |
| 199 | + @SerializedName("result") |
| 200 | + @Expose |
| 201 | + public Result result; |
| 202 | + @SerializedName("success") |
| 203 | + @Expose |
| 204 | + public Boolean success; |
| 205 | + @SerializedName("errors") |
| 206 | + @Expose |
| 207 | + public List<Error> errors; |
| 208 | + } |
| 209 | + |
| 210 | + public static class Error { |
| 211 | + @SerializedName("message") |
| 212 | + @Expose |
| 213 | + public String message; |
| 214 | + |
| 215 | + @Override |
| 216 | + public String toString() { |
| 217 | + return message; |
| 218 | + } |
| 219 | + } |
| 220 | +} |
0 commit comments