diff --git a/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala b/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala index 8111e442901..70684730cfa 100644 --- a/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala +++ b/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala @@ -35,7 +35,7 @@ import java.util.Optional object UserAuthenticator extends Authenticator[JwtContext, SessionUser] with LazyLogging { override def authenticate(context: JwtContext): Optional[SessionUser] = { try { - Optional.of(JwtParser.claimsToSessionUser(context.getJwtClaims)) + JwtParser.claimsToOptionalSessionUser(context.getJwtClaims) } catch { case e: Exception => logger.error("Failed to authenticate the JwtContext", e) diff --git a/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala b/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala index bb139e7093a..8a4225df21e 100644 --- a/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala +++ b/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala @@ -38,7 +38,7 @@ object JwtParser extends LazyLogging { /** Verify and parse a Bearer token string. */ def parseToken(token: String): Optional[SessionUser] = { try { - Optional.of(claimsToSessionUser(JwtAuth.jwtConsumer.processToClaims(token))) + claimsToOptionalSessionUser(JwtAuth.jwtConsumer.processToClaims(token)) } catch { case _: UnresolvableKeyException => logger.error("Invalid JWT Signature") @@ -49,6 +49,19 @@ object JwtParser extends LazyLogging { } } + /** Convert already-verified claims to a [[SessionUser]], returning empty when + * the required Texera custom claims are missing or malformed. 
+ */ + def claimsToOptionalSessionUser(claims: JwtClaims): Optional[SessionUser] = { + try { + Optional.of(claimsToSessionUser(claims)) + } catch { + case e @ (_: IllegalArgumentException | _: org.jose4j.jwt.MalformedClaimException) => /* jose4j reports wrong-typed claims via MalformedClaimException */ + logger.error(s"Invalid JWT claims: ${e.getMessage}") + Optional.empty() + } + } + /** Build a [[SessionUser]] from already-verified claims. Used by both * [[parseToken]] (which verifies then calls this) and amber's * `UserAuthenticator` (which the toastshaman filter calls after its own @@ -59,8 +72,12 @@ object JwtParser extends LazyLogging { val email = claims.getClaimValue("email", classOf[String]) // jose4j returns Long after JSON round-trip but the original setClaim // call writes Integer; widen via Number to handle both cases. - val userId = claims.getClaimValue("userId", classOf[Number]).intValue() - val role = UserRoleEnum.valueOf(claims.getClaimValue("role").asInstanceOf[String]) + val userId = Option(claims.getClaimValue("userId", classOf[Number])) + .map(_.intValue()) + .getOrElse(throw new IllegalArgumentException("JWT claim 'userId' is required.")) + val roleName = Option(claims.getClaimValue("role", classOf[String])) + .getOrElse(throw new IllegalArgumentException("JWT claim 'role' is required.")) + val role = UserRoleEnum.valueOf(roleName) val googleId = claims.getClaimValue("googleId", classOf[String]) val googleAvatar = claims.getClaimValue("googleAvatar", classOf[String]) val user = new User( diff --git a/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala b/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala index dc91de4d645..aa2e0c0423c 100644 --- a/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala +++ b/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala @@ -74,6 +74,24 @@ class JwtParserSpec extends AnyFlatSpec with Matchers { u.getGoogleAvatar shouldBe "avatar-blob" } + it should "return empty when already-verified claims carry a wrong-typed userId" in { + val claims = buildClaims() + claims.setClaim("userId", "not-a-number") + JwtParser.claimsToOptionalSessionUser(claims).isPresent shouldBe false + } + + it should "return empty when already-verified claims are missing userId" in { + val claims = buildClaims() +
claims.unsetClaim("userId") + JwtParser.claimsToOptionalSessionUser(claims).isPresent shouldBe false + } + + it should "return empty when already-verified claims are missing role" in { + val claims = buildClaims() + claims.unsetClaim("role") + JwtParser.claimsToOptionalSessionUser(claims).isPresent shouldBe false + } + "JwtParser.parseToken" should "return empty on a structurally invalid token" in { JwtParser.parseToken("not-a-real-jwt").isPresent shouldBe false } diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDesc.scala index 5f203717d1a..0e1062c75a4 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDesc.scala @@ -25,9 +25,12 @@ import org.apache.texera.amber.core.tuple.{AttributeType, Schema} import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity} import org.apache.texera.amber.operator.PythonOperatorDescriptor import org.apache.texera.amber.operator.huggingFace.codegen.{ + AudioTaskCodegen, CodegenContext, ImageTaskCodegen, + MediaGenCodegen, PythonCodegenBase, + QaRankingCodegen, TaskCodegen, TextGenCodegen } @@ -95,6 +98,36 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor { @AutofillAttributeName var inputImageColumn: EncodableString = "" + @JsonProperty(value = "audioInput", required = false) + @JsonSchemaTitle("Audio Upload") + @JsonPropertyDescription("Upload audio for Hugging Face audio tasks") + var audioInput: EncodableString = "" + + @JsonProperty(value = "inputAudioColumn", required = false) + @JsonSchemaTitle("Input Audio Column") + @JsonPropertyDescription("Column containing audio data from the input table") + 
@AutofillAttributeName + var inputAudioColumn: EncodableString = "" + + @JsonProperty(value = "contextColumn", required = false) + @JsonSchemaTitle("Context Column") + @JsonPropertyDescription("Column containing the context passage for question answering") + @AutofillAttributeName + var contextColumn: EncodableString = "" + + @JsonProperty(value = "candidateLabels", required = false) + @JsonSchemaTitle("Candidate Labels") + @JsonPropertyDescription("Comma-separated candidate labels for zero-shot classification") + var candidateLabels: EncodableString = "" + + @JsonProperty(value = "sentencesColumn", required = false) + @JsonSchemaTitle("Sentences Column") + @JsonPropertyDescription( + "Column with comma-separated sentences for sentence similarity and text ranking" + ) + @AutofillAttributeName + var sentencesColumn: EncodableString = "" + @JsonProperty( value = "systemPrompt", required = false, @@ -138,6 +171,9 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor { val byTask = scala.collection.mutable.Map.empty[String, TaskCodegen] byTask += (TextGenCodegen.task -> TextGenCodegen) ImageTaskCodegen.tasks.foreach(t => byTask += (t -> ImageTaskCodegen)) + AudioTaskCodegen.tasks.foreach(t => byTask += (t -> AudioTaskCodegen)) + MediaGenCodegen.tasks.foreach(t => byTask += (t -> MediaGenCodegen)) + QaRankingCodegen.tasks.foreach(t => byTask += (t -> QaRankingCodegen)) byTask.toMap } @@ -181,6 +217,16 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor { if (imageInput == null) "" else imageInput val safeInputImageColumn: EncodableString = if (inputImageColumn == null) "" else inputImageColumn + val safeAudioInput: EncodableString = + if (audioInput == null) "" else audioInput + val safeInputAudioColumn: EncodableString = + if (inputAudioColumn == null) "" else inputAudioColumn + val safeContextColumn: EncodableString = + if (contextColumn == null) "" else contextColumn + val safeCandidateLabels: EncodableString = + if (candidateLabels == 
null) "" else candidateLabels + val safeSentencesColumn: EncodableString = + if (sentencesColumn == null) "" else sentencesColumn val ctx = CodegenContext( hfApiToken = safeToken, @@ -192,7 +238,12 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor { safeMaxTokens = safeMaxTokens, safeTemp = safeTemp, imageInput = safeImageInput, - inputImageColumn = safeInputImageColumn + inputImageColumn = safeInputImageColumn, + audioInput = safeAudioInput, + inputAudioColumn = safeInputAudioColumn, + contextColumn = safeContextColumn, + candidateLabels = safeCandidateLabels, + sentencesColumn = safeSentencesColumn ) PythonCodegenBase.render(ctx, codegenForTask(safeTask)) diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/AudioTaskCodegen.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/AudioTaskCodegen.scala new file mode 100644 index 00000000000..560244962aa --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/AudioTaskCodegen.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.operator.huggingFace.codegen + +/** + * Codegen for Hugging Face audio task families. + * + * ASR and audio-classification send audio bytes as the raw request body. + * Text-to-speech is prompt-driven and sends a JSON payload; its providers + * return either audio bytes directly or a JSON envelope pointing to audio. + */ +object AudioTaskCodegen extends TaskCodegen { + + override val task: String = "automatic-speech-recognition" + + override val tasks: Set[String] = Set( + "automatic-speech-recognition", + "audio-classification", + "text-to-speech" + ) + + override def payloadPython(ctx: CodegenContext): String = + """ if task in audio_only_tasks: + | payload = current_audio_bytes + | use_raw_binary_body = True + | raw_binary_headers = audio_headers + | elif task == "text-to-speech": + | payload = {"inputs": prompt_value}""".stripMargin + + override def parsePython(ctx: CodegenContext): String = + """ if task == "text-to-speech": + | if isinstance(body, dict): + | if "output" in body: + | out = body["output"] + | url = out[0] if isinstance(out, list) else out + | if isinstance(url, str) and url.startswith("http"): + | return self._url_to_data_url(url) + | if "audio" in body: + | audio = body["audio"] + | if isinstance(audio, dict): + | if "url" in audio: + | return self._url_to_data_url(audio["url"]) + | if "b64_json" in audio: + | return f"data:audio/mpeg;base64,{audio['b64_json']}" + | if "data" in body: + | data = body["data"] + | if data and isinstance(data[0], dict): + | if "url" in data[0]: + | return self._url_to_data_url(data[0]["url"]) + | if "b64_json" in data[0]: + | return f"data:audio/mpeg;base64,{data[0]['b64_json']}" + | return json.dumps(body) + | elif task == "automatic-speech-recognition": + | if isinstance(body, dict): + | if "text" in body: + | return body["text"] + | if "generated_text" in body: + | return body["generated_text"] + | return json.dumps(body) + | elif task == "audio-classification": + | 
return json.dumps(body)""".stripMargin +} diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala index c5c4a2669c4..5a5ee0a937e 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/ImageTaskCodegen.scala @@ -90,26 +90,16 @@ object ImageTaskCodegen extends TaskCodegen { | use_raw_binary_body = True | raw_binary_headers = image_headers | elif task == "zero-shot-image-classification": - | # Zero-shot requires the caller to supply candidate labels. - | # We reuse the prompt column as a comma-separated label list so - | # the task is shippable without a dedicated operator field. - | # TODO: replace with a first-class `candidateLabels` field once - | # the property panel supports task-specific inputs. - | # - | # Fail fast if usable labels can't be derived. Both modes lead to - | # a meaningless inference call: - | # 1. Empty prompt column -> labels = [] - | # The HF API rejects candidate_labels: [] with an opaque 400. - | # 2. Missing prompt column -> upstream sets prompt_value - | # to the fallback "What is shown in this image?", which has - | # no comma, so labels collapses to a single nonsense entry. - | # Zero-shot classification needs >= 2 candidate labels to be - | # meaningful — surface a configuration error in both cases. - | labels = [s.strip() for s in prompt_value.split(",") if s.strip()] + | # Prefer the dedicated candidateLabels property; fall back to + | # the prompt column for backward compatibility. 
+ | label_source = (self.CANDIDATE_LABELS or "").strip() if self.CANDIDATE_LABELS else "" + | if not label_source and prompt_value: + | label_source = prompt_value + | labels = [s.strip() for s in label_source.split(",") if s.strip()] | if len(labels) < 2: | raise ValueError( | "zero-shot-image-classification requires at least 2 candidate " - | "labels: provide a comma-separated list in the prompt column." + | "labels: provide a comma-separated list in the Candidate Labels field." | ) | payload = { | "inputs": self._image_input_as_base64(current_image_bytes), diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/MediaGenCodegen.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/MediaGenCodegen.scala new file mode 100644 index 00000000000..73047da89c3 --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/MediaGenCodegen.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.huggingFace.codegen + +/** + * Codegen for prompt-driven media generation tasks. 
+ * + * Providers return media in several shapes: raw bytes, OpenAI-style + * b64_json, or URLs. URL responses are normalized to data URLs by the + * shared `_url_to_data_url` helper so downstream result rendering receives + * a stable string format. + */ +object MediaGenCodegen extends TaskCodegen { + + override val task: String = "text-to-image" + + override val tasks: Set[String] = Set( + "text-to-image", + "text-to-video" + ) + + override def payloadPython(ctx: CodegenContext): String = + """ payload = {"inputs": prompt_value}""".stripMargin + + override def parsePython(ctx: CodegenContext): String = + """ if task == "text-to-image": + | if isinstance(body, dict): + | if "output" in body: + | out = body["output"] + | url = out[0] if isinstance(out, list) else out + | if isinstance(url, str) and url.startswith("http"): + | return self._url_to_data_url(url) + | if "images" in body: + | images = body["images"] + | if images and isinstance(images[0], dict) and "url" in images[0]: + | return self._url_to_data_url(images[0]["url"]) + | if "data" in body: + | data = body["data"] + | if isinstance(data, dict) and "outputs" in data: + | outputs = data["outputs"] + | if outputs and isinstance(outputs[0], str) and outputs[0].startswith("http"): + | return self._url_to_data_url(outputs[0]) + | if isinstance(data, list) and data and isinstance(data[0], dict): + | if "b64_json" in data[0]: + | return f"data:image/png;base64,{data[0]['b64_json']}" + | if "url" in data[0]: + | return self._url_to_data_url(data[0]["url"]) + | return json.dumps(body) + | elif task == "text-to-video": + | if isinstance(body, dict): + | if "output" in body: + | out = body["output"] + | url = out[0] if isinstance(out, list) else out + | if isinstance(url, str) and url.startswith("http"): + | return self._url_to_data_url(url) + | if "video" in body: + | video = body["video"] + | if isinstance(video, dict) and "url" in video: + | return self._url_to_data_url(video["url"]) + | return 
json.dumps(body)""".stripMargin +} diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/PythonCodegenBase.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/PythonCodegenBase.scala index eac4641c62e..4184182a2e0 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/PythonCodegenBase.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/PythonCodegenBase.scala @@ -57,6 +57,11 @@ object PythonCodegenBase { val temperature = ctx.safeTemp val imageInput = ctx.imageInput val inputImageColumn = ctx.inputImageColumn + val audioInput = ctx.audioInput + val inputAudioColumn = ctx.inputAudioColumn + val contextColumn = ctx.contextColumn + val candidateLabels = ctx.candidateLabels + val sentencesColumn = ctx.sentencesColumn pyb"""import os |import re |import json @@ -137,6 +142,11 @@ object PythonCodegenBase { | self.TEMPERATURE = $temperature | self.IMAGE_INPUT = $imageInput | self.INPUT_IMAGE_COLUMN = $inputImageColumn + | self.AUDIO_INPUT = $audioInput + | self.INPUT_AUDIO_COLUMN = $inputAudioColumn + | self.CONTEXT_COLUMN = $contextColumn + | self.CANDIDATE_LABELS = $candidateLabels + | self.SENTENCES_COLUMN = $sentencesColumn | | def _resolve_providers(self, token): | '''Query the HF Hub API for inference providers serving this model. 
@@ -286,7 +296,14 @@ object PythonCodegenBase { | if provider_name == "replicate": | url = f"{base}/v1/models/{provider_id}/predictions" | hdrs = {**json_headers, "Prefer": "wait"} - | if task == "image-to-image" and img_b64: + | if task == "text-to-speech": + | inp = {"text": prompt_value} + | elif task in ("text-to-image", "text-to-video"): + | inp = {"prompt": prompt_value} + | elif task in ("automatic-speech-recognition", "audio-classification") and img_b64: + | audio_content_type = raw_binary_headers.get("Content-Type", "audio/mpeg") + | inp = {"audio": f"data:{audio_content_type};base64,{img_b64}"} + | elif task == "image-to-image" and img_b64: | data_url = f"data:image/png;base64,{img_b64}" | inp = {"image": data_url, "images": [data_url], "input_image": data_url, "prompt": prompt_value} | elif img_b64: @@ -340,6 +357,10 @@ object PythonCodegenBase { | # Fal-ai: per-model endpoint. | if provider_name == "fal-ai": | url = f"{base}/{provider_id}" + | if task == "text-to-speech": + | return requests.post(url, headers=json_headers, json={"text": prompt_value}, timeout=120) + | if task in ("text-to-image", "text-to-video"): + | return requests.post(url, headers=json_headers, json={"prompt": prompt_value}, timeout=120) | if task == "image-to-image" and img_b64: | data_url = f"data:image/png;base64,{img_b64}" | return requests.post(url, headers=json_headers, json={"image_url": data_url, "image_urls": [data_url], "prompt": prompt_value}, timeout=120) @@ -398,6 +419,12 @@ object PythonCodegenBase { | return poll_resp | | if provider_name in self.OPENAI_COMPATIBLE_PROVIDERS: + | if task == "text-to-image": + | url = f"{base}/v1/images/generations" + | return requests.post(url, headers=json_headers, json={"model": provider_id, "prompt": prompt_value}, timeout=120) + | if task == "text-to-speech": + | url = f"{base}/v1/audio/speech" + | return requests.post(url, headers=json_headers, json={"model": provider_id, "input": prompt_value}, timeout=120) | url = 
f"{base}/{self.CHAT_ROUTES.get(provider_name, 'v1/chat/completions')}" | messages = [{"role": "user", "content": prompt_value}] | if img_b64: @@ -444,6 +471,7 @@ object PythonCodegenBase { | image_only_tasks = ("image-classification", "object-detection", "image-segmentation", "image-to-text") | image_prompt_tasks = ("visual-question-answering", "document-question-answering", "zero-shot-image-classification", "image-text-to-text", "image-to-image") | image_tasks = image_only_tasks + image_prompt_tasks + | audio_only_tasks = ("automatic-speech-recognition", "audio-classification") | | # --- validate MODEL_ID format before any HF URL is built --- | if not _HF_MODEL_ID_PATTERN.match(self.MODEL_ID or ""): @@ -463,12 +491,29 @@ object PythonCodegenBase { | # --- resolve all available inference providers for this model (tried in order) --- | providers = self._resolve_providers(token) | - | # --- validate prompt column exists (required for non-image tasks) --- - | if task not in image_tasks: + | # --- validate prompt column exists (skipped for image tasks and binary-only audio tasks) --- + | if task not in image_tasks and task not in audio_only_tasks: | assert prompt_col in table.columns, ( | f"Prompt column '{prompt_col}' not found in input table. " | f"Available columns: {list(table.columns)}" | ) + | if task == "zero-shot-classification": + | assert self.CANDIDATE_LABELS and self.CANDIDATE_LABELS.strip(), ( + | "Candidate Labels are required for zero-shot-classification. " + | "Provide a comma-separated list of labels." + | ) + | if task == "question-answering": + | ctx_col = self.CONTEXT_COLUMN + | assert ctx_col and ctx_col in table.columns, ( + | f"Context column '{ctx_col}' not found in input table. " + | f"Available columns: {list(table.columns)}" + | ) + | if task in ("sentence-similarity", "text-ranking"): + | sent_col = self.SENTENCES_COLUMN + | assert sent_col and sent_col in table.columns, ( + | f"Sentences column '{sent_col}' not found in input table. 
" + | f"Available columns: {list(table.columns)}" + | ) | | # --- handle empty table --- | if table.empty: @@ -484,12 +529,29 @@ object PythonCodegenBase { | "Authorization": f"Bearer {token}", | "Content-Type": "application/octet-stream", | } + | # --- pre-compute table dict for table-question-answering --- + | table_dict = None + | if task == "table-question-answering": + | table_dict = {} + | for col in table.columns: + | if col != prompt_col and col != result_col: + | table_dict[col] = [ + | str(v) if not pd.isna(v) else "" for v in table[col].tolist() + | ] | | # --- resolve image source (upload or column) for image tasks --- | has_image_upload = bool(self.IMAGE_INPUT) and bool(str(self.IMAGE_INPUT).strip()) | use_image_column = not has_image_upload and bool(self.INPUT_IMAGE_COLUMN) and self.INPUT_IMAGE_COLUMN in table.columns | image_bytes = None | image_error = None + | has_audio_upload = bool(self.AUDIO_INPUT) and bool(str(self.AUDIO_INPUT).strip()) + | use_audio_column = not has_audio_upload and bool(self.INPUT_AUDIO_COLUMN) and self.INPUT_AUDIO_COLUMN in table.columns + | audio_headers = { + | "Authorization": f"Bearer {token}", + | "Content-Type": "application/octet-stream" if use_audio_column else self._get_audio_content_type(), + | } + | audio_bytes = None + | audio_error = None | if task in image_tasks and not use_image_column: | if not has_image_upload: | image_error = "No image source. Set an Input Image Column or upload an image." @@ -498,15 +560,28 @@ object PythonCodegenBase { | image_bytes = self._read_image_input() | except Exception as e: | image_error = f"Could not read image input ({type(e).__name__}: {e})" + | if task in audio_only_tasks and not use_audio_column: + | if not has_audio_upload: + | audio_error = "No audio source. Set an Input Audio Column or upload audio." 
+ | else: + | try: + | audio_bytes = self._read_audio_input() + | except Exception as e: + | audio_error = f"Could not read audio input ({type(e).__name__}: {e})" | | results = [] | for idx, row in table.iterrows(): | if image_error is not None: | results.append(self._format_error("Image task configuration error", image_error)) | continue + | if audio_error is not None: + | results.append(self._format_error("Audio task configuration error", audio_error)) + | continue | | if task in image_only_tasks: | prompt_value = "" + | elif task in audio_only_tasks: + | prompt_value = "" | elif task in image_prompt_tasks and prompt_col not in table.columns: | prompt_value = "What is shown in this image?" | else: @@ -529,6 +604,18 @@ object PythonCodegenBase { | results.append(self._format_error("Image data error", f"Row {idx}: {type(e).__name__}: {e}")) | continue | + | # --- resolve per-row audio bytes from column --- + | current_audio_bytes = audio_bytes + | if task in audio_only_tasks and use_audio_column: + | try: + | current_audio_bytes = self._read_binary_value(row[self.INPUT_AUDIO_COLUMN]) + | if current_audio_bytes is None: + | results.append(self._format_error("Audio data error", f"Row {idx}: audio column is empty")) + | continue + | except Exception as e: + | results.append(self._format_error("Audio data error", f"Row {idx}: {type(e).__name__}: {e}")) + | continue + | | # --- build task-specific payload (provided by per-task codegen) --- | use_raw_binary_body = False | raw_binary_headers = image_headers @@ -576,6 +663,10 @@ object PythonCodegenBase { | b64 = base64.b64encode(resp.content).decode("utf-8") | results.append(f"data:{content_type};base64,{b64}") | continue + | if content_type.startswith("audio/") or content_type.startswith("video/"): + | b64 = base64.b64encode(resp.content).decode("utf-8") + | results.append(f"data:{content_type};base64,{b64}") + | continue | | try: | body = resp.json() @@ -702,6 +793,22 @@ object PythonCodegenBase { | def 
_image_input_as_base64(self, image_bytes): | return base64.b64encode(image_bytes).decode("utf-8") | + | def _read_audio_input(self): + | audio_input = str(self.AUDIO_INPUT or "").strip() + | if audio_input.startswith("data:"): + | _, encoded = audio_input.split(",", 1) + | return base64.b64decode(encoded) + | if audio_input.startswith("http://") or audio_input.startswith("https://"): + | _, data = self._fetch_remote_url(audio_input) + | return data + | # Reading arbitrary worker-filesystem paths is intentionally NOT + | # supported: uploaded audio arrives as a data URL and remote audio + | # must be fetched through the hardened https-only helper above. + | raise ValueError( + | "Unsupported audio input. Upload an audio file (sent as a data URL) " + | "or provide a public https audio URL." + | ) + | | def _read_binary_value(self, value): | if value is None: | return None @@ -821,6 +928,28 @@ object PythonCodegenBase { | return text[start_pos:pos], pos | return None, start_pos | + | def _get_audio_content_type(self): + | audio_input = str(self.AUDIO_INPUT or "").strip().lower() + | if audio_input.startswith("data:"): + | header = audio_input.split(",", 1)[0] + | if ";" in header: + | return header[5:header.index(";")] or "audio/mpeg" + | return header[5:] or "audio/mpeg" + | extension_map = { + | ".mp3": "audio/mpeg", + | ".mpeg": "audio/mpeg", + | ".wav": "audio/wav", + | ".flac": "audio/flac", + | ".ogg": "audio/ogg", + | ".oga": "audio/ogg", + | ".webm": "audio/webm", + | ".opus": "audio/webm;codecs=opus", + | ".amr": "audio/amr", + | ".m4a": "audio/m4a", + | } + | _, ext = os.path.splitext(audio_input.split("?", 1)[0].split("#", 1)[0])  # drop ?query/#fragment so signed URLs keep their extension + | return extension_map.get(ext, "audio/mpeg") + | | def _url_to_data_url(self, url): | '''Fetch a URL and return a data URL with the correct MIME type. 
| Fetched via _fetch_remote_url so a malicious/compromised provider @@ -831,12 +960,12 @@ object PythonCodegenBase { | if not content_type or content_type == "application/octet-stream": | from urllib.parse import urlparse as _urlparse | ext = os.path.splitext(_urlparse(url).path.lower())[1] - | mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".gif": "image/gif", ".webp": "image/webp", ".svg": "image/svg+xml", ".mp4": "video/mp4", ".webm": "video/webm"} + | mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".gif": "image/gif", ".webp": "image/webp", ".svg": "image/svg+xml", ".mp3": "audio/mpeg", ".mpeg": "audio/mpeg", ".wav": "audio/wav", ".flac": "audio/flac", ".ogg": "audio/ogg", ".oga": "audio/ogg", ".m4a": "audio/mp4", ".mp4": "video/mp4", ".webm": "video/webm"} | guessed = mime_map.get(ext, "") | if guessed: | content_type = guessed | else: - | task_mime = {"image-to-image": "image/png"} + | task_mime = {"image-to-image": "image/png", "text-to-image": "image/png", "text-to-video": "video/mp4", "text-to-speech": "audio/mpeg"} | content_type = task_mime.get(self.TASK, "application/octet-stream") | b64 = base64.b64encode(data).decode("utf-8") | return f"data:{content_type};base64,{b64}" diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/QaRankingCodegen.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/QaRankingCodegen.scala new file mode 100644 index 00000000000..79572e8259f --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/QaRankingCodegen.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.huggingFace.codegen + +/** + * Codegen for question-answering, zero-shot, similarity, and ranking tasks. + * + * These tasks are prompt-driven but need extra per-row or per-operator + * inputs: context text, candidate labels, table contents, or a list of + * comparison sentences/documents. + */ +object QaRankingCodegen extends TaskCodegen { + + override val task: String = "question-answering" + + override val tasks: Set[String] = Set( + "question-answering", + "table-question-answering", + "zero-shot-classification", + "sentence-similarity", + "text-ranking" + ) + + override def payloadPython(ctx: CodegenContext): String = + """ if task == "question-answering": + | ctx_val = row[self.CONTEXT_COLUMN] + | ctx_val = "" if pd.isna(ctx_val) else str(ctx_val) + | payload = {"inputs": {"question": prompt_value, "context": ctx_val}} + | elif task == "table-question-answering": + | payload = {"inputs": {"query": prompt_value, "table": table_dict}} + | elif task == "zero-shot-classification": + | labels = [l.strip() for l in self.CANDIDATE_LABELS.split(",") if l.strip()] + | payload = { + | "inputs": prompt_value, + | "parameters": {"candidate_labels": labels}, + | } + | elif task in ("sentence-similarity", "text-ranking"): + | sent_val = row[self.SENTENCES_COLUMN] + | sent_val = "" if pd.isna(sent_val) else str(sent_val) + | sentences_list = 
[s.strip() for s in sent_val.split(",") if s.strip()] + | payload = { + | "inputs": { + | "source_sentence": prompt_value, + | "sentences": sentences_list, + | } + | } + | else: + | payload = {"inputs": prompt_value}""".stripMargin + + override def parsePython(ctx: CodegenContext): String = + """ if task == "question-answering": + | return body.get("answer", json.dumps(body)) if isinstance(body, dict) else json.dumps(body) + | elif task == "table-question-answering": + | return body.get("answer", json.dumps(body)) if isinstance(body, dict) else json.dumps(body) + | elif task in ("zero-shot-classification", "sentence-similarity", "text-ranking"): + | return json.dumps(body)""".stripMargin +} diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/TaskCodegen.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/TaskCodegen.scala index 299ea5d6e3f..8abcef721b5 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/TaskCodegen.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/codegen/TaskCodegen.scala @@ -39,7 +39,12 @@ final case class CodegenContext( safeMaxTokens: Int, safeTemp: Double, imageInput: EncodableString = "", - inputImageColumn: EncodableString = "" + inputImageColumn: EncodableString = "", + audioInput: EncodableString = "", + inputAudioColumn: EncodableString = "", + contextColumn: EncodableString = "", + candidateLabels: EncodableString = "", + sentencesColumn: EncodableString = "" ) /** diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDescSpec.scala index 0d6e09302fb..b5c27ee521b 100644 --- 
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDescSpec.scala +++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceInferenceOpDescSpec.scala @@ -21,7 +21,13 @@ package org.apache.texera.amber.operator.huggingFace import org.apache.texera.amber.core.tuple.{AttributeType, Schema} import org.apache.texera.amber.core.workflow.PortIdentity -import org.apache.texera.amber.operator.huggingFace.codegen.{CodegenContext, TextGenCodegen} +import org.apache.texera.amber.operator.huggingFace.codegen.{ + AudioTaskCodegen, + CodegenContext, + MediaGenCodegen, + QaRankingCodegen, + TextGenCodegen +} import org.apache.texera.amber.operator.metadata.OperatorGroupConstants import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString import org.scalatest.flatspec.AnyFlatSpec @@ -39,7 +45,12 @@ class HuggingFaceInferenceOpDescSpec extends AnyFlatSpec with Matchers { temperature: Double = 0.7, resultColumn: EncodableString = "hf_response", imageInput: EncodableString = "", - inputImageColumn: EncodableString = "" + inputImageColumn: EncodableString = "", + audioInput: EncodableString = "", + inputAudioColumn: EncodableString = "", + contextColumn: EncodableString = "", + candidateLabels: EncodableString = "", + sentencesColumn: EncodableString = "" ): HuggingFaceInferenceOpDesc = { val desc = new HuggingFaceInferenceOpDesc() desc.hfApiToken = token @@ -52,6 +63,11 @@ class HuggingFaceInferenceOpDescSpec extends AnyFlatSpec with Matchers { desc.resultColumn = resultColumn desc.imageInput = imageInput desc.inputImageColumn = inputImageColumn + desc.audioInput = audioInput + desc.inputAudioColumn = inputAudioColumn + desc.contextColumn = contextColumn + desc.candidateLabels = candidateLabels + desc.sentencesColumn = sentencesColumn desc } @@ -152,6 +168,11 @@ class HuggingFaceInferenceOpDescSpec extends AnyFlatSpec with Matchers { desc.temperature = null desc.imageInput 
= null desc.inputImageColumn = null + desc.audioInput = null + desc.inputAudioColumn = null + desc.contextColumn = null + desc.candidateLabels = null + desc.sentencesColumn = null val code = desc.generatePythonCode() code should include("class ProcessTableOperator(UDFTableOperator):") code should include("def open(self):") @@ -272,10 +293,15 @@ class HuggingFaceInferenceOpDescSpec extends AnyFlatSpec with Matchers { // size cap code should include("MAX_REMOTE_FETCH_BYTES") code should include("Remote file exceeds the") - // all three fetch sites route through the helper (no raw requests.get on these URLs) + // all remote fetch sites route through the helper (no raw requests.get on these URLs) code should include("_, data = self._fetch_remote_url(image_input)") + code should include("_, data = self._fetch_remote_url(audio_input)") code should include("_, data = self._fetch_remote_url(val)") code should include("raw_content_type, data = self._fetch_remote_url(url)") + code should not include "def _audio_url_to_data_url" + code should not include "requests.get(audio_input" + code should not include "os.path.exists(audio_input)" + code should not include "open(audio_input" } it should "treat pandas NA sentinels (NaN, pd.NA, NaT) as missing in _read_binary_value" in { @@ -402,6 +428,143 @@ class HuggingFaceInferenceOpDescSpec extends AnyFlatSpec with Matchers { } } + "audio task family" should + "route ASR and audio-classification through AudioTaskCodegen as raw binary payloads" in { + val code = + makeDesc(task = "automatic-speech-recognition", inputAudioColumn = "audio") + .generatePythonCode() + code should include("self.AUDIO_INPUT = ") + code should include("self.INPUT_AUDIO_COLUMN = ") + code should include( + """audio_only_tasks = ("automatic-speech-recognition", "audio-classification")""" + ) + code should include("payload = current_audio_bytes") + code should include("raw_binary_headers = audio_headers") + code should include("self._read_audio_input()") + code 
should include( + """"Content-Type": "application/octet-stream" if use_audio_column else self._get_audio_content_type()""" + ) + code should include( + """audio_content_type = raw_binary_headers.get("Content-Type", "audio/mpeg")""" + ) + code should include( + """elif task in ("automatic-speech-recognition", "audio-classification") and img_b64:""" + ) + code should not include "data:audio/wav;base64" + code should include( + """if content_type.startswith("audio/") or content_type.startswith("video/"):""" + ) + } + + it should "route text-to-speech through AudioTaskCodegen and normalize audio URLs" in { + val code = makeDesc(task = "text-to-speech").generatePythonCode() + code should include("""elif task == "text-to-speech":""") + code should include("""payload = {"inputs": prompt_value}""") + code should include("self._url_to_data_url(") + code should include(""""text-to-speech": "audio/mpeg"""") + code should not include "_audio_url_to_data_url" + code should include("data:audio/mpeg;base64") + } + + it should "register all audio task strings under the dispatcher" in { + AudioTaskCodegen.tasks should contain allOf ( + "automatic-speech-recognition", + "audio-classification", + "text-to-speech" + ) + AudioTaskCodegen.tasks.foreach { t => + val code = makeDesc(task = t, inputAudioColumn = "audio").generatePythonCode() + code should include("if task in audio_only_tasks:") + } + } + + "media generation task family" should + "route text-to-image through MediaGenCodegen and parse URL or b64 responses as data URLs" in { + val code = makeDesc(task = "text-to-image").generatePythonCode() + code should include("if task not in image_tasks and task not in audio_only_tasks:") + code should include("""payload = {"inputs": prompt_value}""") + code should include("""if task == "text-to-image":""") + code should include("self._url_to_data_url(") + code should include("data:image/png;base64") + } + + it should "route text-to-video through MediaGenCodegen and normalize remote video 
URLs" in { + val code = makeDesc(task = "text-to-video").generatePythonCode() + code should include("""elif task == "text-to-video":""") + code should include("self._url_to_data_url(") + code should include("video/mp4") + } + + it should "register all media generation task strings under the dispatcher" in { + MediaGenCodegen.tasks should contain allOf ("text-to-image", "text-to-video") + MediaGenCodegen.tasks.foreach { t => + val code = makeDesc(task = t).generatePythonCode() + code should include("""payload = {"inputs": prompt_value}""") + } + } + + "qa and ranking task family" should + "route question-answering through QaRankingCodegen with context-column validation" in { + val code = makeDesc(task = "question-answering", contextColumn = "context").generatePythonCode() + code should include("self.CONTEXT_COLUMN = ") + code should include("""if task == "question-answering":""") + code should include("ctx_col = self.CONTEXT_COLUMN") + code should include("Context column") + code should include("""payload = {"inputs": {"question": prompt_value, "context": ctx_val}}""") + code should include( + """return body.get("answer", json.dumps(body)) if isinstance(body, dict) else json.dumps(body)""" + ) + } + + it should "route table-question-answering with a precomputed table payload" in { + val code = makeDesc(task = "table-question-answering").generatePythonCode() + code should include("""if task == "table-question-answering":""") + code should include("table_dict = {}") + code should include("""payload = {"inputs": {"query": prompt_value, "table": table_dict}}""") + code should include( + """return body.get("answer", json.dumps(body)) if isinstance(body, dict) else json.dumps(body)""" + ) + } + + it should "route zero-shot-classification with candidate labels" in { + val code = + makeDesc(task = "zero-shot-classification", candidateLabels = "positive,negative") + .generatePythonCode() + code should include("self.CANDIDATE_LABELS = ") + code should include("""if task == 
"zero-shot-classification":""") + code should include("Candidate Labels are required for zero-shot-classification.") + code should include("""elif task == "zero-shot-classification":""") + code should include("labels = [l.strip() for l in self.CANDIDATE_LABELS.split") + code should include(""""parameters": {"candidate_labels": labels}""") + } + + it should "route sentence-similarity and text-ranking with sentences-column validation" in { + Seq("sentence-similarity", "text-ranking").foreach { taskName => + val code = makeDesc(task = taskName, sentencesColumn = "sentences").generatePythonCode() + code should include("self.SENTENCES_COLUMN = ") + code should include("""elif task in ("sentence-similarity", "text-ranking"):""") + code should include("sent_col = self.SENTENCES_COLUMN") + code should include("Sentences column") + code should include(""""source_sentence": prompt_value""") + code should include(""""sentences": sentences_list""") + } + } + + it should "register all qa and ranking task strings under the dispatcher" in { + QaRankingCodegen.tasks should contain allOf ( + "question-answering", + "table-question-answering", + "zero-shot-classification", + "sentence-similarity", + "text-ranking" + ) + QaRankingCodegen.tasks.foreach { t => + val code = makeDesc(task = t, contextColumn = "context", sentencesColumn = "sentences") + .generatePythonCode() + code should include("""if task == "question-answering":""") + } + } + "getOutputSchemas" should "add the result column as a STRING to the inherited schema" in { val desc = makeDesc(resultColumn = "answer") val inputSchema = Schema().add("prompt", AttributeType.STRING) diff --git a/frontend/src/app/app.module.ts b/frontend/src/app/app.module.ts index 35e82f81b75..c2820725310 100644 --- a/frontend/src/app/app.module.ts +++ b/frontend/src/app/app.module.ts @@ -106,6 +106,8 @@ import { AgentPanelComponent } from "./workspace/component/agent/agent-panel/age import { AgentChatComponent } from 
"./workspace/component/agent/agent-panel/agent-chat/agent-chat.component"; import { AgentRegistrationComponent } from "./workspace/component/agent/agent-panel/agent-registration/agent-registration.component"; import { HuggingFaceImageUploadComponent } from "./workspace/component/hugging-face-image-upload/hugging-face-image-upload.component"; +import { HuggingFaceComponent } from "./workspace/component/hugging-face/hugging-face.component"; +import { HuggingFaceAudioUploadComponent } from "./workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component"; import { DatasetFileSelectorComponent } from "./workspace/component/dataset-file-selector/dataset-file-selector.component"; import { DatasetVersionSelectorComponent } from "./workspace/component/dataset-version-selector/dataset-version-selector.component"; import { DatasetSelectionModalComponent } from "./workspace/component/dataset-selection-modal/dataset-selection-modal.component"; @@ -332,6 +334,8 @@ registerLocaleData(en); AgentChatComponent, AgentRegistrationComponent, AgentInteractionComponent, + HuggingFaceComponent, + HuggingFaceAudioUploadComponent, HuggingFaceImageUploadComponent, DatasetFileSelectorComponent, DatasetVersionSelectorComponent, diff --git a/frontend/src/app/common/formly/formly-config.ts b/frontend/src/app/common/formly/formly-config.ts index ba80dc51f96..c4fc54fd77f 100644 --- a/frontend/src/app/common/formly/formly-config.ts +++ b/frontend/src/app/common/formly/formly-config.ts @@ -30,6 +30,8 @@ import { FormlyRepeatDndComponent } from "./repeat-dnd/repeat-dnd.component"; import { UiUdfParametersComponent } from "../../workspace/component/ui-udf-parameters/ui-udf-parameters.component"; import { DatasetVersionSelectorComponent } from "../../workspace/component/dataset-version-selector/dataset-version-selector.component"; import { HuggingFaceImageUploadComponent } from "../../workspace/component/hugging-face-image-upload/hugging-face-image-upload.component"; +import { 
HuggingFaceComponent } from "../../workspace/component/hugging-face/hugging-face.component"; +import { HuggingFaceAudioUploadComponent } from "../../workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component"; /** * Configuration for using Json Schema with Formly. @@ -81,6 +83,8 @@ export const TEXERA_FORMLY_CONFIG = { { name: "codearea", component: CodeareaCustomTemplateComponent }, { name: "inputautocomplete", component: DatasetFileSelectorComponent, wrappers: ["form-field"] }, { name: "datasetversionselector", component: DatasetVersionSelectorComponent, wrappers: ["form-field"] }, + { name: "huggingface", component: HuggingFaceComponent, wrappers: ["form-field"] }, + { name: "huggingface-audio-upload", component: HuggingFaceAudioUploadComponent, wrappers: ["form-field"] }, { name: "huggingface-image-upload", component: HuggingFaceImageUploadComponent, wrappers: ["form-field"] }, { name: "repeat-section-dnd", component: FormlyRepeatDndComponent }, { name: "ui-udf-parameters", component: UiUdfParametersComponent, wrappers: ["form-field"] }, diff --git a/frontend/src/app/common/util/media-type.util.spec.ts b/frontend/src/app/common/util/media-type.util.spec.ts new file mode 100644 index 00000000000..ed81fec9f45 --- /dev/null +++ b/frontend/src/app/common/util/media-type.util.spec.ts @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { isAudioUrl, isImageUrl, isVideoUrl } from "./media-type.util"; + +describe("isImageUrl", () => { + it("should return true for data:image/ data URLs", () => { + expect(isImageUrl("data:image/png;base64,abc123")).toBe(true); + expect(isImageUrl("data:image/jpeg;base64,abc123")).toBe(true); + expect(isImageUrl("data:image/webp;base64,abc123")).toBe(true); + }); + + it("should return true for common image file extensions", () => { + expect(isImageUrl("https://example.com/photo.png")).toBe(true); + expect(isImageUrl("https://example.com/photo.jpg")).toBe(true); + expect(isImageUrl("https://example.com/photo.jpeg")).toBe(true); + expect(isImageUrl("https://example.com/photo.gif")).toBe(true); + expect(isImageUrl("https://example.com/photo.webp")).toBe(true); + }); + + it("should be case-insensitive for extensions", () => { + expect(isImageUrl("https://example.com/photo.PNG")).toBe(true); + expect(isImageUrl("https://example.com/photo.JPG")).toBe(true); + }); + + it("should return true for URLs with query strings", () => { + expect(isImageUrl("https://example.com/photo.png?v=1")).toBe(true); + }); + + it("should return false for audio and video URLs", () => { + expect(isImageUrl("data:audio/mp3;base64,abc")).toBe(false); + expect(isImageUrl("data:video/mp4;base64,abc")).toBe(false); + expect(isImageUrl("https://example.com/clip.mp4")).toBe(false); + }); + + it("should return false for plain text strings", () => { + expect(isImageUrl("hello world")).toBe(false); + expect(isImageUrl("")).toBe(false); + }); +}); + +describe("isAudioUrl", () 
=> { + it("should return true for data:audio/ data URLs", () => { + expect(isAudioUrl("data:audio/mp3;base64,abc123")).toBe(true); + expect(isAudioUrl("data:audio/wav;base64,abc123")).toBe(true); + }); + + it("should return true for common audio file extensions", () => { + expect(isAudioUrl("https://example.com/clip.mp3")).toBe(true); + expect(isAudioUrl("https://example.com/clip.wav")).toBe(true); + expect(isAudioUrl("https://example.com/clip.ogg")).toBe(true); + expect(isAudioUrl("https://example.com/clip.m4a")).toBe(true); + expect(isAudioUrl("https://example.com/clip.flac")).toBe(true); + }); + + it("should be case-insensitive for extensions", () => { + expect(isAudioUrl("https://example.com/clip.MP3")).toBe(true); + expect(isAudioUrl("https://example.com/clip.WAV")).toBe(true); + }); + + it("should return true for URLs with query strings", () => { + expect(isAudioUrl("https://example.com/clip.mp3?token=xyz")).toBe(true); + }); + + it("should return false for image and video URLs", () => { + expect(isAudioUrl("data:image/png;base64,abc")).toBe(false); + expect(isAudioUrl("data:video/mp4;base64,abc")).toBe(false); + expect(isAudioUrl("https://example.com/photo.png")).toBe(false); + }); + + it("should return false for plain text strings", () => { + expect(isAudioUrl("hello world")).toBe(false); + expect(isAudioUrl("")).toBe(false); + }); +}); + +describe("isVideoUrl", () => { + it("should return true for data:video/ data URLs", () => { + expect(isVideoUrl("data:video/mp4;base64,abc123")).toBe(true); + expect(isVideoUrl("data:video/webm;base64,abc123")).toBe(true); + }); + + it("should return true for common video file extensions", () => { + expect(isVideoUrl("https://example.com/clip.mp4")).toBe(true); + expect(isVideoUrl("https://example.com/clip.webm")).toBe(true); + expect(isVideoUrl("https://example.com/clip.ogv")).toBe(true); + }); + + it("should return true for fal.media CDN URLs", () => { + 
expect(isVideoUrl("https://v3b.fal.media/files/abc123/output.mp4")).toBe(true); + }); + + it("should be case-insensitive for extensions", () => { + expect(isVideoUrl("https://example.com/clip.MP4")).toBe(true); + expect(isVideoUrl("https://example.com/clip.WEBM")).toBe(true); + }); + + it("should return true for URLs with query strings", () => { + expect(isVideoUrl("https://example.com/clip.mp4?t=5")).toBe(true); + }); + + it("should return false for image and audio URLs", () => { + expect(isVideoUrl("data:image/png;base64,abc")).toBe(false); + expect(isVideoUrl("data:audio/mp3;base64,abc")).toBe(false); + expect(isVideoUrl("https://example.com/photo.jpg")).toBe(false); + }); + + it("should return false for plain text strings", () => { + expect(isVideoUrl("hello world")).toBe(false); + expect(isVideoUrl("")).toBe(false); + }); +}); diff --git a/frontend/src/app/common/util/media-type.util.ts b/frontend/src/app/common/util/media-type.util.ts new file mode 100644 index 00000000000..d60446573a8 --- /dev/null +++ b/frontend/src/app/common/util/media-type.util.ts @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +export function isVideoUrl(value: string): boolean { + if (typeof value !== "string") return false; + return ( + value.match(/\.(mp4|webm|ogv)(\?.*)?$/i) !== null || + value.startsWith("data:video/") || + value.startsWith("https://v3b.fal.media/files/") + ); +} + +export function isAudioUrl(value: string): boolean { + if (typeof value !== "string") return false; + return value.match(/\.(mp3|wav|ogg|m4a|flac)(\?.*)?$/i) !== null || value.startsWith("data:audio/"); +} + +export function isImageUrl(value: string): boolean { + if (typeof value !== "string") return false; + return value.match(/\.(png|jpg|jpeg|gif|webp)(\?.*)?$/i) !== null || value.startsWith("data:image/"); +} diff --git a/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.html b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.html new file mode 100644 index 00000000000..507528e8d4b --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.html @@ -0,0 +1,63 @@ + + +
+
+ Audio files are uploaded to temporary backend storage and referenced from the operator, so larger clips can be used + without bloating the workflow JSON. +
+ + + +
+ +
+ {{ fileName || "Selected audio" }} + Uploading... + +
+
+ +
+ {{ errorMessage }} +
+
diff --git a/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.scss b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.scss new file mode 100644 index 00000000000..0757524e04f --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.scss @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +.hf-audio-upload { + display: flex; + flex-direction: column; + gap: 8px; +} + +.hf-audio-guidance { + color: #595959; + font-size: 12px; + line-height: 1.4; +} + +.hf-audio-upload-input { + width: 100%; +} + +.hf-audio-preview { + border: 1px solid #d9d9d9; + border-radius: 4px; + padding: 8px; +} + +.hf-audio-preview audio { + display: block; + width: 100%; +} + +.hf-audio-meta { + display: flex; + align-items: center; + justify-content: space-between; + gap: 8px; + margin-top: 8px; +} + +.hf-audio-meta span { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.hf-audio-status { + color: #595959; + font-size: 12px; +} + +.hf-audio-error { + color: #cf1322; +} diff --git a/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.spec.ts b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.spec.ts new file mode 100644 index 00000000000..a73a9a0cd27 --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.spec.ts @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { TestBed } from "@angular/core/testing"; +import { HttpClientTestingModule, HttpTestingController } from "@angular/common/http/testing"; +import { FormControl } from "@angular/forms"; +import { FieldTypeConfig } from "@ngx-formly/core"; +import { HuggingFaceAudioUploadComponent } from "./hugging-face-audio-upload.component"; + +describe("HuggingFaceAudioUploadComponent", () => { + let component: HuggingFaceAudioUploadComponent; + let httpTestingController: HttpTestingController; + let formControl: FormControl; + + function makeFileEvent(file: File | null): Event { + const input = document.createElement("input"); + if (file) { + Object.defineProperty(input, "files", { value: [file] }); + } + return { target: input } as unknown as Event; + } + + beforeEach(async () => { + await TestBed.configureTestingModule({ + imports: [HuggingFaceAudioUploadComponent, HttpClientTestingModule], + }).compileComponents(); + + const fixture = TestBed.createComponent(HuggingFaceAudioUploadComponent); + component = fixture.componentInstance; + formControl = new FormControl(""); + component.field = { formControl } as FieldTypeConfig; + httpTestingController = TestBed.inject(HttpTestingController); + }); + + it("should have the correct selector", () => { + const metadata = Reflect.getOwnPropertyDescriptor(HuggingFaceAudioUploadComponent, "__annotations"); + // Component decorator metadata is available via the Angular compiler; + // at minimum verify the class is importable and constructable metadata exists. 
+ expect(HuggingFaceAudioUploadComponent.prototype).toBeDefined(); + }); +}); diff --git a/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.ts b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.ts new file mode 100644 index 00000000000..0cbeea53ff1 --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face-audio-upload/hugging-face-audio-upload.component.ts @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import { Component, OnDestroy, OnInit } from "@angular/core"; +import { CommonModule } from "@angular/common"; +import { FieldType, FieldTypeConfig } from "@ngx-formly/core"; +import { HttpClient } from "@angular/common/http"; +import { NzButtonModule } from "ng-zorro-antd/button"; +import { firstValueFrom } from "rxjs"; +import { AppSettings } from "../../../common/app-setting"; + +interface HuggingFaceAudioUploadResponse { + path: string; + fileName: string; +} + +@Component({ + selector: "texera-hugging-face-audio-upload", + templateUrl: "./hugging-face-audio-upload.component.html", + styleUrls: ["./hugging-face-audio-upload.component.scss"], + imports: [CommonModule, NzButtonModule], +}) +export class HuggingFaceAudioUploadComponent extends FieldType implements OnInit, OnDestroy { + fileName = ""; + errorMessage = ""; + isUploading = false; + private localPreviewUrl = ""; + + ngOnInit(): void { + if (typeof this.formControl.value === "string" && this.formControl.value.trim().length > 0) { + this.fileName = this.getDisplayName(this.formControl.value); + } + } + + constructor(private http: HttpClient) { + super(); + } + + get previewSrc(): string { + if (this.localPreviewUrl) { + return this.localPreviewUrl; + } + const value = this.formControl.value; + if (typeof value !== "string" || value.trim().length === 0) { + return ""; + } + if (value.startsWith("data:audio/")) { + return value; + } + return `${AppSettings.getApiEndpoint()}/huggingface/audio-preview?path=${encodeURIComponent(value)}`; + } + + ngOnDestroy(): void { + this.revokePreviewUrl(); + } + + async onFileSelected(event: Event): Promise { + if (this.isUploading) { + return; + } + this.errorMessage = ""; + const input = event.target as HTMLInputElement; + const file = input.files?.[0]; + + if (!file) { + return; + } + if (!file.type.startsWith("audio/")) { + this.errorMessage = "Choose an audio file."; + input.value = ""; + return; + } + this.revokePreviewUrl(); + const previewUrl = 
URL.createObjectURL(file); + this.localPreviewUrl = previewUrl; + this.isUploading = true; + + try { + const response = await firstValueFrom( + this.http.post( + `${AppSettings.getApiEndpoint()}/huggingface/upload-audio?filename=${encodeURIComponent(file.name)}`, + file, + { + headers: { + "Content-Type": "application/octet-stream", + }, + } + ) + ); + // If the user clicked Clear while the upload was in flight, + // localPreviewUrl will have been revoked/reset — discard the stale response. + if (this.localPreviewUrl !== previewUrl) return; + this.fileName = response.fileName || file.name; + this.formControl.setValue(response.path); + if (typeof this.key === "string" && this.model) { + this.model[this.key] = response.path; + } + this.formControl.markAsDirty(); + this.formControl.markAsTouched(); + this.formControl.updateValueAndValidity(); + } catch { + if (this.localPreviewUrl !== previewUrl) return; + this.clearAudio(input, false); + this.errorMessage = "Could not upload this audio file."; + } finally { + this.isUploading = false; + } + } + + clearAudio(input: HTMLInputElement, clearError: boolean = true): void { + this.fileName = ""; + if (clearError) { + this.errorMessage = ""; + } + this.isUploading = false; + this.revokePreviewUrl(); + input.value = ""; + this.formControl.setValue(""); + if (typeof this.key === "string" && this.model) { + this.model[this.key] = ""; + } + this.formControl.markAsDirty(); + this.formControl.markAsTouched(); + this.formControl.updateValueAndValidity(); + } + + private revokePreviewUrl(): void { + if (this.localPreviewUrl) { + URL.revokeObjectURL(this.localPreviewUrl); + this.localPreviewUrl = ""; + } + } + + private getDisplayName(value: string): string { + const trimmedValue = value.trim(); + if (!trimmedValue) { + return ""; + } + if (trimmedValue.startsWith("data:audio/")) { + return "Selected audio"; + } + const segments = trimmedValue.split(/[\\/]/); + return segments[segments.length - 1] || "Selected audio"; + } +} diff 
--git a/frontend/src/app/workspace/component/hugging-face/hugging-face.component.html b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.html new file mode 100644 index 00000000000..777111cc3ea --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.html @@ -0,0 +1,203 @@ + + +
+ + + + + + + + +
+ {{ tasksError }} + +
+ + + + + + + + + + + + + +
+ + Loading models... +
+ + +
+ {{ errorMessage }} + +
+ + +
+ Results may be incomplete. Use the search bar to find models not shown here. +
+ + +
+ +
+ Selected: + {{ formControl.value }} + +
+ + +
+ {{ isSearching ? 'No models found for "' + searchText + '".' : 'No models available.' }} +
+ + +
+ {{ model.id }} + + + + {{ model.downloads | number }} + + + + {{ model.likes | number }} + + +
+
+ + +
+ + Page {{ currentPage + 1 }} of {{ totalPages }} + +
+
+ + diff --git a/frontend/src/app/workspace/component/hugging-face/hugging-face.component.scss b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.scss new file mode 100644 index 00000000000..f16ddc91536 --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.scss @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+// Values shared by several rules below.
+$hf-font-small: 12px;
+$hf-text-primary: rgba(0, 0, 0, 0.85);
+$hf-text-secondary: rgba(0, 0, 0, 0.65);
+$hf-text-muted: #999;
+$hf-danger: #ff4d4f;
+$hf-border: #d9d9d9;
+$hf-selected-bg: #e6f7ff;
+
+// Horizontal flex row with vertically centred children.
+@mixin hf-row {
+  display: flex;
+  align-items: center;
+}
+
+// Single-line text that is cut off with an ellipsis.
+@mixin hf-ellipsis {
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.hf-model-select-container {
+  width: 100%;
+}
+
+// Field label, with an optional required marker in front of it.
+.hf-section-label {
+  display: block;
+  font-size: 14px;
+  font-weight: normal;
+  color: $hf-text-primary;
+  line-height: 32px;
+  margin-top: 8px;
+
+  .hf-required {
+    display: inline-block;
+    color: $hf-danger;
+    font-size: 14px;
+    font-family: SimSun, sans-serif;
+    line-height: 1;
+    margin-right: 4px;
+  }
+}
+
+// Status rows: loading, error and the truncation notice.
+.hf-loading {
+  @include hf-row;
+  gap: 8px;
+  padding: 4px 0;
+
+  .loading-text {
+    font-size: $hf-font-small;
+    color: $hf-text-muted;
+  }
+}
+
+.hf-error {
+  @include hf-row;
+  gap: 4px;
+  padding: 4px 0;
+
+  .error-text {
+    font-size: $hf-font-small;
+    color: $hf-danger;
+  }
+}
+
+.hf-truncation-notice {
+  font-size: $hf-font-small;
+  color: #faad14;
+  padding: 4px 0;
+  margin-bottom: 4px;
+}
+
+// Scrollable model list and the rows inside it.
+.hf-model-list {
+  border: 1px solid $hf-border;
+  border-radius: 4px;
+  max-height: 360px;
+  overflow-y: auto;
+}
+
+.hf-selected-model {
+  @include hf-row;
+  padding: 6px 10px;
+  background: $hf-selected-bg;
+  border-bottom: 1px solid $hf-border;
+  font-size: $hf-font-small;
+
+  .hf-selected-label {
+    font-weight: 500;
+    margin-right: 6px;
+    color: $hf-text-secondary;
+  }
+
+  .hf-selected-value {
+    color: #1890ff;
+    font-weight: 500;
+    flex: 1;
+    @include hf-ellipsis;
+  }
+}
+
+.hf-empty {
+  padding: 16px;
+  text-align: center;
+  color: $hf-text-muted;
+  font-size: $hf-font-small;
+}
+
+.hf-model-item {
+  @include hf-row;
+  justify-content: space-between;
+  padding: 6px 10px;
+  cursor: pointer;
+  border-bottom: 1px solid #f0f0f0;
+  transition: background 0.15s;
+
+  &:last-child {
+    border-bottom: none;
+  }
+
+  &:hover {
+    background: #fafafa;
+  }
+
+  &.hf-model-item-selected {
+    background: $hf-selected-bg;
+  }
+
+  .hf-model-id {
+    font-size: $hf-font-small;
+    color: $hf-text-primary;
+    flex: 1;
+    @include hf-ellipsis;
+    margin-right: 8px;
+  }
+
+  .hf-model-meta {
+    font-size: 11px;
+    color: $hf-text-muted;
+    white-space: nowrap;
+    flex-shrink: 0;
+  }
+}
+
+// Pager below the list.
+.hf-pagination {
+  @include hf-row;
+  justify-content: center;
+  gap: 12px;
+  padding: 8px 0;
+  margin-top: 4px;
+
+  .hf-page-info {
+    font-size: $hf-font-small;
+    color: $hf-text-secondary;
+  }
+}
diff --git a/frontend/src/app/workspace/component/hugging-face/hugging-face.component.spec.ts b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.spec.ts
new file mode 100644
index 00000000000..a7e76a4af68
--- /dev/null
+++ b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.spec.ts
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { STATIC_TASK_OPTIONS, invalidateHuggingFaceModelCache } from "./hugging-face.component";
+
+describe("HuggingFaceComponent (unit)", () => {
+  /** Counts the static task options whose tag is one of `tags`. */
+  const countStaticTasks = (tags: string[]): number =>
+    STATIC_TASK_OPTIONS.filter(option => tags.includes(option.tag)).length;
+
+  // Start every test from empty module-level caches.
+  beforeEach(() => invalidateHuggingFaceModelCache());
+
+  it("should export a non-empty static task list", () => {
+    expect(STATIC_TASK_OPTIONS.length).toBeGreaterThan(0);
+  });
+
+  it("should include text-generation in static task options", () => {
+    const match = STATIC_TASK_OPTIONS.find(option => option.tag === "text-generation");
+    expect(match).toBeTruthy();
+    expect(match?.label).toBe("Text Generation");
+  });
+
+  it("should include image tasks in static task options", () => {
+    const imageTags = ["image-classification", "object-detection", "image-segmentation", "image-to-text"];
+    expect(countStaticTasks(imageTags)).toBe(4);
+  });
+
+  it("should include audio tasks in static task options", () => {
+    const audioTags = ["automatic-speech-recognition", "audio-classification", "text-to-speech"];
+    expect(countStaticTasks(audioTags)).toBe(3);
+  });
+
+  it("should include QA/ranking tasks in static task options", () => {
+    const qaTags = ["question-answering", "zero-shot-classification", "sentence-similarity", "text-ranking"];
+    expect(countStaticTasks(qaTags)).toBe(4);
+  });
+
+  it("should clear caches on invalidateHuggingFaceModelCache", () => {
+    // Smoke check only: the caches are private to the module, so all this can
+    // observe is that clearing them does not throw.
+    expect(() => invalidateHuggingFaceModelCache()).not.toThrow();
+  });
+
+  it("should have unique tags in static task options", () => {
+    const allTags = STATIC_TASK_OPTIONS.map(option => option.tag);
+    expect(new Set(allTags).size).toBe(allTags.length);
+  });
+});
diff --git a/frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts
new file mode
100644 index 00000000000..f634c66a2a1 --- /dev/null +++ b/frontend/src/app/workspace/component/hugging-face/hugging-face.component.ts @@ -0,0 +1,659 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { Component, OnInit, OnDestroy, ChangeDetectorRef } from "@angular/core"; +import { CommonModule } from "@angular/common"; +import { FormsModule } from "@angular/forms"; +import { FieldType, FieldTypeConfig, FormlyModule } from "@ngx-formly/core"; +import { HttpClient } from "@angular/common/http"; +import { NzSelectModule } from "ng-zorro-antd/select"; +import { NzInputModule } from "ng-zorro-antd/input"; +import { NzSpinModule } from "ng-zorro-antd/spin"; +import { NzButtonModule } from "ng-zorro-antd/button"; +import { NzIconModule } from "ng-zorro-antd/icon"; +import { AppSettings } from "../../../common/app-setting"; +import { of, Subject, Subscription } from "rxjs"; +import { catchError, debounceTime, finalize, switchMap, takeUntil } from "rxjs/operators"; + +export interface HuggingFaceModelOption { + id: string; + label: string; + pipeline_tag?: string; + downloads?: number; + likes?: number; +} + +export interface HuggingFaceTaskOption { + tag: string; + label: string; +} + +// ── Static 
fallback task list (used when the dynamic fetch fails) ──
+export const STATIC_TASK_OPTIONS: HuggingFaceTaskOption[] = [
+  { tag: "text-generation", label: "Text Generation" },
+  { tag: "automatic-speech-recognition", label: "Automatic Speech Recognition" },
+  { tag: "audio-classification", label: "Audio Classification" },
+  { tag: "text-classification", label: "Text Classification" },
+  { tag: "text-to-speech", label: "Text to Speech" },
+  { tag: "token-classification", label: "Token Classification" },
+  { tag: "question-answering", label: "Question Answering" },
+  { tag: "table-question-answering", label: "Table Question Answering" },
+  { tag: "zero-shot-classification", label: "Zero-Shot Classification" },
+  { tag: "translation", label: "Translation" },
+  { tag: "summarization", label: "Summarization" },
+  { tag: "feature-extraction", label: "Feature Extraction" },
+  { tag: "fill-mask", label: "Fill-Mask" },
+  { tag: "sentence-similarity", label: "Sentence Similarity" },
+  { tag: "text-ranking", label: "Text Ranking" },
+  { tag: "image-classification", label: "Image Classification" },
+  { tag: "object-detection", label: "Object Detection" },
+  { tag: "image-segmentation", label: "Image Segmentation" },
+  { tag: "image-to-text", label: "Image to Text" },
+  { tag: "visual-question-answering", label: "Visual Question Answering" },
+  { tag: "document-question-answering", label: "Document Question Answering" },
+  { tag: "zero-shot-image-classification", label: "Zero-Shot Image Classification" },
+];
+
+/** Number of models shown per page of the client-side pager. */
+const PAGE_SIZE = 50;
+
+/** Response header; the value "true" marks a models response as incomplete. */
+const TRUNCATED_HEADER = "X-Texera-Truncated";
+
+/** Message shown when the model list for a task could not be loaded. */
+const MODELS_LOAD_ERROR = "Failed to load models. Click retry to try again.";
+
+// ── Module-level caches (reused across component instances) ──
+const allModelsByTag: Map<string, HuggingFaceModelOption[]> = new Map();
+const truncatedByTag: Set<string> = new Set();
+const inFlightByTag: Map<string, Subscription> = new Map();
+const errorByTag: Map<string, string> = new Map();
+
+let cachedTaskOptions: HuggingFaceTaskOption[] | null = null;
+let tasksFetchSubscription: Subscription | null = null;
+let tasksFetchError: string | null = null;
+
+/**
+ * Clear all cached data (useful for tests or manual invalidation).
+ * Requests that are still in flight are cancelled as part of the reset.
+ */
+export function invalidateHuggingFaceModelCache(): void {
+  allModelsByTag.clear();
+  truncatedByTag.clear();
+  errorByTag.clear();
+  inFlightByTag.forEach(sub => sub.unsubscribe());
+  inFlightByTag.clear();
+  cachedTaskOptions = null;
+  tasksFetchError = null;
+  tasksFetchSubscription?.unsubscribe();
+  tasksFetchSubscription = null;
+}
+
+/**
+ * Formly field for choosing a Hugging Face task and one of its models.
+ *
+ * Task and model lists are fetched through the backend and cached at module
+ * level, so every instance of the component shares one fetch per task. The
+ * model list is paged on the client; searching is local when the list is
+ * complete and goes to the backend when the list was truncated.
+ */
+@Component({
+  selector: "texera-hugging-face-model-select",
+  templateUrl: "./hugging-face.component.html",
+  styleUrls: ["hugging-face.component.scss"],
+  imports: [
+    CommonModule,
+    FormsModule,
+    NzSelectModule,
+    NzInputModule,
+    NzSpinModule,
+    NzButtonModule,
+    NzIconModule,
+    FormlyModule,
+  ],
+})
+export class HuggingFaceComponent extends FieldType<FieldTypeConfig> implements OnInit, OnDestroy {
+  /** Form keys whose values are snapshotted, restored and reset per task. */
+  private readonly taskScopedKeys = [
+    "modelId",
+    "promptColumn",
+    "imageInput",
+    "audioInput",
+    "inputImageColumn",
+    "inputAudioColumn",
+    "candidateLabels",
+    "sentencesColumn",
+    "contextColumn",
+    "systemPrompt",
+    "maxNewTokens",
+    "temperature",
+  ] as const;
+  /** Snapshots of the task-scoped values, keyed by the task they were taken for. */
+  private readonly taskStateByTag = new Map<
+    string,
+    Partial<Record<(typeof this.taskScopedKeys)[number], unknown>>
+  >();
+  // ── Task state ──
+  taskOptions: HuggingFaceTaskOption[] = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+  selectedTaskTag = "text-generation";
+  tasksLoading = false;
+  tasksError: string | null = null;
+
+  // ── All models for the current task (fetched once from backend, cached) ──
+  private allModels: HuggingFaceModelOption[] = [];
+
+  // ── Displayed state ──
+  pagedModels: HuggingFaceModelOption[] = [];
+  currentPage = 0;
+  totalPages = 0;
+
+  loading = false;
+  errorMessage: string | null = null;
+
+  // ── Truncation notice ──
+  truncated = false;
+
+  // ── Search state ──
+  searchText = "";
+  searchLoading = false;
+  private filteredModels: HuggingFaceModelOption[] | null = null;
+  private readonly searchSubject$ = new Subject<string>();
+  /** Emits to abort the server-side search request that is currently in flight. */
+  private readonly cancelSearch$ = new Subject<void>();
+  private searchSubscription: Subscription | null = null;
+
+  private readonly destroy$ = new Subject<void>();
+  /** Set in ngOnDestroy; callbacks that can outlive the view check it before rendering. */
+  private destroyed = false;
+  private subscription: Subscription | null = null;
+  private taskPollInterval: ReturnType<typeof setInterval> | null = null;
+  private modelPollInterval: ReturnType<typeof setInterval> | null = null;
+  private initTimeout: ReturnType<typeof setTimeout> | null = null;
+
+  constructor(
+    private http: HttpClient,
+    private cdr: ChangeDetectorRef
+  ) {
+    super();
+  }
+
+  /** Restores the saved task (if any), then loads tasks and models and wires up search. */
+  ngOnInit(): void {
+    const savedTag = this.getCurrentTaskTag();
+    this.selectedTaskTag = savedTag ?? this.selectedTaskTag;
+    this.syncTaskSelection(this.selectedTaskTag, false);
+    this.loadTasks();
+    this.loadAllModels();
+    this.setupServerSearch();
+    // Formly can attach sibling controls after this field initializes.
+    // Re-sync once the control tree settles so a fresh operator starts in a valid task state.
+    this.initTimeout = setTimeout(
+      () => this.syncTaskSelection(this.getCurrentTaskTag() ?? this.selectedTaskTag, false),
+      0
+    );
+  }
+
+  /** Stops every subscription and timer owned by this instance. */
+  ngOnDestroy(): void {
+    this.destroyed = true;
+    this.destroy$.next();
+    this.destroy$.complete();
+    this.subscription?.unsubscribe();
+    this.searchSubscription?.unsubscribe();
+    this.searchSubject$.complete();
+    this.cancelSearch$.complete();
+    this.stopTaskPoll();
+    this.stopModelPoll();
+    if (this.initTimeout !== null) {
+      clearTimeout(this.initTimeout);
+      this.initTimeout = null;
+    }
+  }
+
+  // ── Task loading ──
+
+  /**
+   * Fetch available pipeline tags from the backend, which proxies HuggingFace's /api/tasks.
+   * Falls back to STATIC_TASK_OPTIONS if the fetch fails.
+   */
+  private loadTasks(): void {
+    // Already fetched and cached
+    if (cachedTaskOptions !== null) {
+      this.taskOptions = cachedTaskOptions;
+      return;
+    }
+
+    // Previous fetch errored — show static list, don't retry automatically
+    if (tasksFetchError !== null) {
+      this.tasksError = tasksFetchError;
+      this.taskOptions = STATIC_TASK_OPTIONS;
+      return;
+    }
+
+    // Another component instance already has a fetch in flight — wait for it
+    if (tasksFetchSubscription !== null) {
+      this.tasksLoading = true;
+      this.stopTaskPoll();
+      this.taskPollInterval = setInterval(() => {
+        if (cachedTaskOptions !== null || tasksFetchError !== null) {
+          this.stopTaskPoll();
+          this.tasksLoading = false;
+          this.taskOptions = cachedTaskOptions ?? STATIC_TASK_OPTIONS;
+          if (tasksFetchError) this.tasksError = tasksFetchError;
+          this.detectChangesSafely();
+        } else if (tasksFetchSubscription === null) {
+          // Fetch was canceled before populating caches; stop polling and fall back.
+          this.stopTaskPoll();
+          this.tasksLoading = false;
+          this.taskOptions = STATIC_TASK_OPTIONS;
+          this.detectChangesSafely();
+        }
+      }, 200);
+      return;
+    }
+
+    this.tasksLoading = true;
+    this.tasksError = null;
+    this.detectChangesSafely();
+
+    // This fetch deliberately outlives the component so the shared cache is
+    // still filled for other instances; its callbacks therefore must not touch
+    // a destroyed view (hence detectChangesSafely).
+    tasksFetchSubscription = this.http
+      .get<HuggingFaceTaskOption[]>(`${AppSettings.getApiEndpoint()}/huggingface/tasks`)
+      .pipe(
+        finalize(() => {
+          if (cachedTaskOptions === null && tasksFetchError === null) {
+            tasksFetchSubscription = null;
+          }
+        })
+      )
+      // eslint-disable-next-line rxjs-angular/prefer-takeuntil
+      .subscribe({
+        next: tasks => {
+          tasksFetchSubscription = null;
+          // A missing or empty body falls back to the static list.
+          cachedTaskOptions = Array.isArray(tasks) && tasks.length > 0 ? tasks : STATIC_TASK_OPTIONS;
+          this.taskOptions = cachedTaskOptions;
+          this.tasksLoading = false;
+          this.detectChangesSafely();
+        },
+        error: (err: unknown) => {
+          console.error("Failed to load HuggingFace tasks:", err);
+          tasksFetchSubscription = null;
+          tasksFetchError = "Could not load tasks from Hugging Face. Using default list.";
+          this.tasksError = tasksFetchError;
+          this.taskOptions = STATIC_TASK_OPTIONS;
+          this.tasksLoading = false;
+          this.detectChangesSafely();
+        },
+      });
+  }
+
+  /** Forgets the cached task-fetch error and tries the fetch again. */
+  retryTasksLoad(): void {
+    tasksFetchError = null;
+    this.tasksError = null;
+    this.loadTasks();
+  }
+
+  // ── Task selection ──
+
+  /**
+   * Switches to another task: saves the current task's field values, applies
+   * the new task (defaults on a first visit, the saved values otherwise),
+   * drops any search and loads the new task's models.
+   */
+  onTaskSelected(tag: string): void {
+    const previousTask = this.getCurrentTaskTag() ?? this.selectedTaskTag;
+    this.snapshotTaskState(previousTask);
+    this.syncTaskSelection(tag, true);
+    this.restoreTaskState(tag);
+    // A search belongs to the task it was typed for; do not let it finish into the new one.
+    this.cancelPendingSearch();
+    this.searchText = "";
+    this.searchLoading = false;
+    this.filteredModels = null;
+    this.loadAllModels();
+  }
+
+  // ── Data loading ──
+
+  /**
+   * Fetch ALL models for the selected task.
+   * The backend paginates through HF Hub internally and caches the result.
+   * The first request per task may be slow; subsequent requests are instant.
+   *
+   * Results are always written to the shared cache, but only rendered when the
+   * task they belong to is still the selected one.
+   */
+  private loadAllModels(): void {
+    const tag = this.selectedTaskTag || "text-generation";
+    const isStillSelected = (): boolean => (this.selectedTaskTag || "text-generation") === tag;
+
+    // A poll started for a previously selected task must not render into this one.
+    this.stopModelPoll();
+
+    this.loading = false;
+    this.errorMessage = null;
+
+    // Fast path: cached on the frontend
+    if (allModelsByTag.has(tag)) {
+      this.allModels = allModelsByTag.get(tag)!;
+      this.truncated = truncatedByTag.has(tag);
+      this.goToPage(0);
+      return;
+    }
+
+    // Previous error
+    if (errorByTag.has(tag)) {
+      this.errorMessage = errorByTag.get(tag)!;
+      this.allModels = [];
+      this.pagedModels = [];
+      this.totalPages = 0;
+      return;
+    }
+
+    // Another instance is already fetching this task — wait for it
+    if (inFlightByTag.has(tag)) {
+      // Do not keep showing the previous task's models while waiting.
+      this.allModels = [];
+      this.pagedModels = [];
+      this.totalPages = 0;
+      this.loading = true;
+      this.modelPollInterval = setInterval(() => {
+        if (allModelsByTag.has(tag) || errorByTag.has(tag)) {
+          this.stopModelPoll();
+          this.loading = false;
+          if (allModelsByTag.has(tag)) {
+            this.allModels = allModelsByTag.get(tag)!;
+            this.truncated = truncatedByTag.has(tag);
+            this.goToPage(0);
+          } else {
+            this.errorMessage = errorByTag.get(tag)!;
+            this.detectChangesSafely();
+          }
+        } else if (!inFlightByTag.has(tag)) {
+          // Fetch was canceled before populating caches; stop polling and offer a retry.
+          this.stopModelPoll();
+          this.loading = false;
+          this.errorMessage = MODELS_LOAD_ERROR;
+          this.detectChangesSafely();
+        }
+      }, 200);
+      return;
+    }
+
+    // Cancel previous
+    this.subscription?.unsubscribe();
+    this.subscription = null;
+
+    this.allModels = [];
+    this.pagedModels = [];
+    this.totalPages = 0;
+
+    // Show spinner immediately for the initial fetch — it can take a while
+    // as the backend pages through HF Hub for the first time.
+    this.loading = true;
+    this.detectChangesSafely();
+
+    const subscription = this.http
+      .get<HuggingFaceModelOption[]>(
+        `${AppSettings.getApiEndpoint()}/huggingface/models?task=${encodeURIComponent(tag)}`,
+        { observe: "response" }
+      )
+      .pipe(
+        finalize(() => inFlightByTag.delete(tag)),
+        takeUntil(this.destroy$)
+      )
+      .subscribe({
+        next: resp => {
+          const models = resp.body ?? [];
+          if (resp.headers.get(TRUNCATED_HEADER) === "true") {
+            truncatedByTag.add(tag);
+          }
+          allModelsByTag.set(tag, models);
+          // The user may have moved to another task in the meantime: keep the
+          // cache entry, but leave the view alone.
+          if (!isStillSelected()) return;
+          this.loading = false;
+          this.truncated = truncatedByTag.has(tag);
+          this.allModels = models;
+          this.goToPage(0);
+        },
+        error: (err: unknown) => {
+          console.error(`Failed to load HuggingFace models for task '${tag}':`, err);
+          errorByTag.set(tag, MODELS_LOAD_ERROR);
+          if (!isStillSelected()) return;
+          this.loading = false;
+          this.errorMessage = MODELS_LOAD_ERROR;
+          this.detectChangesSafely();
+        },
+      });
+
+    this.subscription = subscription;
+    // If the request already finished synchronously, finalize() has run and
+    // registering it now would leave a dead entry that waiters poll on forever.
+    if (!subscription.closed) {
+      inFlightByTag.set(tag, subscription);
+    }
+  }
+
+  // ── Pagination (client-side over the active list) ──
+
+  /** The list being paged: the search result when a search is active, otherwise all models. */
+  private get activeList(): HuggingFaceModelOption[] {
+    return this.filteredModels !== null ? this.filteredModels : this.allModels;
+  }
+
+  /**
+   * Shows the given zero-based page of the active list. The index is clamped
+   * to the valid range, and an empty list still counts as one (empty) page.
+   */
+  goToPage(page: number): void {
+    const list = this.activeList;
+    this.totalPages = Math.max(1, Math.ceil(list.length / PAGE_SIZE));
+    this.currentPage = Math.max(0, Math.min(page, this.totalPages - 1));
+    const start = this.currentPage * PAGE_SIZE;
+    this.pagedModels = list.slice(start, start + PAGE_SIZE);
+    this.detectChangesSafely();
+  }
+
+  prevPage(): void {
+    if (this.currentPage > 0) {
+      this.goToPage(this.currentPage - 1);
+    }
+  }
+
+  nextPage(): void {
+    if (this.currentPage < this.totalPages - 1) {
+      this.goToPage(this.currentPage + 1);
+    }
+  }
+
+  get hasNextPage(): boolean {
+    return this.currentPage < this.totalPages - 1;
+  }
+
+  /** Forgets the cached error for the selected task and fetches its models again. */
+  retryLoad(): void {
+    const tag = this.selectedTaskTag || "text-generation";
+    errorByTag.delete(tag);
+    this.loadAllModels();
+  }
+
+  // ── Search ──
+
+  /**
+   * Wires the debounced server-side search. A blank query is the cancellation
+   * signal sent by cancelPendingSearch(): it replaces a query that is still
+   * waiting in the debounce window and produces no request.
+   */
+  private setupServerSearch(): void {
+    this.searchSubscription = this.searchSubject$
+      .pipe(
+        debounceTime(300),
+        switchMap(query => {
+          if (!query.trim()) {
+            return of<HuggingFaceModelOption[] | null>(null);
+          }
+          const tag = this.selectedTaskTag || "text-generation";
+          this.searchLoading = true;
+          this.detectChangesSafely();
+          return this.http
+            .get<
+              HuggingFaceModelOption[]
+            >(`${AppSettings.getApiEndpoint()}/huggingface/models?task=${encodeURIComponent(tag)}&search=${encodeURIComponent(query)}`)
+            .pipe(
+              // Lets clearSearch / a task switch abort the request immediately.
+              takeUntil(this.cancelSearch$),
+              catchError((err: unknown) => {
+                console.error("Server-side search failed:", err);
+                this.searchLoading = false;
+                this.detectChangesSafely();
+                return of(null);
+              })
+            );
+        }),
+        takeUntil(this.destroy$)
+      )
+      .subscribe({
+        next: models => {
+          if (models === null) return;
+          this.searchLoading = false;
+          this.filteredModels = models;
+          this.goToPage(0);
+        },
+      });
+  }
+
+  /**
+   * Applies a search query. A blank query clears the search; otherwise the
+   * query is sent to the backend when the local list is truncated and
+   * filtered locally (case-insensitive match on the model id) when it is not.
+   */
+  onSearchInput(query: string): void {
+    this.searchText = query;
+    if (!query.trim()) {
+      this.cancelPendingSearch();
+      this.filteredModels = null;
+      this.searchLoading = false;
+      this.goToPage(0);
+      return;
+    }
+    if (this.truncated) {
+      // Server-side search — needed because local list is incomplete
+      this.searchSubject$.next(query);
+    } else {
+      // Local filter — full list is available
+      const lower = query.toLowerCase();
+      this.filteredModels = this.allModels.filter(m => m.id.toLowerCase().includes(lower));
+      this.goToPage(0);
+    }
+  }
+
+  clearSearch(): void {
+    this.cancelPendingSearch();
+    this.searchText = "";
+    this.filteredModels = null;
+    this.searchLoading = false;
+    this.goToPage(0);
+  }
+
+  get isSearching(): boolean {
+    return this.filteredModels !== null || this.searchLoading;
+  }
+
+  // ── Model selection ──
+
+  onModelSelected(modelId: string): void {
+    this.formControl.setValue(modelId);
+  }
+
+  // ── Private helpers ──
+
+  /** Runs change detection unless the component has already been destroyed. */
+  private detectChangesSafely(): void {
+    if (!this.destroyed) {
+      this.cdr.detectChanges();
+    }
+  }
+
+  /** Stops the poll that waits for another instance's task fetch. */
+  private stopTaskPoll(): void {
+    if (this.taskPollInterval !== null) {
+      clearInterval(this.taskPollInterval);
+      this.taskPollInterval = null;
+    }
+  }
+
+  /** Stops the poll that waits for another instance's model fetch. */
+  private stopModelPoll(): void {
+    if (this.modelPollInterval !== null) {
+      clearInterval(this.modelPollInterval);
+      this.modelPollInterval = null;
+    }
+  }
+
+  /** Aborts the in-flight search request and discards a query still waiting in the debounce. */
+  private cancelPendingSearch(): void {
+    this.cancelSearch$.next();
+    this.searchSubject$.next("");
+  }
+
+  /**
+   * Reads the current task tag, trying the model, then the parent form group,
+   * then the field's form. Blank and non-string values are skipped.
+   */
+  private getCurrentTaskTag(): string | undefined {
+    const fromModel = this.model?.task;
+    if (typeof fromModel === "string" && fromModel.trim().length > 0) {
+      return fromModel;
+    }
+    const fromParentControl = this.formControl?.parent?.get("task")?.value;
+    if (typeof fromParentControl === "string" && fromParentControl.trim().length > 0) {
+      return fromParentControl;
+    }
+    const fromFieldForm = this.field.form?.get("task")?.value;
+    if (typeof fromFieldForm === "string" && fromFieldForm.trim().length > 0) {
+      return fromFieldForm;
+    }
+    return undefined;
+  }
+
+  /** Writes the task tag to the model and the task control(s), then re-evaluates field expressions. */
+  private persistTaskSelection(tag: string): void {
+    // 1. Update the backing model FIRST so expression functions read the new value.
+    if (this.model) {
+      this.model.task = tag;
+    }
+
+    // 2. Update the hidden task form control. Using emitEvent: true (default)
+    // ensures formly picks up the change and re-evaluates all sibling expressions.
+    const taskControlFromField = this.field.form?.get("task");
+    if (taskControlFromField) {
+      taskControlFromField.setValue(tag);
+    }
+
+    const taskControlFromParent = this.formControl?.parent?.get("task");
+    if (taskControlFromParent && taskControlFromParent !== taskControlFromField) {
+      taskControlFromParent.setValue(tag);
+    }
+
+    // 3. Force formly to re-evaluate ALL field expressions (not just this field's subtree).
+    // this.field is the modelId field; its parent covers all sibling fields.
+    const rootField = this.field.parent ?? this.field;
+    this.field.options?.detectChanges?.(rootField);
+  }
+
+  /**
+   * Makes `tag` the selected task everywhere (component, model, form).
+   *
+   * @param resetTaskSpecificFields when true, a task visited for the first
+   *   time gets its task-scoped fields reset to defaults
+   */
+  private syncTaskSelection(tag: string, resetTaskSpecificFields: boolean): void {
+    this.selectedTaskTag = tag;
+    if (resetTaskSpecificFields) {
+      this.resetTaskStateForFirstVisit(tag);
+    }
+    this.persistTaskSelection(tag);
+    this.refreshTaskScopedValidity();
+  }
+
+  /** Re-validates the task control and every task-scoped control, then emits one value change. */
+  private refreshTaskScopedValidity(): void {
+    // Same controls the snapshot/restore logic handles, plus the task control itself.
+    const keys = ["task", ...this.taskScopedKeys];
+    for (const key of keys) {
+      const control = this.field.form?.get(key) ?? this.formControl?.parent?.get(key);
+      control?.updateValueAndValidity({ emitEvent: false });
+    }
+    this.field.form?.updateValueAndValidity({ emitEvent: false });
+    this.formControl?.parent?.updateValueAndValidity({ emitEvent: false });
+
+    // Emit a single value change after all fields are settled so the
+    // workflow action service picks up the new operator properties.
+    this.formControl?.parent?.updateValueAndValidity({ emitEvent: true });
+  }
+
+  /** Remembers the current task-scoped values under `tag`; an empty tag is ignored. */
+  private snapshotTaskState(tag: string): void {
+    if (!tag) {
+      return;
+    }
+    const snapshot: Partial<Record<(typeof this.taskScopedKeys)[number], unknown>> = {};
+    for (const key of this.taskScopedKeys) {
+      snapshot[key] = this.readFieldValue(key);
+    }
+    this.taskStateByTag.set(tag, snapshot);
+  }
+
+  /** Writes back the values remembered for `tag`; does nothing when there is no snapshot. */
+  private restoreTaskState(tag: string): void {
+    const snapshot = this.taskStateByTag.get(tag);
+    if (!snapshot) {
+      return;
+    }
+    for (const key of this.taskScopedKeys) {
+      if (Object.prototype.hasOwnProperty.call(snapshot, key)) {
+        this.writeFieldValue(key, snapshot[key]);
+      }
+    }
+    this.refreshTaskScopedValidity();
+  }
+
+  /** Resets the task-scoped fields to defaults unless a snapshot for `tag` already exists. */
+  private resetTaskStateForFirstVisit(tag: string): void {
+    if (this.taskStateByTag.has(tag)) {
+      return;
+    }
+    const defaults: Partial<Record<(typeof this.taskScopedKeys)[number], unknown>> = {
+      modelId: "",
+      promptColumn: "",
+      imageInput: "",
+      audioInput: "",
+      inputImageColumn: "",
+      inputAudioColumn: "",
+      candidateLabels: "",
+      sentencesColumn: "",
+      contextColumn: "",
+      systemPrompt: "You are a helpful assistant.",
+      maxNewTokens: 256,
+      temperature: 0.7,
+    };
+    for (const key of this.taskScopedKeys) {
+      this.writeFieldValue(key, defaults[key] ?? "");
+    }
+  }
+
+  /** Reads a task-scoped value from its form control, or from the model when there is no control. */
+  private readFieldValue(key: (typeof this.taskScopedKeys)[number]): unknown {
+    const control = this.field.form?.get(key) ?? this.formControl?.parent?.get(key);
+    if (control) {
+      return control.value;
+    }
+    return this.model?.[key];
+  }
+
+  /** Writes a task-scoped value to its form control (without emitting) and to the model. */
+  private writeFieldValue(key: (typeof this.taskScopedKeys)[number], value: unknown): void {
+    const control = this.field.form?.get(key) ?? this.formControl?.parent?.get(key);
+    if (control) {
+      control.setValue(value, { emitEvent: false });
+      control.markAsDirty();
+      control.updateValueAndValidity({ emitEvent: false });
+    }
+    if (this.model) {
+      (this.model as Record<string, unknown>)[key] = value;
+    }
+  }
+}
diff --git a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html
index 3cdd88911af..de255386aef 100644
--- a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html
+++ b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.html
@@ -103,6 +103,88 @@ *ngIf="formlyFields && formlyFormGroup" [formGroup]="formlyFormGroup" class="property-editor-form">
+
+

Task Preview:

+
+ + + Hugging Face task preview + +
+
{{ preview.title }}
+
{{ preview.body }}
+
+ {{ preview.outputBody }} +
+
+
+ +
+
{{ preview.title }}
+
+ {{ preview.inputLabel }} + + {{ preview.outputLabel }} +
+
+ {{ preview.body }} +
+
+ {{ preview.outputBody }} +
+
+ {{ pill }} +
+
+
+
{ expect(descEl).toBeNull(); }); }); + + // ── HuggingFace task-aware visibility tests ── + + it("should return null huggingFaceTaskPreview for non-HF operators", () => { + workflowActionService.addOperator(mockScanPredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, mockScanPredicate.operatorID, true), + }); + fixture.detectChanges(); + expect(component.huggingFaceTaskPreview).toBeNull(); + }); + + it("should return a task preview for HuggingFace operator with a known task", () => { + workflowActionService.addOperator(mockHuggingFacePredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, mockHuggingFacePredicate.operatorID, true), + }); + fixture.detectChanges(); + const preview = component.huggingFaceTaskPreview; + expect(preview).toBeTruthy(); + expect(preview!.kind).toBe("text"); + expect(preview!.title).toBe("Text generation preview"); + }); + + it("should return a fallback preview for HuggingFace operator with an unknown task", () => { + const hfPredicate = { + ...cloneDeep(mockHuggingFacePredicate), + operatorProperties: { task: "some-unknown-task", modelId: "" }, + }; + workflowActionService.addOperator(hfPredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, hfPredicate.operatorID, true), + }); + fixture.detectChanges(); + const preview = component.huggingFaceTaskPreview; + expect(preview).toBeTruthy(); + expect(preview!.kind).toBe("text"); + expect(preview!.title).toBe("Some Unknown Task"); + }); + + it("should return image kind preview for image-classification task", () => { + const hfPredicate = { + ...cloneDeep(mockHuggingFacePredicate), + operatorProperties: { task: "image-classification", modelId: "" }, + }; + workflowActionService.addOperator(hfPredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, hfPredicate.operatorID, true), + }); + fixture.detectChanges(); + const preview = 
component.huggingFaceTaskPreview; + expect(preview).toBeTruthy(); + expect(preview!.kind).toBe("image"); + }); + + it("should return audio kind preview for text-to-speech task", () => { + const hfPredicate = { + ...cloneDeep(mockHuggingFacePredicate), + operatorProperties: { task: "text-to-speech", modelId: "" }, + }; + workflowActionService.addOperator(hfPredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, hfPredicate.operatorID, true), + }); + fixture.detectChanges(); + const preview = component.huggingFaceTaskPreview; + expect(preview).toBeTruthy(); + expect(preview!.kind).toBe("audio"); + }); + + it("should return video kind preview for text-to-video task", () => { + const hfPredicate = { + ...cloneDeep(mockHuggingFacePredicate), + operatorProperties: { task: "text-to-video", modelId: "" }, + }; + workflowActionService.addOperator(hfPredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, hfPredicate.operatorID, true), + }); + fixture.detectChanges(); + const preview = component.huggingFaceTaskPreview; + expect(preview).toBeTruthy(); + expect(preview!.kind).toBe("video"); + }); + + it("should return null preview when HuggingFace task is empty", () => { + const hfPredicate = { ...cloneDeep(mockHuggingFacePredicate), operatorProperties: { task: "", modelId: "" } }; + workflowActionService.addOperator(hfPredicate, mockPoint); + component.ngOnChanges({ + currentOperatorId: new SimpleChange(undefined, hfPredicate.operatorID, true), + }); + fixture.detectChanges(); + expect(component.huggingFaceTaskPreview).toBeNull(); + }); + + // ── HuggingFace field visibility and validator tests ── + + function getHfField(key: string): FormlyFieldConfig | undefined { + return component.formlyFields?.[0]?.fieldGroup?.find(f => f.key === key); + } + + let currentTask: string = ""; + + let hfOperatorCounter = 0; + + function initHfOperator(task: string): void { + currentTask = task; + 
hfOperatorCounter++;
+    const pred = {
+      ...cloneDeep(mockHuggingFacePredicate),
+      operatorID: `hf-test-${hfOperatorCounter}`,
+      operatorProperties: { task, modelId: "org/model" },
+    };
+    workflowActionService.addOperator(pred, mockPoint);
+    component.ngOnChanges({
+      currentOperatorId: new SimpleChange(undefined, pred.operatorID, true),
+    });
+    fixture.detectChanges();
+  }
+
+  /**
+   * Evaluates the `hide` expression of a HuggingFace formly field against the task most
+   * recently passed to initHfOperator (stored in currentTask).
+   *
+   * Throws when the field is missing so that a "should show" assertion (which expects
+   * `false`) cannot pass vacuously because the field was never generated.
+   * Falls back to the static `hide` flag when no callable `hide` expression is registered.
+   */
+  function evalHide(field: FormlyFieldConfig | undefined): boolean {
+    if (!field) {
+      throw new Error("HuggingFace field not found in formlyFields");
+    }
+    const hideFn = (field.expressions as Record<string, unknown> | undefined)?.["hide"];
+    if (typeof hideFn !== "function") return !!field.hide;
+    // Provide model context so getSelectedTask can find the task
+    const fieldWithModel = { ...field, model: { task: currentTask } } as FormlyFieldConfig;
+    return Boolean((hideFn as (f: FormlyFieldConfig) => unknown)(fieldWithModel));
+  }
+
+  it("should hide imageInput for text-generation task", () => {
+    initHfOperator("text-generation");
+    expect(evalHide(getHfField("imageInput"))).toBe(true);
+  });
+
+  it("should show imageInput for image-classification task", () => {
+    initHfOperator("image-classification");
+    expect(evalHide(getHfField("imageInput"))).toBe(false);
+  });
+
+  it("should hide audioInput for text-generation task", () => {
+    initHfOperator("text-generation");
+    expect(evalHide(getHfField("audioInput"))).toBe(true);
+  });
+
+  it("should show audioInput for automatic-speech-recognition task", () => {
+    initHfOperator("automatic-speech-recognition");
+    expect(evalHide(getHfField("audioInput"))).toBe(false);
+  });
+
+  it("should hide promptColumn for image-only tasks", () => {
+    initHfOperator("image-classification");
+    expect(evalHide(getHfField("promptColumn"))).toBe(true);
+  });
+
+  it("should hide promptColumn for audio-only tasks", () => {
+    initHfOperator("automatic-speech-recognition");
+    expect(evalHide(getHfField("promptColumn"))).toBe(true);
+  });
+
+  it("should show promptColumn for text-generation task", () => {
+    initHfOperator("text-generation");
+    expect(evalHide(getHfField("promptColumn"))).toBe(false); + 
}); + + it("should show systemPrompt only for text-generation", () => { + initHfOperator("text-generation"); + expect(evalHide(getHfField("systemPrompt"))).toBe(false); + + initHfOperator("image-classification"); + expect(evalHide(getHfField("systemPrompt"))).toBe(true); + }); + + it("should show contextColumn only for question-answering", () => { + initHfOperator("question-answering"); + expect(evalHide(getHfField("contextColumn"))).toBe(false); + + initHfOperator("text-generation"); + expect(evalHide(getHfField("contextColumn"))).toBe(true); + }); + + it("should show candidateLabels only for classification tasks", () => { + initHfOperator("zero-shot-classification"); + expect(evalHide(getHfField("candidateLabels"))).toBe(false); + + initHfOperator("text-generation"); + expect(evalHide(getHfField("candidateLabels"))).toBe(true); + }); + + it("requiredPromptColumn validator should pass when not a prompt-required task", () => { + initHfOperator("image-classification"); + const field = getHfField("promptColumn"); + const validator = field?.validators?.["requiredPromptColumn"]; + expect(validator).toBeDefined(); + const mockField = { ...field, model: { task: "image-classification", promptColumn: "" } } as FormlyFieldConfig; + expect(validator!.expression(null as any, mockField)).toBe(true); + }); + + it("requiredPromptColumn validator should fail when prompt-required task has no column", () => { + initHfOperator("text-generation"); + const field = getHfField("promptColumn"); + const validator = field?.validators?.["requiredPromptColumn"]; + expect(validator).toBeDefined(); + const mockField = { ...field, model: { task: "text-generation", promptColumn: "" } } as FormlyFieldConfig; + expect(validator!.expression(null as any, mockField)).toBe(false); + }); + + it("requiredImageInput validator should pass when not an image task", () => { + initHfOperator("text-generation"); + const field = getHfField("imageInput"); + const validator = 
field?.validators?.["requiredImageInput"]; + expect(validator).toBeDefined(); + const mockField = { ...field, model: { task: "text-generation", imageInput: "" } } as FormlyFieldConfig; + expect(validator!.expression(null as any, mockField)).toBe(true); + }); + + it("requiredAudioInput validator should pass when not an audio task", () => { + initHfOperator("text-generation"); + const field = getHfField("audioInput"); + const validator = field?.validators?.["requiredAudioInput"]; + expect(validator).toBeDefined(); + const mockField = { ...field, model: { task: "text-generation", audioInput: "" } } as FormlyFieldConfig; + expect(validator!.expression(null as any, mockField)).toBe(true); + }); }); diff --git a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts index 2512fecdacb..1d86a61ae72 100644 --- a/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts +++ b/frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts @@ -61,7 +61,7 @@ import * as Y from "yjs"; import { OperatorSchema } from "src/app/workspace/types/operator-schema.interface"; import { AttributeType, PortSchema } from "../../../types/workflow-compiling.interface"; import { GuiConfigService } from "../../../../common/service/gui-config.service"; -import { NgIf } from "@angular/common"; +import { NgFor, NgIf, NgSwitch, NgSwitchCase } from "@angular/common"; import { NzSpaceCompactItemDirective } from "ng-zorro-antd/space"; import { NzButtonComponent } from "ng-zorro-antd/button"; import { ɵNzTransitionPatchDirective } from "ng-zorro-antd/core/transition-patch"; @@ -100,6 +100,9 @@ Quill.register("modules/cursors", QuillCursors); styleUrls: 
["./operator-property-edit-frame.component.scss"], imports: [ NgIf, + NgFor, + NgSwitch, + NgSwitchCase, NzSpaceCompactItemDirective, NzButtonComponent, ɵNzTransitionPatchDirective, @@ -167,6 +170,273 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On // used to tear down subscriptions that takeUntil(teardownObservable) private teardownObservable: Subject = new Subject(); + readonly huggingFaceTaskPreviewSamples: Record< + string, + { + kind: "image" | "video" | "audio" | "text"; + inputLabel?: string; + outputLabel?: string; + title?: string; + body?: string; + outputBody?: string; + pills?: string[]; + assetSrc?: string; + } + > = { + "text-to-image": { + kind: "image", + inputLabel: "Text prompt", + outputLabel: "Generated image", + title: "Comic-style city action scene", + body: "Prompt becomes a generated image preview.", + assetSrc: "assets/sample-image.png", + }, + "image-to-image": { + kind: "image", + inputLabel: "Source image", + outputLabel: "Edited image", + title: "Image transformation preview", + body: "Image input produces a modified image result.", + assetSrc: "assets/sample-image.png", + }, + "text-to-video": { + kind: "video", + inputLabel: "Text prompt", + outputLabel: "Generated video", + title: "Prompt-based motion preview", + body: "Prompt becomes a generated video clip.", + assetSrc: "assets/sample-video.mp4", + }, + "image-to-video": { + kind: "video", + inputLabel: "Source image", + outputLabel: "Animated clip", + title: "Image animation preview", + body: "Image input becomes a short generated video.", + assetSrc: "assets/sample-video.mp4", + }, + "text-to-speech": { + kind: "audio", + inputLabel: "Text input", + outputLabel: "Spoken audio", + title: "Speech synthesis preview", + body: "Text becomes an audio clip the user can play back.", + assetSrc: "assets/sample-audio.wav", + }, + "automatic-speech-recognition": { + kind: "audio", + inputLabel: "Audio input", + outputLabel: "Transcript text", + title: 
"Speech-to-text preview", + body: "Uploaded audio is transcribed into plain text.", + assetSrc: "assets/sample-audio.wav", + }, + "audio-classification": { + kind: "audio", + inputLabel: "Audio input", + outputLabel: "Labels and scores", + title: "Audio tagging preview", + body: "Uploaded audio returns classification labels.", + assetSrc: "assets/sample-audio.wav", + }, + "image-text-to-text": { + kind: "image", + inputLabel: "Image + text prompt", + outputLabel: "Generated text", + title: "Image-text-to-text preview", + body: "The model reads an image and a text prompt to produce a response.", + outputBody: "The image shows a superhero leaping across rooftops at sunset.", + assetSrc: "assets/sample-image.png", + }, + "image-classification": { + kind: "image", + inputLabel: "Image input", + outputLabel: "Predicted labels", + title: "Image classification preview", + body: "The model assigns labels such as superhero, city, or action scene.", + assetSrc: "assets/sample-image.png", + pills: ["superhero", "cityscape", "action"], + }, + "object-detection": { + kind: "image", + inputLabel: "Image input", + outputLabel: "Detected objects", + title: "Object detection preview", + body: "The model returns detected objects and bounding boxes.", + assetSrc: "assets/sample-image.png", + pills: ["person", "building", "sky"], + }, + "image-segmentation": { + kind: "image", + inputLabel: "Image input", + outputLabel: "Segmented regions", + title: "Segmentation preview", + body: "The model separates the image into labeled regions.", + assetSrc: "assets/sample-image.png", + pills: ["foreground", "background", "subject"], + }, + "image-to-text": { + kind: "image", + inputLabel: "Image input", + outputLabel: "Caption text", + title: "Captioning preview", + body: "The model turns an uploaded image into a textual description.", + outputBody: "A superhero leaps above a dense downtown skyline at sunset.", + assetSrc: "assets/sample-image.png", + }, + "visual-question-answering": { + kind: 
"image", + inputLabel: "Image + question", + outputLabel: "Answer text", + title: "Visual question answering preview", + body: "The model reads the image and answers the user question.", + outputBody: "Spider-Man is jumping over a city skyline.", + assetSrc: "assets/sample-image.png", + }, + "document-question-answering": { + kind: "image", + inputLabel: "Document image + question", + outputLabel: "Answer text", + title: "Document QA preview", + body: "The model extracts answers from a document image.", + outputBody: "Invoice total: $248.90", + assetSrc: "assets/sample-image.png", + }, + "zero-shot-image-classification": { + kind: "image", + inputLabel: "Image + candidate labels", + outputLabel: "Ranked labels", + title: "Zero-shot image labeling preview", + body: "Candidate labels are scored against the uploaded image.", + assetSrc: "assets/sample-image.png", + pills: ["superhero", "sports", "travel"], + }, + "text-generation": { + kind: "text", + inputLabel: "Prompt", + outputLabel: "Generated text", + title: "Text generation preview", + body: "Write a short action scene set above a crowded city skyline.", + outputBody: "The hero vaulted between rooftops as the city lights came alive below.", + }, + "text-classification": { + kind: "text", + inputLabel: "Text input", + outputLabel: "Predicted label", + title: "Text classification preview", + body: "This launch update sounds confident and customer-focused.", + pills: ["positive", "announcement"], + }, + "token-classification": { + kind: "text", + inputLabel: "Text input", + outputLabel: "Tagged spans", + title: "Token classification preview", + body: "Peter Parker visited New York yesterday.", + pills: ["Peter Parker: PERSON", "New York: LOCATION"], + }, + "question-answering": { + kind: "text", + inputLabel: "Question + context", + outputLabel: "Answer span", + title: "Question answering preview", + body: "Question: Who led the launch?\nContext: Maya led the launch while Jordan handled analytics.", + outputBody: 
"Maya", + }, + "table-question-answering": { + kind: "text", + inputLabel: "Question + table", + outputLabel: "Answer", + title: "Table QA preview", + body: "Question: Which month had the highest revenue?", + outputBody: "March", + }, + "zero-shot-classification": { + kind: "text", + inputLabel: "Text + candidate labels", + outputLabel: "Ranked labels", + title: "Zero-shot classification preview", + body: "We need to accelerate onboarding for enterprise customers.", + pills: ["business", "operations", "support"], + }, + translation: { + kind: "text", + inputLabel: "Source text", + outputLabel: "Translated text", + title: "Translation preview", + body: "Good morning, thanks for joining the call.", + outputBody: "Buenos dias, gracias por unirte a la llamada.", + }, + summarization: { + kind: "text", + inputLabel: "Long text", + outputLabel: "Summary", + title: "Summarization preview", + body: "A long project update is compressed into a short summary.", + outputBody: "The team shipped the release, fixed two regressions, and started the next milestone.", + }, + "feature-extraction": { + kind: "text", + inputLabel: "Text input", + outputLabel: "Embedding/vector output", + title: "Feature extraction preview", + body: "Input text is converted into a numeric representation.", + pills: ["0.12", "-0.08", "0.44", "..."], + }, + "fill-mask": { + kind: "text", + inputLabel: "Masked sentence", + outputLabel: "Top completions", + title: "Fill-mask preview", + body: "The hero saved the [MASK].", + pills: ["city", "day", "crowd"], + }, + "sentence-similarity": { + kind: "text", + inputLabel: "Source + candidate sentences", + outputLabel: "Similarity scores", + title: "Sentence similarity preview", + body: "Compare one sentence against several alternatives.", + pills: ["0.93", "0.61", "0.22"], + }, + "text-ranking": { + kind: "text", + inputLabel: "Query + candidate texts", + outputLabel: "Ranked results", + title: "Text ranking preview", + body: "Candidate passages are ordered by 
relevance to the query.",
+      pills: ["doc_2", "doc_5", "doc_1"],
+    },
+  };
+
+  /**
+   * Preview sample for the task currently selected on a HuggingFace operator.
+   * Returns null when the current operator is not a HuggingFace operator or when no task is
+   * set; a task without a curated sample gets a generic text preview titled from its id.
+   */
+  get huggingFaceTaskPreview(): {
+    kind: "image" | "video" | "audio" | "text";
+    inputLabel?: string;
+    outputLabel?: string;
+    title?: string;
+    body?: string;
+    outputBody?: string;
+    pills?: string[];
+    assetSrc?: string;
+  } | null {
+    if (!this.isHuggingFaceOperator()) {
+      return null;
+    }
+    const task = this.formData?.["task"];
+    if (typeof task !== "string" || task.trim().length === 0) {
+      return null;
+    }
+    // Own-property lookup: a bare index into the object literal would resolve inherited keys
+    // such as "constructor" or "toString" to Object.prototype members and skip the fallback.
+    if (Object.prototype.hasOwnProperty.call(this.huggingFaceTaskPreviewSamples, task)) {
+      return this.huggingFaceTaskPreviewSamples[task];
+    }
+    return {
+      kind: "text",
+      inputLabel: "Task input",
+      outputLabel: "Task output",
+      title: this.formatTaskTitle(task),
+      body: "This task transforms the provided input into a model response.",
+    };
+  }
+
   constructor(
     private formlyJsonschema: FormlyJsonschema,
     private workflowActionService: WorkflowActionService,
@@ -237,6 +507,20 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
     });
   }

+  /** True when the operator being edited exists in the graph and has operatorType "HuggingFace". */
+  private isHuggingFaceOperator(): boolean {
+    if (!this.currentOperatorId) return false;
+    const graph = this.workflowActionService.getTexeraGraph();
+    if (!graph.hasOperator(this.currentOperatorId)) return false;
+    return graph.getOperator(this.currentOperatorId).operatorType === "HuggingFace";
+  }
+
+  /** Turns a hyphenated task id into a title, e.g. "some-unknown-task" -> "Some Unknown Task". */
+  private formatTaskTitle(task: string): string {
+    return task
+      .split("-")
+      .map(part => part.charAt(0).toUpperCase() + part.slice(1))
+      .join(" ");
+  }
+
   async ngOnDestroy() {
     // await this.checkAndSavePreset();
     this.teardownObservable.complete();
@@ -541,6 +825,203 @@ export class OperatorPropertyEditFrameComponent implements OnInit, OnChanges, On
         mappedField.type = "inputautocomplete";
       }

+      if (mappedField.key === "huggingFaceModel") {
+        mappedField.type = "huggingface";
+      }
+
+      if (mappedField.key === "modelId" && this.currentOperatorSchema?.operatorType === "HuggingFace") {
+        mappedField.type = "huggingface";
+      }
+
+      if (mappedField.key === "task" && this.currentOperatorSchema?.operatorType === 
"HuggingFace") { + mappedField.hide = true; + } + + // ── Dynamic field visibility for HuggingFace based on selected task ── + if (this.currentOperatorSchema?.operatorType === "HuggingFace" && typeof mappedField.key === "string") { + const hfKey = mappedField.key; + const imageOnlyTasks = ["image-classification", "object-detection", "image-segmentation", "image-to-text"]; + const imageInputTasks = [ + ...imageOnlyTasks, + "visual-question-answering", + "document-question-answering", + "zero-shot-image-classification", + "image-text-to-text", + "image-to-image", + ]; + const audioInputTasks = ["automatic-speech-recognition", "audio-classification"]; + const promptRequiredTasks = [ + "text-generation", + "text-classification", + "token-classification", + "question-answering", + "table-question-answering", + "zero-shot-classification", + "translation", + "summarization", + "feature-extraction", + "fill-mask", + "sentence-similarity", + "text-ranking", + "visual-question-answering", + "document-question-answering", + "zero-shot-image-classification", + ]; + const getSelectedTask = (field: FormlyFieldConfig): string | undefined => { + const fromForm = field.form?.get("task")?.value ?? 
field.formControl?.parent?.get("task")?.value; + if (typeof fromForm === "string" && fromForm.trim().length > 0) { + return fromForm; + } + const fromModel = field.model?.task; + if (typeof fromModel === "string" && fromModel.trim().length > 0) { + return fromModel; + } + return undefined; + }; + if (hfKey === "imageInput") { + mappedField.type = "huggingface-image-upload"; + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t === undefined || !imageInputTasks.includes(t); + }, + }; + mappedField.validators = { + ...mappedField.validators, + requiredImageInput: { + expression: (_control: AbstractControl, field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + if (t === undefined || !imageInputTasks.includes(t)) { + return true; + } + const inputImageCol = field.model?.inputImageColumn; + if (typeof inputImageCol === "string" && inputImageCol.trim().length > 0) { + return true; + } + const value = field.formControl?.value ?? 
field.model?.imageInput; + return typeof value === "string" && value.trim().length > 0; + }, + message: () => "Upload an image or select an Input Image Column for this task.", + }, + }; + mappedField.validation = { + ...mappedField.validation, + show: true, + }; + } + if (hfKey === "audioInput") { + mappedField.type = "huggingface-audio-upload"; + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t === undefined || !audioInputTasks.includes(t); + }, + }; + mappedField.validators = { + ...mappedField.validators, + requiredAudioInput: { + expression: (_control: AbstractControl, field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + if (t === undefined || !audioInputTasks.includes(t)) { + return true; + } + const inputAudioCol = field.model?.inputAudioColumn; + if (typeof inputAudioCol === "string" && inputAudioCol.trim().length > 0) { + return true; + } + const value = field.formControl?.value ?? 
field.model?.audioInput; + return typeof value === "string" && value.trim().length > 0; + }, + message: () => "Upload audio or select an Input Audio Column for this task.", + }, + }; + mappedField.validation = { + ...mappedField.validation, + show: true, + }; + } + if (hfKey === "inputImageColumn") { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t === undefined || !imageInputTasks.includes(t); + }, + }; + } + if (hfKey === "inputAudioColumn") { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t === undefined || !audioInputTasks.includes(t); + }, + }; + } + if (hfKey === "promptColumn") { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t !== undefined && (imageOnlyTasks.includes(t) || audioInputTasks.includes(t)); + }, + }; + mappedField.validators = { + ...mappedField.validators, + requiredPromptColumn: { + expression: (_control: AbstractControl, field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + if (t === undefined || !promptRequiredTasks.includes(t)) { + return true; + } + const value = field.formControl?.value ?? 
field.model?.promptColumn; + return typeof value === "string" && value.trim().length > 0; + }, + message: () => "Select a prompt column for this task.", + }, + }; + mappedField.validation = { + ...mappedField.validation, + show: true, + }; + } + if (["systemPrompt", "maxNewTokens", "temperature"].includes(hfKey)) { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t !== "text-generation"; + }, + }; + } + if (hfKey === "contextColumn") { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => getSelectedTask(field) !== "question-answering", + }; + } + if (hfKey === "candidateLabels") { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t !== "zero-shot-classification" && t !== "zero-shot-image-classification"; + }, + }; + } + if (hfKey === "sentencesColumn") { + mappedField.expressions = { + ...mappedField.expressions, + hide: (field: FormlyFieldConfig) => { + const t = getSelectedTask(field); + return t !== "sentence-similarity" && t !== "text-ranking"; + }, + }; + } + } + if (mappedField.key === "datasetVersionPath") { mappedField.type = "datasetversionselector"; } diff --git a/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.html b/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.html index 5f8fbe4674a..71e45335917 100644 --- a/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.html +++ b/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.html @@ -18,7 +18,56 @@ --> diff --git a/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts b/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts index 278a01dd5a7..a41c4c8c0b6 100644 --- 
a/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts +++ b/frontend/src/app/workspace/component/result-panel/result-panel-modal.component.ts @@ -18,11 +18,16 @@ */ import { Component, inject, OnChanges } from "@angular/core"; +import { CommonModule } from "@angular/common"; import { NZ_MODAL_DATA, NzModalRef } from "ng-zorro-antd/modal"; +import { NzButtonModule } from "ng-zorro-antd/button"; +import { NzIconModule } from "ng-zorro-antd/icon"; import { WorkflowResultService } from "../../service/workflow-result/workflow-result.service"; import { UntilDestroy, untilDestroyed } from "@ngneat/until-destroy"; import { PanelResizeService } from "../../service/workflow-result/panel-resize/panel-resize.service"; -import { NgxJsonViewerModule } from "ngx-json-viewer"; +import { NotificationService } from "../../../common/service/notification/notification.service"; +import { isAudioUrl, isVideoUrl, isImageUrl } from "src/app/common/util/media-type.util"; +import { AppSettings } from "../../../common/app-setting"; /** * @@ -42,29 +47,79 @@ import { NgxJsonViewerModule } from "ngx-json-viewer"; selector: "texera-row-modal-content", templateUrl: "./result-panel-modal.component.html", styleUrls: ["./result-panel-model.component.scss"], - imports: [NgxJsonViewerModule], + imports: [CommonModule, NzButtonModule, NzIconModule], }) export class RowModalComponent implements OnChanges { + rowEntries: { key: string; value: string; mediaSrc: string; isVideo: boolean; isImage: boolean; isAudio: boolean }[] = + []; // Index of current displayed row in currentResult - readonly operatorId: string = inject(NZ_MODAL_DATA).operatorId; - rowIndex: number = inject(NZ_MODAL_DATA).rowIndex; + private readonly modalData: { operatorId: string; rowIndex: number; rowData?: Record } = + inject(NZ_MODAL_DATA); + readonly operatorId: string = this.modalData.operatorId; + rowIndex: number = this.modalData.rowIndex; currentDisplayRowData: Record = {}; constructor( public 
modal: NzModalRef,
     private workflowResultService: WorkflowResultService,
-    private resizeService: PanelResizeService
+    private resizeService: PanelResizeService,
+    private notificationService: NotificationService
   ) {
+    if (this.modalData.rowData) {
+      this.currentDisplayRowData = this.modalData.rowData;
+      this.rowEntries = this.buildRowEntries(this.currentDisplayRowData);
+    }
     this.ngOnChanges();
   }

+  get prettyRowJson(): string {
+    return JSON.stringify(this.currentDisplayRowData, null, 2);
+  }
+
+  /**
+   * Copies the given text to the system clipboard and reports the outcome as a notification.
+   */
+  copyText(text: string): void {
+    // navigator.clipboard is undefined outside secure contexts (e.g. plain HTTP deployments);
+    // dereferencing it would throw synchronously and the failure would never be reported.
+    const clipboard: Clipboard | undefined = navigator.clipboard;
+    if (!clipboard) {
+      this.notificationService.error("Failed to copy");
+      return;
+    }
+    clipboard.writeText(text).then(
+      () => this.notificationService.success("Copied to clipboard"),
+      () => this.notificationService.error("Failed to copy")
+    );
+  }
+
   ngOnChanges(): void {
     this.workflowResultService
       .getPaginatedResultService(this.operatorId)
       ?.selectTuple(this.rowIndex, this.resizeService.pageSize)
       .pipe(untilDestroyed(this))
       .subscribe(res => {
-        this.currentDisplayRowData = res.tuple;
+        if (res?.tuple) {
+          this.currentDisplayRowData = res.tuple;
+          this.rowEntries = this.buildRowEntries(this.currentDisplayRowData);
+        }
       });
   }
+
+  trackByEntryKey(_index: number, entry: { key: string }): string {
+    return entry.key;
+  }
+
+  private resolveMediaSrc(value: string): string {
+    if (!value.startsWith("http://") && !value.startsWith("https://")) {
+      return value;
+    }
+    return `${AppSettings.getApiEndpoint()}/huggingface/media-proxy?url=${encodeURIComponent(value)}`;
+  }
+
+  private buildRowEntries(
+    rowData: Record
+  ): { key: string; value: string; mediaSrc: string; isVideo: boolean; isImage: boolean; isAudio: boolean }[] {
+    return Object.entries(rowData).map(([key, val]) => {
+      const value = typeof val === "string" ? val : JSON.stringify(val) ?? 
String(val); + return { + key, + value, + mediaSrc: this.resolveMediaSrc(value), + isVideo: typeof val === "string" && isVideoUrl(val), + isImage: typeof val === "string" && isImageUrl(val), + isAudio: typeof val === "string" && isAudioUrl(val), + }; + }); + } } diff --git a/frontend/src/app/workspace/component/result-panel/result-panel-model.component.scss b/frontend/src/app/workspace/component/result-panel/result-panel-model.component.scss index eb5ff4dece7..aafefae2d36 100644 --- a/frontend/src/app/workspace/component/result-panel/result-panel-model.component.scss +++ b/frontend/src/app/workspace/component/result-panel/result-panel-model.component.scss @@ -23,3 +23,54 @@ height: 100%; width: 100%; } + +.modal-toolbar { + display: flex; + justify-content: flex-end; + margin-bottom: 12px; +} + +.row-detail-list { + display: flex; + flex-direction: column; + gap: 12px; +} + +.row-detail-item { + border: 1px solid #d9d9d9; + border-radius: 4px; + padding: 12px; + background: #fff; +} + +.row-detail-header { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + margin-bottom: 8px; +} + +.row-detail-key { + font-weight: 600; + word-break: break-word; +} + +.row-detail-pre { + margin: 0; + white-space: pre-wrap; + word-break: break-word; + font-family: monospace; + font-size: 12px; + line-height: 1.5; +} + +.row-detail-value { + max-height: none; + overflow: visible; + padding: 8px; + border: 1px solid #d9d9d9; + border-radius: 4px; + background: #fafafa; + user-select: text; +} diff --git a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html index 5400d978ee3..9313dbb12ec 100644 --- a/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html +++ 
b/frontend/src/app/workspace/component/result-panel/result-table-frame/result-table-frame.component.html @@ -161,7 +161,40 @@
- {{ column.getCell(row) }} + + + + Play Video + + + + + Play Audio + + + + + + View Image + + + {{ column.getCell(row) }} +