Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
8b03c0f
feat(huggingface): add audio and media tasks
anishshiva7 May 29, 2026
cc68387
fix(huggingface): address audio media review
anishshiva7 Jun 15, 2026
57a901c
fix(huggingface): refine audio media routing
anishshiva7 Jun 19, 2026
46b5488
fix(huggingface): harden audio input fetching
anishshiva7 Jun 19, 2026
210b1a4
fix(auth): handle missing JWT claims
anishshiva7 Jun 19, 2026
4d8475d
fix(huggingface): reuse media URL conversion for audio
anishshiva7 Jun 19, 2026
8ba6b9f
feat(huggingface): add qa and ranking tasks
anishshiva7 May 29, 2026
292b800
fix(huggingface): validate qa ranking inputs
anishshiva7 Jun 15, 2026
2d4e37a
feat(frontend): add HuggingFace task selector and model browser compo…
ELin2025 May 29, 2026
16d843f
fix(frontend): add explicit type annotations to rxjs error callbacks
ELin2025 Jun 8, 2026
918946c
style(frontend): format HuggingFace component with prettier
ELin2025 Jun 8, 2026
cd2648c
fix(frontend): add takeUntil to rxjs subscribe calls in HuggingFaceCo…
ELin2025 Jun 8, 2026
7bfec5c
fix(frontend): fix memory leaks and remove dead code in HuggingFaceCo…
ELin2025 Jun 11, 2026
c8c9c85
style(frontend): format HuggingFaceComponent with prettier
ELin2025 Jun 11, 2026
948727f
fix(frontend): add in-flight dedup guard for model fetches in Hugging…
ELin2025 Jun 11, 2026
d69c8b2
feat(frontend): surface truncation notice and wire server-side search…
ELin2025 Jun 11, 2026
94219f2
fix(frontend): restore takeUntil on subscribes to satisfy rxjs-angula…
ELin2025 Jun 11, 2026
6db846b
fix(frontend): add finalize guard to model fetch for cancellation cle…
ELin2025 Jun 16, 2026
b37ca09
Potential fix for pull request finding
ELin2025 Jun 20, 2026
df1947d
fix(frontend): address Copilot review feedback on HuggingFaceComponent
ELin2025 Jun 20, 2026
40b46c9
fix(frontend): handle search stream errors and fix clear model value
ELin2025 Jun 20, 2026
7889a13
style(frontend): format HuggingFaceComponent with prettier
ELin2025 Jun 20, 2026
543f91c
feat(frontend): add HuggingFace audio upload component
ELin2025 May 29, 2026
ceb1d2b
style(frontend): format audio upload spec with prettier
ELin2025 Jun 8, 2026
36a91d0
style(frontend): format audio upload HTML with prettier
ELin2025 Jun 11, 2026
f6edc4c
fix(frontend): guard concurrent uploads and remove dead test in audio…
ELin2025 Jun 11, 2026
394dca9
test(frontend): add TestBed-based tests for HuggingFaceAudioUploadCom…
ELin2025 Jun 12, 2026
bc8383d
fix(frontend): guard against stale upload response after clear
ELin2025 Jun 20, 2026
853e59e
fix(frontend): disable file input and clear button during audio upload
ELin2025 Jun 20, 2026
0db5d49
feat(frontend): add task-aware field visibility and preview to Huggin…
ELin2025 May 29, 2026
8286651
test(frontend): add HuggingFace visibility tests and mock operator data
ELin2025 May 29, 2026
a839736
style(frontend): format spec files with prettier
ELin2025 Jun 8, 2026
ca80616
style(frontend): format property editor component with prettier
ELin2025 Jun 11, 2026
a96e00e
fix(frontend): fix HuggingFace field visibility and add missing previ…
ELin2025 Jun 12, 2026
eaaab13
fix(frontend): add missing preview assets and guard deleted operator
ELin2025 Jun 20, 2026
180f482
fix(huggingface): use CANDIDATE_LABELS property for zero-shot image c…
ELin2025 Jun 20, 2026
57f7306
test(frontend): add HuggingFace field visibility and validator tests
ELin2025 Jun 20, 2026
089487c
feat(huggingFace): add image task family via ImageTaskCodegen
PG1204 May 28, 2026
a4e0274
chore: retrigger CI
PG1204 Jun 16, 2026
08ccc7b
chore: retrigger CI
PG1204 Jun 16, 2026
bc40309
chore: retrigger CI
PG1204 Jun 16, 2026
34c82ef
feat(huggingFace): add HuggingFaceModelResource for model browsing an…
PG1204 May 17, 2026
601142e
fix: address review feedback on HuggingFaceModelResource
PG1204 May 19, 2026
9933c62
fix(huggingFace): cap heap usage on /media-proxy and /audio-preview
PG1204 May 28, 2026
65a0e2b
chore: retrigger CI
PG1204 May 28, 2026
3fabf73
chore(huggingFace): annotate HuggingFaceModelResource with @RolesAllowed
PG1204 May 29, 2026
e6d018a
style(frontend): format HuggingFace components with prettier
ELin2025 Jun 8, 2026
18330c8
style(frontend): format spec files with prettier
ELin2025 Jun 8, 2026
d4198cc
feat(frontend): render HuggingFace media results inline in the result…
juliethecao Jun 13, 2026
eacd156
fix(frontend): use toBe(true/false) instead of toBeTrue/toBeFalse in …
juliethecao Jun 13, 2026
237affd
fix(frontend): use observe: "response" to access HuggingFace response…
juliethecao Jun 22, 2026
8dc98bc
fix(frontend): decouple shared HuggingFace tasks fetch from component…
juliethecao Jun 22, 2026
d0b1ba3
fix(frontend): guard against JSON.stringify returning undefined in re…
juliethecao Jun 22, 2026
1eeba34
fix(frontend): remove unused NgxJsonViewerModule, guard JSON.stringif…
juliethecao Jun 22, 2026
f97d230
style(huggingFace): apply scalafmt formatting
juliethecao Jun 22, 2026
de92c93
style(frontend): apply prettier formatting to result-panel-modal
juliethecao Jun 22, 2026
de3f53d
fix(frontend): satisfy rxjs-angular/prefer-takeuntil lint rule in Hug…
juliethecao Jun 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import java.util.Optional
object UserAuthenticator extends Authenticator[JwtContext, SessionUser] with LazyLogging {
override def authenticate(context: JwtContext): Optional[SessionUser] = {
try {
Optional.of(JwtParser.claimsToSessionUser(context.getJwtClaims))
JwtParser.claimsToOptionalSessionUser(context.getJwtClaims)
} catch {
case e: Exception =>
logger.error("Failed to authenticate the JwtContext", e)
Expand Down
23 changes: 20 additions & 3 deletions common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ object JwtParser extends LazyLogging {
/** Verify and parse a Bearer token string. */
def parseToken(token: String): Optional[SessionUser] = {
try {
Optional.of(claimsToSessionUser(JwtAuth.jwtConsumer.processToClaims(token)))
claimsToOptionalSessionUser(JwtAuth.jwtConsumer.processToClaims(token))
} catch {
case _: UnresolvableKeyException =>
logger.error("Invalid JWT Signature")
Expand All @@ -49,6 +49,19 @@ object JwtParser extends LazyLogging {
}
}

/** Convert already-verified claims to a [[SessionUser]], returning empty when
  * the required Texera custom claims are missing or malformed.
  *
  * Delegates to [[claimsToSessionUser]] and translates its claim-related
  * failures into `Optional.empty()` so callers can treat a bad claim set the
  * same way as an unauthenticated request.
  *
  * @param claims claims whose token has already been verified by the caller
  * @return the session user, or `Optional.empty()` when the claims are rejected
  */
def claimsToOptionalSessionUser(claims: JwtClaims): Optional[SessionUser] = {
  try {
    Optional.of(claimsToSessionUser(claims))
  } catch {
    // IllegalArgumentException: a required claim is absent or the role name is
    // not a known enum constant.
    // MalformedClaimException: jose4j's typed getClaimValue(name, type) raises
    // this when a claim is present but has an unexpected JSON type (e.g. a
    // string "userId"). Without this case the "malformed" half of the contract
    // above would surface as an exception instead of an empty result.
    case e @ (_: IllegalArgumentException | _: org.jose4j.jwt.MalformedClaimException) =>
      logger.error(s"Invalid JWT claims: ${e.getMessage}")
      Optional.empty()
  }
}

/** Build a [[SessionUser]] from already-verified claims. Used by both
* [[parseToken]] (which verifies then calls this) and amber's
* `UserAuthenticator` (which the toastshaman filter calls after its own
Expand All @@ -59,8 +72,12 @@ object JwtParser extends LazyLogging {
val email = claims.getClaimValue("email", classOf[String])
// jose4j returns Long after JSON round-trip but the original setClaim
// call writes Integer; widen via Number to handle both cases.
val userId = claims.getClaimValue("userId", classOf[Number]).intValue()
val role = UserRoleEnum.valueOf(claims.getClaimValue("role").asInstanceOf[String])
val userId = Option(claims.getClaimValue("userId", classOf[Number]))
.map(_.intValue())
.getOrElse(throw new IllegalArgumentException("JWT claim 'userId' is required."))
val roleName = Option(claims.getClaimValue("role", classOf[String]))
.getOrElse(throw new IllegalArgumentException("JWT claim 'role' is required."))
val role = UserRoleEnum.valueOf(roleName)
val googleId = claims.getClaimValue("googleId", classOf[String])
val googleAvatar = claims.getClaimValue("googleAvatar", classOf[String])
val user = new User(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ class JwtParserSpec extends AnyFlatSpec with Matchers {
u.getGoogleAvatar shouldBe "avatar-blob"
}

it should "return empty when already-verified claims are missing userId" in {
  // Take the shared fixture claims and drop only the userId claim.
  val claimsWithoutUserId = buildClaims()
  claimsWithoutUserId.unsetClaim("userId")

  val parsed = JwtParser.claimsToOptionalSessionUser(claimsWithoutUserId)
  parsed.isPresent shouldBe false
}

it should "return empty when already-verified claims are missing role" in {
  // Take the shared fixture claims and drop only the role claim.
  val claimsWithoutRole = buildClaims()
  claimsWithoutRole.unsetClaim("role")

  val parsed = JwtParser.claimsToOptionalSessionUser(claimsWithoutRole)
  parsed.isPresent shouldBe false
}

"JwtParser.parseToken" should "return empty on a structurally invalid token" in {
  // A string that is not in JWT form must yield an empty Optional, not an exception.
  val parsed = JwtParser.parseToken("not-a-real-jwt")
  parsed.isPresent shouldBe false
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.huggingFace.codegen.{
AudioTaskCodegen,
CodegenContext,
ImageTaskCodegen,
MediaGenCodegen,
PythonCodegenBase,
QaRankingCodegen,
TaskCodegen,
TextGenCodegen
}
Expand Down Expand Up @@ -95,6 +98,36 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor {
@AutofillAttributeName
var inputImageColumn: EncodableString = ""

// Task-specific operator properties. Every field below is declared with
// `required = false` and initialised to the empty string.

// Uploaded audio for audio tasks.
// NOTE(review): the encoding of the stored value (data URL, path, ...) is not
// visible from this declaration — confirm against the upload component.
@JsonProperty(value = "audioInput", required = false)
@JsonSchemaTitle("Audio Upload")
@JsonPropertyDescription("Upload audio for Hugging Face audio tasks")
var audioInput: EncodableString = ""

// Name of the input-table column that holds audio data.
@JsonProperty(value = "inputAudioColumn", required = false)
@JsonSchemaTitle("Input Audio Column")
@JsonPropertyDescription("Column containing audio data from the input table")
@AutofillAttributeName
var inputAudioColumn: EncodableString = ""

// Name of the input-table column that holds the context passage for question answering.
@JsonProperty(value = "contextColumn", required = false)
@JsonSchemaTitle("Context Column")
@JsonPropertyDescription("Column containing the context passage for question answering")
@AutofillAttributeName
var contextColumn: EncodableString = ""

// Comma-separated label list. Unlike the *Column fields this one carries no
// @AutofillAttributeName, i.e. it is entered as a value rather than picked
// from the input schema.
@JsonProperty(value = "candidateLabels", required = false)
@JsonSchemaTitle("Candidate Labels")
@JsonPropertyDescription("Comma-separated candidate labels for zero-shot classification")
var candidateLabels: EncodableString = ""

// Name of the input-table column that holds comma-separated sentences.
// NOTE(review): a sentence that itself contains a comma would presumably be
// split into several entries — confirm how the codegen tokenises this column.
@JsonProperty(value = "sentencesColumn", required = false)
@JsonSchemaTitle("Sentences Column")
@JsonPropertyDescription(
"Column with comma-separated sentences for sentence similarity and text ranking"
)
@AutofillAttributeName
var sentencesColumn: EncodableString = ""

@JsonProperty(
value = "systemPrompt",
required = false,
Expand Down Expand Up @@ -138,6 +171,9 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor {
val byTask = scala.collection.mutable.Map.empty[String, TaskCodegen]
byTask += (TextGenCodegen.task -> TextGenCodegen)
ImageTaskCodegen.tasks.foreach(t => byTask += (t -> ImageTaskCodegen))
AudioTaskCodegen.tasks.foreach(t => byTask += (t -> AudioTaskCodegen))
MediaGenCodegen.tasks.foreach(t => byTask += (t -> MediaGenCodegen))
QaRankingCodegen.tasks.foreach(t => byTask += (t -> QaRankingCodegen))
byTask.toMap
}

Expand Down Expand Up @@ -181,6 +217,16 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor {
if (imageInput == null) "" else imageInput
val safeInputImageColumn: EncodableString =
if (inputImageColumn == null) "" else inputImageColumn
val safeAudioInput: EncodableString =
if (audioInput == null) "" else audioInput
val safeInputAudioColumn: EncodableString =
if (inputAudioColumn == null) "" else inputAudioColumn
val safeContextColumn: EncodableString =
if (contextColumn == null) "" else contextColumn
val safeCandidateLabels: EncodableString =
if (candidateLabels == null) "" else candidateLabels
val safeSentencesColumn: EncodableString =
if (sentencesColumn == null) "" else sentencesColumn

val ctx = CodegenContext(
hfApiToken = safeToken,
Expand All @@ -192,7 +238,12 @@ class HuggingFaceInferenceOpDesc extends PythonOperatorDescriptor {
safeMaxTokens = safeMaxTokens,
safeTemp = safeTemp,
imageInput = safeImageInput,
inputImageColumn = safeInputImageColumn
inputImageColumn = safeInputImageColumn,
audioInput = safeAudioInput,
inputAudioColumn = safeInputAudioColumn,
contextColumn = safeContextColumn,
candidateLabels = safeCandidateLabels,
sentencesColumn = safeSentencesColumn
)

PythonCodegenBase.render(ctx, codegenForTask(safeTask))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.huggingFace.codegen

/**
* Codegen for Hugging Face audio task families.
*
* ASR and audio-classification send audio bytes as the raw request body.
* Text-to-speech is prompt-driven and sends a JSON payload; its providers
* return either audio bytes directly or a JSON envelope pointing to audio.
*
* Both methods return Python source fragments as strings. The fragments refer
* to names they do not define themselves (`task`, `body`, `prompt_value`,
* `current_audio_bytes`, `audio_only_tasks`, `audio_headers`,
* `self._url_to_data_url`, `json`); these must be provided by the surrounding
* generated Python — presumably the template assembled by PythonCodegenBase,
* TODO confirm.
*/
object AudioTaskCodegen extends TaskCodegen {

// Single task identifier required by the TaskCodegen interface.
// NOTE(review): how `task` is used when `tasks` is also overridden is not
// visible here — confirm against TaskCodegen.
override val task: String = "automatic-speech-recognition"

// Every Hugging Face task name this codegen handles.
override val tasks: Set[String] = Set(
"automatic-speech-recognition",
"audio-classification",
"text-to-speech"
)

/**
* Python fragment that builds the request payload.
*
* For tasks in `audio_only_tasks` the payload is the raw audio bytes and the
* fragment sets `use_raw_binary_body` / `raw_binary_headers`; for
* text-to-speech the payload is the JSON object `{"inputs": prompt_value}`.
*
* @param ctx codegen context; not read by this fragment
* @return the Python source fragment
*/
override def payloadPython(ctx: CodegenContext): String =
""" if task in audio_only_tasks:
| payload = current_audio_bytes
| use_raw_binary_body = True
| raw_binary_headers = audio_headers
| elif task == "text-to-speech":
| payload = {"inputs": prompt_value}""".stripMargin

/**
* Python fragment that converts the provider response `body` into a string.
*
* - text-to-speech: looks for audio under the keys "output", "audio" and
* "data"; URLs are passed through `self._url_to_data_url`, `b64_json`
* values are wrapped in a `data:audio/mpeg;base64,` URL.
* - automatic-speech-recognition: returns `body["text"]` or
* `body["generated_text"]` when present.
* - audio-classification, and any response shape not recognised above:
* returns `json.dumps(body)`.
*
* NOTE(review): `out[0]` is evaluated for any list, so an empty "output" list
* would raise IndexError in the generated code.
* NOTE(review): `data[0]` is evaluated whenever `data` is truthy, without
* checking that it is a list; a non-empty dict under "data" would raise
* KeyError.
* NOTE(review): the `b64_json` branches hard-code the `audio/mpeg` MIME type —
* confirm that every provider returns MPEG audio.
*
* @param ctx codegen context; not read by this fragment
* @return the Python source fragment
*/
override def parsePython(ctx: CodegenContext): String =
""" if task == "text-to-speech":
| if isinstance(body, dict):
| if "output" in body:
| out = body["output"]
| url = out[0] if isinstance(out, list) else out
| if isinstance(url, str) and url.startswith("http"):
| return self._url_to_data_url(url)
| if "audio" in body:
| audio = body["audio"]
| if isinstance(audio, dict):
| if "url" in audio:
| return self._url_to_data_url(audio["url"])
| if "b64_json" in audio:
| return f"data:audio/mpeg;base64,{audio['b64_json']}"
| if "data" in body:
| data = body["data"]
| if data and isinstance(data[0], dict):
| if "url" in data[0]:
| return self._url_to_data_url(data[0]["url"])
| if "b64_json" in data[0]:
| return f"data:audio/mpeg;base64,{data[0]['b64_json']}"
| return json.dumps(body)
| elif task == "automatic-speech-recognition":
| if isinstance(body, dict):
| if "text" in body:
| return body["text"]
| if "generated_text" in body:
| return body["generated_text"]
| return json.dumps(body)
| elif task == "audio-classification":
| return json.dumps(body)""".stripMargin
}
Original file line number Diff line number Diff line change
Expand Up @@ -90,26 +90,16 @@ object ImageTaskCodegen extends TaskCodegen {
| use_raw_binary_body = True
| raw_binary_headers = image_headers
| elif task == "zero-shot-image-classification":
| # Zero-shot requires the caller to supply candidate labels.
| # We reuse the prompt column as a comma-separated label list so
| # the task is shippable without a dedicated operator field.
| # TODO: replace with a first-class `candidateLabels` field once
| # the property panel supports task-specific inputs.
| #
| # Fail fast if usable labels can't be derived. Both modes lead to
| # a meaningless inference call:
| # 1. Empty prompt column -> labels = []
| # The HF API rejects candidate_labels: [] with an opaque 400.
| # 2. Missing prompt column -> upstream sets prompt_value
| # to the fallback "What is shown in this image?", which has
| # no comma, so labels collapses to a single nonsense entry.
| # Zero-shot classification needs >= 2 candidate labels to be
| # meaningful — surface a configuration error in both cases.
| labels = [s.strip() for s in prompt_value.split(",") if s.strip()]
| # Prefer the dedicated candidateLabels property; fall back to
| # the prompt column for backward compatibility.
| label_source = (self.CANDIDATE_LABELS or "").strip() if self.CANDIDATE_LABELS else ""
| if not label_source and prompt_value:
| label_source = prompt_value
| labels = [s.strip() for s in label_source.split(",") if s.strip()]
| if len(labels) < 2:
| raise ValueError(
| "zero-shot-image-classification requires at least 2 candidate "
| "labels: provide a comma-separated list in the prompt column."
| "labels: provide a comma-separated list in the Candidate Labels field."
| )
| payload = {
| "inputs": self._image_input_as_base64(current_image_bytes),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.texera.amber.operator.huggingFace.codegen

/**
* Codegen for prompt-driven media generation tasks.
*
* Providers return media in several shapes: raw bytes, OpenAI-style
* b64_json, or URLs. URL responses are normalized to data URLs by the
* shared `_url_to_data_url` helper so downstream result rendering receives
* a stable string format.
*
* Both methods return Python source fragments as strings. The fragments refer
* to names they do not define themselves (`task`, `body`, `prompt_value`,
* `self._url_to_data_url`, `json`); these must be provided by the surrounding
* generated Python — presumably the template assembled by PythonCodegenBase,
* TODO confirm.
*/
object MediaGenCodegen extends TaskCodegen {

// Single task identifier required by the TaskCodegen interface.
// NOTE(review): how `task` is used when `tasks` is also overridden is not
// visible here — confirm against TaskCodegen.
override val task: String = "text-to-image"

// Every Hugging Face task name this codegen handles.
override val tasks: Set[String] = Set(
"text-to-image",
"text-to-video"
)

/**
* Python fragment that builds the request payload: the JSON object
* `{"inputs": prompt_value}`, identical for both tasks.
*
* @param ctx codegen context; not read by this fragment
* @return the Python source fragment
*/
override def payloadPython(ctx: CodegenContext): String =
""" payload = {"inputs": prompt_value}""".stripMargin

/**
* Python fragment that converts the provider response `body` into a string.
*
* - text-to-image: looks for an image under "output", "images" and "data"
* (either a dict with an "outputs" list or a list of dicts carrying
* "b64_json" / "url"). URLs go through `self._url_to_data_url`; `b64_json`
* is wrapped in a `data:image/png;base64,` URL.
* - text-to-video: looks for a URL under "output" or `body["video"]["url"]`.
* - Any response shape not recognised above is returned as `json.dumps(body)`.
*
* NOTE(review): `out[0]` is evaluated for any list, so an empty "output" list
* would raise IndexError in the generated code.
* NOTE(review): `images[0]` and `outputs[0]` are evaluated whenever the value
* is truthy, without checking that it is a list; a non-empty dict there would
* raise KeyError.
* NOTE(review): the `b64_json` branch hard-codes the `image/png` MIME type —
* confirm that every provider returns PNG.
*
* @param ctx codegen context; not read by this fragment
* @return the Python source fragment
*/
override def parsePython(ctx: CodegenContext): String =
""" if task == "text-to-image":
| if isinstance(body, dict):
| if "output" in body:
| out = body["output"]
| url = out[0] if isinstance(out, list) else out
| if isinstance(url, str) and url.startswith("http"):
| return self._url_to_data_url(url)
| if "images" in body:
| images = body["images"]
| if images and isinstance(images[0], dict) and "url" in images[0]:
| return self._url_to_data_url(images[0]["url"])
| if "data" in body:
| data = body["data"]
| if isinstance(data, dict) and "outputs" in data:
| outputs = data["outputs"]
| if outputs and isinstance(outputs[0], str) and outputs[0].startswith("http"):
| return self._url_to_data_url(outputs[0])
| if isinstance(data, list) and data and isinstance(data[0], dict):
| if "b64_json" in data[0]:
| return f"data:image/png;base64,{data[0]['b64_json']}"
| if "url" in data[0]:
| return self._url_to_data_url(data[0]["url"])
| return json.dumps(body)
| elif task == "text-to-video":
| if isinstance(body, dict):
| if "output" in body:
| out = body["output"]
| url = out[0] if isinstance(out, list) else out
| if isinstance(url, str) and url.startswith("http"):
| return self._url_to_data_url(url)
| if "video" in body:
| video = body["video"]
| if isinstance(video, dict) and "url" in video:
| return self._url_to_data_url(video["url"])
| return json.dumps(body)""".stripMargin
}
Loading
Loading