Skip to content

Commit 3b0c22e

Browse files
feat: Implemented functionality to enable multilingual filename uploads in the Admin App. (#1404)
1 parent 28e628a commit 3b0c22e

1 file changed

Lines changed: 7 additions & 1 deletion

File tree

code/backend/pages/01_Ingest_Data.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from os import path
2+
import re
23
import streamlit as st
34
import traceback
45
import requests
@@ -56,6 +57,11 @@ def add_urls():
5657
add_url_embeddings(urls)
5758

5859

60+
def sanitize_metadata_value(value):
    """Return ``value`` with every disallowed character replaced by ``?``.

    Allowed characters are ASCII letters, ASCII digits, hyphen, underscore,
    space, and period. Anything else (including non-ASCII characters) is
    substituted one-for-one, so the result has the same length as the input.
    """
    # Negated class: matches any single character NOT in the allowed set.
    disallowed = re.compile(r"[^a-zA-Z0-9-_ .]")
    placeholder = "?"
    return disallowed.sub(placeholder, value)
63+
64+
5965
def add_url_embeddings(urls: list[str]):
6066
params = {}
6167
if env_helper.FUNCTION_KEY is not None:
@@ -89,7 +95,7 @@ def add_url_embeddings(urls: list[str]):
8995
for up in uploaded_files:
9096
# To read file as bytes:
9197
bytes_data = up.getvalue()
92-
title = up.name.encode("latin-1", "replace").decode("latin-1")
98+
title = sanitize_metadata_value(up.name)
9399
if st.session_state.get("filename", "") != up.name:
94100
# Upload a new file
95101
st.session_state["filename"] = up.name

0 commit comments

Comments
 (0)