Skip to content

Commit 0cd08bb

Browse files
committed
Agregar utilidades para consultar first_block y extraer palabras clave
1 parent fb9da61 commit 0cd08bb

1 file changed

Lines changed: 56 additions & 0 deletions

File tree

markup_doc/labeling_utils.py

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -780,3 +780,59 @@ def create_labeled_object2(i, item, state, sections):
780780
}
781781

782782
return obj, result, state
783+
784+
785+
def get_data_first_block(text, metadata, user_id):
    """Query the ``first_block`` API endpoint for *text* and decode its answer.

    Posts ``text`` and ``metadata`` as JSON to the ``first_block`` endpoint,
    authenticating with a bearer access token obtained through
    ``RefreshToken.for_user`` for the user identified by ``user_id``. The
    response's ``message`` field is itself a JSON string and is decoded
    before being returned.

    Args:
        text: Value sent under the ``text`` key of the request payload.
        metadata: Value sent under the ``metadata`` key of the payload.
        user_id: Primary key of the ``User`` the access token is issued for.

    Returns:
        The object decoded from the response's ``message`` string, or
        ``None`` when no ``LlamaModel`` row exists, when the first row has
        an empty ``name_file``, or when the endpoint answers with a status
        code other than 200.

    Raises:
        KeyError: If the response body has no ``message`` key.
        ValueError: If the body or its ``message`` field is not valid JSON.
    """
    model = LlamaModel.objects.first()

    # ``first()`` yields None on an empty table; treat "no model row" the
    # same as "no model file" instead of failing on the attribute access.
    if model is None or not model.name_file:
        return None

    user = User.objects.get(pk=user_id)
    access_token = RefreshToken.for_user(user).access_token

    # FIXME: Hardcoded URL
    url = "http://django:8000/api/v1/first_block/"

    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json',
    }
    payload = {
        'text': text,
        'metadata': metadata,
    }

    # NOTE(review): no timeout is passed to this request; confirm whether
    # an unbounded wait on the endpoint is intended.
    response = requests.post(url, json=payload, headers=headers)

    if response.status_code != 200:
        return None

    # ``message`` carries a JSON document serialised as a string, so it
    # needs a second decoding step after parsing the response body.
    message_str = response.json()['message']
    return json.loads(message_str)
815+
816+
817+
def extract_keywords(text):
    """Split a keywords line into an optional label and a clean keyword list.

    A single trailing period is removed first. If the text then has the
    form ``label: content``, the part before the first colon becomes the
    title and the rest is treated as the keyword content; otherwise the
    whole text is the content and the title is ``None``. The content is
    split on commas and semicolons, empty fragments are discarded, and the
    remaining keywords are re-joined with ``", "``.

    Args:
        text: The raw keywords string.

    Returns:
        A dict with ``"title"`` (the label, or ``None`` when there is no
        ``label: content`` structure) and ``"keywords"`` (the keywords as a
        single ``", "``-separated string; empty when none were found).
    """
    # Drop the trailing period, if there is one
    text = text.strip()
    if text.endswith('.'):
        text = text[:-1].strip()

    # Check whether the text carries a label followed by a colon
    match = re.match(r'(?i)\s*(.+?)\s*:\s*(.+)', text)

    if match:
        label = match.group(1).strip()
        content = match.group(2).strip()
    else:
        label = None
        content = text

    # Split on semicolons or commas
    keywords = re.split(r'\s*[;,]\s*', content)

    # Join only the non-empty fragments so doubled or trailing separators
    # do not leave empty entries in the result.
    clean_keywords = [p.strip() for p in keywords if p.strip()]

    return {"title": label, "keywords": ", ".join(clean_keywords)}

0 commit comments

Comments
 (0)