scieloorg
diff --git a/‎config/settings/base.py‎
Lines changed: 1 addition & 0 deletions b/‎config/settings/base.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎markup_doc/labeling_utils.py‎
Lines changed: 782 additions & 0 deletions b/‎markup_doc/labeling_utils.py‎
Lines changed: 782 additions & 0 deletions
diff --git a/‎markup_doc/marker.py‎
Lines changed: 46 additions & 0 deletions b/‎markup_doc/marker.py‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎markup_doc/tasks.py‎
Lines changed: 136 additions & 0 deletions b/‎markup_doc/tasks.py‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎markup_doc/wagtail_hooks.py‎
Lines changed: 30 additions & 11 deletions b/‎markup_doc/wagtail_hooks.py‎
Lines changed: 30 additions & 11 deletions
diff --git a/‎markuplib/__init__.py‎ b/‎markuplib/__init__.py‎
@@ -83,6 +83,7 @@
     "xml_manager",
     "model_ai",
     "markup_doc",
+    "markuplib",
 ]
 
 INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS + WAGTAIL
 
@@ -0,0 +1,46 @@
+# Standard library imports
+import re
+
+# Local application imports
+from model_ai.llama import LlamaService, LlamaInputSettings
+
+
+def mark_article(text, metadata):
+    if metadata == 'author':
+        messages, response_format = LlamaInputSettings.get_author_config()
+    if metadata == 'affiliation':
+        messages, response_format = LlamaInputSettings.get_affiliations()
+    if metadata == 'doi':
+        messages, response_format = LlamaInputSettings.get_doi_and_section()
+    if metadata == 'title':
+        messages, response_format = LlamaInputSettings.get_titles()
+
+    gll = LlamaService(messages, response_format)
+    output = gll.run(text)
+    output = output['choices'][0]['message']['content']
+    if metadata == 'doi':
+        output = re.search(r'\{.*\}', output, re.DOTALL)
+    else:
+        output = re.search(r'\[.*\]', output, re.DOTALL)
+    if output:
+        output = output.group(0)
+    return output
+
+def mark_reference(reference_text):
+    messages, response_format = LlamaInputSettings.get_messages_and_response_format_for_reference(reference_text)
+    reference_marker = LlamaService(messages, response_format)
+    output = reference_marker.run(reference_text)
+
+    for item in output["choices"]:
+        yield item["message"]["content"]
+
+
+def mark_references(reference_block):
+    for ref_row in reference_block.split("\n"):
+        ref_row = ref_row.strip()
+        if ref_row:
+            choices = mark_reference(ref_row)
+            yield {
+                "reference": ref_row,
+                "choices": list(choices)
+            }
@@ -1,9 +1,145 @@
 # Local application imports
 from config import celery_app
 
+# Standard library imports
+import json
+import re
+
+from markup_doc.models import UploadDocx
+from markup_doc.labeling_utils import (
+    split_in_three,
+    process_reference,
+    process_references,
+    create_labeled_object2,
+    get_llm_model_name
+)
+
+from markup_doc.models import ProcessStatus
+from markup_doc.labeling_utils import MODEL_NAME_GEMINI, MODEL_NAME_LLAMA
+from markuplib.function_docx import functionsDocx
+from model_ai.llama import LlamaService
+from reference.config_gemini import create_prompt_reference
 from markup_doc.sync_api import sync_journals_from_api
 
 
 @celery_app.task()
 def task_sync_journals_from_api():
     """Celery task that runs ``sync_journals_from_api`` with no arguments."""
     sync_journals_from_api()
+
+
+@celery_app.task()
+def get_labels(title, user_id):
+    article_docx = UploadDocx.objects.get(title=title)
+    doc = functionsDocx.openDocx(article_docx.file.path)
+    sections, content = functionsDocx().extractContent(doc, article_docx.file.path)
+    article_docx_markup = article_docx
+    text_title = ''
+    text_paragraph = ''
+    stream_data = []
+    stream_data_body = []
+    stream_data_back = []
+    num_ref=0
+    state = {
+        'label': None,
+        'label_next': None,
+        'label_next_reset': None,
+        'reset': False,
+        'repeat': None,
+        'body_trans': False,
+        'body': False,
+        'back': False,
+        'references': False
+    }
+    counts = {
+        'numref': 0,
+        'numtab': 0,
+        'numfig': 0,
+        'numeq': 0
+    }
+
+    next_item  = None
+    obj_reference = []
+    llama_model = False
+
+    for i, item in enumerate(content):
+        if next_item:
+            next_item = None
+            continue
+
+        obj = {}
+
+        if item.get('text') is None or item.get('text') == '':
+            state['label_next'] = state['label_next_reset'] if state['reset'] else state['label_next']
+            if state['back']:
+                state['back'] = False
+                state['body'] = False
+                state['references'] = True
+        else:
+
+            obj, result, state = create_labeled_object2(i, item, state, sections)
+                        
+            if result:           
+                if item.get('text').lower() in ['introducción', 'introduction', 'introdução'] and state['references']:
+                    state['body_trans'] = True
+                    obj_trans = {
+                            'type': 'paragraph_with_language',
+                            'value': {
+                                'label': '<translate-body>',
+                                'paragraph': 'Translate'
+                            }
+                        }
+                    stream_data_body.append(obj_trans)    
+                if state['body']:
+                    if state['references']:
+                        if state['body_trans']:
+                            stream_data_body.append(obj)
+                        else:
+                            stream_data.append(obj)
+                    else:
+                        stream_data_body.append(obj)
+                elif state['back']:
+                    if state['label'] == '<sec>':
+                        stream_data_back.append(obj)
+                    if state['label'] == '<p>':
+                        num_ref = num_ref + 1
+                        #obj = {}#process_reference(num_ref, obj, user_id)
+                        obj_reference.append({"num_ref": num_ref, "obj": obj, "text": obj['value']['paragraph'],})
+                    #stream_data_back.append(obj)
+                else:
+                    stream_data.append(obj)
+    
+    num_refs = [item["num_ref"] for item in obj_reference]
+
+    if get_llm_model_name() == 'LLAMA':
+        for obj_ref in obj_reference:
+            obj = process_reference(obj_ref['num_ref'], obj_ref['obj'], user_id)
+            stream_data_back.append(obj)
+
+    else:
+        chunks = split_in_three(obj_reference)
+        output=[]
+
+        llm_first_block = LlamaService(mode='prompt', temperature=0.1)
+
+        for chunk in chunks:
+            if len(chunk) > 0:
+                text_references = "\n".join([item["text"] for item in chunk]).replace('<italic>', '').replace('</italic>', '')
+                prompt_reference = create_prompt_reference(text_references)
+
+                result = llm_first_block.run(prompt_reference) 
+
+                match = re.search(r'\[.*\]', result, re.DOTALL)
+                if match:
+                    parsed = json.loads(match.group(0))
+                    output.extend(parsed)  # Agrega a la lista de salida
+    
+        stream_data_back.extend(process_references(num_refs, output))
+
+    article_docx_markup.content = stream_data
+    article_docx_markup.content_body = stream_data_body
+    article_docx_markup.content_back = stream_data_back
+    article_docx_markup.save()
+
+    article_docx.estatus = ProcessStatus.PROCESSED
+    article_docx.save()
+
@@ -1,27 +1,45 @@
 from django.http import HttpResponseRedirect
-from django.template.response import TemplateResponse
 from django.utils.translation import gettext_lazy as _
-from wagtail.admin import messages
-from wagtail.snippets.models import register_snippet
+from django.contrib import messages
+from django.template.response import TemplateResponse
+from wagtail_modeladmin.options import ModelAdmin
+
 from wagtail.snippets.views.snippets import (
     CreateView,
     EditView,
     SnippetViewSet,
-    SnippetViewSetGroup,
+    SnippetViewSetGroup
 )
-from wagtail_modeladmin.options import ModelAdmin
 
-from markup_doc.models import (
+from markup_doc.models import ( 
     ArticleDocx,
     ArticleDocxMarkup,
+    UploadDocx,
+    MarkupXML,
     CollectionModel,
     JournalModel,
-    MarkupXML,
-    ProcessStatus,
-    UploadDocx,
+    ProcessStatus
 )
-from markup_doc.sync_api import sync_collection_from_api
-from markup_doc.tasks import task_sync_journals_from_api
+
+from config.menu import get_menu_order
+from markup_doc.tasks import get_labels, task_sync_journals_from_api
+from django.urls import path, reverse
+from django.utils.html import format_html
+from wagtail.admin import messages
+from wagtail.admin.views import generic
+
+from django.shortcuts import redirect, get_object_or_404
+from django.views import View
+
+from wagtail.snippets.models import register_snippet
+from django.db.models.signals import post_save
+from django.dispatch import receiver
+from django.db import transaction
+
+from wagtail import hooks
+from django.templatetags.static import static
+from markup_doc.sync_api import sync_collection_from_api, sync_journals_from_api
+
 
 
 class ArticleDocxCreateView(CreateView):
@@ -41,6 +59,7 @@ def form_valid(self, form):
         self.object = form.save_all(self.request.user)
         self.object.estatus = ProcessStatus.PROCESSING
         self.object.save()
+        transaction.on_commit(lambda: get_labels.delay(self.object.title, self.request.user.id))
         return HttpResponseRedirect(self.get_success_url())
Original file line number	Diff line number	Diff line change
`@@ -83,6 +83,7 @@`
`83`	`83`	`"xml_manager",`
`84`	`84`	`"model_ai",`
`85`	`85`	`"markup_doc",`
	`86`	`+ "markuplib",`
`86`	`87`	`]`
`87`	`88`
`88`	`89`	`INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS + WAGTAIL`