scieloorg · eduranm · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 19, 2026
diff --git a/config/api_router.py b/config/api_router.py
@@ -2,14 +2,14 @@
 from rest_framework.routers import DefaultRouter, SimpleRouter
 
 from reference.api.v1.views import ReferenceViewSet
-
-app_name = "reference"
+from markup_doc.api.v1.views import ArticleViewSet
 
 if settings.DEBUG:
     router = DefaultRouter()
 else:
     router = SimpleRouter()
 
 router.register("reference", ReferenceViewSet, basename="reference")
+# Register markup_doc's ArticleViewSet under the "first_block" URL prefix.
+router.register("first_block", ArticleViewSet, basename="first_block")
 
 urlpatterns = router.urls
diff --git a/config/settings/base.py b/config/settings/base.py
@@ -82,6 +82,8 @@
     "reference",
     "xml_manager",
     "model_ai",
+    "markup_doc",
+    "markuplib",
 ]
 
 INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS + WAGTAIL

diff --git a/fixtures/Artigo 5.docx b/fixtures/Artigo 5.docx
diff --git a/fixtures/e14790.docx b/fixtures/e14790.docx
diff --git a/fixtures/e740.docx b/fixtures/e740.docx
diff --git a/markup_doc/__init__.py b/markup_doc/__init__.py
diff --git a/markup_doc/admin.py b/markup_doc/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
diff --git a/markup_doc/api/__init__.py b/markup_doc/api/__init__.py
diff --git a/markup_doc/api/v1/__init__.py b/markup_doc/api/v1/__init__.py
diff --git a/markup_doc/api/v1/serializers.py b/markup_doc/api/v1/serializers.py
@@ -0,0 +1,7 @@
+from rest_framework import serializers
+from markup_doc.models import ArticleDocx
+
+class ArticleDocxSerializer(serializers.ModelSerializer):
+    """Model serializer for ``ArticleDocx`` that exposes every model field."""
+
+    class Meta:
+        # "__all__" asks the serializer to include all fields of the model.
+        fields = "__all__"
+        model = ArticleDocx
diff --git a/markup_doc/api/v1/views.py b/markup_doc/api/v1/views.py
@@ -0,0 +1,43 @@
+from django.shortcuts import render
+from django.http import JsonResponse
+from rest_framework.permissions import IsAuthenticated
+from rest_framework.viewsets import GenericViewSet
+from rest_framework.mixins import CreateModelMixin
+from rest_framework.response import Response
+from markup_doc.api.v1.serializers import ArticleDocxSerializer
+from markup_doc.marker import mark_article
+
+import json
+
+# Create your views here.
+
+class ArticleViewSet(
+    GenericViewSet,  # generic view functionality
+    CreateModelMixin,  # handles POSTs
+):
+    """POST-only endpoint that runs ``mark_article`` on a JSON payload.
+
+    The request body is expected to be a JSON object; its ``text`` and
+    ``metadata`` keys (both optional) are forwarded to ``mark_article``.
+    Access is restricted to authenticated users.
+    """
+
+    serializer_class = ArticleDocxSerializer
+    permission_classes = [IsAuthenticated]
+    http_method_names = [
+        "post",
+    ]
+
+    def create(self, request, *args, **kwargs):
+        """Handle POST by delegating to :meth:`api_article`."""
+        return self.api_article(request)
+
+    def api_article(self, request):
+        """Mark up the article described by the JSON request body.
+
+        Args:
+            request: Incoming request whose raw body holds the JSON payload.
+
+        Returns:
+            JsonResponse: ``{"message": <mark_article result>}`` on success,
+            or ``{"error": "Error processing"}`` with HTTP 400 when the body
+            is not a JSON object.
+        """
+        try:
+            data = json.loads(request.body)
+        except ValueError:
+            # ValueError is the common base of json.JSONDecodeError and
+            # UnicodeDecodeError, so malformed and undecodable bodies both
+            # end up here instead of surfacing as a server error.
+            return JsonResponse({'error': 'Error processing'}, status=400)
+
+        if not isinstance(data, dict):
+            # Valid JSON that is not an object (list, string, number, null)
+            # has no .get(); reject it as a client error.
+            return JsonResponse({'error': 'Error processing'}, status=400)
+
+        post_text = data.get('text')  # None when the key is absent
+        post_metadata = data.get('metadata')  # None when the key is absent
+
+        resp_data = mark_article(post_text, post_metadata)
+
+        return JsonResponse({'message': resp_data})
diff --git a/markup_doc/apps.py b/markup_doc/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class MarkupDocConfig(AppConfig):
+    """Django application configuration for the ``markup_doc`` app."""
+
+    # Dotted path of the application this configuration applies to.
+    name = "markup_doc"
+    # Field class used for implicitly created primary keys in this app.
+    default_auto_field = "django.db.models.BigAutoField"
diff --git a/markup_doc/choices.py b/markup_doc/choices.py
@@ -0,0 +1,121 @@
+# Pairs of identical strings, one pair per markup tag.
+# NOTE(review): shaped like Django ``choices`` (value, label); the consumer is
+# not visible in this module, so confirm before relying on that.
+front_labels = [
+    (tag, tag)
+    for tag in (
+        '<abstract>',
+        '<abstract-title>',
+        '<aff>',
+        '<article-id>',
+        '<article-title>',
+        '<author-notes>',
+        '<contrib>',
+        '<date-accepted>',
+        '<date-received>',
+        '<fig>',
+        '<fig-attrib>',
+        '<history>',
+        '<kwd-title>',
+        '<kwd-group>',
+        '<list>',
+        '<p>',
+        '<sec>',
+        '<sub-sec>',
+        '<subject>',
+        '<table>',
+        '<table-foot>',
+        '<title>',
+        '<trans-abstract>',
+        '<trans-title>',
+        '<translate-front>',
+        '<translate-body>',
+        '<disp-formula>',
+        '<inline-formula>',
+        '<formula>',
+    )
+]
+
+# Per-tag hint table keyed by tag string. Keys that appear across entries:
+# 'pos', 'next', 'lan', 'size', 'bold', 'italic', 'reset', 'repeat', 'regex'.
+# NOTE(review): the code that consumes these hints is not in this module;
+# confirm each key's meaning against that consumer before relying on it.
+order_labels = {
+    '<article-id>': {
+        'pos': 1,
+        'next': '<subject>',
+    },
+    '<subject>': {
+        'pos': 2,
+        'next': '<article-title>',
+    },
+    '<article-title>': {
+        'pos': 3,
+        'next': '<trans-title>',
+        'lan': True,
+    },
+    '<trans-title>': {
+        'size': 14,
+        'bold': True,
+        'lan': True,
+        'next': '<contrib>',
+    },
+    '<contrib>': {
+        'reset': True,
+        'size': 12,
+        'next': '<aff>',
+    },
+    '<aff>': {
+        'reset': True,
+        'size': 12,
+    },
+    '<abstract>': {
+        'size': 12,
+        'bold': True,
+        'lan': True,
+        'next': '<p>',
+    },
+    '<p>': {
+        'size': 12,
+        'next': '<p>',
+        'repeat': True,
+    },
+    '<trans-abstract>': {
+        'size': 12,
+        'bold': True,
+        'lan': True,
+        'next': '<p>',
+    },
+    # The three entries below carry a regular expression instead of 'next'.
+    '<kwd-group>': {
+        'size': 12,
+        'regex': r'(?i)(palabra.*clave.*:|keyword.*:)',
+    },
+    '<history>': {
+        'size': 12,
+        'regex': r'\d{2}/\d{2}/\d{4}',
+    },
+    '<corresp>': {
+        'size': 12,
+        'regex': r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
+    },
+    '<sec>': {
+        'size': 16,
+        'bold': True,
+        'next': None,
+    },
+    '<sub-sec>': {
+        'size': 12,
+        'italic': True,
+        'next': None,
+    },
+    '<sub-sec-2>': {
+        'size': 14,
+        'bold': True,
+        'next': None,
+    },
+}
+
+# Reduced hint table with only '<sec>', '<sub-sec>' and '<p>' entries.
+# NOTE(review): presumably applied to the article body; the consumer is not
+# visible in this module, so confirm before relying on that.
+order_labels_body = {
+    '<sec>': {'size': 16, 'bold': True},
+    '<sub-sec>': {'size': 12, 'italic': True},
+    '<p>': {'size': 12},
+}
diff --git a/markup_doc/forms.py b/markup_doc/forms.py
@@ -0,0 +1 @@
+from wagtail.admin.forms.models import WagtailAdminModelForm
diff --git a/markup_doc/issue_proc.py b/markup_doc/issue_proc.py
@@ -0,0 +1,150 @@
+from lxml import etree
+from urllib.parse import urlparse
+from packtools.sps.pid_provider.xml_sps_lib import get_xml_with_pre
+import os
+
+
+class Asset:
+    """Lightweight view of an image object exposing ``file`` and ``original_href``."""
+
+    def __init__(self, wagtail_image):
+        stored_file = wagtail_image.file
+        # File object of the image (per the original note it has a ``.path``
+        # holding the absolute path — TODO confirm for the storage in use).
+        self.file = stored_file
+        # Name of the file as recorded by the storage.
+        self.original_href = stored_file.name
+
+
+class XmlIssueProc:
+    """Derive issue-folder and package names from a record's XML.
+
+    ``registro`` must expose ``text_xml`` (parsed with ``get_xml_with_pre``)
+    and ``content_body`` (iterated by :meth:`find_asset`).
+
+    Attributes set on construction:
+        registro: The record passed in.
+        xmltree: Parsed XML tree of ``registro.text_xml``.
+        journal_proc: Object exposing the journal acronym as ``acron``.
+        issue_folder: Result of :meth:`_extract_issue_folder` without a lot.
+    """
+
+    def __init__(self, registro):
+        self.registro = registro
+        self.xmltree = self._extract_xml_tree()
+        self.journal_proc = self._extract_journal_proc()
+        self.issue_folder = self._extract_issue_folder()
+
+    def _extract_xml_tree(self):
+        """Parse ``registro.text_xml`` and return the resulting XML tree."""
+        return get_xml_with_pre(self.registro.text_xml).xmltree
+
+    def _extract_journal_proc(self):
+        """Return an ad-hoc ``JournalProc`` class whose ``acron`` attribute is
+        the publisher-id journal id, or ``"journal"`` when it is missing."""
+        acron = self.xmltree.findtext(".//journal-id[@journal-id-type='publisher-id']")
+        return type("JournalProc", (), {"acron": acron or "journal"})
+
+    def _get_issn(self):
+        """Return the electronic ISSN, falling back to the print ISSN."""
+        return (
+            self.xmltree.findtext(".//issn[@pub-type='epub']")
+            or self.xmltree.findtext(".//issn[@pub-type='ppub']")
+        )
+
+    def _name_prefix_parts(self):
+        """Return the non-empty ``[issn, acron]`` parts that start every name."""
+        issn = self._get_issn() or ""
+        acron = self.journal_proc.acron or ""
+        return [part for part in (issn, acron) if part]
+
+    @staticmethod
+    def _glue_suffix(parts, suffix):
+        """Concatenate ``suffix`` onto the last part, or add it as the first part
+        when ``parts`` is empty (avoids ``IndexError`` on ``parts[-1]``)."""
+        # NOTE(review): when the XML has no <volume>, the last part is the
+        # acronym, so the suffix is glued onto it (as the original code did);
+        # confirm whether a separate part is wanted in that case.
+        if parts:
+            parts[-1] += suffix
+        else:
+            parts.append(suffix)
+
+    def _append_volume_issue(self, parts, vol_prefix, num_prefix):
+        """Append the volume/issue tokens read from the XML to ``parts``.
+
+        ``vol_prefix`` and ``num_prefix`` are prepended to the volume and to
+        the issue number respectively ("v"/"n" for folders, "" for packages).
+        """
+        vol = (self.xmltree.findtext(".//volume") or "").strip()
+        issue = (self.xmltree.findtext(".//issue") or "").strip().lower()
+
+        if vol:
+            parts.append(f"{vol_prefix}{vol}")
+
+        if not issue:
+            return
+
+        if issue.startswith("suppl"):
+            # Supplement of a volume: suffix joins the previous part (v10s2).
+            self._glue_suffix(parts, f"s{issue.replace('suppl', '').strip()}")
+        elif "suppl" in issue:
+            # Supplement of a number: "<num> suppl <n>" becomes "<num>s<n>".
+            num, *sup = issue.split()
+            sup_num = "".join(sup).replace("suppl", "").strip()
+            parts.append(f"{num_prefix}{num}s{sup_num}")
+        elif issue.startswith("spe"):
+            # Special number: suffix joins the previous part (v10nspe1).
+            self._glue_suffix(parts, f"nspe{issue.replace('spe', '').strip()}")
+        else:
+            # Regular number gets its own part.
+            parts.append(f"{num_prefix}{issue}")
+
+    def _extract_issue_folder(self, lot=None):
+        """Build the issue folder name, parts joined by ``-``.
+
+        Args:
+            lot: Optional lot number (int or numeric string). When given and
+                the XML has a collection year, ``<lot:02d><yy>`` is appended.
+
+        Returns:
+            str: e.g. ``"1234-5678-abc-v10-n4s2"``.
+        """
+        parts = self._name_prefix_parts()
+        self._append_volume_issue(parts, vol_prefix="v", num_prefix="n")
+
+        year = (
+            self.xmltree.findtext(".//pub-date[@date-type='collection']/year") or ""
+        ).strip()
+
+        # Continuous-publication folder identified by a lot.
+        if lot and year:
+            parts.append(f"{int(lot):02d}{year[-2:]}")
+
+        return "-".join(parts)
+
+    def build_pkg_name(self, lang=None):
+        """Build the package name, parts joined by ``-``.
+
+        The name is made of ISSN, acronym, volume/issue tokens (without the
+        "v"/"n" prefixes), an article id and, optionally, ``lang``. Empty
+        ISSN/acronym parts are dropped so the name never starts with ``-``.
+
+        Args:
+            lang: Language code appended only when truthy (translations).
+
+        Returns:
+            str: e.g. ``"1234-5678-abc-10-4s2-e123-en"``.
+        """
+        parts = self._name_prefix_parts()
+        self._append_volume_issue(parts, vol_prefix="", num_prefix="")
+
+        # Article id: first non-blank of elocation-id, fpage, scielo-v2 PID.
+        candidates = (
+            self.xmltree.findtext(".//elocation-id"),
+            self.xmltree.findtext(".//fpage"),
+            self.xmltree.findtext(".//article-id[@specific-use='scielo-v2']"),
+        )
+        stripped = ((value or "").strip() for value in candidates)
+        # "na" is the fallback when none of the three is available.
+        parts.append(next((value for value in stripped if value), "na"))
+
+        if lang:
+            parts.append(lang)
+
+        return "-".join(parts)
+
+    def find_asset(self, basename, name):
+        """Return the ``image`` blocks of ``registro.content_body`` as ``Asset``s.
+
+        An image is accepted when ``basename`` equals either the basename of
+        its stored file name or the basename of its "original" rendition URL.
+
+        Args:
+            basename: File basename to look for.
+            name: Unused; kept so existing callers keep working.
+
+        Returns:
+            list: Matching ``Asset`` objects (empty when nothing matches).
+        """
+        assets = []
+        for block in self.registro.content_body or ():
+            if block.block_type != "image" or not block.value:
+                continue
+
+            wagtail_image = block.value.get("image")
+            if not wagtail_image:
+                continue
+
+            # Real name in storage (e.g. foto1.abcd1234.jpg).
+            storage_basename = os.path.basename(wagtail_image.file.name)
+            if basename != storage_basename:
+                # Only build the rendition when the storage name did not match.
+                original_url = wagtail_image.get_rendition("original").url
+                xml_basename = os.path.basename(urlparse(original_url).path)
+                if basename != xml_basename:
+                    continue
+
+            assets.append(Asset(wagtail_image))
+
+        return assets
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from django.contrib import admin

		# Register your models here.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from wagtail.admin.forms.models import WagtailAdminModelForm