fix: test and pre-commit failures

dittops · dittops · commit dc6faadddf0d · 2025-10-21T10:33:07.000Z
Signed-off-by: dittops <dittops@gmail.com>
diff --git a/PolyLingua/README.md b/PolyLingua/README.md
@@ -30,6 +30,7 @@ cd PolyLingua
 ```
 
 You'll be prompted for:
+
 - **HuggingFace API Token** - Get from https://huggingface.co/settings/tokens
 - **Model ID** - Default: `swiss-ai/Apertus-8B-Instruct-2509` (translation-optimized model)
 - **Host IP** - Your server's IP address
@@ -42,6 +43,7 @@ You'll be prompted for:
 ```
 
 This builds:
+
 - Translation backend service
 - Next.js UI service
 
@@ -82,13 +84,13 @@ curl -X POST http://localhost:8888/v1/translation \
 
 Key variables in `.env`:
 
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `HF_TOKEN` | HuggingFace API token | Required |
+| Variable       | Description                  | Default                             |
+| -------------- | ---------------------------- | ----------------------------------- |
+| `HF_TOKEN`     | HuggingFace API token        | Required                            |
 | `LLM_MODEL_ID` | Model to use for translation | `swiss-ai/Apertus-8B-Instruct-2509` |
-| `MODEL_CACHE` | Directory for model storage | `./data` |
-| `host_ip` | Server IP address | `localhost` |
-| `NGINX_PORT` | External port for web access | `80` |
+| `MODEL_CACHE`  | Directory for model storage  | `./data`                            |
+| `host_ip`      | Server IP address            | `localhost`                         |
+| `NGINX_PORT`   | External port for web access | `80`                                |
 
 See `.env.example` for full configuration options.
 
@@ -99,7 +101,6 @@ The service works with any HuggingFace text generation model. Recommended models
 - **swiss-ai/Apertus-8B-Instruct-2509** - Multilingual translation (default)
 - **haoranxu/ALMA-7B** - Specialized translation model
 
-
 ## 🛠️ Development
 
 ### Project Structure
@@ -128,6 +129,7 @@ PolyLingua/
 ### Running Locally (Development)
 
 **Backend:**
+
 ```bash
 # Install dependencies
 pip install -r requirements.txt
@@ -142,6 +144,7 @@ python polylingua.py
 ```
 
 **Frontend:**
+
 ```bash
 cd ui
 npm install
@@ -155,6 +158,7 @@ npm run dev
 Translate text between languages.
 
 **Request:**
+
 ```json
 {
   "language_from": "English",
@@ -164,17 +168,20 @@ Translate text between languages.
 ```
 
 **Response:**
+
 ```json
 {
   "model": "polylingua",
-  "choices": [{
-    "index": 0,
-    "message": {
-      "role": "assistant",
-      "content": "Translated text here"
-    },
-    "finish_reason": "stop"
-  }],
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Translated text here"
+      },
+      "finish_reason": "stop"
+    }
+  ],
   "usage": {}
 }
 ```
@@ -224,11 +231,13 @@ docker compose down -v
 ### Service won't start
 
 1. Check if ports are available:
+
    ```bash
    sudo lsof -i :80,8888,9000,8028,5173
    ```
 
 2. Verify environment variables:
+
    ```bash
    cat .env
    ```
@@ -258,8 +267,6 @@ docker compose down -v
 - Check if backend is running: `docker compose ps`
 - Test API directly: `curl http://localhost:8888/v1/translation`
 
-
-
 ## 🔗 Resources
 
 - [OPEA Project](https://github.com/opea-project)
@@ -270,6 +277,7 @@ docker compose down -v
 ## 📧 Support
 
 For issues and questions:
+
 - Open an issue on GitHub
 - Check existing issues for solutions
 - Review OPEA documentation
diff --git a/PolyLingua/polylingua.py b/PolyLingua/polylingua.py
@@ -17,9 +17,6 @@
 import os
 import tempfile
 from pathlib import Path
-from langdetect import detect, LangDetectException
-from docling.document_converter import DocumentConverter
-from docling.datamodel.base_models import InputFormat
 
 from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
 from comps.cores.proto.api_protocol import (
@@ -29,8 +26,11 @@
     ChatMessage,
     UsageInfo,
 )
-from fastapi import Request, UploadFile, File, Form, HTTPException
+from docling.datamodel.base_models import InputFormat
+from docling.document_converter import DocumentConverter
+from fastapi import File, Form, HTTPException, Request, UploadFile
 from fastapi.responses import StreamingResponse
+from langdetect import LangDetectException, detect
 
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
 LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
@@ -79,8 +79,7 @@ def __init__(self):
         self.converter = DocumentConverter()
 
     async def process_file(self, file: UploadFile) -> list[str]:
-        """
-        Process an uploaded file and extract text content in chunks.
+        """Process an uploaded file and extract text content in chunks.
 
         Args:
             file: The uploaded file
@@ -100,8 +99,7 @@ async def process_file(self, file: UploadFile) -> list[str]:
         file_ext = Path(file.filename).suffix.lower()
         if file_ext not in SUPPORTED_EXTENSIONS:
             raise ValueError(
-                f"Unsupported file type: {file_ext}. "
-                f"Supported types: {', '.join(sorted(SUPPORTED_EXTENSIONS))}"
+                f"Unsupported file type: {file_ext}. " f"Supported types: {', '.join(sorted(SUPPORTED_EXTENSIONS))}"
             )
 
         page_texts = []
@@ -112,19 +110,19 @@ async def process_file(self, file: UploadFile) -> list[str]:
             print(f"Reading text file {file.filename}...")
             try:
                 # Try UTF-8 first
-                text_content = contents.decode('utf-8')
+                text_content = contents.decode("utf-8")
             except UnicodeDecodeError:
                 # Fallback to latin-1 for other encodings
                 print("UTF-8 decode failed, trying latin-1...")
-                text_content = contents.decode('latin-1')
+                text_content = contents.decode("latin-1")
 
             print(f"Read {len(text_content)} characters from text file")
 
             # Split into chunks if needed
             if len(text_content) > CHUNK_SIZE:
                 print(f"Splitting into chunks of {CHUNK_SIZE} chars")
                 for i in range(0, len(text_content), CHUNK_SIZE):
-                    chunk = text_content[i:i + CHUNK_SIZE]
+                    chunk = text_content[i : i + CHUNK_SIZE]
                     page_texts.append(chunk)
                     print(f"Chunk {len(page_texts)}: {len(chunk)} chars")
             else:
@@ -145,7 +143,7 @@ async def process_file(self, file: UploadFile) -> list[str]:
                 # Convert document using docling
                 print(f"Converting document {file.filename}...")
                 result = self.converter.convert(tmp_path)
-                print(f"Conversion completed")
+                print("Conversion completed")
 
                 # Export entire document to markdown
                 full_markdown = result.document.export_to_markdown()
@@ -156,7 +154,7 @@ async def process_file(self, file: UploadFile) -> list[str]:
                     print(f"Splitting into chunks of {CHUNK_SIZE} chars")
                     # Split into manageable chunks
                     for i in range(0, len(full_markdown), CHUNK_SIZE):
-                        chunk = full_markdown[i:i + CHUNK_SIZE]
+                        chunk = full_markdown[i : i + CHUNK_SIZE]
                         page_texts.append(chunk)
                         print(f"Chunk {len(page_texts)}: {len(chunk)} chars")
                 else:
@@ -209,16 +207,14 @@ async def translate_page(self, page_text: str, language_from: str, language_to:
             {source_language}
 
         """
-        prompt = prompt_template.format(
-            language_from=language_from, language_to=language_to, source_language=page_text
-        )
+        prompt = prompt_template.format(language_from=language_from, language_to=language_to, source_language=page_text)
 
         # Create chat completion request with streaming
         chat_request_dict = {
             "model": LLM_MODEL_ID,
             "messages": [{"role": "user", "content": prompt}],
             "max_tokens": 4096,
-            "stream": True
+            "stream": True,
         }
 
         result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=chat_request_dict)
@@ -235,21 +231,21 @@ async def translate_page(self, page_text: str, language_from: str, language_to:
 
                 # Get the response body iterator
                 async for chunk in response.body_iterator:
-                    chunk_str = chunk.decode('utf-8') if isinstance(chunk, bytes) else chunk
+                    chunk_str = chunk.decode("utf-8") if isinstance(chunk, bytes) else chunk
 
                     # Parse SSE format
-                    lines = chunk_str.split('\n')
+                    lines = chunk_str.split("\n")
                     for line in lines:
-                        if line.startswith('data: '):
+                        if line.startswith("data: "):
                             data = line[6:]  # Remove "data: " prefix
 
-                            if data == '[DONE]':
+                            if data == "[DONE]":
                                 continue
 
                             try:
                                 parsed = json.loads(data)
                                 # Extract content from chat completion format
-                                text = parsed.get('choices', [{}])[0].get('delta', {}).get('content', '')
+                                text = parsed.get("choices", [{}])[0].get("delta", {}).get("content", "")
                                 if text:
                                     accumulated_text += text
                             except:
@@ -274,7 +270,7 @@ async def handle_request(self, request: Request):
             language_to = form_data.get("language_to")
             file = form_data.get("file")
 
-            if not file or not hasattr(file, 'filename'):
+            if not file or not hasattr(file, "filename"):
                 raise HTTPException(status_code=400, detail="No file uploaded")
 
             if not language_to:
@@ -368,7 +364,7 @@ async def handle_request(self, request: Request):
             chat_request_dict = {
                 "model": LLM_MODEL_ID,
                 "messages": [{"role": "user", "content": prompt}],
-                "stream": True
+                "stream": True,
             }
 
             result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=chat_request_dict)
diff --git a/PolyLingua/requirements.txt b/PolyLingua/requirements.txt
@@ -1,17 +1,17 @@
-# OPEA GenAIComps Framework
-opea-comps>=1.3.0
-
-# Core Dependencies
-fastapi>=0.109.0
-uvicorn[standard]>=0.27.0
-python-multipart>=0.0.9
 
 # Async Support
 aiohttp>=3.9.0
 asyncio>=3.4.3
 
-# Language Detection
-langdetect>=1.0.9
-
 # Document Processing
 docling>=2.0.0
+
+# Core Dependencies
+fastapi>=0.109.0
+
+# Language Detection
+langdetect>=1.0.9
+# OPEA GenAIComps Framework
+opea-comps>=1.3.0
+python-multipart>=0.0.9
+uvicorn[standard]>=0.27.0
diff --git a/PolyLingua/tests/test_compose_on_xeon.sh b/PolyLingua/tests/test_compose_on_xeon.sh