Complete REST API documentation for PMOVES-DoX backend.
Base URL: http://localhost:8484
- Core Endpoints
- Document Management
- Ingestion
- Search & Query
- Analysis
- Tag Extraction
- Data Processing
- Visualization
- Export
- Task Management
- Error Codes
- Cipher Integration
GET /
GET /health
Check if the API is running.
Response:
{
"status": "ok",
"timestamp": "2024-01-15T10:30:00Z"
}
GET /config
Get runtime configuration.
Response:
{
"gpu_available": true,
"ollama_available": true,
"vlm_enabled": true,
"hrm_enabled": false,
"db_backend": "sqlite"
}
POST /upload
Upload documents for processing.
Content-Type: multipart/form-data
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `files` | file[] | Yes | Files to upload (PDF, CSV, XLSX, XML, etc.) |
| `report_week` | string | No | ISO week (e.g., "2024-W01") for grouping |
| `async_pdf` | boolean | No | Process PDFs asynchronously (default: false) |
| `urls` | string[] | No | Web URLs to ingest |
Example:
curl -X POST http://localhost:8484/upload \
-F "files=@document.pdf" \
-F "files=@data.csv" \
-F "report_week=2024-W20"
Response:
{
"results": [
{
"filename": "document.pdf",
"status": "success",
"facts_count": 42,
"evidence_count": 15,
"artifact_id": "uuid"
}
]
}
Error Response (413):
{
"detail": "File size exceeds maximum allowed size of 100MB"
}
GET /artifacts
Get all uploaded artifacts.
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `report_week` | string | No | Filter by report week |
Response:
{
"artifacts": [
{
"id": "uuid",
"filename": "report.pdf",
"filepath": "/app/uploads/uuid_report.pdf",
"filetype": ".pdf",
"report_week": "2024-W20",
"status": "processed",
"facts_count": 42,
"table_evidence": 8,
"chart_evidence": 3,
"formula_evidence": 12
}
]
}
GET /artifacts/{artifact_id}
Get detailed information about a specific artifact.
Path Parameters:
| Field | Type | Description |
|---|---|---|
| `artifact_id` | string | UUID of the artifact |
Response:
{
"id": "uuid",
"filename": "report.pdf",
"facts": [...],
"evidence": [...],
"table_evidence": 8,
"chart_evidence": 3
}
GET /artifacts/media
Get artifacts from audio, video, image OCR, and web pages.
Response:
{
"artifacts": [
{
"id": "uuid",
"filename": "interview.mp3",
"filetype": ".mp3",
"transcript": "...",
"duration": 1800
}
]
}
GET /documents
Get all ingested documents.
Response:
{
"documents": [
{
"id": "uuid",
"filename": "report.pdf",
"artifact_id": "uuid",
"facts_count": 42
}
]
}
POST /load_samples
Load sample files for testing.
Response:
{
"loaded": [
"sample.csv",
"sample.xml",
"sample_openapi.json"
]
}
POST /ingest/xml
Ingest XML log files with custom XPath mapping.
Content-Type: application/json
Request Body:
{
"filepath": "logs/app.xml",
"xpath_map": {
"entry": "//log",
"fields": {
"ts": "./timestamp",
"level": "./severity",
"code": "./errorCode",
"component": "./service",
"message": "./message"
}
}
}
Response:
{
"logs_processed": 1523,
"artifact_id": "uuid"
}
POST /ingest/openapi
Ingest OpenAPI/Swagger specification.
Request Body:
{
"filepath": "specs/api-v3.json"
}
Response:
{
"operations_processed": 45,
"artifact_id": "uuid"
}
POST /ingest/postman
Ingest Postman collection.
Request Body:
{
"collection_path": "collections/api-tests.json"
}
Response:
{
"requests_processed": 32,
"artifact_id": "uuid"
}
POST /search
Perform vector-based semantic search.
Content-Type: application/json
Request Body:
{
"q": "total revenue in Q4",
"k": 10,
"types": ["pdf", "api", "log", "tag"]
}
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `q` | string | Yes | Search query |
| `k` | integer | No | Number of results (default: 10) |
| `types` | string[] | No | Filter by type (pdf, api, log, tag) |
Response:
{
"results": [
{
"content": "Q4 Revenue: $1.2M",
"score": 0.89,
"artifact_id": "uuid",
"page": 12,
"type": "pdf",
"evidence_id": "uuid"
}
],
"query": "total revenue in Q4",
"took_ms": 45
}
POST /search/rebuild
Rebuild the vector search index.
Response:
{
"status": "rebuilt",
"indexed_facts": 1523,
"took_ms": 2340
}
POST /ask
Ask natural language questions.
Query Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `question` | string | Yes | Question to ask |
| `use_hrm` | boolean | No | Use HRM refinement (default: false) |
Example:
curl -X POST "http://localhost:8484/ask?question=What%20is%20the%20total%20revenue?"
Response:
{
"answer": "Total revenue was $5.8 million",
"confidence": 0.92,
"citations": [
{
"artifact_id": "uuid",
"page": 5,
"content": "Q1 Revenue: $1.2M"
}
],
"hrm_steps": 3
}
GET /logs
Get log entries with filtering.
Query Parameters:
| Field | Type | Description |
|---|---|---|
| `level` | string | Filter by level (ERROR, WARN, INFO, DEBUG) |
| `from` | string | ISO 8601 timestamp |
| `to` | string | ISO 8601 timestamp |
| `component` | string | Filter by component |
| `code` | string | Filter by error code |
Example:
curl "http://localhost:8484/logs?level=ERROR&from=2024-01-01T00:00:00Z"
Response:
{
"logs": [
{
"ts": "2024-01-15T10:30:00Z",
"level": "ERROR",
"code": "E1001",
"component": "payment-service",
"message": "Payment gateway timeout"
}
]
}
GET /logs/export
Export logs as CSV.
Query Parameters: Same as /logs
Response: CSV file
ts,level,code,component,message
2024-01-15T10:30:00Z,ERROR,E1001,payment-service,Payment gateway timeout
GET /apis
Get all API operations from OpenAPI/Postman.
Response:
{
"apis": [
{
"id": "uuid",
"path": "/users",
"method": "POST",
"operation_id": "createUser",
"summary": "Create a new user",
"tags": ["users"]
}
]
}
GET /apis/{api_id}
Get detailed API operation information.
Response:
{
"path": "/users",
"method": "POST",
"operation_id": "createUser",
"summary": "Create a new user",
"parameters": [...],
"responses": {...}
}
GET /facts
Get all extracted facts.
Query Parameters:
| Field | Type | Description |
|---|---|---|
| `report_week` | string | Filter by report week |
| `artifact_id` | string | Filter by artifact |
Response:
{
"facts": [
{
"id": "uuid",
"artifact_id": "uuid",
"page_number": 5,
"content": "Revenue: $1.2M",
"confidence": 0.95,
"report_week": "2024-W01"
}
]
}
GET /evidence/{evidence_id}
Get specific evidence details.
Response:
{
"id": "uuid",
"artifact_id": "uuid",
"content_type": "table",
"locator": "Page 5, Table 2",
"preview": "Revenue breakdown...",
"full_data": {
"headers": ["Quarter", "Revenue"],
"rows": [...]
}
}
GET /analysis/entities
Get named entities from NER.
Response:
{
"entities": [
{
"text": "Microsoft",
"label": "ORG",
"count": 12,
"artifacts": ["uuid1", "uuid2"]
}
]
}
GET /analysis/structure
Get document heading hierarchy.
Response:
{
"headings": [
{
"level": 1,
"text": "Executive Summary",
"page": 3
}
]
}
GET /analysis/metrics
Get extracted business metrics.
Response:
{
"metrics": [
{
"metric": "revenue",
"value": "$1.2M",
"context": "Q1 revenue was $1.2M",
"page": 5
}
]
}
GET /analysis/financials
Get detected financial statements.
Response:
{
"statements": [
{
"artifact_id": "uuid",
"type": "income_statement",
"page": 5,
"confidence": 0.94,
"metrics": {
"revenue": "$1.2M",
"net_income": "$340K"
}
}
]
}
GET /analysis/artifacts/{artifact_id}
Get tables, charts, and formulas for an artifact.
Response:
{
"tables": 8,
"charts": 3,
"formulas": 12,
"details": [...]
}
GET /tags
Get all extracted tags.
Query Parameters:
| Field | Type | Description |
|---|---|---|
| `document_id` | string | Filter by document |
Response:
{
"tags": [
{
"id": "uuid",
"document_id": "uuid",
"name": "Python",
"category": "Programming Languages",
"confidence": 0.95
}
]
}
POST /extract/tags
Extract tags using LangExtract.
Request Body:
{
"document_id": "uuid",
"preset": "lms_comprehensive",
"custom_prompt": "Extract skill tags...",
"dry_run": false,
"use_hrm": true,
"api_key": "optional-override-key"
}
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `document_id` | string | Yes | Document UUID |
| `preset` | string | No | Preset name (lms_comprehensive, lms_skills, etc.) |
| `custom_prompt` | string | No | Custom extraction prompt |
| `dry_run` | boolean | No | Preview without saving (default: false) |
| `use_hrm` | boolean | No | Use HRM refinement (default: false) |
| `api_key` | string | No | Override API key |
Response:
{
"tags": [
{
"name": "Python",
"category": "Programming Languages",
"confidence": 0.95
}
],
"hrm_steps": 2,
"dry_run": false
}
POST /autotag/{artifact_id}
Quick auto-tagging with default preset.
Response:
{
"tags": [...],
"count": 15
}
GET /tags/presets
Get available LangExtract presets.
Response:
{
"presets": [
{
"name": "lms_comprehensive",
"description": "Full LMS taxonomy"
},
{
"name": "lms_skills",
"description": "Skills and competencies only"
}
]
}
GET /tags/prompt/{document_id}
POST /tags/prompt/{document_id}
Get or save custom tag extraction prompt.
POST Request Body:
{
"prompt": "Extract the following tags..."
}
POST /structure/chr
Run Constellation Harvest Regularization.
Request Body:
{
"artifact_id": "uuid",
"K": 6,
"units_mode": "sentences",
"cluster_params": {
"min_samples": 2,
"min_cluster_size": 5
}
}
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `artifact_id` | string | Yes | Artifact UUID |
| `K` | integer | Yes | Number of clusters |
| `units_mode` | string | Yes | "sentences" or "paragraphs" |
| `cluster_params` | object | No | HDBSCAN parameters |
Response:
{
"K": 6,
"units_processed": 234,
"artifacts": {
"rel_csv": "artifacts/chr_clusters.csv",
"pca_plot": "artifacts/chr_pca.png",
"relation_csv": "artifacts/chr_relation_strength.csv"
}
}
POST /convert
Convert artifact to TXT or DOCX.
Request Body:
{
"artifact_id": "uuid",
"format": "txt"
}
Response:
{
"converted_file": "artifacts/document.txt"
}
POST /extract/langextract
Run Google LangExtract on text.
Request Body:
{
"text": "Course content...",
"prompt": "Extract tags..."
}
Response:
{
"extracted": [...]
}
POST /viz/datavzrd
Generate datavzrd visualization project.
Request Body:
{
"csv_path": "artifacts/chr_clusters.csv",
"name": "Analysis Dashboard"
}
Response:
{
"viz_file": "artifacts/datavzrd.yaml",
"message": "Start with: docker compose --profile tools up datavzrd"
}
POST /viz/datavzrd/logs
Generate datavzrd for log analysis.
Request Body:
{
"level": "ERROR",
"from": "2024-01-01T00:00:00Z",
"to": "2024-01-31T23:59:59Z"
}
POST /export/poml
Generate POML for Microsoft Copilot.
Request Body:
{
"document_id": "uuid",
"title": "Financial Analysis",
"variant": "catalog"
}
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `document_id` | string | Yes | Document UUID |
| `title` | string | No | POML title |
| `variant` | string | No | "generic", "troubleshoot", or "catalog" |
Response:
{
"rel": "artifacts/poml_uuid.poml"
}
GET /download
Download artifact files.
Query Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `rel` | string | Yes | Relative path to file |
Example:
curl "http://localhost:8484/download?rel=artifacts/chr_clusters.csv" > clusters.csv
GET /open/pdf
Serve PDF at specific page (requires OPEN_PDF_ENABLED=true).
Query Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `artifact_id` | string | Yes | Artifact UUID |
| `page` | integer | No | Page number |
Response: PDF file stream
POST /summaries/generate
Generate multi-document summary.
Request Body:
{
"style": "executive",
"scope": "workspace",
"artifact_ids": ["uuid1", "uuid2"]
}
Parameters:
| Field | Type | Required | Description |
|---|---|---|---|
| `style` | string | Yes | "bullet", "executive", or "action_items" |
| `scope` | string | Yes | "workspace" or "artifact" |
| `artifact_ids` | string[] | Conditional | Required if scope="artifact" |
Response:
{
"id": "uuid",
"style": "executive",
"scope": "workspace",
"summary": "The analysis reveals...",
"created_at": "2024-01-15T10:30:00Z"
}
GET /summaries
Get summary history.
Query Parameters:
| Field | Type | Description |
|---|---|---|
| `scope` | string | Filter by scope |
| `style` | string | Filter by style |
Response:
{
"summaries": [...]
}
GET /tasks
Get all background tasks.
Response:
{
"tasks": [
{
"id": "uuid",
"status": "processing",
"filename": "large_report.pdf",
"progress": 45
}
]
}
GET /tasks/{task_id}
Get specific task details.
Response:
{
"id": "uuid",
"status": "completed",
"filename": "report.pdf",
"result": {
"facts_count": 42,
"evidence_count": 15
}
}
POST /experiments/hrm/echo
Test HRM text normalization.
Request Body:
{
"text": " hello world "
}
Response:
{
"normalized": "hello world",
"steps": 2
}
POST /experiments/hrm/sort_digits
Test HRM digit sorting.
Request Body:
{
"seq": "93241"
}
Response:
{
"sorted": "12349",
"steps": 4
}
GET /metrics/hrm
Get HRM performance metrics.
Response:
{
"total_calls": 145,
"avg_steps": 2.4,
"avg_time_ms": 123
}
GET /metrics
Get system metrics (Prometheus format).
Response: Prometheus metrics
# HELP pmoves_requests_total Total requests
# TYPE pmoves_requests_total counter
pmoves_requests_total{endpoint="/upload"} 42
DELETE /reset
Reset the backend, deleting stored artifacts, facts, and evidence.
Response:
{
"status": "reset",
"deleted": {
"artifacts": 15,
"facts": 523,
"evidence": 187
}
}
GET /watch
Get auto-ingestion watch folder status.
Response:
{
"enabled": true,
"path": "/app/watch",
"files_pending": 3
}
| Code | Meaning | Description |
|---|---|---|
| 200 | OK | Request successful |
| 201 | Created | Resource created |
| 400 | Bad Request | Invalid request parameters |
| 403 | Forbidden | Access denied (SSRF, disabled feature) |
| 404 | Not Found | Resource not found |
| 413 | Payload Too Large | File exceeds size limit |
| 422 | Unprocessable Entity | Validation error |
| 500 | Internal Server Error | Server error |
Validation Error (422):
{
"detail": [
{
"loc": ["body", "artifact_id"],
"msg": "field required",
"type": "value_error.missing"
}
]
}
SSRF Protection (403):
{
"detail": "SSRF protection: Access to localhost is not allowed"
}
File Too Large (413):
{
"detail": "File size exceeds maximum allowed size of 100MB"
}
Currently no rate limiting is implemented. For production use, add a reverse proxy with rate limiting (nginx, Caddy).
Currently no authentication is required. This is designed for local/internal use. For production:
- Use reverse proxy with auth (nginx + basic auth)
- Deploy behind VPN
- Use API gateway (AWS API Gateway, Kong)
Most list endpoints do not currently support pagination. For large datasets:
- Use filtering parameters (report_week, artifact_id)
- Export to CSV and process externally
- Use search with the `k` parameter to limit results
Request content types:
- multipart/form-data - File uploads
- application/json - API calls
- application/x-www-form-urlencoded - Form data
Response content types:
- application/json - JSON responses
- text/csv - CSV exports
- application/pdf - PDF files
- text/plain - Prometheus metrics
Configured via FRONTEND_ORIGIN environment variable.
Default: http://localhost:3737
Not currently supported. For event notifications, poll /tasks endpoint.
import requests
class PMOVESDoxClient:
    """Minimal client for the PMOVES-DoX REST API.

    Each method sends one HTTP request through ``requests`` and returns the
    decoded JSON body. A 4xx/5xx response raises ``requests.HTTPError``
    (via ``Response.raise_for_status``) instead of being returned as if it
    were a normal result.
    """

    def __init__(self, base_url="http://localhost:8484"):
        """Store the API root that endpoint paths are appended to."""
        self.base_url = base_url

    def upload(self, filepath, report_week=None):
        """Upload one local file to ``POST /upload``.

        Args:
            filepath: Path of the file to send under the ``files`` form field.
            report_week: Optional ISO week (e.g. ``"2024-W20"``); sent as the
                ``report_week`` form field only when truthy.

        Returns:
            The parsed JSON response.

        Raises:
            OSError: If ``filepath`` cannot be opened.
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        data = {"report_week": report_week} if report_week else {}
        # Keep the file open only for the duration of the request.
        with open(filepath, "rb") as f:
            resp = requests.post(
                f"{self.base_url}/upload", files={"files": f}, data=data
            )
        resp.raise_for_status()
        return resp.json()

    def search(self, query, k=10, types=None):
        """Run a semantic search through ``POST /search``.

        Args:
            query: Search text, sent as ``q``.
            k: Number of results to request.
            types: Optional list of result types; omitted from the request
                body when empty or ``None``.

        Returns:
            The parsed JSON response.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        payload = {"q": query, "k": k}
        if types:
            payload["types"] = types
        resp = requests.post(f"{self.base_url}/search", json=payload)
        resp.raise_for_status()
        return resp.json()

    def ask(self, question, use_hrm=False):
        """Ask a natural-language question through ``POST /ask``.

        Args:
            question: Question text, sent as the ``question`` query parameter.
            use_hrm: Sent as the ``use_hrm`` query parameter, serialized as
                lowercase ``"true"``/``"false"``.

        Returns:
            The parsed JSON response.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        params = {"question": question, "use_hrm": str(use_hrm).lower()}
        resp = requests.post(f"{self.base_url}/ask", params=params)
        resp.raise_for_status()
        return resp.json()
# Usage: build a client for the default local server (http://localhost:8484)
# and upload one PDF, grouping it under report week "2024-W20".
client = PMOVESDoxClient()
result = client.upload("document.pdf", "2024-W20")
answers = client.ask("What is the total revenue?")

class PMOVESDoxClient {
/**
 * Create a client bound to one API root.
 *
 * @param {string} [baseUrl="http://localhost:8484"] - Prefix that every
 *   endpoint path is appended to.
 */
constructor(baseUrl = "http://localhost:8484") {
this.baseUrl = baseUrl;
}
async upload(file, reportWeek = null) {
const formData = new FormData();
formData.append("files", file);
if (reportWeek) formData.append("report_week", reportWeek);
const resp = await fetch(`${this.baseUrl}/upload`, {
method: "POST",
body: formData
});
return resp.json();
}
async search(query, k = 10, types = null) {
const payload = { q: query, k };
if (types) payload.types = types;
const resp = await fetch(`${this.baseUrl}/search`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(payload)
});
return resp.json();
}
async ask(question, useHrm = false) {
const params = new URLSearchParams({ question, use_hrm: useHrm });
const resp = await fetch(`${this.baseUrl}/ask?${params}`, {
method: "POST"
});
return resp.json();
}
}
// Usage: top-level `await` needs an ES module (or wrap these calls in an async function).
// NOTE(review): `fileInput` is not defined in this snippet; presumably an
// <input type="file"> element obtained elsewhere. Confirm before copying.
const client = new PMOVESDoxClient();
const result = await client.upload(fileInput.files[0], "2024-W20");
const answer = await client.ask("What is the total revenue?");

- 📖 Read USER_GUIDE.md for feature tutorials
- 🍳 Follow COOKBOOKS.md for recipes
- 🎨 See DEMOS.md for examples
Questions? Open an issue on GitHub!