|
| 1 | +from typing import Any, Optional |
| 2 | +import logging |
| 3 | +import mimetypes |
| 4 | +import urllib.parse |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +from mcp import types |
| 8 | +from rdflib import Literal, URIRef |
| 9 | +from rdflib.namespace import XSD |
| 10 | +from rdflib.query import Result |
| 11 | + |
| 12 | +from web_algebra.client import FileClient |
| 13 | +from web_algebra.json_result import JSONResult |
| 14 | +from web_algebra.mcp_tool import MCPTool |
| 15 | +from web_algebra.operation import Operation |
| 16 | + |
| 17 | + |
class AddFile(Operation, MCPTool):
    """RDF/POST a file to a LinkedDataHub document, returning the minted upload URI.

    The file's RDF description (`nfo:FileDataObject` + filename + MIME type +
    sha1 + title) is appended to the target document; the file bytes
    themselves are stored by LDH at `<base>/uploads/{sha1}` under its
    built-in upload namespace, independent of the target document's URI.

    Unlike the rest of the `ldh-Add*` family, this op does not subclass
    `POST` — file upload uses `multipart/form-data` with LDH's RDF/POST
    dialect rather than an N-triples body, so it carries its own
    `FileClient` instance instead of inheriting `LinkedDataClient` plumbing.
    """

    def model_post_init(self, __context: Any) -> None:
        """Create the `FileClient` used by `execute`.

        The client certificate path and password are read from
        `self.settings` when those attributes exist and default to `None`
        otherwise.
        """
        # NOTE(review): TLS certificate verification is switched off here
        # (`verify_ssl=False`). Confirm this is intended beyond development
        # setups with self-signed certificates.
        self.client = FileClient(
            cert_pem_path=getattr(self.settings, "cert_pem_path", None),
            cert_password=getattr(self.settings, "cert_password", None),
            verify_ssl=False,
        )

    @classmethod
    def name(cls):
        """Return the operation/tool identifier."""
        return "ldh-AddFile"

    @classmethod
    def description(cls) -> str:
        """Return the human-readable description of the tool and its arguments."""
        return """Adds a file to a LinkedDataHub document via multipart RDF/POST.

        Appends `a nfo:FileDataObject ; nfo:fileName ; dct:title ; ...`
        to the target document and stores the file bytes at
        `<base>/uploads/{sha1}` (LDH's built-in upload namespace).

        Arguments:
        - `url` — URI of the target document to add the file's description to.
        - `file` — absolute local file path. The bytes are read and streamed
          to the server.
        - `title` — human-readable title (`dct:title`).
        - `description` — optional description (`dct:description`).
        - `content_type` — optional MIME-type override; auto-detected from
          the file path if absent.

        Returns a result with `url` (the minted `<base>/uploads/{sha1}` URI
        the file resource is now addressable at) and `status` (HTTP status
        code) bindings.
        """

    @classmethod
    def inputSchema(cls) -> dict:
        """Return the JSON Schema describing the arguments accepted by `mcp_run`."""
        return {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "Target document URI to add the file's description to.",
                },
                "file": {
                    "type": "string",
                    "description": "Absolute local file path. The bytes are read and uploaded.",
                },
                "title": {
                    "type": "string",
                    "description": "Title of the file (dct:title).",
                },
                "description": {
                    "type": "string",
                    "description": "Optional description (dct:description).",
                },
                "content_type": {
                    "type": "string",
                    "description": "Optional MIME-type override; auto-detected from path if absent.",
                },
            },
            "required": ["url", "file", "title"],
        }

    @staticmethod
    def _resolve_content_type(path_str: str, content_type: Optional[Literal]) -> str:
        """Pick the MIME type to upload with.

        An explicit non-empty override wins; otherwise the type is guessed
        from the file path, falling back to `application/octet-stream`.
        """
        override = str(content_type) if content_type is not None else ""
        if override:
            return override
        # An empty override is treated like a missing one so the server never
        # receives a blank MIME type.
        guessed, _ = mimetypes.guess_type(path_str)
        return guessed or "application/octet-stream"

    def _string_argument(
        self, arguments: dict, key: str, variable_stack: list
    ) -> Literal:
        """Evaluate `arguments[key]` and coerce the result to a string literal."""
        value = Operation.process_json(
            self.settings, arguments[key], self.context, variable_stack
        )
        return self.to_string_literal(value)

    def execute(
        self,
        url: URIRef,
        file_path: Literal,
        title: Literal,
        description: Optional[Literal] = None,
        content_type: Optional[Literal] = None,
    ) -> Result:
        """RDF/POST a file from disk, taking and returning RDFLib terms.

        Args:
            url: Target document the file description is posted to.
            file_path: Local path of the file to upload.
            title: Title passed to the client alongside the file.
            description: Optional description passed to the client.
            content_type: Optional MIME-type override; when missing or empty
                the type is guessed from the file path.

        Returns:
            A single-row result binding `status` (the response status as an
            `xsd:integer` literal) and `url` (the reconstructed upload URI).

        Raises:
            TypeError: If an argument is not of the expected RDFLib type.
            OSError: If the file cannot be opened or read.
        """
        if not isinstance(url, URIRef):
            raise TypeError(
                f"AddFile.execute expects url to be URIRef, got {type(url)}"
            )
        if not isinstance(file_path, Literal):
            raise TypeError(
                f"AddFile.execute expects file_path to be Literal, got {type(file_path)}"
            )
        if not isinstance(title, Literal):
            raise TypeError(
                f"AddFile.execute expects title to be Literal, got {type(title)}"
            )
        if description is not None and not isinstance(description, Literal):
            raise TypeError(
                f"AddFile.execute expects description to be Literal or None, got {type(description)}"
            )
        if content_type is not None and not isinstance(content_type, Literal):
            raise TypeError(
                f"AddFile.execute expects content_type to be Literal or None, got {type(content_type)}"
            )

        path_str = str(file_path)
        # The whole file is loaded into memory before being handed to the client.
        with open(path_str, "rb") as f:
            body = f.read()

        ct = self._resolve_content_type(path_str, content_type)

        url_str = str(url)
        logging.info(
            "RDF/POSTing file %s (%d bytes, %s) to <%s>",
            path_str, len(body), ct, url_str,
        )

        response, sha1 = self.client.add_file(
            target_url=url_str,
            file_body=body,
            content_type=ct,
            title=str(title),
            description=str(description) if description is not None else None,
            filename=Path(path_str).name,
        )

        # The minted file URI lives at `<scheme>://<host>/uploads/<sha1>`
        # regardless of which target document we RDF/POSTed to. Reconstruct
        # from the target URL's host so callers don't need to thread the
        # base URL through separately.
        # NOTE(review): this assumes the application base is the host root;
        # confirm for deployments whose base URI includes a path segment.
        parsed = urllib.parse.urlparse(url_str)
        file_uri = f"{parsed.scheme}://{parsed.netloc}/uploads/{sha1}"

        logging.info("AddFile status %s → <%s>", response.status, file_uri)

        return JSONResult(
            vars=["status", "url"],
            bindings=[
                {
                    "status": Literal(response.status, datatype=XSD.integer),
                    "url": URIRef(file_uri),
                }
            ],
        )

    def execute_json(
        self, arguments: dict, variable_stack: Optional[list] = None
    ) -> Result:
        """JSON execution: process arguments with strict type checking.

        Each argument is evaluated through `Operation.process_json`; `url`
        must evaluate to a `URIRef`, the remaining arguments are coerced to
        string literals before delegating to `execute`.

        Raises:
            TypeError: If `url` does not evaluate to a `URIRef`.
            KeyError: If a required argument is missing.
        """
        # A fresh list per call avoids sharing one default list across calls.
        if variable_stack is None:
            variable_stack = []

        url_data = Operation.process_json(
            self.settings, arguments["url"], self.context, variable_stack
        )
        if not isinstance(url_data, URIRef):
            raise TypeError(
                f"ldh-AddFile expects 'url' to be URIRef, got {type(url_data)}"
            )

        file_literal = self._string_argument(arguments, "file", variable_stack)
        title_literal = self._string_argument(arguments, "title", variable_stack)

        description_literal: Optional[Literal] = None
        if "description" in arguments:
            description_literal = self._string_argument(
                arguments, "description", variable_stack
            )

        content_type_literal: Optional[Literal] = None
        if "content_type" in arguments:
            content_type_literal = self._string_argument(
                arguments, "content_type", variable_stack
            )

        return self.execute(
            url_data,
            file_literal,
            title_literal,
            description_literal,
            content_type_literal,
        )

    def mcp_run(self, arguments: dict, context: Any = None) -> Any:
        """MCP execution: plain args → plain results.

        Wraps the plain argument values in RDFLib terms, runs `execute`, and
        returns a single text content item naming the uploaded file URI.
        Optional arguments that are missing or explicitly `None` are treated
        as absent.
        """

        def optional_literal(key: str) -> Optional[Literal]:
            # An explicit null must not be turned into a literal value.
            value = arguments.get(key)
            if value is None:
                return None
            return Literal(value, datatype=XSD.string)

        url = URIRef(arguments["url"])
        file_path = Literal(arguments["file"], datatype=XSD.string)
        title = Literal(arguments["title"], datatype=XSD.string)
        description = optional_literal("description")
        content_type = optional_literal("content_type")

        result = self.execute(url, file_path, title, description, content_type)
        url_binding = result.bindings[0]["url"]
        return [types.TextContent(type="text", text=f"File added: {url_binding}")]
0 commit comments