|
#!
# What this sample does:
# - Merges multiple inputs (PDFs and non-PDFs) into a single PDF.
# - Non-PDFs are converted to PDF; PDFs are uploaded. Collected IDs are merged via /merged-pdf.
#
# Setup (.Renviron):
# - Copy .Renviron.example to .Renviron (R folder root)
# - Set PDFREST_API_KEY=your_api_key_here
# - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use:
#   PDFREST_URL=https://eu-api.pdfrest.com
#
# Usage:
#   Rscript "Complex Flow Examples/merge-different-file-types.R" /path/to/file1 /path/to/file2 [/path/to/file3 ...]
#
# Output:
# - Prints the API JSON response to stdout. Non-2xx responses quit with a concise message.
# - Tip: pipe output to a file: Rscript ... > response.json
| 18 | + |
| 19 | +suppressWarnings(suppressMessages({ |
| 20 | + if (!requireNamespace("httr", quietly = TRUE)) stop("Please install 'httr' package") |
| 21 | + if (!requireNamespace("jsonlite", quietly = TRUE)) stop("Please install 'jsonlite' package") |
| 22 | +})) |
| 23 | + |
# Write a sprintf()-formatted message to standard error.
#
# All arguments are forwarded unchanged to sprintf(). Nothing is appended to
# the result, so callers supply their own trailing newline.
stderrf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted, file = stderr())
}
| 25 | + |
# Read the API key from the environment (Sys.getenv() yields "" when the
# variable is not set) and abort with exit status 1 when none is available.
api_key <- Sys.getenv("PDFREST_API_KEY")
if (!nzchar(api_key)) {
  stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n")
  quit(status = 1)
}
| 31 | + |
# Base URL used for every API call.
#
# PDFREST_URL overrides the default endpoint. Surrounding whitespace and
# trailing slashes are removed so that paste0(api_base, "/path") never yields
# a double slash. A variable that is set but blank (e.g. "PDFREST_URL=" in
# .Renviron) is treated as unset, so requests never go to an empty base URL.
api_base <- local({
  default_base <- "https://api.pdfrest.com"
  configured <- sub("/+$", "", trimws(Sys.getenv("PDFREST_URL", unset = "")))
  if (nzchar(configured)) configured else default_base
})
| 33 | + |
# Input paths come from the command line; a merge needs at least two of them.
args <- commandArgs(trailingOnly = TRUE)
usage_text <- "Usage: Rscript merge-different-file-types.R /path/to/file1 /path/to/file2 [/path/to/file3 ... ]\n"
if (length(args) < 2) {
  stderrf(usage_text)
  quit(status = 1)
}

# file.exists() is also TRUE for directories, which cannot be uploaded or
# converted, so they are rejected here together with missing paths. The
# offending paths are named so the user does not have to guess which one
# was wrong.
not_files <- args[!file.exists(args) | dir.exists(args)]
if (length(not_files) > 0) {
  stderrf("Not an existing file: %s\n", paste(not_files, collapse = ", "))
  stderrf(usage_text)
  quit(status = 1)
}
| 39 | + |
# Map a file path to the MIME type sent with its multipart upload.
#
# path: a single file path; only its extension is inspected, ignoring case.
# Returns the MIME type as a string, or "application/octet-stream" when the
# extension is missing or not in the table below.
content_type_for <- function(path) {
  mime_by_ext <- c(
    pdf  = "application/pdf",
    png  = "image/png",
    jpg  = "image/jpeg",
    jpeg = "image/jpeg",
    gif  = "image/gif",
    tif  = "image/tiff",
    tiff = "image/tiff",
    bmp  = "image/bmp",
    webp = "image/webp",
    doc  = "application/msword",
    docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ppt  = "application/vnd.ms-powerpoint",
    pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    xls  = "application/vnd.ms-excel",
    xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    txt  = "text/plain",
    rtf  = "application/rtf",
    html = "text/html",
    htm  = "text/html"
  )
  suffix <- tolower(tools::file_ext(path))
  if (suffix %in% names(mime_by_ext)) {
    # `[[` drops the element name, leaving a bare string.
    mime_by_ext[[suffix]]
  } else {
    "application/octet-stream"
  }
}
| 62 | + |
# Main flow: obtain one resource id per input (upload PDFs, convert everything
# else to PDF), then merge all ids, in command-line order, via /merged-pdf.
# Any failure is reported on stderr and the script exits with status 1.
tryCatch({
  # Return `value` when it is a non-empty character vector without NA or ""
  # entries; otherwise stop. Without this check a missing id would be dropped
  # silently by c()/unlist() and the merge would run with fewer inputs.
  require_ids <- function(value, what, index) {
    usable <- is.character(value) && length(value) >= 1L &&
      !anyNA(value) && all(nzchar(value))
    if (!usable) {
      stop(sprintf("Response for input #%d did not contain a usable %s", index, what))
    }
    value
  }

  # One slot per input keeps the ids in command-line order without growing a
  # vector inside the loop.
  ids_by_input <- vector("list", length(args))

  for (i in seq_along(args)) {
    p <- args[[i]]
    if (identical(tolower(tools::file_ext(p)), "pdf")) {
      # PDFs are uploaded as-is; the response carries the new file's id.
      upload_resp <- httr::POST(
        paste0(api_base, "/upload"),
        httr::add_headers(
          "api-key" = api_key,
          "content-filename" = basename(p),
          "Content-Type" = "application/octet-stream"
        ),
        body = readBin(p, what = "raw", n = file.size(p))
      )
      txt <- httr::content(upload_resp, as = "text", encoding = "UTF-8")
      message(txt)
      if (httr::http_error(upload_resp)) {
        stop(sprintf("Upload failed for input #%d with status %s", i, httr::status_code(upload_resp)))
      }
      up_json <- jsonlite::fromJSON(txt)
      # jsonlite simplifies an array of objects to a data frame; the list
      # branch covers responses it could not simplify.
      files <- up_json$files
      uploaded_id <- if (is.data.frame(files)) {
        files$id[1]
      } else if (is.list(files) && length(files) >= 1L) {
        files[[1]]$id
      } else {
        NULL
      }
      ids_by_input[[i]] <- require_ids(uploaded_id, "file id", i)
      message(sprintf("Uploaded PDF (#%d); id=%s", i, tail(ids_by_input[[i]], 1)))
    } else {
      # Non-PDFs are converted through /pdf; the converted document's
      # outputId is what gets merged.
      conv_resp <- httr::POST(
        paste0(api_base, "/pdf"),
        httr::add_headers("api-key" = api_key),
        body = list(file = httr::upload_file(p, type = content_type_for(p))),
        encode = "multipart"
      )
      txt <- httr::content(conv_resp, as = "text", encoding = "UTF-8")
      message(txt)
      if (httr::http_error(conv_resp)) {
        stop(sprintf("Conversion failed for input #%d with status %s", i, httr::status_code(conv_resp)))
      }
      cv_json <- jsonlite::fromJSON(txt)
      ids_by_input[[i]] <- require_ids(cv_json$outputId, "outputId", i)
      message(sprintf("Converted non-PDF (#%d); outputId=%s", i, tail(ids_by_input[[i]], 1)))
    }
  }

  collected_ids <- unlist(ids_by_input, use.names = FALSE)

  # Build the x-www-form-urlencoded merge body: each id contributes an
  # id[] / pages[] / type[] triple, in input order.
  enc <- function(x) utils::URLencode(x, reserved = TRUE)
  triples <- vapply(
    collected_ids,
    function(id) {
      paste0("id[]=", enc(id), "&pages[]=", enc("1-last"), "&type[]=", enc("id"))
    },
    character(1),
    USE.NAMES = FALSE
  )
  merge_body <- paste(triples, collapse = "&")

  merge_resp <- httr::POST(
    paste0(api_base, "/merged-pdf"),
    httr::add_headers("api-key" = api_key, "Content-Type" = "application/x-www-form-urlencoded"),
    body = merge_body
  )

  # The response body always goes to stdout (so it can be piped to a file),
  # even when the status check below then fails.
  merge_text <- httr::content(merge_resp, as = "text", encoding = "UTF-8")
  cat(merge_text)
  if (httr::http_error(merge_resp)) {
    stop(sprintf("Merge failed with status %s", httr::status_code(merge_resp)))
  }

}, error = function(e) {
  stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e))
  quit(status = 1)
})
0 commit comments