Skip to content

Commit 6c95a75

Browse files
Add R examples for pdfRest API integration
- Added JSON and Multipart payload examples for generating Markdown and rasterized PDFs. - Introduced a "Complex Flow" R script for merging multiple file types into a single PDF. - Included `.Renviron.example` for API key configuration and optional EU endpoint setup. - Added `README.md` with setup instructions, usage examples, and project structure details. - Provided `requirements.R` for dependency installation.
1 parent 26bc57d commit 6c95a75

8 files changed

Lines changed: 512 additions & 0 deletions

File tree

R/.Renviron.example

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copy this file to `.Renviron` in this folder (or to `~/.Renviron`).
2+
# Do NOT commit your real API key.
3+
4+
# Required: your pdfRest API key
5+
PDFREST_API_KEY=your_api_key_here
6+
7+
# Optional: override base URL
8+
# For EU/GDPR or regional routing you may use:
9+
# PDFREST_URL=https://eu-api.pdfrest.com
10+
PDFREST_URL=https://api.pdfrest.com
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/env Rscript
2+
# What this sample does:
3+
# - Merges multiple inputs (PDFs and non-PDFs) into a single PDF.
4+
# - Non-PDFs are converted to PDF; PDFs are uploaded. Collected IDs are merged via /merged-pdf.
5+
#
6+
# Setup (.Renviron):
7+
# - Copy .Renviron.example to .Renviron (R folder root)
8+
# - Set PDFREST_API_KEY=your_api_key_here
9+
# - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use:
10+
# PDFREST_URL=https://eu-api.pdfrest.com
11+
#
12+
# Usage:
13+
# Rscript "Complex Flow Examples/merge-different-file-types.R" /path/to/file1 /path/to/file2 [/path/to/file3 ...]
14+
#
15+
# Output:
16+
# - Prints the API JSON response to stdout. Non-2xx responses quit with a concise message.
17+
# - Tip: pipe output to a file: Rscript ... > response.json
18+
19+
# Fail fast when a required package is not installed. Nothing is attached
# here; the rest of the script reaches both packages through `pkg::fun()`.
suppressWarnings(suppressMessages({
  install_hints <- c(
    httr = "Please install 'httr' package",
    jsonlite = "Please install 'jsonlite' package"
  )
  for (pkg in names(install_hints)) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(install_hints[[pkg]])
    }
  }
}))
23+
24+
# Write a sprintf()-formatted message to standard error.
# All arguments are forwarded unchanged to sprintf(); no newline is appended,
# so callers include "\n" in the format string themselves.
stderrf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted, file = stderr())
}
25+
26+
# ---- Configuration and arguments ----

# The API key is mandatory; stop before any network call when it is absent.
api_key <- Sys.getenv("PDFREST_API_KEY", unset = "")
if (!nzchar(api_key)) {
  stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n")
  quit(status = 1)
}

# Base URL without trailing slashes, so paste0(api_base, "/endpoint") never
# produces "//". An unset, blank, or slash-only PDFREST_URL falls back to the
# default host instead of yielding an empty base URL.
api_base <- sub("/+$", "", Sys.getenv("PDFREST_URL", unset = ""))
if (!nzchar(api_base)) {
  api_base <- "https://api.pdfrest.com"
}

# At least two inputs are required, and each must be an existing file.
# file.exists() is also TRUE for directories, so those are rejected explicitly.
args <- commandArgs(trailingOnly = TRUE)
inputs_ok <- length(args) >= 2 &&
  all(file.exists(args)) &&
  !any(dir.exists(args))
if (!inputs_ok) {
  stderrf("Usage: Rscript merge-different-file-types.R /path/to/file1 /path/to/file2 [/path/to/file3 ... ]\n")
  quit(status = 1)
}
39+
40+
# Return the MIME type to send with a file, chosen by its extension.
#
# path: a single file path. The extension is compared case-insensitively.
# Returns a length-one character vector; extensions that are not in the
# table (or a path with no extension) give "application/octet-stream".
content_type_for <- function(path) {
  mime_types <- c(
    pdf = "application/pdf",
    png = "image/png",
    jpg = "image/jpeg",
    jpeg = "image/jpeg",
    gif = "image/gif",
    tif = "image/tiff",
    tiff = "image/tiff",
    bmp = "image/bmp",
    webp = "image/webp",
    doc = "application/msword",
    docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ppt = "application/vnd.ms-powerpoint",
    pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    xls = "application/vnd.ms-excel",
    xlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    txt = "text/plain",
    rtf = "application/rtf",
    html = "text/html",
    htm = "text/html"
  )
  suffix <- tolower(tools::file_ext(path))
  if (suffix %in% names(mime_types)) {
    mime_types[[suffix]]
  } else {
    "application/octet-stream"
  }
}
62+
63+
# Main flow: obtain one pdfRest id per input (upload PDFs, convert everything
# else through /pdf), then merge all ids through /merged-pdf. The merge
# response body is printed to stdout; progress and intermediate responses go
# to stderr via message(). Any error ends the script with exit status 1.
tryCatch({
  # Extract the first file id from a parsed /upload response. jsonlite parses
  # a JSON array of objects as a data.frame, so both the data.frame and the
  # plain-list shapes are handled. Returns NULL when no id is present.
  first_file_id <- function(parsed) {
    files <- parsed$files
    ids <- NULL
    if (is.data.frame(files)) {
      ids <- files$id
    } else if (is.list(files) && length(files) > 0 && is.list(files[[1]])) {
      ids <- files[[1]]$id
    }
    if (length(ids) > 0) ids[[1]] else NULL
  }

  # Stop with a descriptive error when a successful response carries no
  # usable id. Without this check a missing id would be dropped silently and
  # the merge would run with fewer inputs than the user supplied.
  require_ids <- function(ids, step, index) {
    ids <- as.character(unlist(ids, use.names = FALSE))
    if (length(ids) == 0 || anyNA(ids) || !all(nzchar(ids))) {
      stop(sprintf("%s response for input #%d did not include an id", step, index))
    }
    ids
  }

  # One slot per input, filled in order (avoids growing a vector in the loop).
  ids_by_input <- vector("list", length(args))

  for (i in seq_along(args)) {
    p <- args[[i]]
    ext <- tolower(tools::file_ext(p))
    if (ext == "pdf") {
      # Upload PDF to get id
      upload_url <- paste0(api_base, "/upload")
      upload_resp <- httr::POST(
        upload_url,
        httr::add_headers(
          "api-key" = api_key,
          "content-filename" = basename(p),
          "Content-Type" = "application/octet-stream"
        ),
        body = readBin(p, what = "raw", n = file.info(p)$size)
      )
      txt <- httr::content(upload_resp, as = "text", encoding = "UTF-8")
      message(txt)
      if (httr::http_error(upload_resp)) {
        stop(sprintf("Upload failed for input #%d with status %s", i, httr::status_code(upload_resp)))
      }
      up_json <- jsonlite::fromJSON(txt)
      ids_by_input[[i]] <- require_ids(first_file_id(up_json), "Upload", i)
      message(sprintf("Uploaded PDF (#%d); id=%s", i, paste(ids_by_input[[i]], collapse = ",")))
    } else {
      # Convert to PDF via /pdf to get outputId
      conv_url <- paste0(api_base, "/pdf")
      body <- list(file = httr::upload_file(p, type = content_type_for(p)))
      conv_resp <- httr::POST(
        conv_url,
        httr::add_headers("api-key" = api_key),
        body = body,
        encode = "multipart"
      )
      txt <- httr::content(conv_resp, as = "text", encoding = "UTF-8")
      message(txt)
      if (httr::http_error(conv_resp)) {
        stop(sprintf("Conversion failed for input #%d with status %s", i, httr::status_code(conv_resp)))
      }
      cv_json <- jsonlite::fromJSON(txt)
      ids_by_input[[i]] <- require_ids(cv_json$outputId, "Conversion", i)
      message(sprintf("Converted non-PDF (#%d); outputId=%s", i, paste(ids_by_input[[i]], collapse = ",")))
    }
  }

  collected_ids <- unlist(ids_by_input, use.names = FALSE)

  # Build x-www-form-urlencoded merge body: one id[]/pages[]/type[] triple per
  # collected id, in input order.
  enc <- function(x) utils::URLencode(x, reserved = TRUE)
  parts <- vapply(
    collected_ids,
    function(id) {
      paste0(
        "id[]=", enc(id),
        "&pages[]=", enc("1-last"),
        "&type[]=", enc("id")
      )
    },
    character(1),
    USE.NAMES = FALSE
  )
  merge_body <- paste(parts, collapse = "&")

  merge_url <- paste0(api_base, "/merged-pdf")
  merge_resp <- httr::POST(
    merge_url,
    httr::add_headers("api-key" = api_key, "Content-Type" = "application/x-www-form-urlencoded"),
    body = merge_body
  )

  # Print the body before checking the status so that the API's own error
  # payload is visible on failure as well.
  merge_text <- httr::content(merge_resp, as = "text", encoding = "UTF-8")
  cat(merge_text)
  if (httr::http_error(merge_resp)) {
    stop(sprintf("Merge failed with status %s", httr::status_code(merge_resp)))
  }

}, error = function(e) {
  stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e))
  quit(status = 1)
})
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#!/usr/bin/env Rscript
2+
# What this sample does:
3+
# - Converts a PDF to Markdown using pdfRest.
4+
# - Uses a JSON payload in two steps: upload to /upload, then call /markdown with the returned id.
5+
#
6+
# Setup (.Renviron):
7+
# - Copy .Renviron.example to .Renviron (R folder root)
8+
# - Set PDFREST_API_KEY=your_api_key_here
9+
# - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use:
10+
# PDFREST_URL=https://eu-api.pdfrest.com
11+
#
12+
# Usage:
13+
# Rscript "Endpoint Examples/JSON Payload/markdown.R" /path/to/input.pdf
14+
#
15+
# Output:
16+
# - Prints the API JSON response to stdout. Non-2xx responses quit with a concise message.
17+
# - Tip: pipe output to a file: Rscript ... > response.json
18+
19+
# Fail fast when a required package is not installed. Nothing is attached
# here; the rest of the script reaches both packages through `pkg::fun()`.
suppressWarnings(suppressMessages({
  install_hints <- c(
    httr = "Please install 'httr' package",
    jsonlite = "Please install 'jsonlite' package"
  )
  for (pkg in names(install_hints)) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(install_hints[[pkg]])
    }
  }
}))
23+
24+
# Write a sprintf()-formatted message to standard error.
# All arguments are forwarded unchanged to sprintf(); no newline is appended,
# so callers include "\n" in the format string themselves.
stderrf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted, file = stderr())
}
25+
26+
# ---- Configuration and arguments ----

# The API key is mandatory; stop before any network call when it is absent.
api_key <- Sys.getenv("PDFREST_API_KEY", unset = "")
if (!nzchar(api_key)) {
  stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n")
  quit(status = 1)
}

# Base URL without trailing slashes, so paste0(api_base, "/endpoint") never
# produces "//". An unset, blank, or slash-only PDFREST_URL falls back to the
# default host instead of yielding an empty base URL.
api_base <- sub("/+$", "", Sys.getenv("PDFREST_URL", unset = ""))
if (!nzchar(api_base)) {
  api_base <- "https://api.pdfrest.com"
}

# The first argument is the input PDF. With no arguments, args[1] would be
# NA (never NULL), so the missing case is tested explicitly. Directories are
# rejected because file.exists() is TRUE for them too.
args <- commandArgs(trailingOnly = TRUE)
pdf_path <- if (length(args) >= 1) args[[1]] else NA_character_
if (is.na(pdf_path) || !file.exists(pdf_path) || dir.exists(pdf_path)) {
  stderrf("Usage: Rscript markdown.R /path/to/file.pdf\n")
  quit(status = 1)
}
40+
41+
# Main flow: upload the PDF to /upload, then request Markdown from /markdown
# using the returned id. The /markdown response body is printed to stdout;
# the upload response and progress go to stderr via message(). Any error ends
# the script with exit status 1.
tryCatch({
  # Read the input inside the handler so that an unreadable file is reported
  # in the same "Error: ..." format as every other failure.
  filename <- basename(pdf_path)
  file_bytes <- readBin(pdf_path, what = "raw", n = file.info(pdf_path)$size)

  # Step 1: Upload the file to receive a reusable id
  upload_url <- paste0(api_base, "/upload")
  upload_resp <- httr::POST(
    upload_url,
    httr::add_headers(
      "api-key" = api_key,
      "content-filename" = filename,
      "Content-Type" = "application/octet-stream"
    ),
    body = file_bytes
  )

  upload_text <- httr::content(upload_resp, as = "text", encoding = "UTF-8")
  message(upload_text)
  if (httr::http_error(upload_resp)) {
    stop(sprintf("Upload failed with status %s", httr::status_code(upload_resp)))
  }

  upload_json <- jsonlite::fromJSON(upload_text)
  # jsonlite parses arrays of objects as a data.frame; handle that shape and
  # the plain-list shape, and tolerate a response with no `files` at all.
  files <- upload_json$files
  candidate_ids <- NULL
  if (is.data.frame(files)) {
    candidate_ids <- files$id
  } else if (is.list(files) && length(files) > 0 && is.list(files[[1]])) {
    candidate_ids <- files[[1]]$id
  }
  # Without this check a missing id would be serialised as an empty JSON
  # object and sent on to /markdown.
  if (length(candidate_ids) == 0 ||
      is.na(candidate_ids[[1]]) ||
      !nzchar(candidate_ids[[1]])) {
    stop("Upload response did not include a file id")
  }
  uploaded_id <- as.character(candidate_ids[[1]])
  message(sprintf("Successfully uploaded with an id of: %s", uploaded_id))

  # Step 2: Request Markdown output using the uploaded id
  md_url <- paste0(api_base, "/markdown")
  body <- jsonlite::toJSON(
    list(id = uploaded_id, page_break_comments = "on"),
    auto_unbox = TRUE
  )
  md_resp <- httr::POST(
    md_url,
    httr::add_headers(
      "api-key" = api_key,
      "Content-Type" = "application/json"
    ),
    body = body
  )

  # Print the body before checking the status so that the API's own error
  # payload is visible on failure as well.
  md_text <- httr::content(md_resp, as = "text", encoding = "UTF-8")
  cat(md_text)
  if (httr::http_error(md_resp)) {
    stop(sprintf("Markdown conversion failed with status %s", httr::status_code(md_resp)))
  }

}, error = function(e) {
  stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e))
  quit(status = 1)
})
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/env Rscript
2+
# What this sample does:
3+
# - Creates a rasterized version of a PDF using pdfRest.
4+
# - Uses a JSON payload in two steps: upload to /upload, then call /rasterized-pdf with the returned id.
5+
#
6+
# Setup (.Renviron):
7+
# - Copy .Renviron.example to .Renviron (R folder root)
8+
# - Set PDFREST_API_KEY=your_api_key_here
9+
# - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use:
10+
# PDFREST_URL=https://eu-api.pdfrest.com
11+
#
12+
# Usage:
13+
# Rscript "Endpoint Examples/JSON Payload/rasterized-pdf.R" /path/to/input.pdf
14+
#
15+
# Output:
16+
# - Prints the API JSON response to stdout. Non-2xx responses quit with a concise message.
17+
# - Tip: pipe output to a file: Rscript ... > response.json
18+
19+
# Fail fast when a required package is not installed. Nothing is attached
# here; the rest of the script reaches both packages through `pkg::fun()`.
suppressWarnings(suppressMessages({
  install_hints <- c(
    httr = "Please install 'httr' package",
    jsonlite = "Please install 'jsonlite' package"
  )
  for (pkg in names(install_hints)) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(install_hints[[pkg]])
    }
  }
}))
23+
24+
# Write a sprintf()-formatted message to standard error.
# All arguments are forwarded unchanged to sprintf(); no newline is appended,
# so callers include "\n" in the format string themselves.
stderrf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted, file = stderr())
}
25+
26+
# ---- Configuration and arguments ----

# The API key is mandatory; stop before any network call when it is absent.
api_key <- Sys.getenv("PDFREST_API_KEY", unset = "")
if (!nzchar(api_key)) {
  stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n")
  quit(status = 1)
}

# Base URL without trailing slashes, so paste0(api_base, "/endpoint") never
# produces "//". An unset, blank, or slash-only PDFREST_URL falls back to the
# default host instead of yielding an empty base URL.
api_base <- sub("/+$", "", Sys.getenv("PDFREST_URL", unset = ""))
if (!nzchar(api_base)) {
  api_base <- "https://api.pdfrest.com"
}

# The first argument is the input PDF. With no arguments, args[1] would be
# NA (never NULL), so the missing case is tested explicitly. Directories are
# rejected because file.exists() is TRUE for them too.
args <- commandArgs(trailingOnly = TRUE)
pdf_path <- if (length(args) >= 1) args[[1]] else NA_character_
if (is.na(pdf_path) || !file.exists(pdf_path) || dir.exists(pdf_path)) {
  stderrf("Usage: Rscript rasterized-pdf.R /path/to/file.pdf\n")
  quit(status = 1)
}
40+
41+
# Main flow: upload the PDF to /upload, then request a rasterized copy from
# /rasterized-pdf using the returned id. The /rasterized-pdf response body is
# printed to stdout; the upload response and progress go to stderr via
# message(). Any error ends the script with exit status 1.
tryCatch({
  # Read the input inside the handler so that an unreadable file is reported
  # in the same "Error: ..." format as every other failure.
  filename <- basename(pdf_path)
  file_bytes <- readBin(pdf_path, what = "raw", n = file.info(pdf_path)$size)

  # Step 1: Upload the file to receive a reusable id
  upload_url <- paste0(api_base, "/upload")
  upload_resp <- httr::POST(
    upload_url,
    httr::add_headers(
      "api-key" = api_key,
      "content-filename" = filename,
      "Content-Type" = "application/octet-stream"
    ),
    body = file_bytes
  )

  upload_text <- httr::content(upload_resp, as = "text", encoding = "UTF-8")
  message(upload_text)
  if (httr::http_error(upload_resp)) {
    stop(sprintf("Upload failed with status %s", httr::status_code(upload_resp)))
  }

  upload_json <- jsonlite::fromJSON(upload_text)
  # jsonlite parses arrays of objects as a data.frame; handle that shape and
  # the plain-list shape, and tolerate a response with no `files` at all.
  files <- upload_json$files
  candidate_ids <- NULL
  if (is.data.frame(files)) {
    candidate_ids <- files$id
  } else if (is.list(files) && length(files) > 0 && is.list(files[[1]])) {
    candidate_ids <- files[[1]]$id
  }
  # Without this check a missing id would be serialised as an empty JSON
  # object and sent on to /rasterized-pdf.
  if (length(candidate_ids) == 0 ||
      is.na(candidate_ids[[1]]) ||
      !nzchar(candidate_ids[[1]])) {
    stop("Upload response did not include a file id")
  }
  uploaded_id <- as.character(candidate_ids[[1]])
  message(sprintf("Successfully uploaded with an id of: %s", uploaded_id))

  # Step 2: Request a rasterized PDF using the uploaded id
  rast_url <- paste0(api_base, "/rasterized-pdf")
  body <- jsonlite::toJSON(list(id = uploaded_id), auto_unbox = TRUE)
  rast_resp <- httr::POST(
    rast_url,
    httr::add_headers(
      "api-key" = api_key,
      "Content-Type" = "application/json"
    ),
    body = body
  )

  # Print the body before checking the status so that the API's own error
  # payload is visible on failure as well.
  rast_text <- httr::content(rast_resp, as = "text", encoding = "UTF-8")
  cat(rast_text)
  if (httr::http_error(rast_resp)) {
    stop(sprintf("Rasterization failed with status %s", httr::status_code(rast_resp)))
  }

}, error = function(e) {
  stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e))
  quit(status = 1)
})
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env Rscript
2+
# What this sample does:
3+
# - Converts a PDF to Markdown using pdfRest.
4+
# - Sends a single multipart/form-data request directly to /markdown with the file.
5+
#
6+
# Setup (.Renviron):
7+
# - Copy .Renviron.example to .Renviron (R folder root)
8+
# - Set PDFREST_API_KEY=your_api_key_here
9+
# - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use:
10+
# PDFREST_URL=https://eu-api.pdfrest.com
11+
#
12+
# Usage:
13+
# Rscript "Endpoint Examples/Multipart Payload/markdown.R" /path/to/input.pdf
14+
#
15+
# Output:
16+
# - Prints the API JSON response to stdout. Non-2xx responses quit with a concise message.
17+
# - Tip: pipe output to a file: Rscript ... > response.json
18+
19+
# Fail fast when httr is not installed. The package is not attached; the
# script calls it through `httr::fun()`.
suppressWarnings(suppressMessages({
  httr_available <- requireNamespace("httr", quietly = TRUE)
  if (!httr_available) {
    stop("Please install 'httr' package")
  }
}))
22+
23+
# Write a sprintf()-formatted message to standard error.
# All arguments are forwarded unchanged to sprintf(); no newline is appended,
# so callers include "\n" in the format string themselves.
stderrf <- function(...) {
  formatted <- sprintf(...)
  cat(formatted, file = stderr())
}
24+
25+
# ---- Configuration and arguments ----

# The API key is mandatory; stop before any network call when it is absent.
api_key <- Sys.getenv("PDFREST_API_KEY", unset = "")
if (!nzchar(api_key)) {
  stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n")
  quit(status = 1)
}

# Base URL without trailing slashes, so paste0(api_base, "/endpoint") never
# produces "//". An unset, blank, or slash-only PDFREST_URL falls back to the
# default host instead of yielding an empty base URL.
api_base <- sub("/+$", "", Sys.getenv("PDFREST_URL", unset = ""))
if (!nzchar(api_base)) {
  api_base <- "https://api.pdfrest.com"
}

# The first argument is the input PDF. With no arguments, args[1] would be
# NA (never NULL), so the missing case is tested explicitly. Directories are
# rejected because file.exists() is TRUE for them too.
args <- commandArgs(trailingOnly = TRUE)
pdf_path <- if (length(args) >= 1) args[[1]] else NA_character_
if (is.na(pdf_path) || !file.exists(pdf_path) || dir.exists(pdf_path)) {
  stderrf("Usage: Rscript markdown.R /path/to/file.pdf\n")
  quit(status = 1)
}
39+
40+
# Main flow: send the PDF to /markdown in a single multipart/form-data
# request and print the response body to stdout. Any error ends the script
# with exit status 1.
#
# The `filename <- basename(pdf_path)` assignment that used to precede this
# block was never read anywhere in the script and has been removed.
tryCatch({
  conn_url <- paste0(api_base, "/markdown")

  # Build multipart form body. httr sets multipart/form-data with boundary.
  body <- list(
    file = httr::upload_file(pdf_path, type = "application/pdf"),
    output = "pdfrest_markdown",
    page_break_comments = "on"
    # Optional parameters:
    # page_range = "1-3"
  )

  resp <- httr::POST(
    conn_url,
    httr::add_headers("api-key" = api_key),
    body = body,
    encode = "multipart"
  )

  # Print the body before checking the status so that the API's own error
  # payload is visible on failure as well.
  txt <- httr::content(resp, as = "text", encoding = "UTF-8")
  cat(txt)
  if (httr::http_error(resp)) {
    stop(sprintf("Markdown conversion failed with status %s", httr::status_code(resp)))
  }

}, error = function(e) {
  stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e))
  quit(status = 1)
})

0 commit comments

Comments
 (0)