Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 203 additions & 0 deletions controllers/bulk.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#!/usr/bin/env node

/**
* Bulk operations controller for RERUM operations
* Handles bulk create and bulk update operations
* @author Claude Sonnet 4, cubap, thehabes
*/

import { newID, isValidID, db } from '../database/index.js'
import utils from '../utils.js'
import { _contextid, ObjectID, createExpressError, getAgentClaim, parseDocumentID, idNegotiation } from './utils.js'

/**
 * Create many objects at once with the power of MongoDB bulkWrite() operations.
 *
 * The request body must be a non-empty array of JSON objects. No item may declare an
 * identifier: `@id` is always refused, and `id` is refused too when `_contextid()` accepts
 * the item's `@context`. Empty `{}` items are skipped rather than created.
 *
 * @param {object} req - Express request; `req.body` is the array of documents to create.
 * @param {object} res - Express response; on success it gets status 201, a `Link` header and
 *                       the inserted documents after they pass through `idNegotiation()`.
 * @param {function} next - Express callback; receives a `createExpressError()` result on failure.
 * @see https://www.mongodb.com/docs/manual/reference/method/db.collection.bulkWrite/
 */
const bulkCreate = async function (req, res, next) {
    res.set("Content-Type", "application/json; charset=utf-8")
    const documents = req.body
    const err = {}
    if (!Array.isArray(documents)) {
        err.message = "The request body must be an array of objects."
        err.status = 400
        next(createExpressError(err))
        return
    }
    if (documents.length === 0) {
        err.message = "No action on an empty array."
        err.status = 400
        next(createExpressError(err))
        return
    }
    // Collect every item that breaks the rules. The callback must return a real boolean:
    // returning the item itself lets falsy values such as 0, "" or false slip through.
    const gatekeep = documents.filter(d => {
        // Each item must be a JSON object. typeof null is "object", so null is refused
        // explicitly; otherwise the property reads below would throw outside any try/catch.
        if (d === null || Array.isArray(d) || typeof d !== "object") return true
        try {
            JSON.parse(JSON.stringify(d))
        } catch (jsonError) {
            return true
        }
        // Items must not have an @id, and in some cases same for id.
        const idcheck = _contextid(d["@context"]) ? (d.id ?? d["@id"]) : d["@id"]
        return Boolean(idcheck)
    })
    if (gatekeep.length > 0) {
        err.message = "All objects in the body of a `/bulkCreate` must be JSON and must not contain a declared identifier property."
        err.status = 400
        next(createExpressError(err))
        return
    }

    // TODO: bulkWrite SLUGS? Maybe assign an id to each document and then use that to create the slug?

    // unordered bulkWrite() operations have better performance metrics.
    const bulkOps = []
    const generatorAgent = getAgentClaim(req, next)
    for (let d of documents) {
        // Do not create empty {}s
        if (Object.keys(d).length === 0) continue
        const providedID = d._id
        const id = isValidID(providedID) ? providedID : ObjectID()
        d = utils.configureRerumOptions(generatorAgent, d)
        // id is also protected in this case, so it can't be set.
        if (_contextid(d["@context"])) delete d.id
        d._id = id
        d['@id'] = `${process.env.RERUM_ID_PREFIX}${id}`
        bulkOps.push({ insertOne : { "document" : d }})
    }
    // Every item was an empty {} and got skipped. Answer with a clear client error instead
    // of handing an empty batch to bulkWrite().
    // NOTE(review): assumes db.bulkWrite rejects an empty batch, as the MongoDB driver does - confirm.
    if (bulkOps.length === 0) {
        err.message = "No action on an array that only contains empty objects."
        err.status = 400
        next(createExpressError(err))
        return
    }
    try {
        const dbResponse = await db.bulkWrite(bulkOps, {'ordered':false})
        res.set("Link", dbResponse.result.insertedIds.map(r => `${process.env.RERUM_ID_PREFIX}${r._id}`)) // https://www.rfc-editor.org/rfc/rfc5988
        res.status(201)
        // The response echoes the documents submitted for insert; it is not re-read from the database.
        const estimatedResults = bulkOps.map(f => idNegotiation(f.insertOne.document))
        res.json(estimatedResults) // https://www.rfc-editor.org/rfc/rfc7231#section-6.3.2
    }
    catch (error) {
        //MongoServerError from the client has the following properties: index, code, keyPattern, keyValue
        next(createExpressError(error))
    }
}

/**
 * Update many objects at once with the power of MongoDB bulkWrite() operations.
 * Make sure to alter object __rerum.history as appropriate.
 * The same object may be updated more than once, which will create history branches (not straight sticks)
 *
 * The request body must be a non-empty array of JSON objects that each declare an identifier
 * (`@id`, or `id` when `_contextid()` accepts the item's `@context`). For each item whose
 * identifier contains RERUM_ID_PREFIX and resolves to an existing, non-deleted object, a new
 * version is inserted and its `@id` is appended to the original's `__rerum.history.next`.
 * Items that do not resolve are skipped silently.
 *
 * @param {object} req - Express request; `req.body` is the array of documents to update.
 * @param {object} res - Express response; on success it gets status 200, a `Link` header and
 *                       the inserted versions after they pass through `idNegotiation()`.
 * @param {function} next - Express callback; receives a `createExpressError()` result on failure.
 * @see https://www.mongodb.com/docs/manual/reference/method/db.collection.bulkWrite/
 */
const bulkUpdate = async function (req, res, next) {
    res.set("Content-Type", "application/json; charset=utf-8")
    const documents = req.body
    const err = {}
    if (!Array.isArray(documents)) {
        err.message = "The request body must be an array of objects."
        err.status = 400
        next(createExpressError(err))
        return
    }
    if (documents.length === 0) {
        err.message = "No action on an empty array."
        err.status = 400
        next(createExpressError(err))
        return
    }
    // Collect every item that breaks the rules. The callback must return a real boolean:
    // returning the item itself lets falsy values such as 0, "" or false slip through.
    const gatekeep = documents.filter(d => {
        // Each item must be a JSON object. typeof null is "object", so null is refused
        // explicitly; otherwise the property reads below would throw outside any try/catch.
        if (d === null || Array.isArray(d) || typeof d !== "object") return true
        try {
            JSON.parse(JSON.stringify(d))
        } catch (jsonError) {
            return true
        }
        // Items must have an @id, or in some cases an id will do
        const idcheck = _contextid(d["@context"]) ? (d.id ?? d["@id"]) : d["@id"]
        return !idcheck
    })
    // The empty {}s will cause this error
    if (gatekeep.length > 0) {
        err.message = "All objects in the body of a `/bulkUpdate` must be JSON and must contain a declared identifier property."
        err.status = 400
        next(createExpressError(err))
        return
    }
    // unordered bulkWrite() operations have better performance metrics.
    const bulkOps = []
    const generatorAgent = getAgentClaim(req, next)
    for (const objectReceived of documents) {
        // We know it has an id
        const idReceived = objectReceived["@id"] ?? objectReceived.id
        // Update the same thing twice? can vs should.
        // NOTE(review): every occurrence re-reads the original and queues its own replaceOne,
        // each carrying only its own new @id in history.next. When one request updates the
        // same object twice, whichever replaceOne runs last drops the other link - confirm
        // whether duplicates should be merged or refused.
        // A non-string identifier cannot belong to this RERUM; skip it instead of throwing on .includes().
        if (typeof idReceived !== "string" || !idReceived.includes(process.env.RERUM_ID_PREFIX)) continue
        let id = parseDocumentID(idReceived)
        let originalObject
        try {
            originalObject = await db.findOne({"$or":[{"_id": id}, {"__rerum.slug": id}]})
        } catch (error) {
            next(createExpressError(error))
            return
        }
        if (null === originalObject) continue
        if (utils.isDeleted(originalObject)) continue
        // From here on `id` is the identifier of the new version, not of the original.
        id = ObjectID()
        const context = objectReceived["@context"] ? { "@context": objectReceived["@context"] } : {}
        const rerumProp = { "__rerum": utils.configureRerumOptions(generatorAgent, originalObject, true, false)["__rerum"] }
        delete objectReceived["__rerum"]
        delete objectReceived["_id"]
        delete objectReceived["@id"]
        // id is also protected in this case, so it can't be set.
        if (_contextid(objectReceived["@context"])) delete objectReceived.id
        delete objectReceived["@context"]
        // Key order is deliberate: @context and @id first, __rerum and _id last.
        const newObject = Object.assign(context, { "@id": process.env.RERUM_ID_PREFIX + id }, objectReceived, rerumProp, { "_id": id })
        bulkOps.push({ insertOne : { "document" : newObject }})
        if (!originalObject.__rerum.history.next.includes(newObject["@id"])) {
            originalObject.__rerum.history.next.push(newObject["@id"])
            bulkOps.push({
                replaceOne : {
                    "filter" : { "_id": originalObject["_id"] },
                    "replacement" : originalObject,
                    "upsert" : false
                }
            })
        }
    }
    // Every item was skipped (foreign identifier, unknown object or deleted object). Answer
    // with a clear client error instead of handing an empty batch to bulkWrite().
    // NOTE(review): assumes db.bulkWrite rejects an empty batch, as the MongoDB driver does - confirm.
    if (bulkOps.length === 0) {
        err.message = "None of the objects in the body of a `/bulkUpdate` matched an existing RERUM object that can be updated."
        err.status = 404
        next(createExpressError(err))
        return
    }
    try {
        const dbResponse = await db.bulkWrite(bulkOps, {'ordered':false})
        res.set("Link", dbResponse.result.insertedIds.map(r => `${process.env.RERUM_ID_PREFIX}${r._id}`)) // https://www.rfc-editor.org/rfc/rfc5988
        res.status(200)
        // Only the inserted versions are echoed; the replaceOne ops on the originals are not.
        const estimatedResults = bulkOps.filter(f => f.insertOne).map(f => idNegotiation(f.insertOne.document))
        res.json(estimatedResults) // https://www.rfc-editor.org/rfc/rfc7231#section-6.3.2
    }
    catch (error) {
        //MongoServerError from the client has the following properties: index, code, keyPattern, keyValue
        next(createExpressError(error))
    }
}

export { bulkCreate, bulkUpdate }
127 changes: 127 additions & 0 deletions controllers/crud.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#!/usr/bin/env node

/**
* Basic CRUD operations for RERUM v1
* @author Claude Sonnet 4, cubap, thehabes
*/
import { newID, isValidID, db } from '../database/index.js'
import utils from '../utils.js'
import { _contextid, idNegotiation, generateSlugId, ObjectID, createExpressError, getAgentClaim, parseDocumentID } from './utils.js'

/**
 * Create a new Linked Open Data object in RERUM v1.
 * Property order is deliberate: @context and @id lead, __rerum and _id trail.
 * Responds RESTfully: 201, a Location header and the stored object as JSON.
 *
 * @param {object} req - Express request; the body is the object to store, and an optional
 *                       `Slug` header is resolved through `generateSlugId()`.
 * @param {object} res - Express response.
 * @param {function} next - Express callback; receives a `createExpressError()` result on failure.
 */
const create = async function (req, res, next) {
    res.set("Content-Type", "application/json; charset=utf-8")
    let slug = ""
    const requestedSlug = req.get("Slug")
    if (requestedSlug) {
        const slugOutcome = await generateSlugId(requestedSlug, next)
        // An outcome that carries a `code` is treated as an error and forwarded as-is.
        if (slugOutcome.code) {
            next(createExpressError(slugOutcome))
            return
        }
        slug = slugOutcome.slug_id
    }

    const agent = getAgentClaim(req, next)
    const suppliedContext = req.body["@context"]
    const leading = suppliedContext ? { "@context": suppliedContext } : {}
    // Work on a JSON round-trip copy so the request body itself is never modified.
    const body = JSON.parse(JSON.stringify(req.body))
    const { __rerum } = utils.configureRerumOptions(agent, body, false, false)
    __rerum.slug = slug
    const trailing = { __rerum }
    const candidateID = body._id
    const id = isValidID(candidateID) ? candidateID : ObjectID()
    // Drop the protected properties the client may have sent along.
    for (const protectedKey of ["__rerum", "@id"]) {
        delete body[protectedKey]
    }
    // id is also protected in this case, so it can't be set.
    if (_contextid(body["@context"])) {
        delete body.id
    }
    delete body["@context"]

    let created = Object.assign(leading, { "@id": process.env.RERUM_ID_PREFIX + id }, body, trailing, { "_id": id })
    console.log("CREATE")
    try {
        await db.insertOne(created)
        res.set(utils.configureWebAnnoHeadersFor(created))
        created = idNegotiation(created)
        created.new_obj_state = JSON.parse(JSON.stringify(created))
        const identifierKey = _contextid(created["@context"]) ? "id" : "@id"
        res.location(created[identifierKey])
        res.status(201)
        res.json(created)
    }
    catch (error) {
        //MongoServerError from the client has the following properties: index, code, keyPattern, keyValue
        next(createExpressError(error))
    }
}

/**
 * Query the MongoDB for objects containing the key:value pairs provided in the JSON Object in the request body.
 * This will support wildcards and mongo params like {"key":{$exists:true}}
 * The return is always an array, even if 0 or 1 objects in the return.
 *
 * @param {object} req - Express request; the body is the query object, and the optional
 *                       `limit` (default 100) and `skip` (default 0) come from the query string.
 * @param {object} res - Express response; receives the matching objects as a JSON array.
 * @param {function} next - Express callback; receives a `createExpressError()` result on failure.
 * */
const query = async function (req, res, next) {
    res.set("Content-Type", "application/json; charset=utf-8")
    const props = req.body
    // A missing or non-object body is handled like an empty one, so Object.keys() never
    // throws outside the try/catch below.
    if (props === null || typeof props !== "object" || Object.keys(props).length === 0) {
        //Hey now, don't ask for everything...this can happen by accident. Don't allow it.
        const err = {
            message: "Detected empty JSON object. You must provide at least one property in the /query request body JSON.",
            status: 400
        }
        next(createExpressError(err))
        return
    }
    // Always parse base 10, and refuse values that are not numbers before they reach the cursor.
    const limit = Number.parseInt(req.query.limit ?? 100, 10)
    const skip = Number.parseInt(req.query.skip ?? 0, 10)
    if (Number.isNaN(limit) || Number.isNaN(skip)) {
        const err = {
            message: "The `limit` and `skip` query parameters must be integers.",
            status: 400
        }
        next(createExpressError(err))
        return
    }
    try {
        // NOTE(review): code scanning flags this call as a database query built from
        // user-controlled input. Query operators in the body are a documented feature of this
        // endpoint, so restricting values to string/number/boolean literals (the suggested
        // autofix) would break it. Any hardening should target specific operators instead -
        // to be decided.
        let matches = await db.find(props).limit(limit).skip(skip).toArray()
        matches = matches.map(o => idNegotiation(o))
        res.set(utils.configureLDHeadersFor(matches))
        res.json(matches)
    } catch (error) {
        next(createExpressError(error))
    }
}

/**
 * Look up a single RERUM object by the `_id` route parameter.
 * The lookup matches either the document `_id` or its `__rerum.slug`; the @id pattern is irrelevant.
 * Note /v1/id/{blank} does not route here. It routes to the generic 404
 *
 * @param {object} req - Express request; `req.params._id` is the identifier or slug.
 * @param {object} res - Express response; receives the object as JSON plus caching headers.
 * @param {function} next - Express callback; receives a 404 or a wrapped database error.
 * */
const id = async function (req, res, next) {
    res.set("Content-Type", "application/json; charset=utf-8")
    const requestedId = req.params["_id"]
    try {
        let found = await db.findOne({ "$or": [{ "_id": requestedId }, { "__rerum.slug": requestedId }] })
        if (!found) {
            const notFound = {
                "message": `No RERUM object with id '${requestedId}'`,
                "status": 404
            }
            next(createExpressError(notFound))
            return
        }
        res.set(utils.configureWebAnnoHeadersFor(found))
        //Support built in browser caching
        res.set("Cache-Control", "max-age=86400, must-revalidate")
        //Support requests with 'If-Modified_Since' headers
        res.set(utils.configureLastModifiedHeader(found))
        // Include current version for optimistic locking
        const overwrittenVersion = found.__rerum?.isOverwritten ?? ""
        res.set('Current-Overwritten-Version', overwrittenVersion)
        found = idNegotiation(found)
        const location = _contextid(found["@context"]) ? found.id : found["@id"]
        res.location(location)
        res.json(found)
    } catch (error) {
        next(createExpressError(error))
    }
}

export {
create,
query,
id
}
Loading
Loading