Skip to content

Commit a5f7ebf

Browse files
committed
add a dump route
1 parent 8b2b4cf commit a5f7ebf

4 files changed

Lines changed: 160 additions & 0 deletions

File tree

src/api/app.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import curiexploreRoutes from "./curiexplore/curiexplore.routes";
2222
import documentTypesRoutes from "./document-types/document-types.routes";
2323
import documentsRoutes from "./documents/documents.routes";
2424
import domainsRoutes from "./domains/domains.routes";
25+
import dumpRoutes from "./dump/dump.routes";
2526
import emailTypesRoutes from "./email-types/email-types.routes";
2627
import followUpsRoutes from "./followups/followups.routes";
2728
import geographicalcategoriesRoutes from "./geographicalcategories/geographicalcategories.routes";
@@ -132,6 +133,7 @@ app.use(curiexploreRoutes);
132133
app.use(documentsRoutes);
133134
app.use(documentTypesRoutes);
134135
app.use(domainsRoutes);
136+
app.use(dumpRoutes);
135137
app.use(emailTypesRoutes);
136138
app.use(followUpsRoutes);
137139
app.use(geographicalcategoriesRoutes);

src/api/dump/dump.routes.js

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import express from 'express';
2+
import { createGzip } from 'zlib';
3+
import { Transform } from 'stream';
4+
import { pipeline } from 'stream/promises';
5+
import { db } from '../../services/mongo.service';
6+
import { requireRoles } from '../commons/middlewares/rbac.middlewares';
7+
8+
// Router holding the dump endpoints; it is the module's default export and is
// mounted on the main app via `app.use(dumpRoutes)`.
const router = new express.Router();
9+
10+
/**
 * Transform stream that converts each incoming document into one line of
 * NDJSON (Newline Delimited JSON): `JSON.stringify(doc)` followed by `\n`.
 *
 * The stream runs in object mode, so every chunk written to it is treated as
 * a single document rather than a byte buffer.
 */
class NDJSONTransform extends Transform {
  constructor() {
    super({ objectMode: true });
  }

  /**
   * Serializes one document and forwards it downstream.
   *
   * @param {*} doc - Value to serialize (one chunk written to the stream).
   * @param {string} encoding - Unused; required by the Transform contract.
   * @param {Function} callback - Called exactly once, with the NDJSON line on
   *   success or with the serialization error on failure.
   */
  _transform(doc, encoding, callback) {
    let jsonLine;
    try {
      jsonLine = `${JSON.stringify(doc)}\n`;
    } catch (error) {
      // Only serialization failures (circular references, BigInt, ...) are
      // reported here.
      callback(error);
      return;
    }
    // Kept outside the try block: if pushing downstream throws synchronously,
    // the catch above must not invoke `callback` a second time.
    callback(null, jsonLine);
  }
}
28+
29+
/**
 * GET /dump/structures
 *
 * Streams every document of the `structures-dump` collection to the client as
 * gzip-compressed NDJSON. Access is restricted by `requireRoles(['admin'])`.
 *
 * Error handling: `pipeline` destroys the response when any stage fails, so a
 * JSON error body can only be delivered for failures that happen before
 * streaming starts. Once the response is destroyed or headers are sent, the
 * handler only logs the error and makes sure the connection is torn down.
 */
router.route('/dump/structures')
  .get([
    requireRoles(['admin']),
    async (req, res) => {
      try {
        const cursor = db.collection('structures-dump').find({}, {
          batchSize: 1000,
          noCursorTimeout: false,
        });

        const ndjsonTransform = new NDJSONTransform();
        const gzip = createGzip({ level: 6 });

        // Listen on the response, not the request: on recent Node versions the
        // request's 'close' event can fire as soon as the request has been
        // read, which would close the cursor while the dump is still streaming.
        // The returned promise is caught so a failed close never becomes an
        // unhandled rejection.
        res.on('close', () => {
          Promise.resolve(cursor.close()).catch(() => {});
        });

        res.setHeader('Content-Type', 'application/x-ndjson');
        res.setHeader('Content-Encoding', 'gzip');
        res.setHeader('Content-Disposition', 'attachment; filename="structures-dump.ndjson.gz"');
        res.setHeader('Transfer-Encoding', 'chunked');
        res.setHeader('Cache-Control', 'no-cache');

        await pipeline(
          cursor.stream(),
          ndjsonTransform,
          gzip,
          res,
        );
        return undefined;
      } catch (error) {
        console.error(error);

        // Part of the body may already be on the wire, or `pipeline` has
        // already destroyed the socket: a JSON error can no longer be sent and
        // trying to would throw inside this async handler.
        if (res.headersSent || res.destroyed) {
          if (!res.destroyed) {
            res.destroy(error);
          }
          return undefined;
        }

        // The error body is plain JSON, so drop the headers that describe the
        // gzip/NDJSON download before answering.
        ['Content-Type', 'Content-Encoding', 'Content-Disposition', 'Transfer-Encoding']
          .forEach((name) => res.removeHeader(name));
        return res.status(500).json({ error: 'Failed to generate dump', message: error.message });
      }
    },
  ]);
64+
65+
export default router;

src/jobs/dump/structures.js

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import currentCategoryQuery from "../../api/commons/queries/current-category.query";
import currentEmailsQuery from "../../api/commons/queries/current-emails.query";
import currentIdentifiersQuery from "../../api/commons/queries/current-identifiers.query";
import currentLegalCategoryQuery from "../../api/commons/queries/current-legal-category.query";
import currentLocalisationQuery from "../../api/commons/queries/current-localisation.query";
import currentNameQuery from "../../api/commons/queries/current-name.query";
import currentSocialsQuery from "../../api/commons/queries/current-socials.query";
import currentWebsitesQuery from "../../api/commons/queries/current-websites.query";
// NOTE(review): `metas` is used below but was never imported; this path is
// assumed from the naming of the sibling query modules — confirm it exists.
import metas from "../../api/commons/queries/metas.query";
import relationsQuery from "../../api/commons/queries/relations.query";
import { db } from "../../services/mongo.service";
10+
11+
12+
/**
 * Aggregation pipeline that builds the `structures-dump` collection.
 *
 * Stages, in order:
 *  1. the shared `metas` and `current*` query fragments;
 *  2. two `$lookup`s on `relationships`, matching the structure `id` against
 *     `resourceId` (as `rel1`) and `relatedObjectId` (as `rel2`), each refined
 *     by `relationsQuery`;
 *  3. a `$set` merging both lookups into `relations`;
 *  4. a `$project` that shapes the dumped document, defaulting missing values
 *     to `null`, `{}` or `[]`;
 *  5. `$out`, which writes the result to `structures-dump`.
 */
const structureDumpQuery = [
  ...metas,
  ...currentCategoryQuery,
  ...currentEmailsQuery,
  ...currentIdentifiersQuery,
  ...currentLegalCategoryQuery,
  ...currentLocalisationQuery,
  ...currentNameQuery,
  ...currentSocialsQuery,
  ...currentWebsitesQuery,
  {
    $lookup: {
      from: 'relationships',
      localField: 'id',
      foreignField: 'resourceId',
      pipeline: [...relationsQuery],
      as: 'rel1',
    },
  },
  {
    $lookup: {
      from: 'relationships',
      localField: 'id',
      foreignField: 'relatedObjectId',
      pipeline: [...relationsQuery],
      as: 'rel2',
    },
  },
  {
    $set: {
      relations: {
        $concatArrays: ['$rel1', '$rel2'],
      },
    },
  },
  {
    $project: {
      _id: 0,
      id: 1,
      object: 'structures',
      status: { $ifNull: ['$structureStatus', null] },
      displayName: '$currentName.usualName',
      currentName: { $ifNull: ['$currentName', {}] },
      descriptionEn: { $ifNull: ['$descriptionEn', null] },
      descriptionFr: { $ifNull: ['$descriptionFr', null] },
      currentLocalisation: { $ifNull: ['$currentLocalisation', {}] },
      localisations: 1,
      category: { $ifNull: ['$category', {}] },
      legalcategory: { $ifNull: ['$legalcategory', {}] },
      identifiers: { $ifNull: ['$identifiers', []] },
      relations: { $ifNull: ['$relations', []] },
      socialmedias: { $ifNull: ['$socialmedias', []] },
      categories: { $ifNull: ['$categories', []] },
      closureDate: { $ifNull: ['$closureDate', null] },
      createdAt: 1,
      creationDate: { $ifNull: ['$creationDate', null] },
      emails: { $ifNull: ['$emails', []] },
      websites: { $ifNull: ['$websites', []] },
      // `$$NOW` is resolved by the server each time the pipeline runs. A
      // `new Date()` literal here would be evaluated once, when this module is
      // loaded, and would stamp every later dump with the process start time.
      dumpedAt: '$$NOW',
    },
  },
  {
    $out: 'structures-dump',
  },
];
77+
78+
79+
/**
 * Rebuilds the `structures-dump` collection by running `structureDumpQuery`
 * on the `structures` collection (with `allowDiskUse` enabled).
 *
 * The pipeline ends with `$out`, so the documents are written to the target
 * collection instead of being returned; `toArray()` is called to drain the
 * cursor so the aggregation actually executes, and `data` is expected to be
 * an empty array.
 *
 * @returns {Promise<{status: string, data: Array}>} `status` is `'success'`
 *   when the aggregation completed.
 * @throws Rethrows any error raised by the aggregation after logging it.
 */
export default async function createStructuresDump() {
  try {
    const res = await db
      .collection('structures')
      .aggregate(structureDumpQuery, { allowDiskUse: true })
      .toArray();
    return { status: 'success', data: res };
  } catch (error) {
    console.error('Error creating structures dump:', error);
    throw error;
  }
}

src/jobs/index.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
} from "./emails";
1616
import reindex from "./indexer";
1717
import updateKeyNumbers from "./key-numbers";
18+
import createStructuresDump from "./dump/structures";
1819
import {
1920
exportFrEsrAnnelisPaysageEtablissements,
2021
exportFrEsrPaysageFonctionsGourvernance,
@@ -48,6 +49,11 @@ agenda.define(
4849
{ shouldSaveResult: true },
4950
sendNewUserNotificationEmail,
5051
);
52+
// Job that rebuilds the structures dump. The handler must be
// `createStructuresDump` (imported from "./dump/structures"); registering the
// e-mail handler here would send a notification instead of creating the dump.
agenda.define(
  "create structure dump",
  { shouldSaveResult: true },
  createStructuresDump,
);
5157
agenda.define(
5258
"send welcome email",
5359
{ shouldSaveResult: true },

0 commit comments

Comments
 (0)