11import { parse } from 'csv-parse'
22
3- import {
4- bulkUpsertProjectCatalog ,
5- findLatestProjectCatalogSyncedAt ,
6- } from '@crowd/data-access-layer'
3+ import { bulkInsertProjectCatalog } from '@crowd/data-access-layer'
74import { IDbProjectCatalogCreate } from '@crowd/data-access-layer/src/project-catalog/types'
85import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor'
96import { getServiceLogger } from '@crowd/logging'
@@ -23,13 +20,9 @@ export async function listSources(): Promise<string[]> {
2320export async function listDatasets ( sourceName : string ) : Promise < IDatasetDescriptor [ ] > {
2421 const source = getSource ( sourceName )
2522
26- const qx = pgpQx ( svc . postgres . reader . connection ( ) )
27- const latestSyncedAt = await findLatestProjectCatalogSyncedAt ( qx )
28- const scoredAfter = latestSyncedAt ? latestSyncedAt . slice ( 0 , 10 ) : undefined
23+ log . info ( { sourceName } , 'Listing datasets.' )
2924
30- log . info ( { sourceName, scoredAfter : scoredAfter ?? 'none (full fetch)' } , 'Listing datasets.' )
31-
32- const datasets = await source . listAvailableDatasets ( { scoredAfter } )
25+ const datasets = await source . listAvailableDatasets ( )
3326
3427 log . info ( { sourceName, count : datasets . length , newest : datasets [ 0 ] ?. id } , 'Datasets listed.' )
3528
@@ -104,7 +97,7 @@ export async function processDataset(
10497 if ( batch . length >= BATCH_SIZE ) {
10598 batchNumber ++
10699
107- await bulkUpsertProjectCatalog ( qx , batch )
100+ await bulkInsertProjectCatalog ( qx , batch )
108101 totalProcessed += batch . length
109102 batch = [ ]
110103
@@ -119,7 +112,7 @@ export async function processDataset(
119112 { sourceName, datasetId : dataset . id , batchSize : batch . length } ,
120113 'Flushing final batch...' ,
121114 )
122- await bulkUpsertProjectCatalog ( qx , batch )
115+ await bulkInsertProjectCatalog ( qx , batch )
123116 totalProcessed += batch . length
124117 }
125118
0 commit comments