-
Notifications
You must be signed in to change notification settings - Fork 33
1510 lines (1373 loc) · 73.3 KB
/
Copy pathmain.yml
File metadata and controls
1510 lines (1373 loc) · 73.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Manually dispatched end-to-end run; the only trigger is workflow_dispatch.
name: Automated DataPusher+ Testing Run

on:
  workflow_dispatch:
    inputs:
      # Optional branch selector; an empty dispatch uses the default below.
      branch:
        description: 'datapusher-plus branch to clone & test (defaults to main)'
        required: false
        type: string
        default: 'main'
# Workflow-level environment, visible to every job and step.
env:
  FILES_DIR: 'custom'
  # Resolves to "main" when the dispatch carries no `branch` input, which keeps
  # the zero-argument invocation working. Pass `branch=<name>` to override.
  DATAPUSHER_BRANCH: ${{ inputs.branch || 'main' }}
  # Opt JavaScript actions into Node 24 ahead of the 2026-06-02 default switch.
  # This silences the deprecation annotation on actions/checkout@v4 and
  # actions/upload-artifact@v4 without bumping their versions.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true'
  CKAN_VERSION: '2.11'
  POSTGRES_PASSWORD: postgres
  CKAN_DB_PASSWORD: pass
  CKAN_SITE_URL: http://localhost:5000
  CKAN_SITE_ID: default
  CKAN_SITE_TITLE: 'CKAN Test Instance'
  # Quoted so the release tag stays a string.
  QSV_VER: '20.1.0'
jobs:
  setup:
    runs-on: ubuntu-latest
    # All steps run inside the CKAN dev image, as root.
    container:
      image: ckan/ckan-dev:2.11
      options: --user root
    # Backing services; steps address them by service name (solr, postgres,
    # redis) in the connection URLs below.
    services:
      solr:
        image: ckan/ckan-solr:2.11-solr9
        ports:
          - '8983:8983'
      postgres:
        image: postgres:15
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: postgres
        ports:
          - '5432:5432'
        # Health check so the service is only reported healthy once
        # pg_isready succeeds.
        options: >-
          --health-cmd "pg_isready -h 127.0.0.1 -U postgres -p 5432"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 10
          --health-start-period 10s
      redis:
        image: redis:3
        ports:
          - '6379:6379'
    # Job-level environment (available inside the container to every step).
    env:
      CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test
      CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test
      CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test
      CKAN_SOLR_URL: http://solr:8983/solr/ckan
      CKAN_REDIS_URL: redis://redis:6379/1
      CKAN_SITE_URL: http://localhost:5000
    steps:
# Opens up the runner work directories and installs the CLI tools that the
# later steps call (curl, wget, psql, jq, ...).
- name: Fix permissions and install essential tools
  run: |
    mkdir -p /__w/_temp
    for dir in /__w/_temp /__w/; do
      chmod -R 777 "$dir"
    done
    apt-get update -y
    apt-get install -y \
      curl wget net-tools procps postgresql-client jq
    echo "Essential tools installed successfully"
# Check out the repository; no `ref` is given, so the action's default ref is used.
# NOTE(review): DATAPUSHER_BRANCH is not passed here — confirm it is consumed elsewhere.
- uses: actions/checkout@v4
# Polls the postgres service every 3 seconds for up to 90 seconds and fails
# the step if a trivial query never succeeds.
- name: Wait for PostgreSQL to be ready
  run: |
    echo "Waiting for PostgreSQL to be ready..."
    remaining=90
    until PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "SELECT 1;" >/dev/null 2>&1; do
      echo "Postgres not ready yet ($remaining s left)..."
      sleep 3
      remaining=$((remaining-3))
      # Budget exhausted without a successful connection: give up.
      if [ "$remaining" -le 0 ]; then
        echo "Timeout waiting for PostgreSQL"
        exit 1
      fi
    done
    echo "PostgreSQL is ready!"
# Creates the CKAN/datastore roles and databases (skipping any that already
# exist) and applies the grants CKAN needs on the postgres:15 service.
- name: Setup database users and permissions
  run: |
    set -eu

    # Run psql as the postgres superuser; all arguments are passed through.
    admin_psql() {
      PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres "$@"
    }

    # Create role $1 with the shared CKAN password unless pg_roles lists it.
    ensure_role() {
      admin_psql -Atc "SELECT 1 FROM pg_roles WHERE rolname='$1'" | grep -q 1 || \
        admin_psql -c "CREATE USER $1 WITH PASSWORD '$CKAN_DB_PASSWORD';"
    }

    # Create database $1 owned by $2 unless pg_database lists it.
    ensure_database() {
      admin_psql -Atc "SELECT 1 FROM pg_database WHERE datname='$1'" | grep -q 1 || \
        admin_psql -c "CREATE DATABASE $1 OWNER $2;"
    }

    echo "Creating database users (if not exist)..."
    ensure_role ckan_default
    ensure_role datastore_write
    ensure_role datastore_read

    echo "Creating databases (if not exist)..."
    ensure_database ckan_test ckan_default
    ensure_database datastore_test ckan_default
    # Prefect server backing database (shared Postgres; separate logical DB).
    ensure_database prefect_test postgres

    echo "Granting permissions (best-effort)..."
    admin_psql -c "GRANT ALL PRIVILEGES ON DATABASE ckan_test TO ckan_default;"
    admin_psql -c "GRANT ALL PRIVILEGES ON DATABASE datastore_test TO datastore_write;"
    admin_psql -c "GRANT CONNECT ON DATABASE datastore_test TO datastore_read;"

    # PostgreSQL 15+ removed the implicit CREATE on the public schema for
    # non-owners, so vanilla postgres:15 needs explicit schema grants for
    # `ckan db init` and `datastore set-permissions`.
    admin_psql -d ckan_test -c "GRANT USAGE, CREATE ON SCHEMA public TO ckan_default;"
    admin_psql -d datastore_test -c "GRANT USAGE, CREATE ON SCHEMA public TO datastore_write;"
    admin_psql -d datastore_test -c "GRANT USAGE ON SCHEMA public TO datastore_read;"
    admin_psql -d datastore_test -c "GRANT USAGE ON SCHEMA public TO ckan_default;"
    echo "Database setup completed"
# Installs system libraries (including GDAL), the matching GDAL Python
# bindings, the repository's own requirements, ckanapi, datapusher-plus from
# the checked-out workspace, and ckanext-scheming.
- name: Install requirements, ckanapi and datapusher-plus
  run: |
    set -eu

    # System dependencies first (including GDAL and geospatial libraries).
    # Issue #221: `b3sum` is the BLAKE3 CLI. DP+'s download stage hashes via
    # the Python `blake3` package; the CLI is installed so test scripts and
    # ad-hoc CI diagnostics can verify hashes with the same algorithm.
    apt-get update
    apt-get install -y \
      python3-virtualenv python3-dev python3-pip python3-wheel \
      build-essential libxslt1-dev libxml2-dev libffi-dev libpq-dev zlib1g-dev \
      git uchardet unzip \
      gdal-bin libgdal-dev libspatialindex-dev libgeos-dev libproj-dev \
      b3sum

    # The Python bindings must match the system GDAL version exactly.
    export GDAL_VERSION=$(gdal-config --version)
    echo "GDAL version: $GDAL_VERSION"

    # Header locations used when the GDAL bindings are compiled.
    export CPLUS_INCLUDE_PATH=/usr/include/gdal
    export C_INCLUDE_PATH=/usr/include/gdal

    # Upgrade pip and build tools, then install the pinned GDAL bindings.
    python3 -m pip install --upgrade pip setuptools wheel
    pip install "GDAL==$GDAL_VERSION"

    # Project requirement files in the working directory, when present.
    for req_file in requirements.txt requirements-dev.txt; do
      if [ -f "$req_file" ]; then
        pip install -r "$req_file"
      fi
    done

    # Editable install of the current directory if it is a Python project.
    if [ -f setup.py ] || [ -f pyproject.toml ]; then
      pip install -e .
    fi

    # Core dependencies.
    pip install --upgrade ckanapi

    # Install datapusher-plus from the checked-out workspace so the code of
    # the branch under test is what gets exercised, not whatever is on main.
    echo "Installing datapusher-plus from $GITHUB_WORKSPACE"
    if [ -f "$GITHUB_WORKSPACE/requirements.txt" ]; then
      pip install -r "$GITHUB_WORKSPACE/requirements.txt"
    fi
    pip install -e "$GITHUB_WORKSPACE"

    # ckanext-scheming straight from its repository.
    pip install -e 'git+https://github.com/ckan/ckanext-scheming.git#egg=ckanext-scheming'
    echo "Installation complete: GDAL, ckanapi, datapusher-plus, and ckanext-scheming"
# Best-effort download of the static qsv release archive. Prefers the `qsvdp`
# binary and falls back to `qsv`; a missing binary only produces a message.
- name: Install qsv (musl static)
  run: |
    set -eu
    echo "Attempting to download static qsv musl binary (best-effort)..."
    archive="qsv-${QSV_VER}-x86_64-unknown-linux-musl.zip"
    release_url="https://github.com/dathere/qsv/releases/download/${QSV_VER}/${archive}"
    mkdir -p /tmp/qsv && cd /tmp/qsv
    if ! wget -q --spider "$release_url"; then
      echo "qsv release URL not reachable; skipping qsv install"
    else
      wget -q "$release_url" -O "$archive"
      unzip -o "$archive"
      # Take the first of 'qsvdp' / 'qsv' that the archive contains.
      installed=""
      for candidate in qsvdp qsv; do
        if [ -f "$candidate" ]; then
          mv "$candidate" "/usr/local/bin/$candidate"
          chmod +x "/usr/local/bin/$candidate"
          echo "Installed $candidate to /usr/local/bin/$candidate"
          installed="$candidate"
          break
        fi
      done
      if [ -z "$installed" ]; then
        echo "Downloaded archive but could not find qsv binary inside"
      fi
    fi
    # Smoke check only; a missing binary does not fail the step.
    /usr/local/bin/qsvdp --version >/dev/null 2>&1 || /usr/local/bin/qsv --version >/dev/null 2>&1 || echo "qsv not installed or not runnable (this is okay for plugin presence test)."
# Patches /srv/app/src/ckan/test-core.ini for this run:
#   1. points the database / Solr / Redis URLs at the service containers,
#   2. forces a single `debug = true` under [DEFAULT],
#   3. replaces listed keys in place, or inserts them after [app:main],
#   4. makes sure the required plugins are on the ckan.plugins line.
- name: Setup CKAN configuration (/srv/app/src/ckan/test-core.ini)
  run: |
    set -eu
    CKAN_INI=/srv/app/src/ckan/test-core.ini

    # Desired values: use the job env when present, otherwise fall back to
    # the same URLs the job-level env declares.
    CKAN_SITE_URL="${CKAN_SITE_URL:-http://localhost:5000}"
    CKAN_SQLALCHEMY_URL="${CKAN_SQLALCHEMY_URL:-postgresql://ckan_default:pass@postgres/ckan_test}"
    CKAN_DATASTORE_WRITE_URL="${CKAN_DATASTORE_WRITE_URL:-postgresql://datastore_write:pass@postgres/datastore_test}"
    CKAN_DATASTORE_READ_URL="${CKAN_DATASTORE_READ_URL:-postgresql://datastore_read:pass@postgres/datastore_test}"
    CKAN_SOLR_URL="${CKAN_SOLR_URL:-http://solr:8983/solr/ckan}"
    CKAN_REDIS_URL="${CKAN_REDIS_URL:-redis://redis:6379/1}"

    # Defensive URL substitutions (keep these).
    sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL}|g" "$CKAN_INI"
    sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL}|g" "$CKAN_INI"
    sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL}|g" "$CKAN_INI"
    if ! grep -q "^solr_url" "$CKAN_INI"; then
      echo "solr_url = ${CKAN_SOLR_URL}" >> "$CKAN_INI"
    fi
    if ! grep -q "^ckan.redis.url" "$CKAN_INI"; then
      echo "ckan.redis.url = ${CKAN_REDIS_URL}" >> "$CKAN_INI"
    fi

    # Temp files holding the work lists (POSIX sh-safe).
    REPLACE_FILE="$(mktemp)"
    ADD_FILE="$(mktemp)"
    MISSING_ADD_FILE="$(mktemp)"
    : > "$REPLACE_FILE"
    : > "$ADD_FILE"
    : > "$MISSING_ADD_FILE"

    # REPLACE_ENTRIES: one "key|value" pair per line.
    printf '%s\n' \
      "ckan.site_url|${CKAN_SITE_URL}" \
      "sqlalchemy.url|${CKAN_SQLALCHEMY_URL}" \
      "ckan.datastore.write_url|${CKAN_DATASTORE_WRITE_URL}" \
      "ckan.datastore.read_url|${CKAN_DATASTORE_READ_URL}" \
      "solr_url|${CKAN_SOLR_URL}" \
      "ckan.redis.url|${CKAN_REDIS_URL}" \
      > "$REPLACE_FILE"

    # ADD_LINES content (one entry per line). Comments start with '#'.
    cat > "$ADD_FILE" <<'EOF'
    ckan.site_id = default
    ckan.site_title = CKAN Test
    ckan.auth.create_default_api_keys = true
    ckanext.datapusher_plus.qsv_bin = /usr/local/bin/qsvdp
    scheming.dataset_schemas = ckanext.datapusher_plus:dataset-druf.yaml
    scheming.presets = ckanext.scheming:presets.json
    scheming.dataset_fallback = false
    ckanext.datapusher_plus.download_proxy =
    ckanext.datapusher_plus.ssl_verify = false
    # supports INFO, DEBUG, TRACE - use DEBUG or TRACE when debugging scheming Formulas
    ckanext.datapusher_plus.upload_log_level = INFO
    ckanext.datapusher_plus.formats = csv tsv tab ssv xls xlsx xlsb xlsm ods geojson shp qgis zip
    ckanext.datapusher_plus.pii_screening = false
    ckanext.datapusher_plus.pii_found_abort = false
    ckanext.datapusher_plus.pii_regex_resource_id_or_alias =
    ckanext.datapusher_plus.pii_show_candidates = false
    ckanext.datapusher_plus.pii_quick_screen = false
    ckanext.datapusher_plus.preview_rows = 100
    ckanext.datapusher_plus.download_timeout = 300
    ckanext.datapusher_plus.max_content_length = 1256000000000
    ckanext.datapusher_plus.chunk_size = 16384
    ckanext.datapusher_plus.default_excel_sheet = 0
    ckanext.datapusher_plus.sort_and_dupe_check = true
    ckanext.datapusher_plus.dedup = false
    ckanext.datapusher_plus.unsafe_prefix = unsafe_
    ckanext.datapusher_plus.reserved_colnames = _id
    ckanext.datapusher_plus.prefer_dmy = false
    ckanext.datapusher_plus.ignore_file_hash = true
    ckanext.datapusher_plus.auto_index_threshold = 3
    ckanext.datapusher_plus.auto_index_dates = true
    ckanext.datapusher_plus.auto_unique_index = true
    ckanext.datapusher_plus.summary_stats_options =
    ckanext.datapusher_plus.add_summary_stats_resource = false
    ckanext.datapusher_plus.summary_stats_with_preview = false
    ckanext.datapusher_plus.qsv_stats_string_max_length = 32767
    ckanext.datapusher_plus.qsv_dates_whitelist = date,time,due,open,close,created
    ckanext.datapusher_plus.qsv_freq_limit = 10
    ckanext.datapusher_plus.auto_alias = true
    ckanext.datapusher_plus.auto_alias_unique = false
    ckanext.datapusher_plus.copy_readbuffer_size = 1048576
    ckanext.datapusher_plus.type_mapping = {"String": "text", "Integer": "numeric","Float": "numeric","DateTime": "timestamp","Date": "date","NULL": "text"}
    ckanext.datapusher_plus.auto_spatial_simplication = true
    ckanext.datapusher_plus.spatial_simplication_relative_tolerance = 0.1
    ckanext.datapusher_plus.latitude_fields = latitude,lat
    ckanext.datapusher_plus.longitude_fields = longitude,long,lon
    ckanext.datapusher_plus.jinja2_bytecode_cache_dir = /tmp/jinja2_butecode_cache
    ckanext.datapusher_plus.auto_unzip_one_file = true
    # v3.0: ingestion runs on Prefect instead of RQ.
    ckanext.datapusher_plus.prefect_deployment_name = datapusher-plus/datapusher-plus
    ckanext.datapusher_plus.prefect_work_pool = datapusher-plus
    ckanext.datapusher_plus.prefect_ui_base = http://localhost:4200
    ckanext.datapusher_plus.flow_timeout = 600
    ckanext.datapusher_plus.max_quarantine_pct = 5.0
    EOF

    # upsert_ini_key KEY VALUE
    # Rewrites the existing "KEY = ..." line in place; when the key is absent
    # the pair is queued in MISSING_ADD_FILE for insertion under [app:main].
    upsert_ini_key() {
      ini_key="$1"
      ini_value="$2"
      # Escape ERE metacharacters in the key. '#' is the sed delimiter here
      # so nothing inside the bracket expression can end the pattern early.
      key_re="$(printf '%s' "$ini_key" | sed 's#[][\\^$.*|+?(){}]#\\&#g')"
      # Escape what is special in a sed replacement delimited by '|'.
      esc_value="$(printf '%s' "$ini_value" | sed 's#[\\&|]#\\&#g')"
      if grep -q -E "^[[:space:]]*${key_re}[[:space:]]*=" "$CKAN_INI"; then
        sed -i -E "s|^[[:space:]]*${key_re}[[:space:]]*=.*|${ini_key} = ${esc_value}|g" "$CKAN_INI"
      else
        printf '%s\n' "${ini_key} = ${ini_value}" >> "$MISSING_ADD_FILE"
      fi
    }

    if [ -f "$CKAN_INI" ]; then
      echo "Patching selective keys in /srv/app/src/ckan/test-core.ini (only the keys you listed)..."

      # Ensure a single `debug = true` under [DEFAULT]: drop existing debug
      # lines in that section, then emit one when the section ends.
      # The flag must not be called `in`: that is a reserved awk keyword.
      # awk and mv are separate commands so `set -e` stops on an awk error.
      awk 'BEGIN { in_default = 0 }
        /^\[DEFAULT\]/ { print; in_default = 1; next }
        /^\[.*\]/ { if (in_default) { print "debug = true"; in_default = 0 } }
        {
          if (in_default && $1 == "debug") next
          print
        }
        END { if (in_default) print "debug = true" }' "$CKAN_INI" > "$CKAN_INI.tmp"
      mv "$CKAN_INI.tmp" "$CKAN_INI"

      # REPLACE_FILE: split each entry at the first '|' into key and value.
      while IFS= read -r entry || [ -n "$entry" ]; do
        upsert_ini_key "${entry%%|*}" "${entry#*|}"
      done < "$REPLACE_FILE"

      # ADD_FILE: comment lines are added only if not already present;
      # "key = value" lines go through the same upsert as above.
      while IFS= read -r ln || [ -n "$ln" ]; do
        case "$ln" in
          \#*)
            if ! grep -Fq "$ln" "$CKAN_INI"; then
              printf '%s\n' "$ln" >> "$MISSING_ADD_FILE"
            fi
            ;;
          *)
            key="$(printf '%s' "$ln" | cut -d'=' -f1 | sed 's/[[:space:]]*$//')"
            value="$(printf '%s' "$ln" | cut -d'=' -f2- | sed 's/^[[:space:]]*//')"
            upsert_ini_key "$key" "$value"
            ;;
        esac
      done < "$ADD_FILE"

      # Insert the queued lines after the first [app:main] header, or append
      # a new [app:main] section when the file has none.
      if [ -s "$MISSING_ADD_FILE" ]; then
        awk -v addfile="$MISSING_ADD_FILE" '
          BEGIN {
            inserted = 0
            while ((getline line < addfile) > 0) { add[++na] = line }
            close(addfile)
          }
          {
            print
            if (!inserted && $0 == "[app:main]") {
              for (i = 1; i <= na; i++) print add[i]
              inserted = 1
            }
          }
          END {
            if (!inserted) {
              print "[app:main]"
              for (i = 1; i <= na; i++) print add[i]
            }
          }' "$CKAN_INI" > "$CKAN_INI.new"
        mv "$CKAN_INI.new" "$CKAN_INI"
      fi

      # Final defensive catch: make sure the DB URLs reflect the env (again).
      sed -i "s|^sqlalchemy.url.*|sqlalchemy.url = ${CKAN_SQLALCHEMY_URL}|g" "$CKAN_INI"
      sed -i "s|^ckan.datastore.write_url.*|ckan.datastore.write_url = ${CKAN_DATASTORE_WRITE_URL}|g" "$CKAN_INI"
      sed -i "s|^ckan.datastore.read_url.*|ckan.datastore.read_url = ${CKAN_DATASTORE_READ_URL}|g" "$CKAN_INI"
    else
      echo "/srv/app/src/ckan/test-core.ini not found — no selective patching performed."
    fi

    # Append the required plugins to ckan.plugins if the line exists;
    # otherwise add a plugins line.
    REQUIRED_PLUGINS="datastore datapusher_plus scheming_datasets"
    if grep -q "^ckan.plugins" "$CKAN_INI"; then
      echo "Appending required plugins to existing ckan.plugins line"
      current=$(grep "^ckan.plugins" "$CKAN_INI" | head -n1 | cut -d'=' -f2-)
      for p in $REQUIRED_PLUGINS; do
        echo "$current" | grep -qw "$p" || current="$current $p"
      done
      # Only the first ckan.plugins line is rewritten.
      awk -v new="ckan.plugins = $current" 'BEGIN{done=0} {if(!done && $1=="ckan.plugins") {print new; done=1} else print $0}' "$CKAN_INI" > "$CKAN_INI.new"
      mv "$CKAN_INI.new" "$CKAN_INI"
    else
      echo "ckan.plugins = $REQUIRED_PLUGINS" >> "$CKAN_INI"
      echo "Added ckan.plugins line with required plugins."
    fi

    echo "---- /srv/app/src/ckan/test-core.ini (cat) ----"
    cat "$CKAN_INI"
    echo "---- end ----"
# Verifies the ckan_default login (recreating the role/database if needed),
# runs `ckan db init`, then applies the datastore permission SQL as the
# postgres superuser.
- name: Initialize CKAN database
  run: |
    echo "Testing connectivity with CKAN DB user..."
    if ! PGPASSWORD=$CKAN_DB_PASSWORD psql -h postgres -U ckan_default -d ckan_test -c "SELECT 1;" >/dev/null 2>&1; then
      echo "Cannot connect as ckan_default. Attempting to create database owner and db..."
      # PostgreSQL has no `IF NOT EXISTS` for CREATE USER / CREATE DATABASE,
      # so check the catalogs first and create only what is missing.
      PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_roles WHERE rolname='ckan_default'" | grep -q 1 || \
        PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE USER ckan_default WITH PASSWORD '$CKAN_DB_PASSWORD';"
      PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -Atc "SELECT 1 FROM pg_database WHERE datname='ckan_test'" | grep -q 1 || \
        PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres -c "CREATE DATABASE ckan_test OWNER ckan_default;"
    fi

    echo "Running ckan db init (may be idempotent)..."
    if ckan -c /srv/app/src/ckan/test-core.ini db init; then
      echo "CKAN DB initialized."
    else
      echo "ckan db init returned non-zero; continuing (may already be initialized)."
    fi

    echo "Setting datastore permissions..."
    # This step's shell is `sh` (dash) on the ckan-dev image, which has no
    # `set -o pipefail`. To still catch a crash in `ckan` (otherwise only
    # psql's exit status is checked, and psql succeeds happily on an empty
    # stream), capture the SQL to a file and check `ckan`'s exit status
    # directly before piping it into psql.
    PERMS_SQL="$(mktemp)"
    if ! ckan -c /srv/app/src/ckan/test-core.ini datastore set-permissions > "$PERMS_SQL"; then
      echo "ckan datastore set-permissions failed."
      cat "$PERMS_SQL"
      rm -f "$PERMS_SQL"
      exit 1
    fi

    # CKAN 2.11 emits a CSRF deprecation warning to stdout before the SQL
    # output, which would otherwise be piped into psql and trip a syntax
    # error on the timestamp prefix. Drop only lines that start with an
    # ISO-8601 style timestamp (date, then 'T' or a space, then hh:mm:ss).
    if grep -vE '^[0-9]{4}-[0-9]{2}-[0-9]{2}[ T][0-9]{2}:[0-9]{2}:[0-9]{2}' "$PERMS_SQL" \
      | PGPASSWORD=$POSTGRES_PASSWORD psql -h postgres -U postgres --set ON_ERROR_STOP=1; then
      echo "Datastore permissions set."
    else
      echo "Datastore permission step failed."
      rm -f "$PERMS_SQL"
      exit 1
    fi
    rm -f "$PERMS_SQL"
# Launches CKAN in the background and waits up to 120 seconds for the
# status_show API to answer, failing early if the process exits.
- name: Start CKAN server
  run: |
    set -eu
    echo "Starting CKAN server in background..."
    # nohup keeps the server running after this step's shell exits.
    nohup ckan -c /srv/app/src/ckan/test-core.ini run --host 0.0.0.0 --port 5000 --disable-reloader > /tmp/ckan_stdout.log 2>&1 &
    CKAN_PID=$!
    echo "CKAN PID=$CKAN_PID"

    # Poll every 3 seconds until the API answers or the budget runs out.
    seconds_left=120
    until [ "$seconds_left" -le 0 ]; do
      kill -0 "$CKAN_PID" >/dev/null 2>&1 || {
        echo "CKAN process died. Showing last lines of log:"
        tail -n 200 /tmp/ckan_stdout.log
        exit 1
      }
      if curl -fsS "${CKAN_SITE_URL}/api/3/action/status_show" >/dev/null 2>&1; then
        echo "CKAN API responding"
        break
      fi
      echo "Waiting for CKAN API... ($seconds_left s left)"
      sleep 3
      seconds_left=$((seconds_left-3))
    done

    # A non-positive budget means the loop ended without a healthy response.
    if [ "$seconds_left" -le 0 ]; then
      echo "Timeout waiting for CKAN to start. Dumping logs..."
      tail -n 200 /tmp/ckan_stdout.log
      ss -tlnp || netstat -tlnp
      exit 1
    fi
    echo "CKAN started successfully"
# Creates the admin_ckan user with an API token, promotes it to sysadmin and
# exports the token to later steps as CKAN_API_KEY.
- name: Create sysadmin user admin_ckan and get apikey
  run: |
    set -eu
    echo "Creating user admin_ckan..."
    # A non-zero exit is tolerated here (the user may already exist).
    if ! user_response=$(ckanapi action user_create --config /srv/app/src/ckan/test-core.ini \
      name=admin_ckan \
      email=admins@example.com \
      password=test1234 \
      fullname="CKAN Administrator" \
      with_apitoken=true \
      about="Created by GitHub Actions test" 2>/dev/null); then
      echo "user_create returned non-zero (user may already exist)"
    fi
    echo "User creation response: $user_response"

    echo "Converting admin_ckan user to sysadmin..."
    ckan -c /srv/app/src/ckan/test-core.ini sysadmin add admin_ckan
    echo "User admin_ckan promoted to sysadmin"

    # Keep only the JSON part of the output (from the '{' line to the '}' line),
    # then read the `token` field from it.
    json_response=$(echo "$user_response" | sed -n '/{/,/}/p')
    api_key=$(echo "$json_response" | jq -r '.token // empty')

    case "$api_key" in
      ''|null|empty)
        echo "No API key found in response"
        ;;
      *)
        echo "CKAN_API_KEY=$api_key" >> "$GITHUB_ENV"
        echo "API key saved: $api_key"
        ;;
    esac
    echo "User admin_ckan creation completed"
# Creates a dedicated API token for datapusher-plus and writes it to
# ckanext.datapusher_plus.api_token; falls back to CKAN_API_KEY (exported by
# an earlier step) when token creation fails.
- name: Create API token for datapusher-plus and add to config
  run: |
    set -eu
    echo "Creating API token for datapusher-plus service account..."
    # Create API token for admin_ckan user specifically for datapusher-plus
    echo "Running: ckan user token add admin_ckan dpplus"
    # Run inside `if` so a failing command does not abort the step under
    # `set -e` before the fallback below can be tried.
    if dp_token_output=$(ckan -c /srv/app/src/ckan/test-core.ini user token add admin_ckan dpplus 2>&1); then
      # The last line of the output is taken as the token; tabs are stripped.
      dp_token=$(echo "$dp_token_output" | tail -n 1 | tr -d '\t')
    else
      dp_token=""
    fi
    echo "Full token creation output:"
    echo "$dp_token_output"
    echo "Extracted token: '$dp_token'"

    if [ -n "$dp_token" ] && [ "$dp_token" != "null" ]; then
      echo "Created datapusher-plus API token: $dp_token"
      # Add the token to the CKAN configuration file
      ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$dp_token"
      # Verify it was added
      echo "Verifying token was added to config:"
      grep "ckanext.datapusher_plus.api_token" /srv/app/src/ckan/test-core.ini || echo "Token not found in config!"
      # Also set in environment for potential use in other steps
      echo "DATAPUSHER_PLUS_API_TOKEN=$dp_token" >> "$GITHUB_ENV"
      echo "API token added to CKAN configuration successfully"
    else
      echo "Failed to create API token for datapusher-plus"
      # CKAN_API_KEY is only exported when the user-creation step found a
      # token, so guard the expansion instead of tripping `set -u`.
      if [ -z "${CKAN_API_KEY:-}" ]; then
        echo "CKAN_API_KEY is not set; no fallback token available"
        exit 1
      fi
      echo "Using main CKAN API key as fallback..."
      ckan config-tool /srv/app/src/ckan/test-core.ini "ckanext.datapusher_plus.api_token=$CKAN_API_KEY"
    fi
# Creates the demo organization and makes admin_ckan an admin member of it.
# Both calls tolerate a non-zero exit so that a re-run does not fail.
- name: Create organization with ckanapi
  run: |
    set -eu
    echo "Creating organization demo-organization (idempotent)..."
    if ! ckanapi action organization_create --config /srv/app/src/ckan/test-core.ini \
      name=demo-organization \
      title="Demo Data Publishing Organization" \
      description="Demo org created by GitHub Actions for datapusher-plus testing."; then
      echo "organization_create returned non-zero (may already exist)"
    fi

    echo "Add admin_ckan as admin to the organization"
    if ! ckanapi action organization_member_create --config /srv/app/src/ckan/test-core.ini \
      id=demo-organization username=admin_ckan role=admin; then
      echo "organization_member_create returned non-zero (may already be member)"
    fi
# Creates the demo dataset under demo-organization; a failure is reported
# but does not fail the step (the dataset may already exist).
- name: Create dataset with ckanapi
  run: |
    set -eu
    echo "Creating dataset my-first-dataset (idempotent)..."
    # Record the exit status instead of branching on the command directly.
    create_status=0
    ckanapi action package_create \
      name=my-first-dataset \
      title="My First Comprehensive Dataset" \
      notes="This is a comprehensive demo dataset created via ckanapi and GitHub Actions for testing CKAN functionality and datapusher-plus integration." \
      owner_org=demo-organization \
      license_id=cc-by \
      version=1.0.0 \
      author="GitHub Actions Automation" \
      author_email=noreply@example.com \
      maintainer="CKAN Admin" \
      maintainer_email=admin@example.com \
      url=https://github.com/your-repo/your-project \
      private:false \
      state=active \
      'tags:[{"name":"demo"},{"name":"test"},{"name":"github-actions"},{"name":"automation"},{"name":"csv-data"},{"name":"datapusher-plus"}]' \
      -c /srv/app/src/ckan/test-core.ini || create_status=$?

    if [ "$create_status" -eq 0 ]; then
      echo "Dataset created successfully!"
    else
      echo "Dataset might already exist, continuing..."
    fi
# Attaches a remote CSV resource to my-first-dataset. Unlike dataset
# creation, a failure here dumps the dataset and fails the step.
- name: Add resource to dataset with ckanapi
  run: |
    set -eu
    echo "Adding resource to my-first-dataset..."
    if ! ckanapi action resource_create \
      package_id=my-first-dataset \
      url="https://raw.githubusercontent.com/frictionlessdata/test-data/master/files/csv/100kb.csv" \
      name="Sample CSV Data - 100KB Test File" \
      description="Test CSV resource for datapusher-plus pipeline." \
      format=CSV \
      mimetype="text/csv" \
      -c /srv/app/src/ckan/test-core.ini; then
      echo "Resource creation failed"
      # Show the dataset as it currently stands to help diagnose the failure.
      ckanapi action package_show id=my-first-dataset -c /srv/app/src/ckan/test-core.ini
      exit 1
    fi
    echo "Resource created successfully!"
# Prints the site status plus dataset, organization and user listings,
# fetched over the HTTP API and pretty-printed as JSON.
- name: Display CKAN instance inventory
  run: |
    set -eu

    # show_action TITLE ACTION: print a banner, then the formatted response
    # of the given API action.
    show_action() {
      echo "=== $1 (HTTP API) ==="
      curl -s "http://localhost:5000/api/3/action/$2" | python3 -m json.tool
    }

    show_action "CKAN Status" status_show
    echo ""
    show_action "All Datasets" package_list
    echo ""
    show_action "All Organizations" organization_list
    echo ""
    show_action "All Users" user_list
# Smoke-tests the datastore over the HTTP API: reads _table_metadata,
# creates a small table, queries it, then deletes it. Both the read check
# and the write check fail the step when the API does not report success.
- name: Test datastore functionality
  run: |
    set -eu
    echo "Testing datastore functionality..."

    # Test 1: Check if datastore is accessible by querying table metadata
    echo "=== Testing datastore read access ==="
    metadata_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=_table_metadata")
    echo "Table metadata response: $metadata_response"
    if echo "$metadata_response" | jq -e '.success == true' >/dev/null 2>&1; then
      echo "✓ Datastore read access working"
    else
      echo "✗ Datastore read access failed"
      exit 1
    fi

    # The write test needs the token exported by the user-creation step;
    # report its absence clearly instead of tripping `set -u` mid-command.
    if [ -z "${CKAN_API_KEY:-}" ]; then
      echo "✗ CKAN_API_KEY is not set; cannot test datastore write access"
      exit 1
    fi

    # Test 2: Create a test datastore table
    echo "=== Testing datastore write access ==="
    test_response=$(curl -s -X POST \
      -H "Content-Type: application/json" \
      -H "Authorization: $CKAN_API_KEY" \
      -d '{
        "resource": {"package_id": "my-first-dataset"},
        "fields": [{"id": "test_col", "type": "text"}, {"id": "value", "type": "int"}],
        "records": [{"test_col": "hello", "value": 1}, {"test_col": "world", "value": 2}]
      }' \
      "http://localhost:5000/api/3/action/datastore_create")
    echo "Test table creation response: $test_response"
    if ! echo "$test_response" | jq -e '.success == true' >/dev/null 2>&1; then
      echo "✗ Datastore write access failed"
      # Fail the step, consistent with the read check above.
      exit 1
    fi
    echo "✓ Datastore write access working"

    # Extract resource_id for cleanup
    test_resource_id=$(echo "$test_response" | jq -r '.result.resource_id')

    # Test 3: Query the test table (the response is printed, not validated)
    echo "=== Testing datastore query ==="
    query_response=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$test_resource_id")
    echo "Query response: $query_response"

    # Cleanup: Delete test table
    echo "=== Cleaning up test table ==="
    curl -s -X POST \
      -H "Content-Type: application/json" \
      -H "Authorization: $CKAN_API_KEY" \
      -d "{\"resource_id\": \"$test_resource_id\"}" \
      "http://localhost:5000/api/3/action/datastore_delete" >/dev/null
    echo "✓ Datastore functionality test completed successfully"
# Starts a Postgres-backed Prefect server in the background and waits for
# its health endpoint, exporting the API URL and server PID to later steps.
- name: Start Prefect server (DP+ v3.0)
  env:
    PREFECT_API_DATABASE_CONNECTION_URL: 'postgresql+asyncpg://postgres:${{ env.POSTGRES_PASSWORD }}@postgres:5432/prefect_test'
    PREFECT_SERVER_API_HOST: '0.0.0.0'
    PREFECT_API_URL: 'http://localhost:4200/api'
  run: |
    set -eu
    echo "PREFECT_API_URL=http://localhost:4200/api" >> "$GITHUB_ENV"
    echo "Starting Prefect server (Postgres-backed) on :4200..."
    nohup prefect server start > /tmp/prefect_server.log 2>&1 &
    PREFECT_SERVER_PID=$!
    echo "PREFECT_SERVER_PID=$PREFECT_SERVER_PID" >> "$GITHUB_ENV"

    # Poll until the API is healthy. Server start can take 30–60s on first
    # run as Alembic migrations apply.
    attempt=1
    while [ "$attempt" -le 60 ]; do
      if curl -fsS http://localhost:4200/api/health > /dev/null 2>&1; then
        echo "Prefect server is healthy after ${attempt}s"
        break
      fi
      if ! kill -0 "$PREFECT_SERVER_PID" >/dev/null 2>&1; then
        echo "Prefect server died:"
        tail -n 80 /tmp/prefect_server.log
        exit 1
      fi
      sleep 1
      attempt=$((attempt+1))
    done

    # Final check: fail with the server log if the API is still not healthy.
    if ! curl -fsS http://localhost:4200/api/health; then
      tail -n 80 /tmp/prefect_server.log
      exit 1
    fi
- name: Register DataPusher+ deployment
  # Runs the `datapusher_plus prefect-deploy` CKAN CLI command against the
  # test configuration, with PREFECT_API_URL pointing at the local server.
  # NOTE(review): presumably this registers the flow deployment with that
  # Prefect API - inferred from the step name, confirm against the CLI.
  env:
    PREFECT_API_URL: http://localhost:4200/api
  run: |
    set -eu
    ckan_config=/srv/app/src/ckan/test-core.ini
    ckan -c "$ckan_config" datapusher_plus prefect-deploy
- name: Start Prefect worker (DP+ v3.0)
  # Starts a Prefect worker for the `datapusher-plus` pool in the
  # background, logging to /tmp/prefect_worker.log, and fails the step if
  # the process is no longer alive after a 5 second grace period.
  env:
    PREFECT_API_URL: http://localhost:4200/api
    CKAN_INI: /srv/app/src/ckan/test-core.ini
  run: |
    set -eu
    worker_log=/tmp/prefect_worker.log
    echo "Starting Prefect worker on the datapusher-plus pool..."
    nohup prefect worker start --pool datapusher-plus > "$worker_log" 2>&1 &
    worker_pid=$!
    # Publish the PID through GITHUB_ENV for the steps that follow.
    echo "PREFECT_WORKER_PID=$worker_pid" >> $GITHUB_ENV
    sleep 5
    # Guard clause: a dead worker dumps its full log and aborts the step.
    if ! kill -0 "$worker_pid" >/dev/null 2>&1; then
      echo "Prefect worker failed to start"
      cat "$worker_log"
      exit 1
    fi
    echo "Prefect worker started"
    # Show the first log lines for context; never fail the step on this.
    head -n 30 "$worker_log" || true
- name: Start CKAN background job worker
  # Starts `ckan jobs worker` in the background, logging to
  # /tmp/ckan_worker.log, and fails the step if the process is no longer
  # alive after a 5 second grace period.
  run: |
    set -eu
    # Since DP+ v3.0 the ingestion pipeline is driven by Prefect rather
    # than RQ. CKAN's RQ worker is still launched here because other CKAN
    # extensions (search reindex, mailer, etc.) depend on it.
    rq_log=/tmp/ckan_worker.log
    echo "Starting CKAN background job worker..."
    nohup ckan -c /srv/app/src/ckan/test-core.ini jobs worker > "$rq_log" 2>&1 &
    rq_pid=$!
    echo "CKAN Worker PID=$rq_pid"
    # Publish the PID through GITHUB_ENV for the steps that follow.
    echo "CKAN_WORKER_PID=$rq_pid" >> $GITHUB_ENV
    # Short grace period before checking that the process survived startup.
    sleep 5
    # Guard clause: a dead worker dumps its full log and aborts the step.
    if ! kill -0 "$rq_pid" >/dev/null 2>&1; then
      echo "Worker failed to start"
      cat "$rq_log"
      exit 1
    fi
    echo "Background job worker started successfully"
    echo "Worker logs:"
    head -n 20 "$rq_log" || echo "No worker logs yet"
- name: Test DataPusher Plus functionality - Remote Files (CSV Input)
run: |
set -eu
echo "=== Testing DataPusher Plus Functionality - Remote Files from CSV ==="
# Initialize results tracking
echo "timestamp,file_name,upload_status,resource_id,datapusher_status,datastore_active,rows_imported,processing_time,error_message,expected,actual" > /tmp/test_results.csv
# Initialize skipped files tracking
echo "file_name,reason_skipped" > /tmp/skipped_files.csv
# Set path for CSV input file
CSV_INPUT_FILE="${GITHUB_WORKSPACE}/tests/$FILES_DIR/base_files.csv"
# Check if CSV input file exists
if [ ! -f "$CSV_INPUT_FILE" ]; then
echo "ERROR: CSV input file not found: $CSV_INPUT_FILE"
echo "Please ensure the tests/$FILES_DIR/base_files.csv file exists in your repository"
echo "Expected CSV format: file_name,file_url,file_format,file_mimetype,file_description"
exit 1
fi
echo "Using CSV input file: $CSV_INPUT_FILE"
echo "CSV file size: $(du -h "$CSV_INPUT_FILE" | cut -f1)"
echo ""
# Validate CSV structure
echo "Validating CSV structure..."
header=$(head -n 1 "$CSV_INPUT_FILE")
echo "CSV Header: $header"
# Check if header contains required columns
if ! echo "$header" | grep -qi "file_url"; then
echo "ERROR: CSV must contain 'file_url' column"
echo "Expected format: file_name,file_url,file_format,file_mimetype,file_description"
exit 1
fi
# Count total entries in CSV
total_entries=$(tail -n +2 "$CSV_INPUT_FILE" | grep -v '^[[:space:]]*$' | wc -l)
echo "Total entries in CSV: $total_entries"
echo ""
# Display first few entries for verification
echo "First 5 entries from CSV:"
head -n 6 "$CSV_INPUT_FILE"
echo ""
# Create test dataset once
echo "Creating test dataset for DataPusher Plus..."
if ckanapi action package_create \
name=datapusher-plus-test-remote \
title="DataPusher Plus Remote Files Test Dataset" \
owner_org=demo-organization \
-c /srv/app/src/ckan/test-core.ini >/dev/null 2>&1; then
echo "Test dataset created"
else
echo "Test dataset might already exist, continuing..."
fi
# Initialize counters
total_files=0
passed_files=0
failed_files=0
skipped_files=0
# Process each line from CSV (skip header).
# Redirect the loop from a header-stripped temp file rather
# than `tail -n +2 ... | while` — a pipe runs the loop in a
# subshell, so passed_files/failed_files/total_files would be
# lost and the step would report "NO FILES TESTED" and exit 0
# even when every file failed. This step runs under `sh` (the
# container default), so bash process substitution `< <(...)`
# is NOT available — a plain file redirect is the portable way.
CSV_BODY=$(mktemp)
tail -n +2 "$CSV_INPUT_FILE" > "$CSV_BODY"
while IFS=',' read -r file_name file_url file_format file_mimetype file_desc expected || [ -n "$file_name" ]; do
# Skip empty lines and comments
[ -z "$file_name" ] && continue
case "$file_name" in
'#'*) continue ;;
''|*[[:space:]]*)
# Skip lines with only whitespace
[ -z "$(echo "$file_name" | tr -d '[:space:]')" ] && continue
;;
esac
# Trim whitespace from all fields
file_name=$(echo "$file_name" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
file_url=$(echo "$file_url" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | tr -d '"')
file_format=$(echo "$file_format" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
file_mimetype=$(echo "$file_mimetype" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
file_desc=$(echo "$file_desc" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
expected=$(echo "$expected" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | tr -d '\r')
# Expected ingestion outcome for this file:
# complete - file should fully ingest into the datastore
# skipped - DP+ completes the job but loads no table
# (zero-record / header-only file)
# error - DP+ should reject it (e.g. malformed CSV)
# declined - DP+ should not even submit it (unsupported
# format, empty/nested archive)
[ -z "$expected" ] && expected="complete"
# Validate required fields
if [ -z "$file_url" ]; then
echo "SKIP: Missing URL for file: $file_name"
echo "$file_name,Missing file_url in CSV" >> /tmp/skipped_files.csv
skipped_files=$((skipped_files + 1))
continue
fi
# Set defaults if fields are empty
[ -z "$file_name" ] && file_name=$(basename "$file_url")
[ -z "$file_format" ] && file_format="UNKNOWN"
[ -z "$file_mimetype" ] && file_mimetype="application/octet-stream"
[ -z "$file_desc" ] && file_desc="Remote file: $file_name"
# Test if URL is accessible
echo "Testing accessibility of: $file_url"
if ! curl -s --head --max-time 10 "$file_url" > /dev/null 2>&1; then
echo "SKIP: File not accessible via HTTP: $file_url"
echo "$file_name,File not accessible or timed out" >> /tmp/skipped_files.csv
skipped_files=$((skipped_files + 1))
continue
fi
total_files=$((total_files + 1))
echo ""
echo "=========================================="
echo "Testing File #${total_files}: $file_name"
echo "URL: $file_url"
echo "Format: $file_format"
echo "Description: $file_desc"
echo "Expected outcome: $expected"
# Try to get file size
file_size=$(curl -sI "$file_url" | grep -i content-length | cut -d' ' -f2 | tr -d '\r' || echo "unknown")
echo "File size: $file_size bytes"
echo "=========================================="
# Initialize tracking variables for this file
start_time=$(date +%s)
upload_status="FAILED"
resource_id=""
datapusher_status="N/A"
datastore_active="false"
rows_imported="0"
error_message=""
# Create resource with URL for this test file
echo "Creating resource with URL for $file_name..."
if resource_response=$(ckanapi action resource_create \
package_id=datapusher-plus-test-remote \
url="$file_url" \
name="Remote Test: $file_name" \
description="$file_desc" \
format="$file_format" \
mimetype="$file_mimetype" \
-c /srv/app/src/ckan/test-core.ini 2>&1); then
echo "Resource created successfully for $file_name"
upload_status="SUCCESS"
# Extract resource ID
resource_id=$(echo "$resource_response" | grep -o '"id"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"id"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
if [ -z "$resource_id" ]; then
resource_id=$(echo "$resource_response" | sed -n 's/.*"id"[[:space:]]*:[[:space:]]*"\([a-f0-9-]*\)".*/\1/p')
fi
echo "Resource ID: $resource_id"
if [ -n "$resource_id" ] && [ "$resource_id" != "null" ]; then
# Monitor DataPusher Plus processing
echo "Monitoring DataPusher Plus processing for $file_name..."
# Flows now finish in ~20-30s (no caching backup, no
# deterministic retries), so 45 attempts x 2s = 90s is a
# generous ceiling. A 'declined' file is never submitted,
# so DP+ has no task_status row to ever return 'complete'
# -- poll it only briefly to confirm it stays unsubmitted
# rather than burning the full window.
if [ "$expected" = "declined" ]; then
max_attempts=8
else
max_attempts=45
fi
for attempt in $(seq 1 $max_attempts); do
sleep 2
# Check DataPusher status
if dp_status_response=$(curl -s -H "Authorization: $CKAN_API_KEY" \
"http://localhost:5000/api/3/action/datapusher_status?resource_id=$resource_id" 2>/dev/null); then
if echo "$dp_status_response" | grep -q '"success"[[:space:]]*:[[:space:]]*true'; then
datapusher_status=$(echo "$dp_status_response" | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*"status"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
if [ -z "$datapusher_status" ]; then
datapusher_status="unknown"
fi
# Clean up status string
datapusher_status=$(echo "$datapusher_status" | tr -d '\n\r\t ' | cut -c1-10)
echo " Attempt $attempt/$max_attempts: DataPusher status = $datapusher_status"
if [ "$datapusher_status" = "complete" ]; then
echo " ✓ DataPusher processing completed for $file_name!"
break
elif [ "$datapusher_status" = "error" ]; then
error_info=$(echo "$dp_status_response" | grep -o '"message"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*"message"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' | head -1)
if [ -z "$error_info" ]; then
error_info="DataPusher processing error"
fi
error_message="DataPusher error: $error_info"
echo " ✗ DataPusher processing failed for $file_name: $error_message"
break
fi
else
# API returned success=false
if [ $attempt -eq $max_attempts ]; then
error_message="DataPusher status API returned success=false"
echo " ✗ DataPusher status API error for $file_name"
fi
fi
else
# Curl failed
if [ $attempt -eq $max_attempts ]; then
error_message="Failed to get DataPusher status"
echo " ✗ Cannot reach DataPusher status API for $file_name"
fi
fi
# Progress indicator
if [ $((attempt % 15)) -eq 0 ]; then
echo " Still processing $file_name... (${attempt}/${max_attempts})"
fi
done
# Check final resource status
echo "Checking final status for $file_name..."
if final_resource=$(curl -s "http://localhost:5000/api/3/action/resource_show?id=$resource_id" 2>/dev/null); then
if echo "$final_resource" | grep -q '"datastore_active"[[:space:]]*:[[:space:]]*true'; then
datastore_active="true"
echo " ✓ DataStore activated for $file_name"
# Get row count
if datastore_data=$(curl -s "http://localhost:5000/api/3/action/datastore_search?resource_id=$resource_id&limit=1" 2>/dev/null); then
rows_imported=$(echo "$datastore_data" | grep -o '"total"[[:space:]]*:[[:space:]]*[0-9]*' | sed 's/.*"total"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/')
if [ -z "$rows_imported" ]; then
rows_imported="0"
fi