Skip to content

Commit 37201d3

Browse files
committed
Push v0.03
1 parent cbf40cc commit 37201d3

4 files changed

Lines changed: 46 additions & 26 deletions

File tree

Changes

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
Revision history for Perl distribution OMOP-CSV-Validator
22

3-
0.03 2025-04-02T00:00:00Z (Manuel Rueda <mrueda@cpan.org>)
3+
0.03 2025-04-03T00:00:00Z (Manuel Rueda <mrueda@cpan.org>)
44

55
- Added blank_is_undef => 1 to Text::CSV_XS
66
- Fixed issue with non-required properties with NULL values
77
- Added a custom "_coerce" property on numeric fields to enable coercion into numbers
8+
- Added Term::ANSIColor + emojis to stdout
9+
- Added optional <--no-color> parameter to turn off Term::ANSIColor
810

911
0.02 2025-04-02T00:00:00Z (Manuel Rueda <mrueda@cpan.org>)
1012

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ The OMOP CSV Validator is a **CLI tool** (and module) that **validates OMOP CDM
55
## Features
66

77
- **DDL Parsing:** Automatically converts PostgreSQL OMOP CDM DDL into JSON schemas.
8+
- **Version Independent:** Works with any DDL (e.g., 5.3, 5.4).
89
- **CSV Validation:** Validates CSV files using JSON::Validator.
910
- **Numeric Coercion:** Coerces CSV numeric fields (including handling of null values).
1011
- **Modular Design:** Separate CLI and module for easy testing and integration.

bin/omop-csv-validator

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@ use Path::Tiny;
1616
use OMOP::CSV::Validator;
1717
use JSON::XS;
1818
use Pod::Usage;
19-
use Data::Dumper;
19+
#use Data::Dumper;
2020
use Term::ANSIColor; # For colored output
21+
use File::Basename;
2122
binmode STDOUT, ':encoding(UTF-8)';
23+
binmode STDERR, ':encoding(UTF-8)';
2224

2325
# Module version
2426
my $VERSION = $OMOP::CSV::Validator::VERSION;
@@ -29,13 +31,15 @@ my $csv_file;
2931
my $sep = ',';
3032
my $table_name; # Optional parameter to override table name
3133
my $schemas_file; # Optional: file to save schemas
34+
my $nocolor = 0;
3235

3336
GetOptions(
3437
'ddl=s' => \$ddl_file, # Path to the DDL file (PostgreSQL)
3538
'input=s' => \$csv_file, # Path to the input CSV file
3639
'sep=s' => \$sep, # Field separator (default: comma)
3740
'table|t=s' => \$table_name, # Optional: override table name
3841
'save-schemas=s' => \$schemas_file, # Optional: file to save schemas
42+
'no-color|nc' => \$nocolor, # Optional: Turn off STDOUT color
3943
'help|h' => \my $help, # Show help message
4044
'version|V' => sub {
4145
say color("cyan"), "$0 Version $VERSION", color("reset");
@@ -45,12 +49,24 @@ GetOptions(
4549

4650
pod2usage(1) if $help;
4751

52+
# Turning color off if argument <--no-color>
53+
$ENV{'ANSI_COLORS_DISABLED'} = 1 if $nocolor;
54+
4855
unless ( $ddl_file && $csv_file ) {
4956
warn color("red"), "[ERROR] --ddl and --input are required parameters.\n",
5057
color("reset");
5158
pod2usage(1);
5259
}
5360

61+
# Define the hash with emoji values
62+
my %msg_emoji = (
63+
error => '',
64+
warning => '⚠️ ',
65+
dot => '',
66+
success => '',
67+
save => '💾'
68+
);
69+
5470
# Read the DDL file (UTF-8)
5571
my $ddl_text = path($ddl_file)->slurp_utf8;
5672

@@ -64,40 +80,42 @@ my $schemas = $validator->load_schemas_from_ddl($ddl_text);
6480
if ($schemas_file) {
6581
my $json = JSON::XS->new->utf8->pretty->encode($schemas);
6682
path($schemas_file)->spew_utf8($json);
67-
say color("green"), "💾 Schemas saved to '$schemas_file'", color("reset");
83+
say color("green"), "$msg_emoji{save} Schemas saved to '$schemas_file'",
84+
color("reset");
6885
}
6986

7087
# Determine the schema to use (either from --table or derived from CSV filename)
71-
my $schema;
88+
my ($schema, $schema_name);
7289
if ($table_name) {
73-
$schema = $schemas->{ lc $table_name }
74-
or die color("red"), "❌ No schema found for table '$table_name'\n",
75-
color("reset");
90+
$schema = $schemas->{ lc $table_name } or die color("red"), "$msg_emoji{error} No schema found for table '$table_name'\n", color("reset");
91+
$schema_name = $table_name;
7692
}
7793
else {
78-
$schema = $validator->get_schema_from_csv_filename( $csv_file, $schemas )
79-
or die color("red"),
80-
"❌ No schema found for table derived from '$csv_file'\n", color("reset");
94+
$schema = $validator->get_schema_from_csv_filename( $csv_file, $schemas ) or die color("red"), "$msg_emoji{error} No schema found for table derived from '$csv_file'\n", color("reset");
95+
$schema_name = basename($csv_file);
96+
$schema_name =~ s/\.csv$//i;
8197
}
8298

8399
# Validate the CSV file
84100
my $errors = $validator->validate_csv_file( $csv_file, $schema, $sep );
85101

86102
if (@$errors) {
87-
say color("bold white on_red"), "❌ Validation errors found:",
103+
say color("bold white on_red"),
104+
"$msg_emoji{error} Validation errors found:",
88105
color("reset");
89106
foreach my $err (@$errors) {
90-
say color("red"), "⚠️ Row $err->{row} validation failed:",
107+
say color("red"),
108+
"$msg_emoji{warning} Row $err->{row} validation failed:",
91109
color("reset");
92110
foreach my $msg ( @{ $err->{errors} } ) {
93-
say " $msg";
111+
say " $msg_emoji{dot} $msg";
94112
}
95113
}
96114
exit 1;
97115
}
98116
else {
99117
say color("bold white on_green"),
100-
" CSV file 'person.csv' is valid against the 'person' schema.",
118+
"$msg_emoji{success} CSV file '$csv_file' is valid against the '$schema_name' schema.",
101119
color("reset");
102120
exit 0;
103121
}
@@ -137,6 +155,10 @@ to derive the table name from the CSV filename.
137155
138156
(optional) Path to a file where the DDL-derived schemas should be saved (in JSON format).
139157
158+
=item B<--no-color>, B<-nc>
159+
160+
(optional) Turn off colored output (applies to both STDOUT and STDERR messages).
161+
140162
=item B<--help>, B<-h>
141163
142164
Display this help message.

lib/OMOP/CSV/Validator.pm

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use Scalar::Util qw(looks_like_number);
1010
use Path::Tiny;
1111
use Data::Dumper;
1212

13-
our $VERSION = '0.02_1';
13+
our $VERSION = '0.03';
1414

1515
=head1 NAME
1616
@@ -26,7 +26,7 @@ OMOP::CSV::Validator - Validates OMOP CDM CSV files against their expected data
2626
my $schemas = $validator->load_schemas_from_ddl($ddl_text);
2727
2828
# Retrieve specific table schema for a CSV file
29-
my $schema = $validator->get_schema_from_csv_filename($csv_file, $schemas);
29+
my $schema = $validator->get_schema_from_csv_filename($csv_file, $schemas);
3030
3131
# Validate CSV file
3232
my $errors = $validator->validate_csv_file($csv_file, $schema);
@@ -121,12 +121,12 @@ sub _build_schema {
121121
my $prop = {};
122122

123123
if ( $type =~ /int/ ) {
124-
$prop->{type} = 'integer';
125-
$prop->{_type} = 'integer';
124+
$prop->{type} = 'integer';
125+
$prop->{_coerce} = 1;
126126
}
127127
elsif ( $type =~ /numeric|real|double/ ) {
128-
$prop->{type} = 'number';
129-
$prop->{_type} = 'number';
128+
$prop->{type} = 'number';
129+
$prop->{_coerce} = 1;
130130

131131
}
132132
elsif ( $type eq 'date' ) {
@@ -210,12 +210,7 @@ sub validate_csv_file {
210210
for my $col ( keys %{ $schema->{properties} } ) {
211211
if ( exists $record->{$col} ) {
212212
my $prop = $schema->{properties}->{$col};
213-
if (
214-
defined $prop->{_type}
215-
&& ( $prop->{_type} eq 'integer'
216-
or $prop->{_type} eq 'number' )
217-
)
218-
{
213+
if ( defined $prop->{_coerce} && $prop->{_coerce} ) {
219214
$record->{$col} =
220215
$self->dotify_and_coerce_number( $record->{$col} );
221216
}

0 commit comments

Comments
 (0)