Skip to content

Commit 36dcb47

Browse files
committed
Add varchar support
1 parent e978542 commit 36dcb47

2 files changed

Lines changed: 45 additions & 9 deletions

File tree

lib/OMOP/CSV/Validator.pm

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,16 +106,16 @@ sub _build_schema {
106106
required => [],
107107
additionalProperties => 0,
108108
};
109+
109110
for my $line ( grep /\S/, split /\n/, $cols_block ) {
110-
$line =~ s/^\s+|\s+$//g;
111-
$line =~ s/,$//;
112-
next if $line =~ /^--/; # skip comment lines
113-
# Greedy match for the type
114-
if ( $line =~
115-
/^(\w+)\s+([A-Za-z]+)(?:\(\d+(?:,\d+)?\))?(?:\s+(NOT NULL))?/i )
116-
{
117-
my ( $col, $type, $notnull ) = ( lc $1, lc $2, defined $3 );
111+
$line =~ s/^\s+|\s+$//g;
112+
$line =~ s/,$//;
113+
next if $line =~ /^--/; # Skip comment lines
114+
115+
if ( $line =~ /^(\w+)\s+([A-Za-z]+)(?:\((\d+(?:,\d+)?)\))?(?:\s+(NOT NULL))?/i ) {
116+
my ( $col, $type, $length, $notnull ) = ( lc $1, lc $2, $3, defined $4 );
118117
my $prop = {};
118+
119119
if ( $type =~ /int/ ) {
120120
$prop->{type} = 'integer';
121121
}
@@ -130,9 +130,19 @@ sub _build_schema {
130130
$prop->{type} = 'string';
131131
$prop->{format} = 'date-time';
132132
}
133+
elsif ( $type eq 'varchar' ) {
134+
$prop->{type} = 'string';
135+
if ( defined $length ) {
136+
# Use the leading integer as maxLength; anything after a comma (e.g. "10,2") is ignored
137+
if ( $length =~ /^(\d+)/ ) {
138+
$prop->{maxLength} = int($1);
139+
}
140+
}
141+
}
133142
else {
134143
$prop->{type} = 'string';
135144
}
145+
136146
$schema->{properties}{$col} = $prop;
137147
push @{ $schema->{required} }, $col if $notnull;
138148
}

t/01-csv.t

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use strict;
33
use warnings;
44
use utf8;
55
use lib 'lib';
6-
use Test::More tests => 4;
6+
use Test::More tests => 6;
77
use Path::Tiny;
88
use JSON::XS;
99
use Text::CSV_XS;
@@ -134,6 +134,22 @@ my $errors_obs_invalid =
134134
ok( scalar(@$errors_obs_invalid) > 0,
135135
'Invalid observation CSV returns errors' );
136136

137+
# (5) Person invalid CSV due to varchar length violation:
138+
# person_source_value exceeds 50 characters.
139+
my $person_invalid_varchar_csv = $sections{'CSV_person_invalid_varchar'} // '';
140+
my $errors_person_invalid_varchar =
141+
validate_csv_from_text( $validator, $person_invalid_varchar_csv, $person_schema, ',' );
142+
ok( scalar(@$errors_person_invalid_varchar) > 0,
143+
'Person CSV with varchar length violation returns errors' );
144+
145+
# (6) Observation invalid CSV due to varchar length violation:
146+
# value_as_string exceeds 60 characters.
147+
my $obs_invalid_varchar_csv = $sections{'CSV_observation_invalid_varchar'} // '';
148+
my $errors_obs_invalid_varchar =
149+
validate_csv_from_text( $validator, $obs_invalid_varchar_csv, $obs_schema, ',' );
150+
ok( scalar(@$errors_obs_invalid_varchar) > 0,
151+
'Observation CSV with varchar length violation returns errors' );
152+
137153
done_testing();
138154

139155
__DATA__
@@ -179,6 +195,11 @@ person_id,gender_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_date
179195
A,8532,1963,12,31,"1966-12-31T00:00:00Z",8516,0,\N,\N,\N,source1,F,0,black,0,west_indian,0
180196
__END_CSV_person_invalid__
181197
198+
__CSV_person_invalid_varchar__
199+
person_id,gender_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_datetime,race_concept_id,ethnicity_concept_id,location_id,provider_id,care_site_id,person_source_value,gender_source_value,gender_source_concept_id,race_source_value,race_source_concept_id,ethnicity_source_value,ethnicity_source_concept_id
200+
2,8532,1963,12,31,"1966-12-31T00:00:00Z",8516,0,\N,\N,\N,"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",F,0,black,0,west_indian,0
201+
__END_CSV_person_invalid_varchar__
202+
182203
__CSV_observation_valid__
183204
observation_id,person_id,observation_date,observation_datetime,value_as_number,value_as_string
184205
1,1,1963-12-31,"1963-12-31T00:00:00Z",123.45,valid observation
@@ -188,3 +209,8 @@ __CSV_observation_invalid__
188209
observation_id,person_id,observation_date,observation_datetime,value_as_number,value_as_string
189210
X,1,1963-12-31,"not a timestamp",abc,invalid observation
190211
__END_CSV_observation_invalid__
212+
213+
__CSV_observation_invalid_varchar__
214+
observation_id,person_id,observation_date,observation_datetime,value_as_number,value_as_string
215+
2,1,1963-12-31,"1963-12-31T00:00:00Z",123.45,"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
216+
__END_CSV_observation_invalid_varchar__

0 commit comments

Comments
 (0)