Skip to content

Commit 2c2a1b2

Browse files
authored
Merge pull request #688 from jvalue/table-schema-value-type-impl
RFC0020 implementation
2 parents 6607f21 + 99ab83f commit 2c2a1b2

72 files changed

Lines changed: 1903 additions & 810 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

apps/interpreter/test/assets/broken-model.jv

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,50 @@ pipeline CarsPipeline {
2828
write: ["name"];
2929
}
3030

31+
valuetype Car {
32+
property name oftype text;
33+
property mpg oftype decimal;
34+
property cyl oftype integer;
35+
property disp oftype decimal;
36+
property hp oftype integer;
37+
property drat oftype decimal;
38+
property wt oftype decimal;
39+
property qsec oftype decimal;
40+
property vs oftype integer;
41+
property am oftype integer;
42+
property gear oftype integer;
43+
property carb oftype integer;
44+
}
45+
46+
transform CarParser {
47+
from r oftype Collection<text>;
48+
to car oftype Car;
49+
50+
car: {
51+
name: asText (r cellInColumn "name"),
52+
mpg: asDecimal (r cellInColumn "mpg"),
53+
cyl: asInteger (r cellInColumn 2),
54+
disp: asDecimal (r cellInColumn 3),
55+
hp: asInteger (r cellInColumn "hp"),
56+
drat: asDecimal (r cellInColumn "drat"),
57+
wt: asDecimal (r cellInColumn "wt"),
58+
qsec: asDecimal (r cellInColumn "qsec"),
59+
vs: asInteger (r cellInColumn "vs"),
60+
am: asInteger (r cellInColumn "am"),
61+
gear: asInteger (r cellInColumn "gear"),
62+
carb: asInteger (r cellInColumn "carb")
63+
};
64+
}
65+
3166
block CarsTableInterpreter oftype TableInterpreter {
3267
header: true;
33-
columns: [
34-
"name" oftype text,
35-
"mpg" oftype decimal,
36-
"cyl" oftype integer,
37-
"disp" oftype decimal,
38-
"hp" oftype integer,
39-
"drat" oftype decimal,
40-
"wt" oftype decimal,
41-
"qsec" oftype decimal,
42-
"vs" oftype integer,
43-
"am" oftype integer,
44-
"gear" oftype integer,
45-
"carb" oftype integer
46-
];
68+
columns: Car;
69+
parseWith: CarParser;
4770
}
4871

4972
block CarsLoader oftype SQLiteLoader {
5073
table: "Cars";
5174
file: "./cars.sqlite";
5275
}
5376

54-
}
77+
}

example/cars.jv

Lines changed: 68 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,21 @@
77
// - Understand the core concepts pipeline, block, and pipe
88
// - Understand the general structure of a pipeline
99

10-
// 1. This Jayvee model describes a pipeline
11-
// from a CSV file in the web
12-
// to a SQLite file sink.
10+
// 1. This Jayvee model describes a pipeline from a CSV file on the web to a
11+
// SQLite file sink.
1312
pipeline CarsPipeline {
1413

15-
// 2. We describe the structure of the pipeline,
16-
// usually at the top of the pipeline.
17-
// by connecting blocks via pipes.
14+
// 2. We describe the structure of the pipeline, usually at the top of the
15+
// pipeline, by connecting blocks via pipes.
1816

1917
// 3. Syntax of a pipe
2018
// connecting the block CarsExtractor
2119
// with the block CarsTextFileInterpreter.
2220
CarsExtractor
2321
-> CarsTextFileInterpreter;
2422

25-
// 4. The output of the preceding block is hereby used
26-
// as input for the succeeding block.
23+
// 4. The output of the preceding block is hereby used as input for the
24+
// succeeding block.
2725

2826
// 5. Pipes can be further chained,
2927
// leading to an overview of the pipeline.
@@ -34,74 +32,100 @@ pipeline CarsPipeline {
3432
-> CarsLoader;
3533

3634

37-
// 6. Below the pipes, we usually define the blocks
38-
// that are connected by the pipes.
35+
// 6. Below the pipes, we usually define the blocks that are connected by the
36+
// pipes.
3937

4038
// 7. Blocks instantiate a block type by using the oftype keyword.
41-
// The block type defines the available properties that the block
42-
// can use to specify the intended behavior of the block
39+
// The block type defines the available properties that can be used to specify
40+
// the intended behavior of the block.
4341
block CarsExtractor oftype HttpExtractor {
4442

4543
// 8. Properties are assigned to concrete values.
4644
// Here, we specify the URL where the file shall be downloaded from.
4745
url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv";
4846
}
4947

50-
// 9. The HttpExtractor requires no input and produces a binary file as output.
51-
// This file has to be interpreted, e.g., as text file.
48+
// 9. The HttpExtractor requires no input and produces a binary file as
49+
// output. This file has to be interpreted, e.g., as a text file.
5250
block CarsTextFileInterpreter oftype TextFileInterpreter { }
5351

5452
// 10. Next, we interpret the text file as a sheet.
55-
// A sheet only contains text cells and is useful for manipulating the shape of data before assigning more strict value types to cells.
53+
// A sheet only contains text cells and is useful for manipulating the shape
54+
// of data before assigning more strict value types to cells.
5655
block CarsCSVInterpreter oftype CSVInterpreter {
5756
enclosing: '"';
5857
}
5958

6059
// 11. We can write into cells of a sheet using the CellWriter block type.
6160
block NameHeaderWriter oftype CellWriter {
6261
// 12. We utilize a syntax similar to spreadsheet programs.
63-
// Cell ranges can be described using the keywords "cell", "row", "column", or "range" that indicate which
64-
// cells are selected for the write action.
62+
// Cell ranges can be described using the keywords "cell", "row", "column",
63+
// or "range" that indicate which cells are selected for the write action.
6564
at: cell A1;
6665

67-
// 13. For each cell we selected with the "at" property above,
68-
// we can specify what value shall be written into the cell.
66+
// 13. For each cell we selected with the "at" property above, we can
67+
// specify what value shall be written into the cell.
6968
write: [
7069
"name"
7170
];
7271
}
7372

74-
// 14. As a next step, we interpret the sheet as a table by adding structure.
75-
// We define a value type per column that specifies the data type of the column.
76-
// Rows that include values that are not valid according to the their value types are dropped automatically.
73+
// 14. Next, we define the schema of our table.
74+
// For this, we use a value type where each property corresponds to a column
75+
// in the table. The column will have the same valuetype as the property.
76+
valuetype Car {
77+
property name oftype text;
78+
property mpg oftype decimal;
79+
property cyl oftype integer;
80+
property disp oftype decimal;
81+
property hp oftype integer;
82+
property drat oftype decimal;
83+
property wt oftype decimal;
84+
property qsec oftype decimal;
85+
property vs oftype integer;
86+
property am oftype integer;
87+
property gear oftype integer;
88+
property carb oftype integer;
89+
}
90+
91+
transform CarParser {
92+
from r oftype Collection<text>;
93+
to car oftype Car;
94+
95+
car: {
96+
name: asText (r cellInColumn "name"),
97+
mpg: asDecimal (r cellInColumn "mpg"),
98+
cyl: asInteger (r cellInColumn 2),
99+
disp: asDecimal (r cellInColumn 3),
100+
hp: asInteger (r cellInColumn "hp"),
101+
drat: asDecimal (r cellInColumn "drat"),
102+
wt: asDecimal (r cellInColumn "wt"),
103+
qsec: asDecimal (r cellInColumn "qsec"),
104+
vs: asInteger (r cellInColumn "vs"),
105+
am: asInteger (r cellInColumn "am"),
106+
gear: asInteger (r cellInColumn "gear"),
107+
carb: asInteger (r cellInColumn "carb")
108+
};
109+
}
110+
111+
// 15. As a next step, we interpret the sheet as a table, using the valuetype
112+
// defined above. Rows that include values that are not valid according to
113+
// their value types are dropped automatically.
77114
block CarsTableInterpreter oftype TableInterpreter {
78115
header: true;
79-
columns: [
80-
"name" oftype text,
81-
"mpg" oftype decimal,
82-
"cyl" oftype integer,
83-
"disp" oftype decimal,
84-
"hp" oftype integer,
85-
"drat" oftype decimal,
86-
"wt" oftype decimal,
87-
"qsec" oftype decimal,
88-
"vs" oftype integer,
89-
"am" oftype integer,
90-
"gear" oftype integer,
91-
"carb" oftype integer
92-
];
116+
columns: Car;
117+
parseWith: CarParser;
93118
}
94119

95-
// 15. As a last step, we load the table into a sink,
96-
// here into a sqlite file.
97-
// The structural information of the table is used
98-
// to generate the correct table.
120+
// 16. As a last step, we load the table into a sink, here into a sqlite file.
121+
// The structural information of the table is used to generate the correct
122+
// table.
99123
block CarsLoader oftype SQLiteLoader {
100124
table: "Cars";
101125
file: "./cars.sqlite";
102126
}
103127

104-
// 16. Congratulations!
105-
// You can now use the sink for your data analysis, app,
106-
// or whatever you want to do with the cleaned data.
107-
}
128+
// 17. Congratulations!
129+
// You can now use the sink for your data analysis, app, or whatever you want
130+
// to do with the cleaned data.
131+
}

example/electric-vehicles.jv

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -49,30 +49,56 @@ pipeline ElectricVehiclesPipeline {
4949

5050
block ElectricVehiclesCSVInterpreter oftype CSVInterpreter { }
5151

52+
valuetype ElectricVehicle {
53+
property vin oftype VehicleIdentificationNumber10;
54+
property county oftype text;
55+
property city oftype text;
56+
property state oftype UsStateCode;
57+
property postal oftype text;
58+
property modelYear oftype integer;
59+
property make oftype text;
60+
property model oftype text;
61+
property evType oftype text;
62+
property cafvEligibility oftype text;
63+
property electricRange oftype integer;
64+
property baseMSRP oftype integer;
65+
property legislativeDistrict oftype text;
66+
property dolID oftype integer;
67+
property location oftype text;
68+
property utility oftype text;
69+
property censusTract oftype text;
70+
}
71+
72+
transform ElectricVehicleParser { // Builds an ElectricVehicle from a collection of text cells, looking up each cell by column name.
73+
from r oftype Collection<text>; // input: the text cells to parse
74+
to ev oftype ElectricVehicle; // output: one ElectricVehicle value
75+
76+
ev: {
77+
vin: r cellInColumn "VIN (1-10)",
78+
county: asText (r cellInColumn "County"),
79+
city: asText (r cellInColumn "City"),
80+
state: asText (r cellInColumn "State"),
81+
postal: asText (r cellInColumn "Postal Code"),
82+
modelYear: asInteger (r cellInColumn "Model Year"),
83+
make: asText (r cellInColumn "Make"),
84+
model: asText (r cellInColumn "Model"),
85+
evType: asText (r cellInColumn "Electric Vehicle Type"),
86+
cafvEligibility: asText (r cellInColumn "Clean Alternative Fuel Vehicle (CAFV) Eligibility"),
87+
electricRange: asInteger (r cellInColumn "Electric Range"),
88+
baseMSRP: asInteger (r cellInColumn "Base MSRP"),
89+
legislativeDistrict: asText (r cellInColumn "Legislative District"), // the column name is two words, separated by a space
90+
dolID: asInteger (r cellInColumn "DOL Vehicle ID"),
91+
location: asText (r cellInColumn "Vehicle Location"),
92+
utility: asText (r cellInColumn "Electric Utility"),
93+
censusTract: asText (r cellInColumn "2020 Census Tract"),
94+
};
95+
}
96+
97+
5298
block ElectricVehiclesTableInterpreter oftype TableInterpreter {
5399
header: true;
54-
columns: [
55-
// 4. Here, a user-deifned value type is used to describe this column.
56-
// The capital letter indicates that the value type is not built-in
57-
// by convention. The value type itself is defined further below.
58-
"VIN (1-10)" oftype VehicleIdentificationNumber10,
59-
"County" oftype text,
60-
"City" oftype text,
61-
"State" oftype UsStateCode, // We can just use the element as if it was defined in this file.
62-
"Postal Code" oftype text,
63-
"Model Year" oftype integer,
64-
"Make" oftype text,
65-
"Model" oftype text,
66-
"Electric Vehicle Type" oftype text,
67-
"Clean Alternative Fuel Vehicle (CAFV) Eligibility" oftype text,
68-
"Electric Range" oftype integer,
69-
"Base MSRP" oftype integer,
70-
"Legislative District" oftype text,
71-
"DOL Vehicle ID" oftype integer,
72-
"Vehicle Location" oftype text,
73-
"Electric Utility" oftype text,
74-
"2020 Census Tract" oftype text,
75-
];
100+
columns: ElectricVehicle;
101+
parseWith: ElectricVehicleParser;
76102
}
77103

78104
// 5. This block describes the application of a transform function
@@ -81,9 +107,9 @@ pipeline ElectricVehiclesPipeline {
81107
// by the "use" property.
82108
block ElectricRangeTransformer oftype TableTransformer {
83109
inputColumns: [
84-
"Electric Range"
110+
"electricRange"
85111
];
86-
outputColumn: "Electric Range (km)";
112+
outputColumn: "electricRange (km)";
87113
uses: MilesToKilometers;
88114
}
89115

0 commit comments

Comments
 (0)