77// - Understand the core concepts pipeline, block, and pipe
88// - Understand the general structure of a pipeline
99
10- // 1. This Jayvee model describes a pipeline
11- // from a CSV file in the web
12- // to a SQLite file sink.
10+ // 1. This Jayvee model describes a pipeline from a CSV file on the web to a
11+ // SQLite file sink.
1312pipeline CarsPipeline {
1413
15- // 2. We describe the structure of the pipeline,
16- // usually at the top of the pipeline.
17- // by connecting blocks via pipes.
14+ // 2. We describe the structure of the pipeline, usually at the top of the
15+ // pipeline, by connecting blocks via pipes.
1816
1917 // 3. Syntax of a pipe
2018 // connecting the block CarsExtractor
2119 // with the block CarsTextFileInterpreter.
2220 CarsExtractor
2321 -> CarsTextFileInterpreter;
2422
25- // 4. The output of the preceding block is hereby used
26- // as input for the succeeding block.
23+ // 4. The output of the preceding block is hereby used as input for the
24+ // succeeding block.
2725
2826 // 5. Pipes can be further chained,
2927 // leading to an overview of the pipeline.
@@ -34,74 +32,100 @@ pipeline CarsPipeline {
3432 -> CarsLoader;
3533
3634
37- // 6. Below the pipes, we usually define the blocks
38- // that are connected by the pipes.
35+ // 6. Below the pipes, we usually define the blocks that are connected by the
36+ // pipes.
3937
4038 // 7. Blocks instantiate a block type by using the oftype keyword.
41- // The block type defines the available properties that the block
42- // can use to specify the intended behavior of the block
39+ // The block type defines the available properties that can be used to specify
40+ // the intended behavior of the block.
4341 block CarsExtractor oftype HttpExtractor {
4442
4543 // 8. Properties are assigned to concrete values.
4644 // Here, we specify the URL where the file shall be downloaded from.
4745 url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv";
4846 }
4947
50- // 9. The HttpExtractor requires no input and produces a binary file as output.
51- // This file has to be interpreted, e.g., as text file.
48+ // 9. The HttpExtractor requires no input and produces a binary file as
49+ // output. This file has to be interpreted, e.g., as a text file.
5250 block CarsTextFileInterpreter oftype TextFileInterpreter { }
5351
5452 // 10. Next, we interpret the text file as a sheet.
55- // A sheet only contains text cells and is useful for manipulating the shape of data before assigning more strict value types to cells.
53+ // A sheet only contains text cells and is useful for manipulating the shape
54+ // of data before assigning stricter value types to cells.
5655 block CarsCSVInterpreter oftype CSVInterpreter {
5756 enclosing: '"';
5857 }
5958
6059 // 11. We can write into cells of a sheet using the CellWriter block type.
6160 block NameHeaderWriter oftype CellWriter {
6261 // 12. We utilize a syntax similar to spreadsheet programs.
63- // Cell ranges can be described using the keywords "cell", "row", "column", or "range" that indicate which
64- // cells are selected for the write action.
62+ // Cell ranges can be described using the keywords "cell", "row", "column",
63+ // or "range" that indicate which cells are selected for the write action.
6564 at: cell A1;
6665
67- // 13. For each cell we selected with the "at" property above,
68- // we can specify what value shall be written into the cell.
66+ // 13. For each cell we selected with the "at" property above, we can
67+ // specify what value shall be written into the cell.
6968 write: [
7069 "name"
7170 ];
7271 }
7372
74- // 14. As a next step, we interpret the sheet as a table by adding structure.
75- // We define a value type per column that specifies the data type of the column.
76- // Rows that include values that are not valid according to the their value types are dropped automatically.
73+ // 14. Next, we define the schema of our table.
74+ // For this, we use a value type where each property corresponds to a column
75+ // in the table. The column will have the same valuetype as the property.
76+ valuetype Car {
77+ property name oftype text;
78+ property mpg oftype decimal;
79+ property cyl oftype integer;
80+ property disp oftype decimal;
81+ property hp oftype integer;
82+ property drat oftype decimal;
83+ property wt oftype decimal;
84+ property qsec oftype decimal;
85+ property vs oftype integer;
86+ property am oftype integer;
87+ property gear oftype integer;
88+ property carb oftype integer;
89+ }
90+
91+ transform CarParser {
92+ from r oftype Collection<text>;
93+ to car oftype Car;
94+
95+ car: {
96+ name: asText (r cellInColumn "name"),
97+ mpg: asDecimal (r cellInColumn "mpg"),
98+ cyl: asInteger (r cellInColumn 2),
99+ disp: asDecimal (r cellInColumn 3),
100+ hp: asInteger (r cellInColumn "hp"),
101+ drat: asDecimal (r cellInColumn "drat"),
102+ wt: asDecimal (r cellInColumn "wt"),
103+ qsec: asDecimal (r cellInColumn "qsec"),
104+ vs: asInteger (r cellInColumn "vs"),
105+ am: asInteger (r cellInColumn "am"),
106+ gear: asInteger (r cellInColumn "gear"),
107+ carb: asInteger (r cellInColumn "carb")
108+ };
109+ }
110+
111+ // 15. As a next step, we interpret the sheet as a table, using the valuetype
112+ // defined above. Rows that include values that are not valid according to
113+ // their value types are dropped automatically.
77114 block CarsTableInterpreter oftype TableInterpreter {
78115 header: true;
79- columns: [
80- "name" oftype text,
81- "mpg" oftype decimal,
82- "cyl" oftype integer,
83- "disp" oftype decimal,
84- "hp" oftype integer,
85- "drat" oftype decimal,
86- "wt" oftype decimal,
87- "qsec" oftype decimal,
88- "vs" oftype integer,
89- "am" oftype integer,
90- "gear" oftype integer,
91- "carb" oftype integer
92- ];
116+ columns: Car;
117+ parseWith: CarParser;
93118 }
94119
95- // 15. As a last step, we load the table into a sink,
96- // here into a sqlite file.
97- // The structural information of the table is used
98- // to generate the correct table.
120+ // 16. As a last step, we load the table into a sink, here into a sqlite file.
121+ // The structural information of the table is used to generate the correct
122+ // table.
99123 block CarsLoader oftype SQLiteLoader {
100124 table: "Cars";
101125 file: "./cars.sqlite";
102126 }
103127
104- // 16 . Congratulations!
105- // You can now use the sink for your data analysis, app,
106- // or whatever you want to do with the cleaned data.
107- }
128+ // 17. Congratulations!
129+ // You can now use the sink for your data analysis, app, or whatever you want
130+ // to do with the cleaned data.
131+ }
0 commit comments