|
2 | 2 |
|
3 | 3 | import java.io.InputStream; |
4 | 4 | import java.io.Serializable; |
5 | | -import java.sql.Timestamp; |
6 | 5 | import java.time.Duration; |
7 | 6 | import java.time.Instant; |
8 | 7 | import java.util.Properties; |
|
13 | 12 | import org.apache.flink.api.java.tuple.Tuple2; |
14 | 13 | import org.apache.flink.api.java.typeutils.TypeExtractor; |
15 | 14 | import org.apache.flink.connector.jdbc.JdbcConnectionOptions; |
16 | | -import org.apache.flink.connector.jdbc.JdbcExecutionOptions; |
17 | | -import org.apache.flink.connector.jdbc.JdbcSink; |
18 | 15 | import org.apache.flink.connector.kafka.source.KafkaSource; |
19 | 16 | import org.apache.flink.formats.json.JsonDeserializationSchema; |
20 | | -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; |
21 | | -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; |
22 | 17 | import org.apache.flink.streaming.api.datastream.DataStream; |
23 | 18 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; |
24 | 19 | import org.apache.flink.streaming.api.functions.sink.SinkFunction; |
|
27 | 22 | import org.apache.flink.streaming.api.windowing.time.Time; |
28 | 23 |
|
29 | 24 | import models.EnrichedFileSegment; |
| 25 | +import sinks.JdbcSinkFactory; |
30 | 26 | import stats.UserAggregateStat; |
| 27 | +import stats.UserRollingStat; |
31 | 28 | import stats.UserProjectAggregateStat; |
32 | 29 | import stats.StatFactory; |
33 | 30 |
|
@@ -64,100 +61,66 @@ public static void main(String[] args) throws Exception { |
64 | 61 | new UserStatWindowFunction<Integer, UserAggregateStat>("daily", new UserAggregateStatFactory())) |
65 | 62 | .returns(TypeExtractor.getForClass(UserAggregateStat.class)); |
66 | 63 |
|
67 | | - DataStream<UserAggregateStat> rollingStream = timestampedStream.keyBy(EnrichedFileSegment::getUser_id) |
| 64 | + DataStream<UserRollingStat> rollingStream = timestampedStream.keyBy(EnrichedFileSegment::getUser_id) |
68 | 65 | .window(SlidingEventTimeWindows.of(Time.hours(24), Time.seconds(10))) |
69 | | - .process(new UserStatWindowFunction<Integer, UserAggregateStat>("rolling_24h", |
70 | | - new UserAggregateStatFactory())) |
71 | | - .returns(TypeExtractor.getForClass(UserAggregateStat.class)); |
| 66 | + .process(new UserStatWindowFunction<Integer, UserRollingStat>("rolling_24h", |
| 67 | + new UserRollingStatFactory())) |
| 68 | + .returns(TypeExtractor.getForClass(UserRollingStat.class)); |
72 | 69 |
|
73 | 70 | DataStream<UserProjectAggregateStat> projectsRollingStream = timestampedStream |
74 | 71 | .keyBy(new KeySelector<EnrichedFileSegment, Tuple2<Integer, String>>() { |
75 | 72 | @Override |
76 | 73 | public Tuple2<Integer, String> getKey(EnrichedFileSegment seg) throws Exception { |
77 | 74 | return Tuple2.of(seg.getUser_id(), seg.getProject_path()); |
78 | 75 | } |
79 | | - }).window(SlidingEventTimeWindows.of(Time.days(24), Time.seconds(10))) |
80 | | - .process(new UserStatWindowFunction<Tuple2<Integer, String>, UserProjectAggregateStat>("daily_project", |
| 76 | + }).window(TumblingEventTimeWindows.of(Time.days(1))) |
| 77 | + .process(new UserStatWindowFunction<Tuple2<Integer, String>, UserProjectAggregateStat>("daily", |
81 | 78 | new UserProjectAggregateStatFactory())) |
82 | 79 | .returns(TypeExtractor.getForClass(UserProjectAggregateStat.class)); |
83 | 80 |
|
| 81 | + // Use stat type definitions for columns and conflict keys |
| 82 | + |
84 | 83 | JdbcConnectionOptions jdbcOptions = new JdbcConnectionOptions.JdbcConnectionOptionsBuilder() |
85 | 84 | .withUrl("jdbc:postgresql://postgres_db:5432/myapp").withDriverName("org.postgresql.Driver") |
86 | 85 | .withUsername("admin").withPassword("secure_password").build(); |
87 | 86 |
|
88 | | - dailyStream.addSink(createJdbcSink(jdbcOptions, 1, 0, false)); |
89 | | - |
90 | | - rollingStream.addSink(createJdbcSink(jdbcOptions, 1, 0, true)); |
91 | | - |
92 | | - projectsRollingStream.print(); |
| 87 | + // Sinks built via JdbcSinkFactory.createGeneralSink (stats serialized through asRecord) |
| 88 | + SinkFunction<UserAggregateStat> dailySink = JdbcSinkFactory.createGeneralSink("user_stats_aggregate", |
| 89 | + UserAggregateStat.PRIMITIVE_COLUMNS, UserAggregateStat.JSONB_COLUMNS, |
| 90 | + String.join(", ", UserAggregateStat.CONFLICT_KEYS), jdbcOptions, 10, 1000); |
| 91 | + SinkFunction<UserRollingStat> rollingSink = JdbcSinkFactory.createGeneralSink("user_stats_rolling", |
| 92 | + UserRollingStat.PRIMITIVE_COLUMNS, UserRollingStat.JSONB_COLUMNS, |
| 93 | + String.join(", ", UserRollingStat.CONFLICT_KEYS), jdbcOptions, 10, 1000); |
| 94 | + SinkFunction<UserProjectAggregateStat> projectSink = JdbcSinkFactory.createGeneralSink( |
| 95 | + "user_project_stats_aggregate", UserProjectAggregateStat.PRIMITIVE_COLUMNS, |
| 96 | + UserProjectAggregateStat.JSONB_COLUMNS, String.join(", ", UserProjectAggregateStat.CONFLICT_KEYS), |
| 97 | + jdbcOptions, 10, 1000); |
| 98 | + |
| 99 | + dailyStream.addSink(dailySink); |
| 100 | + rollingStream.addSink(rollingSink); |
| 101 | + projectsRollingStream.addSink(projectSink); |
| 102 | + |
| 103 | + // Optionally print for debugging |
| 104 | + // rollingStream.print(); |
| 105 | + // projectsRollingStream.print(); |
93 | 106 |
|
94 | 107 | env.execute("FileSegment Analytics"); |
95 | 108 | } |
96 | 109 |
|
97 | | - private static SinkFunction<UserAggregateStat> createJdbcSink(JdbcConnectionOptions jdbcOptions, int batchSize, |
98 | | - long batchIntervalMs, boolean rolling) { |
99 | | - String sql; |
100 | | - if (rolling) { |
101 | | - sql = "INSERT INTO user_stats_rolling (" |
102 | | - + "user_id, window_type, lang_durations, machine_durations, editor_durations, " |
103 | | - + "project_durations, activity_durations) " |
104 | | - + "VALUES (?, ?, ?::jsonb, ?::jsonb, ?::jsonb, ?::jsonb, ?::jsonb) " |
105 | | - + "ON CONFLICT (user_id, window_type) DO UPDATE SET " + "lang_durations = EXCLUDED.lang_durations, " |
106 | | - + "machine_durations = EXCLUDED.machine_durations, " |
107 | | - + "editor_durations = EXCLUDED.editor_durations, " |
108 | | - + "project_durations = EXCLUDED.project_durations, " |
109 | | - + "activity_durations = EXCLUDED.activity_durations, " + "updated_at = NOW();"; |
110 | | - } else { |
111 | | - sql = "INSERT INTO user_stats_aggregate (" |
112 | | - + "user_id, window_type, lang_durations, machine_durations, editor_durations, " |
113 | | - + "project_durations, activity_durations, window_start, window_end) " |
114 | | - + "VALUES (?, ?, ?::jsonb, ?::jsonb, ?::jsonb, ?::jsonb, ?::jsonb, ?, ?)" |
115 | | - + "ON CONFLICT (user_id, window_type, window_start) DO UPDATE SET " |
116 | | - + "window_end = EXCLUDED.window_end, " + "lang_durations = EXCLUDED.lang_durations, " |
117 | | - + "machine_durations = EXCLUDED.machine_durations, " |
118 | | - + "editor_durations = EXCLUDED.editor_durations, " |
119 | | - + "project_durations = EXCLUDED.project_durations, " |
120 | | - + "activity_durations = EXCLUDED.activity_durations, " + "updated_at = NOW();"; |
121 | | - } |
122 | | - |
123 | | - return JdbcSink.sink(sql, (ps, stat) -> { |
124 | | - ObjectMapper mapper = new ObjectMapper(); |
125 | | - UserAggregateStat userStat = (UserAggregateStat) stat; |
126 | | - ps.setInt(1, userStat.getUserId()); |
127 | | - ps.setString(2, stat.getWindowType()); |
128 | | - |
129 | | - if (!rolling) { |
130 | | - ps.setTimestamp(8, Timestamp.from(userStat.getWindowStart())); |
131 | | - ps.setTimestamp(9, Timestamp.from(userStat.getWindowEnd())); |
132 | | - } |
133 | | - |
134 | | - try { |
135 | | - // 5-9: Serialize Map fields to JSON strings |
136 | | - ps.setString(3, mapper.writeValueAsString(userStat.getLangDurations())); |
137 | | - ps.setString(4, mapper.writeValueAsString(userStat.getMachineDurations())); |
138 | | - ps.setString(5, mapper.writeValueAsString(userStat.getEditorDurations())); |
139 | | - ps.setString(6, mapper.writeValueAsString(userStat.getProjectDurations())); |
140 | | - ps.setString(7, mapper.writeValueAsString(userStat.getActivityDurations())); |
141 | | - } catch (JsonProcessingException e) { |
142 | | - // Handle serialization error gracefully |
143 | | - e.printStackTrace(); |
144 | | - for (int i = 3; i <= 7; i++) |
145 | | - ps.setString(i, "{}"); // Send empty JSON object on error |
146 | | - } |
147 | | - }, |
148 | | - // Define execution options (batching) |
149 | | - JdbcExecutionOptions.builder().withBatchSize(batchSize).withBatchIntervalMs(batchIntervalMs).build(), |
150 | | - // Provide JDBC connection details |
151 | | - jdbcOptions); |
152 | | - } |
153 | | - |
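For reference, the per-table sink builder removed above is superseded by the generic sinks.JdbcSinkFactory called in main(). The factory itself is not part of this diff; the following is a minimal sketch of what createGeneralSink could look like, assuming the PRIMITIVE_COLUMNS/JSONB_COLUMNS constants are String arrays and each stat exposes its values as a column-name-to-value map via an asRecord() accessor (the HasRecord interface and the asRecord() name are assumptions, echoing the comment in main()):

package sinks;

import java.sql.PreparedStatement;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

public final class JdbcSinkFactory {

    // Sketch: builds the same INSERT ... ON CONFLICT upsert the removed
    // createJdbcSink() hard-coded per table, but from column metadata.
    public static <T extends HasRecord> SinkFunction<T> createGeneralSink(
            String table, String[] primitiveColumns, String[] jsonbColumns,
            String conflictKeys, JdbcConnectionOptions jdbcOptions,
            int batchSize, long batchIntervalMs) {

        String columns = Stream.concat(Stream.of(primitiveColumns), Stream.of(jsonbColumns))
                .collect(Collectors.joining(", "));
        // Plain columns get "?", jsonb columns get "?::jsonb", as in the removed SQL.
        String placeholders = Stream.concat(
                Stream.of(primitiveColumns).map(c -> "?"),
                Stream.of(jsonbColumns).map(c -> "?::jsonb"))
            .collect(Collectors.joining(", "));
        String updates = Stream.concat(Stream.of(primitiveColumns), Stream.of(jsonbColumns))
                .map(c -> c + " = EXCLUDED." + c)
                .collect(Collectors.joining(", "));

        String sql = "INSERT INTO " + table + " (" + columns + ") "
                + "VALUES (" + placeholders + ") "
                + "ON CONFLICT (" + conflictKeys + ") DO UPDATE SET "
                + updates + ", updated_at = NOW()";

        return JdbcSink.sink(sql, (PreparedStatement ps, T stat) -> {
            Map<String, Object> record = stat.asRecord();
            int i = 1;
            for (String c : primitiveColumns) ps.setObject(i++, record.get(c));
            for (String c : jsonbColumns) ps.setString(i++, String.valueOf(record.get(c)));
        }, JdbcExecutionOptions.builder()
                .withBatchSize(batchSize).withBatchIntervalMs(batchIntervalMs).build(),
           jdbcOptions);
    }

    // Assumed contract implemented by the stat classes.
    public interface HasRecord {
        Map<String, Object> asRecord();
    }
}

The generated statement reproduces the shape of the removed SQL, including the ?::jsonb casts, the conflict-key clause, and the updated_at = NOW() touch on update; both target tables in the removed code carry an updated_at column.
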
154 | 110 | public static class UserAggregateStatFactory implements StatFactory<Integer, UserAggregateStat>, Serializable { |
155 | 111 | @Override |
156 | 112 | public UserAggregateStat create(Integer key) { |
157 | 113 | return new UserAggregateStat(key); |
158 | 114 | } |
159 | 115 | } |
160 | 116 |
|
| 117 | + public static class UserRollingStatFactory implements StatFactory<Integer, UserRollingStat>, Serializable { |
| 118 | + @Override |
| 119 | + public UserRollingStat create(Integer key) { |
| 120 | + return new UserRollingStat(key); |
| 121 | + } |
| 122 | + } |
| 123 | + |
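stats.UserRollingStat is likewise referenced but not shown in this diff. From its use in main() and the column list of the removed user_stats_rolling upsert, a plausible sketch follows; only the column and conflict-key names come from the removed SQL, while the grouped internal map and the asRecord() implementation are assumptions for illustration (matching the JdbcSinkFactory sketch above):

package stats;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;

// Sketch only. Column names mirror the user_stats_rolling table from the
// removed SQL; asRecord() is the assumed accessor consumed by
// JdbcSinkFactory.createGeneralSink.
public class UserRollingStat implements Serializable, sinks.JdbcSinkFactory.HasRecord {

    public static final String[] PRIMITIVE_COLUMNS = { "user_id", "window_type" };
    public static final String[] JSONB_COLUMNS = { "lang_durations", "machine_durations",
            "editor_durations", "project_durations", "activity_durations" };
    public static final String[] CONFLICT_KEYS = { "user_id", "window_type" };

    private final int userId;
    private String windowType = "rolling_24h";
    // Simplified: one map of duration maps keyed by jsonb column name.
    private final Map<String, Map<String, Long>> jsonbFields = new HashMap<>();

    public UserRollingStat(int userId) {
        this.userId = userId;
        for (String col : JSONB_COLUMNS) {
            jsonbFields.put(col, new HashMap<>());
        }
    }

    @Override
    public Map<String, Object> asRecord() {
        Map<String, Object> record = new HashMap<>();
        record.put("user_id", userId);
        record.put("window_type", windowType);
        ObjectMapper mapper = new ObjectMapper();
        jsonbFields.forEach((col, durations) -> {
            try {
                record.put(col, mapper.writeValueAsString(durations));
            } catch (Exception e) {
                record.put(col, "{}"); // empty JSON on serialization error, as the old sink did
            }
        });
        return record;
    }
}
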
161 | 124 | public static class UserProjectAggregateStatFactory |
162 | 125 | implements StatFactory<Tuple2<Integer, String>, UserProjectAggregateStat>, Serializable { |
163 | 126 | @Override |
|