11import json
2+ import pathlib
3+ from typing import Literal
24
3- from utils import MONGODB_BASE_NAME , MONGODB_CONNECTION_STRING
5+ from utils import MONGODB_BASE_NAME , MONGODB_CONNECTION_STRING , MongoDBContext
46
57import pathway as pw
68from pathway .internals .parse_graph import G
79
810
9- def test_mongodb (tmp_path , mongodb ):
def write_items_with_connector(
    *,
    mongodb: MongoDBContext,
    test_items: list[dict],
    input_path: pathlib.Path,
    schema: type[pw.Schema],
    output_collection: str,
    output_table_type: Literal["stream_of_changes", "snapshot"],
    persistence_config: pw.persistence.Config | None = None,
) -> list[dict]:
    """Run one static pipeline that pushes ``test_items`` into MongoDB.

    The items are dumped as JSON Lines into ``input_path`` (overwriting any
    previous content), read back with ``schema`` in static mode, and written
    to ``output_collection`` through ``pw.io.mongodb.write``.

    Args:
        mongodb: Test context used to read the collection back.
        test_items: Rows to serialize, one JSON object per line.
        input_path: File that serves as the pipeline input.
        schema: Pathway schema used to parse the input file.
        output_collection: Name of the target MongoDB collection.
        output_table_type: Output mode forwarded to the MongoDB writer.
        persistence_config: Optional persistence settings passed to ``pw.run``.

    Returns:
        The contents of ``output_collection`` restricted to the schema's
        columns, sorted by ``(name, available)``. Every returned item must
        therefore carry both of these keys.
    """
    # Each call builds a brand new computation graph, so drop whatever a
    # previous call (or test) has left in the global one.
    G.clear()

    with open(input_path, "w") as f:
        f.writelines(json.dumps(test_item) + "\n" for test_item in test_items)

    table = pw.io.jsonlines.read(input_path, schema=schema, mode="static")
    pw.io.mongodb.write(
        table,
        connection_string=MONGODB_CONNECTION_STRING,
        database=MONGODB_BASE_NAME,
        collection=output_collection,
        output_table_type=output_table_type,
    )
    pw.run(persistence_config=persistence_config)

    result = mongodb.get_collection(output_collection, schema.column_names())
    # Sort so that callers can compare against a literal expected list.
    result.sort(key=lambda item: (item["name"], item["available"]))
    return result
38+
39+
def check_special_fields(
    mongodb: MongoDBContext, output_collection: str, *, are_expected: bool
) -> None:
    """Assert that the ``time`` and ``diff`` keys are uniformly present or absent.

    Every document returned by ``mongodb.get_full_collection`` for
    ``output_collection`` is checked: when ``are_expected`` is true each
    document must contain both keys, otherwise it must contain neither.
    The offending document is attached to the assertion error.
    """
    for document in mongodb.get_full_collection(output_collection):
        # "time" is checked before "diff", matching the order in which a
        # failure would be reported for a document that violates both.
        for special_field in ("time", "diff"):
            assert (special_field in document) == are_expected, document
49+
50+
def test_mongodb_stream_of_changes(tmp_path, mongodb):
    """Check the ``stream_of_changes`` output mode of the MongoDB writer.

    Two consecutive runs write into the same collection. The test asserts
    that the second run appends its row next to the rows of the first run
    (both "Milk" documents are present afterwards) and that every stored
    document carries the ``time`` and ``diff`` fields.
    """

    class InputSchema(pw.Schema):
        name: str
        count: int
        # NOTE(review): these two columns are required by the test data and
        # by the sort on "available"; types follow the values used below -
        # confirm against the upstream schema.
        price: float
        available: bool

    input_path = tmp_path / "input.txt"
    output_collection = mongodb.generate_collection_name()

    test_items = [
        {"name": "Milk", "count": 500, "price": 1.5, "available": False},
        {"name": "Water", "count": 600, "price": 0.5, "available": True},
    ]
    result = write_items_with_connector(
        mongodb=mongodb,
        test_items=test_items,
        input_path=input_path,
        schema=InputSchema,
        output_collection=output_collection,
        output_table_type="stream_of_changes",
    )
    assert result == test_items
    check_special_fields(mongodb, output_collection, are_expected=True)

    # Second run: same "Milk" row but with a flipped "available" flag.
    new_test_items = [{"name": "Milk", "count": 500, "price": 1.5, "available": True}]
    result = write_items_with_connector(
        mongodb=mongodb,
        test_items=new_test_items,
        input_path=input_path,
        schema=InputSchema,
        output_collection=output_collection,
        output_table_type="stream_of_changes",
    )
    expected_result = [
        {"name": "Milk", "count": 500, "price": 1.5, "available": False},
        {"name": "Milk", "count": 500, "price": 1.5, "available": True},
        {"name": "Water", "count": 600, "price": 0.5, "available": True},
    ]
    assert result == expected_result
    check_special_fields(mongodb, output_collection, are_expected=True)
91+
1892
19- def run (test_items : list [dict ]) -> None :
20- G .clear ()
21- with open (input_path , "w" ) as f :
22- for test_item in test_items :
23- f .write (json .dumps (test_item ) + "\n " )
24- table = pw .io .jsonlines .read (input_path , schema = InputSchema , mode = "static" )
25- pw .io .mongodb .write (
26- table ,
27- connection_string = MONGODB_CONNECTION_STRING ,
28- database = MONGODB_BASE_NAME ,
29- collection = output_collection ,
30- )
31- pw .run ()
def test_mongodb_snapshot(tmp_path, mongodb):
    """Check the ``snapshot`` output mode of the MongoDB writer.

    ``name`` is the primary key of the schema. The test asserts that a second
    run which rewrites "Milk" leaves exactly one "Milk" document (the new
    one) next to the untouched "Water" document, and that no stored document
    carries the ``time`` or ``diff`` fields.
    """

    class InputSchema(pw.Schema):
        name: str = pw.column_definition(primary_key=True)
        count: int
        price: float
        available: bool

    input_path = tmp_path / "input.txt"
    output_collection = mongodb.generate_collection_name()

    test_items = [
        {"name": "Milk", "count": 500, "price": 1.5, "available": False},
        {"name": "Water", "count": 600, "price": 0.5, "available": True},
    ]
    result = write_items_with_connector(
        mongodb=mongodb,
        test_items=test_items,
        input_path=input_path,
        schema=InputSchema,
        output_collection=output_collection,
        output_table_type="snapshot",
    )
    assert result == test_items
    check_special_fields(mongodb, output_collection, are_expected=False)

    # Second run: same primary key ("Milk") with a flipped "available" flag.
    new_test_items = [{"name": "Milk", "count": 500, "price": 1.5, "available": True}]
    result = write_items_with_connector(
        mongodb=mongodb,
        test_items=new_test_items,
        input_path=input_path,
        schema=InputSchema,
        output_collection=output_collection,
        output_table_type="snapshot",
    )
    expected_result = [
        {"name": "Milk", "count": 500, "price": 1.5, "available": True},
        {"name": "Water", "count": 600, "price": 0.5, "available": True},
    ]
    assert result == expected_result
    check_special_fields(mongodb, output_collection, are_expected=False)
133+
134+
def test_mongodb_snapshot_remove(tmp_path, mongodb):
    """Check that ``snapshot`` mode drops documents for rows that disappear.

    Both runs share one filesystem persistence config rooted in ``tmp_path``.
    The second run's input no longer contains "Milk"; the test asserts that
    afterwards the collection holds only "Water" and that no stored document
    carries the ``time`` or ``diff`` fields.
    """

    class InputSchema(pw.Schema):
        name: str = pw.column_definition(primary_key=True)
        count: int
        price: float
        available: bool

    input_path = tmp_path / "input.txt"
    pstorage_path = tmp_path / "PStorage"
    persistence_config = pw.persistence.Config(
        backend=pw.persistence.Backend.filesystem(pstorage_path)
    )

    output_collection = mongodb.generate_collection_name()

    initial_items = [
        {"name": "Milk", "count": 500, "price": 1.5, "available": False},
        {"name": "Water", "count": 600, "price": 0.5, "available": True},
    ]
    result = write_items_with_connector(
        mongodb=mongodb,
        test_items=initial_items,
        input_path=input_path,
        schema=InputSchema,
        output_collection=output_collection,
        output_table_type="snapshot",
        persistence_config=persistence_config,
    )
    assert result == initial_items
    check_special_fields(mongodb, output_collection, are_expected=False)

    # Second run: the "Milk" row is gone from the input file.
    remaining_items = [
        {"name": "Water", "count": 600, "price": 0.5, "available": True},
    ]
    result = write_items_with_connector(
        mongodb=mongodb,
        test_items=remaining_items,
        input_path=input_path,
        schema=InputSchema,
        output_collection=output_collection,
        output_table_type="snapshot",
        persistence_config=persistence_config,
    )
    assert result == remaining_items
    check_special_fields(mongodb, output_collection, are_expected=False)
0 commit comments