Skip to content

Commit 54112d0

Browse files
committed
Update differ to output MCF files
1 parent 95e6411 commit 54112d0

19 files changed

Lines changed: 576 additions & 362 deletions

tools/import_differ/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ python import_differ.py \
2424
- previous\_data: Path to the previous data (wildcard on local/GCS supported).
2525
- output\_location: Path to the output data folder (local/GCS).
2626
- file\_format: Format of the input data (mcf,tfrecord).
27-
- runner\_mode: Runner mode: local (Python) / cloud (Dataflow in Cloud).
27+
- runner\_mode: Runner mode: native (Python) / direct (Java runner) / cloud (Dataflow in Cloud).
2828
- project\_id: GCP project Id for the dataflow job.
2929
- job\_name: Name of the differ dataflow job.
3030

tools/import_differ/differ_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,28 @@ def write_json_data(data, dest: str, file: str, tmp_dir: str):
8484
upload_output_data(path, dest)
8585

8686

87+
def write_mcf_nodes(nodes: list, dest: str, file: str, tmp_dir: str):
    """Writes MCF nodes to a file in the given destination folder.

    Each node is written as a block of `key: value` lines terminated by a
    blank line. The identifying line is written first: `Node` when present,
    otherwise `dcid`. All remaining entries follow in dict order.

    Args:
        nodes: List of dicts mapping property names to values.
        dest: Output folder, either a local path or a `gs://` location.
        file: Name of the output file.
        tmp_dir: Local folder used to stage the file when dest is `gs://`.
    """
    is_gcs = dest.startswith('gs://')
    # GCS output is staged locally and uploaded once it is fully written.
    path = os.path.join(tmp_dir if is_gcs else dest, file)
    with open(path, mode='w', encoding='utf-8') as out_file:
        for node in nodes:
            # Pick the key that heads the block. Only that key is excluded
            # from the property lines below, so a node carrying both `Node`
            # and `dcid` keeps its `dcid` property instead of losing it.
            header = next((key for key in ('Node', 'dcid') if key in node),
                          None)
            lines = []
            if header is not None:
                lines.append(f'{header}: {node[header]}\n')
            lines.extend(f'{key}: {value}\n'
                         for key, value in node.items()
                         if key != header)
            # A blank line separates consecutive nodes.
            lines.append('\n')
            out_file.writelines(lines)
    if is_gcs:
        upload_output_data(path, dest)
107+
108+
87109
def upload_output_data(src: str, dest: str):
88110
client = storage.Client()
89111
bucket_name = dest.split('/')[2]

0 commit comments

Comments
 (0)