Skip to content

Commit e8d33a3

Browse files
committed
Fix exploratory Jupyter notebooks
1 parent f3997bd commit e8d33a3

7 files changed

Lines changed: 567 additions & 621 deletions

File tree

domains/anomaly-detection/explore/NodeEmbeddingsHyperparameterTuningExploration.ipynb

Lines changed: 67 additions & 186 deletions
Large diffs are not rendered by default.

domains/external-dependencies/explore/ExternalDependenciesJava.ipynb

Lines changed: 267 additions & 226 deletions
Large diffs are not rendered by default.

domains/external-dependencies/explore/ExternalDependenciesTypescript.ipynb

Lines changed: 159 additions & 149 deletions
Large diffs are not rendered by default.

domains/java/explore/MethodMetricsJavaExploration.ipynb

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"# before starting jupyter notebook to provide the password for the user \"neo4j\". \n",
3939
"# It is not recommended to hardcode the password into jupyter notebook for security reasons.\n",
4040
"\n",
41-
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\")))\n",
41+
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\") or \"\"))\n",
4242
"driver.verify_connectivity()"
4343
]
4444
},
@@ -49,13 +49,15 @@
4949
"metadata": {},
5050
"outputs": [],
5151
"source": [
52-
"def get_cypher_query_from_file(filename):\n",
52+
"def get_cypher_query_from_file(filename: str) -> str:\n",
53+
" \"\"\"Read and return the contents of a Cypher query file.\"\"\"\n",
5354
" with open(filename) as file:\n",
54-
" return ' '.join(file.readlines())\n",
55+
" return \" \".join(file.readlines())\n",
5556
" \n",
5657
"\n",
57-
"def query_cypher_to_data_frame(filename):\n",
58-
" records, summary, keys = driver.execute_query(get_cypher_query_from_file(filename))\n",
58+
"def query_cypher_to_data_frame(filename: str) -> pd.DataFrame:\n",
59+
" \"\"\"Execute a Cypher query from file and return results as a DataFrame.\"\"\"\n",
60+
" records, _, keys = driver.execute_query(get_cypher_query_from_file(filename)) # type: ignore[arg-type]\n",
5961
" return pd.DataFrame([r.values() for r in records], columns=keys)"
6062
]
6163
},
@@ -192,24 +194,24 @@
192194
"if effective_method_line_count_distribution_normalized.empty:\n",
193195
" print(\"No data to plot\")\n",
194196
"else:\n",
195-
" plot.figure();\n",
196-
" method_line_count_x_ticks=range(1,20)\n",
197+
" plot.figure()\n",
198+
" method_line_count_x_ticks = range(1, 20)\n",
197199
" axes = effective_method_line_count_distribution_normalized.head(20).plot(\n",
198-
" kind='line',\n",
200+
" kind=\"line\",\n",
199201
" logx=True,\n",
200202
" grid=True,\n",
201203
" xlim=[2, 20],\n",
202204
" ylim=[0, 20],\n",
203205
" xticks=method_line_count_x_ticks,\n",
204-
" title='Effective Method Line Count Distribution', \n",
205-
" xlabel='effective line count',\n",
206-
" ylabel='percent of methods',\n",
206+
" title=\"Effective Method Line Count Distribution\", \n",
207+
" xlabel=\"effective line count\",\n",
208+
" ylabel=\"percent of methods\",\n",
207209
" cmap=main_color_map,\n",
208210
" figsize=(10, 6),\n",
209211
" lw=2,\n",
210212
" )\n",
211-
" axes.set_xticklabels(method_line_count_x_ticks)\n",
212-
" axes.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
213+
" axes.set_xticklabels([str(i) for i in method_line_count_x_ticks])\n",
214+
" axes.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
213215
" plot.show()"
214216
]
215217
},
@@ -312,7 +314,7 @@
312314
"source": [
313315
"cyclomatic_method_complexity_distribution_max_artifacts=15\n",
314316
"\n",
315-
"cyclomatic_method_complexity_distribution=query_cypher_to_data_frame(\"../cypher/Overview/Cyclomatic_Method_Complexity_Distribution.cypher\")\n",
317+
"cyclomatic_method_complexity_distribution=query_cypher_to_data_frame(\"../queries/method-metrics/Cyclomatic_Method_Complexity_Distribution.cypher\")\n",
316318
"cyclomatic_method_complexity_distribution=cyclomatic_method_complexity_distribution.pivot(index='cyclomaticComplexity', columns='artifactName', values='methods')\n",
317319
"\n",
318320
"# Fill missing values with zero\n",
@@ -367,29 +369,32 @@
367369
"metadata": {},
368370
"outputs": [],
369371
"source": [
372+
"from scipy import interpolate\n",
373+
"\n",
374+
"\n",
370375
"if cyclomatic_method_complexity_distribution_normalized.empty:\n",
371376
" print(\"No data to plot\")\n",
372377
"else:\n",
373-
" plot.figure();\n",
374-
" method_line_count_x_ticks=range(1,11)\n",
375-
" cyclomatic_complexity_y_ticks=[1, 2, 3, 4, 5, 7, 10, 20, 30, 40, 50, 100]\n",
378+
" plot.figure()\n",
379+
" method_line_count_x_ticks = range(1, 11)\n",
380+
" cyclomatic_complexity_y_ticks = [1, 2, 3, 4, 5, 7, 10, 20, 30, 40, 50, 100]\n",
376381
" axes = cyclomatic_method_complexity_distribution_normalized.plot(\n",
377-
" kind='line', \n",
382+
" kind=\"line\",\n",
378383
" logx=True,\n",
379384
" logy=True,\n",
380385
" grid=True,\n",
381-
" xlim=[1,11],\n",
382-
" ylim=[1,100],\n",
386+
" xlim=[1, 11],\n",
387+
" ylim=[1, 100],\n",
383388
" xticks=method_line_count_x_ticks,\n",
384389
" yticks=cyclomatic_complexity_y_ticks,\n",
385-
" title='Cyclomatic complexity distribution of methods', \n",
386-
" xlabel='cyclomatic complexity',\n",
387-
" ylabel='percentage of methods',\n",
390+
" title=\"Cyclomatic complexity distribution of methods\", \n",
391+
" xlabel=\"cyclomatic complexity\",\n",
392+
" ylabel=\"percentage of methods\",\n",
388393
" cmap=main_color_map,\n",
389394
" )\n",
390-
" axes.set_xticklabels(method_line_count_x_ticks)\n",
391-
" axes.set_yticklabels(cyclomatic_complexity_y_ticks)\n",
392-
" axes.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
395+
" axes.set_xticklabels([str(i) for i in method_line_count_x_ticks])\n",
396+
" axes.set_yticklabels([str(i) for i in cyclomatic_complexity_y_ticks])\n",
397+
" axes.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
393398
" plot.show()"
394399
]
395400
},
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Cyclomatic Method Complexity Distribution: number of methods per artifact and cyclomatic complexity value
2+
3+
MATCH (artifact:Artifact)-[:CONTAINS]->(type:Type)-[:DECLARES]->(method:Method)
4+
WHERE method.effectiveLineCount > 0
5+
WITH last(split(artifact.fileName, '/')) AS artifactName
6+
,method.cyclomaticComplexity AS cyclomaticComplexity
7+
,count(method) AS methods
8+
RETURN artifactName, cyclomaticComplexity, methods
9+
ORDER BY artifactName asc, cyclomaticComplexity

domains/node-embeddings/explore/NodeEmbeddingsJavaExploration.ipynb

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@
114114
"# before starting jupyter notebook to provide the password for the user \"neo4j\". \n",
115115
"# It is not recommended to hardcode the password into jupyter notebook for security reasons.\n",
116116
"\n",
117-
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\")))\n",
117+
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\"))) # pyright: ignore[reportArgumentType]\n",
118118
"driver.verify_connectivity()"
119119
]
120120
},
@@ -131,7 +131,7 @@
131131
" \n",
132132
"\n",
133133
"def query_cypher_to_data_frame(filename, parameters_: typ.Optional[typ.Dict[str, typ.Any]] = None):\n",
134-
" records, summary, keys = driver.execute_query(get_cypher_query_from_file(filename),parameters_=parameters_)\n",
134+
" records, summary, keys = driver.execute_query(get_cypher_query_from_file(filename),parameters_=parameters_) # type: ignore\n",
135135
" return pd.DataFrame([r.values() for r in records], columns=keys)\n",
136136
"\n",
137137
"\n",
@@ -173,15 +173,15 @@
173173
" The number of the dimensions and therefore size of the resulting array of floating point numbers\n",
174174
" \"\"\"\n",
175175
" \n",
176-
" is_data_missing=query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_0_Check_Projectable.cypher\", parameters).empty\n",
176+
" is_data_missing=query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_0_Check_Projectable.cypher\", parameters).empty\n",
177177
" if is_data_missing: return False\n",
178178
"\n",
179-
" query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_1_Delete_Projection.cypher\", parameters)\n",
180-
" query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_2_Delete_Subgraph.cypher\", parameters)\n",
179+
" query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_1_Delete_Projection.cypher\", parameters)\n",
180+
" query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_2_Delete_Subgraph.cypher\", parameters)\n",
181181
" # To include the direction of the relationships use the following line to create the projection:\n",
182-
" # query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_3_Create_Projection.cypher\", parameters)\n",
183-
" query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_4_Create_Undirected_Projection.cypher\", parameters)\n",
184-
" query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_5_Create_Subgraph.cypher\", parameters)\n",
182+
" # query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_3_Create_Projection.cypher\", parameters)\n",
183+
" query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_4_Create_Undirected_Projection.cypher\", parameters)\n",
184+
" query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_5_Create_Subgraph.cypher\", parameters)\n",
185185
" return True"
186186
]
187187
},
@@ -204,7 +204,7 @@
204204
" parameters = dict(\n",
205205
" dependencies_projection=projection_name,\n",
206206
" )\n",
207-
" return query_cypher_to_data_frame(\"../cypher/Dependencies_Projection/Dependencies_12_Get_Projection_Statistics.cypher\", parameters)\n",
207+
" return query_cypher_to_data_frame(\"../../../cypher/Dependencies_Projection/Dependencies_12_Get_Projection_Statistics.cypher\", parameters)\n",
208208
"\n",
209209
"\n",
210210
"def get_projected_graph_node_count(projection_name: str) -> int:\n",
@@ -275,7 +275,7 @@
275275
" print(\"No projected data for node embeddings calculation available\")\n",
276276
" return empty_embeddings()\n",
277277
"\n",
278-
" existing_embeddings_query_filename=\"../cypher/Node_Embeddings/Node_Embeddings_0a_Query_Calculated.cypher\"\n",
278+
" existing_embeddings_query_filename=\"../queries/node-embeddings/Node_Embeddings_0a_Query_Calculated.cypher\"\n",
279279
" embeddings = query_first_non_empty_cypher_to_data_frame(existing_embeddings_query_filename, cypher_file_name, parameters=parameters)\n",
280280
" display(embeddings.head()) # Display the first entries of the table\n",
281281
" return embeddings"
@@ -315,10 +315,10 @@
315315
" print(\"GraphSAGE node embeddings training will be skipped for \" + str(node_count) + \" (>500) nodes, since it is computationally expensive and not eagerly needed for demonstration purposes.\")\n",
316316
" return empty_embeddings()\n",
317317
"\n",
318-
" query_cypher_to_data_frame(\"../cypher/Node_Embeddings/Node_Embeddings_0b_Prepare_Degree.cypher\", parameters)\n",
319-
" query_cypher_to_data_frame(\"../cypher/Node_Embeddings/Node_Embeddings_0c_Drop_Model.cypher\", parameters)\n",
320-
" display(query_cypher_to_data_frame(\"../cypher/Node_Embeddings/Node_Embeddings_4b_GraphSAGE_Train.cypher\", parameters))\n",
321-
" embeddings=query_cypher_to_data_frame(\"../cypher/Node_Embeddings/Node_Embeddings_4d_GraphSAGE_Stream.cypher\", parameters)\n",
318+
" query_cypher_to_data_frame(\"../queries/node-embeddings/Node_Embeddings_0b_Prepare_Degree.cypher\", parameters)\n",
319+
" query_cypher_to_data_frame(\"../queries/node-embeddings/Node_Embeddings_0c_Drop_Model.cypher\", parameters)\n",
320+
" display(query_cypher_to_data_frame(\"../queries/node-embeddings/Node_Embeddings_4b_GraphSAGE_Train.cypher\", parameters))\n",
321+
" embeddings=query_cypher_to_data_frame(\"../queries/node-embeddings/Node_Embeddings_4d_GraphSAGE_Stream.cypher\", parameters)\n",
322322
" \n",
323323
" display(embeddings.head()) # Display the first entries of the table\n",
324324
" return embeddings"
@@ -707,7 +707,7 @@
707707
" \"dependencies_projection_write_property\": \"embeddingsFastRandomProjection\",\n",
708708
" \"dependencies_projection_embedding_dimension\":\"32\"\n",
709709
"}\n",
710-
"embeddings_fastRP = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher\", java_package_embeddings_parameters)\n"
710+
"embeddings_fastRP = create_node_embeddings(\"../queries/node-embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher\", java_package_embeddings_parameters)\n"
711711
]
712712
},
713713
{
@@ -777,7 +777,7 @@
777777
" \"dependencies_projection_write_property\": \"embeddingsHashGNN\",\n",
778778
" \"dependencies_projection_embedding_dimension\":\"64\"\n",
779779
"}\n",
780-
"embeddings_hashGNN = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher\", java_package_embeddings_parameters)\n",
780+
"embeddings_hashGNN = create_node_embeddings(\"../queries/node-embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher\", java_package_embeddings_parameters)\n",
781781
"embeddings_hashGNN = prepare_node_embeddings_for_2d_visualization(embeddings_hashGNN)\n",
782782
"scores_hashGNN = CommunityScores.calculate(embeddings_hashGNN)\n",
783783
"plot_2d_node_embeddings(embeddings_hashGNN, get_plot_title(\"Java Packages\", \"HashGNN\", scores_hashGNN))"
@@ -803,7 +803,7 @@
803803
" \"dependencies_projection_write_property\": \"embeddingsNode2Vec\",\n",
804804
" \"dependencies_projection_embedding_dimension\":\"32\"\n",
805805
"}\n",
806-
"embeddings_node2vec = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher\", java_package_embeddings_parameters)\n",
806+
"embeddings_node2vec = create_node_embeddings(\"../queries/node-embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher\", java_package_embeddings_parameters)\n",
807807
"embeddings_node2vec = prepare_node_embeddings_for_2d_visualization(embeddings_node2vec)\n",
808808
"scores_node2vec = CommunityScores.calculate(embeddings_node2vec)\n",
809809
"plot_2d_node_embeddings(embeddings_node2vec, get_plot_title(\"Java Packages\", \"node2vec\", scores_node2vec))"

0 commit comments

Comments
 (0)