Skip to content

Commit 14656b5

Browse files
committed
Fix exploratory Jupyter notebooks
1 parent f3997bd commit 14656b5

8 files changed

Lines changed: 594 additions & 702 deletions

File tree

domains/anomaly-detection/explore/NodeEmbeddingsHyperparameterTuningExploration.ipynb

Lines changed: 67 additions & 186 deletions
Large diffs are not rendered by default.

domains/external-dependencies/explore/ExternalDependenciesJava.ipynb

Lines changed: 266 additions & 226 deletions
Large diffs are not rendered by default.

domains/external-dependencies/explore/ExternalDependenciesTypescript.ipynb

Lines changed: 158 additions & 149 deletions
Large diffs are not rendered by default.

domains/git-history/explore/GitHistoryGeneralExploration.ipynb

Lines changed: 29 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,10 @@
175175
" **plotly_main_layout_base_settings\n",
176176
")\n",
177177
"plotly_treemap_figure_show_settings = dict(\n",
178-
" renderer=\"svg\" if is_command_line_execution() else None,\n",
179-
" width=680 if is_command_line_execution() else 1080,\n",
180-
" height=680 if is_command_line_execution() else 1080,\n",
181-
" config={'scrollZoom': False, 'displaylogo': False, 'displayModeBar': False} if is_command_line_execution() else {}\n",
178+
" renderer=None,\n",
179+
" width=1080,\n",
180+
" height=1080,\n",
181+
" config={}\n",
182182
")\n",
183183
"\n",
184184
"plotly_treemap_marker_base_style = dict(\n",
@@ -191,27 +191,6 @@
191191
")"
192192
]
193193
},
194-
{
195-
"cell_type": "code",
196-
"execution_count": null,
197-
"id": "03ee42af",
198-
"metadata": {},
199-
"outputs": [],
200-
"source": [
201-
"def get_plotly_figure_write_image_settings(name: str):\n",
202-
" \"\"\"\n",
203-
" Returns the settings for the plotly figure write_image method\n",
204-
" :param name: Name of the figure\n",
205-
" :return: Dictionary with settings for the write_image method\n",
206-
" \"\"\"\n",
207-
" return dict(\n",
208-
" file=get_offline_path() + \"/\" + name + \".svg\", \n",
209-
" format=\"svg\", \n",
210-
" width=1080, \n",
211-
" height=1080\n",
212-
" )"
213-
]
214-
},
215194
{
216195
"cell_type": "code",
217196
"execution_count": null,
@@ -601,6 +580,11 @@
601580
"git_files_with_commit_statistics = add_directory_name_column(git_files_with_commit_statistics, 'directoryPath', 'directoryName')\n",
602581
"git_files_with_commit_statistics = add_parent_directory_column(git_files_with_commit_statistics, 'directoryPath', 'directoryParentPath')\n",
603582
"\n",
583+
"# Convert date columns to strings for JSON serialization in Plotly\n",
584+
"git_files_with_commit_statistics['lastCommitDate'] = git_files_with_commit_statistics['lastCommitDate'].astype(str)\n",
585+
"git_files_with_commit_statistics['lastCreationDate'] = git_files_with_commit_statistics['lastCreationDate'].astype(str)\n",
586+
"git_files_with_commit_statistics['lastModificationDate'] = git_files_with_commit_statistics['lastModificationDate'].astype(str)\n",
587+
"\n",
604588
"# Debug\n",
605589
"# display(\"5. added parent and name columns ------------\")\n",
606590
"# display(git_files_with_commit_statistics)\n",
@@ -683,9 +667,7 @@
683667
" **plotly_treemap_layout_base_settings,\n",
684668
" title='Directories and their file count'\n",
685669
")\n",
686-
"figure.show(**plotly_treemap_figure_show_settings)\n",
687-
"if is_command_line_execution():\n",
688-
" figure.write_image(**get_plotly_figure_write_image_settings(\"NumberOfFilesPerDirectory\"))"
670+
"figure.show(**plotly_treemap_figure_show_settings)"
689671
]
690672
},
691673
{
@@ -721,9 +703,7 @@
721703
" **plotly_treemap_layout_base_settings,\n",
722704
" title='Most frequent file extension per directory'\n",
723705
")\n",
724-
"figure.show(**plotly_treemap_figure_show_settings)\n",
725-
"if is_command_line_execution():\n",
726-
" figure.write_image(**get_plotly_figure_write_image_settings(\"MostFrequentFileExtensionPerDirectory\"))"
706+
"figure.show(**plotly_treemap_figure_show_settings)"
727707
]
728708
},
729709
{
@@ -757,9 +737,7 @@
757737
" **plotly_treemap_layout_base_settings,\n",
758738
" title='Number of git commits',\n",
759739
")\n",
760-
"figure.show(**plotly_treemap_figure_show_settings)\n",
761-
"if is_command_line_execution():\n",
762-
" figure.write_image(**get_plotly_figure_write_image_settings(\"NumberOfGitCommits\"))"
740+
"figure.show(**plotly_treemap_figure_show_settings)"
763741
]
764742
},
765743
{
@@ -793,9 +771,7 @@
793771
" **plotly_treemap_layout_base_settings,\n",
794772
" title='Number of distinct commit authors',\n",
795773
")\n",
796-
"figure.show(**plotly_treemap_figure_show_settings)\n",
797-
"if is_command_line_execution():\n",
798-
" figure.write_image(**get_plotly_figure_write_image_settings(\"NumberOfDistinctCommitAuthors\"))"
774+
"figure.show(**plotly_treemap_figure_show_settings)"
799775
]
800776
},
801777
{
@@ -837,9 +813,7 @@
837813
" title='Number of distinct commit authors (red/black = only one or very few authors)',\n",
838814
")\n",
839815
"\n",
840-
"figure.show(**plotly_treemap_figure_show_settings)\n",
841-
"if is_command_line_execution():\n",
842-
" figure.write_image(**get_plotly_figure_write_image_settings(\"NumberOfDistinctCommitAuthorsLowFocus\"))"
816+
"figure.show(**plotly_treemap_figure_show_settings)"
843817
]
844818
},
845819
{
@@ -875,9 +849,7 @@
875849
" **plotly_treemap_layout_base_settings,\n",
876850
" title='Main authors with highest number of commits'\n",
877851
")\n",
878-
"figure.show(**plotly_treemap_figure_show_settings)\n",
879-
"if is_command_line_execution():\n",
880-
" figure.write_image(**get_plotly_figure_write_image_settings(\"MainAuthorsWithHighestNumberOfCommits\"))"
852+
"figure.show(**plotly_treemap_figure_show_settings)"
881853
]
882854
},
883855
{
@@ -914,9 +886,7 @@
914886
" **plotly_treemap_layout_base_settings,\n",
915887
" title='Second author with the second highest number of commits'\n",
916888
")\n",
917-
"figure.show(**plotly_treemap_figure_show_settings)\n",
918-
"if is_command_line_execution():\n",
919-
" figure.write_image(**get_plotly_figure_write_image_settings(\"SecondAuthorWithTheSecondHighestNumberOfCommits\"))"
889+
"figure.show(**plotly_treemap_figure_show_settings)"
920890
]
921891
},
922892
{
@@ -951,9 +921,7 @@
951921
" **plotly_treemap_layout_base_settings,\n",
952922
" title='Days since last commit',\n",
953923
")\n",
954-
"figure.show(**plotly_treemap_figure_show_settings)\n",
955-
"if is_command_line_execution():\n",
956-
" figure.write_image(**get_plotly_figure_write_image_settings(\"DaysSinceLastCommit\"))"
924+
"figure.show(**plotly_treemap_figure_show_settings)"
957925
]
958926
},
959927
{
@@ -988,9 +956,7 @@
988956
" **plotly_treemap_layout_base_settings,\n",
989957
" title='Rank of days since last commit',\n",
990958
")\n",
991-
"figure.show(**plotly_treemap_figure_show_settings)\n",
992-
"if is_command_line_execution():\n",
993-
" figure.write_image(**get_plotly_figure_write_image_settings(\"DaysSinceLastCommitRanked\"))"
959+
"figure.show(**plotly_treemap_figure_show_settings)"
994960
]
995961
},
996962
{
@@ -1024,9 +990,7 @@
1024990
" **plotly_treemap_layout_base_settings,\n",
1025991
" title='Days since last file creation',\n",
1026992
")\n",
1027-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1028-
"if is_command_line_execution():\n",
1029-
" figure.write_image(**get_plotly_figure_write_image_settings(\"DaysSinceLastFileCreation\"))"
993+
"figure.show(**plotly_treemap_figure_show_settings)"
1030994
]
1031995
},
1032996
{
@@ -1060,9 +1024,7 @@
10601024
" **plotly_treemap_layout_base_settings,\n",
10611025
" title='Rank of days since last file creation',\n",
10621026
")\n",
1063-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1064-
"if is_command_line_execution():\n",
1065-
" figure.write_image(**get_plotly_figure_write_image_settings(\"DaysSinceLastFileCreationRanked\"))"
1027+
"figure.show(**plotly_treemap_figure_show_settings)"
10661028
]
10671029
},
10681030
{
@@ -1096,9 +1058,7 @@
10961058
" **plotly_treemap_layout_base_settings,\n",
10971059
" title='Days since last file modification',\n",
10981060
")\n",
1099-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1100-
"if is_command_line_execution():\n",
1101-
" figure.write_image(**get_plotly_figure_write_image_settings(\"DaysSinceLastFileModification\"))"
1061+
"figure.show(**plotly_treemap_figure_show_settings)"
11021062
]
11031063
},
11041064
{
@@ -1132,9 +1092,7 @@
11321092
" **plotly_treemap_layout_base_settings,\n",
11331093
" title='Rank of days since last file modification',\n",
11341094
")\n",
1135-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1136-
"if is_command_line_execution():\n",
1137-
" figure.write_image(**get_plotly_figure_write_image_settings(\"DaysSinceLastFileModificationRanked\"))"
1095+
"figure.show(**plotly_treemap_figure_show_settings)"
11381096
]
11391097
},
11401098
{
@@ -1200,9 +1158,7 @@
12001158
" xaxis_title='file count',\n",
12011159
" yaxis_title='commit count'\n",
12021160
" )\n",
1203-
" figure.show(**plotly_treemap_figure_show_settings)\n",
1204-
" if is_command_line_execution():\n",
1205-
" figure.write_image(**get_plotly_figure_write_image_settings(\"ChangedFilesPerCommit\"))"
1161+
" figure.show(**plotly_treemap_figure_show_settings)"
12061162
]
12071163
},
12081164
{
@@ -1312,9 +1268,7 @@
13121268
" **plotly_treemap_layout_base_settings,\n",
13131269
" title='Files that likely co-change with others in update commits',\n",
13141270
")\n",
1315-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1316-
"if is_command_line_execution():\n",
1317-
" figure.write_image(**get_plotly_figure_write_image_settings(\"CoChangingFiles\"))"
1271+
"figure.show(**plotly_treemap_figure_show_settings)"
13181272
]
13191273
},
13201274
{
@@ -1340,9 +1294,7 @@
13401294
" **plotly_treemap_layout_base_settings,\n",
13411295
" title='Co-Changing files in update commits max lift (1=random, >1=more than random, <1=less than random)',\n",
13421296
")\n",
1343-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1344-
"if is_command_line_execution():\n",
1345-
" figure.write_image(**get_plotly_figure_write_image_settings(\"CoChangingFilesMaxLift\"))"
1297+
"figure.show(**plotly_treemap_figure_show_settings)"
13461298
]
13471299
},
13481300
{
@@ -1368,9 +1320,7 @@
13681320
" **plotly_treemap_layout_base_settings,\n",
13691321
" title='Co-Changing files in update commits average lift (1=random, >1=more than random, <1=less than random)',\n",
13701322
")\n",
1371-
"figure.show(**plotly_treemap_figure_show_settings)\n",
1372-
"if is_command_line_execution():\n",
1373-
" figure.write_image(**get_plotly_figure_write_image_settings(\"CoChangingFilesAverageLift\"))"
1323+
"figure.show(**plotly_treemap_figure_show_settings)"
13741324
]
13751325
},
13761326
{
@@ -1577,9 +1527,7 @@
15771527
" showlegend=False\n",
15781528
" )\n",
15791529
"\n",
1580-
" figure.show(**plotly_treemap_figure_show_settings)\n",
1581-
" if is_command_line_execution():\n",
1582-
" figure.write_image(**get_plotly_figure_write_image_settings(output_file_name))"
1530+
" figure.show(**plotly_treemap_figure_show_settings)"
15831531
]
15841532
},
15851533
{
@@ -1935,7 +1883,7 @@
19351883
}
19361884
],
19371885
"kernelspec": {
1938-
"display_name": "codegraph",
1886+
"display_name": "code-graph-analysis-pipeline (3.12.8)",
19391887
"language": "python",
19401888
"name": "python3"
19411889
},
@@ -1949,7 +1897,7 @@
19491897
"name": "python",
19501898
"nbconvert_exporter": "python",
19511899
"pygments_lexer": "ipython3",
1952-
"version": "3.12.9"
1900+
"version": "3.12.8"
19531901
},
19541902
"title": "Git History General Exploration"
19551903
},

domains/java/explore/MethodMetricsJavaExploration.ipynb

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"# before starting jupyter notebook to provide the password for the user \"neo4j\". \n",
3939
"# It is not recommended to hardcode the password into jupyter notebook for security reasons.\n",
4040
"\n",
41-
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\")))\n",
41+
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\") or \"\"))\n",
4242
"driver.verify_connectivity()"
4343
]
4444
},
@@ -49,13 +49,15 @@
4949
"metadata": {},
5050
"outputs": [],
5151
"source": [
52-
"def get_cypher_query_from_file(filename):\n",
52+
"def get_cypher_query_from_file(filename: str) -> str:\n",
53+
" \"\"\"Read and return the contents of a Cypher query file.\"\"\"\n",
5354
" with open(filename) as file:\n",
54-
" return ' '.join(file.readlines())\n",
55+
" return \" \".join(file.readlines())\n",
5556
" \n",
5657
"\n",
57-
"def query_cypher_to_data_frame(filename):\n",
58-
" records, summary, keys = driver.execute_query(get_cypher_query_from_file(filename))\n",
58+
"def query_cypher_to_data_frame(filename: str) -> pd.DataFrame:\n",
59+
" \"\"\"Execute a Cypher query from file and return results as a DataFrame.\"\"\"\n",
60+
" records, _, keys = driver.execute_query(get_cypher_query_from_file(filename)) # type: ignore[arg-type]\n",
5961
" return pd.DataFrame([r.values() for r in records], columns=keys)"
6062
]
6163
},
@@ -192,24 +194,24 @@
192194
"if effective_method_line_count_distribution_normalized.empty:\n",
193195
" print(\"No data to plot\")\n",
194196
"else:\n",
195-
" plot.figure();\n",
196-
" method_line_count_x_ticks=range(1,20)\n",
197+
" plot.figure()\n",
198+
" method_line_count_x_ticks = range(1, 20)\n",
197199
" axes = effective_method_line_count_distribution_normalized.head(20).plot(\n",
198-
" kind='line',\n",
200+
" kind=\"line\",\n",
199201
" logx=True,\n",
200202
" grid=True,\n",
201203
" xlim=[2, 20],\n",
202204
" ylim=[0, 20],\n",
203205
" xticks=method_line_count_x_ticks,\n",
204-
" title='Effective Method Line Count Distribution', \n",
205-
" xlabel='effective line count',\n",
206-
" ylabel='percent of methods',\n",
206+
" title=\"Effective Method Line Count Distribution\", \n",
207+
" xlabel=\"effective line count\",\n",
208+
" ylabel=\"percent of methods\",\n",
207209
" cmap=main_color_map,\n",
208210
" figsize=(10, 6),\n",
209211
" lw=2,\n",
210212
" )\n",
211-
" axes.set_xticklabels(method_line_count_x_ticks)\n",
212-
" axes.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
213+
" axes.set_xticklabels([str(i) for i in method_line_count_x_ticks])\n",
214+
" axes.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
213215
" plot.show()"
214216
]
215217
},
@@ -312,7 +314,7 @@
312314
"source": [
313315
"cyclomatic_method_complexity_distribution_max_artifacts=15\n",
314316
"\n",
315-
"cyclomatic_method_complexity_distribution=query_cypher_to_data_frame(\"../cypher/Overview/Cyclomatic_Method_Complexity_Distribution.cypher\")\n",
317+
"cyclomatic_method_complexity_distribution=query_cypher_to_data_frame(\"../queries/method-metrics/Cyclomatic_Method_Complexity_Distribution.cypher\")\n",
316318
"cyclomatic_method_complexity_distribution=cyclomatic_method_complexity_distribution.pivot(index='cyclomaticComplexity', columns='artifactName', values='methods')\n",
317319
"\n",
318320
"# Fill missing values with zero\n",
@@ -367,29 +369,32 @@
367369
"metadata": {},
368370
"outputs": [],
369371
"source": [
372+
"from scipy import interpolate\n",
373+
"\n",
374+
"\n",
370375
"if cyclomatic_method_complexity_distribution_normalized.empty:\n",
371376
" print(\"No data to plot\")\n",
372377
"else:\n",
373-
" plot.figure();\n",
374-
" method_line_count_x_ticks=range(1,11)\n",
375-
" cyclomatic_complexity_y_ticks=[1, 2, 3, 4, 5, 7, 10, 20, 30, 40, 50, 100]\n",
378+
" plot.figure()\n",
379+
" method_line_count_x_ticks = range(1, 11)\n",
380+
" cyclomatic_complexity_y_ticks = [1, 2, 3, 4, 5, 7, 10, 20, 30, 40, 50, 100]\n",
376381
" axes = cyclomatic_method_complexity_distribution_normalized.plot(\n",
377-
" kind='line', \n",
382+
" kind=\"line\",\n",
378383
" logx=True,\n",
379384
" logy=True,\n",
380385
" grid=True,\n",
381-
" xlim=[1,11],\n",
382-
" ylim=[1,100],\n",
386+
" xlim=[1, 11],\n",
387+
" ylim=[1, 100],\n",
383388
" xticks=method_line_count_x_ticks,\n",
384389
" yticks=cyclomatic_complexity_y_ticks,\n",
385-
" title='Cyclomatic complexity distribution of methods', \n",
386-
" xlabel='cyclomatic complexity',\n",
387-
" ylabel='percentage of methods',\n",
390+
" title=\"Cyclomatic complexity distribution of methods\", \n",
391+
" xlabel=\"cyclomatic complexity\",\n",
392+
" ylabel=\"percentage of methods\",\n",
388393
" cmap=main_color_map,\n",
389394
" )\n",
390-
" axes.set_xticklabels(method_line_count_x_ticks)\n",
391-
" axes.set_yticklabels(cyclomatic_complexity_y_ticks)\n",
392-
" axes.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
395+
" axes.set_xticklabels([str(i) for i in method_line_count_x_ticks])\n",
396+
" axes.set_yticklabels([str(i) for i in cyclomatic_complexity_y_ticks])\n",
397+
" axes.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
393398
" plot.show()"
394399
]
395400
},
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Cyclomatic Complexity Method Complexity Distribution
2+
3+
MATCH (artifact:Artifact)-[:CONTAINS]->(type:Type)-[:DECLARES]->(method:Method)
4+
WHERE method.effectiveLineCount > 0
5+
WITH last(split(artifact.fileName, '/')) AS artifactName
6+
,method.cyclomaticComplexity AS cyclomaticComplexity
7+
,count(method) AS methods
8+
RETURN artifactName, cyclomaticComplexity, methods
9+
ORDER BY artifactName asc, cyclomaticComplexity

0 commit comments

Comments
 (0)