Skip to content

Commit f3997bd

Browse files
committed
Improve charts to look similar to original explore notebooks
1 parent d1b9819 commit f3997bd

3 files changed

Lines changed: 100 additions & 39 deletions

File tree

domains/external-dependencies/externalDependencyCharts.py

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,12 @@ def filter_entries_below_percentage_threshold(
182182
threshold_percent: float,
183183
) -> pd.DataFrame:
184184
"""
185-
Returns only rows whose percentage share of the *original* total is strictly
185+
Returns only rows whose percentage share of the *original* total is at or
186186
below threshold_percent. Used to drill down into the 'others' slice.
187187
"""
188188
result = add_percentage_column(data_frame, value_column)
189189
percent_column = value_column + "Percent"
190-
result = result[result[percent_column] < threshold_percent]
190+
result = result[result[percent_column] <= threshold_percent]
191191
return result.reset_index(drop=True)
192192

193193

@@ -452,9 +452,8 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
452452

453453
# ── Top external packages (Table 1 equivalent) ────────────────────────────
454454
if not overall_data.empty:
455-
top20 = overall_data.head(20)
456455
save_pie_chart_pair(
457-
source_data=top20,
456+
source_data=overall_data,
458457
value_column="numberOfExternalCallerTypes",
459458
name_column="externalPackageName",
460459
chart_name_prefix="Java_Top_external_packages_by_types",
@@ -463,7 +462,7 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
463462
verbose=verbose,
464463
)
465464
save_pie_chart_pair(
466-
source_data=top20,
465+
source_data=overall_data,
467466
value_column="numberOfExternalCallerPackages",
468467
name_column="externalPackageName",
469468
chart_name_prefix="Java_Top_external_packages_by_packages",
@@ -474,9 +473,8 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
474473

475474
# ── Second-level package grouping (Table 2 equivalent) ────────────────────
476475
if not second_level_overall_data.empty:
477-
top20_second_level = second_level_overall_data.head(20)
478476
save_pie_chart_pair(
479-
source_data=top20_second_level,
477+
source_data=second_level_overall_data,
480478
value_column="numberOfExternalCallerTypes",
481479
name_column="externalSecondLevelPackageName",
482480
chart_name_prefix="Java_Top_second_level_packages_by_types",
@@ -485,7 +483,7 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
485483
verbose=verbose,
486484
)
487485
save_pie_chart_pair(
488-
source_data=top20_second_level,
486+
source_data=second_level_overall_data,
489487
value_column="numberOfExternalCallerPackages",
490488
name_column="externalSecondLevelPackageName",
491489
chart_name_prefix="Java_Top_second_level_packages_by_packages",
@@ -496,9 +494,8 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
496494

497495
# ── Most spread external packages (Table 3 equivalent) ────────────────────
498496
if not spread_data.empty:
499-
top20_spread = spread_data.head(20)
500497
save_pie_chart_pair(
501-
source_data=top20_spread,
498+
source_data=spread_data,
502499
value_column="sumNumberOfTypes",
503500
name_column="externalPackageName",
504501
chart_name_prefix="Java_Most_spread_packages_by_types",
@@ -507,7 +504,7 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
507504
verbose=verbose,
508505
)
509506
save_pie_chart_pair(
510-
source_data=top20_spread,
507+
source_data=spread_data,
511508
value_column="sumNumberOfPackages",
512509
name_column="externalPackageName",
513510
chart_name_prefix="Java_Most_spread_packages_by_packages",
@@ -518,9 +515,8 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
518515

519516
# ── Most spread second-level packages (Table 4 equivalent) ────────────────
520517
if not second_level_spread_data.empty:
521-
top20_second_level_spread = second_level_spread_data.head(20)
522518
save_pie_chart_pair(
523-
source_data=top20_second_level_spread,
519+
source_data=second_level_spread_data,
524520
value_column="sumNumberOfTypes",
525521
name_column="externalSecondLevelPackageName",
526522
chart_name_prefix="Java_Most_spread_second_level_packages_by_types",
@@ -529,7 +525,7 @@ def generate_java_charts(queries_directory: str, report_directory: str, verbose:
529525
verbose=verbose,
530526
)
531527
save_pie_chart_pair(
532-
source_data=top20_second_level_spread,
528+
source_data=second_level_spread_data,
533529
value_column="sumNumberOfPackages",
534530
name_column="externalSecondLevelPackageName",
535531
chart_name_prefix="Java_Most_spread_second_level_packages_by_packages",
@@ -633,9 +629,8 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
633629

634630
# ── Module usage overall ───────────────────────────────────────────────────
635631
if not module_overall_data.empty:
636-
top20_modules = module_overall_data.head(20)
637632
save_pie_chart_pair(
638-
source_data=top20_modules,
633+
source_data=module_overall_data,
639634
value_column="numberOfExternalCallerElements",
640635
name_column="externalModuleName",
641636
chart_name_prefix="Typescript_Top_external_modules_by_elements",
@@ -644,7 +639,7 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
644639
verbose=verbose,
645640
)
646641
save_pie_chart_pair(
647-
source_data=top20_modules,
642+
source_data=module_overall_data,
648643
value_column="numberOfExternalCallerModules",
649644
name_column="externalModuleName",
650645
chart_name_prefix="Typescript_Top_external_modules_by_modules",
@@ -655,9 +650,8 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
655650

656651
# ── Namespace usage overall ────────────────────────────────────────────────
657652
if not namespace_overall_data.empty:
658-
top20_namespaces = namespace_overall_data.head(20)
659653
save_pie_chart_pair(
660-
source_data=top20_namespaces,
654+
source_data=namespace_overall_data,
661655
value_column="numberOfExternalCallerElements",
662656
name_column="externalNamespaceName",
663657
chart_name_prefix="Typescript_Top_external_namespaces_by_elements",
@@ -666,7 +660,7 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
666660
verbose=verbose,
667661
)
668662
save_pie_chart_pair(
669-
source_data=top20_namespaces,
663+
source_data=namespace_overall_data,
670664
value_column="numberOfExternalCallerModules",
671665
name_column="externalNamespaceName",
672666
chart_name_prefix="Typescript_Top_external_namespaces_by_modules",
@@ -677,9 +671,8 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
677671

678672
# ── Module spread ──────────────────────────────────────────────────────────
679673
if not module_spread_data.empty:
680-
top20_module_spread = module_spread_data.head(20)
681674
save_pie_chart_pair(
682-
source_data=top20_module_spread,
675+
source_data=module_spread_data,
683676
value_column="sumNumberOfUsedExternalDeclarations",
684677
name_column="externalModuleName",
685678
chart_name_prefix="Typescript_Most_spread_modules_by_declarations",
@@ -688,7 +681,7 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
688681
verbose=verbose,
689682
)
690683
save_pie_chart_pair(
691-
source_data=top20_module_spread,
684+
source_data=module_spread_data,
692685
value_column="numberOfInternalModules",
693686
name_column="externalModuleName",
694687
chart_name_prefix="Typescript_Most_spread_modules_by_modules",
@@ -699,9 +692,8 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
699692

700693
# ── Namespace spread ───────────────────────────────────────────────────────
701694
if not namespace_spread_data.empty:
702-
top20_namespace_spread = namespace_spread_data.head(20)
703695
save_pie_chart_pair(
704-
source_data=top20_namespace_spread,
696+
source_data=namespace_spread_data,
705697
value_column="sumNumberOfUsedExternalDeclarations",
706698
name_column="externalModuleNamespace",
707699
chart_name_prefix="Typescript_Most_spread_namespaces_by_declarations",
@@ -710,7 +702,7 @@ def generate_typescript_charts(queries_directory: str, report_directory: str, ve
710702
verbose=verbose,
711703
)
712704
save_pie_chart_pair(
713-
source_data=top20_namespace_spread,
705+
source_data=namespace_spread_data,
714706
value_column="numberOfInternalModules",
715707
name_column="externalModuleNamespace",
716708
chart_name_prefix="Typescript_Most_spread_namespaces_by_modules",

domains/java/javaCharts.py

Lines changed: 79 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
TOP_ANNOTATION_LIMIT = 15
4545

4646
HORIZONTAL_BAR_COLOR = "steelblue"
47+
LINE_COUNT_DISTRIBUTION_MAX_ARTIFACTS = 20
48+
CYCLOMATIC_DISTRIBUTION_MAX_ARTIFACTS = 15
49+
DISTRIBUTION_CHART_COLORMAP = "nipy_spectral"
4750

4851

4952
# ── Parameters ────────────────────────────────────────────────────────────────
@@ -237,25 +240,42 @@ def generate_spread_per_dependent_chart(report_directory: str, verbose: bool) ->
237240
# ── Method metrics charts ─────────────────────────────────────────────────────
238241

239242
def generate_method_line_count_distribution_chart(report_directory: str, verbose: bool) -> None:
240-
"""Generate a histogram showing the distribution of effective method line counts."""
243+
"""Generate a normalized per-artifact line chart of effective method line count distribution."""
241244
data_frame = load_csv(report_directory, "EffectiveMethodLineCountDistribution.csv", verbose)
242245
if data_frame.empty:
243246
return
244247

245-
# Aggregate across all artifacts: sum method counts per line count
246-
distribution = data_frame.groupby("effectiveLineCount")["methods"].sum().reset_index()
247-
figure, axis = plot.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT))
248-
axis.bar(
249-
distribution["effectiveLineCount"],
250-
distribution["methods"],
251-
width=1.0,
252-
color=HORIZONTAL_BAR_COLOR,
253-
edgecolor="white",
254-
linewidth=0.3,
248+
distribution = (
249+
data_frame
250+
.pivot(index="effectiveLineCount", columns="artifactName", values="methods")
251+
.fillna(0)
252+
.astype(int)
255253
)
254+
artifact_totals = distribution.sum()
255+
top_artifacts = artifact_totals.sort_values(ascending=False).index[:LINE_COUNT_DISTRIBUTION_MAX_ARTIFACTS]
256+
distribution = distribution[top_artifacts]
257+
258+
normalized = distribution.div(distribution.sum(axis=0), axis=1).multiply(100)
259+
260+
colormap = matplotlib.colormaps[DISTRIBUTION_CHART_COLORMAP]
261+
num_artifacts = len(normalized.columns)
262+
colors = [colormap(i / max(num_artifacts - 1, 1)) for i in range(num_artifacts)]
263+
264+
figure, axis = plot.subplots(figsize=(10, 6))
265+
for i, column in enumerate(normalized.columns):
266+
axis.plot(normalized.index, normalized[column], label=column, color=colors[i], linewidth=2)
267+
268+
x_ticks = list(range(1, 20))
269+
axis.set_xscale("log")
270+
axis.set_xlim(2, 20)
271+
axis.set_ylim(0, 20)
272+
axis.set_xticks(x_ticks)
273+
axis.set_xticklabels([str(t) for t in x_ticks])
256274
axis.set_xlabel("Effective Line Count")
257-
axis.set_ylabel("Number of Methods")
258-
axis.set_title("Effective Method Line Count Distribution")
275+
axis.set_ylabel("Percent of Methods")
276+
axis.set_title("Effective Method Line Count Distribution (Normalized)")
277+
axis.grid(True)
278+
axis.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=7)
259279

260280
save_figure(figure, report_directory, "MethodMetrics_LineCountDistribution_Histogram", verbose)
261281

@@ -298,6 +318,51 @@ def generate_top_packages_by_loc_chart(report_directory: str, verbose: bool) ->
298318
save_figure(figure, report_directory, "MethodMetrics_TopPackagesLOC_Bar", verbose)
299319

300320

321+
def generate_cyclomatic_complexity_distribution_chart(report_directory: str, verbose: bool) -> None:
    """
    Plot, per artifact, which share of its methods has which cyclomatic complexity.

    Reads "CyclomaticMethodComplexityDistribution.csv" through load_csv and returns
    without producing a chart when that data frame is empty. The rows are reshaped
    into a complexity-by-artifact table, reduced to the artifacts with the highest
    method totals (at most CYCLOMATIC_DISTRIBUTION_MAX_ARTIFACTS), and every artifact
    column is converted to percent of that artifact's own method total. One line per
    artifact is drawn on log/log axes and the figure is handed to save_figure.
    """
    source_rows = load_csv(report_directory, "CyclomaticMethodComplexityDistribution.csv", verbose)
    if source_rows.empty:
        return

    # Rows: cyclomatic complexity, columns: artifacts, cells: method counts (gaps become 0).
    methods_per_artifact = source_rows.pivot(
        index="cyclomaticComplexity",
        columns="artifactName",
        values="methods",
    )
    methods_per_artifact = methods_per_artifact.fillna(0).astype(int)

    # Keep only the artifacts with the highest overall method counts.
    ranked_totals = methods_per_artifact.sum().sort_values(ascending=False)
    selected_artifacts = ranked_totals.index[:CYCLOMATIC_DISTRIBUTION_MAX_ARTIFACTS]
    methods_per_artifact = methods_per_artifact[selected_artifacts]

    # Express every column as percent of its own column total.
    column_totals = methods_per_artifact.sum(axis=0)
    percentages = methods_per_artifact.div(column_totals, axis=1).multiply(100)

    # Sample the colormap evenly; the max() guard avoids a zero divisor for a single artifact.
    palette = matplotlib.colormaps[DISTRIBUTION_CHART_COLORMAP]
    artifact_count = len(percentages.columns)
    last_position = max(artifact_count - 1, 1)
    line_colors = [palette(position / last_position) for position in range(artifact_count)]

    figure, axis = plot.subplots(figsize=(10, 6))
    for line_color, artifact in zip(line_colors, percentages.columns):
        axis.plot(
            percentages.index,
            percentages[artifact],
            label=artifact,
            color=line_color,
            linewidth=2,
        )

    complexity_ticks = list(range(1, 11))
    percent_ticks = [1, 2, 3, 4, 5, 7, 10, 20, 30, 40, 50, 100]
    complexity_tick_labels = list(map(str, complexity_ticks))
    percent_tick_labels = list(map(str, percent_ticks))

    # Both axes are logarithmic; explicit ticks and labels keep plain integers on the axes.
    axis.set_xscale("log")
    axis.set_yscale("log")
    axis.set_xlim(1, 11)
    axis.set_ylim(1, 100)
    axis.set_xticks(complexity_ticks)
    axis.set_xticklabels(complexity_tick_labels)
    axis.set_yticks(percent_ticks)
    axis.set_yticklabels(percent_tick_labels)

    axis.set_xlabel("Cyclomatic Complexity")
    axis.set_ylabel("Percentage of Methods")
    axis.set_title("Cyclomatic Complexity Distribution of Methods (Normalized)")
    axis.grid(True)
    # The legend is anchored to the right of the plotting area.
    axis.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=7)

    save_figure(
        figure,
        report_directory,
        "MethodMetrics_CyclomaticComplexityDistribution_Normalized",
        verbose,
    )
364+
365+
301366
# ── Java code quality charts ──────────────────────────────────────────────────
302367

303368
def generate_annotation_type_distribution_chart(report_directory: str, verbose: bool) -> None:
@@ -430,6 +495,7 @@ def generate_all_charts(report_directory: str, verbose: bool) -> None:
430495
generate_spread_per_dependency_chart(report_directory, verbose)
431496
generate_spread_per_dependent_chart(report_directory, verbose)
432497
generate_method_line_count_distribution_chart(report_directory, verbose)
498+
generate_cyclomatic_complexity_distribution_chart(report_directory, verbose)
433499
generate_top_types_by_loc_chart(report_directory, verbose)
434500
generate_top_packages_by_loc_chart(report_directory, verbose)
435501
generate_annotation_type_distribution_chart(report_directory, verbose)

domains/java/javaCsv.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ execute_cypher "${JAVA_CODE_QUALITY_CYPHER_DIR}/JakartaEE_REST_Annotations.cyphe
8383
execute_cypher "${METHOD_METRICS_CYPHER_DIR}/Effective_Method_Line_Count_Distribution.cypher" \
8484
> "${FULL_REPORT_DIRECTORY}/EffectiveMethodLineCountDistribution.csv"
8585

86+
execute_cypher "${METHOD_METRICS_CYPHER_DIR}/Cyclomatic_Method_Complexity_Distribution.cypher" \
87+
> "${FULL_REPORT_DIRECTORY}/CyclomaticMethodComplexityDistribution.csv"
88+
8689
execute_cypher "${METHOD_METRICS_CYPHER_DIR}/Effective_lines_of_method_code_per_type.cypher" \
8790
> "${FULL_REPORT_DIRECTORY}/EffectiveLinesOfMethodCodePerType.csv"
8891

0 commit comments

Comments
 (0)