Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions COMMANDS.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ The [analyze.sh](./scripts/analysis/analyze.sh) command comes with these command

- `--profile Neo4j-latest-low-memory` is based on the default profile (`Neo4j-latest`) but uses only half of the memory (RAM) as configured in [template-neo4j-low-memory.conf](./scripts/configuration/template-neo4j-low-memory.conf). This is useful for the analysis of smaller codebases with less resources. Other profiles can be found in the directory [scripts/profiles](./scripts/profiles/).

- `--profile Neo4j-latest-high-memory` is based on the default profile (`Neo4j-latest`) but uses more memory (RAM) as configured in [template-neo4j-high-memory.conf](./scripts/configuration/template-neo4j-high-memory.conf). This is useful for the analysis of larger codebases with more resources. Other profiles can be found in the directory [scripts/profiles](./scripts/profiles/).

- `--explore` activates the "explore" mode where no reports are generated. Furthermore, Neo4j won't be stopped at the end of the script and will therefore continue running. This makes it easy to just set everything up but then use the running Neo4j server to explore the data manually.

### Notes
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,13 @@ The [Code Structure Analysis Pipeline](./.github/workflows/internal-java-code-an
./../../scripts/analysis/analyze.sh --profile Neo4j-latest-low-memory
```

- How can I increase the memory (RAM) consumption?
👉 Use the profile `Neo4j-latest-high-memory` (default = `Neo4j-latest`):

```shell
./../../scripts/analysis/analyze.sh --profile Neo4j-latest-high-memory
```

## 🕸 Web References

- [code-graph-analysis-examples](https://github.com/JohT/code-graph-analysis-examples)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
WHERE git_file.deletedAt IS NULL // filter out deleted files
ORDER BY git_file.relativePath
WITH *
,datetime.fromepochMillis(git_file.createdAtEpoch) AS fileCreatedAtTimestamp
,datetime.fromepochMillis(coalesce(git_file.createdAtEpoch, 0)) AS fileCreatedAtTimestamp
,datetime.fromepochMillis(coalesce(git_file.lastModificationAtEpoch, git_file.createdAtEpoch)) AS fileLastModificationAtTimestamp
WITH *, git_repository.name + '/' + git_file.relativePath AS filePath
WITH *, split(filePath, '/') AS pathElements
Expand Down
6 changes: 6 additions & 0 deletions cypher/GitLog/Verify_git_missing_create_date.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Verify that there are no (non-deleted) git files with a missing creation date (createdAtEpoch)

MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file:Git&File&!Repository)
WHERE git_file.deletedAt IS NULL // Ignore deleted git files
AND git_file.createdAtEpoch IS NULL
RETURN count(DISTINCT git_file) AS numberOfMissingCreateDateEntires
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
// Propagates "DEPENDS_ON" relations between modules to their resolved modules with a property "resolved:true".
// Propagates "DEPENDS_ON" relations between modules to their resolved modules with a property "resolved:true" or "updated:true".
// Inspired by https://github.com/jQAssistant/jqassistant/blob/4cd7face5d6d2953449d8e6ff5b484f00ffbdc2f/plugin/java/src/main/resources/META-INF/jqassistant-rules/java-classpath.xml#L5

MATCH (module:TS:Module)-[dependsOn:DEPENDS_ON]->(externalModule:TS:ExternalModule)
MATCH (externalModule)-[:IS_IMPLEMENTED_IN]->(resolvedModule:TS:Module)
OPTIONAL MATCH (module)-[existingDependency:DEPENDS_ON]->(resolvedModule)
WHERE module <> resolvedModule
CALL { WITH module, dependsOn, resolvedModule
CALL { WITH module, dependsOn, resolvedModule, existingDependency
MERGE (module)-[resolvedDependsOn:DEPENDS_ON]->(resolvedModule)
ON CREATE SET resolvedDependsOn = dependsOn
,resolvedDependsOn.resolved = true
ON CREATE SET resolvedDependsOn = dependsOn
,resolvedDependsOn.resolved = true
ON MATCH SET resolvedDependsOn = dependsOn // Overwrites existing properties
,resolvedDependsOn.cardinality = existingDependency.cardinality + dependsOn.cardinality // Add cardinalities
,resolvedDependsOn.updated = true
} IN TRANSACTIONS
RETURN count(*) as resolvedDependencies
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Verify that there are either no Typescript modules at all or that there is at least one module dependency.

MATCH (source:TS:Module)
OPTIONAL MATCH (source)-[moduleDependency:DEPENDS_ON2]->(:TS:Module)
OPTIONAL MATCH (source)-[moduleDependency:DEPENDS_ON]->(:TS:Module)
WITH count(DISTINCT source) AS moduleCount
,count(moduleDependency) AS moduleDependencyCount
WITH *, ((moduleCount = 0) OR (moduleDependencyCount > 0)) AS valid
Expand Down
2 changes: 1 addition & 1 deletion domains/anomaly-detection/anomalyDetectionCsv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ anomaly_detection_features() {
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Abstractness-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature_Abstractness_JavaType.cypher" "${@}"
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Abstractness-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature_Abstractness_TypeScriptModules.cypher" "${@}"
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature_Abstractness_TypeScriptModule.cypher" "${@}"
# Determines strongly connected components if not already done
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-StronglyConnectedComponents-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-StronglyConnectedComponents-Write.cypher" "${@}"
Expand Down
2 changes: 1 addition & 1 deletion domains/anomaly-detection/anomalyDetectionPython.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ anomaly_detection_features() {
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Abstractness-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature_Abstractness_JavaType.cypher" "${@}"
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Abstractness-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature_Abstractness_TypeScriptModules.cypher" "${@}"
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature_Abstractness_TypeScriptModule.cypher" "${@}"
# Determines strongly connected components if not already done
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-StronglyConnectedComponents-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-StronglyConnectedComponents-Write.cypher" "${@}"
Expand Down
28 changes: 28 additions & 0 deletions scripts/configuration/template-neo4j-high-memory.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

# The following static configuration entries were taken from "template-neo4j.conf".

# Anonymous usage data reporting
dbms.usage_report.enabled=false

# List of procedures and user defined functions that are allowed
# full access to the database through unsupported/insecure internal APIs.
dbms.security.procedures.unrestricted=apoc.*,gds.*

# Memory: Java Heap Size
server.memory.heap.initial_size=24g
server.memory.heap.max_size=24g

# Memory: The amount of memory to use for mapping the store files.
server.memory.pagecache.size=3g

# Memory: Exits JVM on the first occurrence of an out-of-memory error.
server.jvm.additional=-XX:+ExitOnOutOfMemoryError

# Memory: Limit the amount of memory that all of the running transaction can consume.
db.memory.transaction.total.max=18g

# Memory: Limit the amount of memory that a single transaction can consume.
db.memory.transaction.max=18g

# Transaction: Retention policy for transaction logs needed to perform recovery and backups.
db.tx_log.rotation.retention_policy=keep_none
3 changes: 3 additions & 0 deletions scripts/configuration/template-neo4j-low-memory.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

# The following static configuration entries were taken from "template-neo4j.conf".

# Anonymous usage data reporting
dbms.usage_report.enabled=false

# List of procedures and user defined functions that are allowed
# full access to the database through unsupported/insecure internal APIs.
dbms.security.procedures.unrestricted=apoc.*,gds.*
Expand Down
3 changes: 3 additions & 0 deletions scripts/configuration/template-neo4j.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

# The following static configuration entries were taken from "template-neo4j.conf".

# Anonymous usage data reporting
dbms.usage_report.enabled=false

# List of procedures and user defined functions that are allowed
# full access to the database through unsupported/insecure internal APIs.
dbms.security.procedures.unrestricted=apoc.*,gds.*
Expand Down
13 changes: 13 additions & 0 deletions scripts/importGit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"} # Get the source repository direc
IMPORT_DIRECTORY=${IMPORT_DIRECTORY:-"import"}
IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT=${IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT:-"plugin"} # Select how to import git log data. Options: "none", "aggregated", "full" and "plugin". Default="plugin".

# Local constants
COLOR_YELLOW='\033[0;33m'
COLOR_DEFAULT='\033[0m'

# Default and initial values for command line options
source="${SOURCE_DIRECTORY}"

Expand Down Expand Up @@ -136,6 +140,15 @@ commonPostGitImport() {
execute_cypher "${GIT_LOG_CYPHER_DIR}/Verify_git_to_code_file_unambiguous.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Verify_code_to_git_file_unambiguous.cypher"
execute_cypher "${GIT_LOG_CYPHER_DIR}/Verify_git_missing_CHANGED_TOGETHER_WITH_properties.cypher"

dataVerificationResult=$( execute_cypher "${GIT_LOG_CYPHER_DIR}/Verify_git_missing_create_date.cypher")
if ! is_csv_column_greater_zero "${dataVerificationResult}" "numberOfMissingCreateDateEntires"; then
# Warning: The git file creation date must not be missing. However, this is not important enough to stop the analysis.
# Therefore, it will only be a warning and subsequent queries will use a default date in these cases.
echo -e "${COLOR_YELLOW}importGit: Data verification warning: Git:File nodes with missing createdAtEpoch property detected! Affected number of nodes:${COLOR_DEFAULT}"
echo -e "${COLOR_YELLOW}${dataVerificationResult}${COLOR_DEFAULT}"
# Since this is now only a warning, execution will be continued.
fi
}

postGitLogImport() {
Expand Down
2 changes: 1 addition & 1 deletion scripts/prepareAnalysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ execute_cypher_summarized "${METRICS_CYPHER_DIR}/Set_Outgoing_Java_Package_Depen

# Preparation - Language agnostic node properties "dependencyDegree", "dependencyDegreeWeighted", "dependencyDegreeRank"
execute_cypher_summarized "${METRICS_CYPHER_DIR}/Set_Dependency_Degree.cypher"
execute_cypher_summarized "${METRICS_CYPHER_DIR}/Set_Dependency_DegreeRank.cypher"
execute_cypher_summarized "${METRICS_CYPHER_DIR}/Set_Dependency_Degree_Rank.cypher"

# Preparation - Add Java Method node property "declaringType"
execute_cypher "${TYPES_CYPHER_DIR}/Set_declaring_type_on_method_nodes.cypher"
Expand Down
2 changes: 1 addition & 1 deletion scripts/profiles/Neo4j-latest-continue-on-scan-errors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"}

JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.9.0"}
JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"}
JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4j-latest-jqassistant-continue-on-error.yaml"}
JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant-continue-on-error.yaml"}
21 changes: 21 additions & 0 deletions scripts/profiles/Neo4j-latest-high-memory.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash

# Profile that sets all settings variables for an analysis with Neo4j using the high-memory configuration template.
# Every variable keeps a value that is already set (non-empty) and otherwise falls back to the default given here.
# The chosen settings are tested to be compatible and working.

NEO4J_VERSION=${NEO4J_VERSION:-"2026.01.4"} # Neo4j Graph Database Version. Current versions: >= 2025.03.0. Version 4.4.42 and 5.26.5 are the previous LTS (long term support) versions as of April 2025.
NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/neo4j/tx/commit"}
# This is the setting that makes this profile the "high-memory" one.
NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j-high-memory.conf"}

# Awesome Procedures (APOC) Plugin for Neo4j
NEO4J_APOC_PLUGIN_VERSION=${NEO4J_APOC_PLUGIN_VERSION:-"2026.01.4"}
NEO4J_APOC_PLUGIN_EDITION=${NEO4J_APOC_PLUGIN_EDITION:-"core"}
NEO4J_APOC_PLUGIN_GITHUB=${NEO4J_APOC_PLUGIN_GITHUB:-"neo4j/apoc"}

# Graph Data Science (GDS) Plugin for Neo4j
NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.26.0"}
NEO4J_OPEN_GDS_PLUGIN_VERSION=${NEO4J_OPEN_GDS_PLUGIN_VERSION:-"2.26.0"}
NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"}

# jQAssistant command line interface (CLI)
# NOTE(review): the artifact name still ends with "neo4jv5" although the default Neo4j version above is 2026.x — presumably intended, confirm.
JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.9.0"}
JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"}
JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4j-remote-jqassistant.yaml"}
2 changes: 1 addition & 1 deletion scripts/profiles/Neo4jv5-continue-on-scan-errors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"}

JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.7.0-RC1"}
JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"}
JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4j-latest-jqassistant-continue-on-error.yaml"}
JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant-continue-on-error.yaml"}
143 changes: 143 additions & 0 deletions scripts/testFilenameReferences.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bash

# Tests: scan all *.sh files (current directory including subdirectories)
# for occurrences of cypher and other filename references without paths and ensure a file with
# the referenced basename exists somewhere in the tree.

# Fail on any error
set -o errexit

# Fail if any command in a pipeline fails (not just the last one).
# Intentionally tolerated to fail on shells that don't support the "pipefail" option.
set -o pipefail 2>/dev/null || true

# Name of this script. Used as log message prefix, as part of the log file name
# and to skip this file when scanning shell scripts for filename references.
SCRIPT_NAME="testFilenameReferences.sh"
COLOR_ERROR='\033[0;31m'
COLOR_DE_EMPHASIZED='\033[0;90m'
COLOR_SUCCESSFUL="\033[0;32m"
COLOR_DEFAULT='\033[0m'

# Directory this script is located in (physical path with symbolic links resolved).
# An already set SCRIPTS_DIR takes precedence.
SCRIPTS_DIR=${SCRIPTS_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)}

# Removes the temporary test directory including everything in it.
# Globals: temporaryTestDirectory (read)
# Does nothing if the directory variable is unset or empty so that
# "rm -rf" is never invoked without a concrete target.
tearDown() {
  if [ -n "${temporaryTestDirectory:-}" ]; then
    # "--" ends option parsing so the path can never be taken for an option.
    rm -rf -- "${temporaryTestDirectory}"
  fi
}

# Reports that all tests passed and then cleans up the temporary test directory.
# Globals: SCRIPT_NAME, COLOR_DE_EMPHASIZED, COLOR_SUCCESSFUL, COLOR_DEFAULT (read)
# Outputs: the success message (prefixed with the script name) on stdout
successful() {
  local prefix="${COLOR_DE_EMPHASIZED}${SCRIPT_NAME}:${COLOR_DEFAULT}"
  local message="${COLOR_SUCCESSFUL}✅ Tests finished successfully.${COLOR_DEFAULT}"
  echo -e "${prefix} ${message}"
  tearDown
}

# Prints an informational message prefixed with the de-emphasized script name.
# Globals:   SCRIPT_NAME, COLOR_DE_EMPHASIZED, COLOR_DEFAULT (read)
# Arguments: $1 - the message to print (backslash escapes are interpreted)
# Outputs:   the prefixed message on stdout
info() {
  local prefix="${COLOR_DE_EMPHASIZED}${SCRIPT_NAME}:${COLOR_DEFAULT}"
  echo -e "${prefix} ${1}"
}

# Prints an error message, cleans up the temporary test directory and signals failure.
# Globals:   SCRIPT_NAME, COLOR_DE_EMPHASIZED, COLOR_ERROR, COLOR_DEFAULT (read)
# Arguments: $1 - the error message to print (backslash escapes are interpreted)
# Outputs:   the prefixed error message on stdout
# Returns:   always 1
fail() {
  local reason="${1}"
  local prefix="${COLOR_DE_EMPHASIZED}${SCRIPT_NAME}:"
  echo -e "${prefix} ${COLOR_ERROR}${reason}${COLOR_DEFAULT}"
  tearDown
  return 1
}

# Prints the content of the log file of the current test run, or an error message if there is none.
# Globals: temporaryTestDirectory, SCRIPT_NAME, reference_extension (read) - together they form the
#          log file name "<temporaryTestDirectory>/<SCRIPT_NAME>-<reference_extension>.log"
#          COLOR_DE_EMPHASIZED, COLOR_ERROR, COLOR_DEFAULT (read)
# Outputs: the (de-emphasized) log file content or the error message on stdout
printTestLogFileContent() {
  local logFileName="${temporaryTestDirectory}/${SCRIPT_NAME}-${reference_extension}.log"
  if [ -f "${logFileName}" ]; then
    local logFileContent
    logFileContent=$(cat "${logFileName}")
    # Print the whole log de-emphasized so that it doesn't distract from the actual test result.
    echo -e "${COLOR_DE_EMPHASIZED}${logFileContent}${COLOR_DEFAULT}"
  else
    # "printf" is needed here: "echo" doesn't support format placeholders and would print "%s" literally.
    # The file name is passed as "%s" so that it is printed verbatim while "%b" interprets the color escapes.
    printf '%bNo log file found at expected location: %s%b\n' "${COLOR_ERROR}" "${logFileName}" "${COLOR_DEFAULT}"
  fi
}

# Scans shell scripts below the current working directory for references to files with the given
# extension and fails if a referenced file name (basename) doesn't exist anywhere below that directory.
# Globals:   temporaryTestDirectory, SCRIPT_NAME, test_case_number (read)
#            reference_extension, missing_file, reference_filenames (written, not local;
#            reference_extension is also read by printTestLogFileContent)
# Arguments: $1 - file extension without leading dot, e.g. "cypher"
# Outputs:   on failure: the list of missing references and the log file content on stdout
# Returns:   the status of "fail" (non-zero) if at least one referenced file is missing
find_missing_file_references() {
# Capture stdout/stderr into a log file for this run
reference_extension="$1"
# Everything inside the following group writes its stdout/stderr into the log file (see closing brace).
{
# Start with an empty result file. It gets one line per missing reference: "<script>\t<reference>".
missing_file="${temporaryTestDirectory}/missing_${reference_extension}_references.txt"
: > "${missing_file}"

# Collect all reference filenames without path present in the repo
# Directories named "temp", ".git" and "node_modules" are skipped entirely. "sed" strips the path.
reference_filenames="${temporaryTestDirectory}/reference_${reference_extension}_filenames.txt"
find . \
\( -type d \( -name "temp" -o -name ".git" -o -name "node_modules" \) -prune \) -o \
\( -type f -name "*.${reference_extension}" -print \) \
2>/dev/null | sed 's#.*/##' > "${reference_filenames}"

echo "pnpm-lock.yaml" >> "${reference_filenames}" # Ignore pnpm-lock file references by pretending it exists since it is not checked in but generated by users locally.

# Iterate over all shell scripts
# Scripts with a name starting with "test" are excluded. File names are passed NUL-delimited.
find . \
\( -type d \( -name "temp" -o -name ".git" -o -name "node_modules" \) -prune \) -o \
\( -type f -name '*.sh' ! -name "test*" -print0 \) \
| while IFS= read -r -d '' script_file; do
# Skip this test file itself
if [ "$(basename "${script_file}")" = "${SCRIPT_NAME}" ]; then
continue
fi

# Use awk to extract all reference file name extension matches from non-comment lines
# A reference is either preceded by a slash or is a file name fully enclosed in double quotes.
# References containing "$" or "*" are not printed and therefore not checked.
awk -v reference_extension="${reference_extension}" '
/^[[:space:]]*#/ { next }
{
line = $0
pattern = "(/[^/[:space:]\"]+\\." reference_extension ")|(\"[^/[:space:]\"]+\\." reference_extension "\")"

while (match(line, pattern)) {
ref = substr(line, RSTART, RLENGTH)
gsub(/^"|\"$/, "", ref) # remove surrounding quotes
gsub(/^\//, "", ref) # remove leading slash

if (ref !~ /\$/ && ref !~ /\*/) {
print ref
}
line = substr(line, RSTART + RLENGTH)
}
}
' "${script_file}" | while IFS= read -r reference_file; do
[ -z "${reference_file}" ] && continue
reference_filename=$(basename "${reference_file}")
# Exact, whole-line and literal (no regular expression) comparison against the known file names.
if ! grep -Fx -- "${reference_filename}" "${reference_filenames}" >/dev/null 2>&1; then
printf '%s\t%s\n' "${script_file#./}" "${reference_file}" >> "${missing_file}"
fi
done
done
} > "${temporaryTestDirectory}/${SCRIPT_NAME}-${reference_extension}.log" 2>&1

# A non-empty result file means that at least one referenced file couldn't be found.
if [ -s "${missing_file}" ]; then
echo -e "${COLOR_DE_EMPHASIZED}${SCRIPT_NAME}:${COLOR_ERROR} ERROR: Missing referenced ${reference_extension} files (by basename):${COLOR_DEFAULT}"
awk -F"\t" '{ printf(" - In %s -> referenced %s\n", $1, $2) }' "${missing_file}"

# Print content of reference_filenames for debugging purposes
# echo -e "\n${COLOR_DE_EMPHASIZED}List of all ${reference_extension} files found in the repository (by basename):${COLOR_DEFAULT}"
# cat "${reference_filenames}" | while IFS= read -r filename; do
# echo -e " - ${filename}"
# done
# echo ""

printTestLogFileContent
fail "${test_case_number}.) ❌ Test failed. Missing referenced ${reference_extension} files found. See details above."
fi
}

info "Starting tests...."

# Create a fresh temporary directory for the intermediate files and logs of this test run.
# The second "mktemp" call (with a template given by "-t") is the fallback if the first one fails.
temporaryTestDirectory=$(mktemp -d 2>/dev/null || mktemp -d -t "temporaryTestDirectory_${SCRIPT_NAME}")
mkdir -p "${temporaryTestDirectory}"

# ------- Integration-style Test Case
test_case_number=1
info "${test_case_number}.) Scan all .sh files for '/...*.cypher' references and verify filenames exist."
find_missing_file_references "cypher"

# ------- Integration-style Test Case
test_case_number=2
info "${test_case_number}.) Scan all .sh files for '/...*.yaml' references and verify filenames exist."
find_missing_file_references "yaml"

successful

# "return" only works when this script is sourced. When it is executed directly, "return" fails,
# which would turn a successful test run into a non-zero exit code. Fall back to "exit" in that case.
return 0 2>/dev/null || exit 0
Loading