Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions conda-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ dependencies:
- pandas=2.2.3
- pip=25.0.1
- setuptools=80.9.0 # opentsne uses sklearn.base uses joblib uses distutils missing in Python >= 3.12 (TODO use native openTSNE?)
- typing-extensions=4.15.0 # Also needed for opentsne and Python >= 3.12
- opentsne=1.0.4 # to visualize node embeddings in 2D (t-SNE dimensionality reduction)
- wordcloud=1.9.4
- monotonic=1.6
- plotly=6.5.0
Expand Down
28 changes: 28 additions & 0 deletions cypher/Node_Embeddings/Node_Embeddings_0b_Prepare_Degree.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Node Embeddings 0b: Prepare: Calculate Degree Property.
// Calls the degree centrality procedure in "mutate" mode on the graph projection named
// $dependencies_projection + '-cleaned' and stores the result in the node property 'degreeForNodeEmbeddings'.
// That property is used as feature property by "Node_Embeddings_4b_GraphSAGE_Train.cypher".
// Parameters:
//   $dependencies_projection: prefix of the projection name ('-cleaned' is appended)
//   $dependencies_projection_weight_property: name of the relationship weight property.
//     An empty string is converted to null so that no weight property is passed.
// Returns a single row with the number of written node properties, the timings in milliseconds
// and selected values (min, mean, max, percentiles) of the centrality distribution.

CALL gds.degree.mutate(
$dependencies_projection + '-cleaned', {
orientation: 'UNDIRECTED'
,relationshipWeightProperty: CASE $dependencies_projection_weight_property WHEN '' THEN null ELSE $dependencies_projection_weight_property END
,mutateProperty: 'degreeForNodeEmbeddings'
})
YIELD nodePropertiesWritten
,preProcessingMillis
,computeMillis
,mutateMillis
,postProcessingMillis
,centralityDistribution
// The distribution map is flattened into separate columns. They are intentionally not aliased,
// so the column names are the expressions themselves (e.g. "centralityDistribution.min").
RETURN nodePropertiesWritten
,preProcessingMillis
,computeMillis
,mutateMillis
,postProcessingMillis
,centralityDistribution.min
,centralityDistribution.mean
,centralityDistribution.max
,centralityDistribution.p50
,centralityDistribution.p75
,centralityDistribution.p90
,centralityDistribution.p95
,centralityDistribution.p99
,centralityDistribution.p999
21 changes: 21 additions & 0 deletions cypher/Node_Embeddings/Node_Embeddings_0c_Drop_Model.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Node Embeddings 0c: Prepare: Drop the GraphSAGE model named $dependencies_projection + '-graphSAGE'.
// The same model name is used for training in "Node_Embeddings_4b_GraphSAGE_Train.cypher".
// NOTE(review): the second argument (false) is presumably "failIfMissing", so that a missing model
// does not lead to an error - confirm against the GDS model catalog documentation.
// Returns the metadata of the dropped model as reported by the procedure.

CALL gds.model.drop($dependencies_projection + '-graphSAGE', false)
YIELD modelName,
modelType,
modelInfo,
creationTime,
trainConfig,
graphSchema,
loaded,
stored,
published
RETURN modelName,
modelType,
modelInfo,
creationTime,
trainConfig,
graphSchema,
loaded,
stored,
published
27 changes: 27 additions & 0 deletions cypher/Node_Embeddings/Node_Embeddings_4b_GraphSAGE_Train.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Node Embeddings 4b using GraphSAGE (Graph Neural Networks): Train. Requires: "Node_Embeddings_0b_Prepare_Degree.cypher".
// Trains a GraphSAGE model on the projection $dependencies_projection + '-cleaned' and registers it
// under the name $dependencies_projection + '-graphSAGE'.
// The node property 'degreeForNodeEmbeddings' (see required script above) is the only feature property.
// Parameters:
//   $dependencies_projection: prefix of the projection name and of the model name
//   $dependencies_projection_embedding_dimension: embedding dimension (converted to integer)
//   $dependencies_projection_weight_property: relationship weight property name, empty string = none (null)
// NOTE(review): presumably an already existing model with the same name needs to be dropped first
// (see "Node_Embeddings_0c_Drop_Model.cypher") - confirm.

  CALL gds.beta.graphSage.train(
       $dependencies_projection + '-cleaned'
      ,{
         modelName: $dependencies_projection + '-graphSAGE'
        ,featureProperties: ['degreeForNodeEmbeddings']
        ,embeddingDimension: toInteger($dependencies_projection_embedding_dimension)
        ,relationshipWeightProperty: CASE $dependencies_projection_weight_property
                                       WHEN '' THEN null
                                       ELSE $dependencies_projection_weight_property
                                      END
        ,batchSize: 64
        ,activationFunction: 'relu'
        ,sampleSizes: [25, 20, 20, 10]
        // Optional settings that are currently left at their defaults:
        //,aggregator: 'pool'
        //,epochs: 10
        //,penaltyL2: 0.0000001
        //,tolerance: 0.0001
        //,learningRate: 0.1
        //,searchDepth: 5
        ,randomSeed: 47
       }
  )
 YIELD modelInfo AS trainedModel
      ,trainMillis
RETURN trainedModel.modelName           AS modelName
      ,trainedModel.metrics.didConverge AS didConverge
      ,trainedModel.metrics.ranEpochs   AS ranEpochs
      ,trainedModel.metrics.epochLosses AS epochLosses
      ,trainMillis                      AS trainingTimeMilliseconds
22 changes: 22 additions & 0 deletions cypher/Node_Embeddings/Node_Embeddings_4d_GraphSAGE_Stream.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Node Embeddings 4d using GraphSAGE: Stream. Requires "Add_file_name and_extension.cypher".
// Streams the embeddings of the model named $dependencies_projection + '-graphSAGE'
// (the name used in "Node_Embeddings_4b_GraphSAGE_Train.cypher") for the projection
// $dependencies_projection + '-cleaned' and returns one distinct row per code unit
// together with its names, element id, project name, community id and centrality.

   CALL gds.beta.graphSage.stream(
        $dependencies_projection + '-cleaned'
       ,{ modelName: $dependencies_projection + '-graphSAGE' }
   )
  YIELD nodeId, embedding
   WITH gds.util.asNode(nodeId) AS codeUnit
       ,embedding
// Both lookups are optional: a code unit may belong to a Java artifact, to a Typescript project or to neither.
OPTIONAL MATCH (javaArtifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
OPTIONAL MATCH (rootDirectory:Directory)<-[:HAS_ROOT]-(typescriptProject:TS:Project)-[:CONTAINS]->(codeUnit)
 RETURN DISTINCT
        coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
       ,codeUnit.name                                 AS shortCodeUnitName
       ,elementId(codeUnit)                           AS nodeElementId
        // Java artifact name first, otherwise the last path segment of the Typescript project root directory
       ,coalesce(javaArtifact.name, last(split(rootDirectory.absoluteFileName, '/'))) AS projectName
        // Fallback values for code units without community or centrality property
       ,coalesce(codeUnit.communityLeidenId, 0)       AS communityId
       ,coalesce(codeUnit.centralityPageRank, 0.01)   AS centrality
       ,embedding
Loading
Loading