Skip to content

Commit 990e51a

Browse files
authored
Merge pull request #503 from JohT/feature/improve-node-embeddings
Improve Node Embeddings
2 parents 6c4e94c + ca6d81f commit 990e51a

8 files changed

Lines changed: 1096 additions & 228 deletions

conda-environment.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ dependencies:
1515
- pandas=2.2.3
1616
- pip=25.0.1
1717
- setuptools=80.9.0 # opentsne uses sklearn.base uses joblib uses distutils missing in Python >= 3.12 (TODO use native openTSNE?)
18-
- typing-extensions=4.15.0 # Also needed for opentsne and Python >= 3.12
19-
- opentsne=1.0.4 # to visualize node embeddings in 2D (t-SNE dimensionality reduction)
2018
- wordcloud=1.9.4
2119
- monotonic=1.6
2220
- plotly=6.5.0
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Node Embeddings 0b: Prepare: Calculate Degree Property.
2+
3+
CALL gds.degree.mutate(
4+
$dependencies_projection + '-cleaned', {
5+
orientation: 'UNDIRECTED'
6+
,relationshipWeightProperty: CASE $dependencies_projection_weight_property WHEN '' THEN null ELSE $dependencies_projection_weight_property END
7+
,mutateProperty: 'degreeForNodeEmbeddings'
8+
})
9+
YIELD nodePropertiesWritten
10+
,preProcessingMillis
11+
,computeMillis
12+
,mutateMillis
13+
,postProcessingMillis
14+
,centralityDistribution
15+
RETURN nodePropertiesWritten
16+
,preProcessingMillis
17+
,computeMillis
18+
,mutateMillis
19+
,postProcessingMillis
20+
,centralityDistribution.min
21+
,centralityDistribution.mean
22+
,centralityDistribution.max
23+
,centralityDistribution.p50
24+
,centralityDistribution.p75
25+
,centralityDistribution.p90
26+
,centralityDistribution.p95
27+
,centralityDistribution.p99
28+
,centralityDistribution.p999
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Node Embeddings using GraphSAGE: Prepare: Drop the trained GraphSAGE model if it exists (does not fail if it is missing).
2+
3+
CALL gds.model.drop($dependencies_projection + '-graphSAGE', false)
4+
YIELD modelName,
5+
modelType,
6+
modelInfo,
7+
creationTime,
8+
trainConfig,
9+
graphSchema,
10+
loaded,
11+
stored,
12+
published
13+
RETURN modelName,
14+
modelType,
15+
modelInfo,
16+
creationTime,
17+
trainConfig,
18+
graphSchema,
19+
loaded,
20+
stored,
21+
published
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Node Embeddings 4c using GraphSAGE (Graph Neural Networks): Train. Requires: "Node_Embeddings_0b_Prepare_Degree.cypher".
2+
3+
CALL gds.beta.graphSage.train(
4+
$dependencies_projection + '-cleaned', {
5+
modelName: $dependencies_projection + '-graphSAGE'
6+
,featureProperties: ['degreeForNodeEmbeddings']
7+
,embeddingDimension: toInteger($dependencies_projection_embedding_dimension)
8+
,relationshipWeightProperty: CASE $dependencies_projection_weight_property WHEN '' THEN null ELSE $dependencies_projection_weight_property END
9+
,batchSize: 64
10+
,activationFunction: 'relu'
11+
,sampleSizes: [25, 20, 20, 10]
12+
//,aggregator: 'pool'
13+
//,epochs: 10
14+
//,penaltyL2: 0.0000001
15+
//,tolerance: 0.0001
16+
//,learningRate: 0.1
17+
//,searchDepth: 5
18+
,randomSeed: 47
19+
}
20+
)
21+
YIELD modelInfo AS info, trainMillis
22+
RETURN
23+
info.modelName AS modelName,
24+
info.metrics.didConverge AS didConverge,
25+
info.metrics.ranEpochs AS ranEpochs,
26+
info.metrics.epochLosses AS epochLosses,
27+
trainMillis AS trainingTimeMilliseconds
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Node Embeddings 4d using GraphSAGE: Stream. Requires "Add_file_name and_extension.cypher".
2+
3+
CALL gds.beta.graphSage.stream(
4+
$dependencies_projection + '-cleaned', {
5+
modelName: $dependencies_projection + '-graphSAGE'
6+
}
7+
)
8+
YIELD nodeId, embedding
9+
WITH gds.util.asNode(nodeId) AS codeUnit
10+
,embedding
11+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
12+
WITH *, artifact.name AS artifactName
13+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
14+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
15+
RETURN DISTINCT
16+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
17+
,codeUnit.name AS shortCodeUnitName
18+
,elementId(codeUnit) AS nodeElementId
19+
,coalesce(artifactName, projectName) AS projectName
20+
,coalesce(codeUnit.communityLeidenId, 0) AS communityId
21+
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality
22+
,embedding

0 commit comments

Comments
 (0)