
Commit decc31d

Apply degree normalization to property part of initial random vectors
A seemingly strange/buggy behavior was reported. Somewhat simplified, the expectation was that with nodeSelfInfluence > 0 and a high propertyRatio (0.8), among (n1: {val: [0.1, 1.0]}), (n2: {val: [0.1, 1.0]}) and (n3: {val: [0.2, 1.0]}) it should "often" hold that SIM(emb(n1), emb(n2)) > SIM(emb(n1), emb(n3)). Whether this expectation is reasonable is out of scope for this commit, but while investigating it, a separate issue was discovered and fixed, which partly remedies the report: the initial random vector is split into two segments corresponding to embeddingDimension = baseEmbeddingDimension + propertyDimension. In the first segment, entries were scaled by the degree of the node raised to the power of normalizationStrength; however, the second segment of length propertyDimension was not scaled by this factor. This commit applies the scaling to the second segment as well. This removes the unintended reduction of node property influence (the entries of the second segment) when normalizationStrength > 0 and deg(n) > 1.
1 parent d623614 commit decc31d
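
For orientation, here is a minimal standalone sketch of the construction described above. It is not the GDS implementation; the parameter values and helper names (sparseRandomEntry, propertyProjection) are illustrative assumptions, and the degree factor simply follows the commit message, i.e. degree raised to normalizationStrength.

import java.util.Random;

final class InitialVectorSketch {

    // Illustrative parameters, not taken from the repository.
    static final int BASE_EMBEDDING_DIMENSION = 64;
    static final int PROPERTY_DIMENSION = 16;
    static final double NORMALIZATION_STRENGTH = 0.5;

    // Builds the initial random vector for one node: a degree-scaled sparse random
    // segment followed by a property-projection segment. After this commit, the same
    // degree factor is applied to both segments.
    static float[] initialVector(int degree, float[] propertyProjection, Random random) {
        float scaling = degree == 0 ? 1.0f : (float) Math.pow(degree, NORMALIZATION_STRENGTH);
        float[] vector = new float[BASE_EMBEDDING_DIMENSION + PROPERTY_DIMENSION];

        // First segment: sparse random entries, scaled by the degree factor (unchanged behavior).
        for (int i = 0; i < BASE_EMBEDDING_DIMENSION; i++) {
            vector[i] = scaling * sparseRandomEntry(random);
        }

        // Second segment: property projection. Before the commit this segment was not
        // multiplied by the degree factor; now it is, so property influence no longer
        // shrinks for nodes with deg(n) > 1 when normalizationStrength > 0.
        for (int i = 0; i < PROPERTY_DIMENSION; i++) {
            vector[BASE_EMBEDDING_DIMENSION + i] = scaling * propertyProjection[i];
        }
        return vector;
    }

    // Very sparse random entry (+1, -1 or 0), a stand-in for FastRP's computeRandomEntry.
    static float sparseRandomEntry(Random random) {
        int r = random.nextInt(6);
        return r == 0 ? 1.0f : r == 1 ? -1.0f : 0.0f;
    }
}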

File tree

1 file changed: +9 −4 lines changed
  • algo/src/main/java/org/neo4j/gds/embeddings/fastrp


algo/src/main/java/org/neo4j/gds/embeddings/fastrp/FastRP.java

Lines changed: 9 additions & 4 deletions
@@ -344,37 +344,42 @@ public void run() {
 
             float entryValue = scaling * sqrtSparsity / sqrtEmbeddingDimension;
             random.reseed(randomSeed ^ graph.toOriginalNodeId(nodeId));
-            var randomVector = computeRandomVector(nodeId, random, entryValue);
+            var randomVector = computeRandomVector(nodeId, random, entryValue, scaling);
             embeddingB.set(nodeId, randomVector);
             embeddingA.set(nodeId, new float[embeddingDimension]);
         });
         progressTracker.logProgress(partition.nodeCount());
     }
 
-    private float[] computeRandomVector(long nodeId, Random random, float entryValue) {
+    private float[] computeRandomVector(long nodeId, Random random, float entryValue, float scaling) {
         var randomVector = new float[embeddingDimension];
         for (int i = 0; i < baseEmbeddingDimension; i++) {
            randomVector[i] = computeRandomEntry(random, entryValue);
         }
 
         propertyVectorAdder.setRandomVector(randomVector);
+        propertyVectorAdder.setScaling(scaling);
         FeatureExtraction.extract(nodeId, -1, featureExtractors, propertyVectorAdder);
 
         return randomVector;
     }
 
     private class PropertyVectorAdder implements FeatureConsumer {
         private float[] randomVector;
+        private float scaling = 1.0f;
 
         void setRandomVector(float[] randomVector) {
             this.randomVector = randomVector;
         }
+        void setScaling(float scaling) {
+            this.scaling = scaling;
+        }
 
         @Override
         public void acceptScalar(long ignored, int offset, double value) {
             float floatValue = (float) value;
             for (int i = baseEmbeddingDimension; i < embeddingDimension; i++) {
-                randomVector[i] += floatValue * propertyVectors[offset][i - baseEmbeddingDimension];
+                randomVector[i] += scaling * floatValue * propertyVectors[offset][i - baseEmbeddingDimension];
             }
         }
 
@@ -384,7 +389,7 @@ public void acceptArray(long ignored, int offset, double[] values) {
                 var value = (float) values[j];
                 float[] propertyVector = propertyVectors[offset + j];
                 for (int i = baseEmbeddingDimension; i < embeddingDimension; i++) {
-                    randomVector[i] += value * propertyVector[i - baseEmbeddingDimension];
+                    randomVector[i] += scaling * value * propertyVector[i - baseEmbeddingDimension];
                 }
             }
         }
