Skip to content

Commit

Permalink
Implemented turf-clusters-dbscan with spatial data structure (fix Tur…
Browse files Browse the repository at this point in the history
…fjs#2492) (Turfjs#2497)

* feat: implement turf-clusters-dbscan

* Import 'RBush' for spatial indexing

* Removed 'dbscan-clustering'

* Reimplemented DBSCAN for performance

* Test

* fix: write comments

* Write more comments

* Improved region query (Turfjs#2492)

* Smaller bounding box for region query with RBush

* Bulk-Insertion for RBush (Turfjs#2492)

* Use Bulk-Insertion for adding data to RBush tree

* Slightly improved performance of region query

* Handle the case where the bounding box crosses the poles (Turfjs#2492)

* Fix incorrect bounding box (Turfjs#2492)

* Fix incorrect bounding box

* updated packages for turf-clusters-dbscan

 - remove unused packages
 - add rbush

---------

Co-authored-by: James Beard <[email protected]>
  • Loading branch information
TadaTeruki and smallsaucepan authored Nov 25, 2023
1 parent c8b37e8 commit 407619b
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 32 deletions.
153 changes: 123 additions & 30 deletions packages/turf-clusters-dbscan/index.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import { GeoJsonProperties, FeatureCollection, Point } from "geojson";
import clone from "@turf/clone";
import distance from "@turf/distance";
import { coordAll } from "@turf/meta";
import { convertLength, Units } from "@turf/helpers";
import clustering from "density-clustering";
import { degreesToRadians, lengthToDegrees, Units } from "@turf/helpers";
import RBush from "rbush";

export type Dbscan = "core" | "edge" | "noise";
export type DbscanProps = GeoJsonProperties & {
dbscan?: Dbscan;
cluster?: number;
};

// Structure of a point in the spatial index
type IndexedPoint = {
minX: number;
minY: number;
maxX: number;
maxY: number;
index: number;
};

/**
* Takes a set of {@link Point|points} and partition them into clusters according to {@link DBSCAN's|https://en.wikipedia.org/wiki/DBSCAN} data clustering algorithm.
*
Expand Down Expand Up @@ -53,37 +61,122 @@ function clustersDbscan(
if (options.mutate !== true) points = clone(points);

// Defaults
options.minPoints = options.minPoints || 3;

// create clustered ids
var dbscan = new clustering.DBSCAN();
var clusteredIds = dbscan.run(
coordAll(points),
convertLength(maxDistance, options.units),
options.minPoints,
distance
const minPoints = options.minPoints || 3;

// Calculate the distance in degrees for region queries
const latDistanceInDegrees = lengthToDegrees(maxDistance, options.units);

// Create a spatial index
var tree = new RBush(points.features.length);

// Keeps track of whether a point has been visited or not.
var visited = points.features.map((_) => false);

// Keeps track of whether a point is assigned to a cluster or not.
var assigned = points.features.map((_) => false);

// Keeps track of whether a point is noise|edge or not.
var isnoise = points.features.map((_) => false);

// Keeps track of the clusterId for each point
var clusterIds: number[] = points.features.map((_) => -1);

// Index each point for spatial queries
tree.load(
points.features.map((point, index) => {
var [x, y] = point.geometry.coordinates;
return {
minX: x,
minY: y,
maxX: x,
maxY: y,
index: index,
} as IndexedPoint;
})
);

// Tag points to Clusters ID
var clusterId = -1;
clusteredIds.forEach(function (clusterIds) {
clusterId++;
// assign cluster ids to input points
clusterIds.forEach(function (idx) {
var clusterPoint = points.features[idx];
if (!clusterPoint.properties) clusterPoint.properties = {};
clusterPoint.properties.cluster = clusterId;
clusterPoint.properties.dbscan = "core";
});
// Function to find neighbors of a point within a given distance
const regionQuery = (index: number): IndexedPoint[] => {
const point = points.features[index];
const [x, y] = point.geometry.coordinates;

const minY = Math.max(y - latDistanceInDegrees, -90.0);
const maxY = Math.min(y + latDistanceInDegrees, 90.0);

const lonDistanceInDegrees = (function () {
// Handle the case where the bounding box crosses the poles
if (minY < 0 && maxY > 0) {
return latDistanceInDegrees;
}
if (Math.abs(minY) < Math.abs(maxY)) {
return latDistanceInDegrees / Math.cos(degreesToRadians(maxY));
} else {
return latDistanceInDegrees / Math.cos(degreesToRadians(minY));
}
})();

const minX = Math.max(x - lonDistanceInDegrees, -360.0);
const maxX = Math.min(x + lonDistanceInDegrees, 360.0);

// Calculate the bounding box for the region query
const bbox = { minX, minY, maxX, maxY };
return tree.search(bbox).filter((neighbor) => {
const neighborIndex = (neighbor as IndexedPoint).index;
const neighborPoint = points.features[neighborIndex];
const distanceInKm = distance(point, neighborPoint, {
units: "kilometers",
});
return distanceInKm <= maxDistance;
}) as IndexedPoint[];
};

// Function to expand a cluster
const expandCluster = (clusteredId: number, neighbors: IndexedPoint[]) => {
for (var i = 0; i < neighbors.length; i++) {
var neighbor = neighbors[i];
const neighborIndex = neighbor.index;
if (!visited[neighborIndex]) {
visited[neighborIndex] = true;
const nextNeighbors = regionQuery(neighborIndex);
if (nextNeighbors.length >= minPoints) {
neighbors.push(...nextNeighbors);
}
}
if (!assigned[neighborIndex]) {
assigned[neighborIndex] = true;
clusterIds[neighborIndex] = clusteredId;
}
}
};

// Main DBSCAN clustering algorithm
var nextClusteredId = 0;
points.features.forEach((_, index) => {
if (visited[index]) return;
const neighbors = regionQuery(index);
if (neighbors.length >= minPoints) {
const clusteredId = nextClusteredId;
nextClusteredId++;
visited[index] = true;
expandCluster(clusteredId, neighbors);
} else {
isnoise[index] = true;
}
});

// handle noise points, if any
// edges points are tagged by DBSCAN as both 'noise' and 'cluster' as they can "reach" less than 'minPoints' number of points
dbscan.noise.forEach(function (noiseId) {
var noisePoint = points.features[noiseId];
if (!noisePoint.properties) noisePoint.properties = {};
if (noisePoint.properties.cluster) noisePoint.properties.dbscan = "edge";
else noisePoint.properties.dbscan = "noise";
// Assign DBSCAN properties to each point
points.features.forEach((_, index) => {
var clusterPoint = points.features[index];
if (!clusterPoint.properties) {
clusterPoint.properties = {};
}

if (clusterIds[index] >= 0) {
clusterPoint.properties.dbscan = isnoise[index] ? "edge" : "core";
clusterPoint.properties.cluster = clusterIds[index];
} else {
clusterPoint.properties.dbscan = "noise";
}
});

return points as FeatureCollection<Point, DbscanProps>;
Expand Down
3 changes: 1 addition & 2 deletions packages/turf-clusters-dbscan/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
"devDependencies": {
"@turf/centroid": "^7.0.0-alpha.2",
"@turf/clusters": "^7.0.0-alpha.2",
"@types/density-clustering": "^1.3.3",
"@types/tape": "^4.2.32",
"benchmark": "^2.1.4",
"chromatism": "^3.0.0",
Expand All @@ -75,7 +74,7 @@
"@turf/distance": "^7.0.0-alpha.2",
"@turf/helpers": "^7.0.0-alpha.2",
"@turf/meta": "^7.0.0-alpha.2",
"density-clustering": "1.3.0",
"rbush": "^3.0.1",
"tslib": "^2.6.2"
}
}

0 comments on commit 407619b

Please sign in to comment.