Skip to content

Commit 125d6b0

Browse files
committed
Add process file and Docker run for batch
1 parent 674d58e commit 125d6b0

File tree

7 files changed

+1371
-86
lines changed

7 files changed

+1371
-86
lines changed

.yarnrc.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nodeLinker: node-modules

Dockerfile

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Base image: official Node.js 18
FROM node:18

# Install the command-line tools used alongside the Node scripts (parallel, jq),
# then drop the apt caches and package lists
RUN apt-get update \
    && apt-get install -y parallel jq \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# All application files live in this directory inside the image
WORKDIR /usr/src/app

# Bring in the package manifest (and package-lock.json, when present) and
# install the Node dependencies
COPY package*.json ./
RUN npm install

# Node scripts plus the shell script that drives them
COPY fetchOsc.js parser.js process.sh ./

# Make the driver script executable
RUN chmod +x process.sh

# Run the driver script when the container starts
ENTRYPOINT ["./process.sh"]

fetchOsc.js

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
const fs = require("fs");
const { pipeline } = require("stream/promises");
const { setTimeout: sleep } = require("timers/promises");
const axios = require("axios");

// Retry policy: how many times a failed download is retried, and how long to
// wait (in milliseconds) before each retry.
const maxRetries = 10;
const retryInterval = 5000;

const OSC_SOURCE_URL =
  "http://s3-eu-west-1.amazonaws.com/overpass-db-eu-west-1/augmented-diffs";

// CLI: node fetchOsc.js <sequence_number> <output_file>
const sequenceNumber = process.argv[2];
const outputFileName = process.argv[3];

if (!sequenceNumber || !outputFileName) {
  console.error("Usage: node fetchOsc.js <sequence_number> <output_file>");
  process.exit(1);
}

const url = `${OSC_SOURCE_URL}/${sequenceNumber}.osc`;

/**
 * Download `url` and stream the response body into `outputFileName`.
 *
 * The returned promise resolves only after the whole body has been written to
 * disk. A failed attempt is retried up to `maxRetries` times, waiting
 * `retryInterval` milliseconds between attempts; any partially written file is
 * removed after each failure.
 *
 * @param {string} url - Address of the file to download.
 * @param {string} outputFileName - Path of the file to write.
 * @returns {Promise<void>} Resolves once the file is completely written.
 * @throws The error of the last attempt when every attempt has failed.
 */
async function downloadFile(url, outputFileName) {
  for (let attempt = 0; ; attempt += 1) {
    try {
      const res = await axios({
        method: "get",
        url,
        responseType: "stream",
      });
      // The write stream is created only after the request succeeded, and
      // pipeline() settles when the data is flushed or either stream errors.
      await pipeline(res.data, fs.createWriteStream(outputFileName));
      return;
    } catch (err) {
      // Best-effort cleanup so no truncated file is mistaken for a real one;
      // a cleanup failure must not mask the download error.
      await fs.promises.rm(outputFileName, { force: true }).catch(() => {});
      if (attempt >= maxRetries) {
        throw err;
      }
      console.error(
        `Download of ${url} failed (${err.message}); retry ${attempt + 1} of ${maxRetries} in ${retryInterval} ms`
      );
      await sleep(retryInterval);
    }
  }
}

downloadFile(url, outputFileName)
  .then(() => {
    console.log(`Downloaded ${url} to ${outputFileName}`);
  })
  .catch((err) => {
    console.error(`Failed to download ${url}: ${err.message}`);
    // Report the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });

package.json

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"author": "",
1010
"license": "ISC",
1111
"dependencies": {
12+
"axios": "^1.5.0",
1213
"osm-adiff-parser": "^1.1.0",
1314
"real-changesets-parser": "https://github.com/developmentseed/real-changesets-parser.git"
1415
}

parser.js

+14-14
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,29 @@
1-
const adiffParser = require('osm-adiff-parser');
2-
const changesetParser = require('real-changesets-parser');
3-
const fs = require('fs');
1+
const adiffParser = require("osm-adiff-parser");
2+
const changesetParser = require("real-changesets-parser");
3+
const fs = require("fs");
44

55
// Read the file name from the command-line argument
66
const fileName = process.argv[2];
77

88
// Read the file
9-
fs.readFile(fileName, 'utf8', (err, data) => {
9+
fs.readFile(fileName, "utf8", (err, data) => {
1010
if (err) {
11-
console.error('Error reading the file:', err);
11+
console.error("Error reading the file:", err);
1212
return;
1313
}
14-
let DONE = false;
14+
let ranOnce = false;
1515
adiffParser(data, null, (err, result) => {
16-
if (DONE) { return };
17-
DONE = true;
16+
if (ranOnce) {
17+
return;
18+
}
19+
ranOnce = true;
1820
// console.log('keys', Object.keys(result));
1921
const featureCollection = {
20-
'type': 'FeatureCollection',
21-
'features': []
22+
type: "FeatureCollection",
23+
features: [],
2224
};
23-
Object.keys(result).forEach(changesetId => {
24-
result[changesetId].forEach(element => {
25+
Object.keys(result).forEach((changesetId) => {
26+
result[changesetId].forEach((element) => {
2527
const change = changesetParser.elementParser(element);
2628
// console.log('change', change);
2729
featureCollection.features = featureCollection.features.concat(change);
@@ -33,6 +35,4 @@ fs.readFile(fileName, 'utf8', (err, data) => {
3335
// console.log('changeset', changeset);
3436
// return result;
3537
});
36-
37-
3838
});

process.sh

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
#
# Download a batch of .osc files with fetchOsc.js, parse each one with
# parser.js, and merge all resulting features into a single GeoJSON
# FeatureCollection written to /tmp/final_geojson.json.
#
# Usage: process.sh <start_number> <num_files>
#   start_number  highest sequence number to process
#   num_files     how many sequence numbers to process, counting down from
#                 start_number

# Check if the necessary arguments were provided
if [[ -z "$1" ]] || [[ -z "$2" ]]; then
  echo "Usage: $0 <start_number> <num_files>"
  exit 1
fi

# Starting number provided as the first argument
start_num=$1

# Number of files to fetch
num_files=$2

# Both values are used in shell arithmetic below, so reject anything else
if ! [[ "$start_num" =~ ^[0-9]+$ && "$num_files" =~ ^[0-9]+$ ]]; then
  echo "Error: <start_number> and <num_files> must be non-negative integers" >&2
  exit 1
fi

# Calculate the end number (exclusive lower bound of the processed range)
end_num=$((start_num - num_files))

# Fetch a single .osc file using fetchOsc.js
fetchOscFunction() {
  local sequence_num=$1
  local file_path="/tmp/${sequence_num}.osc"
  node fetchOsc.js "$sequence_num" "$file_path"
}

export -f fetchOscFunction

# Download exactly the sequence numbers that are parsed below:
# end_num + 1 .. start_num, i.e. num_files files
seq "$((end_num + 1))" "$start_num" | parallel fetchOscFunction

# Parse .osc files and collect into a single geojson using parser.js and jq.
# Features are gathered one per line and wrapped by jq at the end, so the
# result is valid JSON no matter how many features each file contributes.
features_file=$(mktemp /tmp/features.XXXXXX)
trap 'rm -f "$features_file"' EXIT

echo 'Parsing files'
for (( i=start_num; i>end_num; i-- )); do
  echo "$i"
  file_path="/tmp/${i}.osc"
  if [[ ! -s "$file_path" ]]; then
    echo "Skipping ${file_path}: file is missing or empty" >&2
    continue
  fi
  node parser.js "$file_path" | jq -c '.features[]' >> "$features_file"
done
echo 'Done parsing files'

# --slurp reads every collected feature into one array
jq -s '{ type: "FeatureCollection", features: . }' "$features_file" > /tmp/final_geojson.json

0 commit comments

Comments
 (0)