Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new api for wikipedia image info #29

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion rest.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const { getImagesFromCommonsWithTitle } = require('./wikimedia-commons');
const { getImagesEuropeana } = require('./europeana');
const { getWikidata } = require('./wikidata');
const { getWikidataByLatLon } = require('./wikidata-latlon');
const { findWikidataItemFromWikipedia, getWikipediaData } = require('./wikipedia');
const { findWikidataItemFromWikipedia, getWikipediaData, getImageInfoFromWikipedia } = require('./wikipedia');

//does deprecating bodyParser make something dysfunctional?
const urlencodedParser = bodyParser.urlencoded({ extended: false });
Expand Down Expand Up @@ -109,6 +109,18 @@ app.get('/wiki', asyncMiddleware(async function(req, res) {
}));


// GET /wiki/imageinfo?language=<code>&titles=<title>[&titles=<title>...]
// Looks up image metadata for the given file titles on the given language's Wikipedia and
// responds with `{ wikiImageInfo }`, where wikiImageInfo is whatever
// getImageInfoFromWikipedia resolves to.
app.get('/wiki/imageinfo', asyncMiddleware(async function(req, res) {
    console.log(req.originalUrl);

    const language = req.query.language;
    const rawTitles = req.query.titles;

    // Reject incomplete requests up front instead of failing somewhere inside the lookup.
    if (!language || rawTitles === undefined || rawTitles === '') {
        res.status(400).send({
            error: 'Query parameters "language" and "titles" are required'
        });
        return;
    }

    // The query parser yields a plain string when `titles` occurs once and an array when it is
    // repeated; the lookup joins the titles, so always hand it an array.
    const titles = Array.isArray(rawTitles) ? rawTitles : [rawTitles];

    const wikiImageInfo = await getImageInfoFromWikipedia(language, titles);

    res.send({
        wikiImageInfo
    });
}));
app.get('/wiki/items/by/latlon', asyncMiddleware(async function(req, res) {

console.log(req.originalUrl);
Expand Down
129 changes: 76 additions & 53 deletions wikimedia-commons.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,65 +17,88 @@ const requestConfigTemplate = {
};

module.exports = {
async getImagesFromCommonsWithTitle(topic, commonsCategory) {
let requestConfig = requestConfigTemplate;
getImagesFromCommonsWithTitle,
getImageFromPage
};

if (commonsCategory !== undefined) {
requestConfig.params.generator = 'categorymembers';
requestConfig.params.gcmtype = 'file';
requestConfig.params.gcmtitle = 'Category:' + commonsCategory;
requestConfig.params.gcmlimit = 30;
} else {
requestConfig.params.generator = 'search';
requestConfig.params.gsrsearch = topic;
requestConfig.params.gsrlimit = 30;
requestConfig.params.gsrnamespace = 6;
}
function getImageFromPage(page, source){
if (!page.imageinfo){
return null;
}
const extMetadata = page.imageinfo[0].extmetadata;
let image = {
id: page.pageid,
source: source,
imageURL: page.imageinfo[0].url,
thumbURL: page.imageinfo[0].thumburl,
title: [],
creators: [],
uploader: page.imageinfo[0].user,
institutions: [],
infoURL: page.imageinfo[0].descriptionurl,
location: null,
geoLocations: [],
year: null,
license: '',
license_link: null,
description: [],
datecreated: [],
downloadURL: page.imageinfo[0].url,
};

const response = await axios.request(requestConfig);
image.title.push(page.title.replace('File:', '').replace(/\.[^/.]+$/, ''));

if (!response.data.query || !response.data.query.pages) {
return [];
}

return Object.keys(response.data.query.pages).map(p => {
const page = response.data.query.pages[p];
let image = {
id: page.pageid,
source: 'Wikimedia Commons',
imageURL: page.imageinfo[0].url,
thumbURL: page.imageinfo[0].thumburl,
title: [],
creators: [],
uploader: page.imageinfo[0].user,
institutions: [],
infoURL: page.imageinfo[0].descriptionurl,
location: null,
geoLocations: [],
year: null,
license: '',
};
if (extMetadata.GPSLatitude !== undefined && extMetadata.GPSLongitude !== undefined) {
image.geoLocations.push('POINT(' + extMetadata.GPSLongitude.value + ' ' + extMetadata.GPSLatitude.value + ')')
}

const extMetadata = page.imageinfo[0].extmetadata;
image.title.push(page.title.replace('File:', '').replace(/\.[^/.]+$/, ''));
if (extMetadata.DateTimeOriginal !== undefined) {
const dateString = extMetadata.DateTimeOriginal.value;
image.datecreated.push(dateString);
const year = parseInt(dateString.substr(0, 4), 10);
if (year !== NaN) {
image.year = year;
}
}
if (extMetadata.Artist) {
image.creators.push(extMetadata.Artist.value);
}
if (extMetadata.ImageDescription) {
image.description.push(extMetadata.ImageDescription.value);
}
if (extMetadata.LicenseShortName !== undefined) {
image.license = extMetadata.LicenseShortName.value;
}
if (extMetadata.LicenseUrl !== undefined) {
image.license_link = extMetadata.LicenseUrl.value;
}

return image;
}

if (extMetadata.GPSLatitude !== undefined && extMetadata.GPSLongitude !== undefined) {
image.geoLocations.push('POINT(' + extMetadata.GPSLongitude.value + ' ' + extMetadata.GPSLatitude.value + ')')
}
async function getImagesFromCommonsWithTitle(topic, commonsCategory) {
let requestConfig = requestConfigTemplate;

if (extMetadata.DateTimeOriginal !== undefined) {
const dateString = extMetadata.DateTimeOriginal.value;
const year = parseInt(dateString.substr(0, 4), 10);
if (year !== NaN) {
image.year = year;
}
}
if (commonsCategory !== undefined) {
requestConfig.params.generator = 'categorymembers';
requestConfig.params.gcmtype = 'file';
requestConfig.params.gcmtitle = 'Category:' + commonsCategory;
requestConfig.params.gcmlimit = 30;
} else {
requestConfig.params.generator = 'search';
requestConfig.params.gsrsearch = topic;
requestConfig.params.gsrlimit = 30;
requestConfig.params.gsrnamespace = 6;
}

if (extMetadata.LicenseShortName !== undefined) {
image.license = extMetadata.LicenseShortName.value;
}
const response = await axios.request(requestConfig);

return image;
});
if (!response.data.query || !response.data.query.pages) {
return [];
}
};

return Object.keys(response.data.query.pages).map(p => {
const page = response.data.query.pages[p];
return getImageFromPage(page, 'Wikimedia Commons');
});
}
56 changes: 56 additions & 0 deletions wikipedia.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
const axios = require('axios');
const cheerio = require('cheerio');
const { getImageFromPage } = require('./wikimedia-commons');

module.exports = {
findWikidataItemFromWikipedia,
getWikipediaData,
getImageInfoFromWikipedia,
};

async function findWikidataItemFromWikipedia(language, topic) {
Expand Down Expand Up @@ -38,6 +40,60 @@ async function findWikidataItemFromWikipedia(language, topic) {
return null;
}

/**
 * Fetches image metadata for a list of file titles from one language edition of Wikipedia.
 *
 * All titles are sent in a single `action=query&prop=imageinfo` request and every returned
 * page is converted with getImageFromPage(page, 'Wikipedia').
 *
 * @param {string} language - Wikipedia language code; used as the sub-domain of the API host
 *     and as `iiextmetadatalanguage`.
 * @param {string[]|string} titles - File titles, optionally URI-encoded. A single string is
 *     treated as a one-element list.
 * @returns {Promise<Array|null>} One entry per requested title, in request order. An entry is
 *     null when the response contains no page for that title (or getImageFromPage returned
 *     null for it). Resolves to null when the response has no body.
 * @throws {TypeError} If `language` is not usable as a host name label.
 * @throws {URIError} If a title contains malformed percent-encoding.
 * @throws {Error} If the API answers with an `error` object.
 */
async function getImageInfoFromWikipedia(language, titles) {
    // The language becomes part of the request host name, so only accept characters that are
    // valid inside a single DNS label; anything else could redirect the request elsewhere.
    if (typeof language !== "string" || !/^[a-z0-9]+(-[a-z0-9]+)*$/i.test(language)) {
        throw new TypeError("Invalid Wikipedia language code: " + language);
    }

    let titleList = [];
    if (Array.isArray(titles)) {
        titleList = titles;
    } else if (titles !== undefined && titles !== null) {
        titleList = [titles];
    }
    const decodedTitles = titleList.map(title => decodeURIComponent(title));
    if (decodedTitles.length === 0) {
        return [];
    }

    const requestConfig = {
        baseURL: "https://" + language + ".wikipedia.org/w/api.php",
        method: "get",
        responseType: "json",
        headers: {
            'Api-User-Agent': process.env.WIKIDOCUMENTARIES_API_USER_AGENT
        },
        params: {
            action: "query",
            prop: "imageinfo",
            titles: decodedTitles.join("|"),
            format: "json",
            iiprop: "url|extmetadata",
            iiextmetadatalanguage: language,
        }
    };
    const response = await axios.request(requestConfig);
    if (!response.data) {
        return null;
    }
    if (response.data.error) {
        throw new Error("Wikipedia API error: " + (response.data.error.info || response.data.error.code));
    }

    // `normalized` is only present when the API had to rewrite at least one title, and
    // `pages` can be missing as well, so fall back to empty collections.
    const query = response.data.query || {};
    const pages = query.pages || {};
    const requestedTitleByNormalized = new Map();
    for (const change of (query.normalized || [])) {
        requestedTitleByNormalized.set(change.to, change.from);
    }

    // Position of each requested title (first occurrence wins, like indexOf).
    const indexByTitle = new Map();
    decodedTitles.forEach((title, index) => {
        if (!indexByTitle.has(title)) {
            indexByTitle.set(title, index);
        }
    });

    // Pages are keyed by page id, so put them back into the order the titles were requested in.
    const images = new Array(decodedTitles.length).fill(null);
    for (const key of Object.keys(pages)) {
        const page = pages[key];
        const requestedTitle = requestedTitleByNormalized.has(page.title)
            ? requestedTitleByNormalized.get(page.title)
            : page.title;
        if (indexByTitle.has(requestedTitle)) {
            images[indexByTitle.get(requestedTitle)] = getImageFromPage(page, 'Wikipedia');
        }
    }
    return images;
}

async function getWikipediaData(language, topic) {

const encodedLanguage = language && encodeURIComponent(language);
Expand Down