Skip to content

Commit

Permalink
Changed the implementation of step 3 in checking for resolved hyperlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
Reuben Tan committed Mar 26, 2018
1 parent f5df139 commit 1975fe7
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
public class resolveHyperlinks {
private String file = "/shared/preprocessed/wikiparser/unifiedParserOutput/RedirectTitle2ResolvedTitle.txt";
private static String unresolvedOutput = "/shared/preprocessed/wikiparser/unifiedParserOutput/unresolvedHyperlinks.txt";
private static Set<String> unresolvedTitles = new HashSet<String>();
private Set<String> resolvedTitles;
private Map<String, String> redirectToResolved;

Expand Down Expand Up @@ -45,6 +47,22 @@ private void parseMap(){
}
}

/**
 * Writes every title currently held in {@code unresolvedTitles} to the file
 * named by {@code unresolvedOutput}, one title per line (terminated by '\n').
 *
 * <p>If an {@link IOException} occurs while opening or writing the file, the
 * stack trace is printed and the JVM is terminated with exit status -1.
 */
public static void writeUnresolvedTitles(){
    File unresolvedFile = new File(unresolvedOutput);
    // try-with-resources guarantees the writer (and the underlying FileWriter)
    // is flushed and closed even when a write fails part-way through; the
    // previous version only closed it on the success path.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(unresolvedFile.getAbsoluteFile()))) {
        for (String title : unresolvedTitles) {
            bw.write(title + "\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(-1);
    }
}

public Map<List<Integer>, String> resolve(Map<List<Integer>, String> hyperlinks){
Set<List<Integer>> keys = hyperlinks.keySet();
for(List<Integer> i : keys){
Expand All @@ -66,6 +84,13 @@ public Map<List<Integer>, String> resolve(Map<List<Integer>, String> hyperlinks)
} else{
// Capitalizes first letter
String cap_title = title.substring(0, 1).toUpperCase() + title.substring(1);
if(resolvedTitles.contains(cap_title)){
hyperlinks.replace(i, cap_title);
continue;
} else if(redirectToResolved.containsKey(title)){
hyperlinks.replace(i, redirectToResolved.get(cap_title));
continue;
}
// Capitalizes all letters directly succeeding '_'
for(int c = 1; c < cap_title.length(); c++){
if(cap_title.charAt(c) == '_'){
Expand All @@ -79,6 +104,8 @@ public Map<List<Integer>, String> resolve(Map<List<Integer>, String> hyperlinks)
hyperlinks.replace(i, cap_title);
} else if(redirectToResolved.containsKey(title)){
hyperlinks.replace(i, redirectToResolved.get(cap_title));
} else{
unresolvedTitles.add(title);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public void extractWiki(){
}
logger.log.info("Total Files: " + Integer.toString(totalFiles));
System.out.println("[#] Total Files: " + totalFiles);
while(parser.getActiveCount() > 0){
// wait
}
resolveHyperlinks.writeUnresolvedTitles();
}

public static void main(String [] args){
Expand Down

0 comments on commit 1975fe7

Please sign in to comment.