forked from merenlab/merenlab.org
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpubs.py
144 lines (110 loc) · 4.46 KB
/
pubs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# -*- coding: utf-8 -*-
# an ugly hack to convert some stuff into other stuff...
import sys
# people who have links (nothing in the visible part of this script fills or
# reads this mapping)
author_links = {}

# lab members, spelled as they occur in the citation author strings; each
# occurrence gets wrapped in a highlighting <span> by get_author_links()
my_people = [
    'Eren, A. M',
    'Delmont, T. O.',
    'Esen, Ö. C.',
]

# entries with a year smaller than this are rejected by the parsing loop
keep_pubs_after = 2009

# year -> list of {'authors', 'title', 'journal', 'issue'} dicts
pubs_dict = {}

# running tallies filled while parsing: one journal name per accepted entry,
# every parsed author, and the authors of entries newer than 2004
journals_list, authors_list, recent_authors_list = [], [], []
# Input: an EndNote library exported as a TXT file, read from 'pubs.txt' in the
# current working directory. Here is a sample line from this TXT file:
#
#   Winterberg, K. M., and Reznikoff, W. S. (2007). "Screening transposon mutant libraries using full-genome oligonucleotide microarrays." Methods Enzymol, 421, 110-25.
#
# The handle is consumed further down by the parsing loop through f.readlines().
f = open('pubs.txt')
# (line, reason) tuples for every input line the parser rejects; if this list
# is non-empty after parsing, the script reports the failures and exits.
bad_entries = []
def get_author_links(authors_str, people=None):
    """Highlight known people inside a citation's author string.

    Every literal occurrence of a name from `people` is wrapped in
    `<span class="pub-member-author">...</span>`. Matching is a plain
    substring replacement, so a name must be spelled exactly as it appears
    in `authors_str`.

    Args:
        authors_str: the author list of one publication, as a string.
        people: optional iterable of names to highlight. When omitted, the
            module-level `my_people` list is used, which is the behavior
            existing callers rely on.

    Returns:
        `authors_str` with each matching name wrapped in the span markup.
    """
    if people is None:
        people = my_people

    for author in people:
        authors_str = authors_str.replace(author, '<span class="pub-member-author">%s</span>' % (author))

    return authors_str
# Parse the EndNote export line by line.
#
# Each accepted line is split into authors / year / title / journal / issue and
# stored in pubs_dict under its year; journals_list, authors_list and
# recent_authors_list are filled along the way. Every line that does not match
# the expected layout is recorded in bad_entries as a (line, reason) tuple.

# Read everything up front so the input handle can be released before parsing.
try:
    raw_lines = f.readlines()
finally:
    f.close()

for line in [raw.strip() for raw in raw_lines]:
    # A blank line carries no reference; skip it instead of reporting a failure.
    if not line:
        continue

    # Edited volumes do not follow the layout handled below.
    if line.find('(ed.)') > 0 or line.find('(eds.)') > 0:
        bad_entries.append((line, 'ed/eds. found...'))
        continue

    # The year is expected as ' (YYYY)' right after the author list:
    # p_s points at the space before '(', p_e at the closing ')'.
    p_s = line.find(' (')
    p_e = p_s + 6
    if not p_s > 0:
        bad_entries.append((line, 'p_s <= 0...'))
        continue
    # Slice instead of indexing, so a line that ends too early is reported
    # as a bad entry rather than raising IndexError.
    if line[p_e:p_e + 1] != ')':
        bad_entries.append((line, 'p_e != )...'))
        continue

    # Something like '(n.d.)' fits the ' (....)' shape but is not a year.
    try:
        year = int(line[p_s + 2:p_e])
    except ValueError:
        bad_entries.append((line, 'year is not a number...'))
        continue

    if year < keep_pubs_after:
        bad_entries.append((line, 'year < keep_pubs_after...'))
        continue

    authors = line[0: p_s]

    # The title sits between ' "' and a closing '."' or '?"'.
    q_s = line.find(' "', p_e)
    if not q_s > 0:
        bad_entries.append((line, 'q_s <= 0...'))
        continue
    q_e = line.find('."', q_s)
    if not q_e > 0:
        q_e = line.find('?"', q_s)
        if not q_e > 0:
            bad_entries.append((line, 'q_e <= 0...'))
            continue

    # Keep the terminating '.' or '?' as part of the title.
    title = line[q_s + 2:q_e + 1]

    # The first ', ' after the title separates the journal from the issue part.
    c = line.find(', ', q_e + 2)
    if not c > 0:
        bad_entries.append((line, 'c <= 0...'))
        continue

    journal = line[q_e + 3:c]
    # Drop the last character of the line (the trailing '.' in the sample format).
    issue = line[c + 2:-1]

    # ad hoc fixes for journal names
    journal = journal.replace('The ISME journal', 'ISME J')
    journal = journal.replace('Proceedings of the National Academy of Sciences of the United States of America', 'Proc Natl Acad Sci U S A')
    journal = journal.replace('Proceedings of the National Academy of Sciences', 'Proc Natl Acad Sci U S A')
    journal = journal.replace('Frontiers in Microbiology', 'Front Microbiol')
    journals_list.append(journal)

    # ad hoc fixes for author spellings, so they match the names in my_people
    authors = authors.replace('Esen, Ö.,', 'Esen, Ö. C.,')
    authors = authors.replace('Murat Eren, A.,', 'Eren, A. M.,')

    pubs_dict.setdefault(year, []).append({'authors': authors, 'title': title, 'journal': journal, 'issue': issue})

    if authors.count(',') == 1:
        # Exactly one comma means a single 'Last, F.' author.
        entry_authors = [authors]
    else:
        # Splitting on '., ' eats the trailing period of every author but the
        # last one, so it is put back where it is missing.
        entry_authors = [a if a.endswith('.') else a + '.' for a in authors.replace('and ', '').split('., ')]

    authors_list.extend(entry_authors)
    if year > 2004:
        recent_authors_list.extend(entry_authors)
# check for failed entries: any rejected line aborts the run before the page is
# produced. The report goes to stderr because stdout carries the generated
# page, and the exit status is non-zero so a calling shell or script can tell
# that the run failed.
if bad_entries:
    sys.stderr.write("Some entries failed. Quitting.\n")
    sys.stderr.write("\n")
    for failed_line, reason in bad_entries:
        sys.stderr.write(' - Failed (reason: "%s"): %s\n' % (reason, failed_line))
    sys.exit(1)
# Navigation bar: one anchor link per year, newest first, labelled with the
# year and the number of publications stored for it.
year_links = []
for y in sorted(pubs_dict.keys(), reverse=True):
    year_links.append('<a href="#%s"><span class="category-item">%s <small>(%d)</small></span></a>' % (y, y, len(pubs_dict[y])))
years = ''.join(year_links)

# The 25 most frequent journals as '<b>name</b> (<i>count</i>)', ordered by
# (count, name) descending.
journal_counts = [(journals_list.count(journal), journal) for journal in set(journals_list)]
journal_counts.sort(reverse=True)
top_journals = ", ".join(['<b>%s</b> (<i>%d</i>)' % (jc[1], jc[0]) for jc in journal_counts[0:25]])
# Write the publications page to stdout: front matter, the year navigation box,
# then one section per year (newest first) with one <div class="pub"> per
# publication.
#
# Every print below is called with a single parenthesised argument, a form
# that behaves the same as a Python 2 print statement and as the Python 3
# print function.
print("""---
layout: publications
modified: 2015-02-05
comments: false
---
""")

# Journal summary, currently switched off:
# print("<h1>Journals</h1>")
# print(top_journals)
# print('')

print('<div class="category-box">')
print(years)
print('</div>')
print('')

for year in sorted(pubs_dict.keys(), reverse=True):
    print('<a name="%s"> </a>' % year)
    print('<h1>%s</h1>' % year)
    print('')

    for pub in pubs_dict[year]:
        # Build the Google Scholar search link once. The template must not
        # repeat the URL prefix, otherwise the href ends up holding the
        # address twice.
        scholar_url = 'http://scholar.google.com/scholar?hl=en&q=%s' % (pub['title'].replace(' ', '+'))

        print('<div class="pub">')
        print(' <h3><a href="%s" target="_new">%s</a></h3>' % (scholar_url, pub['title']))
        print(' <span class="pub-authors">%s</span>' % get_author_links(pub['authors']))
        print(' <span class="pub-journal"><i>%s</i>. <b>%s</b></span>' % (pub['journal'], pub['issue']))
        print('</div>')
        print('')

    print('')