diff --git a/Procfile b/Procfile
index 115328b..6644e1f 100644
--- a/Procfile
+++ b/Procfile
@@ -1 +1 @@
-worker: python neotomabot.py
\ No newline at end of file
+worker: python3 neotomabot.py
\ No newline at end of file
diff --git a/c b/c
deleted file mode 100644
index 2d75265..0000000
Binary files a/c and /dev/null differ
diff --git a/neotomabot.py b/neotomabot.py
index 8381a62..d116990 100644
--- a/neotomabot.py
+++ b/neotomabot.py
@@ -1,210 +1,76 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #!python3
+""" Neotoma Database Twitter Manager v2.0
+    by: Simon Goring
+    This Twitter bot provides updates about additions to the Neotoma Paleoecology
+    Database. The script leverages the `schedule` package for Python, running
+    continually in the background and sending out tweets at specified times and intervals.
+"""
+
+from TwitterAPI import TwitterAPI
+import random
+import xmltodict
+import urllib.request
+import schedule
+import time
+import os
+
+twitstuff = {'consumer_key': os.environ['consumer_key'],
+             'consumer_secret': os.environ['consumer_secret'],
+             'access_token_key': os.environ['access_token_key'],
+             'access_token_secret': os.environ['access_token_secret']}
+
+datasets = set()
+
+api = TwitterAPI(consumer_key=twitstuff['consumer_key'],
+                 consumer_secret=twitstuff['consumer_secret'],
+                 access_token_key=twitstuff['access_token_key'],
+                 access_token_secret=twitstuff['access_token_secret'])
+
+def randomtweet(api):
+    """ Tweet a random statement from a plain text document, given the Twitter API object.
+        The tweets are all listed in the file `resources/cannedtweets.txt`, which can be
+        edited directly on GitHub.
+    """
+    with open('resources/cannedtweets.txt', 'r') as f:
+        alltweets = f.read().splitlines()
+    line = random.choice(alltweets)
+    api.request('statuses/update', {'status': line})
+
+def recentsite(api):
+    """ Tweet one of the recent data uploads from Neotoma, given the Twitter API object.
+        This leverages the v1.5 API's XML response for recent uploads. It randomly selects
+        one of the new uploads (excluding geochronology uploads) and tweets it out, adding
+        the selected dataset to a set object so that no dataset is tweeted twice.
+    """
+    with urllib.request.urlopen('https://api.neotomadb.org/v1.5/data/recentuploads/1') as response:
+        html = response.read()
+    output = xmltodict.parse(html)['results']['results']
+    # Keep only non-geochronology records that have not already been tweeted:
+    records = list(filter(lambda x: x['record']['datasettype'] != 'geochronology' and x['record']['datasetid'] not in datasets, output))
+    if len(records) > 0:
+        tweet = random.choice(records)['record']
+        string = "It's a new {datasettype} dataset from the {databasename} at {sitename}! https://data.neotomadb.org/{datasetid}".format(**tweet)
+        if len(string) < 280:
+            api.request('statuses/update', {'status': string})
+            datasets.add(tweet['datasetid'])
+        else:
+            # Drop the dataset type to bring the tweet under the 280 character limit:
+            string = "It's a new dataset from the {databasename} at {sitename}! https://data.neotomadb.org/{datasetid}".format(**tweet)
+            if len(string) < 280:
+                api.request('statuses/update', {'status': string})
+                datasets.add(tweet['datasetid'])
-
-import os, tweepy, time, sys, json, requests, random, imp, datetime, schedule, time, random
-
-def twit_auth():
-    # Authenticate the twitter session.
-    # Should only be needed once at the initiation of the code.
-
-    CONSUMER_KEY = os.environ['CONSUMER_KEY']
-    CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
-    ACCESS_KEY = os.environ['ACCESS_KEY']
-    ACCESS_SECRET = os.environ['ACCESS_SECRET']
-
-    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
-    auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
-    api = tweepy.API(auth)
-    print('Twitter authenticated \n')
-    return api
-
-
-def check_neotoma():
-    # This function call to neotoma, reads a text file, compares the two
-    # and then outputs all the 'new' records to a different text file.
-    # Function returns the number of new records returned.
-
-    # inputs:
-    # 1. text file: old_results.json
-    # 2. text file: to_print.json
-    # 3. json call: neotoma
-
-    with open('old_results.json', 'r') as old_file:
-        old_calls = json.loads(old_file.read())
-
-    with open('to_print.json', 'r') as print_file:
-        to_print = json.loads(print_file.read())
-
-    neotoma = requests.get("http://ceiwin10.cei.psu.edu/NDB/RecentUploads?months=1")
-    inp_json = json.loads(neotoma.text)['data']
-
-    def get_datasets(x):
-        did = []
-        for y in x:
-            did.append(y["DatasetID"])
-        return did
-
-    neo_datasets = get_datasets(inp_json)
-    old_datasets = get_datasets(old_calls)
-    new_datasets = get_datasets(to_print)
-
-    # So this works
-    # We now have the numeric dataset IDs for the most recent month of
-    # new files to neotoma (neo_datasets), all the ones we've already tweeted
-    # (old_datasets) and all the ones in our queue (new_datasets).
-    #
-    # The next thing we want to do is to remove all the neo_datasets that
-    # are in old_datasets and then remove all the new_datasets that are
-    # in neo_datasets, append neo_datasets to new_datasets (if new_datasets
-    # has a length > 0) and then dump new_datasets.
-    #
-    # Old datasets gets re-written when the tweets go out.
-
-    # remove all the neo_datasets:
-    for i in range(len(neo_datasets)-1, 0, -1):
-        if neo_datasets[i] in old_datasets:
-            del inp_json[i]
-
-    # This now gives us a pared down version of inp_json
-    # Now we need to make sure to add any of the to_print to neo_dataset.
-    # We do this by cycling through new_datasets. Any dataset number that
-    # is not in old_datasets or neo_datasets gets added to the beginning of
-    # the new list. This way it is always the first called up when twitter
-    # posts:
-
-    for i in range(0, len(new_datasets)-1):
-        if new_datasets[i] not in old_datasets and new_datasets[i] not in neo_datasets:
-            inp_json.insert(0,to_print[i])
-
-    # Now write out to file. Old file doesn't get changed until the
-    # twitter app is run.
-    with open('to_print.json', 'w') as print_file:
-        json.dump(inp_json, print_file)
-    return len(inp_json) - len(to_print)
-
-def print_neotoma_update(api):
-    # Check for new records by using the neotoma "recent" API:
-    old_toprint = check_neotoma()
-
-    # load files:
-    with open('to_print.json', 'r') as print_file:
-        to_print = json.loads(print_file.read())
-    with open('old_results.json', 'r') as print_file:
-        old_files = json.loads(print_file.read())
-
-    print('Neotoma dataset updated.\n')
-    if (old_toprint) == 1:
-        # If only a single site has been added:
-        line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " site has been added since I last checked Neotoma. http://neotomadb.org"
-    elif (old_toprint) > 1:
-        line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " sites have been added since I last checked Neotoma. http://neotomadb.org"
-    else:
-        line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet. Nothing new has been added since I last checked. http://neotomadb.org"
-
-    print('%s' % line)
-    try:
-        print('%s' % line)
-        api.update_status(status=line)
-    except tweepy.error.TweepError:
-        print("Twitter error raised")
-
-def post_tweet(api):
-    # Read in the printable tweets:
-    with open('to_print.json', 'r') as print_file:
-        to_print = json.loads(print_file.read())
-
-    with open('old_results.json', 'r') as print_file:
-        old_files = json.loads(print_file.read())
-
-    print('Files opened\n')
-
-    pr_tw = random.randint(0,len(to_print) - 1)
-    site = to_print[pr_tw]
-
-    # Get ready to print the first [0] record in to_print:
-    weblink = 'http://apps.neotomadb.org/Explorer/?datasetid=' + str(site["DatasetID"])
-
-    # The datasets have long names. I want to match to simplify:
-
-    line = 'Neotoma welcomes ' + site["SiteName"] + ', a ' + site["DatasetType"] + ' dataset by ' + site["Investigator"] + " " + weblink
-
-    # There's a few reasons why the name might be very long, one is the site name, the other is the author name:
-    if len(line) > 170:
-        line = 'Neotoma welcomes ' + site["SiteName"] + " by " + site["Investigator"] + " " + weblink
-
-    # If it's still too long then clip the author list:
-    if len(line) > 170 & site["Investigator"].find(','):
-        author = site["Investigator"][0:to_print[0]["Investigator"].find(',')]
-        line = 'Neotoma welcomes ' + site["SiteName"] + " by " + author + " et al. " + weblink
-
-    try:
-        print('%s' % line)
-        api.update_status(status=line)
-        old_files.append(site)
-        del to_print[pr_tw]
-        with open('to_print.json', 'w') as print_file:
-            json.dump(to_print, print_file)
-        with open('old_results.json', 'w') as print_file:
-            json.dump(old_files, print_file)
-    except tweepy.error.TweepError:
-        print("Twitter error raised")
-
-
-def self_identify(api):
-
-    # Identify myself as the owner of the bot:
-    line = 'This twitter bot for the Neotoma Paleoecological Database is managed by @sjgoring. Letting you know what\'s new at http://neotomadb.org'
-    try:
-        print('%s' % line)
-        api.update_status(status=line)
-    except tweepy.error.TweepError:
-        print("Twitter error raised")
 
 def self_identify_hub(api):
-    # Identify the codebase for the bot:
-    line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot'
-    try:
-        print('%s' % line)
-        api.update_status(status=line)
-    except tweepy.error.TweepError:
-        print("Twitter error raised")
-
-def other_inf_hub(api):
-    # Identify the codebase for the bot:
-    line = ['The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot',
-        'Neotoma has teaching modules you can use in the class room, check it out: https://www.neotomadb.org/education/category/higher_ed/',
-        'The governance for Neotoma includes representatives from our constituent databases. Find out more: https://www.neotomadb.org/about/category/governance',
-        'We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists',
-        'We keep a list of all publications that have used Neotoma for their research. Want to be added? Contact us! https://www.neotomadb.org/references',
-        'These days everyone\'s got a Google Scholar page. So does Neotoma! https://scholar.google.ca/citations?user=idoixqkAAAAJ&hl=en',
-        'If you use #rstats then you can access Neotoma data directly thanks to @rOpenSci! https://ropensci.org/tutorials/neotoma_tutorial.html',
-        'Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://www.neotomadb.org/data/category/explorer',
-        'Think you\'ve got better tweets? Add them to my code & make a pull request! https://github.com/SimonGoring/neotomabot',
-        'Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1',
-        'We\'ve got some new R tutorials up online. Is there anything you\'d like to do with Neotoma? http://neotomadb.github.io',
-        'Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship.',
-        'Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring',
-        'You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you\'re doing with Neotoma data!',
-        'Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105',
-        'How is Neotoma leveraging text mining to improve its data holdings? Find out on the @earthcube blog: https://earthcube.wordpress.com/2018/03/06/geodeepdive-into-darkdata/',
-        "Building an application that could leverage Neotoma data? Our API (https://api-dev.neotomadb.org) is public and open: https://github.com/NeotomaDB/api_nodetest/",
-        "The landing pages for Neotoma were built using Vue.js, all code is published on Github at https://github.com/NeotomaDB/ndbLandingPage",
-        "Learn more about how Neotoma makes the most of teaching and cutting-edge research in a new publication in Elements of Paleontology: http://dx.doi.org/10.1017/9781108681582",
-        "Neotoma is on Slack. Come join the discussion and get involved! We're looking for folks to help with documentation, stewardship and coding. https://join.slack.com/t/neotomadb/shared_invite/zt-cvsv53ep-wjGeCTkq7IhP6eUNA9NxYQ"
-        ]
-
-    try:
-        print('%s' % line)
-        api.update_status(status=line[random.randint(0,len(line))])
-    except tweepy.error.TweepError:
-        print("Twitter error raised")
+    """ Identify the codebase for the bot through a tweet.
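+        The tweet text points readers to the public GitHub repository for the bot.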
""" + line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot' + api.request('statuses/update', {'status':line}) -api = twit_auth() -schedule.every(3).hours.do(post_tweet, api) -schedule.every().day.at("15:37").do(print_neotoma_update, api) -schedule.every().wednesday.at("14:30").do(self_identify, api) +schedule.every(6).hours.do(recentsite, api) +schedule.every(5).hours.do(randomtweet, api) schedule.every().monday.at("14:30").do(self_identify_hub, api) schedule.every().day.at("10:30").do(other_inf_hub, api) diff --git a/requirements.txt b/requirements.txt index 26b386c..a0f9945 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ -tweepy==3.7.0 -requests==2.21.0 -schedule==0.5.0 +schedule==1.1.0 +requests==2.22.0 +xmltodict==0.12.0 +tweepy==4.1.0 +TwitterAPI==2.7.5 diff --git a/resources/cannedtweets.txt b/resources/cannedtweets.txt new file mode 100644 index 0000000..7b8907b --- /dev/null +++ b/resources/cannedtweets.txt @@ -0,0 +1,18 @@ +The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot +Neotoma has teaching modules you can use in the classroom, check it out: https://www.neotomadb.org/education/category/higher_ed/ +Governance for Neotoma includes representatives from our 34 constituent databases. Find out more: https://www.neotomadb.org/about/category/governance +We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists +There's a big @zotero library of Neotoma publications that we've been working on. Check it out here: https://www.zotero.org/groups/2321378/neotomadb +Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://apps.neotomadb.org/explorer +Think you've got better tweets? Add them to my code & make a pull request! https://github.com/NeotomaDB/neotomabot +Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1 +Our site at https://open.neotomadb.org hosts all our #openscience work, including a link to the database schema. Check it out! +Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship. +Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring +You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you're doing with Neotoma data! +Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105 +How is Neotoma leveraging text mining to improve its data holdings? We've been working with @geodeepdive to discover articles that have yet to be submitted to the database. @earthcube +Building an application that could leverage Neotoma data? 
Our API (https://api.neotomadb.org) is public and open: https://github.com/NeotomaDB/api_nodetest/ #openscience +The landing pages for Neotoma were built using Vue.js, all code is published on Github at https://github.com/NeotomaDB/ndbLandingPage Check them out here: https://data.neotomadb.org +Learn more about how Neotoma makes the most of teaching and cutting-edge research in our Elements of Paleontology publication: http://dx.doi.org/10.1017/9781108681582 +Neotoma is on Slack. Come join the discussion and get involved! We're looking for folks to help with documentation, stewardship and coding. https://join.slack.com/t/neotomadb/shared_invite/zt-cvsv53ep-wjGeCTkq7IhP6eUNA9NxYQ \ No newline at end of file diff --git a/resources/cannedtwttes.txt b/resources/cannedtwttes.txt new file mode 100644 index 0000000..e89d4ea --- /dev/null +++ b/resources/cannedtwttes.txt @@ -0,0 +1,19 @@ +The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot +Neotoma has teaching modules you can use in the class room, check it out: https://www.neotomadb.org/education/category/higher_ed/ +The governance for Neotoma includes representatives from our constituent databases. Find out more: https://www.neotomadb.org/about/category/governance +We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists +We keep a list of all publications that have used Neotoma for their research. Want to be added? Contact us! https://www.neotomadb.org/references +These days everyone's got a Google Scholar page. So does Neotoma! https://scholar.google.ca/citations?user=idoixqkAAAAJ&hl=en +Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://apps.neotomadb.org/explorer +Think you've got better tweets? Add them to my code & make a pull request! https://github.com/NeotomaDB/neotomabot +Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1 +Our site at https://open.neotomadb.org hosts all our #openscience work, including a link to the database schema. Check it out! +Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship. +Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring +You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you're doing with Neotoma data! +Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105 +How is Neotoma leveraging text mining to improve its data holdings? We've been working with @geodeepdive to discover articles that have yet to be submitted to the database. +Building an application that could leverage Neotoma data? 
diff --git a/v1/neotomabot.py b/v1/neotomabot.py
new file mode 100644
index 0000000..8381a62
--- /dev/null
+++ b/v1/neotomabot.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#!python3
+
+import os, tweepy, time, sys, json, requests, random, imp, datetime, schedule, time, random
+
+def twit_auth():
+    # Authenticate the twitter session.
+    # Should only be needed once at the initiation of the code.
+
+    CONSUMER_KEY = os.environ['CONSUMER_KEY']
+    CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
+    ACCESS_KEY = os.environ['ACCESS_KEY']
+    ACCESS_SECRET = os.environ['ACCESS_SECRET']
+
+    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+    auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
+    api = tweepy.API(auth)
+    print('Twitter authenticated \n')
+    return api
+
+
+def check_neotoma():
+    # This function call to neotoma, reads a text file, compares the two
+    # and then outputs all the 'new' records to a different text file.
+    # Function returns the number of new records returned.
+
+    # inputs:
+    # 1. text file: old_results.json
+    # 2. text file: to_print.json
+    # 3. json call: neotoma
+
+    with open('old_results.json', 'r') as old_file:
+        old_calls = json.loads(old_file.read())
+
+    with open('to_print.json', 'r') as print_file:
+        to_print = json.loads(print_file.read())
+
+    neotoma = requests.get("http://ceiwin10.cei.psu.edu/NDB/RecentUploads?months=1")
+    inp_json = json.loads(neotoma.text)['data']
+
+    def get_datasets(x):
+        did = []
+        for y in x:
+            did.append(y["DatasetID"])
+        return did
+
+    neo_datasets = get_datasets(inp_json)
+    old_datasets = get_datasets(old_calls)
+    new_datasets = get_datasets(to_print)
+
+    # So this works
+    # We now have the numeric dataset IDs for the most recent month of
+    # new files to neotoma (neo_datasets), all the ones we've already tweeted
+    # (old_datasets) and all the ones in our queue (new_datasets).
+    #
+    # The next thing we want to do is to remove all the neo_datasets that
+    # are in old_datasets and then remove all the new_datasets that are
+    # in neo_datasets, append neo_datasets to new_datasets (if new_datasets
+    # has a length > 0) and then dump new_datasets.
+    #
+    # Old datasets gets re-written when the tweets go out.
+
+    # remove all the neo_datasets:
+    for i in range(len(neo_datasets)-1, 0, -1):
+        if neo_datasets[i] in old_datasets:
+            del inp_json[i]
+
+    # This now gives us a pared down version of inp_json
+    # Now we need to make sure to add any of the to_print to neo_dataset.
+    # We do this by cycling through new_datasets. Any dataset number that
+    # is not in old_datasets or neo_datasets gets added to the beginning of
+    # the new list. This way it is always the first called up when twitter
+    # posts:
+
+    for i in range(0, len(new_datasets)-1):
+        if new_datasets[i] not in old_datasets and new_datasets[i] not in neo_datasets:
+            inp_json.insert(0,to_print[i])
+
+    # Now write out to file. Old file doesn't get changed until the
+    # twitter app is run.
+    with open('to_print.json', 'w') as print_file:
+        json.dump(inp_json, print_file)
+    return len(inp_json) - len(to_print)
+
+def print_neotoma_update(api):
+    # Check for new records by using the neotoma "recent" API:
+    old_toprint = check_neotoma()
+
+    # load files:
+    with open('to_print.json', 'r') as print_file:
+        to_print = json.loads(print_file.read())
+    with open('old_results.json', 'r') as print_file:
+        old_files = json.loads(print_file.read())
+
+    print('Neotoma dataset updated.\n')
+    if (old_toprint) == 1:
+        # If only a single site has been added:
+        line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " site has been added since I last checked Neotoma. http://neotomadb.org"
+    elif (old_toprint) > 1:
+        line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " sites have been added since I last checked Neotoma. http://neotomadb.org"
+    else:
+        line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet. Nothing new has been added since I last checked. http://neotomadb.org"
+
+    print('%s' % line)
+    try:
+        print('%s' % line)
+        api.update_status(status=line)
+    except tweepy.error.TweepError:
+        print("Twitter error raised")
+
+def post_tweet(api):
+    # Read in the printable tweets:
+    with open('to_print.json', 'r') as print_file:
+        to_print = json.loads(print_file.read())
+
+    with open('old_results.json', 'r') as print_file:
+        old_files = json.loads(print_file.read())
+
+    print('Files opened\n')
+
+    pr_tw = random.randint(0,len(to_print) - 1)
+    site = to_print[pr_tw]
+
+    # Get ready to print the first [0] record in to_print:
+    weblink = 'http://apps.neotomadb.org/Explorer/?datasetid=' + str(site["DatasetID"])
+
+    # The datasets have long names. I want to match to simplify:
+
+    line = 'Neotoma welcomes ' + site["SiteName"] + ', a ' + site["DatasetType"] + ' dataset by ' + site["Investigator"] + " " + weblink
+
+    # There's a few reasons why the name might be very long, one is the site name, the other is the author name:
+    if len(line) > 170:
+        line = 'Neotoma welcomes ' + site["SiteName"] + " by " + site["Investigator"] + " " + weblink
+
+    # If it's still too long then clip the author list:
+    if len(line) > 170 & site["Investigator"].find(','):
+        author = site["Investigator"][0:to_print[0]["Investigator"].find(',')]
+        line = 'Neotoma welcomes ' + site["SiteName"] + " by " + author + " et al. " + weblink
+
+    try:
+        print('%s' % line)
+        api.update_status(status=line)
+        old_files.append(site)
+        del to_print[pr_tw]
+        with open('to_print.json', 'w') as print_file:
+            json.dump(to_print, print_file)
+        with open('old_results.json', 'w') as print_file:
+            json.dump(old_files, print_file)
+    except tweepy.error.TweepError:
+        print("Twitter error raised")
+
+
+def self_identify(api):
+
+    # Identify myself as the owner of the bot:
+    line = 'This twitter bot for the Neotoma Paleoecological Database is managed by @sjgoring. Letting you know what\'s new at http://neotomadb.org'
+    try:
+        print('%s' % line)
+        api.update_status(status=line)
+    except tweepy.error.TweepError:
+        print("Twitter error raised")
+
+def self_identify_hub(api):
+    # Identify the codebase for the bot:
+    line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot'
+    try:
+        print('%s' % line)
+        api.update_status(status=line)
+    except tweepy.error.TweepError:
+        print("Twitter error raised")
+
+def other_inf_hub(api):
+    # Identify the codebase for the bot:
+    line = ['The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot',
+        'Neotoma has teaching modules you can use in the class room, check it out: https://www.neotomadb.org/education/category/higher_ed/',
+        'The governance for Neotoma includes representatives from our constituent databases. Find out more: https://www.neotomadb.org/about/category/governance',
+        'We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists',
+        'We keep a list of all publications that have used Neotoma for their research. Want to be added? Contact us! https://www.neotomadb.org/references',
+        'These days everyone\'s got a Google Scholar page. So does Neotoma! https://scholar.google.ca/citations?user=idoixqkAAAAJ&hl=en',
+        'If you use #rstats then you can access Neotoma data directly thanks to @rOpenSci! https://ropensci.org/tutorials/neotoma_tutorial.html',
+        'Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://www.neotomadb.org/data/category/explorer',
+        'Think you\'ve got better tweets? Add them to my code & make a pull request! https://github.com/SimonGoring/neotomabot',
+        'Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1',
+        'We\'ve got some new R tutorials up online. Is there anything you\'d like to do with Neotoma? http://neotomadb.github.io',
+        'Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship.',
+        'Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring',
+        'You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you\'re doing with Neotoma data!',
+        'Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105',
+        'How is Neotoma leveraging text mining to improve its data holdings? Find out on the @earthcube blog: https://earthcube.wordpress.com/2018/03/06/geodeepdive-into-darkdata/',
+        "Building an application that could leverage Neotoma data? Our API (https://api-dev.neotomadb.org) is public and open: https://github.com/NeotomaDB/api_nodetest/",
+        "The landing pages for Neotoma were built using Vue.js, all code is published on Github at https://github.com/NeotomaDB/ndbLandingPage",
+        "Learn more about how Neotoma makes the most of teaching and cutting-edge research in a new publication in Elements of Paleontology: http://dx.doi.org/10.1017/9781108681582",
+        "Neotoma is on Slack. Come join the discussion and get involved! We're looking for folks to help with documentation, stewardship and coding. https://join.slack.com/t/neotomadb/shared_invite/zt-cvsv53ep-wjGeCTkq7IhP6eUNA9NxYQ"
+        ]
+
+    try:
+        print('%s' % line)
+        api.update_status(status=line[random.randint(0,len(line))])
+    except tweepy.error.TweepError:
+        print("Twitter error raised")
+
+api = twit_auth()
+
+schedule.every(3).hours.do(post_tweet, api)
+schedule.every().day.at("15:37").do(print_neotoma_update, api)
+schedule.every().wednesday.at("14:30").do(self_identify, api)
+schedule.every().monday.at("14:30").do(self_identify_hub, api)
+schedule.every().day.at("10:30").do(other_inf_hub, api)
+
+while 1:
+    schedule.run_pending()
+    time.sleep(61)
diff --git a/old_results.json b/v1/old_results.json
similarity index 100%
rename from old_results.json
rename to v1/old_results.json
diff --git a/to_print.json b/v1/to_print.json
similarity index 100%
rename from to_print.json
rename to v1/to_print.json
diff --git a/tweets.json b/v1/tweets.json
similarity index 100%
rename from tweets.json
rename to v1/tweets.json
diff --git a/v2/neotomabot.py b/v2/neotomabot.py
new file mode 100644
index 0000000..ccb8cb0
--- /dev/null
+++ b/v2/neotomabot.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#!python3
+""" Neotoma Database Twitter Manager
+    by: Simon Goring
+    This Twitter bot provides updates about additions to the Neotoma Paleoecology
+    Database. The script leverages the `schedule` package for Python, running
+    continually in the background and sending out tweets at specified times and intervals.
+"""
+
+from TwitterAPI import TwitterAPI
+import random
+import xmltodict
+import urllib.request
+import schedule
+import time
+import os
+
+twitstuff = {'consumer_key': os.environ['consumer_key'],
+             'consumer_secret': os.environ['consumer_secret'],
+             'access_token_key': os.environ['access_token_key'],
+             'access_token_secret': os.environ['access_token_secret']}
+
+datasets = set()
+
+api = TwitterAPI(consumer_key=twitstuff['consumer_key'],
+                 consumer_secret=twitstuff['consumer_secret'],
+                 access_token_key=twitstuff['access_token_key'],
+                 access_token_secret=twitstuff['access_token_secret'])
+
+def randomtweet(api):
+    """ Tweet a random statement from a plain text document, given the Twitter API object.
+        The tweets are all listed in the file `resources/cannedtweets.txt`, which can be
+        edited directly on GitHub.
+    """
+    with open('../resources/cannedtweets.txt', 'r') as f:
+        alltweets = f.read().splitlines()
+    line = random.choice(alltweets)
+    api.request('statuses/update', {'status': line})
+
+def recentsite(api):
+    """ Tweet one of the recent data uploads from Neotoma, given the Twitter API object.
+        This leverages the v1.5 API's XML response for recent uploads. It randomly selects
+        one of the new uploads (excluding geochronology uploads) and tweets it out, adding
+        the selected dataset to a set object so that no dataset is tweeted twice.
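+        Each record in the API response carries the `datasettype`, `databasename`,
+        `sitename` and `datasetid` fields that are used to format the tweet text.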
+ """ + with urllib.request.urlopen('https://api.neotomadb.org/v1.5/data/recentuploads/1') as response: + html = response.read() + output = xmltodict.parse(html)['results']['results'] + records = list(filter(lambda x: x['record']['datasettype'] != 'geochronology' or x['record']['datasetid'] not in datasets, output)) + if len(records) > 0: + tweet = random.choice(records)['record'] + while tweet['datasetid'] in datasets: + tweet = random.choice(records)['record'] + string = "It's a new {datasettype} dataset from the {databasename} at {sitename}! https://data.neotomadb.org/{datasetid}".format(**tweet) + if len(string) < 280: + api.request('statuses/update', {'status':string}) + datasets.add(tweet['datasetid']) + else: + string = "It's a new dataset from the {databasename} at {sitename}! https://data.neotomadb.org/{datasetid}".format(**tweet) + if len(string) < 280: + api.request('statuses/update', {'status':string}) + datasets.add(tweet['datasetid']) + + +def self_identify_hub(api): + """ Identify the codebase for the bot through a tweet. """ + line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot' + api.request('statuses/update', {'status':line}) + + +schedule.every(6).hours.do(recentsite, api) +schedule.every(5).hours.do(randomtweet, api) +schedule.every().monday.at("14:30").do(self_identify_hub, api) +schedule.every().day.at("10:30").do(other_inf_hub, api) + +while 1: + schedule.run_pending() + time.sleep(61)