From b67069294723d805302503566f9a81908b7e22be Mon Sep 17 00:00:00 2001 From: Erik Castricum Date: Wed, 24 Nov 2021 13:45:42 +0100 Subject: [PATCH] crawler: if argument not given on cli, ask for them also, add ability for a searchterm instead of a hashtag Signed-off-by: Erik Castricum --- hashtag-crawler.py | 29 +++++++++++++++++++++++------ post.py | 4 ++-- tag_hourly.py | 4 ++-- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/hashtag-crawler.py b/hashtag-crawler.py index 8517058..65c7607 100755 --- a/hashtag-crawler.py +++ b/hashtag-crawler.py @@ -49,11 +49,11 @@ def writetweets(hashtag, tweets): writer.writerow(["screen_name", "id", "created_at", "trucated text"]) writer.writerows(tweets) -def getweets(hashtag, datumi, tabel): +def getweets(tek, hashtag, datumi, tabel): hashtweets, sorttweets, datatweets = [], [], [] twitter_datum = datetime.strptime(datum, '%d-%m-%Y').strftime('%Y-%m-%d') - hashtwit =f'#{hashtag}' + hashtwit =f'{tek}{hashtag}' counter = 0 for tweet in tweepy.Cursor(api.search, q = hashtwit, since=twitter_datum,count=200).items(): @@ -74,20 +74,37 @@ def getweets(hashtag, datumi, tabel): writetweets(hashtag, hashtweets) tag_data(hashtag, datatweets) - plot_data(hashtag, datatweets, datum) + plot_data(tek, hashtag, datatweets, datum) leaders(hashtag, sorttweets) if tabel == 'ja': print_table(hashtag, sorttweets) - post_twitter(hashtag, counter, tabel, datum) + post_twitter(tek, hashtag, counter, tabel, datum) try: hashtek = sys.argv[1] + tek = '#' datum = sys.argv[2] if sys.argv[3]: tabel = sys.argv[3] else: tabel = 'nee' - getweets(hashtek, datum, tabel) + getweets(tek, hashtek, datum, tabel) except IndexError: - print('no hashtag given') + q = input('Geen of onvoldoende criteria ingegeven. 
Handmatig invoeren (j|n): ') + if q =='n': + sys.exit() + else: + q = input('Zoeken naar hastag (j|n): ') + if q == 'j': + tek = '#' + else: + tek = '' + hashtek = input('Zoekterm (let op: voor een hastag 1 woord zonder "#" invoeren): ') + datum = input('Sinds welke datum (dd-mm-jjjj): ') + q = input('Tabel maken van de top 10 posters (j|n): ') + if q == 'j': + tabel = 'ja' + else: + tabel = 'nee' + getweets(tek, hashtek, datum, tabel) diff --git a/post.py b/post.py index f456950..f0edbfc 100644 --- a/post.py +++ b/post.py @@ -20,8 +20,8 @@ # calling the api api = tweepy.API(auth) -def post_twitter(hashtag, counter, tabel, datum): - text = f'#{hashtag} fun facts\n\n \ +def post_twitter(tek, hashtag, counter, tabel, datum): + text = f'{tek}{hashtag} fun facts\n\n \ Aantal tweets sinds {datum}: {counter}\n \ Grafische voorstelling in de plaatje(s) hieronder\n\n \ Volg ook @inter_crap voor breaking news!' diff --git a/tag_hourly.py b/tag_hourly.py index 021322e..6831be3 100644 --- a/tag_hourly.py +++ b/tag_hourly.py @@ -13,7 +13,7 @@ def tag_data(hashtag, datatweets): stand = counts.items() pd.DataFrame(stand).to_csv(filename, header=['Tijd', 'Aantal']) -def plot_data(hashtag, datatweets, datum): +def plot_data(tek, hashtag, datatweets, datum): filename = f'{hashtag}_tweet_graph.csv' with open(filename) as f: reader = csv.reader(f) @@ -36,7 +36,7 @@ def plot_data(hashtag, datatweets, datum): ax.plot(tijd, aantal, c='red') # Format plot - titel = f'{hashtag} tweets per uur sinds {datum}' + titel = f'{tek}{hashtag} tweets per uur sinds {datum}' plt.title(titel, fontsize=20) plt.xlabel('', fontsize=16) fig.autofmt_xdate()