Skip to content
This repository was archived by the owner on Jan 13, 2021. It is now read-only.

Commit 52af7fc

Browse files
committed
Basic twitter scraping example
1 parent be1ff23 commit 52af7fc

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

examples/twscrape.hy

+35
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
; Reports the users mentioned in the initially loaded tweets on a user's
2+
; Twitter page.
3+
4+
(import hyper bs4)
5+
6+
; Module-level dict, created empty at load time.
; NOTE(review): nothing else in this file reads or writes `results` --
; confirm whether it is still needed.
(setv results {})
7+
8+
; Get hold of a stream ID for the data for a single page.
9+
(defn request-page [conn path]
  "Send a GET request for `path` over `conn`.

  Returns whatever `conn.request` returns; `main` passes that value on to
  `get-page-data` as the request id."
  (conn.request "GET" path))
11+
12+
; Read the stream to exhaustion.
13+
(defn get-page-data [conn req-id]
  "Look up the response for `req-id` on `conn` and return the result of
  calling `read` on it."
  (.read (.get_response conn req-id)))
15+
16+
; Return all at-reply elements found in the html.
17+
(defn at-replies [html]
  "Parse `html` and return every element whose class is twitter-atreply.

  `html` is the page markup as returned by `get-page-data`. The return value
  is the result of BeautifulSoup's `find_all`."
  ; Name the parser explicitly: without it bs4 picks whichever parser happens
  ; to be installed (and warns about the guess), so the parse tree could vary
  ; between machines. "html.parser" ships with Python itself.
  (let [[soup (bs4.BeautifulSoup html "html.parser")]]
    ; `class_` has to be passed as a keyword argument, hence `apply`.
    (apply .find_all [soup] {"class_" "twitter-atreply"})))
20+
21+
(defn get-refs [replies]
  "Return a list holding the \"href\" value of each item in `replies`.

  Each item is queried with `.get`, so an item without an \"href\" entry
  contributes whatever `.get` returns for a missing key."
  (list (map (fn [tag] (.get tag "href")) replies)))
23+
24+
(defn mentions [html]
  "Generate the link target of each at-reply in `html`, with leading slashes
  stripped.

  At-reply elements that have no href are skipped."
  (for [href (get-refs (at-replies html))]
    ; Skip elements that had no href instead of pre-filtering the list.
    (unless (none? href)
      (yield (.lstrip href "/")))))
27+
28+
; Simple test: print the people referenced on the most recent tweets page.
29+
(defn main []
  "Fetch the /Lukasaoz page from twitter.com over port 443 and print each
  mention found in it, one per line."
  (with [[conn (hyper.HTTPConnection "twitter.com" 443)]]
    ; Issue the request first, then read its body via the returned id.
    (setv stream-id (request-page conn "/Lukasaoz"))
    (setv page (get-page-data conn stream-id))
    (for [name (mentions page)]
      (print name))))
34+
35+
; Run the example. The call is unguarded, so it executes whenever this file
; is loaded, not only when it is run as a script.
(main)

0 commit comments

Comments
 (0)