"""
Install the requirements in requirements.txt in a virtualenv.

To see the latest output for past-events/index.md, execute

    $ python add_meetup_events.py

To overwrite past-events/index.md with the latest output, execute

    $ python add_meetup_events.py --rewrite

In both modes, a page is written under past-events for every newly
found event.
"""
import sys
from collections import defaultdict
from datetime import datetime
from pathlib import Path

import httpx
from bs4 import BeautifulSoup
from jinja2 import Environment
from slugify import slugify
| 20 | + |
| 21 | + |
# absolute path of the directory that holds this script
root = Path(__file__).parent.resolve()

# Meetup page that lists the group's past events
url = "https://www.meetup.com/bostonpython/events/past/"

# Jinja2 template for a single event page; the rendered result is saved
# as a markdown file under past-events
MD = """---
title: {{ title }}
sidebar_link: false
---

{{ event_date }}
{% for c in contents %}
{{ c }}
{% endfor %}

Meetup link: [{{ event_url }}]({{ event_url }})

[Back to Past Events Page](index.md)
"""

# Header lines of past-events/index.md.  One line per year and per event
# is appended later, and the whole list is finally joined with newlines
# to construct the index file.
past_events = [
    "---",
    "title: Past Events",
    "sidebar_link: false",
    "---\n",
    "List of past events:\n",
]
| 52 | + |
| 53 | + |
def parse_title(text):
    """Return the ``title:`` value found in a page's text, or None.

    Only the first colon is treated as the separator, so a title that
    itself contains ": " is returned whole instead of raising.
    """
    for line in text.split("\n"):
        if line.startswith("title:"):
            return line.partition(":")[2].strip()
    return None


def load_existing_events(folder):
    """Collect the event pages already saved under *folder*.

    Returns a nested mapping shaped like

        {
            2007: {
                datetime.date(2007, 5, 23): [
                    ('The Cambridge Python May Meetup',
                     '20070523-the-cambridge-python-may-meetup.md')
                ]
            }
        }

    ``index.md`` is ignored.  A file whose name does not start with a
    ``YYYYMMDD`` prefix is skipped with a note on stderr instead of
    aborting the run.  A page without a usable ``title:`` line falls
    back to its file stem so the index can still be sorted by title.
    """
    events = defaultdict(lambda: defaultdict(list))
    # sorted() makes the processing order independent of the file system
    for path in sorted(Path(folder).glob("**/*")):
        if not path.is_file() or path.name == "index.md":
            continue
        try:
            event_date = datetime.strptime(path.name.split("-")[0], "%Y%m%d").date()
        except ValueError:
            # stderr keeps the notice out of the index printed on stdout
            print(f"Skipping {path}: no YYYYMMDD prefix in file name", file=sys.stderr)
            continue
        title = parse_title(path.read_text(encoding="utf-8")) or path.stem
        events[event_date.year][event_date].append((title, path.name))
    return events


def latest_event_date(existing_events):
    """Return the newest event date, or the minimum date when there is none.

    The minimum date makes every downloaded event count as new on a
    first run, where the original ``max()`` on an empty mapping raised.
    """
    return max(
        (day for days in existing_events.values() for day in days),
        default=datetime.min.date(),
    )


def build_index_lines(existing_events):
    """Return the index list lines: newest year and date first, titles A-Z."""
    lines = []
    for year in sorted(existing_events, reverse=True):
        lines.append(f"- {year}")
        for day in sorted(existing_events[year], reverse=True):
            for title, filename in sorted(existing_events[year][day], key=lambda item: item[0]):
                # NOTE(review): the sub-item prefix is reproduced exactly as it
                # appears in the reviewed source; whitespace may have been
                # collapsed there -- confirm the intended nesting indent.
                lines.append(f" - [{title}]({filename}) ({day:%m/%d/%Y})")
    return lines


if __name__ == "__main__":
    # Read and write through one absolute folder so the script behaves the
    # same regardless of the current working directory.
    events_dir = root / "past-events"

    try:
        # by default this GET only returns the 10 most recent events
        response = httpx.get(url)
    except httpx.RequestError as exc:
        print(f"An error occurred while requesting {exc.request.url!r}.")
        raise

    soup = BeautifulSoup(response.content.decode("utf-8", "ignore"), "lxml")

    existing_events = load_existing_events(events_dir)
    max_processed_date = latest_event_date(existing_events)

    # compile the page template once instead of once per event
    template = Environment().from_string(MD)

    # go over the events downloaded from meetup and save a page for each
    # one that is newer than everything already under past-events
    for item in soup.find_all("li", {"class": "list-item border--none"}):
        link = item.find("a")
        stamp = item.find("time")
        if link is None or stamp is None:
            print("Skipping a list item without a link or a time element", file=sys.stderr)
            continue

        href = link["href"]
        title = " - ".join(part.strip() for part in link.text.split(":"))
        if title == "Monday office hour":
            continue

        # presumably an epoch timestamp in milliseconds: only the first
        # ten digits (seconds) are used -- TODO confirm against the page
        when = datetime.fromtimestamp(int(stamp["datetime"][:10]))
        if when.date() <= max_processed_date:
            continue

        page_name = f"{when.strftime('%Y%m%d')}-{slugify(title)}.md"

        location = item.find("div", {"class": "venueDisplay"})
        if not location:
            location = item.find("p", {"class": "venueDisplay"})

        # drop presentation attributes from the description paragraphs
        paragraphs = item.find_all("p", {"class": "text--small"})
        for paragraph in paragraphs:
            paragraph.attrs.pop("class", None)
            paragraph.attrs.pop("style", None)

        context = {
            "title": title,
            "event_date": when.strftime("%B %d, %Y"),
            # an event without a venue element no longer aborts the run
            "location": location.text if location else "",
            "event_url": href if href.startswith("http") else "https://www.meetup.com" + href,
            "contents": paragraphs,
        }

        # write event file to past-events, in the encoding it is read back with
        output = template.render(**context).replace("\xa0", "")
        events_dir.mkdir(parents=True, exist_ok=True)
        (events_dir / page_name).write_text(output, encoding="utf-8")
        print(f"Write past-events/{page_name}")

        # add to existing_events data structure
        existing_events[when.year][when.date()].append((title, page_name))

    # now go over all existing_events and complete the past_events list
    past_events.extend(build_index_lines(existing_events))

    # either print or overwrite the past-events/index.md file
    if "--rewrite" in sys.argv:
        index_txt = "\n".join(past_events).strip()
        (events_dir / "index.md").write_text(index_txt, encoding="utf-8")
    else:
        print("\n".join(past_events))