Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvement Coomer #2214

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions scrapers/Coomer/Coomer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
'Referer': 'https://coomer.su/search_hash'
}

# A handle/tag must start and end with a word character, so sentence
# punctuation ("@alice.", "#beach-day!") is not swallowed into the match.
# Review feedback: usernames can contain digits and underscores -- ``\w``
# already covers letters, digits and ``_``; ``.`` and ``-`` are allowed inside.
# The ``(?<!\w)`` lookbehind keeps e-mail addresses ("foo@bar.com") from
# being reported as a mention of "bar.com".
_MENTION_RE = re.compile(r'(?<!\w)@(\w(?:[\w.\-]*\w)?)')
# Review feedback: a hashtag ends at the next ``#`` or at a word boundary, so
# "#fun#sun" yields two tags.  ``(?<!&)`` skips HTML numeric entities such as
# "&#8217;" in case the post content is still raw HTML (assumption -- confirm).
_HASHTAG_RE = re.compile(r'(?<!&)#(\w(?:[\w.\-]*\w)?)')


def extract_mentions_and_tags(text):
    """Extract ``@mentions`` and ``#hashtags`` from a post's text.

    Args:
        text: The post content. ``None`` or an empty string is accepted.

    Returns:
        A ``(mentions, hashtags)`` tuple of lists. Names are returned without
        their leading ``@``/``#``, in order of first appearance and without
        duplicates.
    """
    if not text:
        return [], []

    # dict.fromkeys de-duplicates while preserving first-seen order.
    mentions = list(dict.fromkeys(_MENTION_RE.findall(text)))
    hashtags = list(dict.fromkeys(_HASHTAG_RE.findall(text)))
    return mentions, hashtags


def debugPrint(t):
    """Write ``t`` followed by a newline to standard error.

    Args:
        t: The message to emit. Non-string values are converted with
            ``str()`` instead of raising ``TypeError`` on concatenation.
    """
    # NOTE(review): presumably stderr is used so diagnostics stay out of the
    # scraper's stdout result -- confirm against the caller.
    sys.stderr.write(str(t) + "\n")

Expand Down Expand Up @@ -64,6 +70,7 @@ def post_query(service, user_id, id):
post = data['post']
user_name = user_query(service, user_id)
studio = {"Name": user_name}

if service == "onlyfans":
studio["URL"] = f"https://onlyfans.com/{user_name}"
elif service == "fansly":
Expand All @@ -73,24 +80,34 @@ def post_query(service, user_id, id):
else:
debugPrint("No service listed")

tags = []
mentions, hashtags = extract_mentions_and_tags(post.get('content', ''))

unique_performers = {user_name}
unique_performers.update(mentions)

performers = [{"Name": name, "urls": [studio['URL']]} for name in unique_performers]

if post['tags'] is not None:
tags = [{"name": item } for item in post['tags']]

out = {"Title": post['title'],
"Date": post['published'][:10],
"URL": f"https://coomer.su/{post['service']}/user/{post['user']}/post/{post['id']}",
"Details": clean_text(post['content']),
"Studio": studio,
"Performers": [{"Name": user_name, "urls": [studio['URL']]}],
"Tags": tags
tags = [{"name": item} for item in post['tags']]
else:
tags = [{"name": tag} for tag in hashtags]

out = {
"Title": post['title'],
"Date": post['published'][:10],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be better to split on the `T` of the ISO 8601 timestamp instead, or to use more robust date parsing.

"URL": f"https://coomer.su/{post['service']}/user/{post['user']}/post/{post['id']}",
"Details": clean_text(post['content']),
"Studio": studio,
"Performers": performers,
"Tags": tags,
}

log.debug(out)
return out
else:
debugPrint(f'Response: {str(post_lookup_response.status_code)} \n Text: {str(post_lookup_response.text)}')


def get_scene(inputurl):
match = re.search(r'/(\w+?)/user/(.+?)/post/(\d+)', inputurl)
if match:
Expand Down