changed to more object-y style
This commit is contained in:
129
reddit.py
Normal file
129
reddit.py
Normal file
@@ -0,0 +1,129 @@
|
||||
from helper import helper
|
||||
import praw
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
|
||||
class reddit_scraper:
|
||||
def __init__(self, config):
|
||||
self.login = praw.Reddit(
|
||||
client_id=config["reddit"]["client_id"],
|
||||
client_secret=config["reddit"]["client_secret"],
|
||||
password=config["reddit"]["password"],
|
||||
user_agent=config["reddit"]["user_agent"],
|
||||
username=config["reddit"]["username"])
|
||||
self.places = config["reddit"]["places"]
|
||||
savefile = open("savefile.json", "r")
|
||||
savefile = json.load(savefile)
|
||||
try: self.seent = savefile["reddit"]
|
||||
except: self.seent = {}
|
||||
|
||||
|
||||
### REDDIT METHODS
|
||||
# gets posts from a given subreddit
|
||||
def scrape(self, sub, limit):
|
||||
# make sure self.seent has the sub, add if not
|
||||
if sub not in self.seent: self.seent[sub] = time.time()
|
||||
# get posts that aren't in seent list
|
||||
post_list = []
|
||||
posts = self.login.subreddit(sub).new(limit=limit)
|
||||
posts = helper.reddit_listify(posts)
|
||||
for p in posts[::-1]:
|
||||
if helper.ts_older(p.created, self.seent[sub]):
|
||||
break
|
||||
logging.info(f"Scraping post {p.id}")
|
||||
post_list.append(p)
|
||||
self.seent[sub] = p.created
|
||||
return post_list
|
||||
|
||||
# scrapes all subreddits
|
||||
def scrape_all(self, limit):
|
||||
subposts = {}
|
||||
for place in self.places:
|
||||
subposts[place] = self.scrape(place, limit)
|
||||
return subposts
|
||||
|
||||
# downloads a given post; media is stored in temp/post_id/n
|
||||
# returns a list of the stored file locations for that post
|
||||
def download(self, post):
|
||||
def make_gallery_urls():
|
||||
nonlocal post
|
||||
urls = []
|
||||
for m in post.media_metadata:
|
||||
mimetype = post.media_metadata[m]["m"]
|
||||
end = mimetype[mimetype.find("/")+1:]
|
||||
urls.append(f"https://i.redd.it/{m}.{end}")
|
||||
return urls
|
||||
# video is sketchy, sorta WIP but maybe impossible
|
||||
# to have consistently. this function does its best
|
||||
def try_video_urls(post):
|
||||
try:
|
||||
raw_url = post.media["video"]["fallback_url"]
|
||||
return [raw_url[:raw_url.find("?")]]
|
||||
except:
|
||||
try:
|
||||
raw_url = post.media["reddit_video"]["fallback_url"]
|
||||
return [raw_url[:raw_url.find("?")]]
|
||||
except:
|
||||
return []
|
||||
return [] # should never be reached but just in case
|
||||
|
||||
# get the media URLs in array
|
||||
urls = []
|
||||
post_type = helper.get_post_type(post)
|
||||
if post_type == "image":
|
||||
urls = [post.url]
|
||||
elif post_type == "video":
|
||||
urls = try_video_urls(post)
|
||||
elif post_type == "gallery":
|
||||
urls = make_gallery_urls()
|
||||
|
||||
urls = [] # nueter download
|
||||
|
||||
# download all media
|
||||
local_urls = []
|
||||
i = 0
|
||||
for url in urls:
|
||||
i += 1
|
||||
name = f"temp/{post.id}/{i}"
|
||||
logging.info(f"Downloading {url} ({i}/{len(urls)})")
|
||||
helper.download_media(url, name)
|
||||
local_urls.append(name)
|
||||
|
||||
return local_urls
|
||||
|
||||
# posts if it's been a while. checks each sub and
|
||||
def keep_lively(self):
|
||||
for sub in self.places:
|
||||
if helper.been_awhile(self.seent[sub]):
|
||||
self.random_post(sub)
|
||||
|
||||
# gets a random post from reddit
|
||||
def random_post(self, place):
|
||||
return self.login.subreddit(place).random()
|
||||
|
||||
# creates the savefile for a list of posts.
|
||||
def remember(self):
|
||||
print(f"{self.seent}")
|
||||
savefile = json.load(open("savefile.json", "r"))
|
||||
savefile["reddit"] = self.seent
|
||||
savefile = json.dumps(savefile)
|
||||
with open("savefile.json", "w") as f:
|
||||
f.write(savefile)
|
||||
|
||||
### TOOTER METHODS
|
||||
# takes a toot and returns a dict of the text and media IDs
|
||||
def build_toot(self, masto, post):
|
||||
toot = {}
|
||||
toot["text"] = post.title
|
||||
if helper.get_post_type(post) == "video": toot["text"] += f"\n\n{post.url}"
|
||||
local_media = self.download(post)
|
||||
toot["media"] = masto.upload_all_media(local_media)
|
||||
return toot
|
||||
|
||||
# toots all posts in list
|
||||
def toot_posts(self, masto, posts):
|
||||
for post in posts:
|
||||
to_toot = self.build_toot(masto, post)
|
||||
masto.toot(to_toot["text"], to_toot["media"])
|
||||
return True
|
||||
Reference in New Issue
Block a user