79 lines
2.8 KiB
Python
79 lines
2.8 KiB
Python
import os
|
|
import logging
|
|
import json
|
|
from reddit import reddit_scraper as reddit
|
|
from time import sleep
|
|
|
|
class scraper:
    """Thin wrapper around a service-specific scraper backend (currently reddit).

    Validates the requested service, ensures the local file structure
    (a ``temp/`` directory and ``savefile.json``) exists, then delegates
    scraping, downloading, remembering and tooting to the backend object
    stored on ``self.login``.
    """

    def __init__(self, service, config, neuter=False):
        """Create a scraper for ``service``.

        service -- backend name, case-insensitive (one of: "reddit")
        config  -- configuration object handed to the backend constructor
        neuter  -- dry-run flag: when True, downloads are skipped

        Raises ValueError if ``service`` is not a known backend.
        """
        # error checking -- raise instead of the old `return None`, which
        # silently produced a half-initialized object missing .service,
        # .neuter and .login, crashing later with AttributeError
        scrapers = ["reddit"]
        # normalize once so validation and dispatch agree ("Reddit" used to
        # pass validation here but never match the dispatch check below)
        service = service.lower()
        if service not in scrapers:
            logging.error(f"Scraper {service} invalid. Choose one of {', '.join(scrapers)}")
            raise ValueError(f"Scraper {service} invalid. Choose one of {', '.join(scrapers)}")

        # make sure necessary filestructure is in place
        if not os.path.isdir("temp"):
            os.mkdir("temp")
        if not os.path.exists("savefile.json"):
            # context manager guarantees the handle is closed even on error
            with open("savefile.json", "w+") as f:
                f.write("{}")

        # set object variables
        self.service = service
        self.neuter = neuter

        # login to service
        if service == "reddit":
            self.login = reddit(config)

    ### WRAPPER METHODS

    def scrape(self, place, limit=10):
        """Scrape up to ``limit`` posts from ``place`` and return the result."""
        logging.warning(f"Scraping {self.service}: {place}... ")
        result = self.login.scrape(place, limit)
        logging.warning(f"Done scraping {self.service}: {place}.")
        return result

    # gets posts from a given service's places (ie, multiple subreddits or feeds)
    def scrape_all(self, limit=10):
        return self.login.scrape_all(limit)

    # downloads a given post's media and returns the location
    def download(self, post):
        """Download ``post``'s media via the backend.

        Returns the backend's result, or None in neuter (dry-run) mode.
        """
        logging.warning(f"Downloading {post.id}... ")
        result = None
        if not self.neuter:
            # fix: capture the backend's return value -- the original
            # discarded it and then hit a NameError on `return result`
            result = self.login.download(post)
        else:
            print(f"Neuter: would have downloaded {post} content")
        logging.warning(f"Done downloading {post.id}.")
        return result

    # downloads a list of posts' media and returns a list of the locations
    def download_all(self, posts):
        # fix: hand the backend the post objects themselves, matching
        # download() above (previously passed post.id instead)
        return [self.login.download(post) for post in posts]

    # creates the savefile for a list of posts.
    def remember(self):
        logging.warning(f"Remembering {self.service}...")
        self.login.remember()
        logging.warning(f"Remembered {self.service}.")

    # posts for each place if it has been a while
    def keep_lively(self):
        self.login.keep_lively()

    # posts a random post from the given place
    def random_post(self, place):
        logging.warning(f"Getting random post for {place}")
        return self.login.random_post(place)

    ### TOOTER METHODS

    # takes a toot and returns a dict of the text and media IDs
    def build_toot(self, masto, post):
        return self.login.build_toot(masto, post, neuter=self.neuter)

    # toots all posts in list
    def toot_posts(self, masto, posts):
        for post in posts:
            to_toot = self.build_toot(masto, post)
            masto.toot(to_toot["text"], to_toot["media"])
        return True