Files
autotoot/scraper.py
2022-04-20 20:13:15 -04:00

79 lines
2.8 KiB
Python

import os
import logging
import json
from reddit import reddit_scraper as reddit
from time import sleep
class scraper:
    """Thin, service-agnostic wrapper around a concrete scraper backend.

    Every public method forwards to the backend object stored in
    ``self.login``. The only backend wired up here is ``"reddit"``.

    Attributes set by a successful ``__init__``:
        service: the service name exactly as the caller passed it.
        neuter: dry-run flag; when true, downloads are skipped and the flag
            is forwarded to the backend's ``build_toot``.
        login: the backend instance that does the actual work.
    """

    def __init__(self, service, config, neuter=False):
        """Validate the service name, prepare working files, create the backend.

        Args:
            service: name of the service to scrape (matched case-insensitively).
            config: passed through unchanged to the backend constructor.
            neuter: when true, run in dry-run mode (see class docstring).
        """
        # error checking
        scrapers = ["reddit"]
        # Normalise once so validation and backend selection agree; comparing
        # the raw string later would let "Reddit" pass validation and then
        # silently skip creating the backend.
        service_key = service.lower()
        if service_key not in scrapers:
            logging.error(f"Scraper {service} invalid. Choose one of {', '.join(scrapers)}")
            # NOTE(review): this leaves the instance without service/neuter/login
            # attributes, so later method calls raise AttributeError. Kept
            # as-is for backward compatibility; raising here may be clearer.
            return

        # make sure necessary filestructure is in place
        os.makedirs("temp", exist_ok=True)
        if not os.path.exists("savefile.json"):
            # Seed the savefile with an empty JSON object.
            with open("savefile.json", "w") as savefile:
                savefile.write("{}")

        # set object variables
        self.service = service
        self.neuter = neuter

        # login to service
        if service_key == "reddit":
            self.login = reddit(config)

    ### WRAPPER METHODS
    def scrape(self, place, limit=10):
        """Scrape one place and return whatever the backend's ``scrape`` returns.

        Args:
            place: forwarded to the backend (e.g. a subreddit or feed).
            limit: forwarded to the backend; defaults to 10.
        """
        logging.warning(f"Scraping {self.service}: {place}... ")
        result = self.login.scrape(place, limit)
        logging.warning(f"Done scraping {self.service}: {place}.")
        return result

    def scrape_all(self, limit=10):
        """Get posts from a given service's places (ie, multiple subreddits or feeds).

        Returns whatever the backend's ``scrape_all(limit)`` returns.
        """
        return self.login.scrape_all(limit)

    def download(self, post):
        """Download a given post's media and return the backend's result.

        In neuter mode nothing is downloaded and ``None`` is returned.

        Args:
            post: an object with an ``id`` attribute; passed whole to the backend.
        """
        logging.warning(f"Downloading {post.id}... ")
        if self.neuter:
            print(f"Neuter: would have downloaded {post} content")
            result = None
        else:
            # Keep the backend's return value; it was previously discarded,
            # which made the final return raise NameError.
            result = self.login.download(post)
        logging.warning(f"Done downloading {post.id}.")
        return result

    def download_all(self, posts):
        """Download media for a list of posts and return a list of the results.

        In neuter mode nothing is downloaded and the list holds one ``None``
        per post.

        Args:
            posts: iterable of objects with an ``id`` attribute.
        """
        post_ids = [p.id for p in posts]
        if self.neuter:
            # Honour the dry-run flag the same way download() does.
            for post_id in post_ids:
                print(f"Neuter: would have downloaded {post_id} content")
            return [None] * len(post_ids)
        # NOTE(review): this hands the backend post IDs, whereas download()
        # hands it the post object itself. Preserved as found; confirm which
        # form the backend's download() actually expects.
        return [self.login.download(post_id) for post_id in post_ids]

    def remember(self):
        """Ask the backend to record its state via its ``remember()`` method.

        Presumably this writes the savefile; confirm against the backend.
        """
        logging.warning(f"Remembering {self.service}...")
        self.login.remember()
        logging.warning(f"Remembered {self.service}.")

    def keep_lively(self):
        """Delegate to the backend's ``keep_lively()``.

        Presumably posts for each place if it has been a while; confirm
        against the backend.
        """
        self.login.keep_lively()

    def random_post(self, place):
        """Return the backend's ``random_post(place)`` result for the given place."""
        logging.warning(f"Getting random post for {place}")
        return self.login.random_post(place)

    ### TOOTER METHODS
    def build_toot(self, masto, post):
        """Build a toot for a post via the backend, forwarding the neuter flag.

        The result is expected to be a mapping with ``"text"`` and ``"media"``
        keys, since that is how ``toot_posts`` reads it.
        """
        return self.login.build_toot(masto, post, neuter=self.neuter)

    def toot_posts(self, masto, posts):
        """Toot every post in the list through ``masto`` and return ``True``.

        Args:
            masto: client object exposing ``toot(text, media)``.
            posts: iterable of posts accepted by ``build_toot``.
        """
        for post in posts:
            to_toot = self.build_toot(masto, post)
            masto.toot(to_toot["text"], to_toot["media"])
        return True