# autotoot/scraper.py

import os
import logging
import json
from reddit import reddit_scraper as reddit
from time import sleep

class scraper:
    """Thin wrapper that forwards scraping/tooting calls to a service backend.

    The backend object is stored in ``self.login`` and is chosen from the
    ``service`` name given at construction time. Only "reddit" is currently
    supported. Most methods simply delegate to the backend method of the same
    name, adding log messages around the call.
    """

    def __init__(self, service, config, neuter=False):
        """Validate the service, prepare on-disk state and log in.

        Args:
            service: Name of the service to scrape; matched
                case-insensitively against the supported scrapers.
            config: Passed unchanged to the backend constructor.
            neuter: When true, ``download`` only reports what it would have
                downloaded, and the flag is forwarded to the backend's
                ``build_toot``.

        Raises:
            ValueError: If ``service`` is not a supported scraper. Returning
                from ``__init__`` cannot abort construction, so the previous
                ``return None`` handed callers a half-initialised object.
        """
        # error checking
        scrapers = ["reddit"]
        service_name = service.lower()
        if service_name not in scrapers:
            message = f"Scraper {service} invalid. Choose one of {', '.join(scrapers)}"
            logging.error(message)
            raise ValueError(message)
        # make sure necessary filestructure is in place
        os.makedirs("temp", exist_ok=True)
        if not os.path.exists("savefile.json"):
            with open("savefile.json", "w") as savefile:
                savefile.write("{}")
        # set object variables
        self.service = service
        self.neuter = neuter
        # login to service; dispatch on the normalised name so that e.g.
        # "Reddit" (which passes validation) still gets a backend
        if service_name == "reddit":
            self.login = reddit(config)

    ### WRAPPER METHODS
    def scrape(self, place, limit=10):
        """Scrape ``place`` via the backend and return whatever it returns.

        ``limit`` is forwarded to the backend unchanged.
        """
        logging.warning(f"Scraping {self.service}: {place}... ")
        result = self.login.scrape(place, limit)
        logging.warning(f"Done scraping {self.service}: {place}.")
        return result

    def scrape_all(self, limit=10):
        """Get posts from all of a given service's places.

        "Places" are e.g. multiple subreddits or feeds; which ones are
        scraped is decided by the backend.
        """
        return self.login.scrape_all(limit)

    def download(self, post):
        """Download a given post's media and return the backend's result.

        In neuter mode nothing is downloaded: a notice is printed and
        ``None`` is returned.
        """
        logging.warning(f"Downloading {post.id}... ")
        result = None
        if not self.neuter:
            result = self.login.download(post)
        else:
            print(f"Neuter: would have downloaded {post} content")
        logging.warning(f"Done downloading {post.id}.")
        return result

    def download_all(self, posts):
        """Download the media of every post in ``posts``.

        Returns a list with one entry per post, in order, each being the
        value returned by ``download`` (``None`` entries in neuter mode).
        """
        # Go through download() so the backend receives the post object (as
        # it does for single downloads) and so neuter mode is honoured.
        return [self.download(post) for post in posts]

    def remember(self):
        """Ask the backend to persist its savefile state."""
        logging.warning(f"Remembering {self.service}...")
        self.login.remember()
        logging.warning(f"Remembered {self.service}.")

    def keep_lively(self):
        """Delegate to the backend's ``keep_lively``.

        Per the original comment: posts for each place if it has been a
        while.
        """
        self.login.keep_lively()

    def random_post(self, place):
        """Return the backend's random post for the given place."""
        logging.warning(f"Getting random post for {place}")
        return self.login.random_post(place)

    ### TOOTER METHODS
    def build_toot(self, masto, post):
        """Build a toot for ``post`` via the backend.

        The result is used by ``toot_posts`` as a mapping with "text" and
        "media" keys.
        """
        return self.login.build_toot(masto, post, neuter=self.neuter)

    def toot_posts(self, masto, posts):
        """Build and toot every post in ``posts``; always returns ``True``."""
        for post in posts:
            to_toot = self.build_toot(masto, post)
            masto.toot(to_toot["text"], to_toot["media"])
        return True