From fcfe1839902af0d29a8ae73140f22dbee1bf615a Mon Sep 17 00:00:00 2001
From: Luke Ogburn <21106956+logburn@users.noreply.github.com>
Date: Wed, 20 Apr 2022 19:43:46 -0400
Subject: [PATCH] added neuter and fixed seent (hopefully)

---
 helper.py  | 12 ++----------
 reddit.py  | 12 +++++-------
 scraper.py |  4 +++-
 3 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/helper.py b/helper.py
index 6e2b62c..e1fee9c 100644
--- a/helper.py
+++ b/helper.py
@@ -4,8 +4,6 @@ import logging
 from datetime import datetime
 
 ### HELPER METHODS
-# helper method to clean out folder (delete all contents)
-# expected structure: [["temp/a/1", "temp/a/2"], [], [], ["temp/e/1"]]
 class helper():
     def __init__(service):
         # copy the service's variables to make them local
@@ -48,15 +46,9 @@ class helper():
         return "unknown"
 
     # returns True if the ts1 is older than ts2
-    # tsx should be a timestamp value
+    # ts_ should be a timestamp value
     def ts_older(ts1, ts2):
-        # timedelta of `hours`
-        hours_delta = datetime.fromtimestamp(ts2) - datetime.fromtimestamp(0)
-        # timedelta of timestamp
-        stamp_delta = datetime.fromtimestamp(ts1)
-        stamp_delta = datetime.now() - stamp_delta
-        print(f"    ts_older: {stamp_delta} > {hours_delta}")
-        return stamp_delta > hours_delta
+        return datetime.fromtimestamp(ts1) < datetime.fromtimestamp(ts2)
 
     # returns True if place hasn't had a post in the past 12 hours according
     # to the savefile
diff --git a/reddit.py b/reddit.py
index 2b42953..a809de9 100644
--- a/reddit.py
+++ b/reddit.py
@@ -27,13 +27,11 @@ class reddit_scraper:
         posts = self.login.subreddit(sub).new(limit=limit)
         posts = helper.reddit_listify(posts)
         for p in posts[::-1]:
-            if helper.ts_older(self.seent[sub], p.created):
+            if helper.ts_older(p.created, self.seent[sub]):
                 break
-            else:
-                print(f"helper.ts_older({self.seent[sub]}, {p.created}) :: {self.seent[sub] - p.created}")
             logging.info(f"Scraping post {p.id}")
             post_list.append(p)
-            self.seent[sub] = p.created
+            self.seent[sub] = posts[0].created
         return post_list
 
     # scrapes all subreddits
@@ -102,7 +100,6 @@ class reddit_scraper:
 
     # creates the savefile for a list of posts.
     def remember(self):
-        print(f"{self.seent}")
         savefile = json.load(open("savefile.json", "r"))
         savefile["reddit"] = self.seent
         savefile = json.dumps(savefile)
@@ -111,11 +108,12 @@ class reddit_scraper:
 
     ### TOOTER METHODS
     # takes a toot and returns a dict of the text and media IDs
-    def build_toot(self, masto, post):
+    def build_toot(self, masto, post, neuter=False):
         toot = {}
         toot["text"] = post.title
         if helper.get_post_type(post) == "video":
             toot["text"] += f"\n\n{post.url}"
-        local_media = self.download(post)
+        if not neuter: local_media = self.download(post)
+        else: local_media = []
         toot["media"] = masto.upload_all_media(local_media)
         return toot
diff --git a/scraper.py b/scraper.py
index a4e1f13..d3691eb 100644
--- a/scraper.py
+++ b/scraper.py
@@ -2,6 +2,7 @@ import os
 import logging
 import json
 from reddit import reddit_scraper as reddit
+from time import sleep
 
 class scraper:
     def __init__(self, service, config, neuter=False):
@@ -15,6 +16,7 @@ class scraper:
         if not os.path.exists("savefile.json"):
             f = open("savefile.json", "w+")
             f.write("{}")
+            f.close()
         # set object variables
         self.service = service
         self.neuter = neuter
@@ -66,7 +68,7 @@ class scraper:
     ### TOOTER METHODS
     # takes a toot and returns a dict of the text and media IDs
     def build_toot(self, masto, post):
-        return self.login.build_toot(masto, post)
+        return self.login.build_toot(masto, post, neuter=self.neuter)
 
     # toots all posts in list
     def toot_posts(self, masto, posts):
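For reference, a minimal standalone sketch (not part of the patch) of the behaviour the change aims for: the simplified ts_older comparison from helper.py and the media skip behind the neuter flag. The timestamps and the temp path below are hypothetical example values.

from datetime import datetime

# ts_older as rewritten in helper.py: True when ts1 is an earlier moment than ts2
def ts_older(ts1, ts2):
    return datetime.fromtimestamp(ts1) < datetime.fromtimestamp(ts2)

seent = 1650000000.0   # newest post already handled for this subreddit
older = 1649990000.0   # created before `seent`, so the scrape loop breaks
newer = 1650005000.0   # created after `seent`, so it gets scraped

assert ts_older(older, seent)
assert not ts_older(newer, seent)

# with neuter=True, build_toot skips the download and attaches no media
neuter = True
local_media = [] if neuter else ["temp/example.mp4"]  # hypothetical local path
assert local_media == []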