-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodbot.py
334 lines (319 loc) · 17.9 KB
/
modbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
## Import Modules - Start
# These two lines will set urllib3 to use PyOpenSSL which will disable the InsecurePlatformWarning#
import signal
import sys
import os
import urllib3.contrib.pyopenssl
import praw
import time
import datetime
from pprint import pprint
from urlparse import urlparse
from urlparse import parse_qs
import string
import re
from apiclient.discovery import build
import traceback
## Import Modules - End
## Signal Handling - Start
def signal_handler(signal, frame):
print ' '
print 'Exiting!'
os._exit(0)
signal.signal(signal.SIGINT, signal_handler)
## Signal Handling - End
## Settings - Start
# Secrets come from the environment
# NOTE: os.environ[...] raises KeyError at startup if any of these variables
# is unset, so the bot refuses to run without all four secrets present.
YT_DEVELOPER_KEY = os.environ['GAPIK']          # YouTube Data API developer key
R_PASSWORD = os.environ['MODBOTPASS']           # Reddit bot account password
R_CLIENT_SECRET = os.environ['MODBOTCLIENTS']   # Reddit OAuth client secret
R_CLIENT_ID = os.environ['MODBOTCLIENTID']      # Reddit OAuth client id
# This is the name of the modbot config file; modbotsettings() exec's it into
# globals(), supplying the r_*, yt_* and bot_* names used throughout the file.
MODBOT_SETTINGS_FILE = 'modbot.conf'
## Settings - End
## Function Definitions - Start
# Function to load settings file
def modbotsettings():
try:
# Try to execfile on the settings file and load them into the globals
# this is terrible coding, but I don't feel like making a proper settings function
execfile(MODBOT_SETTINGS_FILE, globals())
return True
except:
# Failed to load the settings file
print '!!!! Syntax error in ' + MODBOT_SETTINGS_FILE + ' !!!'
return False
#Load the settings before continuing as they are used by just about everything
if not modbotsettings():
print 'Unable to initialize settings. Exiting.'
exit()
# Function to determine the youtube video ID from a URL
def video_id(value):
    """
    Return the YouTube video ID embedded in a URL.

    Example URL's that can be parsed:
    - http://youtu.be/SA2iWivDJiE
    - http://www.youtube.com/watch?v=_oPAwA_Udwc&feature=feedu
    - http://www.youtube.com/movie?v=_oPAwA_Udwc&feature=feedu
    - http://www.youtube.com/attribution_link?a=AbE6fYtNaa4&u=%2Fwatch%3Fv%3DNbyHNASFi6U%26feature%3Dshare
    - http://www.youtube.com/embed/SA2iWivDJiE
    - http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US

    Raises ValueError when no ID can be extracted -- including URLs that are
    missing the expected 'v'/'u' query parameters, which previously leaked a
    raw KeyError to the caller instead of the documented ValueError.
    """
    query = urlparse(value)
    # Video IDs only use [A-Za-z0-9_-]; strip everything from the first
    # character outside that set onward. Raw string so '\w' is a regex
    # class, not a (py3-invalid) string escape.
    pattern = re.compile(r'[^\w-].*$')
    if query.hostname == 'youtu.be':
        # Short link: the ID is the whole path, minus the leading slash.
        return pattern.sub('', query.path[1:])
    if query.hostname in yt_hostnames:
        try:
            if query.path == '/watch' or query.path == '/movie':
                p = parse_qs(query.query)
                return pattern.sub('', p['v'][0])
            if query.path == '/attribution_link':
                # The real watch URL is nested inside the 'u' parameter.
                p = parse_qs(query.query)
                p = urlparse(p['u'][0])
                p = parse_qs(p.query)
                return pattern.sub('', p['v'][0])
        except KeyError:
            # Expected query parameter missing -- treat as unparseable.
            raise ValueError('No video ID could be extracted from URL %s' % value)
        if query.path[:7] == '/embed/':
            return pattern.sub('', query.path.split('/')[2])
        if query.path[:3] == '/v/':
            return pattern.sub('', query.path.split('/')[2])
    # fail?
    raise ValueError('No video ID could be extracted from URL %s' % value)
## Function Definitions - End
## API Initializations - Start
# YouTube API initialization
try:
yt_service = build(yt_api_service_name, yt_api_version, developerKey=YT_DEVELOPER_KEY)
except:
traceback.print_exc()
print '!!!! Unable to initialize to youtube API !!!'
exit()
# Reddit API initialization
try:
r = praw.Reddit(client_id = R_CLIENT_ID,
client_secret = R_CLIENT_SECRET,
user_agent = r_praw,
username = r_user,
password = R_PASSWORD)
except:
traceback.print_exc()
print '!!!! Unable to login to Reddit API !!!'
exit()
## API Initializations - End
## Variable Initializations - Start
# Last time the top submissions were grabbed (epoch seconds; 0 forces an
# immediate pull on the first loop iteration)
lasttopget = 0
# List of submission ID's that have already been processed so we can skip them
# NOTE(review): grows without bound for the life of the process.
already_done = []
# List of top submission YouTube video IDs
topsubmissionvids = []
# List of top submission Reddit IDs (kept parallel to topsubmissionvids)
topsubmissionsids = []
# list of reasons a submission is being removed (reset for each submission)
reasons = []
## Variable Initializations - End
## Main Loop - Start
# Runs forever: refresh config, periodically refresh the all-time-top list,
# then moderate the newest submissions, sleeping between API calls.
while True:
    # set the time this loop iteration was started
    loopstart = time.time()
    # Update the settings (re-exec'd every pass so config edits take effect live)
    modbotsettings()
    # create the subreddit object
    subreddit = r.subreddit(r_subredit)
    # If it has been long enough since the last top submission pull, pull them again
    if lasttopget < loopstart - bot_reloadTopSubsSec:
        print 'Getting top ' + str(bot_topSubsLimit) + ' submissions...'
        try:
            topsubmissions = list(subreddit.top(limit=bot_topSubsLimit))
        except:
            # Pull failed; keep the stale lists and retry next interval.
            print '** Failed to get top sumissions'
            print 'time: ' + time.strftime("%c")
            print ''
        else:
            # Set the last top submission pull time to now
            lasttopget = time.time()
            # Blank out the top submissions lists
            topsubmissionvids = []
            topsubmissionsids = []
            # Fill the top submissions lists (video ID + reddit ID in parallel)
            for topsubmission in topsubmissions:
                try:
                    topytvid=video_id(topsubmission.url)
                except:
                    # Non-YouTube / unparseable top post: skip it silently.
                    print
                else:
                    if topytvid != None:
                        topsubmissionvids.append(topytvid)
                        topsubmissionsids.append(topsubmission.id)
            print 'Top submissions grabbed!'
            print 'time: ' + time.strftime("%c")
            print ''
    # Take a break before continuing to stay compliant with API rules
    sleepfor = max(0.0, bot_sleepsec - (time.time() - loopstart))
    time.sleep(sleepfor)
    # Try to pull the newest submissions
    try:
        submissions = list(subreddit.new(limit=bot_subsLimit))
    except:
        e = sys.exc_info()[0]
        print '**get_new failed: %s' % str(e)
        print 'time: ' + time.strftime("%c")
        print ''
    else:
        # Loop through the newest submissions
        try:
            for submission in submissions:
                # Reset the removal reasons for this submission.
                reasons = []
                # Lets check if it's a good post, but only if we haven't already checked it before
                if submission.id not in already_done:
                    # We don't need to check anything if it is a selfpost, but it's good to see them
                    if submission.is_self:
                        print '!!! Selfpost !!!!'
                        print 'Permalink:'
                        print 'link: https://www.reddit.com%s' % submission.permalink
                        print 'title: %s' % submission.title
                        print 'author: /u/%s' % submission.author.name
                        print 'time: ' + time.strftime("%c")
                        print ''
                    # ok, not a self post, so lets see if it's ok
                    else:
                        # Create a urlparse object of the submission URL for tests
                        suburl = urlparse(submission.url)
                        # check if it is an approved URL hostname
                        # NOTE(review): this is a substring test ('a.co' in
                        # 'youtu.be' style) -- a hostname like 'u.be' would
                        # match 'youtu.be'. Plain equality via
                        # `suburl.hostname in yt_hostnames` looks intended;
                        # confirm before changing.
                        if any(suburl.hostname in s for s in yt_hostnames):
                            # Try to grab the YouTube video ID from the URL
                            try:
                                ytvid=video_id(submission.url)
                            # Couldn't get a video ID so it must not be a valid URL
                            except:
                                print '!!!! poorly formated URL !!!'
                                reasons.append('* The URL you submitted appears to be poorly formated. Only direct links to YouTube videos are allowed. Links to YouTube channels or playlist are prohibited.')
                            else:
                                # Check if the Youtube Video ID is in the top submissions
                                # AND make sure this post is not a top post itself
                                if ytvid in topsubmissionvids and submission.id not in topsubmissionsids:
                                    print '!!!! Repost of a top ' + str(bot_topSubsLimit) + ' submission !!!'
                                    reasons.append('* This video is in the [top ' + str(bot_topSubsLimit) +'](http://www.reddit.com/r/'+ r_subredit +'/top/?sort=top&t=all) submission of all time in this sub. ')
                                try:
                                    # Take a break before searching for reposts, but only if not enough time since the last search has passed.
                                    sleepfor = max(0.0, bot_sleepsec - (time.time() - loopstart))
                                    time.sleep(sleepfor)
                                    # Search the subreddit for other posts of this video
                                    searchres = list(subreddit.search('url:"%s"' % str(ytvid)))
                                except:
                                    traceback.print_exc()
                                    # NOTE(review): 'e' here is only ever set by
                                    # the earlier get_new except block -- it may
                                    # be unbound or stale at this point.
                                    print '**Search failed! %s' % str(e)
                                    print 'time: ' + time.strftime("%c")
                                    print ''
                                else:
                                    # Check if we found more than 1 instance of this video being posted
                                    if len(list(searchres)) > 1:
                                        # tma is the max age time of reposts
                                        tma = time.time() - bot_repostMaxAge
                                        # cycle through the possible repost submissions
                                        for curres in searchres:
                                            # if the current search result is not the current submission and it is newer than tma, it is a repost
                                            if curres.id != submission.id and curres.created_utc > tma:
                                                print '!!!! Repost !!!'
                                                print 'previous post:'
                                                pprint(curres.url)
                                                pprint(curres.permalink)
                                                print 'title: %s' % submission.title
                                                reasons.append("* This video has [already been posted in the last " + bot_repostMaxAgeTxt + "](http://www.reddit.com/r/" + r_subredit + "/search?q=url%3A%22" + str(ytvid) + "%22&restrict_sr=on).")
                                                # One repost match is enough.
                                                break
                                try:
                                    # Try to get the YouTube API data for the current submissions video ID
                                    entry = yt_service.videos().list(id=ytvid, part='snippet,statistics').execute()
                                except:
                                    traceback.print_exc()
                                    print '**Youtube look up for %s failed!' % str(ytvid)
                                    # NOTE(review): user-facing string below has a
                                    # duplicated phrase ("the video is video is").
                                    reasons.append('* I was unable to locate data on the YouTube video. Perhaps the URL is malformed or the video is video is no longer available. ')
                                    print 'url: %s' % submission.url
                                    print 'link: https://www.reddit.com%s' % submission.permalink
                                    print 'time: ' + time.strftime("%c")
                                    print ''
                                else:
                                    # API call succeeded but returned no items: video gone/private.
                                    if not entry["items"]:
                                        print '**Youtube look up for %s failed!' % str(ytvid)
                                        reasons.append('* I was unable to locate data on the YouTube video. Perhaps the URL is malformed or the video is video is no longer available. ')
                                        print 'url: %s' % submission.url
                                        print 'link: https://www.reddit.com%s' % submission.permalink
                                        print 'time: ' + time.strftime("%c")
                                        print ''
                                    else:
                                        # Determine the age of the youtube video
                                        # NOTE(review): publishedAt is UTC ('Z' suffix) but is
                                        # compared against naive local datetime.now() -- the age
                                        # is off by the machine's UTC offset; confirm intent.
                                        _tmp = time.strptime(entry["items"][0]["snippet"]["publishedAt"], '%Y-%m-%dT%H:%M:%SZ')
                                        ptime = datetime.datetime(*_tmp[:6])
                                        now = datetime.datetime.now()
                                        tdelta = now - ptime
                                        seconds = tdelta.total_seconds()
                                        # if the age of the video is less than the minimum age, then it is too new
                                        if seconds < bot_minVideoAge:
                                            print '!!! Video is newer than ' + bot_minVideoAgeTxt + ' !!!!'
                                            reasons.append('* Your submission violates rule #1, no videos uploaded to YouTube in the past ' + bot_minVideoAgeTxt + ' are allowed. ')
                                        # If the video has more views than the max view count, then it is not allowed
                                        if float(entry["items"][0]["statistics"]["viewCount"]) > bot_maxViewCount:
                                            print '!!! Video has been viewed more than ' + str(bot_maxViewCount) + ' times !!!!'
                                            reasons.append('* Your submission violates rule #2, no YouTube videos with greater than ' + bot_maxViewCountTxt + ' views are allowed. ')
                                        # Check if the link is to a banned YouTube channel (case-insensitive)
                                        if entry["items"][0]["snippet"]["channelTitle"].lower() in (banname.lower() for banname in yt_bannedchannels):
                                            print '!!! Video is from a banned channel: ' + entry["items"][0]["snippet"]["channelTitle"] + ' !!!!'
                                            reasons.append('* Your submission links to the ' + entry["items"][0]["snippet"]["channelTitle"] + ' YouTube channel which has been banned in /r/' + r_subredit + '. ')
                        else:
                            # after all that, this probably not a YouTube link
                            print '!!!! Submission does not contain valid youtube link !!!'
                            reasons.append('* Your submission does not appear to contain a link to YouTube. Only direct links to YouTube videos are allowed. Links to any site other than YouTube are prohibited.')
                    # If we have indicated there are any reasons to remove this post, it's time to do that
                    if len(reasons) > 0:
                        # build modcommenttxt with all the reasons
                        modcommenttxt = "Your submission has been automatically removed for the following reason(s):\n\n"
                        for reason in reasons:
                            modcommenttxt += str(reason) + "\n\n"
                        modcommenttxt += "\n\nPlease review the subreddit rules. If you believe your submission has been removed in error, [message the moderators](http://www.reddit.com/message/compose?to=%2Fr%2F" + r_subredit + ") as replies to this comment or PM's to /u/" + r_user + " will not be read by the moderators."
                        print 'Submission Info:'
                        print 'url: %s' % submission.url
                        print 'link: https://www.reddit.com%s' % submission.permalink
                        print 'title: %s' % submission.title
                        print 'author: /u/%s' % submission.author.name
                        # ytvid/entry may be unbound (e.g. bad URL path), hence
                        # the blanket try around this diagnostic output.
                        try:
                            print 'Video ID: %s' % ytvid
                            print 'Video published on: %s ' % entry["items"][0]["snippet"]["publishedAt"]
                            print 'Video view count: %s' % entry["items"][0]["statistics"]["viewCount"]
                            print 'Video channel: %s' % entry["items"][0]["snippet"]["channelTitle"]
                        except:
                            pass
                        # Try to leave a distinguished comment on the submission and remove it
                        try:
                            modcomment = submission.reply(modcommenttxt)
                            modcomment.mod.distinguish(how='yes', sticky=True)
                            submission.mod.remove(spam=False)
                        except:
                            print 'time: ' + time.strftime("%c")
                            traceback.print_exc()
                            print '** Comment or removal failed! link possibly deleted by user during checks.'
                            print ''
                        else:
                            print 'Submission removed!'
                            print 'time: ' + time.strftime("%c")
                            print ''
                    # Update the list of already processed submissions, even if there were errors so we dont get stuck in endless loops.
                    already_done.append(submission.id)
        except:
            # How the hell did this happen with all those try/excepts?
            e, u, l = sys.exc_info()
            print '**Main For loop failed: %s' % str(e)
            print 'Line No: %s' % str(l.tb_lineno)
            print 'time: ' + time.strftime("%c")
            print 'Submission Info:'
            # submission may itself be the broken object; best-effort dump.
            try:
                print 'url: %s' % submission.url
                print 'link: https://www.reddit.com%s' % submission.permalink
                print 'title: %s' % submission.title
                print 'author: /u/%s' % submission.author.name
                already_done.append(submission.id)
            except:
                pass
            print ''
    # Time to sleep again before the next loop iteration.
    loopend = time.time()
    sleepfor = max(0.0, bot_sleepsec - (loopend - loopstart))
    time.sleep(sleepfor)
## Main Loop - End