User:RonBot/1/Source1
Jump to navigation
Jump to search
PYconfig.py (companion module — holds shared mutable state used by the main program):
pagelist=list()
basicsearch=""
Main Program (the bot task itself):
from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import PYconfig
# Module-level shared connection: every function below reads this `site` object.
site = wiki.Wiki('https://commons.wikimedia.org/w/api.php') #Tell Python to use the common's API
site.login(userpassbot.username, userpassbot.password) #login
# Console-safe printer: filenames on Commons frequently contain accented characters
# that the console encoding may reject.
def pnt(s):
    """Print *s*; if the console encoding cannot represent it, print the UTF-8 bytes instead."""
    try:
        print(s)
    except UnicodeEncodeError:
        fallback = s.encode('utf-8')
        print(fallback)
def startAllowed():
    """Return "run" when the on-wiki run page contains exactly "Run", else "no".

    Acts as a kill switch: editors can stop the bot by blanking/changing
    [[User:RonBot/1/Run]].
    """
    runpage = page.Page(site, "User:RonBot/1/Run").getWikiText()
    return "run" if runpage == "Run" else "no"
def allow_bots(text, user):
    """Honour {{bots}}/{{nobots}} exclusion templates found in page wikitext.

    text -- the page's wikitext; user -- this bot's user name.
    Returns True when the bot may edit the page, False when it is excluded.
    Logic follows the standard MediaWiki exclusion-compliance snippet.

    Fix: the original used Python-2-only `print` statements; converted to
    call form (as `pnt` above already does) so the file can run on Python 3.
    All prints are single-argument, so output is identical on Python 2.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # No exclusion template on the page at all: editing is allowed.
        return True
    print("template found")  # Have we found one
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        if param.name == 'allow':
            print("We have an ALLOW")  # allow found
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
        elif param.name == 'deny':
            print("We have a DENY")  # deny found
            if ''.join(bots) == 'none':
                # deny=none denies nobody, i.e. everyone is allowed.
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    if tl.name.matches('nobots') and len(tl.params) == 0:
        # Bare {{nobots}} with no parameters excludes all bots.
        print("match - false")
        return False
    return True
def remove_duplicates(l):
    """Return the unique items of *l* as a new list (order not preserved)."""
    unique_items = set(l)
    return list(unique_items)
def firstrevision(page):
    """Return the date (YYYY-MM-DD) of the earliest revision of *page*.

    Fetches up to 'max' revisions; the API returns newest first, so the
    last list entry is the oldest revision. The date is the part of the
    ISO timestamp before the 'T'.

    Fix: `res['query']['pages'].keys()[0]` raises TypeError on Python 3
    (dict views are not indexable); `list(...)[0]` behaves identically on
    Python 2. Also converted the Py2-only print statement to call form.
    """
    params = {'action': 'query',
              'prop': 'revisions',
              'titles': page,
              'rvlimit': 'max'
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    pageid = list(res['query']['pages'])[0]  # iterating a dict yields its keys
    revisions = res['query']['pages'][pageid]['revisions']
    first = len(revisions) - 1  # newest-first ordering: last entry is the oldest
    timestamp = str(revisions[first]['timestamp'])
    m = re.search(r'(.*?)T', timestamp)  # e.g. 2019-01-31T12:00:00Z -> 2019-01-31
    datebit = m.group(1)
    print(datebit)
    return datebit
def lastrevision(page):
    """Return the date (YYYY-MM-DD) of the most recent revision of *page*.

    rvlimit=1 fetches only the newest revision; the date is the part of the
    ISO timestamp before the 'T'.

    Fix: `.keys()[0]` raises TypeError on Python 3 (dict views are not
    indexable); `list(...)[0]` behaves identically on Python 2.
    """
    params = {'action': 'query',
              'prop': 'revisions',
              'titles': page,
              'rvlimit': '1'
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    pageid = list(res['query']['pages'])[0]  # iterating a dict yields its keys
    timestamp = str(res['query']['pages'][pageid]['revisions'][0]['timestamp'])
    m = re.search(r'(.*?)T', timestamp)
    datebit = m.group(1)
    return datebit
def SearchReplace(search1, search2, title, size):
    """Run a paged fulltext search and append one line per hit to PYconfig.pagelist.

    search1 -- licence `incategory:` clause; search2 -- `insource:` clause;
    title   -- licence label written into each output line;
    size    -- initial srlimit for the first API request (capped at 500 later).
    Each hit is recorded as "* <first-revision-date> <title> [[:<page>]]".

    Fixes: two bare `except:` clauses narrowed to the exceptions actually
    expected (missing-key / short result list, and the absent 'continue'
    marker on the last page); Python-2-only multi-argument print statements
    converted to call form. Pagination arithmetic is preserved exactly.
    """
    processed = 0
    lastContinue = '0'
    print("============================================")
    searchstr = (PYconfig.basicsearch + " " + search1 + " " + search2
                 + ' -incategory: "Extracted images"')
    print("search = " + searchstr)
    while True:
        params = {'action': 'query',
                  'list': 'search',
                  'srsearch': searchstr,
                  'srnamespace': 6,  # File: namespace
                  'srlimit': size,
                  'sroffset': lastContinue
                  }
        print("SR.params")
        result = ""  # clear out previous run
        request = api.APIRequest(site, params)  # Set the API request
        print("SR.request")
        result = request.query(False)
        totalhits = result['query']['searchinfo']['totalhits']
        print("TotalHits this search " + str(totalhits))
        size = totalhits
        if totalhits > 500:
            # More hits than one page: read at most the 500 of this batch.
            size = totalhits - processed
            processed = processed + 500
            if size > 500:
                size = 500
        print(str(totalhits) + " " + str(size) + " " + str(processed))
        if totalhits > 0:
            for loopvar in range(0, size):
                try:
                    pagetitle = result['query']['search'][loopvar]['title']
                except (KeyError, IndexError):
                    # Result list can be shorter than `size`; keep best-effort behaviour.
                    pagetitle = "Not Found"
                datepart = firstrevision(pagetitle)
                PYconfig.pagelist.append("* " + datepart + " " + title + " [[:" + pagetitle + "]]")
        try:
            lastContinue = result['continue']['sroffset']
            print("continue")
        except KeyError:
            # No 'continue' key: this was the final page of results.
            print("End of Cat")
            break
    return
def writepage(title):
    """De-duplicate, sort and filter PYconfig.pagelist, then save it to page *title*.

    Lines mentioning 'phillip medhurst' are dropped (excluded uploader).

    Fixes: the original called `remove_duplicates(PYconfig.pagelist)` and
    discarded its return value, so duplicates were never actually removed —
    the result is now assigned back. Page text is built with a single join
    instead of quadratic `+=` concatenation. The "witing page" log typo is
    corrected and the Py2 print statement converted.
    """
    pagetitle = title
    pagepage = page.Page(site, pagetitle)
    PYconfig.pagelist = remove_duplicates(PYconfig.pagelist)
    PYconfig.pagelist.sort(reverse=True)  # newest first-revision date first
    kept = [line for line in PYconfig.pagelist
            if 'phillip medhurst' not in line.lower()]
    pagetext = "".join(line + "\n" for line in kept)
    print("writing page")
    pagepage.edit(text=pagetext, skipmd5=True, summary="update page")
def _run_search(searchlist, titles, search, outpage):
    """Reset the shared result list, run *search* against every licence
    category, then write the collected report to *outpage*."""
    PYconfig.pagelist = list()
    for category_clause, label in zip(searchlist, titles):
        SearchReplace(category_clause, search, label, 500)
    writepage(outpage)

def main():
    """Build the per-licence "claimed external source" report pages on Commons.

    For each of six source patterns (Facebook, Instagram, Google, Flickr PDM,
    Twitter, Flickr) the same nine licence categories are searched and the
    hits written to a report subpage of User:RonBot.

    Fixes: the five copy-pasted while-loop stanzas are factored into the
    `_run_search` helper (same call order and side effects); the Google regex
    contained `[[Gg]oogle` — a character class that also matched a literal
    '[' — corrected to `[Gg]oogle`. NOTE(review): assumed typo; confirm the
    search results are unchanged apart from no longer matching '[oogle'.
    Search strings are now raw strings so the backslashes are literal on
    Python 3 without escape warnings (byte-identical content).
    """
    go = startAllowed()  # Check if the on-wiki kill switch allows the task
    if go == "no":
        sys.exit('Disabled Task')
    searchlist = [
        'incategory:"CC-BY-SA-4.0"',
        'incategory:"CC-BY-4.0"',
        'incategory:"CC-BY-SA-3.0"',
        'incategory:"CC-BY-3.0"',
        'incategory:"CC-BY-SA-2.0"',
        'incategory:"CC-BY-2.0"',
        'incategory:"CC-BY-SA-1.0"',
        'incategory:"CC-BY-1.0"',
        'incategory:"CC-Zero"',
    ]
    title = [
        'cc-by-sa-4.0',
        'cc-by-4.0',
        'cc-by-sa-3.0',
        'cc-by-3.0',
        'cc-by-sa-2.0',
        'cc-by-2.0',
        'cc-by-sa-1.0',
        'cc-by-1.0',
        'CC-Zero',
    ]
    # Exclude files whose licence has already been reviewed/confirmed.
    PYconfig.basicsearch = '-incategory:"Items with OTRS permission confirmed" -incategory:"Files from external sources with reviewed licenses" -incategory:"CC-PD-Mark" -incategory:"PD ineligible"'

    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ff]acebook|https:\/\/www.[Ff]acebook|Https:\/\/[Ff]acebook|https:\/\/[Ff]acebook|www.[Ff]acebook|[Ff]acebook)/'
    _run_search(searchlist, title, search, "user:RonBot/FacebookSource")

    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ii]nstagram|https:\/\/www.[Ii]nstagram|Https:\/\/[Ii]nstagram|https:\/\/[Ii]nstagram|www.[Ii]nstagram|[Ii]nstagram)/'
    _run_search(searchlist, title, search, "user:RonBot/InstagramSource")

    # Bug fix: original had `[[Gg]oogle` (see docstring).
    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Gg]oogle|https:\/\/www.[Gg]oogle|www.[Gg]oogle|[Gg]oogle)/ -insource:/[Ss]ource *\= *w*\.*[Gg]oogle [Aa]rt/ -insource:/[Ss]ource *\= *w*\.*[Gg]oogle [Bb]ooks/'
    _run_search(searchlist, title, search, "user:RonBot/GoogleSource")

    search = 'insource:"{{FlickrVerifiedByUploadWizard|Public Domain Mark}}" -incategory:"Flickr public domain images needing specific copyright tags"'
    _run_search(searchlist, title, search, "user:RonBot/PDMSource")

    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Tt]witter|https:\/\/www.[Tt]witter|Https:\/\/[Tt]witter|https:\/\/[Tt]witter|www.[Tt]witter|[Tt]witter)/'
    _run_search(searchlist, title, search, "user:RonBot/TwitterSource")

    # Flickr gets a different exclusion list (review bots and verified uploads).
    PYconfig.basicsearch = '-incategory:"Items with OTRS permission confirmed" -incategory:"Flickr images reviewed by File Upload Bot (Magnus Manske)" -incategory:"Flickr images uploaded by Flickr upload bot"'
    PYconfig.basicsearch = PYconfig.basicsearch + ' -incategory:"Flickr review needed" -incategory:"Files from external sources with reviewed licenses" -incategory:"Flickr images verified by UploadWizard" -incategory:"Flickr images reviewed by FlickreviewR" -incategory:"Flickr images reviewed by FlickreviewR 2" -incategory:"Files from external sources with reviewed licenses" -incategory:"Flickr images reviewed by trusted users"'
    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ff]lickr|https:\/\/www.[Ff]lickr|www.[Ff]lickr|[Ff]lickr)/'
    _run_search(searchlist, title, search, "user:RonBot/FlickrSource")

    PYconfig.pagelist = list()
# Script entry point: run the task with library FutureWarnings silenced so
# they do not clutter the bot's console output.
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()