User:RonBot/2/Source1
Jump to navigation
Jump to search
# ---- PYconfig.py (shared state module imported by the main program) ----
# Module-level state shared across the bot's functions; the main program
# imports this module and reads/writes these attributes directly.
pagelist = []     # report lines appended by SearchReplace()
basicsearch = ""  # common search prefix, assigned in main()
namelist = []     # titles of files queued by SearchReplace()
tstamp = ""       # timestamp recorded by the latest firstrevision() call
user = ""         # user name recorded by the latest firstrevision() call
# ---- Main Program ----
from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import PYconfig
# Module-level connection object; every API request and page edit below is made through `site`.
site = wiki.Wiki('https://commons.wikimedia.org/w/api.php') # Point wikitools at the Wikimedia Commons API
# Credentials come from the separate userpassbot module so they stay out of this file.
site.login(userpassbot.username, userpassbot.password) # log in before any edit is attempted
# Console output helper: file names regularly contain accented characters
# that the console encoding may not be able to represent.
def pnt(s):
    """Print *s*; if that raises UnicodeEncodeError, print its UTF-8 encoding instead."""
    try:
        print(s)
    except UnicodeEncodeError:
        utf8_form = s.encode('utf-8')
        print(utf8_form)
def startAllowed():
    """Check the on-wiki switch page for this task.

    Fetches the wikitext of "User:RonBot/2/Run" and returns the string "run"
    when that text is exactly "Run"; any other content returns "no".
    """
    switch_page = page.Page(site, "User:RonBot/2/Run")
    switch_text = switch_page.getWikiText()
    return "run" if switch_text == "Run" else "no"
def allow_bots(text, user):
    """Decide whether *user* may edit a page, based on its {{bots}}/{{nobots}} template.

    text: wikitext of the page to inspect.
    user: bot account name; compared case-insensitively after stripping whitespace.

    Only the first template named 'bots' or 'nobots' is considered. Returns
    True when there is no such template, when an 'allow' parameter lists the
    bot or 'all', or when 'deny' is 'none'. Returns False when 'allow' is
    'none', when 'deny' lists the bot or 'all', or for a parameterless
    {{nobots}}. Anything else falls through to True.
    """
    bot_name = user.lower().strip()
    wikicode = mwparserfromhell.parse(text)

    # Locate the first exclusion template, if any.
    exclusion = None
    for candidate in wikicode.filter_templates():
        if candidate.name.matches(['bots', 'nobots']):
            exclusion = candidate
            break
    if exclusion is None:
        return True
    print("template found") #Have we found one

    targets = (bot_name, 'all')
    for param in exclusion.params:
        names = [entry.lower().strip() for entry in param.value.split(",")]
        is_none = ''.join(names) == 'none'
        if param.name == 'allow':
            print("We have an ALLOW") # allow found
            if is_none:
                return False
            if any(name in targets for name in names):
                return True
        elif param.name == 'deny':
            print("We have a DENY") # deny found
            if is_none:
                print("none - true")
                return True
            for name in names:
                if name in targets:
                    pnt(name)
                    pnt(bot_name)
                    print("all - false")
                    return False

    # A bare {{nobots}} (no parameters) excludes every bot.
    if exclusion.name.matches('nobots') and len(exclusion.params) == 0:
        print("match - false")
        return False
    return True
def remove_duplicates(l):
    """Return a new list holding the distinct items of *l* in first-seen order.

    The input is not modified. Items must be hashable. Keeping the original
    order makes the result deterministic, unlike a round trip through set().
    """
    seen = set()
    unique = []
    for item in l:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
def firstrevision(page):
    """Fetch the oldest revision of a page and record its author and timestamp.

    page: title of the page to query. Inside this function the name shadows
        the wikitools ``page`` module, which is not needed here.

    Side effects: stores the revision's user in PYconfig.user and its
    timestamp string in PYconfig.tstamp, and prints progress markers.

    Returns the part of the timestamp before the 'T' separator.
    Raises KeyError if the API response carries no revision data for the page.
    """
    # Ask for exactly one revision, oldest first. Fetching 'max' revisions
    # and taking the last one picks the wrong revision as soon as a page has
    # more revisions than a single response can hold.
    params = {'action':'query',
              'prop':'revisions',
              'titles':page,
              'rvprop':'timestamp|user',
              'rvdir':'newer',
              'rvlimit':1
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    print("FR1")
    print("")
    pages = res['query']['pages']
    pageid = next(iter(pages))  # a single title was requested, so there is one entry
    oldest = pages[pageid]['revisions'][0]
    PYconfig.user = oldest['user']
    pnt(PYconfig.user)
    timestamp = str(oldest['timestamp'])
    PYconfig.tstamp = timestamp
    print("FR2")
    pnt(timestamp)
    datebit = timestamp.partition('T')[0]
    print(datebit)
    return datebit
def noperm(pagetitle):
    """Tag a file as lacking permission and notify the author of its first revision.

    pagetitle: title of the file page to tag.

    Steps: prepend {{subst:npd}} to the file page, then append an
    {{subst:image permission}} section to the talk page of PYconfig.user (set
    by firstrevision), unless allow_bots() rejects that talk page. A talk page
    that cannot be read is treated as new and seeded with a welcome template.
    A failure to save the talk page is reported on the console, not raised.
    """
    firstrevision(pagetitle) # sets PYconfig.user
    pagepage = page.Page(site, pagetitle)
    print("main.pagepage")
    pagetext = pagepage.getWikiText()
    pagetext = "{{subst:npd}}\n" + pagetext
    # NOTE(review): the title is UTF-8 encoded before being concatenated with
    # fetched wikitext; presumably getWikiText() returns encoded text - confirm.
    pagetitletext = pagetitle.encode('utf-8')
    pnt(pagetitle)
    pnt(pagetext)
    pagepage.edit(text=pagetext, bot=True, summary="(Task 2 trial) Tagging image - No permission - no proper Flickr link- ")
    print("Added {{No permission}}")
    print("Uploader")
    username1 = "User talk:"
    username = username1.encode('utf-8') + PYconfig.user
    pagepage = page.Page(site, username)
    print("main.pagepage2")
    # Test for an existing talk page.
    try:
        pagetext = pagepage.getWikiText()
    except Exception:
        # Any read failure is treated as "page does not exist"; the edit
        # below then uses createonly so an existing page is never overwritten.
        pagetext = "{{subst:Welcome}} ~~~~"
        newpage = True
    else:
        newpage = False
    # The page text is now either the existing content or a welcome template.
    pnt(pagetext)
    pnt(pagetitle)
    go = allow_bots(pagetext, 'RonBot') # does the user talk page allow bots
    if go:
        print("bot allowed on talk page")
        pagetext = pagetext + "\n==[[:"
        pagetext = pagetext + pagetitletext
        pagetext = pagetext + "]]==\n"
        pagetext = pagetext + "{{subst:image permission|" + pagetitletext + "}}"
        pnt(pagetext)
        edit_args = dict(
            text=pagetext,
            bot=True,
            summary="(Task 2 trial) Please send permission for "+pagetitletext+" to [[Commons:OTRS|OTRS]] ~~~~",
        )
        if newpage:
            print("Newpage")
            # Only passed for new pages, exactly as the separate calls did before.
            edit_args['createonly'] = True
        else:
            print("Existing Page")
        try:
            pagepage.edit(**edit_args)
        except Exception as exc:
            print("")
            print("Failed to add")
            print(repr(exc))
        else:
            # Reported outside the try block so that a console encoding
            # problem cannot be mistaken for a failed edit.
            pnt("Added {{Please send permission}} to " + username)
    else:
        print("no bots on user page")
    print("##################################################################")
    return
def check4templates(pagetext):
    """Return True if *pagetext* already contains one of the templates that stop the bot.

    The text is searched for the opening of a 'No permission', 'Flickrreview',
    'Delete', 'No source' or 'Dw no source' template (first letter in either
    case). The first match is reported on the console. Returns False when
    none of them is present.
    """
    markers = (
        (r'\{\{[Nn]o permission', "No permission found"),
        (r'\{\{[Ff]lickrreview', "Flickrreview found"),
        # Two opening braces, like the others: the earlier three-brace
        # pattern could never match an ordinary {{delete}} tag.
        (r'\{\{[Dd]elete', "Delete found"),
        (r'\{\{[Nn]o source', "No source found"),
        (r'\{\{[Dd]w no source', "No source found"),
    )
    for pattern, message in markers:
        if re.search(pattern, pagetext):
            print(message)
            return True
    return False
def SearchReplace(search1, search2, title, size):
    """Run one file-namespace search and queue the recently created hits.

    search1: licence-specific search term.
    search2: source-matching search term.
    title: label for this search, embedded in each report line.
    size: number of results requested per API call (srlimit).

    The query is PYconfig.basicsearch plus both terms plus an exclusion of the
    "Extracted images" category. Result pages are followed through the API's
    continuation offset. For every hit, firstrevision() supplies the creation
    date; a report line is appended to PYconfig.pagelist, and hits created
    within the last 31 days whose title does not contain "crop" are also
    appended to PYconfig.namelist.
    """
    lastContinue = '0'
    print("============================================")
    searchstr = PYconfig.basicsearch + " " + search1 + " " + search2 + ' -incategory: "Extracted images"'
    print("search =  %s" % searchstr)
    while True:
        params = {'action':'query',
                  'list':'search',
                  'srsearch':searchstr,
                  'srnamespace':6,
                  'srlimit':size,
                  'sroffset':lastContinue
                  }
        print("SR.params")
        request = api.APIRequest(site, params) #Set the API request
        print("SR.request")
        result = request.query(False)
        totalhits = result['query']['searchinfo']['totalhits']
        # Walk the hits actually returned rather than a count derived from
        # totalhits: the two can disagree, and indexing past the end used to
        # feed a made-up "Not Found" title to firstrevision().
        hits = result['query']['search']
        print("TotalHits this search %s" % totalhits)
        print("%s %s" % (totalhits, len(hits)))
        for hit in hits:
            pagetitle = hit['title']
            datepart = firstrevision(pagetitle)
            PYconfig.pagelist.append("* "+datepart+" "+title+" [[:"+pagetitle+"]]")
            timestamp = datetime.datetime.strptime(PYconfig.tstamp, '%Y-%m-%dT%H:%M:%SZ')
            now = datetime.datetime.utcnow()
            print("abusechecks.timestamp2 * %s * * %s *" % (now, timestamp))
            print("")
            if timestamp < now - datetime.timedelta(days=31):
                print('oid image')
            elif "crop" in pagetitle.lower():
                # Tested against the file title: the licence label passed as
                # `title` can never contain "crop", which made this skip dead.
                print("crop image - skip")
            else:
                print('new iamge')
                # NOTE(review): this line was already appended above, so new
                # files appear twice in PYconfig.pagelist - confirm intent.
                PYconfig.pagelist.append("* "+datepart+" "+title+" [[:"+pagetitle+"]]")
                PYconfig.namelist.append(pagetitle)
        try:
            lastContinue = result['continue']['sroffset']
        except KeyError:
            # No continuation data means this was the last batch.
            print("End of Cat")
            break
        print("continue")
    return
def writepage(title):
    """Write the queued file titles, one per line, to the wiki page *title*.

    Side effects: removes duplicate entries from PYconfig.namelist in place,
    sorts PYconfig.pagelist in descending order, saves the page and prints
    the final name list.
    """
    report_page = page.Page(site, title)
    # Keep the de-duplicated result: the earlier call threw it away, so
    # titles found by several searches stayed in the list. Slice assignment
    # updates the existing list object rather than rebinding the attribute.
    PYconfig.namelist[:] = remove_duplicates(PYconfig.namelist)
    # NOTE(review): pagelist is sorted here but never written anywhere in
    # this function - confirm whether the report was meant to use it.
    PYconfig.pagelist.sort(reverse=True)
    pagetext = "".join(line + "\n" for line in PYconfig.namelist)
    print("witing page")
    report_page.edit(text=pagetext, skipmd5=True, summary="(Task 2 trial) update page")
    print("")
    print("")
    pnt(PYconfig.namelist)
    return
def AddTemplate():
    """Process every title in PYconfig.namelist.

    For each file page: skip it when check4templates() finds an existing tag
    or allow_bots() rejects the bot. Otherwise, if the wikitext has a source
    field followed by an https link on the same line, append {{flickrreview}};
    if not, hand the page to noperm(). Prints the number of pages that
    received {{flickrreview}}. A failed save is reported on the console, not
    raised.
    """
    added = 0
    print(time.ctime())
    print(len(PYconfig.namelist))
    for pagetitle in PYconfig.namelist:
        pagetitletext = pagetitle.encode('utf-8')
        pnt(pagetitle)
        pagepage = page.Page(site, pagetitle, True, False) # dont follow redirects!
        # NOTE(review): the redirect status was fetched but never acted on;
        # the call is kept unchanged - confirm whether redirects should be skipped.
        pagepage.isRedir()
        pagetext = pagepage.getWikiText()
        pnt(pagetext)
        # The flattened original layout leaves it unclear which of the two
        # skip paths logged "NO ACTION", so both do.
        if check4templates(pagetext):
            print("NO ACTION")
            continue
        if not allow_bots(pagetext, 'RonBot'): # does the file page allow bots
            print("NO ACTION")
            continue
        if not re.search(r'[Ss]ource.*?=.*?[Hh]ttps', pagetext):
            noperm(pagetitle)
            continue
        print(pagetitletext + " ADDITION bot allowed on article")
        pagetext = pagetext + "\n" + "{{flickrreview}}"
        try:
            pagepage.edit(text=pagetext, bot=True, summary="(Task 2 Trial) - Addition of flickrreview")
        except Exception as exc:
            print("Failed to write")
            print(repr(exc))
        else:
            # Counted outside the try block so that a console error while
            # reporting cannot be mistaken for a failed edit.
            added += 1
            print("writing changed page")
            pnt(pagetitle)
        print("++++++++++++++++++++++++++++++++++++++++")
    print("Added %s" % added)
    return
def main():
    """Run the whole task once.

    Exits with status 1 when startAllowed() reports the task is switched off.
    Otherwise sets the shared search prefix, runs SearchReplace() once per
    licence category, writes the collected titles to
    "User:RonBot/2/FlickrSource" and finally calls AddTemplate().
    """
    if startAllowed() == "no": #Check if task is enabled
        sys.exit(1)

    # (licence category to search, label used in the report lines)
    licences = (
        ('CC-BY-SA-4.0', 'cc-by-sa-4.0'),
        ('CC-BY-4.0', 'cc-by-4.0'),
        ('CC-BY-SA-3.0', 'cc-by-sa-3.0'),
        ('CC-BY-3.0', 'cc-by-3.0'),
        ('CC-BY-SA-2.0', 'cc-by-sa-2.0'),
        ('CC-BY-2.0', 'cc-by-2.0'),
        ('CC-BY-SA-1.0', 'cc-by-sa-1.0'),
        ('CC-BY-1.0', 'cc-by-1.0'),
        ('CC-Zero', 'CC-Zero'),
    )

    # Categories whose members are excluded from every search. An earlier,
    # different prefix that was assigned and then overwritten before use has
    # been dropped, as has a category that was listed twice.
    excluded = (
        'Items with OTRS permission confirmed',
        'Flickr images reviewed by File Upload Bot (Magnus Manske)',
        'Flickr images uploaded by Flickr upload bot',
        'Flickr review needed',
        'Files from external sources with reviewed licenses',
        'Flickr images verified by UploadWizard',
        'Flickr images reviewed by FlickreviewR',
        'Flickr images reviewed by FlickreviewR 2',
        'Flickr images reviewed by trusted users',
    )

    PYconfig.pagelist = list()
    PYconfig.basicsearch = " ".join('-incategory:"%s"' % name for name in excluded)

    # Raw string: same characters as before, without invalid escape sequences.
    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ff]lickr|https:\/\/www.[Ff]lickr|\[[Hh]ttps:\/\/www.[Ff]lickr|www.[Ff]lickr|[Ff]lickr)/'

    for category, label in licences:
        SearchReplace('incategory:"%s"' % category, search, label, 500)
    writepage("User:RonBot/2/FlickrSource")
    AddTemplate()
    print("End of Main")
# Script entry point: run the task with FutureWarning messages silenced.
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter(action="ignore", category=FutureWarning)
        main()