User:RonBot/2/Source1

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

PYconfig.py

# Shared mutable state for the bot; the main program imports this module and
# reads/writes these names as PYconfig.<name>.
pagelist = []       # report lines, one per search hit
basicsearch = ""    # common search prefix
namelist = []       # titles of files queued for processing
tstamp = ""         # timestamp of the most recently looked-up first revision
user = ""           # author of the most recently looked-up first revision

Main Program

from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import PYconfig


# Module-level wiki session used by the functions in this file. Both statements
# run at import time, before any function is called.
site = wiki.Wiki('https://commons.wikimedia.org/w/api.php') #Tell Python to use the common's API
# Credentials come from the local userpassbot module (not shown here).
site.login(userpassbot.username, userpassbot.password) #login

def pnt(s):
    """Print ``s``, switching to its UTF-8 encoding if plain printing fails.

    File names can contain accented characters that the output stream is
    unable to encode; in that case the UTF-8 encoded form is printed instead.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        utf8_form = s.encode('utf-8')
        print(utf8_form)
      
def startAllowed():
    """Check the on-wiki switch page for this task.

    Returns "run" when the wikitext of User:RonBot/2/Run is exactly "Run",
    and "no" for any other content.
    """
    switch_page = page.Page(site, "User:RonBot/2/Run")
    enabled = switch_page.getWikiText() == "Run"
    return "run" if enabled else "no"

def allow_bots(text, user):
    """Return whether ``user`` may edit a page, per its bot-exclusion template.

    Only the first ``{{bots}}`` / ``{{nobots}}`` template found in ``text`` is
    considered:

    * no such template                      -> True
    * ``allow=none``                        -> False
    * ``allow`` listing ``user`` or ``all`` -> True
    * ``deny=none``                         -> True
    * ``deny`` listing ``user`` or ``all``  -> False
    * ``{{nobots}}`` without parameters     -> False
    * anything else                         -> True

    :param text: wikitext of the page to inspect.
    :param user: bot account name; matched case-insensitively.
    :returns: True if editing is permitted, False otherwise.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # Loop finished without finding an exclusion template.
        return True
    print("template found") #Have we found one
    for param in tl.params:
        # Strip the name so "| allow = X" is recognised as well as "|allow=X".
        name = param.name.strip()
        bots = [x.lower().strip() for x in param.value.split(",")]
        if name == 'allow':
            print("We have an ALLOW") # allow found
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
            # NOTE(review): an allow list that does not name this bot falls
            # through and ends up permitted; confirm against the template's
            # documented semantics.
        elif name == 'deny':
            print("We have a DENY") # deny found
            if ''.join(bots) == 'none':
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    if (tl.name.matches('nobots') and len(tl.params) == 0):
        print("match - false")
        return False
    return True

def remove_duplicates(l):
    """Return a new list with the duplicates of ``l`` removed.

    The first occurrence of each item is kept and the original relative order
    is preserved, so the result is deterministic. Items must be hashable.

    :param l: iterable of hashable items.
    :returns: list of the distinct items in first-seen order.
    """
    seen = set()
    unique = []
    for item in l:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique

def firstrevision(page):
    """Look up the oldest revision of ``page`` and record who made it and when.

    Side effects: stores the revision's user name in ``PYconfig.user`` and its
    full timestamp string in ``PYconfig.tstamp``.

    :param page: title of the page to query.
    :returns: the part of the revision timestamp before the ``T`` separator.
    :raises KeyError: if the API response carries no revisions for the title.
    """
    # Ask the API for exactly one revision, oldest first. Fetching a full
    # batch and taking the last entry picks the wrong revision whenever the
    # page has more revisions than one batch can hold.
    params = {'action':'query',
              'prop':'revisions',
              'titles':page,
              'rvprop':'user|timestamp',
              'rvdir':'newer',
              'rvlimit':1
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    print("FR1")
    print("")
    pages = res['query']['pages']
    # A single title was requested, so the first entry is the one we want.
    pageid = list(pages)[0]
    oldest = pages[pageid]['revisions'][0]
    PYconfig.user = oldest['user']
    pnt(PYconfig.user)
    timestamp = str(oldest['timestamp'])
    PYconfig.tstamp = timestamp
    print("FR2")
    pnt(timestamp)
    # Keep only the date portion that precedes the "T" separator.
    datebit = timestamp.partition('T')[0]
    print(datebit)
    return datebit

def noperm(pagetitle):
    """Tag a file as lacking permission and notify its first uploader.

    Steps:

    1. ``firstrevision`` is called to set ``PYconfig.user``.
    2. ``{{subst:npd}}`` is prepended to the file page and the page is saved.
    3. The uploader's talk page is read (or a welcome message is prepared if
       it cannot be read), and, if ``allow_bots`` permits, a section with
       ``{{subst:image permission|...}}`` is appended.

    Failures while saving the talk page are reported and do not abort the run.

    :param pagetitle: title of the file page to tag.
    """
    firstrevision(pagetitle) # sets PYconfig.user
    pagepage = page.Page(site, pagetitle)
    print("main.pagepage")
    pagetext = pagepage.getWikiText()
    pagetext = "{{subst:npd}}\n" + pagetext
    pagetitletext = pagetitle.encode('utf-8')
    pnt(pagetitle)
    pnt(pagetext)
    pagepage.edit(text=pagetext, bot=True, summary="(Task 2 trial) Tagging image - No permission - no proper Flickr link- ")
    print("Added {{No permission}}")
    print("Uploader")
    username1 = "User talk:"
    username = username1.encode('utf-8') + PYconfig.user
    pagepage = page.Page(site, username)
    print("main.pagepage2")
    # Test for an existing talk page. Any failure to read it is treated as
    # "page does not exist"; createonly=True below keeps that from overwriting
    # a page that does exist. Exception (not a bare except) lets
    # KeyboardInterrupt / SystemExit propagate.
    try:
        pagetext = pagepage.getWikiText()
        newpage = False
    except Exception:
        pagetext = "{{subst:Welcome}} ~~~~"
        newpage = True
    # Page either exists or starts with a welcome template.
    pnt(pagetext)
    pnt(pagetitle)
    go = allow_bots(pagetext, 'RonBot') # does the uploader's talk page allow bots
    if not go:
        print("no bots on user page")
        print("##################################################################")
        return
    print("bot allowed on talk page")
    pagetext = pagetext + "\n==[[:"
    pagetext = pagetext + pagetitletext
    pagetext = pagetext + "]]==\n"
    pagetext = pagetext + "{{subst:image permission|" + pagetitletext + "}}"
    pnt(pagetext)
    summary = "(Task 2 trial) Please send permission for " + pagetitletext + " to [[Commons:OTRS|OTRS]] ~~~~"
    try:
        # The two calls are kept separate on purpose: createonly is only
        # passed when the page is being created.
        if newpage:
            print("Newpage")
            pagepage.edit(text=pagetext, createonly=True, bot=True, summary=summary)
        else:
            print("Existing Page")
            pagepage.edit(text=pagetext, bot=True, summary=summary)
    except Exception as err:
        print("")
        print("Failed to add")
        print(repr(err))
    else:
        # Reported outside the try so a console-encoding problem is not
        # mistaken for a failed edit.
        pnt("Added {{Please send permission}} to " + username)
    return

def check4templates(pagetext):
    """Return True if ``pagetext`` already carries a template that blocks action.

    The templates checked are No permission, Flickrreview, Delete, No source
    and Dw no source (first letter in either case). The name of the first
    match is printed.

    :param pagetext: wikitext of the page to inspect.
    :returns: True on the first match, False if none of the templates occur.
    """
    checks = (
        (r'\{\{[Nn]o permission', "No permission found"),
        (r'\{\{[Ff]lickrreview', "Flickrreview found"),
        # Two opening braces, like the others; a third brace here would stop
        # ordinary {{delete}} tags from being detected.
        (r'\{\{[Dd]elete', "Delete found"),
        (r'\{\{[Nn]o source', "No source found"),
        (r'\{\{[Dd]w no source', "No source found"),
    )
    for pattern, message in checks:
        if re.search(pattern, pagetext):
            print(message)
            return True
    return False

def SearchReplace(search1, search2, title, size):
    """Run one file-namespace search and queue the recently uploaded hits.

    The query is ``PYconfig.basicsearch`` + ``search1`` + ``search2`` plus an
    exclusion of the "Extracted images" category. All result batches are
    walked via the API's ``sroffset`` continuation.

    For every hit, ``firstrevision`` is called and a report line is appended
    to ``PYconfig.pagelist``. Hits whose first revision is less than 31 days
    old and whose file name does not contain "crop" are also appended to
    ``PYconfig.namelist``.

    :param search1: first search term (the licence category filter).
    :param search2: second search term (the insource pattern).
    :param title: label written into the report line for each hit.
    :param size: number of results to request per API call.
    """
    #:/[Ss]ource *\= *w*\.*[Tt]witter/
    processed = 0
    lastContinue = '0'
    print("============================================")
    searchstr = PYconfig.basicsearch + " " + search1 + " " + search2 + ' -incategory: "Extracted images"'
    print("search =  " + searchstr)
    while True:
        params = {'action':'query',
                  'list':'search',
                  'srsearch':searchstr,
                  'srnamespace':6,
                  'srlimit':size,
                  'sroffset':lastContinue
                  }
        print("SR.params")
        request = api.APIRequest(site, params) #Set the API request
        print("SR.request")
        result = request.query(False)
        totalhits = result['query']['searchinfo']['totalhits']
        print("TotalHits this search %s" % totalhits)
        # Walk the hits that were actually returned instead of indexing up to
        # a computed count, which could run past the end of the list.
        hits = result['query'].get('search', [])
        processed += len(hits)
        print("%s %s %s" % (totalhits, len(hits), processed))
        for hit in hits:
            pagetitle = hit['title']
            datepart = firstrevision(pagetitle) # also sets PYconfig.tstamp
            entry = "* " + datepart + " " + title + " [[:" + pagetitle + "]]"
            PYconfig.pagelist.append(entry)
            timestamp = datetime.datetime.strptime(PYconfig.tstamp, '%Y-%m-%dT%H:%M:%SZ')
            now = datetime.datetime.utcnow()
            print("abusechecks.timestamp2 *%s* *%s*" % (now, timestamp))
            print("")
            if timestamp < now - datetime.timedelta(days=31):
                print('oid image')
            elif "crop" in pagetitle.lower():
                # Test the file name, not the licence label passed as title.
                print("crop image - skip")
            else:
                print('new iamge')
                # NOTE(review): recent files get a second pagelist entry, as
                # in the original code; confirm whether that is intended.
                PYconfig.pagelist.append(entry)
                PYconfig.namelist.append(pagetitle)
        try:
            lastContinue = result['continue']['sroffset']
        except KeyError:
            # No continuation data: this was the last batch.
            print("End of Cat")
            break
        print("continue")
    return

def writepage(title):
    """Write the queued file titles to the wiki page ``title``, one per line.

    Side effects: ``PYconfig.namelist`` is replaced by its de-duplicated form
    (in whatever order ``remove_duplicates`` returns), ``PYconfig.pagelist``
    is sorted in descending order in place, and the page is saved.

    :param title: title of the wiki page to overwrite.
    """
    pagepage = page.Page(site, title)
    # remove_duplicates returns a new list; it must be stored to take effect.
    PYconfig.namelist = remove_duplicates(PYconfig.namelist)
    PYconfig.pagelist.sort(reverse=True)
    pagetext = "".join(line + "\n" for line in PYconfig.namelist)
    print("witing page")
    pagepage.edit(text=pagetext, skipmd5=True, summary="(Task 2 trial) update page")
    print("")
    print("")
    pnt(PYconfig.namelist)
    return

def AddTemplate():
    """Process every title in ``PYconfig.namelist``.

    For each file page:

    * if ``check4templates`` finds a blocking template, nothing is done;
    * otherwise, if ``allow_bots`` permits editing the file page:
      - a page whose text matches ``[Ss]ource ... = ... [Hh]ttps`` gets
        ``{{flickrreview}}`` appended;
      - any other page is handed to ``noperm``.

    A failed save is reported and the loop moves on to the next file. The
    running count of successful additions is printed after each file.
    """
    added = 0
    print (time.ctime())
    print(len(PYconfig.namelist))
    for pagetitle in PYconfig.namelist:
        pagetitletext = pagetitle.encode('utf-8')
        pnt(pagetitle)
        pagepage = page.Page(site, pagetitle, True, False) # dont follow redirects!
        pagetext = pagepage.getWikiText()
        pnt(pagetext)
        if check4templates(pagetext):
            print("NO ACTION")
        elif allow_bots(pagetext, 'RonBot'): # does the file page allow bots
            if re.search(r'[Ss]ource.*?=.*?[Hh]ttps', pagetext):
                print(pagetitletext + " ADDITION bot allowed on article")
                pagetext = pagetext + "\n" + "{{flickrreview}}"
                try:
                    pagepage.edit(text=pagetext, bot=True, summary="(Task 2 Trial) - Addition of flickrreview")
                except Exception as err:
                    # Report the reason rather than hiding it; Exception (not
                    # a bare except) lets KeyboardInterrupt / SystemExit through.
                    print("Failed to write")
                    print(repr(err))
                else:
                    added += 1
                    print("writing changed page")
                    pnt(pagetitle)
                print("++++++++++++++++++++++++++++++++++++++++")
            else:
                noperm(pagetitle)
        print("Added %d" % added)
    return

def main():
    """Run the whole task.

    Exits with status 1 when ``startAllowed`` reports the task is disabled.
    Otherwise: resets ``PYconfig.pagelist``, sets the common search prefix,
    runs ``SearchReplace`` once per licence category, writes the resulting
    list to User:RonBot/2/FlickrSource and finally calls ``AddTemplate``.
    """
    if startAllowed() == "no": #Check if task is enabled
        sys.exit(1)

    # (category name, label used in the report) for each licence searched.
    licences = (
        ('CC-BY-SA-4.0', 'cc-by-sa-4.0'),
        ('CC-BY-4.0', 'cc-by-4.0'),
        ('CC-BY-SA-3.0', 'cc-by-sa-3.0'),
        ('CC-BY-3.0', 'cc-by-3.0'),
        ('CC-BY-SA-2.0', 'cc-by-sa-2.0'),
        ('CC-BY-2.0', 'cc-by-2.0'),
        ('CC-BY-SA-1.0', 'cc-by-sa-1.0'),
        ('CC-BY-1.0', 'cc-by-1.0'),
        ('CC-Zero', 'CC-Zero'),
    )

    PYconfig.pagelist = list()

    # Categories excluded from every search.
    PYconfig.basicsearch = (
        '-incategory:"Items with OTRS permission confirmed"'
        ' -incategory:"Flickr images reviewed by File Upload Bot (Magnus Manske)"'
        ' -incategory:"Flickr images uploaded by Flickr upload bot"'
        ' -incategory:"Flickr review needed"'
        ' -incategory:"Files from external sources with reviewed licenses"'
        ' -incategory:"Flickr images verified by UploadWizard"'
        ' -incategory:"Flickr images reviewed by FlickreviewR"'
        ' -incategory:"Flickr images reviewed by FlickreviewR 2"'
        ' -incategory:"Files from external sources with reviewed licenses"'
        ' -incategory:"Flickr images reviewed by trusted users"'
    )
    # Raw string: the backslashes belong to the search regex, not to Python.
    search = r'insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ff]lickr|https:\/\/www.[Ff]lickr|\[[Hh]ttps:\/\/www.[Ff]lickr|www.[Ff]lickr|[Ff]lickr)/'

    for category, title in licences:
        SearchReplace('incategory:"' + category + '"', search, title, 500)
    writepage("User:RonBot/2/FlickrSource")
    AddTemplate()
    print("End of Main")
       
# Script entry point: main() runs only when this file is executed directly.
if __name__ == "__main__":
    # Suppress FutureWarning messages for the duration of the run; the filter
    # is restored when the with-block exits.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()