User:RonBot/1/Source1

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

PYconfig.py (shared configuration module — save the two lines below as PYconfig.py; the main script imports it and reads/writes these two globals):

pagelist=list()
basicsearch=""

Main program (the bot script itself):

from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import PYconfig


site = wiki.Wiki('https://commons.wikimedia.org/w/api.php') #Wikimedia Commons API endpoint
site.login(userpassbot.username, userpassbot.password) #login with credentials kept in the local userpassbot module

#routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print s, falling back to a UTF-8 encoded byte string when the
    console encoding cannot represent it (filenames may contain accents)."""
    try:
        print(s)
        return
    except UnicodeEncodeError:
        pass
    print(s.encode('utf-8'))
      
def startAllowed():
    """Check the on-wiki run/stop switch.

    Returns "run" when User:RonBot/1/Run contains exactly "Run",
    otherwise "no".
    """
    switch = page.Page(site, "User:RonBot/1/Run").getWikiText()
    return "run" if switch == "Run" else "no"

def allow_bots(text, user):
    """Implement the {{bots}}/{{nobots}} exclusion protocol.

    text -- wikitext of the page about to be edited
    user -- bot username to check against allow/deny lists

    Returns True when the bot may edit the page, False when the page
    opts out via {{nobots}} or a matching deny= entry.

    Fixes over the original: the mixed tab/space indentation (a
    SyntaxError on Python 3) is normalised to spaces, and the
    Python-2-only print statements are written in call form, which is
    valid on both interpreters.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # No {{bots}}/{{nobots}} template at all: editing is allowed.
        return True
    print("template found")  # Have we found one
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        if param.name == 'allow':
            print("We have an ALLOW")  # allow found
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
        elif param.name == 'deny':
            print("We have a DENY")  # deny found
            if ''.join(bots) == 'none':
                # deny=none means nobody is denied.
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    # Bare {{nobots}} with no parameters denies every bot.
    if (tl.name.matches('nobots') and len(tl.params) == 0):
        print("match - false")
        return False
    return True

def remove_duplicates(l):
    """Return a new list with duplicate entries removed (order not preserved)."""
    unique = set(l)
    return list(unique)

def firstrevision(page):
    """Return the date part (text before 'T') of the timestamp of the
    earliest fetched revision of *page*.

    Fix: the original used res['query']['pages'].keys()[0], which fails
    on Python 3 where dict views are not subscriptable; list(...) works
    on both interpreters. The Python-2-only print statement is written
    in call form.

    NOTE(review): this takes the last entry of an rvlimit=max query and
    treats it as the oldest revision; for pages with more revisions than
    one batch returns, that is only the oldest *fetched* revision —
    confirm against the API if exactness matters.
    """
    params = {'action': 'query',
              'prop': 'revisions',
              'titles': page,
              'rvlimit': 'max'
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    pageid = list(res['query']['pages'].keys())[0]
    revisions = res['query']['pages'][pageid]['revisions']
    timestamp = str(revisions[-1]['timestamp'])  # oldest fetched revision
    m = re.search(r'(.*?)T', timestamp)
    datebit = m.group(1)
    print(datebit)
    return datebit


def lastrevision(page):
    """Return the date part (text before 'T') of the timestamp of the
    most recent revision of *page*.

    Fix: same as firstrevision — .keys()[0] is not subscriptable on
    Python 3, so wrap in list(); works on both interpreters.
    """
    params = {'action': 'query',
              'prop': 'revisions',
              'titles': page,
              'rvlimit': '1'
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    pageid = list(res['query']['pages'].keys())[0]
    timestamp = str(res['query']['pages'][pageid]['revisions'][0]['timestamp'])
    m = re.search(r'(.*?)T', timestamp)
    datebit = m.group(1)
    return datebit

def SearchReplace(search1, search2, title, size):
    """Run one Commons fulltext search (File: namespace) and append a
    dated report line per hit to PYconfig.pagelist.

    search1 -- licence incategory:"..." clause
    search2 -- insource:/.../ source-field clause
    title   -- licence label written into each report line
    size    -- requested srlimit per API call (the API caps at 500)

    Fixes over the original: the two bare ``except:`` clauses are
    narrowed to the exceptions actually expected, and the Python-2-only
    print statements are replaced with call-form prints that behave the
    same on both interpreters. The batch-windowing arithmetic is kept
    exactly as written.
    """
    processed = 0
    lastContinue = '0'
    print("============================================")
    searchstr = (PYconfig.basicsearch + " " + search1 + " " + search2 +
                 ' -incategory: "Extracted images"')
    print("search = %s" % searchstr)
    while True:
        params = {'action': 'query',
                  'list': 'search',
                  'srsearch': searchstr,
                  'srnamespace': 6,
                  'srlimit': size,
                  'sroffset': lastContinue
                  }
        print("SR.params")
        result = ""  # clear out previous run
        request = api.APIRequest(site, params)  # Set the API request
        print("SR.request")
        result = request.query(False)
        totalhits = result['query']['searchinfo']['totalhits']
        print("TotalHits this search %s" % totalhits)
        # NOTE(review): windowing below assumes batches of at most 500;
        # preserved unchanged from the original.
        size = totalhits
        if totalhits > 500:
            size = totalhits - processed
            processed = processed + 500
            if size > 500:
                size = 500
        print("%s %s %s" % (totalhits, size, processed))
        if totalhits > 0:
            for loopvar in range(0, size):
                # Narrowed from bare except: a short result page raises
                # IndexError, a malformed entry KeyError.
                try:
                    pagetitle = result['query']['search'][loopvar]['title']
                except (IndexError, KeyError):
                    pagetitle = "Not Found"
                datepart = firstrevision(pagetitle)
                PYconfig.pagelist.append("* " + datepart + " " + title + " [[:" + pagetitle + "]]")
        # Absence of the 'continue' key means the last batch is done.
        try:
            lastContinue = result['continue']['sroffset']
            print("continue")
        except KeyError:
            print("End of Cat")
            break
    return

def writepage(title):
    """Deduplicate and sort the collected report lines (newest date
    first) and save them to the wiki page *title*, dropping any line
    mentioning 'phillip medhurst'.

    Bug fix: the original called remove_duplicates(PYconfig.pagelist)
    and discarded the return value, so duplicate report lines were never
    actually removed. The result is now assigned back before sorting.
    """
    pagetitle = title
    pagepage = page.Page(site, pagetitle)
    PYconfig.pagelist = remove_duplicates(PYconfig.pagelist)
    PYconfig.pagelist.sort(reverse=True)
    pagetext = ""
    for line in PYconfig.pagelist:
        if 'phillip medhurst' not in line.lower():
            pagetext = pagetext + line + "\n"
    print("writing page")  # typo 'witing' fixed
    pagepage.edit(text=pagetext, skipmd5=True, summary="update page")


def main():
    """Build the RonBot 'files claiming an external source' report pages.

    For each source pattern (Facebook, Instagram, Google, Flickr PDM,
    Twitter, Flickr) the bot runs one search per CC licence category,
    then publishes the accumulated hits to a bot subpage.

    Improvement: the original repeated the same search/publish loop six
    times verbatim; the repetition is factored into the local helper
    _runreport. Every search string is reproduced byte-for-byte so the
    server-side queries are unchanged.
    """
    go = startAllowed()  # Check if task is enabled via the on-wiki switch
    if go == "no":
        sys.exit('Disabled Task')

    # One incategory clause and matching report label per CC licence.
    searchlist = ['incategory:"CC-BY-SA-4.0"',
                  'incategory:"CC-BY-4.0"',
                  'incategory:"CC-BY-SA-3.0"',
                  'incategory:"CC-BY-3.0"',
                  'incategory:"CC-BY-SA-2.0"',
                  'incategory:"CC-BY-2.0"',
                  'incategory:"CC-BY-SA-1.0"',
                  'incategory:"CC-BY-1.0"',
                  'incategory:"CC-Zero"']
    title = ['cc-by-sa-4.0', 'cc-by-4.0',
             'cc-by-sa-3.0', 'cc-by-3.0',
             'cc-by-sa-2.0', 'cc-by-2.0',
             'cc-by-sa-1.0', 'cc-by-1.0',
             'CC-Zero']

    def _runreport(search, outpage):
        # One search per licence category, then publish and reset the list.
        for x in range(len(searchlist)):
            SearchReplace(searchlist[x], search, title[x], 500)
        writepage(outpage)
        PYconfig.pagelist = list()

    PYconfig.pagelist = list()
    PYconfig.basicsearch = '-incategory:"Items with OTRS permission confirmed" -incategory:"Files from external sources with reviewed licenses" -incategory:"CC-PD-Mark" -incategory:"PD ineligible"'

    _runreport('insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ff]acebook|https:\/\/www.[Ff]acebook|Https:\/\/[Ff]acebook|https:\/\/[Ff]acebook|www.[Ff]acebook|[Ff]acebook)/',
               "user:RonBot/FacebookSource")
    _runreport('insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ii]nstagram|https:\/\/www.[Ii]nstagram|Https:\/\/[Ii]nstagram|https:\/\/[Ii]nstagram|www.[Ii]nstagram|[Ii]nstagram)/',
               "user:RonBot/InstagramSource")
    # NOTE(review): '[[Gg]oogle' below looks like a typo for '[Gg]oogle',
    # but it is a server-side search pattern — kept byte-identical so the
    # query results do not change; confirm before correcting.
    _runreport('insource:/[Ss]ource *\= *(?:Https:\/\/www.[Gg]oogle|https:\/\/www.[Gg]oogle|www.[Gg]oogle|[[Gg]oogle)/ -insource:/[Ss]ource *\= *w*\.*[Gg]oogle [Aa]rt/ -insource:/[Ss]ource *\= *w*\.*[Gg]oogle [Bb]ooks/',
               "user:RonBot/GoogleSource")
    _runreport('insource:"{{FlickrVerifiedByUploadWizard|Public Domain Mark}}" -incategory:"Flickr public domain images needing specific copyright tags"',
               "user:RonBot/PDMSource")
    _runreport('insource:/[Ss]ource *\= *(?:Https:\/\/www.[Tt]witter|https:\/\/www.[Tt]witter|Https:\/\/[Tt]witter|https:\/\/[Tt]witter|www.[Tt]witter|[Tt]witter)/',
               "user:RonBot/TwitterSource")

    # Flickr gets a different exclusion list (already-reviewed images).
    PYconfig.basicsearch = '-incategory:"Items with OTRS permission confirmed" -incategory:"Flickr images reviewed by File Upload Bot (Magnus Manske)" -incategory:"Flickr images uploaded by Flickr upload bot"'
    PYconfig.basicsearch = PYconfig.basicsearch + ' -incategory:"Flickr review needed" -incategory:"Files from external sources with reviewed licenses" -incategory:"Flickr images verified by UploadWizard" -incategory:"Flickr images reviewed by FlickreviewR" -incategory:"Flickr images reviewed by FlickreviewR 2" -incategory:"Files from external sources with reviewed licenses" -incategory:"Flickr images reviewed by trusted users"'
    _runreport('insource:/[Ss]ource *\= *(?:Https:\/\/www.[Ff]lickr|https:\/\/www.[Ff]lickr|www.[Ff]lickr|[Ff]lickr)/',
               "user:RonBot/FlickrSource")
       
# Entry point: run the bot with FutureWarning output suppressed for the
# duration of the run.
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()