# User:Inkwina/catlistcount2.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Publish a gallery of the most-used images that have a vector version.

The script walks the members of ``Whichcategory`` through the MediaWiki API,
posts the file names to the CheckUsage tool and counts the usage lines it
returns per wiki, keeps all intermediate results in a ``shelve`` cache so an
interrupted run can be resumed, and finally saves a wikitext report to the
page named by ``Wheretosave``.

Written against the Python 2 ``urllib`` API (``urlopen``/``urlencode``).
"""

import re
import shelve
import time
import urllib

import mwclient
import simplejson

Howmany = 200
Whichcategory = 'Category:Vector version available'
Wheretosave = u'Top 200 Images which have a Vector version available by usage'

shelffile = "./catlistcount2.cache"
apiurl = "http://commons.wikimedia.org/w/api.php"
apiparams = {
    'format': "json",
    'action': "query",
    'list': "categorymembers",
    'cmlimit': "50",
    'cmprop': 'title',
    'cmtitle': Whichcategory,
}

checkusageurl = "http://tools.wikimedia.de/~daniel/WikiSense/CheckUsage.php"
checkusageparams = {
    'i': '',        # filename
    'w': '_wp_20',  # which wikis to check (top 20 wikipedias not to kill server)
    'x': 'main',    # what kind of pages
    'r': 'on',      # RAW
    'b': '1',       # not Bulk, we check 1 by 1
}

# Matches a "[wikiname] count" header line in the raw CheckUsage reply.
wikire = re.compile(r'\s*\[([^\]]*)\]\s*(\d*)')

# NOTE(review): the line breaks inside this template were reconstructed from
# a whitespace-collapsed copy of the script -- confirm against the live page.
_REPORT_INTRO = """
This Page is an Automatically generated list of the 200 most used Images with a [[:Category:Vector version available|Vector version available]]

The code for making this list is available [[User:Inkwina/catlistcount2.py|here]]

The images are only checkd for use in Articles (not talk pages etc.) 
on the 20 largest wikipedias
--{{User|Inkwina}}
----
[[Category:Vector version available|* Top 200 by Usage]]
"""


def _title_to_filename(title):
    """Return the UTF-8 file name for a page *title*.

    Only the namespace prefix (everything up to the first colon) is dropped,
    so a colon inside the file name itself is kept.  Spaces become
    underscores.
    """
    name = title.split(':', 1)[-1]
    return name.encode('UTF-8').replace(' ', '_')


def _fetch_json(url, params):
    """POST *params* to *url* and return the decoded JSON reply."""
    reply = urllib.urlopen(url, urllib.urlencode(params))
    try:
        return simplejson.load(reply)
    finally:
        reply.close()


def _record_usage(items, lines):
    """Add the usage reported in the raw CheckUsage *lines* to *items*.

    A line matching ``wikire`` names the wiki that the following lines
    belong to; every other line that splits into exactly two fields
    (page, file) counts as one use of that file on that wiki.  Anything
    else is ignored.
    """
    whichwiki = None
    for line in lines:
        header = wikire.match(line)
        if header is not None:
            print(header.group(1) + " : " + header.group(2))
            whichwiki = header.group(1)
            continue

        try:
            _page, fname = line.split()
        except ValueError:
            # Blank lines and lines without exactly two fields carry no usage.
            continue
        if whichwiki is None:
            # Usage line seen before any wiki header: nothing to credit it to.
            continue

        entry = items.get(fname)
        if entry is None or "countof" not in entry:
            print("Skipped unknown file: " + fname)
            continue

        entry["countof"][whichwiki] = entry["countof"].get(whichwiki, 0) + 1
        entry["counttotal"] += 1
        print(fname + "," + whichwiki + "," +
              str(entry["countof"][whichwiki]) + "," + str(entry["counttotal"]))


def _crawl_category(datastore):
    """Fetch the category members batch by batch and count their usage.

    Progress lives in *datastore* under ``items``, ``query-continue`` and
    ``all-done``.  The continuation token is stored only after a batch's
    usage has been recorded, so a resumed run repeats an unfinished batch
    (whose counters are reset when its items are re-added) instead of
    skipping it.
    """
    if "items" not in datastore:  # new cache file
        datastore["items"] = {}
        datastore["all-done"] = False
    elif "query-continue" in datastore:
        # Pick up where we left last time.  The key is absent when the
        # previous run never completed its first batch.
        apiparams["cmcontinue"] = datastore["query-continue"]

    while not datastore["all-done"]:
        data = _fetch_json(apiurl, apiparams)
        members = data["query"]["categorymembers"]

        # With writeback=True, sync() empties the shelf's cache, so
        # datastore["items"] must be looked up again after every sync().
        items = datastore["items"]
        filenames = []
        for member in members:
            fname = _title_to_filename(member["title"])
            items[fname] = {"ns": member["ns"]}
            if member["ns"] == 6:  # pick out Image:
                filenames.append(fname)
                items[fname]["countof"] = {}
                items[fname]["counttotal"] = 0
                items[fname]["checked"] = False
                print("Added: " + fname)
        datastore.sync()

        print("--- Cached Data ---")
        if members:
            print("From: " + members[0]["title"])
            print("To: " + members[-1]["title"])

        next_token = None
        if "query-continue" in data:
            next_token = data["query-continue"]["categorymembers"][
                "cmcontinue"].encode("UTF-8")

        checkusageparams['i'] = "".join(name + "\n" for name in filenames)
        if filenames:
            reply = urllib.urlopen(checkusageurl,
                                   urllib.urlencode(checkusageparams))
            try:
                _record_usage(datastore["items"], reply.readlines())
            finally:
                reply.close()

        # Checkpoint: this batch is complete only now.
        if next_token is None:
            datastore["all-done"] = True
        else:
            apiparams["cmcontinue"] = next_token
            datastore["query-continue"] = next_token
        datastore.sync()
        time.sleep(2)  # pause between batches


def _tally_usage(items):
    """Return ``(per_wiki_totals, total_uses)`` summed over all files."""
    wikis = {}
    ftotal = 0
    for item in items.values():
        if item['ns'] != 6:
            continue
        ftotal += int(item["counttotal"])
        for wiki, count in item['countof'].items():
            wikis[wiki] = wikis.get(wiki, 0) + count
    return wikis, ftotal


def _build_report(items, wikis, ftotal):
    """Return the wikitext report: summary, per-wiki totals and gallery.

    The gallery lists at most ``Howmany`` files, most used first; fewer
    files than that simply give a shorter gallery.
    """
    wtotal = sum(int(count) for count in wikis.values())
    parts = [
        _REPORT_INTRO,
        "\n'''Last Update " + time.strftime("%a, %d %b %Y %H:%M:%S %Z") + "'''\n",
        "\n*Items in Total: " + str(len(items)),
        "\n**Total use(from wikis) : " + str(wtotal),
        "\n**Total use(from files) : " + str(ftotal),
        "\n----\n",
    ]

    by_count = sorted(((count, wiki) for wiki, count in wikis.items()),
                      reverse=True)
    for count, wiki in by_count:
        parts.append("\n# " + str(wiki) + ": " + str(count))

    files = [name for name in items if items[name]["ns"] == 6]
    files.sort(key=lambda name: items[name]["counttotal"], reverse=True)

    parts.append("\n<gallery>\n")
    for rank, name in enumerate(files[:Howmany], 1):
        entry = items[name]
        # Joining (rather than trimming a trailing ", ") keeps the closing
        # "]]" of the link intact for a file with no recorded usage.
        per_wiki = ", ".join(" " + str(wiki) + ": " + str(count)
                             for wiki, count in entry["countof"].items())
        parts.append("Image:" + name +
                     "|" + str(rank) + ". Used " + str(entry["counttotal"]) +
                     " times [[:Image:" + name + "]] " + per_wiki + ".\n")
    parts.append("</gallery>\n")
    return "".join(parts)


def _publish(text):
    """Save *text* to the ``Wheretosave`` page on commons.wikimedia.org."""
    site = mwclient.Site('commons.wikimedia.org')
    # Placeholder credentials: fill in locally, never publish real ones.
    site.login("usernae", "passwrd")
    page = site.Pages[Wheretosave]
    page.save(text, summary=u'Inkwina Bot Update')


def main():
    """Crawl (or resume crawling), build the report and upload it."""
    datastore = shelve.open(shelffile, writeback=True)
    try:
        _crawl_category(datastore)
        wikis, ftotal = _tally_usage(datastore["items"])
        datastore["wikis"] = wikis
        output = _build_report(datastore["items"], wikis, ftotal)
    finally:
        # close() also writes back anything still held in the cache.
        datastore.close()
    _publish(output)


if __name__ == "__main__":
    main()