User:Inkwina/catlistcount3.py
Jump to navigation
Jump to search
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Rank the non-icon members of a Commons category by article usage.

The script walks ``Whichcategory`` through the MediaWiki API in batches,
drops every image that counts as an icon (too few pixels or too few bytes),
asks the CheckUsage tool how often each remaining file is used, caches the
running counts in a ``shelve`` file so an interrupted run can be resumed,
and finally saves a wiki-text report (totals, per-wiki totals and a gallery
of the ``Howmany`` most used files) to ``Wheretosave``.

The code sticks to syntax that is valid on both Python 2 and Python 3.
"""

import re
import shelve
import time
import urllib
from contextlib import closing

try:
    import simplejson
except ImportError:  # the standard library module offers the same loads()
    import json as simplejson

try:
    import mwclient
except ImportError:  # only needed for the final upload; checked in main()
    mwclient = None

try:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2 keeps both functions directly in ``urllib``
    urlencode = urllib.urlencode
    urlopen = urllib.urlopen

Howmany = 200            # number of gallery entries in the report
iconpix = 128 * 128      # an image needs more pixels than this ...
iconsize = 10 * 1024     # ... and more bytes than this to be a non-icon
Whichcategory = 'Category:Vector version available'
Wheretosave = u'User:Inkwina/Top 200 Non-Icons which have a Vector version available by usage'
shelffile = "./catlistcount3.cache"

# http://commons.wikimedia.org/w/api.php?format=jsonfm&action=query&generator=categorymembers&gcmtitle=Category:Vector%20version%20available&prop=imageinfo&iiprop=size&gcmnamespace=6
apiurl = "http://commons.wikimedia.org/w/api.php"
apiparams = {
    'format': "json",
    'action': "query",
    'prop': 'imageinfo',
    'iiprop': 'size',
    'generator': 'categorymembers',
    'gcmnamespace': '6',
    'gcmlimit': "50",
    'gcmtitle': Whichcategory,
}

# NOTE(review): this is a Toolserver-era endpoint; confirm it still exists.
checkusageurl = "http://tools.wikimedia.de/~daniel/WikiSense/CheckUsage.php"
checkusageparams = {
    'i': '',        # filename
    'w': '_wp_20',  # which wikis to check (top 20 wikipedias not to kill server)
    'x': 'main',    # what kind of pages
    'r': 'on',      # RAW
    'b': '1',       # not Bulk, we check 1 by 1
}

# Matches a "[wiki.name] count" header line in the raw CheckUsage reply.
wikire = re.compile(r'\s*\[([^\]]*)\]\s*(\d*)')

# Static introduction of the generated page.  The wording is unchanged; the
# line breaks are reconstructed because the published copy of this script
# lost them.
REPORT_INTRO = """
This Page is an Automatically generated list of the 200 most used Non-Icon Images with a [[:Category:Vector version available|Vector version available]].
For the purpuses of the page an Icon is any image that is less than 128x128px (specifically has less than 16384 pixels) OR is smaller than 10K (10240 bytes) is size.

The code for making this list is available [[User:Inkwina/catlistcount3.py|here]]

The images are only checked for use in Articles (not talk pages etc.) on the 20 largest wikipedias

--{{User|Inkwina}}
----
[[Category:Vector version available|Top 200 Non-Icons which have a Vector version available by usage]]
"""


def native_str(value):
    """Return *value* as the interpreter's native ``str`` type.

    Python 3 ``bytes`` are decoded from UTF-8 and Python 2 ``unicode`` is
    encoded to UTF-8, so file names, dictionary keys and report text always
    have one consistent type.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):  # only reachable on Python 3
        return value.decode("UTF-8", "replace")
    return value.encode("UTF-8")  # Python 2 unicode


def strip_namespace(title):
    """Return *title* without its leading ``Namespace:`` prefix.

    Only the first colon is treated as the separator, so a file name that
    itself contains a colon is kept whole.
    """
    return title.split(':', 1)[-1]


def file_name_from_title(title):
    """Return the underscore form of a page title's file name."""
    return native_str(strip_namespace(title)).replace(' ', '_')


def is_non_icon(info):
    """Tell whether one ``imageinfo`` entry describes a non-icon image.

    Raises ``KeyError`` when the entry lacks ``width``, ``height`` or
    ``size``.
    """
    pixels = info['width'] * info['height']
    return pixels > iconpix and int(info['size']) > iconsize


def next_continue_token(data):
    """Return the ``gcmcontinue`` value of an API reply, or ``None``.

    Both the legacy ``query-continue`` layout and the top-level ``continue``
    layout of newer MediaWiki releases are recognised.
    """
    legacy = data.get("query-continue", {}).get("categorymembers", {})
    token = legacy.get("gcmcontinue") or data.get("continue", {}).get("gcmcontinue")
    return native_str(token) if token else None


def record_usage(lines, items):
    """Add the usages listed in a raw CheckUsage reply to *items*.

    *lines* is an iterable of reply lines: ``[wiki] count`` headers followed
    by ``page image`` rows.  *items* maps file names to their cache entries
    and is updated in place.  Rows that appear before any header, that do
    not consist of exactly two fields, or that name an unknown image are
    ignored.
    """
    whichwiki = None
    for raw in lines:
        line = native_str(raw)
        header = wikire.match(line)
        if header is not None:
            whichwiki = header.group(1)
            print(whichwiki + " : " + header.group(2))
            continue
        fields = line.split()
        if whichwiki is None or len(fields) != 2:
            continue
        image = fields[1]
        entry = items.get(image)
        if entry is None:
            continue
        entry["countof"][whichwiki] = entry["countof"].get(whichwiki, 0) + 1
        entry["counttotal"] += 1
        print("%s,%s,%d,%d" % (image, whichwiki,
                               entry["countof"][whichwiki],
                               entry["counttotal"]))


def wiki_totals(items):
    """Return ``(uses per wiki, sum of all per-file totals)`` for *items*."""
    per_wiki = {}
    ftotal = 0
    for entry in items.values():
        ftotal += int(entry["counttotal"])
        for wiki, count in entry["countof"].items():
            per_wiki[wiki] = per_wiki.get(wiki, 0) + count
    return per_wiki, ftotal


def build_report(items, howmany=Howmany, timestamp=None):
    """Return the wiki text of the report page.

    *items* maps file names to cache entries, *howmany* caps the number of
    gallery entries (fewer are listed when fewer files are known) and
    *timestamp* replaces the current local time in the "Last Update" line.
    """
    if timestamp is None:
        timestamp = time.strftime("%a, %d %b %Y %H:%M:%S %Z")
    per_wiki, ftotal = wiki_totals(items)
    wtotal = sum(int(count) for count in per_wiki.values())

    parts = [
        REPORT_INTRO,
        "\n'''Last Update " + timestamp + "'''\n",
        "\n*Items in Total: %d" % len(items),
        "\n**Total use(from wikis) : %d" % wtotal,
        "\n**Total use(from files) : %d" % ftotal,
        "\n----\n",
    ]

    # Busiest wiki first.
    by_count = sorted(((count, wiki) for wiki, count in per_wiki.items()),
                      reverse=True)
    for count, wiki in by_count:
        parts.append("\n# %s: %s" % (wiki, count))

    parts.append("\n<gallery>\n")
    # Most used file first; equal counts are ordered by name so the page is
    # reproducible from one run to the next.
    ranked = sorted(items, key=lambda name: (-items[name]["counttotal"], name))
    for rank, name in enumerate(ranked[:howmany], 1):
        entry = items[name]
        line = "Image:%s|%d. Used %d times [[:Image:%s]]" % (
            name, rank, entry["counttotal"], name)
        detail = ", ".join(" %s: %s" % pair
                           for pair in sorted(entry["countof"].items()))
        if detail:
            line += " " + detail
        parts.append(line + ".\n")
    parts.append("</gallery>\n")
    return "".join(parts)


def fetch_json(url, params):
    """POST *params* to *url* and return the decoded JSON reply."""
    with closing(urlopen(url, urlencode(params).encode("ascii"))) as reply:
        return simplejson.loads(reply.read().decode("UTF-8"))


def fetch_lines(url, params):
    """POST *params* to *url* and return the reply as a list of lines."""
    with closing(urlopen(url, urlencode(params).encode("ascii"))) as reply:
        return reply.readlines()


def crawl(datastore):
    """Fill ``datastore["items"]`` with usage counts for the whole category.

    *datastore* is the open shelf.  The continuation token is stored only
    after a batch's usage counts have been recorded, so an interrupted run
    repeats the unfinished batch instead of leaving it uncounted.
    """
    if "items" not in datastore:  # new cache file
        datastore["items"] = {}
        datastore["all-done"] = False
    elif datastore.get("query-continue"):
        # pick up where we left last time
        apiparams["gcmcontinue"] = datastore["query-continue"]
        print("Continuing from: %s" % apiparams["gcmcontinue"])

    while not datastore.get("all-done", False):
        data = fetch_json(apiurl, apiparams)
        pages = data.get("query", {}).get("pages", {})

        batch = []
        titles = []
        for item in pages.values():
            try:
                title = native_str(item["title"])
                # imageinfo is a list holding a single dictionary
                keep = is_non_icon(item["imageinfo"][0])
            except (KeyError, IndexError):
                print("Gagged on: %s" % (item,))
                continue
            titles.append(title)
            if keep:
                fname = file_name_from_title(title)
                batch.append(fname)
                datastore["items"][fname] = {"countof": {},
                                             "counttotal": 0,
                                             "checked": False}
                print("Added: " + fname)
            else:
                print("Skipped: " + strip_namespace(title))

        if batch:
            checkusageparams['i'] = "\n".join(batch) + "\n"
            record_usage(fetch_lines(checkusageurl, checkusageparams),
                         datastore["items"])
            for fname in batch:
                datastore["items"][fname]["checked"] = True

        token = next_continue_token(data)
        if token is None:
            datastore["all-done"] = True
        else:
            apiparams["gcmcontinue"] = token
            datastore["query-continue"] = token
        datastore.sync()

        print("--- Cached Data ---")
        if titles:
            print("From: " + titles[0])
            print("To: " + titles[-1])

        if token is not None:
            time.sleep(2)  # be gentle with the servers between batches


def publish(text):
    """Save *text* to the ``Wheretosave`` page on Commons."""
    site = mwclient.Site('commons.wikimedia.org')
    # NOTE(review): the credentials are empty strings in this copy of the
    # script; they have to be filled in before running.
    site.login("", "")
    page = site.Pages[Wheretosave]
    page.save(text, summary=u'Inkwina Bot Update')


def main():
    """Crawl the category, build the report, print it and upload it."""
    if mwclient is None:
        # Fail before the long crawl rather than after it.
        raise SystemExit("mwclient is required to upload the report")

    datastore = shelve.open(shelffile, writeback=True)
    try:
        crawl(datastore)
        datastore["wikis"] = wiki_totals(datastore["items"])[0]
        output = build_report(datastore["items"])
    finally:
        datastore.close()

    print(output)
    publish(output)


if __name__ == "__main__":
    main()