User:Inkwina/catlistcount.py
Jump to navigation
Jump to search
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Rank the images of a Commons category by how often they are used.

The script walks the members of ``Whichcategory`` through the MediaWiki API,
asks the CheckUsage tool where each image is used, keeps the running counts
in a ``shelve`` cache so an interrupted run can resume, and finally builds
the wikitext report in ``output``.

This is Python 2 code (``urllib.urlopen`` / ``urllib.urlencode``, ``cmp``).
"""
import shelve
import urllib
import simplejson
import time
import re
import mwclient

Howmany = 200
Whichcategory = 'Category:Images that should use vector graphics'
Wheretosave = u'Top 200 Images that should use vector graphics by usage'

shelffile = "./catlistcount.cache"

apiurl = "http://commons.wikimedia.org/w/api.php"
apiparams = {
    'format': "json",
    'action': "query",
    'list': "categorymembers",
    'cmlimit': "50",
    'cmprop': 'title',
    'cmtitle': Whichcategory,
}

checkusageurl = "http://toolserver.org/~daniel/WikiSense/CheckUsage.php"
checkusageparams = {
    # The two '' literals in this script were swallowed as wiki italics
    # markup when the page was rendered; they are restored here.
    'i': '',        # filename
    'w': '_wp_20',  # which wikis to check (top 20 wikipedias not to kill server)
    'x': 'main',    # what kind of pages
    'r': 'on',      # RAW
    'b': '1',       # not Bulk, we check 1 by 1
}

# Matches a "[wiki] count" header line of the raw CheckUsage output.
wikire = re.compile(r'\s*\[([^\]]*)\]\s*(\d*)')

datastore = shelve.open(shelffile, writeback=True)
if "items" not in datastore:  # new file
    datastore["items"] = {}
    datastore["all-done"] = False
elif "query-continue" in datastore:
    # Pick up where we left off last time. The key is only written when the
    # API returned a continuation, so it may legitimately be absent.
    apiparams["cmcontinue"] = datastore["query-continue"]

# NOTE(review): the original indentation was lost. The CheckUsage request is
# placed inside this loop because the file list is reset at the top of every
# iteration and the loop ends with a rate-limiting sleep -- confirm.
while not datastore["all-done"]:
    checkusageparams['i'] = ''
    query = urllib.urlopen(apiurl, urllib.urlencode(apiparams))
    data = simplejson.load(query)
    members = data["query"]["categorymembers"]

    for item in members:
        Fname = item["title"]
        if item["ns"] != 0:
            # Strip only the namespace prefix; file names may contain ':'.
            Fname = Fname.split(':', 1)[-1]
        Fname = Fname.encode('UTF-8').replace(' ', '_')
        datastore["items"][Fname] = {"ns": item["ns"]}
        if item["ns"] == 6:  # pick out Image:
            checkusageparams['i'] += Fname + "\n"
            datastore["items"][Fname]["countof"] = {}
            datastore["items"][Fname]["counttotal"] = 0
            datastore["items"][Fname]["checked"] = False
        print("Added: " + Fname)
    datastore.sync()

    print("--- Cached Data ---")
    if members:  # an empty batch has no first/last title to report
        print("From: " + members[0]["title"])
        print("To: " + members[-1]["title"])

    if "query-continue" in data:
        apiparams["cmcontinue"] = (
            data["query-continue"]["categorymembers"]["cmcontinue"]
            .encode("UTF-8"))
        datastore["query-continue"] = apiparams["cmcontinue"]
        datastore.sync()
    else:
        datastore["all-done"] = True

    # Ask CheckUsage about every image of this batch in a single request.
    query = urllib.urlopen(checkusageurl, urllib.urlencode(checkusageparams))
    whichwiki = None
    for line in query.readlines():
        header = wikire.match(line)
        if header is not None:
            print(header.group(1) + " : " + header.group(2))
            whichwiki = header.group(1)
            continue
        try:
            _page, fname = line.split()
        except ValueError:
            # Not a "<page> <file>" usage line.
            continue
        entry = datastore["items"].get(fname)
        if whichwiki is None or entry is None or "countof" not in entry:
            # Usage reported before any wiki header, or for a name that was
            # not cached as an image: nothing to attribute the hit to.
            continue
        entry["countof"][whichwiki] = entry["countof"].get(whichwiki, 0) + 1
        entry["counttotal"] += 1
        print(fname + "," + whichwiki + ","
              + str(entry["countof"][whichwiki]) + ","
              + str(entry["counttotal"]))
    datastore.sync()
    time.sleep(2)

# Aggregate the per-image counts into per-wiki totals.
datastore["wikis"] = {}
ftotal = 0
for item in datastore["items"].values():
    if item['ns'] == 6:
        ftotal += int(item["counttotal"])
        for w, c in item['countof'].items():
            datastore["wikis"][w] = datastore["wikis"].get(w, 0) + c
datastore.sync()

# NOTE(review): this text is reproduced as it appears on the rendered page;
# any link/signature markup it originally carried is not recoverable here.
output = (
    " This Page is an Automatically generated list of the 200 most used"
    " Images that should use vector graphics"
    " The code for making this list is available here"
    " The images are only checkd for use in Articles (not talk pages etc.)"
    " on the 20 largest wikipedias"
    " --Inkwina (talk · contribs)\n"
)
output += "\nLast Update " + time.strftime("%a, %d %b %Y %H:%M:%S %Z") + "\n"

wtotal = sum(int(x) for x in datastore["wikis"].values())

output += "\n*Items in Total: " + str(len(datastore["items"]))
output += "\n**Total use(from wikis) : " + str(wtotal)
output += "\n**Total use(from files) : " + str(ftotal)
output += "\n----\n"

# (count, wiki) pairs, most used wiki first.
wikisort = sorted(
    [(v, k) for k, v in datastore["wikis"].items()], reverse=True)
for w, v in wikisort:
    output += "\n# " + str(v) + ": " + str(w)


def mycmp(x, y):
    """Order two cached file names by descending total usage count."""
    return -cmp(datastore["items"][x]["counttotal"],
                datastore["items"][y]["counttotal"])


sortall = [x for x in datastore["items"] if datastore["items"][x]["ns"] == 6]
sortall.sort(mycmp)

# NOTE(review): the statements opening this listing were destroyed when the
# page was rendered; only the caption expression survived. It is rebuilt
# here as a <gallery> of the top entries -- confirm against the page source.
output += "\n<gallery>"
for x in range(min(Howmany, len(sortall))):  # the category may be short
    output += ("\nImage:" + sortall[x] + "|"
               + str(x + 1) + ". Used "
               + str(datastore["items"][sortall[x]]["counttotal"])
               + " times [[:Image:" + sortall[x] + "]] ")
output += "\n</gallery>"