User:LifeBot/Code
Jump to navigation
Jump to search
# -*- coding: utf-8 -*-
"""
lifebot FAMILIA

Create the subcategory named 'FAMILIA (Indexed)' if it doesn't exist.
The new category page is itself tagged 'Category:FAMILIA' and
'Category:Plantae by familia (Indexed)'.

Collect all images that are linked from the articles (non-image pages)
listed in the category FAMILIA. For every one of these images:
1. Add the tag 'Category:FAMILIA (Indexed)' if it isn't there.
2. Remove all tags 'Category:FAMILIA' if there.

Usage: pass one FAMILIA name per command-line argument.
"""
#
# (C) R Stephan 2006
#
# Distributed under the terms of the GPL2.
#
__version__ = '0.10'
#
import re
import sys

import catlib
import config
import wikipedia

wikipedia.get_throttle.setDelay(5)
wikipedia.put_throttle.setDelay(10)

msg = {
    'en': 'LifeBot:Tree of Life maintenance',
}


def _console(page):
    """Return the page's wiki link encoded for printing on the console."""
    return page.aslink().encode(config.console_encoding, 'replace')


def _retag(text, fam_re, fami_str):
    """Move an image description from the plain to the indexed category.

    text     -- current wikitext of the image page
    fam_re   -- compiled pattern matching the plain 'Category:FAMILIA' tag
    fami_str -- literal '[[Category:FAMILIA (Indexed)]]' tag

    Returns a tuple (new_text, changed). Every plain tag is removed and the
    indexed tag is appended when it is missing; 'changed' is False only when
    the text already carries the indexed tag and no plain tag.
    """
    has_indexed = fami_str in text
    has_plain = fam_re.search(text) is not None
    if has_indexed and not has_plain:
        return text, False
    new_text = fam_re.sub('', text)
    if not has_indexed:
        new_text = new_text + '\n' + fami_str
    return new_text, True


def main(FAMILIA):
    """Index the images used by the articles of category FAMILIA.

    FAMILIA -- category name without the 'Category:' prefix.

    Creates 'Category:FAMILIA (Indexed)' when it does not exist, gathers the
    images linked from every non-image page in 'Category:FAMILIA', and
    retags each of those images (see _retag). Image pages that cannot be
    read because they are missing or are redirects are skipped.
    """
    # TODO: catch more read/write errors gracefully
    site = wikipedia.getSite()
    pl = catlib.Category(site, 'Category:' + FAMILIA)

    # Create index cat if it doesn't exist
    indexedcat = ('This category is for photos of ' + FAMILIA
                  + ' which have been indexed in a ' + FAMILIA
                  + ' Commons article. [[Category: ' + FAMILIA
                  + ']] [[Category:Plantae by familia (Indexed)]] ')
    pli = catlib.Category(site, 'Category:' + FAMILIA + ' (Indexed)')
    if not pli.exists():
        print('---> Index cat does not exist. Creating... \n')
        pli.put(indexedcat, 'Maintenance category')

    # Get list of pages in FAMILIA category (but not subcategories)
    pages = pl.articles()
    print('---> number of pages in  %s :  %s' % (FAMILIA, len(pages)))

    # Read all article pages, make list of images in all article pages
    num_arts = 0
    indexed_images = []
    for page in pages:
        if page.isImage():
            continue
        print('---> Reading article ' + _console(page))
        num_arts += 1
        indexed_images.extend(page.imagelinks())
    indexed_images = catlib.unique(indexed_images)
    print('---> number of articles in  %s :  %s' % (FAMILIA, num_arts))
    print('---> number of images linked from articles in  %s :  %s'
          % (FAMILIA, len(indexed_images)))

    # Prepare patterns for search/replace.
    # The name is escaped so regex metacharacters in it are taken literally,
    # and the optional sort key may not cross a ']' or a line break: the
    # pattern therefore stops at the tag's own closing brackets and matches
    # neither '[[Category:FAMILIA (Indexed)]]' nor longer category names
    # that merely start with FAMILIA.
    fam_re = re.compile(
        r'\[\[ *category *: *%s *(?:\|[^\]\n]*)?\]\]' % re.escape(FAMILIA),
        re.IGNORECASE)
    fami_str = '[[Category:' + FAMILIA + ' (Indexed)]]'

    # Make changes to image
    for image in indexed_images:
        try:
            text = image.get()
        except (wikipedia.NoPage, wikipedia.IsRedirectPage):
            # Nothing to retag on a missing or redirected description page.
            continue
        text, changed = _retag(text, fam_re, fami_str)
        if changed:
            image.put(text, 'Plant image indexed in species article')
            print('---> Changed ' + _console(image))
        else:
            print('---> Unchanged ' + _console(image))


if __name__ == '__main__':
    try:
        for arg in sys.argv[1:]:
            if arg:
                main(arg)
    finally:
        # Release the throttle once, after all families were processed
        # (or on the first uncaught error).
        wikipedia.stopme()