User:LifeBot/Code

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

# -*- coding: utf-8 -*-
"""
lifebot FAMILIA
Create the subcategory named 'FAMILIA (Indexed)' if it doesn't exist. This category itself carries the tags 'Category:FAMILIA' and 'Category:Plantae by familia (Indexed)'.
Collect all names of images that are included in articles under the category FAMILIA.
For each of these images:
   1. Add the tag 'Category:FAMILIA (Indexed)' if it isn't already present.
   2. Remove all 'Category:FAMILIA' tags if present. (Alternatively, just rename.)

Options:

"""
#
# (C) R Stephan 2006
#
# Distributed under the terms of the GPL2.
# 
__version__ = '0.10'
#

import wikipedia,re,sys,config
import catlib

# Slow the bot down: writes use a longer delay (10) than reads (5).
# NOTE(review): the delay unit is presumably seconds -- confirm against the
# throttle implementation in the wikipedia module.
wikipedia.put_throttle.setDelay(10)
wikipedia.get_throttle.setDelay(5)

# Summary text keyed by language code. It is not referenced by the code
# visible in this file.
msg = {'en': 'LifeBot:Tree of Life maintenance'}

def main (FAMILIA):
# TODO: catch more read/write errors gracefully
	site = wikipedia.getSite()
	pl = catlib.Category (site, 'Category:'+FAMILIA)
	subcats = pl.subcategories (recurse = False);
	
	# Create index cat if it doesn't exist
	indexedcat = 'This category is for photos of '+FAMILIA+' which have been indexed in a '+FAMILIA+""" Commons article.

[[Category: """+FAMILIA+"""]]
[[Category:Plantae by familia (Indexed)]]
"""

	pli = catlib.Category (site, 'Category:'+FAMILIA+' (Indexed)')
	if not pli.exists():
		print '---> Index cat does not exist. Creating... '
		pli.put(indexedcat, 'Maintenance category')
	
	# Get list of pages in FAMILIA category (but not subcategories)
	pages = pl.articles()
	print '---> number of pages in ',FAMILIA,': ',len(pages)
	
	# Read all article pages, make list of images in all article pages
	num_arts = 0
	indexed_images = []
	for page in pages:
		if not page.isImage():
			print '---> Reading article '+ page.aslink().encode(config.console_encoding, 'replace')
			num_arts = num_arts + 1
			indexed_images = indexed_images + page.imagelinks()
	indexed_images = catlib.unique (indexed_images)
	print '---> number of articles in ',FAMILIA,': ',num_arts
	print '---> number of images linked from articles in ',FAMILIA,': ',len(indexed_images)
	
	# Prepare patterns for search/replace
	fam_re = re.compile ('\[\[ *[Cc]ategory *: *%s *\|*.*\]\]' % FAMILIA, re.IGNORECASE)
	fami_str = '[[Category:'+FAMILIA+' (Indexed)]]'
	
	# Make changes to image
	for image in indexed_images:
		try:
			text = image.get()
		except wikipedia.NoPage:
			continue
		changed = True
		if text.find (fami_str) < 0:
			if not fam_re.search (text) == None:
				text = fam_re.sub ('', text)
			text = text + '\n' + fami_str
		else:
			if not fam_re.search (text) == None:
				changed = False
			else:
				text = fam_re.sub ('', text)
		if changed:
			image.put(text, 'Plant image indexed in species article')
			print '---> Changed '+image.aslink().encode(config.console_encoding, 'replace')
		else:
			print '---> Unchanged '+image.aslink().encode(config.console_encoding, 'replace')

if __name__ == '__main__':
	# Process every non-empty family name given on the command line.
	# stopme() tells the framework this bot is finished with the wiki, so
	# it is called once, after all families are done (or on error), rather
	# than after each argument while further wiki access is still to come.
	try:
		for arg in sys.argv[1:]:
			if arg:
				main (arg)
	finally:
		wikipedia.stopme()