# User:DRBot/source/udel archiver.py
#
# From Wikimedia Commons, the free media repository.
import calendar
import locale
import re
import time

#locale.setlocale(locale.LC_TIME,('en_US','utf-8'))
# English month names, in calendar order, joined into the regex that
# recognises signature timestamps. All twelve must be present: a month
# missing here makes every signature from that month invisible to the
# archiver.
MONTHS = ('January', 'February', 'March', 'April', 'May', 'June',
		'July', 'August', 'September', 'October', 'November', 'December')

import mwclient

class UndeletionArchiver(object):
	"""Move closed undeletion requests to the monthly archive page.

	A closed request is the text between a ``{{delh}}``/``{{udelh}}``
	marker and the next ``{{delf}}``/``{{udelf}}`` marker. It is archived
	once its most recent signature timestamp is older than the configured
	threshold, or right away if it carries no parseable timestamp.
	"""

	# Matches a signature timestamp such as "12:34, 5 March 2010 (UTC)" at
	# the end of a line; the group captures everything except " (UTC)".
	# Plain raw strings keep the pattern valid on both Python 2 and 3.
	r_timestamp = re.compile(
		r'([0-9]{2}:[0-9]{2}, [0-9]{1,2} (?:%s) [0-9]{4}) \(UTC\)\s*$'
		% '|'.join(MONTHS), re.MULTILINE)

	def find_archive_sections(self, text):
		"""Return every closed section found in ``text``.

		Each returned string runs from the opening marker through the
		closing marker and includes any whitespace that follows it.
		"""
		r_section = r'(?is)\{\{(?:u?)delh\}\}.*?\{\{(?:u?)delf\}\}\s*'
		return re.findall(r_section, text)

	def find_eligible_sections(self, sections, threshold):
		"""Yield the sections that are ready to be archived.

		``threshold`` is an age in seconds. A section is eligible when
		it has no timestamp at all or when its newest timestamp is more
		than ``threshold`` seconds in the past.
		"""
		for section in sections:
			timestamp = self.get_timestamp(section)
			if timestamp is None or time.time() - timestamp > threshold:
				yield section

	def get_timestamp(self, section):
		"""Return the newest signature time in ``section``.

		The result is seconds since the epoch as a float, or ``None``
		when the section contains no parseable timestamp.
		"""
		stamps = []
		for match in self.r_timestamp.findall(section):
			try:
				parsed = time.strptime(match, '%H:%M, %d %B %Y')
			except ValueError:
				# Looks like a timestamp but is not a real date
				# (e.g. "31 February"); ignore it instead of
				# aborting the whole run.
				continue
			# The signatures are marked "(UTC)", so convert with
			# timegm; mktime would read them as local time and skew
			# the age by the host's UTC offset.
			stamps.append(float(calendar.timegm(parsed)))

		if not stamps:
			return None
		return max(stamps)

	def run(self):
		"""Archive all eligible sections from the current-requests page.

		The archive page is saved before the request page so that a
		failure between the two saves duplicates threads rather than
		losing them.
		"""
		site = mwclient.ex.ConfiguredSite('.config', '.deletion_config')

		page = site.Pages['Commons:Undeletion requests/Current requests']
		text = page.edit()

		sections = self.find_archive_sections(text)
		archive = list(self.find_eligible_sections(sections,
			site.config['archival_threshold']))
		if not archive:
			# Nothing to move: skip both saves so no empty
			# "0 threads" edits are made.
			return
		for section in archive:
			text = text.replace(section, '')

		# NOTE(review): strftime without a time tuple uses local time, so
		# the archive month follows the host clock - confirm that is
		# intended.
		archive_page = site.Pages[time.strftime('Commons:Undeletion requests/Archive/%Y-%m')]
		archive_text = archive_page.edit()
		if u'{{Commons:Undeletion requests/Archive/Template}}' not in archive_text:
			archive_text = u'{{Commons:Undeletion requests/Archive/Template}}\n' + archive_text
		if '__NOTOC__' not in archive_text:
			archive_text = '__NOTOC__\n' + archive_text
		archive_text = archive_text + '\n'.join(archive)
		archive_page.save(archive_text, 'Archiving %s threads' % \
			len(archive))

		page.save(text, '%s threads archived to [[%s]]' % (
			len(archive), archive_page.name))

if __name__ == '__main__':
	import os

	# Work from the directory holding this script, so the relative
	# config file names passed in run() are looked up next to it.
	script_dir = os.path.abspath(os.path.dirname(__file__))
	os.chdir(script_dir)

	UndeletionArchiver().run()