# Source: [[User:Dispenser/Absurd overhead.py]] (saved from a wiki page; navigation links removed)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Absurd Overhead
usage: python absurd_overhead.py [language] [img_name|file page id]
# Ubuntu dependencies
# Programs: exiftool, jpegtran, optipng, unrar-nonfree, identify, pngcheck, gifsicle
sudo apt-get install libimage-exiftool-perl libjpeg-progs optipng unrar imagemagick pngcheck gifsicle
sudo pip install phpserialize oursql
# NON-FREE SOFTWARE, DO NOT USE ON WIKIMEDIA SERVERS
# pngout <http://www.advsys.net/ken/utils.htm> [Freeware]
wget http://static.jonof.id.au/dl/kenutils/pngout-20150319-linux.tar.gz -O - | tar -zxv
cd pngout-20150319-linux/ # Find your files for your system
# sudo cp . /opt
"""
import hashlib, mmap, os, re, time, urllib
import dbm, oursql, shutil, subprocess
import phpserialize
try:
# https://github.com/toollabs/embeddeddata
from detection import detect
except ImportError:
detect = None
os.sys.path.append(os.path.expanduser('~/pywikibot/'))  # so `import pywikibot` resolves later
StartTime=time.time()  # wall-clock start, used by the timing printouts
os.nice(10) # Lower CPU priority so this batch job doesn't starve the host
# Configuration
skip_ifunder = 250*1024  # skip files where the potential saving is under 250 KB
lang = (os.sys.argv[1:2] or ['commons'])[0]  # wiki database prefix (default: commons)
test_img_name = (os.sys.argv[2:3] or [''])[0]  # optional single image name or page id for testing
log_db = dbm.open(os.path.expanduser('./absurd_cache'), 'c')  # persistent per-SHA1 result cache; expanduser is a no-op on a relative path
download_dir = os.path.expanduser('/user-data/images/%s/'%lang)  # local mirror of downloaded originals
output_file = os.path.expanduser('./Absurd_overhead.%s.txt'%lang)  # wikitable report destination
overhead_table = 'u2815__.file_overhead'  # SQL table caching per-file metadata overhead
os.chdir( './' ) # Change working directory (no-op as written)
# Magic numbers
# Maps a short key to (signature bytes, human-readable description).
# Only the signature (element [0]) is used for scanning; the description is
# informational.
magic_numbers = {
    # Hidden archive formats
    '7zip': (b'7z\xBC\xAF\x27\x1C', '7z archive', ),
    'rar4': (b'Rar!\x1A\x07\x00', 'RAR 1.5 to 4.0', ),
    'rar5': (b'Rar!\x1A\x07\x01\x00', 'RAR 5+', ),
    #'tar': (b'ustar \x00', 'Tar archive', ),
    # BUGFIX: a missing comma made the signature and description concatenate
    # into a single string, so the 'tar' needle could never match.
    'tar': (b'ustar\x0000', 'Tar archive', ),
    'zip': (b'PK\x03\x04', 'ZIP archive', ),
    'zips': (b'PK\x07\x08', 'ZIP spanned archive', ),
    # too short
    #'bz2': (b'BZh', 'bzip2 archive' ), # BHh[1-9]
    #'gz': (b'\x1F\x8B\x08', 'GZip', ),
    # Metadata
    'exif': (b'EXIF\x00', ''),
    'jfif': (b'JFIF\x00', ''),
    'xmp': (b'<x:xmpmeta', ''),
    'icc': (b'ICC_PROFILE', 'ICC profile', ),
    # Misc
    'asf': (b'\x30\x26\xB2\x75\x8E\x66\xCF\x11', 'WMA/ASF media', ),# http://www.digitalpreservation.gov/formats/fdd/fdd000027.shtml
    'wmv': (b'\x30\x26\xB2\x75\x8E\x66\xCF', 'Windows Video file', ),
    'msi': (b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1', 'Microsoft Office file/MSI', ),
    'mkv': (b'\x1A\x45\xDF\xA3', 'Matroska/WebM video', ),
    'djvu': (b'AT&TFORM', 'DjVu document',),
    'jp2': (b'\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A', 'JPEG 2000 graphic file', ),
    'gif9': (b'GIF89', 'GIF graphic file', ),
    'gif': (b'GIF87', 'GIF graphic file', ),
    # BUGFIX: the EPS header reads "EPSF-3.0", not "EPSF-3 0".
    'eps': (b'%!PS-Adobe-3.0 EPSF-3.0', 'EPS File', ),
    'pdf': (b'%PDF', 'PDF Document', ),
    'docx': (b'\x50\x4B\x03\x04PK', 'Office 2010 file', ),
    # MPEG 4 seems to be r'...ftyp\w{2,4}' http://www.ftyps.com/
    'mp4': (b'ftyp', 'MPEG-4 video|QuickTime file', ),
}
connections = {}  # (host, dbname) -> live oursql connection
def getConn(dbname, host=None, reconnect=False):
    """Return a cached oursql connection to *dbname*, reconnecting if dead.

    host defaults to the Labs convention: the database name minus its
    trailing '_p' suffix plus '.labsdb'.
    """
    try:
        connections[host, dbname].ping()
    except Exception:
        # Missing cache entry (KeyError) or a dead/unpingable connection:
        # either way a fresh connect is needed.  (BUGFIX: was a bare except.)
        reconnect = True
    if (host, dbname) not in connections or reconnect:
        connections[host, dbname] = oursql.connect(
            db=dbname,
            host=host or dbname[:-2]+'.labsdb',
            read_default_file=os.path.expanduser('~/.my.cnf'),
            charset=None,
            compress=True,
            use_unicode=False,
            autoping=True
        )
    return connections[host, dbname]
def base36encode(number):
    """Encode a non-negative integer as lowercase base-36, zero-padded to 31
    digits (MediaWiki's img_sha1 representation of a 160-bit SHA-1)."""
    digits = '0123456789abcdefghijklmnopqrstuvwxyz'
    encoded = []
    while number:
        number, rem = divmod(number, 36)
        encoded.append(digits[rem])
    # Digits were produced least-significant first; reverse before padding.
    return ''.join(reversed(encoded)).zfill(31)
def sha1file(filepath):
    """Return the hex SHA-1 digest of *filepath*.

    Reads in 1 MB chunks instead of slurping the whole file -- the images
    processed here can be hundreds of megabytes.
    """
    sha1 = hashlib.sha1()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            sha1.update(chunk)
    return sha1.hexdigest()
def sha1file36(filepath):
    """SHA-1 of *filepath* as a 31-character base-36 string, matching the
    format of MediaWiki's img_sha1 column."""
    hex_digest = sha1file(filepath)
    return base36encode(int(hex_digest, 16))
def mkdir_p(path):
    """Create *path* like `mkdir -p`: parents included, no error if it
    already exists as a directory."""
    import errno
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        # Swallow only "already exists as a directory"; re-raise anything else.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
def reduction(end_size, img_size):
    """Format the size change from img_size to end_size as a signed percent
    byte string (e.g. b'-12%'); b'-' when end_size is negative (no result)."""
    if end_size < 0:
        return b'-'
    percent = 100.0 * (end_size - img_size) / float(img_size)
    return b'%+2.0f%%' % (percent,)
def put_pywikibot(lang, title, new_text, summary=None, prompt=True):
    """Replace the first wikitable on *title* with *new_text* (Python 2 only).

    The page's existing `{| ... |}` table is stripped and new_text appended.
    NOTE(review): as written this can never actually save -- `summary` is
    forced to '' below so the confirmation prompt is unreachable, and
    pywikibot.config.simulate is set before the (commented-out) page.put().
    """
    import pywikibot
    site = pywikibot.Site('commons', 'commons') if lang=='commons' else pywikibot.Site(lang)
    page = pywikibot.Page(site, title)
    # Strip the previous report table (multiline, dot-matches-newline regex).
    new_text = re.sub(ur'^\{\|.*?^\|\}$', "", page.get(), flags=re.U | re.M | re.DOTALL) + new_text
    # Sanity guard: only proceed if the page would not shrink by 200+ lines.
    if new_text.count('\n') - page.get().count('\n') > -200:
        pywikibot.showDiff(page.get(), new_text)
        site.login()
        summary = ''#raw_input('Summary for edit:') or 'Update '
        print page.title(asLink=True)
        if page.botMayEdit():
            # `summary` is always '' here, so this branch never runs.
            if summary and (raw_input('Save to wiki as User:%s? [yes/No] ' % (site.user(), )) in ('yes', 'y')):
                pywikibot.config.simulate = True
                #page.put(new_text, summary, minorEdit=False, botflag=False)
# Pregenerate table for quick debugging
def cache_images(cursor):
    """Rebuild u2815__.absurd_images: names of JPEG/PNG bitmaps whose stored
    size exceeds a rough raw-pixel estimate plus 16 KB slack.

    Used on Commons to pre-filter the huge `image` table so debugging runs of
    the main query are quicker.  Takes ~15 minutes.
    """
    # Two execute() calls chained on one line; the `-- ` line comments keep
    # the text valid when pasted straight into a mysql shell.
    cursor.execute('''
-- ; mysql -h commonswiki.labsdb commonswiki_p
DROP TABLE IF EXISTS u2815__.absurd_images;
-- ''');cursor.execute('''
/* absurd_images 15 min <SLOW_OK> */
CREATE TABLE u2815__.absurd_images (
ao_img_name VARBINARY(255) NOT NULL PRIMARY KEY
) ENGINE=MyISAM AS
SELECT img_name AS ao_img_name
FROM image
WHERE img_media_type="BITMAP"
AND img_major_mime="image"
AND img_minor_mime IN ("jpeg", "png")
AND img_size > IF(img_minor_mime="jpeg", 3,
IF(img_metadata LIKE '%s:16:"truecolour-alpha"%', 4,
IF(img_bits<8 OR img_metadata LIKE '%s:14:"index-coloured"%' OR img_metadata LIKE '%s:9:"greyscale"%', 1, 3)
) * img_bits / 8
) * img_width * img_height + 16*1024;
''')
def main():
cursor = getConn('%swiki_p'%lang).cursor()
cursor.execute('CREATE DATABASE IF NOT EXISTS '+overhead_table.partition('.')[0])
query_where = ["img_size > ?"]
query_data = [skip_ifunder]
if test_img_name:
if test_img_name.isdigit():
query_where.append("page_id=?")
query_data.append(test_img_name)
else:
query_where.append("img_name=?")
query_data.append(test_img_name)
else:
if lang=='commons':
#cursor.execute('DROP TABLE IF EXISTS '+overhead_table)
#cache_images(cursor)
pass
cursor.execute('''
CREATE TABLE IF NOT EXISTS '''+overhead_table+''' (
fo_page INT NOT NULL,
fo_sha1 VARCHAR(32) NOT NULL PRIMARY KEY,
fo_icc_size INT,
fo_identify_size INT,
fo_exiftool_size INT,
fo_size INT NOT NULL
);''')
cursor.execute('''/* absurd_images 20 min <SLOW_OK> */
SELECT
img_name,
img_size,
img_width,
img_height,
img_bits,
IF(img_minor_mime="jpeg", 3, /* Workaround for [[phab:T132986]] */
IF(img_metadata LIKE '%s:16:"truecolour-alpha"%', 4,
IF(img_bits<8 OR img_metadata LIKE '%s:14:"index-coloured"%' OR img_metadata LIKE '%s:9:"greyscale"%', 1, 3)
) * img_bits / 8
) * img_width * img_height + IFNULL(fo_size, 50*1024) AS est_size,
img_minor_mime,
img_metadata,
user_name,
user_editcount,
img_timestamp,
img_sha1,
EXISTS (SELECT 1 FROM ipblocks_ipindex WHERE ipb_user=user_id AND (ipb_expiry="infinity" OR ipb_expiry>NOW()) LIMIT 1) AS user_block,
fo_size,
'''+('(SELECT COUNT(*) FROM globalimagelinks WHERE gil_to=img_name)' if lang=='commons' else '-1')+''' AS img_usage
FROM image '''+
('' if lang!='commons' else 'JOIN u2815__.absurd_images ON img_name=ao_img_name /*quicker debugging*/')+
'''
JOIN page ON page_namespace=6 AND page_title=img_name
LEFT JOIN user ON user_id=img_user
LEFT JOIN '''+overhead_table+''' ON fo_sha1=img_sha1
LEFT JOIN categorylinks ON cl_from=page_id AND cl_to IN ("Animated_PNG", "Fireworks_PNG_files")
WHERE img_width>0 AND img_height>0
AND img_media_type="BITMAP" AND img_major_mime="image"
AND img_minor_mime IN ("jpeg", "png")
AND '''+' AND '.join(query_where)+'''
AND img_size * 0.996 > IF(img_minor_mime="jpeg", 3,
IF(img_metadata LIKE '%s:16:"truecolour-alpha"%', 4,
IF(img_bits<8 OR img_metadata LIKE '%s:14:"index-coloured"%' OR img_metadata LIKE '%s:9:"greyscale"%', 1, 3)
) * img_bits / 8
) * img_width * img_height + IFNULL(fo_size, 0) + 4 * 1024
AND cl_from IS NULL
ORDER BY CAST(img_size AS SIGNED)-est_size DESC
LIMIT 50000;
''', tuple(query_data))
print 'Queried for images in %2.4g minutes, %swiki, rows: %s'%((time.time()-StartTime)/60.0,lang,cursor.rowcount)
f = open(output_file, 'w+b')
f.write(b'\xEF\xBB\xBF')
def write(text):
f.write(text.encode('utf-8') if isinstance(text, unicode) else str(text))
f.write(b'\n')
write(b"""\
{| class="wikitable sortable plainlinks" style="text-align:center"
|-
! Name
! Date
! Size (KB)
! BMP
! Zip
! Trim
! Opti
! Links
! Usage
! Uploader
! Notes""")
count = 0
for img_name, img_size, img_width, img_height, img_bits, est_size, img_minor_mime, img_metadata, user_name, user_editcount, img_timestamp, img_sha1, user_block, fo_size, img_usage in cursor.fetchall():
if img_sha1+'.skip' in log_db:
# We skipped this before
continue
notes = []
print
print img_name.center(len(img_name)+2).decode('utf-8').center(79, '=').encode('utf-8')
metadata = {}
if img_metadata not in ('', '-1', '0'): # https://phabricator.wikimedia.org/T155741
try:
metadata = phpserialize.loads(img_metadata)
except ValueError as e:
print 'img_metadata decode error:', e
# Check for animation
img_framecount = metadata.get('frameCount', 1)
if img_framecount > 1:
print 'Skipping animation [[%s]] (%s frames)' % (img_name, img_framecount)
continue
# Use color channels from MediaWiki
if metadata.get('colorType') in (b'index-coloured', 'greyscale') or img_bits < 8:
img_channels = 1
elif metadata.get('colorType') == b'truecolour':
img_channels = 3
elif metadata.get('colorType') == b'truecolour-alpha':
img_channels = 4
elif metadata.get('colorType'):
raise Exception('Unknown colorType: %s' % metadata.get('colorType'))
else: # Assume 3 channels
img_channels = 1 if img_minor_mime=='gif' else 3 if img_minor_mime=='jpeg' else 4
# Make image URL
img_name_md5 = hashlib.md5(img_name).hexdigest()
mw_url = 'https://upload.wikimedia.org/wikipedia/%s/%s/%s/%s' % (
lang,
img_name_md5[0:1],
img_name_md5[0:2],
urllib.quote(img_name),
)
mkdir_p(download_dir)
if os.path.exists('absurd.img'):
os.remove('absurd.img')
# Do we already have the file?
if os.path.isfile(os.path.join(download_dir, img_name)):
# Is it the right size?
new_name = os.path.join(download_dir, img_name)
if os.path.getsize(new_name) != img_size:
print 'File size mismatch: DL %s != SQL %s for %s' % (os.path.getsize(new_name), img_size, img_name)
while os.path.exists(new_name):
new_name += '_'
os.rename(os.path.join(download_dir, img_name),os.path.join(download_dir, new_name))
# Either (re)download it or copy it over
if not os.path.isfile(os.path.join(download_dir, img_name)):
ec = os.system('wget "%s" --output-document="absurd.img" --no-clobber --limit-rate=2M'%(mw_url,))
if ec: raise ec
shutil.copy2('absurd.img', os.path.join(download_dir, img_name))
else:
shutil.copy2(os.path.join(download_dir, img_name), 'absurd.img')
# SHA1 check
if sha1file36('absurd.img') != img_sha1:
print sha1file36('absurd.img'), '!=', img_sha1
raise ValueError('SHA-1 hash mismatch for [[File:%s]]' % img_name)
# Find metadata overhead
if fo_size == None:
# ImageMagick's identify (better for JPEGs)
identify_size = 0
fo_icc_size = None
p7zip = subprocess.Popen(
['identify', '-verbose', 'absurd.img'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
stdout, stderr = p7zip.communicate()
if stderr or p7zip.returncode:
print stderr
print 'Exit code:', p7zip.returncode
for m in re.finditer(r' *(.*?): (\d+) bytes', stdout):
print m.group()
identify_size += int(m.group(2))
if m.group(1) == 'Profile-icc':
fo_icc_size = int(m.group(2))
# ExifTool (better for PNGs)
exiftool_size = 0
exif= subprocess.Popen(
['exiftool', '-a', '-b', 'absurd.img'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
#{
# 'Profile-8bim': '',
# 'Profile-APP3': '',
# 'Profile-exif': '-exif',
# 'Profile-icc': '-icc_profile',
# 'Profile-iptc': '-iptc',
# 'Profile-xmp': '-xmp',
#}
stdout, stderr = exif.communicate()
if stderr:
print 'ERROR', stderr
if exif.returncode:
print 'Exit code:', exif.returncode, ' xmp bytes: ', len(stdout)
elif stdout:
exiftool_size = len(stdout)
print '\tExifTool: %d bytes' % (exiftool_size,)
#print 'stdout', stdout,
fo_size = identify_size if img_minor_mime=='jpeg' else exiftool_size
cursor = getConn('%swiki_p'%lang).cursor()
cursor.execute(
"INSERT INTO "+overhead_table+
"(fo_page, fo_sha1, fo_icc_size, fo_identify_size, fo_exiftool_size, fo_size) "+
"VALUES (?, ?, ?, ?, ?, ?) "+
"ON DUPLICATE KEY UPDATE fo_size=fo_size",
(0, img_sha1, fo_icc_size, identify_size, exiftool_size, fo_size)
)
if img_minor_mime=='png':
pngcheck = subprocess.Popen(
['pngcheck', '-v', 'absurd.img'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
stdout, stderr = pngcheck.communicate()
if 'Macromedia Fireworks private' in stdout and not notes:
print 'Skipping Fireworks file'
log_db[img_sha1+'.skip'] = "Fireworks"
continue
elif 'Microsoft Picture It private' in stdout and not notes:
print 'Skipping Picture It! file'
log_db[img_sha1+'.skip'] = "PictureIt"
continue
else:
print stdout if stdout.count('\n') < 10 else stdout[stdout.rstrip().rfind('\n')+1:]
if 'ERRORS DETECTED' in stdout:
notes.append(stdout.strip().split('\n')[-2].strip())
if stderr or pngcheck.returncode:
print stderr
print '!'*60
if '-bit RGB+alpha, ' in stdout:
img_channels = 4
elif '-bit RGB, 'in stdout:
img_channels = 3
elif '-bit palette, ' in stdout:
img_channels = 1
elif '-bit grayscale, ' in stdout:
img_channels = 1
else:
print stdout
print stderr
raise Exception('Unrecongized pngcheck -v output')
def magicSearch(needle):
with open('absurd.img', 'r+b') as f:
data = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
result = data.find(needle)
return result != -1
magic = dict((k, magicSearch(v[0])) for k,v in magic_numbers.iteritems())
if any(magic.values()):
print 'Magic numbers: %s'%(', '.join((str(k) for k,v in magic.iteritems() if v)))
# Check common archive formats
is_7z_file = 0
is_rar_file = 0
is_encrypted = False
if magic['rar4'] or magic['rar5']:
unrar = subprocess.Popen(
['unrar-nonfree', 'Lb', '-ppassword', 'absurd.img'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
stdout, stderr = unrar.communicate()
is_rar_file = stdout.count('\n')
print stdout, stderr
print 'RAR: v1-4:%5s v5+:%5s %3s files ' % (magic['rar4'], magic['rar5'], is_rar_file,)
if unrar.returncode:
print '!'*60
print unrar.returncode
raise Exception(stdout)
if magic['7zip'] or (not is_rar_file and magic['rar4']) \
or magic['zip'] or magic['zips']:
p7zip = subprocess.Popen(
['7z', 'l', '-p', 'absurd.img'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
stdout, stderr = p7zip.communicate()
print stdout, stderr, 'exit code: ', p7zip.returncode
m = re.search(r'\s(\d) files, ', stdout)
is_7z_file = m.group(1) if m else 0
print '7z: %s %3s files ' % (magic['7zip'], is_7z_file,)
if 'encrypted archive' in stdout:
is_encrypted = True
if p7zip.returncode:
print '!'*30
#raise Exception(p7zip.returncode)
# Recompute with new img_channels and overhead adjustment
est_size = img_width * img_height * img_channels * img_bits/8.0 + fo_size + 4 * 1024
# Identify PNG channels, oversized?
if est_size >= img_size * 0.996:
if is_encrypted or is_rar_file or is_7z_file:
raise Exception('found something strange')
if not notes:
print 'Skipping RGBA' if img_channels > 3 else 'Skipping overhead adjusted', '{:,} < {:,}'.format(img_size, est_size)
continue
else:
print 'Would skip, but note: %s'%(notes,)
# Test entropy with DEFLATE
if img_sha1+'.compressed' not in log_db:
os.system('gzip -9 < absurd.img > compressed.img')
log_db[img_sha1+'.compressed'] = str(os.path.getsize('compressed.img'))
os.remove('compressed.img')
cmpr_size = int(log_db[img_sha1+'.compressed'])
# Format specific for trimmers and optimizers
if img_sha1+'.optimized' not in log_db or img_sha1+'.trimmed' not in log_db:
if img_minor_mime=='jpeg':
_=subprocess.call('jpegtran -copy all absurd.img > trimmed.img', shell=True)
_=subprocess.call('jpegtran -copy all -optimize absurd.img > optimized.img', shell=True)
elif img_minor_mime=='png':
# trim
_=subprocess.call('pngout -force -ks -s4 -y absurd.img trimmed.img', shell=True)
if _: # PNGOUT error
print('PNGOUT exit code: %s'%(_,))
shutil.copy2('absurd.img', 'trimmed.img.png')
os.system('mv trimmed.img.png trimmed.img')
# optimize
_=subprocess.call('optipng -fix -force -quiet absurd.img -out optimized.img', shell=True)
_=subprocess.call('pngout -s0 -q -y optimized.img optimized.img.png', shell=True)
os.system('mv -f optimized.img.png optimized.img')
#elif img_minor_mime=='gif':
# subprocess.call('gifsicle absurd.img --output optimized.img')
else:
raise 'Optimization Unsupported for %s' % img_minor_mime
log_db[img_sha1+'.trimmed'] = str(os.path.getsize('trimmed.img'))
log_db[img_sha1+'.optimized'] = str(os.path.getsize('optimized.img'))
os.remove('trimmed.img')
os.remove('optimized.img')
trim_size = int(log_db[img_sha1+'.trimmed'])
opti_size = int(log_db[img_sha1+'.optimized'])
if detect:
res = detect('absurd.img')
if res:
notes.append(repr(res))
# Delete our work files
os.remove('absurd.img')
# Skip heuristics
if not notes:
# Trim is low, but image seems to be uncompressed
if cmpr_size + 0.10*img_size > opti_size > cmpr_size - 0.10*img_size and trim_size > img_size * 0.90:
print 'Skipping uncompressed image: opti {:4.2f}% within compr'.format(
opti_size/float(cmpr_size)*100.0,
)
continue
# Indication of "zero" padding at the end of the image
if opti_size < est_size and trim_size + 0.05*img_size > cmpr_size: # TODO improve allow [Zip: -20, Opti: -90%], disalllow [Zip -1%, opti-90%]
print "Skipping padded image"
continue
# Files under threshold
if img_size - min(est_size, trim_size, opti_size) < skip_ifunder:
print "Skipping {:,} - min({:,}, {:,}, {:,}) = {:,} under {:,}".format(
img_size, est_size, trim_size, opti_size,
img_size - min(est_size, trim_size, opti_size),
skip_ifunder,
)
continue
notes += [
str(metadata.get('Software', '')),
'%d KB metadata' % (fo_size/1024.0) if fo_size > img_size * 0.05 > 1024 else ''
# arhives
'PK Zip header' if magic['zip'] or magic['zips'] else '',
"'''7z''' (%s files)"%is_7z_file if is_7z_file != 0 else '',
"'''RAR''' (%s files)"%is_rar_file if is_rar_file != 0 else '',
"'''encrypted archive?'''" if is_encrypted else '',
]
notes.append(';'.join(k.upper() for k,v in magic.iteritems() if v and k in ('mp4', 'mp4-', 'gp5', 'asf', 'tar', 'jar')))
count += 1
write('\n'.join((
'|-',
'| align=left | [[:File:{}]]'.format(img_name.replace('_', ' ')),
'| '+time.strftime('%Y-%m-%d', time.strptime(img_timestamp, '%Y%m%d%H%M%S')),
'| align=right| {:,.0f}'.format(img_size // 1024.0),
'| '+reduction(est_size, img_size),
'| '+reduction(cmpr_size, img_size),
'| '+reduction(trim_size, img_size),
'| '+reduction(opti_size, img_size),
'| [http://imgops.com/{{filepath:%s}} ImgOps], [http://exif.regex.info/exif.cgi?url={{urlencode:https:{{filepath:%s}}|QUERY}} Exif], [//images.google.com/searchbyimage?site=search&image_url={{filepath:%s|%s}} Google]' % (
img_name, img_name, img_name, 120 if img_width<=300 else 300 if img_width<=800 else 800,
),
'| %s'%img_usage,
'| align=left | [[User:%(user_name)s|%(user_name)s]] ([[User talk:%(user_name)s|talk]]%(blocked)s%(lowcount)s)' % dict(
user_name=user_name,
blocked=", '''BLOCKED'''" if user_block else '',
lowcount=', %s edits'%user_editcount if user_editcount <=100 else '',
),
'| align=left | %s' % ', '.join(note for note in notes if note) if notes else '',
)))
write('|}')
f.close()
print 'Found %s images in %4.2f minutes' % (count, (time.time()-StartTime)/60.0,)
if __name__ == '__main__':
    main()
    # Save: re-read the finished report and publish (or just print) it.
    with open(output_file, 'r') as f:
        f.seek(3)  # skip the 3-byte UTF-8 BOM written by main()
        new_text = f.read().decode('utf-8')
    if lang=='commons':
        try:
            put_pywikibot(lang, 'User:Dispenser/Absurd overhead', new_text)
        except Exception as e:
            # Dump the table to stdout so the run's output isn't lost,
            # then re-raise for the traceback.
            print 'Error pasting new page: %s' % (e,)
            print new_text.encode('utf-8')
            raise
    else:
        # Non-Commons wikis: print the table only if it has actual rows.
        if len(new_text) > 200:
            print new_text.encode('utf-8')