# User:JarektUploadBot/UploadWGA.py
# Jump to navigation
# Jump to search
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
A program to upload images from the Web Gallery of Art website (http://www.wga.hu/)
to Wikimedia Commons, driven by a CSV file that holds one row of metadata per image.
'''
import sys, os.path, glob, re, hashlib, base64, StringIO
sys.path.append("C:/Programs/pywikipedia/")
sys.path.append("../")
import wikipedia, upload, csv, urllib2, string
# CSV columns that are decoded from UTF-8 and copied into the metadata
# dictionary under the same name.
_TEXT_FIELDS = (
    'CREATOR', 'DATE', 'TITLE', 'DIMENSIONS', 'TECHNIQUE', 'FILENAME',
    'FILENAME1', 'FORM', 'TYPE', 'SCHOOL', 'TIMELINE', 'INSTITUTION',
    'CREATOR_CAT', 'INSTITUTION_CAT', 'TITLE_CAT', 'DATE_CAT', 'URL',
    'IMAGEURL', 'FRAME',
)

# Wikitext of the file description page; filled in with "%" from the
# metadata dictionary built in processFile().
_DESCRIPTION_TEMPLATE = u"""{{Artwork
|artist = %(CREATOR)s
|title = {{en|%(TITLE)s}}
|description =
|date = %(DATE)s
|medium = %(TECHNIQUE)s
|dimensions = %(DIMENSIONS)s
|institution = %(INSTITUTION)s
|location = <!-- location within the gallery/museum -->
|references =
|object history =
|credit line =
|inscriptions =
|notes =
|accession number =
|source = {{WGA link|ID=%(IMG_ID)s|pic-url=%(IMAGEURL)s|info-url=%(URL)s}}
|permission = {{PD-art|PD-old-100}}
|other_versions =
}}
%(FRAME)s
{{WGA tag|%(FORM)s|%(TYPE)s|%(SCHOOL)s|%(TIMELINE)s}}
{{subst:#ifexist:Category:%(FORM1)ss by %(CREATOR_CAT)s|[[Category:%(FORM1)ss by %(CREATOR_CAT)s]]|[[Category:%(CREATOR_CAT)s]]}}
{{subst:#ifexist:Category:%(FORM1)ss in the %(INSTITUTION_CAT)s|[[Category:%(FORM1)ss in the %(INSTITUTION_CAT)s]]|[[Category:%(INSTITUTION_CAT)s]]}}
[[Category:%(TITLE_CAT)s]]
[[Category:%(DATE_CAT)s]]
[[Category:WGA form: %(FORM)s]]
[[Category:WGA type: %(TYPE)s]]
[[Category:WGA School: %(SCHOOL)s]]
[[Category:WGA time period: %(TIMELINE)s]]
"""

# (old, new) replacements applied, in this order, to the filled-in
# description. Order matters: e.g. empty categories are removed before
# doubled newlines are collapsed.
_DESCRIPTION_FIXUPS = (
    ("[[Category:]]\n", ""),              # empty category on its own line
    ("[[Category:]]", ""),                # any remaining empty category
    ("{{}}", ""),                         # empty template call
    ("[[Category: ", "[[Category:"),      # stray space after the colon
    ("cannvas", "canvas"),                # spelling fix
    ("\n\n", "\n"),                       # collapse doubled newlines
    # NOTE(review): as written this pair replaces the text with itself, so
    # it changes nothing; it is kept verbatim -- confirm the intended strings.
    ("Paintings by ", "Paintings by "),
)


def processFile(row):
    '''
    Upload the image described by one CSV metadata row to Commons.

    row -- mapping of column name to UTF-8 encoded value, as produced by
           csv.DictReader. IMG_ID must be convertible with int(); every
           column listed in _TEXT_FIELDS must be present, because a missing
           column makes row.get() return None and the unicode() conversion
           then fails with TypeError.

    The function returns None in every case. It returns early, after
    printing a message through wikipedia.output(), when the local image
    file does not exist or when the target site already holds a file with
    the same SHA1 hash.
    '''
    # Read line of metadata
    enc = 'utf-8'
    metadata = {'IMG_ID': int(row.get('IMG_ID'))}
    for field in _TEXT_FIELDS:
        metadata[field] = unicode(row.get(field), enc)

    # FORM1 is the capitalized FORM value used in the category names.
    metadata['FORM1'] = metadata['FORM'].capitalize()
    for field in ('FILENAME1', 'CREATOR', 'INSTITUTION'):
        metadata[field] = metadata[field].strip()

    targetSite = wikipedia.getSite('commons', 'commons')

    # The same local file is addressed twice: as a plain path for the
    # existence check and hashing, and as a file:// URL for the uploader.
    imageDir = u'C:/Documents and Settings/tuszynskij/My Documents/Downloads/WGA/'
    localPath = imageDir + metadata['FILENAME']
    fileUrl = u'file:///' + localPath
    # The target file name is taken from the FILENAME1 column.
    filename = metadata['FILENAME1']

    if not os.path.exists(localPath):
        wikipedia.output(u'File not found: %s' % localPath)
        return

    # We don't want to upload duplicates, so calculate the SHA1 hash of the
    # image and ask the target site for files with the same hash.
    with open(localPath, 'rb') as imageFile:
        digest = hashlib.sha1(imageFile.read()).digest()
    sha1Hex = base64.b16encode(digest)
    duplicates = targetSite.getFilesFromAnHash(sha1Hex)
    if duplicates:
        duplicate = duplicates.pop()
        wikipedia.output(u'Duplicate image: %s' % duplicate + u' = %(FILENAME)s' % metadata)
        return

    # Format file description, then tidy up what empty fields leave behind.
    description = _DESCRIPTION_TEMPLATE % metadata
    for old, new in _DESCRIPTION_FIXUPS:
        description = description.replace(old, new)

    banner = "================================================================================"
    wikipedia.output(banner)
    wikipedia.output(u'Preparing upload for %s' % filename)
    wikipedia.output(banner)

    # upload file to Commons
    bot = upload.UploadRobot(url=fileUrl,
                             description=description,
                             useFilename=filename,
                             keepFilename=True,
                             verifyDescription=False,
                             targetSite=targetSite)
    bot.run()
def main(args):
    '''
    Feed every row of the batch CSV file to processFile().

    args -- command-line arguments; accepted but not used.

    The file 'WGA_batch2d.csv' is opened relative to the current working
    directory and is closed again when processing ends, whether normally
    or through an exception. A csv.Error raised while reading ends the
    program through sys.exit() with a message naming the file and the
    line number.
    '''
    csvFile = 'WGA_batch2d.csv'
    # "rb": the Python 2 csv module expects the file in binary mode.
    with open(csvFile, "rb") as csvHandle:
        reader = csv.DictReader(csvHandle, dialect='excel', delimiter=',')
        try:
            for row in reader:
                processFile(row)
        except csv.Error as e:
            sys.exit('file %s, line %d: %s' % (csvFile, reader.line_num, e))
if __name__ == "__main__":
    # The closing message sits in a finally clause, so it is printed
    # however main() ends: normal return, uncaught exception, or the
    # sys.exit() that main() calls on a CSV error.
    try:
        main(sys.argv[1:])
    finally:
        print("All done!")