# Source wiki page: User:JarektBot/AddCreators2WaltersFiles.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
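'''
Bot script for Wikimedia Commons: for files in "Category:Media from the
Walters Art Museum: no creator template", replace the plain artist name in
the "artist =" field with the replacement text listed in a CSV file (columns
"old", "new", "cat"), swap the maintenance category for the listed category,
and write a tally of artist names that are missing from the CSV to
"User:Jarekt/JarektBot Tasks".
'''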
import sys, wikipedia as pywikibot, csv, string, catlib, re
def _load_creators(csv_path, enc):
    '''Read the creator lookup table from a CSV file.

    The CSV must have the columns "old", "new" and "cat".  Returns a dict
    mapping each decoded "old" value to a ``(new, cat)`` tuple.  When an
    "old" value occurs more than once, the first row wins.
    '''
    creators = {}
    # "rb" is what the Python 2 csv module expects; the context manager makes
    # sure the handle is closed once the table is loaded.
    with open(csv_path, "rb") as handle:
        reader = csv.DictReader(handle, dialect='excel', delimiter=',')
        for row in reader:
            old = unicode(row.get(u'old'), enc)
            new = unicode(row.get(u'new'), enc)
            cat = unicode(row.get(u'cat'), enc)
            creators.setdefault(old, (new, cat))
    return creators


def _add_creator(text, old, new, cat, old_cat_re):
    '''Return `text` with the artist field and maintenance category updated.

    The first "artist = <old>" occurrence (optionally followed by a
    parenthesised remark) is replaced by "artist = <new>".  Only if that
    changed the text is the first match of `old_cat_re` replaced by
    "[[<cat>]]" (or simply removed when `cat` already appears in the text)
    and are triple newlines collapsed.  If the artist field did not match,
    `text` is returned unchanged.
    '''
    # `old` comes from data and must match literally, so escape any regex
    # metacharacters it may contain (parentheses, dots, ...).
    artist_re = r"artist\s*=\s*" + re.escape(old)

    # Callables are used as replacements so that backslashes or "\g<...>"
    # sequences in the CSV values are inserted verbatim instead of being
    # interpreted as replacement-template escapes.
    def put_artist(match):
        return "artist = " + new

    def put_category(match):
        return "[[" + cat + "]]\n"

    new_text = re.sub(artist_re + r"\s*\([^\)]*\)", put_artist, text, count=1)
    new_text = re.sub(artist_re, put_artist, new_text, count=1)
    if new_text == text:
        return text

    if cat not in new_text:
        new_text = re.sub(old_cat_re, put_category, new_text, count=1)
    else:
        new_text = re.sub(old_cat_re, "", new_text, count=1)
    # delete empty lines left behind by the category change
    new_text = new_text.replace("\n\n\n", "\n\n")
    return new_text


def main(args):
    '''Add creator replacements to files in the "no creator template" category.

    For every page in namespace 6 of the category, the part of the title
    between "File:" and the first "-" is taken as the artist name.  If that
    name is listed in the CSV file, the page text is rewritten (see
    `_add_creator`) and saved; otherwise the name is tallied.  Finally the
    names tallied more than once are written, most frequent first, to the
    page "User:Jarekt/JarektBot Tasks".

    `args` is accepted for the command-line entry point but is not used.
    '''
    csvFile = "Jarek's/Walters_Creators1.csv"
    edit_sum = "Add Creator template"
    Debug = False  # when True, diffs are shown but file pages are not saved
    enc = 'utf-8'
    # Debugging aid: the loop stops (before saving) when the number of
    # changed pages equals this value; -1 can never be reached.
    stop_at_edit = -1

    # Read CSV file
    creators = _load_creators(csvFile, enc)
    missing = {}  # artist name not in the CSV -> number of files seen

    site = pywikibot.getSite(u'commons', u'commons')
    old_cat = "Category:Media from the Walters Art Museum: no creator template"
    old_cat2 = r"\[\[\s*Category:\s*Media from the Walters Art Museum: no creator template\s*\]\]\s*"
    file_cat = catlib.Category(site, old_cat)
    edit_count = 0

    for file_page in file_cat.articles(startFrom=None):
        if file_page.namespace() != 6:  # make sure it is of correct namespace
            continue
        filename = file_page.title()
        m = re.search(r"File:([^\-]*)\-", filename)
        if m is None:
            continue
        artist = m.group(1).strip()

        if artist not in creators:
            # Remember artists without an entry so they can be reported.
            missing[artist] = missing.get(artist, 0) + 1
            continue

        new_name, new_cat = creators[artist]
        file_txt = file_page.get()
        new_txt = _add_creator(file_txt, artist, new_name, new_cat, old_cat2)
        if new_txt == file_txt:
            continue

        # save changed text
        pywikibot.showDiff(file_txt, new_txt)
        edit_count += 1
        if edit_count == stop_at_edit:
            break
        try:
            if not Debug:
                # NOTE(review): the two positional flags are presumably
                # watchArticle=False and minorEdit=True - confirm against the
                # framework's Page.put signature.
                status, reason, data = file_page.put(new_txt, edit_sum, False, True)
                if str(status) != '302':
                    # Build one message: output() takes a single text
                    # argument, so `reason` must not be passed separately.
                    pywikibot.output(u"%s %s" % (status, reason))
        except pywikibot.LockedPage:
            pywikibot.output(u"page is locked")

    # Save info on missing creator pages
    lines = []
    for artist in sorted(missing, key=missing.get, reverse=True):
        if missing[artist] == 1:
            # Sorted by descending count, so everything from here on was
            # seen only once and is left out of the report.
            break
        lines.append('* [[:Category:' + artist + '|' + artist + ']] ! '
                     + str(missing[artist]) + '\n')
    txt = ''.join(lines)
    page = pywikibot.Page(site, u'User:Jarekt/JarektBot Tasks')
    page.put(txt, 'New Creators')
if __name__ == "__main__":
    # Script entry point: hand the command-line arguments (without the script
    # name) to main() and always announce completion, even if main() raised.
    cli_args = sys.argv[1:]
    try:
        main(cli_args)
    finally:
        print("All done!")