User:Pfctdayelise/daily-image-l
Jump to navigation
Jump to search
cron job:
pfctdayelise@blants:0 ~$ crontab -l
#min hr mday month wday cmd
0 15 * * * ~pfctdayelise/dailyimagel.py
dailyimagel.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Shell command prefix used for every download; callers append the URL.
# Per the wget manual: -S prints the server response, -e robots=off
# disables robots.txt handling, -q is quiet mode and "-O -" writes the
# document to stdout.  The trailing space is significant.
wget = 'wget -S -erobots=off -q -O - '

# "Picture of the day / Today" page, requested with action=purge.
todaypotd = ('http://commons.wikimedia.org/w/index.php'
             '?title=Commons:Picture_of_the_day/Today&action=purge')

# Prefix turned into a full page URL by appending a page title.
urlbase = 'http://commons.wikimedia.org/wiki/'

# query.php endpoints (plain-text output); the page title is appended.
# querycat asks for the categories of a page, querylinks for the pages in
# namespace 4 that link to an image (at most 300 results).
querycat = ('http://commons.wikimedia.org/w/query.php'
            '?what=categories&format=txt&titles=')
querylinks = ('http://commons.wikimedia.org/w/query.php'
              '?what=imagelinks&ilnamespace=4&format=txt&illimit=300&titles=')
import os,sys,re
from commands import getoutput
from datetime import date
# All patterns are raw strings so that regex escapes such as \) and \n are
# interpreted by the regex engine and never as (invalid) string escapes.

# Everything between the "start content" / "end content" HTML comments of
# the downloaded page; DOTALL lets the match span multiple lines.
repotdcontent = re.compile(r'<!-- start content -->(.*?)<!-- end content -->', re.DOTALL)
# Page title of the image, taken from the "magnify" link of the thumbnail.
reimagename = re.compile(r'<div class="magnify"><a href="/wiki/([^"]*)" class="internal"')
# Category names: the rest of the line after "Category:".
recats = re.compile(r'Category:(.*)')
# Subpage path under "Commons:Featured pictures/" or "Commons:Quality
# images/".  Paths starting with "c" are skipped -- presumably to ignore
# the ".../candidates" pages; TODO confirm.
refplinks = re.compile(r'Commons:Featured pictures/([^c].*)')
reqilinks = re.compile(r'Commons:Quality [Ii]mages/([^c].*)')
# Contents of an unordered list (the caption list), possibly multi-line.
recaptions = re.compile(r'<ul>(.*?)</ul>', re.DOTALL)
# Opening/closing <li>, <a> and <i> tags, stripped from the captions.
reli = re.compile(r'</?li[^>]*>')
rea = re.compile(r'</?a[^>]*>')
rei = re.compile(r'</?i>')
# A newline followed by "<prefix>: Template:Potd...)" -- presumably the
# placeholder left for languages that have no caption; TODO confirm.
renocaption = re.compile(r'\n[^:]*: Template:Potd[^)]*\)')
# Path of the sendmail binary the finished message is piped into.
SENDMAIL = '/usr/sbin/sendmail'

# File that createmail() writes the generated message to.
mailfilename = '/users/blaugher/dailyimagel/dailyimagel.txt'
# Message file sent instead when createmail() raises.
mailerror = '/users/blaugher/dailyimagel/mailerror.txt'

# Recipient written into the "To:" header.  The commented-out value is
# presumably the real mailing-list address -- confirm before switching.
mailto = 'brianna.laugher@gmail.com'
#mailto = "daily-image-l@lists.wikimedia.org"
def createmail():
    '''
    Attempts to create an email at mailfilename.

    Downloads the "Picture of the day / Today" page, extracts the image
    name, derives a licence summary from the image's categories, finds the
    Featured Picture / Quality Image page linking to the image, cleans up
    the caption list and writes headers plus body to ``mailfilename``.
    The raw download is also dumped to ``wget.txt`` in the current
    directory.

    Raises IndexError when the page content, the image link, an FP/QI
    backlink or the caption list cannot be found.
    '''
    page = _fetch(todaypotd, '--post-data submit ')
    with open('wget.txt', 'w') as wgetfile:
        wgetfile.write(page)
    content = repotdcontent.findall(page)

    # extract image name/url
    imagename = reimagename.findall(content[0])[0]
    imageurl = urlbase + imagename

    # attempt to determine license status from categories
    categories = recats.findall(_fetch(querycat + imagename))
    lic = _describe_license(categories)

    # determine FP category (or 'topic'); fall back to the QI backlink
    linkstext = _fetch(querylinks + imagename)
    isFP = True
    fplinks = refplinks.findall(linkstext)
    if fplinks:
        topics = fplinks[0]
    else:
        isFP = False
        qilinks = reqilinks.findall(linkstext)
        if not qilinks:
            print("Could not find FP or QI backlink, aborting")
            raise IndexError('Could not find FP or QI backlink')
        topics = qilinks[0]

    # "A/B/..." is rendered as "A (B)"; deeper path components are ignored
    parts = topics.split('/')
    if len(parts) > 1:
        topic = parts[0] + ' (' + parts[1] + ')'
    else:
        topic = topics

    # extract multilingual captions
    captionlists = recaptions.findall(content[0])
    if not captionlists:
        raise IndexError('no captions??')
    captions = captionlists[0]
    # strip <li>/<a>/<i> tags, then the "Template:Potd" placeholder lines
    for pattern in (reli, rea, rei, renocaption):
        captions = pattern.sub('', captions)

    # write info to file (no explicit From: header is written; the
    # original author noted it did not seem to be needed)
    with open(mailfilename, 'w') as g:
        g.write("To: " + mailto + '\n')
        g.write('Content-Type: text/plain; charset=utf-8\r\n')
        g.write("Subject: " + str(date.today()) + '\r\n\r\n')
        g.write("Body of email:\r\n")
        g.write(imageurl + '\n')
        g.write('Copyright status: ' + lic + '\n')
        if isFP:
            g.write('Featured Picture category: ' + topic + '\n\n')
        elif 'Subject' in topic:
            g.write('Recognised as a Quality Image due to subject matter\n\n')
        else:
            g.write('Recognised as a Quality Image due to technical merit\n\n')
        g.write('Descriptions:\n')
        g.write(captions)


def _fetch(url, options=''):
    '''Run wget (plus extra options) on url and return the command output.'''
    # Imported locally so the module's import block stays untouched.
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote
    # The URL may contain text scraped from a web page: shell-quote it so
    # that $(...), backticks and quotes in it are never interpreted.
    return getoutput(wget + options + quote(url))


def _describe_license(categories):
    '''Return a licence summary string built from a list of category names.'''
    licenses = {
        "GFDL": "GNU Free Documentation License",
        "CC-BY-SA-2.5,2.0,1.0": "Creative Commons Attribution ShareAlike license, all versions",
        "CC-BY-SA-1.0": "Creative Commons Attribution ShareAlike license, version 1.0",
        "CC-BY-SA-2.0": "Creative Commons Attribution ShareAlike license, version 2.0",
        "CC-BY-SA-2.5": "Creative Commons Attribution ShareAlike license, version 2.5",
        "CC-BY-1.0": "Creative Commons Attribution license, version 1.0",
        "CC-BY-2.0": "Creative Commons Attribution license, version 2.0",
        "CC-BY-2.5": "Creative Commons Attribution license, version 2.5",
    }
    lic = ""
    if "Self-published work" in categories:
        lic = "Created by a Wikimedian (see image page for details); "
    for name, description in licenses.items():
        if name in categories:
            lic += "Licensed under the " + description + '. '
    # Any public-domain category replaces whatever was collected above;
    # when several PD categories are present the last one wins.
    if "Public domain" in categories:
        lic = "Public domain"
    for cat in categories:
        if not cat.startswith("PD"):
            continue
        if cat == "PD-self":
            lic = "Created by a Wikimedian (see image page for details); released into the public domain."
        elif cat == "PD Art":
            lic = "Reproduction of a two-dimensional work of art whose copyright has expired (public domain)."
        elif cat == "PD Old":
            lic = "Public domain (copyright expired due to the age of the work)."
        else:
            # cat[3:] drops the "PD" prefix and its separator character
            lic = "Public domain as a work of the " + cat[3:] + " organisation."
    return lic
###############################
# Script body: build today's mail, fall back to the error mail if that
# fails, then pipe the chosen message into sendmail.
error = None
try:
    createmail()
except Exception:
    # Some Python error: record its type and message, and send the error
    # mail instead.  Exception (not a bare except) is caught so that
    # KeyboardInterrupt / SystemExit still stop the script.
    exc_type, exc_value = sys.exc_info()[:2]
    error = "%s: %s" % (exc_type.__name__, exc_value)
    mailfilename = mailerror

# get the email message from a file
with open(mailfilename, 'r') as f:
    mail = f.read()
if error is not None:
    mail += "Error information: " + error

# open a pipe to the mail program and write the data to the pipe;
# "-t" makes sendmail read the recipients from the message headers
p = os.popen("%s -t" % SENDMAIL, 'w')
p.write(mail)
exitcode = p.close()
if exitcode:
    print("sendmail error: Exit code: %s" % exitcode)