#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Zhuyifei1999
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import contextlib
import os
import re
import tempfile
import time
import urllib

try:
    from hashlib import sha1
except ImportError:
    from sha import sha as sha1

import pywikibot
from pywikibot import catlib
from pywikibot import pagegenerators

class Robot(pywikibot.Bot):
    """Review files in "Category:Panoramio_review_needed" against Panoramio.

    For every file page in that category the bot looks for a Panoramio photo
    id in the page text, fetches the photo page, checks that the wiki file is
    byte-identical (size and SHA-1) to one of the Panoramio renditions, reads
    the licence from the photo page and finally replaces the
    ``{{panoramioreview}}`` tag on the file page with a filled-in review tag.

    The outcome of a review is kept in ``self.returndata``, a tuple whose
    first item is the status ("passed", "passed_changed", "failed",
    "no_panoramio_link", "panoramio_not_found" or "size_not_found").
    The helper methods ``getid``, ``rev_sha1`` and ``rev_license`` return a
    falsy value to mean "continue reviewing" and a truthy value to mean
    "stop, ``self.returndata`` holds the verdict".
    """

    # NOTE(review): the three literals below were lost when this script was
    # copied from a rendered wiki page (markup inside the string literals was
    # rendered instead of displayed). They are best-effort reconstructions and
    # MUST be checked against the original script before the bot is run.
    #
    # Substring of the Panoramio photo page that signals a missing photo.
    # Only the text "Photo Not Found" survived; the surrounding tag is a guess.
    NOT_FOUND_MARKER = u"<h1>Photo Not Found</h1>"
    # Substring of the photo page that signals "All Rights Reserved".
    # The whole literal was lost; this value is a guess.
    ALL_RIGHTS_RESERVED_MARKER = u'<li class="license c">'
    # Wikitext written in place of {{panoramioreview}}. It takes seven values:
    # reviewer, status, author, photo id, timestamp, licence, old licence.
    # The template name and parameter layout are a guess.
    REVIEW_TAG_TEMPLATE = u'{{Panoramioreview|%s|%s|%s|%s|%s|%s|%s}}'

    # Panoramio renditions that are compared against the wiki file, in order.
    SIZES = ("original", "large", "medium", "small",
             "thumbnail", "square", "mini_square")

    def __init__(self):
        """Bind the site and compile the regular expressions used in reviews."""
        self.site = pywikibot.getSite()
        # Each pattern captures the numeric Panoramio photo id in group 1.
        self.idregex = [
            re.compile(r"https?://(?:[^/]+\.)?panoramio\.com/photo/(\d+)/?", re.I),
            re.compile(r"https?://(?:commondatastorage\.googleapis\.com/)?static\.panoramio\.com/photos/(?:original|large|medium|small|thumbnail|square|mini_square)/(\d+)\.jpg", re.I),
            re.compile(r"https?://(?:[^/]+\.)?panoramio\.com/photo_explorer#view=photo&position=\d+&with_photo_id=(\d+)", re.I),
            # Dots are escaped so that only a literal "." matches.
            re.compile(r"https?://(?:[^/]+\.)?google\.com/[^/]*panoramio/photos/[^/]+/(\d+)\.jpg", re.I),
        ]
        self.authoregex = re.compile(r'<a href="[^"]+" rel="author">(.+?)</a>')
        self.review_template_regex = re.compile(
            r'(\{\{panoramioreview\}\})', re.S | re.I)
        # Group 1 is the licence kind (e.g. "by-sa"), group 2 the version.
        self.cc_license_link = re.compile(
            r'https?://creativecommons\.org/licenses/(.*?)/([0-9]\.[0-9])/')
        self.cc_license_template_regex = re.compile(
            r'\{\{(cc\-by(?:\-sa)?(?:\-[0-9]\.[0-9])?)(?:\|.*?)?\}\}', re.S | re.I)

    def getid(self):
        """Extract the Panoramio photo id from ``self.text``.

        Returns 0 and sets ``self.id`` when one of the id patterns matches;
        otherwise sets ``self.returndata`` to ``("no_panoramio_link",)`` and
        returns 1.
        """
        for pattern in self.idregex:
            found = pattern.search(self.text)
            if found:
                self.id = found.group(1)
                return 0
        self.returndata = ("no_panoramio_link",)
        return 1

    def run(self):
        """Review and save every file page in the review category."""
        category = catlib.Category(self.site, "Category:Panoramio_review_needed")
        for page in pagegenerators.CategorizedPageGenerator(category):
            # Namespace 6 is the MediaWiki "File:" namespace.
            if page.namespace() != 6:
                continue
            self.page = pywikibot.ImagePage(page)
            self.review()
            self.save()

    def review(self):
        """Review ``self.page`` and leave the verdict in ``self.returndata``."""
        self.text = self.page.get()
        # Reset the per-page state left over from the previous review.
        self.id = ''
        self.author = ''
        self.returndata = ()
        self.reupload = False

        if self.getid():
            return

        # FIXME: I'm very sorry but Panoramio has no good api
        url = "http://www.panoramio.com/photo/" + self.id
        try:
            # closing() releases the connection; the original never closed it.
            with contextlib.closing(urllib.urlopen(url)) as response:
                self.html = response.read().decode("utf-8")
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt.
            self.returndata = ("size_not_found",)  # Should never happen
            return

        if self.NOT_FOUND_MARKER in self.html:
            self.returndata = ("panoramio_not_found",)
            return

        author_match = self.authoregex.search(self.html)
        if author_match is None:
            # The original raised AttributeError here and aborted the whole
            # run; fall back to the manual-review status instead.
            self.returndata = ("size_not_found",)
            return
        self.author = author_match.group(1)

        if self.rev_sha1():
            return
        if self.rev_license():
            return
        if self.reupload:
            self.upload_hires()

    def rev_license(self):
        """Derive the licence verdict from the Panoramio photo page.

        Sets ``self.returndata`` and returns 1 for a failure (all rights
        reserved, an "nc"/"nd" licence, or no recognisable licence) and 0 for
        "passed" / "passed_changed".
        """
        if self.ALL_RIGHTS_RESERVED_MARKER in self.html:
            self.returndata = ("failed", "All Rights Reserved")
            return 1

        link = self.cc_license_link.search(self.html)
        if link:
            lic = ("cc-%s-%s" % link.groups()).lower()
            if any(part in ("nc", "nd") for part in lic.split("-")):
                self.returndata = ("failed", lic)
                return 1
            wanted_category = "Category:" + lic.upper()
            for cat in self.page.categories():
                if cat.title() == wanted_category:
                    self.returndata = ("passed", lic)
                    return 0
            self.returndata = ("passed_changed", lic)
            return 0

        # Should never happen, using "size_not_found" as fallback for manual
        # review. The original returned this tuple instead of storing it, so
        # the status never reached save().
        self.returndata = ("size_not_found",)
        return 1

    def rev_sha1(self):
        """Check the wiki file against each Panoramio rendition.

        A rendition matches when both its byte size and its SHA-1 equal the
        values in the wiki's image info. On a match ``self.reupload`` is set
        to True unless the matching rendition is "original", and 0 is
        returned. Without a match ``self.returndata`` becomes
        ``("size_not_found",)`` and 1 is returned.
        """
        imageinfo = self.site.loadimageinfo(self.page)
        for size in self.SIZES:
            path = self.download(size)
            try:
                matches = (os.path.getsize(path) == imageinfo['size']
                           and self._sha1_of(path) == imageinfo['sha1'])
            finally:
                # The downloaded copy is only needed for the comparison.
                os.remove(path)
            if matches:
                self.reupload = size != "original"
                return 0
        self.returndata = ("size_not_found",)
        return 1

    @staticmethod
    def _sha1_of(path):
        """Return the hexadecimal SHA-1 digest of the file at ``path``."""
        digest = sha1()
        # Binary mode: the digest must be computed over the raw bytes.
        with open(path, "rb") as handle:
            for chunk in iter(lambda: handle.read(8192), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def download(self, size):
        """Download one rendition of photo ``self.id`` to a temporary file.

        ``size`` is the rendition name used in the static Panoramio URL.
        Returns the path of the new file, created in the "temp" directory
        under the user's home directory. The caller must delete the file.
        """
        temp_dir = os.path.join(os.path.expanduser("~"), "temp")
        fd, path = tempfile.mkstemp(suffix=".jpg", dir=temp_dir)
        # mkstemp returns an open descriptor; urlretrieve writes by path, so
        # close it right away instead of leaking one descriptor per download.
        os.close(fd)
        url = "http://static.panoramio.com/photos/%s/%s.jpg" % (size, self.id)
        urllib.urlretrieve(url, path)
        return path

    def upload_hires(self):
        """Replace the wiki file with the "original" Panoramio rendition."""
        path = self.download("original")
        try:
            self.site.upload(
                self.page, source_filename=path,
                comment="Replacing image by its original image from Panoramio",
                ignore_warnings=True)
        except pywikibot.UploadWarning as warning:
            # Upload warnings are deliberately non-fatal; just report them.
            pywikibot.output(u"Upload warning ignored: %s" % warning)
        finally:
            os.remove(path)
        # NOTE(review): the source had a stray 'print "sim upload"' here that
        # looks like a commented-out debugging line; it is left disabled.

    def save(self):
        """Write the review verdict into the file page and save it."""
        text = self.page.get()
        # Pad the verdict to (status, licence) so both slots always exist.
        self.returndata = self.returndata + ('',) * (2 - len(self.returndata))

        # Defined up front so it is bound on every path below.
        old_license = ''
        if self.returndata[0] == "passed_changed":
            match = self.cc_license_template_regex.search(text)
            if match:
                old_license = match.group(1)
                text = text.replace(old_license, self.returndata[1])
        self.returndata = self.returndata + (old_license,)

        tag = self.REVIEW_TAG_TEMPLATE % (
            self.site.username().replace("_", " "),
            self.returndata[0],
            self.author,
            self.id,
            time.strftime('%Y-%m-%d %H:%M:%S'),
            self.returndata[1],
            self.returndata[2],
        )
        # A callable replacement inserts the tag verbatim; passing the string
        # itself would make re.sub interpret backslashes (e.g. in an author
        # name) as escapes or group references.
        text = self.review_template_regex.sub(lambda _match: tag, text)
        pywikibot.output(u'* %s %s' % (self.page.title(), " ".join(self.returndata)))
        comment = "Panoramio Review Bot: %s" % " ".join(self.returndata)

        try:
            self.page.put(text, comment)
            # NOTE(review): the source also had a stray
            # 'self.userPut(self.page, text_o, text)' here that looks like a
            # commented-out alternative to page.put(); it is left disabled.
        except KeyboardInterrupt:
            raise
        except Exception as e:
            pywikibot.output(u"Page %s not saved: %s" % (self.page.title(asLink=True), str(e)))

def main():
    """Pass the command line to pywikibot, then build and run the bot."""
    pywikibot.handleArgs()
    Robot().run()

if __name__ == "__main__":
    # Script entry point: pywikibot.stopme() is called whether main()
    # returns normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()

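# NOTE: the lines below are not part of the script. They appear to be page
# title, heading and navigation text picked up when the source was copied
# from a web page; they are kept here as comments.
#
# User:Panoramio Review Bot/panrb.py
# Photo Not Found
# Navigation menu
#
# User:Panoramio Review Bot/panrb.py
# Photo Not Found
# Navigation menu
#
# Search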