User:Panoramio Review Bot/panrb.py

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search

<source lang="python">

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2014 Zhuyifei1999
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import contextlib
import os
import re
import tempfile
import time
import urllib
try:
    from hashlib import sha1
except ImportError:
    from sha import sha as sha1

import pywikibot
from pywikibot import catlib
from pywikibot import pagegenerators

class Robot(pywikibot.Bot):
    """Review files in Category:Panoramio_review_needed against Panoramio.

    For every file page in that category the bot extracts the Panoramio
    photo id from the page text, fetches the Panoramio photo page, checks
    that the Commons file is byte-identical to one of the Panoramio
    renditions, reads the licence from the Panoramio page and finally
    replaces the ``{{panoramioreview}}`` tag on the file page with the
    review result.

    Per-page state lives on the instance (``page``, ``text``, ``id``,
    ``author``, ``html``, ``reupload``, ``returndata``).  ``returndata`` is a
    tuple whose first item is the review status and whose optional second
    item is the licence (or failure reason); ``save()`` appends the licence
    previously declared on the page as a third item.

    The helper methods ``getid``, ``rev_sha1`` and ``rev_license`` return
    ``0`` to continue the review and a truthy value to stop it.
    """

    # NOTE(review): this file was recovered from a rendered wiki page, and the
    # three literals below contained HTML / wikitext that the rendering
    # swallowed.  Only the surrounding code survived, so these values are
    # ASSUMED reconstructions -- confirm them against the original script
    # before running the bot.
    #
    # Heading shown by Panoramio for a deleted photo; only the text
    # "Photo Not Found" survived, the enclosing tag is presumed to be <h1>.
    _NOT_FOUND_MARKER = "<h1>Photo Not Found</h1>"
    # List item marking an "All Rights Reserved" photo; only the fact that it
    # was an <li> element survived, the attributes are presumed.
    _ALL_RIGHTS_RESERVED_MARKER = '<li class="license c">'
    # Replacement for {{panoramioreview}}.  The template call itself was lost;
    # all that is known is that it is formatted with seven positional values
    # (see save()).  The plain positional layout below is a placeholder.
    _REVIEW_TAG = u'{{Panoramioreview|%s|%s|%s|%s|%s|%s|%s}}'

    # Panoramio renditions compared against the Commons file, in this order.
    _SIZES = ("original", "large", "medium", "small",
              "thumbnail", "square", "mini_square")

    def __init__(self):
        """Bind the default site and compile the regular expressions."""
        self.site = pywikibot.getSite()
        # URL shapes that carry a Panoramio photo id in group 1.  Literal
        # dots are escaped so that e.g. "googleXcom" or "123Xjpg" no longer
        # match.
        self.idregex = [
            re.compile(r"https?://(?:[^/]+\.)?panoramio\.com/photo/(\d+)/?", re.I),
            re.compile(r"https?://(?:commondatastorage\.googleapis\.com/)?static\.panoramio\.com/photos/(?:original|large|medium|small|thumbnail|square|mini_square)/(\d+)\.jpg", re.I),
            re.compile(r"https?://(?:[^/]+\.)?panoramio\.com/photo_explorer#view=photo&position=\d+&with_photo_id=(\d+)", re.I),
            re.compile(r"https?://(?:[^/]+\.)?google\.com/[^/]*panoramio/photos/[^/]+/(\d+)\.jpg", re.I),
        ]
        self.authoregex = re.compile(r'<a href="[^"]+" rel="author">(.+?)</a>')
        self.review_template_regex = re.compile(
            r'(\{\{panoramioreview\}\})', re.S | re.I)
        self.cc_license_link = re.compile(
            r'https?://creativecommons\.org/licenses/(.*?)/([0-9]\.[0-9])/')
        self.cc_license_template_regex = re.compile(
            r'\{\{(cc\-by(?:\-sa)?(?:\-[0-9]\.[0-9])?)(?:\|.*?)?\}\}', re.S | re.I)

    def getid(self):
        """Find the Panoramio photo id in ``self.text``.

        Stores the id in ``self.id`` and returns 0 on success; otherwise sets
        ``self.returndata`` to ``("no_panoramio_link",)`` and returns 1.
        """
        for regex in self.idregex:
            reobj = regex.search(self.text)
            if reobj:
                self.id = reobj.group(1)
                return 0
        self.returndata = ("no_panoramio_link",)
        return 1

    def run(self):
        """Review and save every file page awaiting Panoramio review."""
        category = catlib.Category(
            self.site, "Category:Panoramio_review_needed")
        for page in pagegenerators.CategorizedPageGenerator(category):
            # Namespace 6 is the file namespace; skip everything else.
            if page.namespace() != 6:
                continue
            self.page = pywikibot.ImagePage(page)
            self.review()
            self.save()

    def review(self):
        """Review ``self.page`` and leave the verdict in ``self.returndata``."""
        self.text = self.page.get()
        # Reset the state left over from the previously reviewed page.
        self.id = ''
        self.author = ''
        self.returndata = ()
        self.reupload = False
        if self.getid():
            return
        try:
            # FIXME: I'm very sorry but Panoramio has no good api
            url = "http://www.panoramio.com/photo/" + self.id
            # urlopen() results are not context managers on Python 2, hence
            # closing(); the connection is released even if read() fails.
            with contextlib.closing(urllib.urlopen(url)) as response:
                self.html = response.read().decode("utf-8")
        except Exception:
            # Not a bare "except:", so Ctrl-C still stops the bot.
            self.returndata = ("size_not_found",)  # Should never happen
            return
        if self._NOT_FOUND_MARKER in self.html:
            self.returndata = ("panoramio_not_found",)
            return
        author_match = self.authoregex.search(self.html)
        if author_match is None:
            # Unexpected page layout: fall back to the manual-review status
            # instead of crashing the whole run with an AttributeError.
            self.returndata = ("size_not_found",)
            return
        self.author = author_match.group(1)
        if self.rev_sha1():
            return
        if self.rev_license():
            return
        if self.reupload:
            self.upload_hires()

    def rev_license(self):
        """Derive the licence verdict from the fetched Panoramio page.

        Sets ``self.returndata`` to ``("failed", reason)``,
        ``("passed", licence)`` or ``("passed_changed", licence)``.
        Returns 0 when the licence is acceptable and 1 otherwise.
        """
        if self._ALL_RIGHTS_RESERVED_MARKER in self.html:
            self.returndata = ("failed", "All Rights Reserved")
            return 1
        reobj = self.cc_license_link.search(self.html)
        if reobj:
            lic = ("cc-%s-%s" % reobj.groups()).lower()
            # Non-commercial and no-derivatives licences are rejected.
            for item in lic.split("-"):
                if item in ("nc", "nd"):
                    self.returndata = ("failed", lic)
                    return 1
            # "passed" when the page is already in the matching licence
            # category, "passed_changed" when save() has to rewrite it.
            for cat in self.page.categories():
                if cat.title() == "Category:" + lic.upper():
                    self.returndata = ("passed", lic)
                    return 0
            self.returndata = ("passed_changed", lic)
            return 0
        # Should never happen, using "size_not_found" as fallback for manual
        # review.  The status has to be stored in returndata (the original
        # only returned it, leaving returndata empty for save()).
        self.returndata = ("size_not_found",)
        return 1

    def rev_sha1(self):
        """Check that the Commons file equals one of the Panoramio renditions.

        Each rendition is downloaded and compared by size, then by SHA-1,
        with the values reported by the site for ``self.page``.  On a match
        ``self.reupload`` records whether the match was a non-original
        rendition and 0 is returned.  Without a match ``self.returndata``
        becomes ``("size_not_found",)`` and 1 is returned.
        """
        imageinfo = self.site.loadimageinfo(self.page)
        for size in self._SIZES:
            path = self.download(size)
            try:
                if os.path.getsize(path) != imageinfo['size']:
                    continue
                if self._file_sha1(path) != imageinfo['sha1']:
                    continue
            finally:
                # The download is only needed for the comparison.
                self._discard(path)
            self.reupload = size != "original"
            return 0
        self.returndata = ("size_not_found",)
        return 1

    @staticmethod
    def _file_sha1(path):
        """Return the hexadecimal SHA-1 digest of the file at ``path``."""
        digest = sha1()
        # Binary mode: the digest must cover the raw bytes on every platform.
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(8192), b''):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def _discard(path):
        """Delete a temporary download, ignoring a file that is already gone."""
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup; a leftover temp file must not fail a review.
            pass

    def download(self, size):
        """Download the ``size`` rendition of photo ``self.id``.

        Returns the path of a new temporary ``.jpg`` file in ``~/temp``; the
        caller is responsible for deleting it.
        """
        temp_dir = os.path.join(os.path.expanduser("~"), "temp")
        (f, path) = tempfile.mkstemp(suffix=".jpg", dir=temp_dir)
        # mkstemp() hands back an open descriptor; urlretrieve() reopens the
        # file by name, so release the descriptor straight away.
        os.close(f)
        url = "http://static.panoramio.com/photos/%s/%s.jpg" % (size, self.id)
        try:
            urllib.urlretrieve(url, path)
        except Exception:
            self._discard(path)
            raise
        return path

    def upload_hires(self):
        """Overwrite the Commons file with Panoramio's original rendition."""
        path = self.download("original")
        try:
            self.site.upload(self.page, source_filename=path,
                             comment="Replacing image by its original image from Panoramio",
                             ignore_warnings=True)
        except pywikibot.UploadWarning:
            pass
        finally:
            self._discard(path)
        # print "sim upload"

    def save(self):
        """Write the review result stored in ``self.returndata`` to the page.

        When the status is "passed_changed" the licence template found on the
        page is replaced by the licence read from Panoramio.  The
        ``{{panoramioreview}}`` tag is then replaced by the filled-in review
        tag and the page is saved.  Save errors are reported, not raised.
        """
        text = self.page.get()
        text_o = text  # only needed by the disabled userPut() call below
        # Pad to (status, licence) so both can always be indexed.
        self.returndata = self.returndata + ('',) * (2 - len(self.returndata))
        # Always defined, so the third slot of the tag can always be filled.
        old_license = ''
        if self.returndata[0] == "passed_changed":
            match = self.cc_license_template_regex.search(text)
            if match:
                old_license = match.group(1)
                text = text.replace(match.group(1), self.returndata[1])
        self.returndata = self.returndata + (old_license,)
        tag = self._REVIEW_TAG % (
            self.site.username().replace("_", " "), self.returndata[0],
            self.author, self.id, time.strftime('%Y-%m-%d %H:%M:%S'),
            self.returndata[1], self.returndata[2])
        # A callable replacement inserts the tag verbatim; passed as a string,
        # backslashes in e.g. the author name would be read as escapes.
        text = self.review_template_regex.sub(lambda _match: tag, text)
        pywikibot.output(u'* %s %s' % (self.page.title(), " ".join(self.returndata)))
        comment = "Panoramio Review Bot: %s" % " ".join(self.returndata)

        try:
            self.page.put(text, comment)
            # self.userPut(self.page, text_o, text)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            pywikibot.output(u"Page %s not saved: %s" % (self.page.title(asLink=True), str(e)))


def main():
    """Let pywikibot handle the command-line arguments, then run the bot."""
    pywikibot.handleArgs()
    bot = Robot()
    bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()