User:Misza13/Scripts/catorfu.py

From Wikipedia, the free encyclopedia

[edit] catorfu.py

import sys, os, threading, Queue, time, locale
import re
sys.path.append(os.environ['HOME']+'/pywikipedia')
import wikipedia, catlib, pagegenerators

# Deletion summaries passed as the `reason` argument of Page.delete():
# the first for the image page itself, the second for its talk page.
IMAGEreason = u'Unused unfree copyrighted image. ([[WP:CSD#I5]])'
IMAGETALKreason = u'Talk page of a deleted page. ([[WP:CSD#G8]])'

# User names accepted as "last editor" of an image page; ORFUThread skips
# any image whose most recent edit was made by somebody not listed here.
TAGGERS = ['BJBot', 'Fritzbot', 'OrphanBot', 'Gay Cdn']

class ORFUThread(threading.Thread):
  """Worker thread that reviews queued pages and deletes eligible images.

  Pages are taken from the module-level ORFUQueue without blocking.  The
  thread stops when the queue is empty, when wikipedia.UserBlocked is
  raised, or when another thread sets IsRunning to False.  Any other error
  raised while handling a page is reported and the thread moves on to the
  next page.
  """

  # Template call whose name starts with PD, GFDL, GPL, LGPL or cc
  # (case-insensitive); such a tag makes the image ineligible for deletion.
  FreeLicenseTag = re.compile(r'{{(PD|GFDL|L?GPL|cc)[^}]*}}', re.IGNORECASE)

  # Minimum age, in seconds, of the last edit before deletion (7 days).
  MinTagAge = 7*24*60*60

  def __init__(self):
    threading.Thread.__init__(self)
    # Polled by the main script to detect completion; cleared by the main
    # script to request an early stop.
    self.IsRunning = False

  @staticmethod
  def _HasFreeLicenseTag(text):
    """Return True if `text` contains a free-license template tag."""
    return ORFUThread.FreeLicenseTag.search(text) is not None

  @staticmethod
  def _ParseTimestamp(Stamp):
    """Convert a 'HH:MM, D Month YYYY' string to seconds since the epoch.

    Raises ValueError if `Stamp` does not match that format.
    """
    # NOTE(review): time.mktime() interprets the parsed value as local
    # time; if the wiki reports timestamps in UTC the result is off by the
    # local UTC offset -- confirm against the bot account's settings.
    return time.mktime(time.strptime(Stamp,'%H:%M, %d %B %Y'))

  def _Consider(self, Page):
    """Run all eligibility checks on `Page` and delete it if they pass.

    Returns None.  Exceptions raised by the wiki library propagate to the
    caller.
    """
    if not Page.isImage():
      return

    wikipedia.output(u'Now considering [[%s]] for deletion...' % Page.title())

    if self._HasFreeLicenseTag(Page.get()):
      wikipedia.output(u'Image tagged with a free license tag.')
      return

    # One using page in namespace 0 is enough to keep the image, so stop
    # at the first hit instead of collecting every using page.
    ImagePage = wikipedia.ImagePage(Site,Page.title())
    for UsingPage in ImagePage.usingPages():
      if UsingPage.namespace() == 0:
        wikipedia.output(u'Image is not orphaned. Skipping.')
        return
    wikipedia.output(u'Image is orphaned.')

    # Entry 0 is treated as the most recent revision; fields 1 and 2 are
    # used as its timestamp and user name.
    # NOTE(review): presumably the history is newest-first -- confirm.
    LastEdit = Page.getVersionHistory()[0]
    LastEditTime = self._ParseTimestamp(LastEdit[1])
    LastEditor = LastEdit[2]
    wikipedia.output(u'Last editor was [[User:%s]].' % LastEditor)

    # Only delete when the last edit was made by a known tagger account
    # and is at least MinTagAge seconds old.
    if LastEditor not in TAGGERS:
      return
    if time.time()-LastEditTime < self.MinTagAge:
      return

    # NOTE(review): the result of delete() is not checked, so the talk
    # page is removed even if deleting the image did not succeed -- confirm
    # whether that can happen.
    Page.delete(reason=IMAGEreason, prompt=False)

    TalkPage = Page.toggleTalkPage()
    if TalkPage.exists():
      wikipedia.output(u'Talk page of image exists.')
      TalkPage.delete(reason=IMAGETALKreason, prompt=False)

  def run(self):
    """Process queued pages until the queue is empty or a stop is requested."""
    self.IsRunning = True
    print("Thread starting...")

    try:
      while self.IsRunning:
        try:
          Page = ORFUQueue.get(block=False)
        except Queue.Empty:
          break

        try:
          self._Consider(Page)
        except wikipedia.UserBlocked:
          # A blocked account cannot do anything useful; stop this worker.
          break
        except Exception:
          # Best effort: report the failure and carry on with the next
          # page rather than hiding it.  %r keeps the message ASCII-safe.
          wikipedia.output(u'Error while processing a page: %r'
                           % (sys.exc_info()[1],))
    finally:
      # Always clear the flag so the main loop never waits on a dead thread.
      self.IsRunning = False

    print("Thread ends execution.")

# Script entry point.
#   argv[1] (optional): date suffix of the category to process; asked for
#                       interactively when missing.
#   argv[2] (optional): number of worker threads (default 1).
if __name__ == '__main__':
  try:
    if len(sys.argv) < 2:
      wikipedia.output(u'Nuking category:')
      day = wikipedia.input(u'Category:Orphaned fairuse images as of')
    else:
      day = sys.argv[1]

    NumThreads = 1
    if len(sys.argv) >= 3:
      NumThreads = int(sys.argv[2])

    # ORFUThread parses history timestamps with %B (full month name),
    # which depends on LC_TIME.
    locale.setlocale(locale.LC_TIME,('en_US','utf-8'))

    Site = wikipedia.getSite()
    ORFUcat = catlib.Category(Site,u'Category:Orphaned fairuse images as of %s' % day)
    Generator = pagegenerators.CategorizedPageGenerator(ORFUcat)

    # Module-level name read by ORFUThread.run(); the queue is filled
    # completely before any worker starts.
    ORFUQueue = Queue.Queue()
    for Page in Generator:
      ORFUQueue.put(Page)

    Threads = []

    try:
      try:
        # Start the workers three seconds apart.
        for i in range(NumThreads):
          NewThread = ORFUThread()
          NewThread.start()
          Threads.append(NewThread)
          time.sleep(3)

        # Poll once per second until every worker has finished.
        while True:
          time.sleep(1)
          if not [t for t in Threads if t.IsRunning]:
            break

      except KeyboardInterrupt:
        # Covers start-up as well as monitoring, so Ctrl-C always reaches
        # the shutdown code below.
        print("Stopping threads...")

    finally:
      # The workers are non-daemon threads; unless they are told to stop
      # they would keep processing the queue after the main thread exits.
      for t in Threads:
        t.IsRunning = False
        t.join()

  finally:
    wikipedia.stopme()