Wikipedia:List of administrators/script

From Wikipedia, the free encyclopedia

This is the script I use for activity updates. I run it on Mac OS X; I suspect it also works on Linux. -- Rick Block (talk) 20:10, 3 December 2006 (UTC)

#!/bin/bash

# Command that fetches a URL and writes the response to standard output.
# It is expanded unquoted where it is used, so it may carry options.
WGET=/usr/bin/curl  # on a mac OS X
# WGET="wget -q -O -"   # on a linux box with wget

# Working files, all relative to the current directory.
ACTIVE=admins.active          # wiki list entries for active admins
SEMIACTIVE=admins.semiactive  # wiki list entries for semi-active admins
INACTIVE=admins.inactive      # wiki list entries for inactive admins
CONTRIBS=contrib.times        # prefix of the per-admin contribution caches
WPLA=wpla                     # raw copy of the current WP:LA page

# Print the full English name of the month preceding the month named in $1.
# Prints nothing when $1 is not a full English month name; the exit status
# is 0 either way.
prevmonth() {
  # Each month is preceded in this ring by its predecessor; December appears
  # at both ends so that January wraps around.
  local -a ring=(December January February March April May June
                 July August September October November December)
  local idx

  for ((idx = 1; idx < ${#ring[@]}; idx++)); do
    if [[ "${ring[idx]}" == "$1" ]]; then
      printf '%s\n' "${ring[idx - 1]}"
      return 0
    fi
  done
  return 0
}

function inactive () {
  # Decide whether an admin is inactive, judged by the latest contribution.
  #
  # $1 is day number of latest contrib
  # $2 is month of latest contrib (full English name, as prevmonth expects)
  # $3 is year of latest contrib
  # $4 - $6 are day, month, year for today
  #
  # Returns 1 (not inactive) when the latest contrib falls in the current
  # month, in one of the two preceding months, or in the month three back on
  # a day number greater than today's. Returns 0 (inactive) otherwise,
  # including when any argument is missing or empty (e.g. no contribution
  # date could be found).
  local arg month year back

  # Without a complete pair of dates nothing can prove recent activity.
  for arg in "$1" "$2" "$3" "$4" "$5" "$6"; do
    [[ -n "$arg" ]] || return 0
  done

  # if latest contrib is this month, not inactive
  [[ "$2" == "$5" && "$3" == "$6" ]] && return 1

  # Walk back one month at a time; stepping from January to December also
  # steps back one year.
  month=$5
  year=$6
  for back in 1 2 3; do
    month=$(prevmonth "$month")
    [[ "$month" == "December" ]] && year=$((year - 1))
    if [[ "$2" == "$month" && "$3" == "$year" ]]; then
      # one or two months back: not inactive, whatever the day
      (( back < 3 )) && return 1
      # three months back: only a later day number is still less than three
      # months ago. [ ] compares in base 10, so a day such as 08 is safe.
      [ "$1" -gt "$4" ] && return 1
    fi
  done

  return 0
}

function semiactive () {
  # Decide whether an admin is semi-active, judged by the 30th most recent
  # contribution.
  #
  # $1 is day number of 30th most recent contrib
  # $2 is month of 30th most recent contrib (full English name, as prevmonth
  #    expects)
  # $3 is year of 30th most recent contrib
  # $4 - $6 are day, month, year for today
  #
  # Returns 1 (not semi-active) when that contrib falls in the current month,
  # in the preceding month, or in the month two back on a day number greater
  # than today's. Returns 0 (semi-active) otherwise, including when any
  # argument is missing or empty.
  local arg month year back

  # Without a complete pair of dates nothing can prove recent activity.
  for arg in "$1" "$2" "$3" "$4" "$5" "$6"; do
    [[ -n "$arg" ]] || return 0
  done

  # if 30th most recent contrib is this month, not semi-active
  [[ "$2" == "$5" && "$3" == "$6" ]] && return 1

  # Walk back one month at a time; stepping from January to December also
  # steps back one year.
  month=$5
  year=$6
  for back in 1 2; do
    month=$(prevmonth "$month")
    [[ "$month" == "December" ]] && year=$((year - 1))
    if [[ "$2" == "$month" && "$3" == "$year" ]]; then
      # last month: not semi-active, whatever the day
      (( back < 2 )) && return 1
      # two months back: only a later day number is still less than two
      # months ago. [ ] compares in base 10, so a day such as 08 is safe.
      [ "$1" -gt "$4" ] && return 1
    fi
  done

  return 0
}

# Today's date as "day month year". %e pads single-digit days with a space;
# TODAY is always expanded unquoted below, so word splitting removes it.
TODAY=$(date +"%e %B %Y")

# Start from empty result lists; the loop below only ever appends to them.
rm -f -- "$ACTIVE" "$INACTIVE" "$SEMIACTIVE"

# n counts admins handled since the last pause (see the throttle below).
n=1
$WGET 'http://en.wikipedia.org/w/index.php?title=Special%3aListusers&group=sysop&limit=1500' >sysops
# <li><a href="/wiki/User:A_Man_In_Black" title="User:A Man In Black">A Man In Black</a>
# The first sed breaks the page after every </a> so each entry sits on its
# own line; the later ones strip everything around
#   URL_name" title="User:Real name
sed 's;</a>;</a>\
;g' sysops \
  | grep -F "<li>" \
  | sed -e 's/^.*<li>[^U]*User://' \
        -e 's/">.*//' \
        -e 's/.amp;action=edit. class=.new//' \
  | while read -r line; do

  # Throttle: pause 10 seconds after every ninth admin, printing the most
  # recently handled name as a progress marker. Running the script with "-"
  # as its first argument turns the pauses off.
  n=$((n + 1))
  if [[ "$n" -ge 10 && "$1" != "-" ]]; then
    printf '%s\n' "$realname"
    sleep 10
    n=1
  fi
  urlname=${line%%\" title=\"User:*}
  realname=${line##*\" title=\"User:}
  contrib_file="$CONTRIBS.$urlname"

  # Fetch the 30 most recent contributions, one timestamp per line. With "-"
  # as the first argument an existing cache file is reused instead.
  if [[ "$1" != "-" || ! -f "$contrib_file" ]]; then
    $WGET "http://en.wikipedia.org/w/index.php?title=Special:Contributions&target=$urlname&limit=30" \
      | grep -F "<li>" \
      | sed -e 's/^.*<li>//' -e 's/ (.*//' >"$contrib_file"
  fi
  # cut drops the first seven characters of the line (a time of day such as
  # "20:10, "), leaving "day month year".
  LATEST=$(head -n 1 "$contrib_file" | cut -c8-)
  THIRTIETH=$(tail -n 1 "$contrib_file" | cut -c8-)

  # inactive if LATEST contrib not within last three months
  # semi-active if 30th most recent contrib is more than two months ago
  #
  # An admin for whom no contribution date was found is listed as inactive
  # directly instead of calling inactive with too few arguments.
  # LATEST, THIRTIETH and TODAY are left unquoted on purpose: each must split
  # into day, month and year.
  # shellcheck disable=SC2086
  if [[ -z "$LATEST" ]] || inactive $LATEST $TODAY; then
    case "$urlname" in
      Jasonr) echo "# {{admin|$realname}} - has administrator access for technical rather than administrative reasons.  Works on hardware upgrades." >>"$INACTIVE";;
      *) echo "# {{admin|$realname}} - $LATEST" >>"$INACTIVE";;
    esac
    continue
  fi
  # shellcheck disable=SC2086
  if semiactive $THIRTIETH $TODAY; then
    echo "# {{admin|$realname}}" >>"$SEMIACTIVE"
  else
    echo "# {{admin|$realname}}" >>"$ACTIVE"
  fi
done


# get the current contents of WP:LA
$WGET 'http://en.wikipedia.org/w/index.php?title=Wikipedia:List_of_administrators&action=raw' >"$WPLA"
# Keep only the admin entries that carry a <noinclude> note, so the notes can
# be re-attached to the regenerated lists. grep -E replaces the obsolescent
# egrep, and the braces are escaped because an unescaped "{" that does not
# start an interval is undefined in POSIX extended regular expressions.
grep -E -- '^# \{\{admin.*<noinclude>' "$WPLA" >"$WPLA.notes"

# fix the format and sort order

# format_active_list: filter that builds the body of the active list.
#
# Standard input: the noted entries taken from WP:LA (lines of the form
#   "# {{admin|Name}} <noinclude>...", one per line), then a line containing
#   "END OF NOTES", then the sorted "# {{admin|Name}}" entries.
# Standard output: the entries with their notes re-attached, with an
#   "===X===" heading block in front of the first entry of each initial
#   letter, framed by the "===#===" opening and the "</onlyinclude>" closing.
#
# A heading is printed for every letter that actually occurs. A letter with
# no entries is skipped rather than blocking all the headings after it.
format_active_list() {
  awk '
BEGIN {
  print "===#==="
  print "</noinclude>"
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-"
  nextpos = 1      # position in alphabet of the earliest heading still due
}

/END OF NOTES/ {
  noting = 1
  next
}

{
  if (noting == 0) {
    # notes section: remember the note text under the bare entry
    admin = $0
    sub("}} *<noinclude.*", "}}", admin)
    notetext = $0
    sub("^.*}} *<noinclude", "<noinclude", notetext)
    note[admin] = " " notetext
  } else {
    if (substr($0, 1, 9) == "# {{admin") {
      # the name starts right after the 10-character "# {{admin|" prefix
      first = substr($0, 11, 1)
      pos = (first == "") ? 0 : index(alphabet, first)
      # Letters A-Z get a heading as soon as they are due or overdue.
      # The "-" heading stays reserved for entries that follow the Z block.
      if (pos > 0 && pos >= nextpos && (pos <= 26 || nextpos == 27)) {
        print "<noinclude>"
        print ""
        print "===" first "==="
        print "</noinclude>"
        nextpos = pos + 1
      }
    }
    print $0 note[$0]
  }
}

END {
  print "</onlyinclude>"
}'
}

sort -fd -- "$ACTIVE" >"tmp.$ACTIVE"
{
  cat -- "$WPLA.notes"
  echo "END OF NOTES"
  cat -- "tmp.$ACTIVE"
} | format_active_list >"$ACTIVE"

# Build the semi-active section: sort the entries, then feed the noted
# entries from WP:LA, a marker line and the sorted entries through awk, which
# prints the section header followed by each entry with its note re-attached.
sort -fd "$SEMIACTIVE" >"tmp.$SEMIACTIVE"
{
  cat "$WPLA.notes"
  echo "END OF NOTES"
  cat "tmp.$SEMIACTIVE"
} | awk '
BEGIN {
  # \047 is the single quote; two of them on each side make wiki italics
  print "==Semi-active=="
  print "\047\047These users are less active than those above, but do edit at least occasionally.\047\047"
  print ""
}

# the marker line separates the notes from the entries
/END OF NOTES/ {
  in_entries = 1
  next
}

# before the marker: remember each note under its bare entry
!in_entries {
  entry = $0
  sub(/}} *<noinclude.*/, "}}", entry)
  remark = $0
  sub(/^.*}} *<noinclude/, "<noinclude", remark)
  note[entry] = " " remark
  next
}

# after the marker: print the entry followed by its note, if it has one
{
  print $0 note[$0]
}' >"$SEMIACTIVE"


# build_inactive_section: rewrite $INACTIVE as the finished "Inactive" section.
#
# Reads the unsorted entries in $INACTIVE (lines ending in "day month year")
# and replaces the file, via tmp.$INACTIVE, with: the section header, the
# Jasonr entry, then one "#: " separator per year followed by the entries of
# that year in calendar order (oldest first).
#
# The years run from 2002 to the current year, and never stop before 2007,
# so that entries with a last edit after 2007 are no longer dropped.
build_inactive_section() {
  local first_year=2002 last_year=2007 this_year year

  this_year=$(date +%Y)
  if [[ "$this_year" -gt "$last_year" ]]; then
    last_year=$this_year
  fi

  printf '%s\n' \
    '== Inactive ==' \
    "''Sorted by length of inactivity:''" \
    '' >"tmp.$INACTIVE"
  grep -- "Jasonr}}" "$INACTIVE" >>"tmp.$INACTIVE"

  for ((year = first_year; year <= last_year; year++)); do
    echo "#: " >>"tmp.$INACTIVE"
    grep -E -- "${year}\$" "$INACTIVE" | awk '
# Group the lines by month name and day number; the last three fields of an
# entry are day, month and year. Adding 0 turns a day such as 03 into 3 so it
# matches the loop counter below. Lines too short to hold a date are skipped.
NF >= 3 {
  key = $(NF-1) ($(NF-2) + 0)
  if (key in bucket) {
    bucket[key] = bucket[key] "\n" $0
  } else {
    bucket[key] = $0
  }
}

# Emit the groups in calendar order.
END {
  split("January February March April May June July August September October November December", months, " ")
  for (month = 1; month <= 12; month++) {
    for (day = 1; day <= 31; day++) {
      key = months[month] day
      if (key in bucket) {
        print bucket[key]
      }
    }
  }
}
' >>"tmp.$INACTIVE"
  done

  cp -- "tmp.$INACTIVE" "$INACTIVE"
}

build_inactive_section