# From Wikipedia, the free encyclopedia
# what_links_out.py
# Author: Benc (http://en.wikipedia.org/wiki/User:Benc)
# Created: 21 September 2004
# Updated: 21 September 2004
# Purpose: Compile a list of articles that are linked to by a page. Sorted by
# wiki and by namespace. Warning: may have unexpected results for
# malformed wikicode.
#
# Usage: Cut and paste the page source here. Alternatively, if you're a
# programmer using this script as a module, call the reportLinks()
# function instead.
# Wikitext to analyse when this file is run as a script: paste the page source
# between the triple quotes. reportLinks() replaces this value with its
# argument, and parseLinks() consumes it while scanning.
pageSource = """
"""
# -----------------------------------------------------------------------------
import string
# Namespace prefixes recognised by parseNamespace(). They are spelled the way
# capNamespace() normalises a prefix: first letter upper-case, rest lower-case.
NAMESPACES = [
    'Talk',
    'Category', 'Category talk',
    'Help', 'Help talk',
    'Image', 'Image talk',
    'Mediawiki', 'Mediawiki talk',  # Namespace miscapitalization intentional
    'Template', 'Template talk',
    'User', 'User talk',
    'Wikipedia', 'Wikipedia talk',
]

# Shortcut prefixes (looked up in upper case) and the namespace each maps to.
PSEUDO = {
    'WP': 'Wikipedia',
    'CAT': 'Category',
    'P': 'Wikipedia',  # Proposed WikiProject pseudo-namespace
}

# Interwiki prefixes (looked up in lower case) and the label printed for each.
# This list should probably be expanded.
INTERWIKI = {
    'b': '[[Wikibooks]]',
    'commons': 'Wikimedia Commons',
    'm': 'Meta-wiki',
    'q': '[[Wikiquote]]',
    'sep11': '9-11 Memorial',
    'wikt': '[[Wiktionary]]',
}

# Markers prepended to the namespace field of interwiki / translation links;
# reportLinks() recognises them and strips them before printing a heading.
INTERWIKI_PLACEHOLDER = 'ZZZ-interwiki to: '
TRANSLATION_PLACEHOLDER = 'ZZZ-translation to: '

# Links collected by parseLinks(). Another global variable. Horrible, I know.
outgoingLinks = []
def uncapAll(txt):
    """Return txt converted entirely to lower case."""
    # Use the str method: the module-level string.lower() helper is
    # deprecated and no longer exists in Python 3.
    return txt.lower()
def capAll(txt):
    """Return txt converted entirely to upper case."""
    # Use the str method: the module-level string.upper() helper is
    # deprecated and no longer exists in Python 3.
    return txt.upper()
def capFirst(txt):
    """Return txt with its first character upper-cased; the rest is untouched.

    An empty string is returned unchanged.
    """
    # Slicing (txt[:1]) instead of indexing (txt[0]) keeps an empty link
    # target, e.g. from "[[|label]]", from raising IndexError.
    return txt[:1].upper() + txt[1:]
def capNamespace(txt):
    """Return txt with its first character upper-cased and the rest lower-cased.

    This is the spelling used by the entries of NAMESPACES. An empty string
    is returned unchanged.
    """
    # Slicing (txt[:1]) instead of indexing (txt[0]) keeps an empty prefix,
    # e.g. from a link that starts with ':', from raising IndexError.
    return txt[:1].upper() + txt[1:].lower()
def parsePipe(link):
    """Split link text at its first pipe character.

    Returns a (target, label) pair. target is the text before the first '|'
    (or the whole text when there is no pipe), passed through capFirst().
    label is everything after that first pipe, or None when there is no pipe.
    """
    if '|' not in link:
        return capFirst(link), None
    # Split once only: any further pipes stay inside the label.
    target, label = link.split('|', 1)
    return capFirst(target), label
def parseNamespace(link):
    """Classify a link by the prefix before its first colon.

    Returns:
      - the matching NAMESPACES entry for a known namespace prefix;
      - the mapped namespace for a PSEUDO shortcut (matched in upper case);
      - INTERWIKI_PLACEHOLDER + label for an INTERWIKI prefix (matched in
        lower case);
      - TRANSLATION_PLACEHOLDER + prefix for any other two-character,
        all-lower-case prefix;
      - '' (main article namespace) otherwise, including when the link has
        no colon or the colon is not acting as a namespace separator.
    """
    # A link may start with a colon, as in [[:Category:Foo]]. Skip that colon
    # so the real prefix is examined; previously the empty prefix made
    # capNamespace() raise IndexError.
    if link[:1] == ':':
        link = link[1:]
    i = link.find(':')
    if i <= 0:
        # No colon, or nothing in front of it: main article namespace.
        return ''
    ns = link[:i]
    if capNamespace(ns) in NAMESPACES:
        return capNamespace(ns)
    if capAll(ns) in PSEUDO:
        return PSEUDO[capAll(ns)]
    if uncapAll(ns) in INTERWIKI:
        return INTERWIKI_PLACEHOLDER + INTERWIKI[uncapAll(ns)]
    if len(ns) == 2 and ns == uncapAll(ns):
        # Two lower-case characters: treated as a language code
        # (fairly safe assumption).
        return TRANSLATION_PLACEHOLDER + ns
    # Otherwise the ':' is not being used as a namespace indicator.
    return ''
def parseOneLink(link):
    """Build the record stored in outgoingLinks for one link.

    Returns a 4-tuple (namespace, link, trueLink, displayedLink): the result
    of parseNamespace(link), the raw link text, and the two values returned
    by parsePipe(link).
    """
    # parseNamespace() is evaluated before parsePipe(), as before.
    return (parseNamespace(link), link) + parsePipe(link)
def parseLinks(depth=0):
    """Scan the global pageSource for [[...]] links and record each one.

    Every link found is passed through parseOneLink() and appended to the
    global outgoingLinks. pageSource is consumed as the scan advances.

    depth is 0 for the top-level call. A positive depth means pageSource
    currently begins just after an opening '[['.

    Note: this function is recursive to handle wikicode such as
    [[Image:Example.gif|thumb|right|This is an [[example]] image]]
    """
    global pageSource, outgoingLinks
    if depth > 0:
        opening = pageSource.find('[[')
        closing = pageSource.find(']]')
        if closing < 0:
            # Unterminated link: there is no ']]' left, so no complete link
            # can follow. Stop here rather than recording pageSource[:-1]
            # as a bogus link.
            pageSource = ''
            return
        outgoingLinks.append(parseOneLink(pageSource[:closing]))
        if opening < 0 or closing < opening:
            # Nothing deeper: this link closes before any other one opens.
            pageSource = pageSource[closing + 2:]
            return
        # Else another link opens inside this one: fall through and recurse.
    while True:
        opening = pageSource.find('[[')
        if opening < 0:
            break
        # Drop everything up to and including the '[[' before descending.
        pageSource = pageSource[opening + 2:]
        parseLinks(depth + 1)
def reportLinks(src):
    """Main interface function.

    Parses the wikitext in src and returns a report, as a single string of
    wiki list markup, of the links it contains. Links are grouped under one
    heading per namespace, interwiki target or translation language, in
    sorted order. For translation links only the heading is emitted.

    Side effects: overwrites the globals pageSource and outgoingLinks.
    """
    global pageSource, outgoingLinks
    pageSource = src
    outgoingLinks = []
    parseLinks()
    outgoingLinks.sort()
    pieces = ['The following pages are linked to by this page:\n']
    curNamespace = '?'  # sentinel: never equal to a real namespace value
    translation = False
    for namespace, _rawLink, target, label in outgoingLinks:
        if namespace != curNamespace:  # transitioning into new namespace
            curNamespace = namespace
            translation = False
            if curNamespace == '':
                pieces.append('*Main article namespace:\n')
            elif curNamespace.startswith(INTERWIKI_PLACEHOLDER):
                pieces.append('*Interwiki to %s:\n'
                              % (curNamespace[len(INTERWIKI_PLACEHOLDER):],))
            elif curNamespace.startswith(TRANSLATION_PLACEHOLDER):
                pieces.append('*Translation: %s\n'
                              % (curNamespace[len(TRANSLATION_PLACEHOLDER):],))
                translation = True
            else:
                pieces.append('*%s namespace:\n' % (curNamespace,))
        if not translation:
            if label is None:
                pieces.append('**[[%s]]\n' % (target,))
            else:
                pieces.append('**[[%s]] (%s)\n' % (target, label))
    return ''.join(pieces)
# Script entry point: report on whatever was pasted into pageSource.
if __name__ == "__main__":
    # Parenthesised single-argument form works as both the Python 2 print
    # statement and the Python 3 print() function.
    print(reportLinks(pageSource))