User:Drinibot/ExtractWikilinks.py
From Wikipedia, the free encyclopedia
"""Extract wiki page titles from a saved HTML file and emit them as wikilinks.

Reads ``rawfilename`` line by line. For every line on which ``regex`` finds a
page title that does not contain ``Special:``, the title is wrapped as
``[[title]]``, printed to stdout and appended (one per line) to
``linksfilename``.
"""
import re
import subprocess  # not used by the code below; kept so nothing that relies on it breaks

rawfilename = "raw.html"
linksfilename = "links.txt"

# Captures the text between "org/wiki/" and the next double quote.
# NOTE(review): the "." before "org" is unescaped, so it matches any single
# character, not only a literal dot.
# NOTE(review): the greedy leading ".*" together with the trailing ".*$" makes
# the pattern consume the whole line, so findall() yields at most one title
# per line (taken from the last "org/wiki/" that still has a double quote
# after it). Earlier links on the same line are not reported.
regex = re.compile(r".*.org/wiki/(?P<oldcat>.*?)\".*$")


def _wikilink_for_line(line):
    """Return ``[[title]]`` for the title found on *line*, or ``None``.

    ``None`` is returned when the line has no match or when the matched
    title contains ``Special:``.
    """
    titles = regex.findall(line)
    if not titles:
        return None
    title = titles[0]
    if "Special:" in title:
        return None
    return "[[" + title + "]]"


def main(raw_path=rawfilename, links_path=linksfilename):
    """Convert the links in *raw_path* to wikilinks written to *links_path*.

    Args:
        raw_path: Path of the HTML file to scan; defaults to ``rawfilename``.
        links_path: Path of the output file, truncated on open; defaults to
            ``linksfilename``.

    Raises:
        IOError/OSError: If either file cannot be opened.
    """
    # Context managers guarantee both handles are closed (and the output
    # flushed) even if reading or writing fails part-way through.
    with open(raw_path, 'r') as fi:
        with open(links_path, 'w') as li:
            for line in fi:
                wl = _wikilink_for_line(line)
                if wl is None:
                    continue
                # A single parenthesized argument prints the same under the
                # Python 2 print statement and the Python 3 print function.
                print(wl)
                li.write(wl + "\n")


if __name__ == "__main__":
    main()