Wikipedia:Persondata/extractPersondata.stx
From Wikipedia, the free encyclopedia
<?xml version="1.0"?>
<!--
  extractPersondata.stx

  STX transform over a MediaWiki XML export (namespace
  http://www.mediawiki.org/xml/export-0.3/).

  For every page whose title prefix is not one of the collected namespace
  entries, the text of the first revision is scanned for "{{ ... }}" template
  calls. Calls whose content starts with "PND" or "Persondata" are written to
  the result as:

    <mediawiki>
      <page>
        <title>...</title>
        <id>...</id>
        <revision>
          <parsed>
            <template name="PND"><param>...</param></template>
            <template name="Persondata"><param name="...">...</param></template>
          </parsed>
        </revision>
      </page>
    </mediawiki>

  Pages in which neither template was found produce no <page> element.
  Because pass-through is "none", input that no template handles is not
  copied to the result.
-->
<stx:transform version="1.0"
               xmlns:stx="http://stx.sourceforge.net/2002/ns"
               xmlns:m="http://www.mediawiki.org/xml/export-0.3/"
               pass-through="none"
               output-method="xml"
               exclude-result-prefixes="#all">

  <!-- Sequence of namespace entries collected from m:namespace elements
       whose key attribute is not 0. Compared against page title prefixes
       in the m:page template. -->
  <stx:variable name="namespace-prefixes"/>

  <!-- Appends the current namespace entry to $namespace-prefixes.
       NOTE(review): presumably "." yields the namespace name here; confirm
       how the STX processor in use evaluates "." for an element. -->
  <stx:template match="m:namespace">
    <stx:if test="@key!=0">
      <stx:assign name="namespace-prefixes" select="($namespace-prefixes, .)"/>
    </stx:if>
  </stx:template>

  <!-- Root of the export: wrap everything the children emit in <mediawiki>. -->
  <stx:template match="/m:mediawiki">
    <mediawiki>
      <stx:process-children/>
    </mediawiki>
  </stx:template>

  <!-- m:siteinfo and m:namespaces emit nothing themselves; they only descend
       so that the m:namespace template gets a chance to run. -->
  <stx:template match="m:siteinfo">
    <stx:process-children/>
  </stx:template>

  <stx:template match="m:namespaces">
    <stx:process-children/>
  </stx:template>

  <!-- Per-page state, filled in while the children of m:page are processed. -->
  <stx:variable name="page-title"/>
  <stx:variable name="page-text"/>
  <stx:variable name="page-id"/>

  <stx:template match="m:title">
    <stx:assign name="page-title" select="string(.)"/>
  </stx:template>

  <stx:template match="m:text">
    <stx:assign name="page-text" select="string(.)"/>
  </stx:template>

  <!-- True until the first m:revision of the current page has been entered.
       Reset to true at the start of every m:page. -->
  <stx:variable name="first-revision" select="true()"/>

  <!-- Only the first revision of a page is descended into; the children of
       any later m:revision are not processed. -->
  <stx:template match="m:revision">
    <stx:if test="$first-revision">
      <stx:assign name="first-revision" select="false()"/>
      <stx:process-children/>
    </stx:if>
  </stx:template>

  <!-- Buffer that receives the <template> elements produced while a page's
       text is scanned; it is replayed into the result only if something
       was found. -->
  <stx:buffer name="parsed"/>

  <!-- Counters of Persondata / PND templates found so far. They are never
       reset, so they are running totals across the whole input. -->
  <stx:variable name="pd-count" select="0"/>
  <stx:variable name="pnd-count" select="0"/>

  <!-- Set to true by the "template" procedure when the current page
       contains at least one PND or Persondata template. -->
  <stx:variable name="found-something"/>

  <!-- Records an id only while $first-revision is still true, i.e. before the
       first m:revision has been entered. Any m:id processed after that point
       (the m:revision template flips the flag before descending) is ignored. -->
  <stx:template match="m:id">
    <stx:if test="$first-revision">
      <stx:assign name="page-id" select="normalize-space(.)"/>
    </stx:if>
  </stx:template>

  <!-- Main per-page logic: gather title/id/text via the child templates,
       skip pages whose title prefix is a collected namespace entry, scan the
       text for "{{ ... }}" calls and emit a <page> if anything was found. -->
  <stx:template match="m:page">
    <stx:assign name="first-revision" select="true()"/>
    <stx:process-children/>

    <!-- Text before the first ':' of the title; empty if there is no ':'. -->
    <stx:variable name="prefix" select="substring-before($page-title,':')"/>
    <stx:variable name="skip" select="false()"/>
    <stx:if test="$prefix">
      <!-- NOTE(review): this writes the prefix text straight into the result
           for every title containing ':'. It looks like leftover debug
           output; confirm before removing. -->
      <stx:value-of select="$prefix"/>
      <stx:for-each-item name="p" select="$namespace-prefixes">
        <stx:if test="string($p) = string($prefix)">
          <stx:assign name="skip" select="true()"/>
        </stx:if>
      </stx:for-each-item>
    </stx:if>

    <stx:if test="not($skip)">
      <stx:assign name="found-something" select="false()"/>

      <!-- Everything emitted by the "template" procedure below goes into the
           (freshly cleared) "parsed" buffer instead of the result. -->
      <stx:result-buffer name="parsed" clear="yes">
        <stx:variable name="text" select="$page-text"/>
        <stx:while test="string-length($text) &gt; 0">
          <!-- Jump to just after the next '{{'. If there is none,
               substring-after returns '' and the loop ends after this pass. -->
          <stx:assign name="text" select="substring-after($text,'{{')"/>

          <!-- Candidate content: everything up to the first '}}'. -->
          <stx:variable name="nestcheck" select="substring-before($text,'}}')"/>
          <stx:variable name="concattext" select="$nestcheck"/>

          <!-- If the segment just examined contains another '{{', the '}}'
               that ended it belongs to an inner call. Step past that '}}',
               append it and the next segment to the collected content, and
               check the new segment the same way. -->
          <stx:while test="contains($nestcheck,'{{')">
            <stx:assign name="text" select="substring-after($text,'}}')"/>
            <!-- Plain concatenation, using concat() as the Persondata
                 procedure does. -->
            <stx:assign name="concattext"
                        select="concat($concattext, '}}', substring-before($text,'}}'))"/>
            <stx:assign name="nestcheck" select="substring-before($text,'}}')"/>
          </stx:while>

          <stx:call-procedure name="template">
            <stx:with-param name="content" select="$concattext"/>
          </stx:call-procedure>

          <!-- Continue scanning after the '}}' that closed this call. -->
          <stx:assign name="text" select="substring-after($text,'}}')"/>
        </stx:while>
      </stx:result-buffer>

      <stx:if test="$found-something">
        <!-- Progress message: Persondata count / PND count so far. -->
        <stx:message>
          <stx:value-of select="$pd-count"/>
          <stx:text>/</stx:text>
          <stx:value-of select="$pnd-count"/>
        </stx:message>
        <!-- The stx:text elements below emit literal whitespace separators
             between the result elements. -->
        <stx:text> </stx:text>
        <page>
          <title><stx:value-of select="$page-title"/></title>
          <id><stx:value-of select="$page-id"/></id>
          <stx:text> </stx:text>
          <revision>
            <parsed>
              <!-- Replay the buffered <template> elements via the "copy"
                   group. -->
              <stx:process-buffer name="parsed" group="copy"/>
            </parsed>
            <stx:text> </stx:text>
          </revision>
        </page>
        <stx:text> </stx:text>
      </stx:if>
    </stx:if>
  </stx:template>

  <!-- Identity-style templates used when replaying the "parsed" buffer:
       elements, attributes and text are re-emitted under their own names. -->
  <stx:group name="copy">
    <stx:template match="*">
      <stx:element name="{name(.)}">
        <stx:process-attributes/>
        <stx:process-children/>
      </stx:element>
    </stx:template>

    <stx:template match="@*">
      <stx:attribute name="{name(.)}" select="."/>
    </stx:template>

    <stx:template match="text()">
      <stx:value-of select="."/>
    </stx:template>
  </stx:group>

  <!-- Handles the content of one "{{ ... }}" call (without the braces).
       content: the text between the braces (required).
       Content starting with "PND" yields <template name="PND"> with a single
       <param> holding everything after the first '|'. Otherwise, content
       starting with "Persondata" yields <template name="Persondata"> whose
       parameters are produced by the "Persondata" procedure. Any other
       content emits nothing. A match also bumps the matching counter and
       sets $found-something. -->
  <stx:procedure name="template">
    <stx:param name="content" required="yes"/>
    <stx:if test="starts-with($content,'PND')">
      <stx:assign name="pnd-count" select="$pnd-count+1"/>
      <stx:assign name="found-something" select="true()"/>
      <template name="PND">
        <param>
          <stx:value-of select="normalize-space(substring-after($content,'|'))"/>
        </param>
      </template>
    </stx:if>
    <stx:else>
      <stx:if test="starts-with($content,'Persondata')">
        <stx:assign name="pd-count" select="$pd-count+1"/>
        <stx:assign name="found-something" select="true()"/>
        <template name="Persondata">
          <stx:call-procedure name="Persondata">
            <stx:with-param name="text"
                            select="normalize-space(substring-after($content,'|'))"/>
          </stx:call-procedure>
        </template>
      </stx:if>
    </stx:else>
  </stx:procedure>

  <!-- Splits a Persondata parameter string into <param name="..."> elements.
       text: the template content after the first '|', whitespace-normalized
       by the caller.
       The text is first cut at every '|'. A piece that has a non-empty name
       before its first '=' starts a new parameter; any other piece is glued
       back onto the current value with a '|' in front, so values that
       themselves contain '|' stay in one parameter. -->
  <stx:procedure name="Persondata">
    <stx:param name="text"/>
    <stx:variable name="tokens"/>

    <!-- Pass 1: cut $text into '|'-separated tokens. -->
    <stx:while test="string-length($text) &gt; 0">
      <stx:variable name="before" select="substring-before($text,'|')"/>
      <!-- No '|' left: the remainder is the last token.
           NOTE(review): substring-before also returns '' when the text
           starts with '|' (e.g. after "||"), in which case the whole
           remaining text becomes one token here; confirm that this is
           intended. -->
      <stx:if test="not($before)">
        <stx:assign name="before" select="$text"/>
      </stx:if>
      <stx:assign name="tokens" select="($tokens, $before)"/>
      <stx:assign name="text" select="substring-after($text,'|')"/>
    </stx:while>

    <!-- Pass 2: group the tokens into name/value pairs. -->
    <stx:variable name="parameter"/>
    <stx:variable name="value"/>
    <stx:for-each-item name="token" select="$tokens">
      <stx:variable name="name" select="normalize-space(substring-before($token,'='))"/>
      <stx:if test="$name">
        <!-- A new parameter begins: flush the one collected so far. -->
        <stx:if test="$parameter">
          <param name="{$parameter}">
            <stx:value-of select="normalize-space($value)"/>
          </param>
        </stx:if>
        <stx:assign name="parameter" select="$name"/>
        <stx:assign name="value" select="substring-after($token,'=')"/>
      </stx:if>
      <stx:else>
        <!-- Continuation of the current value; restore the '|' that the
             tokenizer removed. -->
        <stx:assign name="value" select="concat($value,'|',$token)"/>
      </stx:else>
    </stx:for-each-item>

    <!-- Flush the last parameter, if any was started. -->
    <stx:if test="$parameter">
      <param name="{$parameter}">
        <stx:value-of select="normalize-space($value)"/>
      </param>
    </stx:if>
  </stx:procedure>

</stx:transform>