Wikipedia:Persondata/extractPersondata.stx

From Wikipedia, the free encyclopedia

<?xml version="1.0"?>
<stx:transform version="1.0"
               xmlns:stx="http://stx.sourceforge.net/2002/ns"
               xmlns:m="http://www.mediawiki.org/xml/export-0.3/"
               pass-through="none"
               output-method="xml"
               exclude-result-prefixes="#all">

<!-- Sequence of the <namespace> nodes collected from the export header.
     The m:page template compares the part of a page title before the
     first ':' against the string value of each entry. -->
<stx:variable name="namespace-prefixes"/>

<!-- Collects every namespace whose key attribute is not 0.
     NOTE(review): key 0 is presumably the main (article) namespace, which
     has no title prefix; confirm against the export format. -->
<stx:template match="m:namespace">
  <stx:if test="@key!=0">
    <!-- Append the current node to the sequence gathered so far. -->
    <stx:assign name="namespace-prefixes" select="($namespace-prefixes, .)"/>
  </stx:if>
</stx:template> 

<!-- Document element of the export: everything this stylesheet writes is
     wrapped in one un-namespaced <mediawiki> element. -->
<stx:template match="/m:mediawiki">
  <mediawiki>
    <stx:process-children/>
  </mediawiki>
</stx:template>

<!-- Pure containers. They write nothing themselves and only descend, so
     that the templates for their children get a chance to run. -->
<stx:template match="m:siteinfo | m:namespaces">
  <stx:process-children/>
</stx:template>

<!-- Scratch values for the page currently streaming past. They are filled
     by the templates below (and by m:id) and read by the m:page template
     after it has processed the page's children.
     NOTE(review): nothing resets these between pages, so a page that lacks
     one of the elements would see the previous page's value. -->
<stx:variable name="page-title"/>
<stx:variable name="page-text"/>
<stx:variable name="page-id"/> 

<!-- Remember the title of the current page as a string. -->
<stx:template match="m:title">
  <stx:assign name="page-title" select="string(.)"/>    
</stx:template>

<!-- Remember the wikitext as a string. When m:text is reached through
     m:revision, only the first revision's text arrives here, because
     m:revision does not descend into later revisions. -->
<stx:template match="m:text">
  <stx:assign name="page-text" select="string(.)"/>
</stx:template>

<!-- True until the first <revision> of the current page has been entered.
     The m:page template sets it back to true at the start of every page. -->
<stx:variable name="first-revision" select="true()"/> 

<!-- Descend into the first revision of a page only; any later revision is
     dropped together with all of its children. -->
<stx:template match="m:revision">
  <stx:if test="$first-revision">
    <!-- Clear the flag before descending: the m:id template checks it and
         must not overwrite the page id with an id found inside the revision. -->
    <stx:assign name="first-revision" select="false()"/>  
    <stx:process-children/>
  </stx:if>  
</stx:template>

<!-- Receives the <template> elements written while one page's text is
     scanned. The m:page template refills it with clear="yes" for each page
     it scans and replays it through group "copy". -->
<stx:buffer name="parsed"/>
<!-- Running totals of Persondata (pd) and PND (pnd) templates found.
     Procedure "template" increments them; nothing resets them, so the
     message printed by m:page shows cumulative counts. -->
<stx:variable name="pd-count" select="0"/>
<stx:variable name="pnd-count" select="0"/>
<!-- Set to false by m:page before a scan and to true by procedure
     "template" when it recognises a template; decides whether the page is
     written to the result. -->
<stx:variable name="found-something"/>

<!-- Capture the page id, whitespace-normalised. The guard accepts an id
     only while first-revision is still true, that is before the first
     <revision> has been entered; ids met inside a revision are ignored
     because m:revision clears the flag before descending. -->
<stx:template match="m:id">
  <stx:if test="$first-revision">
    <stx:assign name="page-id" select="normalize-space(.)"/>
  </stx:if>  
</stx:template>

<!--
  Handles one <page> of the export.

  1. Re-arms first-revision and processes the children, which fills
     $page-title, $page-id and $page-text through the other templates.
  2. Skips the page when the title text before the first ':' equals the
     string value of one of the collected $namespace-prefixes entries.
  3. Otherwise walks $page-text, cutting out each '{{ ... }}' span (with
     nested '{{ }}' pairs kept inside the outer span) and hands its content
     to procedure "template", whose output is captured in buffer "parsed".
  4. If procedure "template" recognised anything ($found-something), reports
     the running counters via stx:message and writes a <page> element with
     <title>, <id> and the buffered <template> elements.

  Nothing is written to the result before step 4, so skipped pages and
  pages without a recognised template leave no trace in the output.
-->
<stx:template match="m:page">
  <stx:assign name="first-revision" select="true()"/>
  <stx:process-children />

  <!-- Empty when the title contains no ':'. -->
  <stx:variable name="prefix" select="substring-before($page-title,':')"/>
  <stx:variable name="skip" select="false()"/>
  <stx:if test="$prefix">
    <stx:for-each-item name="p" select="$namespace-prefixes">
      <stx:if test="string($p) = string($prefix)">
        <stx:assign name="skip" select="true()"/>
      </stx:if>
    </stx:for-each-item>
  </stx:if>
  <stx:if test="not($skip)">
    <stx:assign name="found-something" select="false()"/>
    <stx:result-buffer name="parsed" clear="yes">
      <stx:variable name="text" select="$page-text"/>
      <stx:while test="string-length($text) > 0">
        <!-- Jump to just behind the next '{{'. When there is none, $text
             becomes empty and the loop ends after this round. -->
        <stx:assign name="text" select="substring-after($text,'{{')"/>
        <!-- Candidate content: everything up to the next '}}'. -->
        <stx:variable name="nestcheck" select="substring-before($text,'}}')"/>
        <stx:variable name="concattext" select="$nestcheck"/>
        <!-- A '{{' inside the candidate means the '}}' just found closes a
             nested template. Put that '}}' back and extend the content up
             to the following '}}', repeating while further nesting shows up. -->
        <stx:while test="contains($nestcheck,'{{')">
          <stx:assign name="text" select="substring-after($text,'}}')"/>
          <stx:assign name="concattext"
                      select="concat($concattext, '}}', substring-before($text,'}}'))"/>
          <stx:assign name="nestcheck" select="substring-before($text,'}}')"/>
        </stx:while>
        <stx:call-procedure name="template">
          <stx:with-param name="content" select="$concattext"/>
        </stx:call-procedure>
        <!-- Continue behind the '}}' that closed this template. -->
        <stx:assign name="text" select="substring-after($text,'}}')"/>
      </stx:while>
    </stx:result-buffer>
    <stx:if test="$found-something">
      <!-- Progress report: cumulative Persondata / PND counts. -->
      <stx:message>
        <stx:value-of select="$pd-count"/>
        <stx:text>/</stx:text>
        <stx:value-of select="$pnd-count"/>
      </stx:message>
      <!-- Each stx:text below holds exactly one newline for the output. -->
      <stx:text>
</stx:text>
      <page>
        <title><stx:value-of select="$page-title"/></title>
        <id><stx:value-of select="$page-id"/></id>
        <stx:text>
</stx:text>
        <revision>
          <parsed>
            <stx:process-buffer name="parsed" group="copy"/>
          </parsed>
          <stx:text>
</stx:text>
        </revision>
      </page>
      <stx:text>
</stx:text>
    </stx:if>
  </stx:if>
</stx:template>

<!-- Identity-style replay used when buffer "parsed" is processed: each
     buffered element, attribute and text node is re-created in the result
     under the same name and with the same value. -->
<stx:group name="copy">

  <!-- Text nodes: write the text as it is. -->
  <stx:template match="text()">
    <stx:value-of select="."/>
  </stx:template>

  <!-- Attributes: same name, value taken from the attribute itself. -->
  <stx:template match="@*">
    <stx:attribute name="{name(.)}">
      <stx:value-of select="."/>
    </stx:attribute>
  </stx:template>

  <!-- Elements: rebuild the element, then its attributes, then its content. -->
  <stx:template match="*">
    <stx:element name="{name(.)}">
      <stx:process-attributes/>
      <stx:process-children/>
    </stx:element>
  </stx:template>

</stx:group>

<!--
  Classifies the content of one '{{ ... }}' span.

  content: text between the braces (required).

  A content starting with 'PND' yields <template name="PND"> holding a
  single <param> with the normalised text after the first '|'.
  Otherwise a content starting with 'Persondata' yields
  <template name="Persondata"> whose <param> children are produced by
  procedure "Persondata" from the normalised text after the first '|'.
  Either hit bumps the matching counter and sets $found-something.
  Any other content produces nothing.
-->
<stx:procedure name="template">
  <stx:param name="content" required="yes"/>
  <stx:choose>
    <stx:when test="starts-with($content,'PND')">
      <stx:assign name="pnd-count" select="$pnd-count + 1"/>
      <stx:assign name="found-something" select="true()"/>
      <template name="PND">
        <param><stx:value-of select="normalize-space(substring-after($content,'|'))"/></param>
      </template>
    </stx:when>
    <stx:when test="starts-with($content,'Persondata')">
      <stx:assign name="pd-count" select="$pd-count + 1"/>
      <stx:assign name="found-something" select="true()"/>
      <template name="Persondata">
        <stx:call-procedure name="Persondata">
          <stx:with-param name="text"
                          select="normalize-space(substring-after($content,'|'))"/>
        </stx:call-procedure>
      </template>
    </stx:when>
  </stx:choose>
</stx:procedure>


<!--
  Turns the argument list of a Persondata template into <param> elements.

  text: the template content after the first '|', e.g.
        "NAME=Doe, John|SHORT DESCRIPTION=[[Writer|author]]".

  Pass 1 cuts $text at every '|'. Pass 2 walks the pieces: a piece that has
  a non-blank name before its first '=' starts a new parameter; any other
  piece is glued back onto the current value with the '|' that was cut out,
  so that values containing pipes (wiki links, for instance) survive.
  For every parameter one <param name="..."> with the whitespace-normalised
  value is written.
-->
<stx:procedure name="Persondata">
  <stx:param name="text"/>
  <stx:variable name="tokens"/>

  <!-- Pass 1: tokenise at '|'. -->
  <stx:while test="string-length($text) > 0">
    <!-- Without a further '|' the whole rest is the last token. The check
         uses contains() rather than the emptiness of substring-before(),
         because an empty field ("a=1||b=2") also gives an empty result and
         would otherwise be mistaken for "no pipe left". -->
    <stx:variable name="before" select="$text"/>
    <stx:if test="contains($text,'|')">
      <stx:assign name="before" select="substring-before($text,'|')"/>
    </stx:if>
    <stx:assign name="tokens" select="($tokens, $before)"/>
    <!-- Empty when no '|' is left, which ends the loop. -->
    <stx:assign name="text" select="substring-after($text,'|')"/>
  </stx:while>

  <!-- Pass 2: group tokens into name/value pairs. -->
  <stx:variable name="parameter"/>
  <stx:variable name="value"/>
  <stx:for-each-item name="token" select="$tokens">
    <stx:variable name="name" select="normalize-space(substring-before($token,'='))"/>
    <stx:if test="$name">
      <!-- A new parameter begins: flush the one collected so far. -->
      <stx:if test="$parameter">
        <param name="{$parameter}">
          <stx:value-of select="normalize-space($value)"/>
        </param>
      </stx:if>
      <stx:assign name="parameter" select="$name"/>
      <stx:assign name="value" select="substring-after($token,'=')"/>
    </stx:if>
    <stx:else>
      <!-- Continuation of the current value; restore the removed '|'. -->
      <stx:assign name="value" select="concat($value,'|',$token)"/>
    </stx:else>
  </stx:for-each-item>
  <!-- Flush the last parameter. -->
  <stx:if test="$parameter">
    <param name="{$parameter}">
      <stx:value-of select="normalize-space($value)"/>
    </param>
  </stx:if>
</stx:procedure>

</stx:transform>