<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xd="http://www.oxygenxml.com/ns/doc/xsl"
    xmlns:local="http://whatever"
    exclude-result-prefixes="xs xd local"
    version="2.0">
    <xd:doc scope="stylesheet">
        <xd:desc>
            <xd:p><xd:b>Created on:</xd:b> Jan 27, 2011</xd:p>
            <xd:p><xd:b>Author:</xd:b> aschuth</xd:p>
            <xd:p></xd:p>
        </xd:desc>
    </xd:doc>
    <xsl:output indent="yes" />

    <!-- Returns the child nodes of $text, dropping every child (text or element) whose string value is empty or whitespace-only -->
    <xsl:function name="local:clean" as="item()*">
        <!-- $text: the node(s) whose children are filtered.
             Result: those children whose whitespace-normalized string value is not the empty string,
             in document order. -->
        <xsl:param name="text"/>
        <xsl:sequence select="$text/node()[not(normalize-space(.) eq '')]"/>
    </xsl:function>
    
    <!-- Boolean functions that return true if the text is an answer, question or start of paragraph-->
    <xsl:function name="local:is-a" as="xs:boolean">
        <!-- True when $text is a node whose name is 'a' (an answer marker).
             An empty sequence yields false, because name(()) is the empty string.
             xsl:sequence returns the xs:boolean itself rather than a text node that
             must be atomized and cast back to a boolean. -->
        <xsl:param name="text" as="node()?"/>
        <xsl:sequence select="name($text) eq 'a'"/>
    </xsl:function>
    <xsl:function name="local:is-q" as="xs:boolean">
        <!-- True when $text is a node whose name is 'q' (a question marker).
             An empty sequence yields false, because name(()) is the empty string.
             xsl:sequence returns the xs:boolean itself rather than a text node that
             must be atomized and cast back to a boolean. -->
        <xsl:param name="text" as="node()?"/>
        <xsl:sequence select="name($text) eq 'q'"/>
    </xsl:function>
    <xsl:function name="local:is-p" as="xs:boolean">
        <!-- True when $text is a node whose name is 'p' (a paragraph-break marker).
             An empty sequence yields false, because name(()) is the empty string.
             xsl:sequence returns the xs:boolean itself rather than a text node that
             must be atomized and cast back to a boolean. -->
        <xsl:param name="text" as="node()?"/>
        <xsl:sequence select="name($text) eq 'p'"/>
    </xsl:function>
    
    <xsl:template match="/">
        <!-- First pass: run the page template over every page and keep the resulting
             i / q / a / p marker elements in a temporary tree -->
        <xsl:variable name="annotated">
            <xsl:apply-templates select="descendant::page" />
        </xsl:variable>
        <!-- All intro markers of the temporary tree, in document order -->
        <xsl:variable name="intro-nodes" select="$annotated/descendant::i" />
        <interview>
            <title>
                <!-- Heuristic: the first intro marker is taken as the title -->
                <xsl:value-of select="normalize-space(string-join($intro-nodes[1], ' '))" />
            </title>
            <intro>
                <!-- Heuristic: every intro marker except the last one is joined into the intro text -->
                <xsl:value-of select="normalize-space(string-join($intro-nodes[position() lt last()], ' '))" />
            </intro>
            <author>
                <!-- Heuristic: the last intro marker is taken as the author -->
                <xsl:value-of select="normalize-space(string-join($intro-nodes[last()], ' '))" />
            </author>
            <!-- One qa element per question marker for which neither of the two nearest
                 preceding elements is a question marker -->
            <xsl:for-each select="$annotated/descendant::q[not(local:is-q(preceding::*[1])) and not(local:is-q(preceding::*[2]))]">
                <!-- This marker plus the following question and paragraph markers that do not come after an answer marker -->
                <xsl:variable name="question-run" select=". | (following::*[local:is-q(.) or local:is-p(.)] except following::a/following::*)" />
                <!-- Last marker of the question run; the answer run is collected from here on -->
                <xsl:variable name="run-end" select="$question-run[last()]" />
                <!-- Answer and paragraph markers after the question run that do not come after a later question marker -->
                <xsl:variable name="answer-run" select="$run-end/following::*[local:is-a(.) or local:is-p(.)] except $run-end/following::q/following::*" />
                <qa>
                    <q>
                        <!-- Each question marker not directly preceded by another question marker opens a paragraph -->
                        <xsl:for-each select="$question-run[local:is-q(.)][not(local:is-q(preceding::*[1]))]">
                            <p>
                                <!-- Join this marker with the following question markers that come before any later paragraph marker -->
                                <xsl:value-of select="normalize-space(string-join(. | (following::q except following::p/following::q), ' '))" />
                            </p>
                        </xsl:for-each>
                    </q>
                    <a>
                        <!-- Each answer marker not directly preceded by another answer marker opens a paragraph -->
                        <xsl:for-each select="$answer-run[local:is-a(.)][not(local:is-a(preceding::*[1]))]">
                            <p>
                                <!-- Join this marker with the following answer markers that come before any later paragraph marker -->
                                <xsl:value-of select="normalize-space(string-join(. | (following::a except following::p/following::a), ' '))" />
                            </p>
                        </xsl:for-each>
                    </a>
                </qa>
            </xsl:for-each>
        </interview>
    </xsl:template>
    
    <xsl:template match="page">
        <!-- Emits one marker element per text element of the page:
             p = no content, i = intro, q = question, a = answer -->
        <xsl:variable name="texts" select=".//text" />
        <!-- The last two text elements of each page are left out (footers) -->
        <xsl:for-each select="$texts[position() le last() - 2]">
            <!-- Children of this text element that carry non-blank content -->
            <xsl:variable name="content" select="local:clean(.)" />
            <xsl:variable name="lead" select="$content[1]" />
            <!-- Marker name, decided by the first content node:
                   nothing left           : p (start/end of a paragraph)
                   bold                   : i (intro)
                   italic                 : q (question)
                   link that opens italic : q (question)
                   anything else          : a (answer) -->
            <xsl:variable name="kind" as="xs:string" select="
                if (empty($content)) then 'p'
                else if (name($lead) eq 'b') then 'i'
                else if (name($lead) eq 'i') then 'q'
                else if (name($lead) eq 'a' and name($lead/*[1]) eq 'i') then 'q'
                else 'a'" />
            <!-- For the p marker the joined text is the empty string, so the element stays empty -->
            <xsl:element name="{$kind}">
                <xsl:value-of select="normalize-space(string-join($content, ' '))" />
            </xsl:element>
        </xsl:for-each>
    </xsl:template>
    
</xsl:stylesheet>
