按员工对连续日期进行分组

问题描述

我有一个XML文档,其中同一位员工在不同的日期有多个条目,如下所示:

<!-- Sample input: one Report_Entry per worker per absence day.
     The wd prefix is bound to the same namespace URI that the stylesheets
     below use in xpath-default-namespace. -->
<wd:Report_Data xmlns:wd="http://example.com/wd">
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>111</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-21-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>111</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-22-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>222</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-23-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-15-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-16-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-29-07:00</wd:date>
    </wd:Report_Entry>
</wd:Report_Data>

我想得到这样的输出:把每位员工的连续日期分为一组,并给出每组的开始日期和结束日期,如下所示:

<!-- Desired result: one <worker> element per run of consecutive dates for a
     given staffID. A run consisting of a single day has start_date equal to
     end_date. -->
<wd:Report_Entry>
   <worker>
      <staffID>111</staffID>
      <start_date>2020-08-21-07:00</start_date>
      <end_date>2020-08-22-07:00</end_date>
   </worker>
   <worker>
      <staffID>222</staffID>
      <start_date>2020-08-23-07:00</start_date>
      <end_date>2020-08-23-07:00</end_date>
   </worker>
   <worker>
      <staffID>333</staffID>
      <start_date>2020-08-15-07:00</start_date>
      <end_date>2020-08-16-07:00</end_date>
   </worker>
   <worker>
      <staffID>333</staffID>
      <start_date>2020-08-29-07:00</start_date>
      <end_date>2020-08-29-07:00</end_date>
   </worker>
</wd:Report_Entry>

我尝试使用 group-starting-with,但它只根据日期是否连续来分组,完全不考虑员工ID,因此不同员工的连续日期会被合并到同一组中。

<!-- Attempted solution (produces the wrong grouping).
     A new group starts at every Report_Entry whose date is not exactly one day
     after the date of its immediately preceding sibling. The boundary test looks
     only at wd:date, never at wd:staffID, so consecutive dates that belong to
     different workers end up in the same group. -->
<xsl:template match="/wd:Report_Data">
  
    <wd:Report_Entry>
              <xsl:for-each-group select="wd:Report_Entry" group-starting-with="*[not(xs:date(wd:date) = xs:date(preceding-sibling::*[1]/wd:date) + xs:dayTimeDuration('P1D'))] "> 
            <worker>
                <!-- The context item is the first entry of the current group, so the
                     staffID and start date are taken from that entry only. -->
                <staffID>
                    <xsl:value-of select="wd:workerGroup/wd:staffID"/>
                </staffID>
                <start_date>
                    <xsl:value-of select="wd:date"/>
                </start_date>
                <!-- End date comes from the last entry of the group. -->
                <end_date>
                    <xsl:value-of select="current-group()[last()]/wd:date"/>
                </end_date>
            </worker>
            </xsl:for-each-group>
        <!--   </xsl:for-each-group> --> 
    </wd:Report_Entry>
   
</xsl:template>

结果得到了以下输出,这个输出是错误的:

<!-- Output reported for the attempted template: the group for staffID 111 runs
     through 2020-08-23, a date that belongs to staffID 222 in the sample input. -->
<wd:Report_Entry>
   <worker>
      <staffID>111</staffID>
      <start_date>2020-08-21-07:00</start_date>
      <end_date>2020-08-23-07:00</end_date>
   </worker>
</wd:Report_Entry>

解决方法

先用 group-by 按员工ID分组,再将每组内的条目按日期排序,然后用 group-adjacent 以"日期减去 position() × 1 天"作为分组键即可。在一段连续的日期中,日期和 position() 每次都同时加一,因此这个差值保持不变;日期一旦中断,差值就会改变,从而开始一个新的组:

<?xml version="1.0" encoding="utf-8"?>
<!--
  Groups each worker's Report_Entry elements into runs of consecutive days and
  emits one <worker> element (staffID, start_date, end_date) per run.

  Requires an XSLT 3.0 processor with XPath 3.1 support (fn:sort).
  xpath-default-namespace must equal the namespace URI that the input document
  binds to its wd prefix; otherwise the unprefixed paths below select nothing.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    version="3.0"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xpath-default-namespace="http://example.com/wd"
    exclude-result-prefixes="#all"
    expand-text="yes">
    
    <xsl:output method="xml" indent="yes"/>
    
    <xsl:template match="Report_Data">
        <root>
            <!-- Outer grouping: one group per worker, wherever the entries occur. -->
            <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
                <!-- Inner grouping: sort the worker's entries chronologically, then
                     split them into runs of consecutive days.
                     The sort key is cast to xs:date so that entries are ordered as
                     dates; an untyped key would be compared as a string, which
                     mis-orders values carrying different time-zone offsets.
                     The grouping key (date minus position() days) stays constant
                     while both date and position advance by one, and changes as
                     soon as a day is skipped. -->
                <xsl:for-each-group select="sort(current-group(), (), function($e) { xs:date($e/date) })" group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
                    <worker>
                        <staffID>
                            <xsl:value-of select="workerGroup/staffID"/>
                        </staffID>
                        <!-- The context item is the first (earliest) entry of the run. -->
                        <start_date>
                            <xsl:value-of select="date[1]"/>
                        </start_date>
                        <!-- The last entry of the run carries the latest date. -->
                        <end_date>
                            <xsl:value-of select="current-group()[last()]/date"/>
                        </end_date>
                    </worker>
                </xsl:for-each-group>
            </xsl:for-each-group>
        </root>
    </xsl:template>
    
</xsl:stylesheet>

XSLT 2处理器不支持XPath 3.1的 sort() 函数,因此需要改用一个基于 xsl:perform-sort 的用户自定义函数来实现排序:

<?xml version="1.0" encoding="utf-8"?>
<!--
  XSLT 2.0 variant: groups each worker's Report_Entry elements into runs of
  consecutive days and emits one <worker> element (staffID, start_date,
  end_date) per run. Sorting is done with xsl:perform-sort inside a stylesheet
  function, because the XPath 3.1 sort() function is not available in XSLT 2.0.

  xpath-default-namespace must equal the namespace URI that the input document
  binds to its wd prefix; otherwise the unprefixed paths below select nothing.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    version="2.0"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:mf="http://example.com/mf"
    xpath-default-namespace="http://example.com/wd"
    exclude-result-prefixes="#all">
    
    <!-- Returns the given Report_Entry elements in ascending date order.
         The sort key is cast to xs:date so that entries are compared as dates
         rather than as strings. -->
    <xsl:function name="mf:sort-by-date" as="element(Report_Entry)*">
        <xsl:param name="entries" as="element(Report_Entry)*"/>
        <xsl:perform-sort select="$entries">
            <xsl:sort select="xs:date(date)"/>
        </xsl:perform-sort>
    </xsl:function>
    
    <xsl:output method="xml" indent="yes"/>
    
    <xsl:template match="Report_Data">
        <root>
            <!-- Outer grouping: one group per worker, wherever the entries occur. -->
            <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
                <!-- Inner grouping: the key (date minus position() days) stays
                     constant while both date and position advance by one, and
                     changes as soon as a day is skipped. -->
                <xsl:for-each-group select="mf:sort-by-date(current-group())" group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
                    <worker>
                        <staffID>
                            <xsl:value-of select="workerGroup/staffID"/>
                        </staffID>
                        <!-- The context item is the first (earliest) entry of the run. -->
                        <start_date>
                            <xsl:value-of select="date[1]"/>
                        </start_date>
                        <!-- The last entry of the run carries the latest date. -->
                        <end_date>
                            <xsl:value-of select="current-group()[last()]/date"/>
                        </end_date>
                    </worker>
                </xsl:for-each-group>
            </xsl:for-each-group>
        </root>
    </xsl:template>
    
</xsl:stylesheet>

当然,只有当输入内容尚未按员工ID和日期排好序时,才需要进行排序;如果输入已经有序,上面所示的分组方式本身就足够了。