如何在SQL Server中导出XML的全部元素和属性并将其保存到一张表中

问题描述

我想导出 https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml 中的所有XML元素及其属性。如下面的代码所示,我已经能够通过多个查询分别创建表。但这样一来,我似乎必须为不同的元素和属性创建多张表,过程十分繁琐,而且这些表也难以合并或按需联接。

我的问题:有没有办法在整个XML内容中一次性创建一个表?

第二个问题:如何使用OPENROWSET读取存储在SQL Server中(而不是本地目录中)的XML文件?

这是我使用的代码,可以很好地查询XML的一部分:

-- One-time setup: create the database and a staging table, then load the
-- whole XML file unchanged into a single XML column.
CREATE DATABASE [irs-900];
GO
-- The batch separator above is required: USE is resolved when its batch is
-- compiled, so it must sit in a batch that starts after the database exists.
USE [irs-900];
GO

CREATE TABLE [SampleXML]
(
    Id             INT IDENTITY PRIMARY KEY,
    XMLData        XML,
    LoadedDateTime DATETIME
);

-- SINGLE_BLOB returns the file as one varbinary(max) value named BulkColumn;
-- converting from binary lets the XML parser apply the encoding declared in
-- the document itself.
-- The table name is spelled exactly as declared so the script also works on
-- case-sensitive collations.
INSERT INTO [SampleXML] (XMLData, LoadedDateTime)
SELECT
    CONVERT(XML, BulkColumn) AS BulkColumn,
    GETDATE()
FROM OPENROWSET(BULK 'C:\Users\200931393493000150_public.xml', SINGLE_BLOB) AS x;



     -- Shred the filer's EIN and US address out of /Return/ReturnHeader/Filer
     -- into a relational row set held in a table variable.
     DECLARE @targetTable TABLE
     (
         ID           INT IDENTITY PRIMARY KEY,
         EIN          CHAR(10),
         AddressLine1 VARCHAR(50),
         AddressLine2 VARCHAR(50),
         City         VARCHAR(100),  -- widened to match the VARCHAR(100) that .value() produces below
         State        CHAR(2),
         ZIPCode      CHAR(5)
     );

     -- Unprefixed element names in the XPath expressions below are resolved in
     -- the default namespace declared here.
     WITH XMLNAMESPACES (DEFAULT 'http://www.irs.gov/efile'),
     rs (xmldata) AS
     (
         -- XML file from https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml,
         -- saved to local disk. TRY_CAST yields NULL instead of an error when
         -- the content cannot be converted to XML.
         SELECT TRY_CAST(BulkColumn AS XML) AS BulkColumn
         FROM OPENROWSET(BULK 'C:\Users\200931393493000150_public.xml', SINGLE_BLOB) AS x
     )
     -- Explicit column list: the IDENTITY column is skipped on purpose and the
     -- mapping no longer depends on column order in the table definition.
     INSERT INTO @targetTable (EIN, AddressLine1, AddressLine2, City, State, ZIPCode)
     SELECT
         c.value('(EIN/text())[1]', 'CHAR(10)')                        AS EIN,
         c.value('(USAddress/AddressLine1/text())[1]', 'VARCHAR(50)')  AS AddressLine1,
         c.value('(USAddress/AddressLine2/text())[1]', 'VARCHAR(50)')  AS AddressLine2,
         c.value('(USAddress/City/text())[1]', 'VARCHAR(100)')         AS City,
         c.value('(USAddress/State/text())[1]', 'CHAR(2)')             AS State,
         c.value('(USAddress/ZIPCode/text())[1]', 'CHAR(5)')           AS ZIPCode
     FROM rs AS tbl
     -- OUTER APPLY still emits one all-NULL row when no Filer node matches;
     -- switch to CROSS APPLY to get zero rows in that case instead.
     OUTER APPLY tbl.xmldata.nodes('/Return/ReturnHeader/Filer') AS t(c);

     SELECT
         ID,
         EIN,
         AddressLine1,
         AddressLine2,
         City,
         State,
         ZIPCode
     FROM @targetTable
     ORDER BY ID;

 -- Shred one row per /Return/ReturnData/irs990/Form990PartVIISectionA node
 -- into a table variable.
 DECLARE @targetTable2 TABLE
 (
     ID                             INT IDENTITY PRIMARY KEY,
     NamePerson                     VARCHAR(50),
     Title                          VARCHAR(50),
     AverageHoursPerWeek            FLOAT,
     IndividualTrusteeOrDirector    CHAR(2),
     -- NOTE(review): CHAR(2) keeps at most two characters; if the three
     -- compensation elements hold amounts, values may be cut short -
     -- confirm against the file and widen if so.
     ReportableCompFromOrganization CHAR(2),
     ReportableCompFromrelatedOrgs  CHAR(2),
     OtherCompensation              CHAR(2)
 );

 -- Unprefixed element names in the XPath expressions below are resolved in
 -- the default namespace declared here.
 WITH XMLNAMESPACES (DEFAULT 'http://www.irs.gov/efile'),
 rs (xmldata) AS
 (
     -- XML file from https://s3.amazonaws.com/irs-form-990/200931393493000150_public.xml,
     -- saved to local disk. TRY_CAST yields NULL instead of an error when the
     -- content cannot be converted to XML.
     SELECT TRY_CAST(BulkColumn AS XML) AS BulkColumn
     FROM OPENROWSET(BULK 'C:\Users\200931393493000150_public.xml', SINGLE_BLOB) AS x
 )
 -- Fixed: the rows must go into @targetTable2 (the table declared and read in
 -- this block), not @targetTable. The explicit column list skips the
 -- IDENTITY column.
 INSERT INTO @targetTable2
 (
     NamePerson,
     Title,
     AverageHoursPerWeek,
     IndividualTrusteeOrDirector,
     ReportableCompFromOrganization,
     ReportableCompFromrelatedOrgs,
     OtherCompensation
 )
 SELECT
     c.value('(NamePerson/text())[1]', 'VARCHAR(50)')                    AS NamePerson,
     c.value('(Title/text())[1]', 'VARCHAR(50)')                         AS Title,
     c.value('(AverageHoursPerWeek/text())[1]', 'float')                 AS AverageHoursPerWeek,
     c.value('(IndividualTrusteeOrDirector/text())[1]', 'CHAR(2)')       AS IndividualTrusteeOrDirector,
     c.value('(ReportableCompFromOrganization/text())[1]', 'CHAR(2)')    AS ReportableCompFromOrganization,
     -- NOTE(review): XPath names are case-sensitive; confirm the element is
     -- really spelled with a lower-case "related" in the file.
     c.value('(ReportableCompFromrelatedOrgs/text())[1]', 'CHAR(2)')     AS ReportableCompFromrelatedOrgs,
     c.value('(OtherCompensation/text())[1]', 'CHAR(2)')                 AS OtherCompensation
 FROM rs AS tbl
 -- OUTER APPLY still emits one all-NULL row when no node matches; switch to
 -- CROSS APPLY to get zero rows in that case instead.
 OUTER APPLY tbl.xmldata.nodes('/Return/ReturnData/irs990/Form990PartVIISectionA') AS t(c);

 SELECT
     ID,
     NamePerson,
     Title,
     AverageHoursPerWeek,
     IndividualTrusteeOrDirector,
     ReportableCompFromOrganization,
     ReportableCompFromrelatedOrgs,
     OtherCompensation
 FROM @targetTable2
 ORDER BY ID;
    

解决方法

您当前正在执行的操作称为切碎,即切碎XML,将其转换为矩形/关系格式,并最终加载到表中。

问题#1:是否可以在整个xml内容中一次性创建一个表?

答案:可以将整个XML文件原样加载到XML数据类型的单个列中。

问题2:如何使用openrowset读取存储在SQL Server中而不是本地目录中的xml文件?

答案:SQL Server 的 XQuery 方法 .nodes() 和 .value() 正是为此目的而设计的。

,

有时,即使需要多做几步,先将数据规范化也会带来很大帮助。就我而言,我使用下面的表值函数(见下文)将任意XML文件转换为SQL表,之后便可以对其做任何想做的处理。这样做的好处是,不必在每次XML结构变化时修改代码;转换是动态完成的,因此可以像管理普通表一样管理这些数据。

使用:

-- Flatten an ad-hoc XML fragment and return every column the function exposes.
SELECT *
FROM Utility.FlattenXml('<a>1</a><b></b><c>3</c>');

-- Template only: replace `field`, [yourfield] and [value] with real column
-- names and a real predicate before running.
SELECT
    field,
    field
FROM Utility.FlattenXml('<a>1</a><b></b><c>3</c>')
WHERE [yourfield] = [value];

步骤:

USE [YOUR_DB]
GO
-- XML data type methods (.nodes(), .value(), .query()) require these SET
-- options to be ON, both when the function is created and when it is called.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO

-- The function lives in the Utility schema; create the schema on first use.
-- CREATE SCHEMA must be the only statement in its batch, hence the EXEC.
IF SCHEMA_ID(N'Utility') IS NULL
    EXEC (N'CREATE SCHEMA [Utility]');
GO

-- Utility.FlattenXml: turns an arbitrary XML document or fragment into one
-- row per element and one row per attribute.
--
-- Parameter:
--   @xmlDoc  XML value to flatten (may contain several top-level elements).
--
-- Result columns:
--   ID             running number over the flattened rows (ORDER BY Sort, ID)
--   ParentName     local name of the parent element (NULL for top-level elements)
--   ParentPosition Position value of the parent element
--   Depth          nesting level, top-level elements = 1; an attribute
--                  carries the Depth of its owning element
--   NodeName       local name of the element or attribute (namespace ignored)
--   Position       1-based index among same-named siblings (always 1 for attributes)
--   NodeType       N'Element' or N'Attribute'
--   FullPath       slash-separated path of local names, e.g. a/b or a/@x
--   XPath          same path with positional predicates, e.g. a[1]/b[2]
--   TreeView       indented name for display purposes
--   Value          first text node of an element, or the attribute value
--   XMLData        the element itself as XML (NULL for attributes)
--
-- Notes:
--   * An inline table-valued function cannot carry OPTION (MAXRECURSION), so
--     the default recursion limit applies unless the calling statement
--     overrides it.
--   * ROW_NUMBER() is ordered by a constant, so sibling numbering follows the
--     order in which .nodes() emits rows; this is expected to be document
--     order but is not formally guaranteed - TODO confirm if exact positions
--     matter.
CREATE FUNCTION [Utility].[FlattenXml](@xmlDoc XML)
RETURNS TABLE
AS RETURN
WITH element_nodes AS (
    -- Anchor member: every top-level element of the input.
    SELECT
        1                                                          AS lvl,
        x.value('local-name(.)', 'NVARCHAR(MAX)')                  AS Name,
        CAST(NULL AS NVARCHAR(MAX))                                AS ParentName,
        CAST(1 AS INT)                                             AS ParentPosition,
        CAST(N'Element' AS NVARCHAR(20))                           AS NodeType,
        x.value('local-name(.)', 'NVARCHAR(MAX)')                  AS FullPath,
        x.value('local-name(.)', 'NVARCHAR(MAX)')
            + N'['
            + CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS NVARCHAR(20))
            + N']'                                                 AS XPath,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1))                    AS Position,
        x.value('local-name(.)', 'NVARCHAR(MAX)')                  AS Tree,
        x.value('text()[1]', 'NVARCHAR(MAX)')                      AS Value,
        x.query('.')                                               AS this,  -- the element itself
        x.query('*')                                               AS t,     -- its child elements, input to the next level
        CAST(CAST(1 AS VARBINARY(4)) AS VARBINARY(MAX))            AS Sort,
        CAST(1 AS INT)                                             AS ID
    FROM @xmlDoc.nodes('/*') AS a(x)

    UNION ALL

    -- Recursive member: the child elements of each element found so far.
    -- Every column is cast to exactly the anchor's type, as a recursive CTE
    -- requires.
    SELECT
        p.lvl + 1                                                  AS lvl,
        c.value('local-name(.)', 'NVARCHAR(MAX)')                  AS Name,
        CAST(p.Name AS NVARCHAR(MAX))                              AS ParentName,
        CAST(p.Position AS INT)                                    AS ParentPosition,
        CAST(N'Element' AS NVARCHAR(20))                           AS NodeType,
        CAST(p.FullPath + N'/'
             + c.value('local-name(.)', 'NVARCHAR(MAX)')
             AS NVARCHAR(MAX))                                     AS FullPath,
        CAST(p.XPath + N'/'
             + c.value('local-name(.)', 'NVARCHAR(MAX)')
             + N'['
             + CAST(ROW_NUMBER() OVER (PARTITION BY c.value('local-name(.)', 'NVARCHAR(MAX)')
                                       ORDER BY (SELECT 1)) AS NVARCHAR(20))
             + N']'
             AS NVARCHAR(MAX))                                     AS XPath,
        ROW_NUMBER() OVER (PARTITION BY c.value('local-name(.)', 'NVARCHAR(MAX)')
                           ORDER BY (SELECT 1))                    AS Position,
        CAST(SPACE(2 * p.lvl - 1) + N'|' + REPLICATE(N'-', 1)
             + c.value('local-name(.)', 'NVARCHAR(MAX)')
             AS NVARCHAR(MAX))                                     AS Tree,
        CAST(c.value('text()[1]', 'NVARCHAR(MAX)') AS NVARCHAR(MAX)) AS Value,
        c.query('.')                                               AS this,
        c.query('*')                                               AS t,
        -- Sort key = parent's key followed by a 4-byte key for this child, so
        -- a parent's key is a prefix of each of its children's keys.
        CAST(p.Sort
             + CAST((p.lvl + 1) * 1024
                    + (ROW_NUMBER() OVER (ORDER BY (SELECT 1)) * 2)
                    AS VARBINARY(4))
             AS VARBINARY(MAX))                                    AS Sort,
        CAST((p.lvl + 1) * 1024
             + (ROW_NUMBER() OVER (ORDER BY (SELECT 1)) * 2)
             AS INT)                                               AS ID
    FROM element_nodes AS p
    CROSS APPLY p.t.nodes('*') AS b(c)
),
all_nodes AS (
    -- All elements ...
    SELECT
        lvl  AS Depth,
        Name AS NodeName,
        ParentName,
        ParentPosition,
        NodeType,
        FullPath,
        XPath,
        Position,
        Tree AS TreeView,
        Value,
        this AS XMLData,
        Sort,
        ID
    FROM element_nodes

    UNION ALL

    -- ... plus one row per attribute. An attribute reuses its element's Sort
    -- key with ID + 1, which places it directly after the owning element.
    SELECT
        p.lvl,
        x.value('local-name(.)', 'NVARCHAR(MAX)'),
        p.Name,
        p.Position,
        CAST(N'Attribute' AS NVARCHAR(20)),
        p.FullPath + N'/@' + x.value('local-name(.)', 'NVARCHAR(MAX)'),
        p.XPath + N'/@' + x.value('local-name(.)', 'NVARCHAR(MAX)'),
        1,
        SPACE(2 * p.lvl - 1) + N'|' + REPLICATE(N'-', 1)
            + N'@' + x.value('local-name(.)', 'NVARCHAR(MAX)'),
        x.value('.', 'NVARCHAR(MAX)'),
        NULL,
        p.Sort,
        p.ID + 1
    FROM element_nodes AS p
    CROSS APPLY p.this.nodes('/*/@*') AS a(x)
)
SELECT
    ROW_NUMBER() OVER (ORDER BY Sort, ID) AS ID,
    ParentName,
    ParentPosition,
    Depth,
    NodeName,
    Position,
    NodeType,
    FullPath,
    XPath,
    TreeView,
    Value,
    XMLData
FROM all_nodes