Skip to main content

XML Parser V2

Parses XML. See Supported reader-parser combinations for compatible readers. See also XML Parser.

XMLParserV2 has only a single property, Root Node. Its output format is XMLNodeEvent, which is a java.util.Map containing event metadata followed by an org.dom4j.Element containing the data. For example, this is an event from the first sample application's XMLRawStream:

<?xml version="1.0" encoding="UTF-8"?>
<PurchaseOrder>
<event>
<event-metadata SOURCE NAME="PurchaseOrders1.xml" LINE NUMBER="5" CHARACTER OFFSET="209"
FileName="PurchaseOrders1.xml" FileOffset="0" SEQUENCENO="1" COLUMN NUMBER="1"/>
<event-data>
<PurchaseOrder PurchaseOrderNumber="1">
<CustomerName>Jon snow</CustomerName>
<CustomerAddress>Palo Alto</CustomerAddress>
<DeliveryNotes>Please leave packages in shed by driveway.</DeliveryNotes>
</PurchaseOrder>
</event-data>
</event>

For debugging purposes, you may write unparsed XMLNodeEvent to FileWriter and SysOut.

Sample Application 1

This example simply converts XML input into JSON.

Save the following as Striim/Samples/PurchaseOrders1.xml:

<PurchaseOrder PurchaseOrderNumber="1">
    <CustomerName>Jon snow</CustomerName>
    <CustomerAddress>Palo Alto</CustomerAddress>
    <DeliveryNotes>Please leave packages in shed by driveway.</DeliveryNotes>
</PurchaseOrder>
<PurchaseOrder PurchaseOrderNumber="2">
    <CustomerName>Tyrion</CustomerName>
    <CustomerAddress>Seattle</CustomerAddress>
    <DeliveryNotes>Preferred time : post 4 PM</DeliveryNotes>
</PurchaseOrder>

Then run the following application:

-- XMLParserV2Test1: reads purchase orders from an XML file, extracts selected
-- fields from each order, and writes them out as JSON.
CREATE APPLICATION XMLParserV2Test1;

-- Read Samples/PurchaseOrders1.xml. rootnode names the element (PurchaseOrder)
-- that is delivered as the data portion of each XMLNodeEvent.
CREATE SOURCE XMLSource USING FileReader (
  directory:'Samples',
  wildcard:'PurchaseOrders1.xml',
  positionByEOF:false
)
PARSE USING XMLParserV2 (
  rootnode:'PurchaseOrder'
)
OUTPUT TO XMLRawStream;

-- data is the org.dom4j.Element for one PurchaseOrder. The order number comes
-- from its attribute; the other fields come from the text of its child elements.
CREATE CQ XmlCQ
INSERT INTO XmlParsedStream
SELECT
  data.attributeValue("PurchaseOrderNumber") AS PONumber,
  data.element("CustomerName").getText() AS CustomerName,
  data.element("CustomerAddress").getText() AS CustomerAddress,
  data.element("DeliveryNotes").getText() AS DeliveryNotes
FROM XMLRawStream;

-- Write the parsed fields as JSON into the XMLParserV2Test1 directory.
CREATE TARGET XMLParsedOut USING FileWriter (
  filename:'parsed.json',
  directory: 'XMLParserV2Test1'
)
FORMAT USING JSONFormatter ()
INPUT FROM XmlParsedStream;

END APPLICATION XMLParserV2Test1;

Striim/XMLParserV2Test1/parsed.00.json should contain the following:

[
 {
  "PONumber":"1",
  "CustomerName":"Jon snow",
  "CustomerAddress":"Palo Alto",
  "DeliveryNotes":"Please leave packages in shed by driveway."
 },
 {
  "PONumber":"2",
  "CustomerName":"Tyrion",
  "CustomerAddress":"Seattle",
  "DeliveryNotes":"Preferred time : post 4 PM"
 }
]

Sample Application 2

This example iterates through child elements (line items in a purchase order).

Save the following as Striim/Samples/PurchaseOrders2.xml:

<PurchaseOrder PurchaseOrderNumber="1">
    <Details>
        <CustomerName>Jon snow</CustomerName>
        <CustomerAddress>Palo Alto</CustomerAddress>
        <DeliveryNotes>Please leave packages in shed by driveway.</DeliveryNotes>
    </Details>
    <Items>
        <Item ItemNumber="1">
            <ProductName>EarPhones</ProductName>
            <USPrice>148.95</USPrice>
        </Item>
        <Item ItemNumber="2">
            <ProductName>Mouse</ProductName>
            <USPrice>39.98</USPrice>
        </Item>
    </Items>
</PurchaseOrder>
<PurchaseOrder PurchaseOrderNumber="2">
    <Details>
        <CustomerName>Tyrion</CustomerName>
        <CustomerAddress>Seattle</CustomerAddress>
        <DeliveryNotes>Preffered time : post 4 PM</DeliveryNotes>
    </Details>
    <Items>
        <Item ItemNumber="1">
            <ProductName>Monitor</ProductName>
            <USPrice>148.95</USPrice>
        </Item>
        <Item ItemNumber="2">
            <ProductName>Keyboard</ProductName>
            <USPrice>39.98</USPrice>
        </Item>
    </Items>
</PurchaseOrder>

Then run the following application:

-- XMLParserV2Test2: reads purchase orders that contain nested line items and
-- writes one JSON record per line item, each carrying its order's details.
-- The unparsed events are also written to a file for inspection.
CREATE APPLICATION XMLParserV2Test2;

-- Read Samples/PurchaseOrders2.xml. rootnode names the element (PurchaseOrder)
-- that is delivered as the data portion of each XMLNodeEvent.
CREATE SOURCE XMLSource USING FileReader (
  directory:'Samples',
  wildcard:'PurchaseOrders2.xml',
  positionByEOF:false
)
PARSE USING XMLParserV2 (
  rootnode:'PurchaseOrder'
)
OUTPUT TO XMLRawStream;

-- Write the unparsed XMLNodeEvent output to raw.txt (useful for debugging).
CREATE TARGET RawXMLFileOut USING FileWriter (
  filename:'raw.txt',
  directory: 'XMLParserV2Test2'
)
FORMAT USING XMLFormatter (
  rootelement:'PurchaseOrder'
)
INPUT FROM XMLRawStream;

-- Split each purchase order into its number, its Details element, and the
-- list of Item elements found under Items.
CREATE CQ IntermediateTransformation
INSERT INTO IntermediateStream
SELECT
  data.attributeValue("PurchaseOrderNumber") AS PONumber,
  data.element("Details") AS PODetails,
  data.element("Items").elements("Item") AS itemlist
FROM XMLRawStream;

-- Iterate over the items in each PO and append the common PO details to each
-- item, producing one output event per item.
CREATE CQ XmlCQ
INSERT INTO XmlParsedStream
SELECT
  PO.PONumber AS PONumber,
  PO.PODetails.element("CustomerName").getText() AS CustomerName,
  PO.PODetails.element("CustomerAddress").getText() AS CustomerAddress,
  PO.PODetails.element("DeliveryNotes").getText() AS DeliveryNotes,
  item.attributeValue("ItemNumber") AS ItemNumber,
  item.element("ProductName").getText() AS ProductName,
  item.element("USPrice").getText() AS USPrice
FROM IntermediateStream PO, iterator(PO.itemlist, org.dom4j.Element) item;

-- Write the per-item records as JSON into the XMLParserV2Test2 directory.
CREATE TARGET XMLParsedOut USING FileWriter (
  filename:'parsed.json',
  directory: 'XMLParserV2Test2'
)
FORMAT USING JSONFormatter ()
INPUT FROM XmlParsedStream;

END APPLICATION XMLParserV2Test2;

Striim/XMLParserV2Test2/parsed.00.json should contain the following:

[
 {
  "PONumber":"1",
  "CustomerName":"Jon snow",
  "CustomerAddress":"Palo Alto",
  "DeliveryNotes":"Please leave packages in shed by driveway.",
  "ItemNumber":"1",
  "ProductName":"EarPhones",
  "USPrice":"148.95"
 },
 {
  "PONumber":"1",
  "CustomerName":"Jon snow",
  "CustomerAddress":"Palo Alto",
  "DeliveryNotes":"Please leave packages in shed by driveway.",
  "ItemNumber":"2",
  "ProductName":"Mouse",
  "USPrice":"39.98"
 },
 {
  "PONumber":"2",
  "CustomerName":"Tyrion",
  "CustomerAddress":"Seattle",
  "DeliveryNotes":"Preffered time : post 4 PM",
  "ItemNumber":"1",
  "ProductName":"Monitor",
  "USPrice":"148.95"
 },
 {
  "PONumber":"2",
  "CustomerName":"Tyrion",
  "CustomerAddress":"Seattle",
  "DeliveryNotes":"Preffered time : post 4 PM",
  "ItemNumber":"2",
  "ProductName":"Keyboard",
  "USPrice":"39.98"
 }
]