XML Parser V2
Parses XML. See Supported reader-parser combinations for compatible readers. See also XML Parser.
XMLParserV2 has only a single property, Root Node. Its output format is XMLNodeEvent, which is a java.util.Map containing event metadata followed by an org.dom4j.Element containing the data. For example, this is an event from the first sample application's XMLRawStream:
<?xml version="1.0" encoding="UTF-8"?> <PurchaseOrder> <event> <event-metadata SOURCE NAME="PurchaseOrders1.xml" LINE NUMBER="5" CHARACTER OFFSET="209" FileName="PurchaseOrders1.xml" FileOffset="0" SEQUENCENO="1" COLUMN NUMBER="1"/> <event-data> <PurchaseOrder PurchaseOrderNumber="1"> <CustomerName>Jon snow</CustomerName> <CustomerAddress>Palo Alto</CustomerAddress> <DeliveryNotes>Please leave packages in shed by driveway.</DeliveryNotes> </PurchaseOrder> </event-data> </event>
For debugging purposes, you may write unparsed XMLNodeEvent output to a FileWriter or SysOut target (the RawXMLFileOut target in Sample Application 2 does this with FileWriter).
Sample Application 1
This example simply converts XML input into JSON.
Save the following as Striim/Samples/PurchaseOrders1.xml:
<PurchaseOrder PurchaseOrderNumber="1"> <CustomerName>Jon snow</CustomerName> <CustomerAddress>Palo Alto</CustomerAddress> <DeliveryNotes>Please leave packages in shed by driveway.</DeliveryNotes> </PurchaseOrder> <PurchaseOrder PurchaseOrderNumber="2"> <CustomerName>Tyrion</CustomerName> <CustomerAddress>Seattle</CustomerAddress> <DeliveryNotes>Preferred time : post 4 PM</DeliveryNotes> </PurchaseOrder>
Then run the following application:
-- Sample application 1: reads PurchaseOrders1.xml, pulls the fields out of
-- each PurchaseOrder element, and writes them out as JSON.
CREATE APPLICATION XMLParserV2Test1;

-- Read the sample file and parse it with rootnode set to 'PurchaseOrder'.
-- The parsed XMLNodeEvents are published to XMLRawStream.
CREATE SOURCE XMLSource USING FileReader (
  directory:'Samples',
  wildcard:'PurchaseOrders1.xml',
  positionByEOF:false
)
PARSE USING XMLParserV2 (
  rootnode:'PurchaseOrder'
)
OUTPUT TO XMLRawStream;

-- "data" is the event's org.dom4j.Element payload. Take the order number from
-- the PurchaseOrderNumber attribute and the remaining fields from the text of
-- the child elements.
CREATE CQ XmlCQ
INSERT INTO XmlParsedStream
SELECT
  data.attributeValue("PurchaseOrderNumber") AS PONumber,
  data.element("CustomerName").getText() AS CustomerName,
  data.element("CustomerAddress").getText() AS CustomerAddress,
  data.element("DeliveryNotes").getText() AS DeliveryNotes
FROM XMLRawStream;

-- Write the extracted fields as JSON.
CREATE TARGET XMLParsedOut USING FileWriter (
  filename:'parsed.json',
  directory: 'XMLParserV2Test1'
)
FORMAT USING JSONFormatter ()
INPUT FROM XmlParsedStream;

END APPLICATION XMLParserV2Test1;
Striim/XMLParserV2Test1/parsed.00.txt should contain the following:
[ { "PONumber":"1", "CustomerName":"Jon snow", "CustomerAddress":"Palo Alto", "DeliveryNotes":"Please leave packages in shed by driveway." }, { "PONumber":"2", "CustomerName":"Tyrion", "CustomerAddress":"Seattle", "DeliveryNotes":"Preferred time : post 4 PM" } ]
Sample Application 2
This example iterates through child elements (line items in a purchase order).
Save the following as Striim/Samples/PurchaseOrders2.xml:
<PurchaseOrder PurchaseOrderNumber="1"> <Details> <CustomerName>Jon snow</CustomerName> <CustomerAddress>Palo Alto</CustomerAddress> <DeliveryNotes>Please leave packages in shed by driveway.</DeliveryNotes> </Details> <Items> <Item ItemNumber="1"> <ProductName>EarPhones</ProductName> <USPrice>148.95</USPrice> </Item> <Item ItemNumber="2"> <ProductName>Mouse</ProductName> <USPrice>39.98</USPrice> </Item> </Items> </PurchaseOrder> <PurchaseOrder PurchaseOrderNumber="2"> <Details> <CustomerName>Tyrion</CustomerName> <CustomerAddress>Seattle</CustomerAddress> <DeliveryNotes>Preffered time : post 4 PM</DeliveryNotes> </Details> <Items> <Item ItemNumber="1"> <ProductName>Monitor</ProductName> <USPrice>148.95</USPrice> </Item> <Item ItemNumber="2"> <ProductName>Keyboard</ProductName> <USPrice>39.98</USPrice> </Item> </Items> </PurchaseOrder>
Then run the following application:
-- Sample application 2: reads PurchaseOrders2.xml and emits one JSON record
-- per line item, each carrying the details of its parent purchase order.
CREATE APPLICATION XMLParserV2Test2;

-- Read the sample file and parse it with rootnode set to 'PurchaseOrder'.
-- The parsed XMLNodeEvents are published to XMLRawStream.
CREATE SOURCE XMLSource USING FileReader (
  directory:'Samples',
  wildcard:'PurchaseOrders2.xml',
  positionByEOF:false
)
PARSE USING XMLParserV2 (
  rootnode:'PurchaseOrder'
)
OUTPUT TO XMLRawStream;

-- Debugging aid: write the unparsed events to a file as XML.
CREATE TARGET RawXMLFileOut USING FileWriter (
  filename:'raw.txt',
  directory: 'XMLParserV2Test2'
)
FORMAT USING XMLFormatter (
  rootelement:'PurchaseOrder'
)
INPUT FROM XMLRawStream;

-- Split each purchase order into its order number, its Details element, and
-- the list of Item elements under Items.
CREATE CQ IntermediateTransformation
INSERT INTO IntermediateStream
SELECT
  data.attributeValue("PurchaseOrderNumber") AS PONumber,
  data.element("Details") AS PODetails,
  data.element("Items").elements("Item") AS itemlist
FROM XMLRawStream;

-- iterates over the items in PO and appends common PO details to each item
CREATE CQ XmlCQ
INSERT INTO XmlParsedStream
SELECT
  PO.PONumber AS PONumber,
  PO.PODetails.element("CustomerName").getText() AS CustomerName,
  PO.PODetails.element("CustomerAddress").getText() AS CustomerAddress,
  PO.PODetails.element("DeliveryNotes").getText() AS DeliveryNotes,
  item.attributeValue("ItemNumber") AS ItemNumber,
  item.element("ProductName").getText() AS ProductName,
  item.element("USPrice").getText() AS USPrice
FROM IntermediateStream PO,
  iterator(PO.itemlist, org.dom4j.Element) item;

-- Write the flattened per-item records as JSON.
CREATE TARGET XMLParsedOut USING FileWriter (
  filename:'parsed.json',
  directory: 'XMLParserV2Test2'
)
FORMAT USING JSONFormatter ()
INPUT FROM XmlParsedStream;

END APPLICATION XMLParserV2Test2;
Striim/XMLParserV2Test2/parsed.00.txt should contain the following:
[ { "PONumber":"1", "CustomerName":"Jon snow", "CustomerAddress":"Palo Alto", "DeliveryNotes":"Please leave packages in shed by driveway.", "ItemNumber":"1", "ProductName":"EarPhones", "USPrice":"148.95" }, { "PONumber":"1", "CustomerName":"Jon snow", "CustomerAddress":"Palo Alto", "DeliveryNotes":"Please leave packages in shed by driveway.", "ItemNumber":"2", "ProductName":"Mouse", "USPrice":"39.98" }, { "PONumber":"2", "CustomerName":"Tyrion", "CustomerAddress":"Seattle", "DeliveryNotes":"Preffered time : post 4 PM", "ItemNumber":"1", "ProductName":"Monitor", "USPrice":"148.95" }, { "PONumber":"2", "CustomerName":"Tyrion", "CustomerAddress":"Seattle", "DeliveryNotes":"Preffered time : post 4 PM", "ItemNumber":"2", "ProductName":"Keyboard", "USPrice":"39.98" } ]