Hi folks,
I have a gnarly piece of XML that I’m looking to parse, with many fields that will change from file to file. The specific issue is the set of tags under:
/om:ObservationCollection/om:member/om:Observation/om:result/elements
Each `element` tag has a `qualifier` tag underneath it, which I’m looking to split out into its own field, as it represents the QA of the data in its parent tag.
I can get either the data or the QA information, but getting both seems to require two field_selection definitions. Examples are below.
Is it possible to somehow use two different sets of field_selection information? Should I look at a processor plugin and/or perhaps concatenate those values into a single field and then split them into separate fields later?
Partial Example Config
# XPath selecting the om:Observation node(s); the relative paths below are written against that node.
metric_selection = "/om:ObservationCollection/om:member/om:Observation"
# Timestamp is read from om:samplingTime (not om:resultTime).
timestamp = "om:samplingTime/gml:TimeInstant/gml:timePosition"
# Layout matching values such as "2023-02-02T20:30:00.000Z" in the sample XML.
timestamp_format = "2006-01-02T15:04:05.000Z"
# Extracts Data: one field per <element>, named by its name attribute and
# valued by number() of its value attribute.
# NOTE(review): XPath 1.0 number() yields NaN for non-numeric strings such as
# value="MSNG" on the mslp element — confirm how the parser treats that field.
field_selection = "om:result/elements/element"
field_name = "@name"
field_value = "number(@value)"
# Extracts QA Information: one field per <element>, named
# "<element name>_<qualifier name>" (e.g. air_temp_qa_summary) and valued by
# number() of the qualifier's value attribute.
# NOTE(review): these three keys repeat the ones set above — this is the
# two-definition conflict the question is about; presumably only one set can
# be active at a time.
field_selection = "om:result/elements/element"
field_name = "concat(./@name, '_', ./qualifier/@name)"
field_value = "number(./qualifier/@value)"
Sample XML
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<om:ObservationCollection xmlns:om="http://www.opengis.net/om/1.0"
                          xmlns="http://dms.ec.gc.ca/schema/point-observation/2.0"
                          xmlns:gml="http://www.opengis.net/gml"
                          xmlns:xlink="http://www.w3.org/1999/xlink"
                          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <om:member>
    <om:Observation>
      <om:metadata>
        <set>
          <general>
            <author build="build.49" name="MSC-DMS-PG-SWOB" version="4.7"/>
            <dataset name="msc/observation/atmospheric/surface_weather/ca-1.1-ascii"/>
            <phase name="product_generic_swob-xml-2.0"/>
            <id xlink:href="/data/msc/observation/atmospheric/surface_weather/ca-1.1-ascii/product_generic_swob-xml-2.0/202302022030/8200573/abb/orig/data_60"/>
            <parent xlink:href="/data/msc/observation/atmospheric/surface_weather/ca-1.1-ascii/decoded_enhanced-xml-2.0/202302022030/8200573/abb/130/orig/data_60"/>
          </general>
          <identification-elements>
            <element name="date_tm" uom="datetime" value="2023-02-02T20:30:00.000Z"/>
            <element name="stn_nam" uom="unitless" value="BEDFORD BASIN"/>
            <element name="tc_id" uom="unitless" value="ABB"/>
            <element name="wmo_synop_id" uom="unitless" value="71329"/>
            <element name="stn_elev" uom="m" value="5.0"/>
            <element name="data_pvdr" uom="unitless" value="DND"/>
            <element name="msc_id" uom="unitless" value="8200573"/>
            <element name="clim_id" uom="unitless" value="8200573"/>
            <element name="lat" uom="°" value="44.706111"/>
            <element name="long" uom="°" value="-63.633333"/>
          </identification-elements>
        </set>
      </om:metadata>
      <!-- samplingTime is the path the example config reads its timestamp from -->
      <om:samplingTime>
        <gml:TimeInstant>
          <gml:timePosition>2023-02-02T20:30:00.000Z</gml:timePosition>
        </gml:TimeInstant>
      </om:samplingTime>
      <om:resultTime>
        <gml:TimeInstant>
          <gml:timePosition>2023-02-02T20:32:08.735Z</gml:timePosition>
        </gml:TimeInstant>
      </om:resultTime>
      <om:procedure xlink:href="/data/msc/metadata/station/surface_weather/metadata_instance-2.0-xml/product-jicc_xml-2.0/20221014000000831/abb"/>
      <om:observedProperty gml:remoteSchema="/schema/point-observation/2.0.xsd"/>
      <om:featureOfInterest>
        <gml:FeatureCollection>
          <gml:location>
            <gml:Point>
              <gml:pos>44.706111 -63.6333333</gml:pos>
            </gml:Point>
          </gml:location>
        </gml:FeatureCollection>
      </om:featureOfInterest>
      <om:result>
        <!-- Each element below has exactly one qualifier child (qa_summary or data_flag) -->
        <elements>
          <element name="air_temp" uom="°C" value="-2.0">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="rel_hum" uom="%" value="63">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="avg_wnd_spd_10m_pst2mts" uom="km/h" value="19.6">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="avg_wnd_dir_10m_pst2mts" uom="°" value="185">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="avg_wnd_spd_10m_pst10mts" uom="km/h" value="19.2">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="avg_wnd_dir_10m_pst10mts" uom="°" value="195">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="max_vis_pst1hr" uom="km" value="24.140">
            <qualifier name="qa_summary" uom="unitless" value="100"/>
          </element>
          <element name="dwpt_temp" uom="°C" value="-8.1">
            <qualifier code-src="std_code_src" code-type="data_flags" name="data_flag" uom="code" value="1"/>
          </element>
          <element name="mslp" uom="hPa" value="MSNG">
            <qualifier code-src="std_code_src" code-type="data_flags" name="data_flag" uom="code" value="4"/>
          </element>
          <element name="wetblb_temp" uom="°C" value="-3.9">
            <qualifier code-src="std_code_src" code-type="data_flags" name="data_flag" uom="code" value="1"/>
          </element>
        </elements>
      </om:result>
    </om:Observation>
  </om:member>
</om:ObservationCollection>