Source code for datconv.filters.statex

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""General Filter that allows to calculate and print required statistics about processed data - extended version.
Filter prints counts or sums of records that fulfill given expression with option to group by certain data.
Filter prints statistics at program exit as logger INFO messages or to the file."""

# Standard Python Libs
from importlib import import_module

# Libs installed using pip
from lxml import etree

# Datconv generic modules
from datconv.outconn import dcstdout, dcfile
#from datconv.outconn import dcstdout as dcstdout, dcfile as dcfile

# Module-level logger placeholder: it is None at import time and DCFilter's
# constructor asserts that it has been assigned before any filter is created.
Log = None
"""Log varaible is automatically set by main pandoc script using logging.getLogger method.
Use it for logging messages in need.
"""

class DCFilter:
    """Filter that accumulates counts or sums over records and reports them at the end.

    Please see constructor description for more details.
    """

    def __init__(self, retval=1, fields=None, statfile=None, statwriter=None):
        """Constructor parameters are usually passed from YAML file as subkeys of Filter:CArg key.

        :param retval: value that filter returns (0 to skip records, 1 to write records);
        :param fields: list of 5 or 6 elements' lists that define calculated statistics.
            As used by this class the items are: [0] statistic name, [1] record tag
            (None matches every record), [2] condition (True, False or an XPath
            expression), [3] XPath expression giving the grouping key (None groups
            everything under '*'), [4] 'c' to count records or 's' to sum values,
            [5] XPath expression of the value to sum (needed only for 's').
        :param statfile: file to write final statistics; it is consulted only when
            statwriter is configured, otherwise statistics go to the logger.
        :param statwriter: datconv writer module to write final statistics; a mapping
            with a 'Module' key and an optional 'CArg' key holding constructor arguments.

        For more detailed descriptions see :ref:`conf_template.yaml <filters_conf_template>` file in this module folder.
        """
        assert Log is not None

        self._retval = retval
        # A fresh list per instance avoids sharing one mutable default object.
        self._fields = fields if fields is not None else []
        self._sfile = statfile
        self._swconf = statwriter
        self._swri = None
        self._stats = {}
        # Incremented once per filtered record; not read anywhere in this class.
        self._recno = 1

    def filterRecord(self, record):
        """Update every configured statistic with ``record`` and return ``retval``.

        :param record: record element; its ``tag`` attribute and ``xpath`` method are used.
        :return: the ``retval`` value passed to the constructor.
        """
        for fld in self._fields:
            if len(fld) < 5:
                Log.warning('Too few items in fields definition: %s' % str(fld))
                continue
            name, rectype, cond, group, kind = fld[:5]

            if rectype is not None and record.tag != str(rectype):
                continue
            if not self._condition_holds(record, cond):
                continue

            key = self._group_key(record, group)

            if kind == 'c':
                amount = 1
            elif kind == 's' and len(fld) > 5 and fld[5]:
                # The length check keeps a 5-element 's' definition from
                # raising IndexError; it is skipped like an empty expression.
                amount = self._sum_operand(record, fld[5])
            else:
                continue

            bucket = self._stats.setdefault(name, {})
            previous = bucket.get(key)
            bucket[key] = (previous + amount) if previous else amount

        self._recno = self._recno + 1
        return self._retval

    def setFooter(self, footer):
        """Emit all collected statistics through the stat writer or the logger.

        For every field definition one entry per grouping key is written in sorted
        key order; a definition that collected nothing is written once with an
        empty key and value 0.

        :param footer: not used by this filter.
        """
        self._init_swriter()
        for fld in self._fields:
            if not fld:
                # No statistic name available; filterRecord already warned about it.
                continue
            stat = self._stats.get(fld[0])
            if stat:
                for key in self._sorted_keys(stat):
                    self._write_stat(fld[0], key, stat[key])
            else:
                self._write_stat(fld[0], '', 0)
        if self._swri is not None:
            self._swri.writeFooter([])

    @staticmethod
    def _condition_holds(record, cond):
        """Return True when the field's condition selects ``record``."""
        # Equality (not identity) is deliberate: it keeps 0/1 behaving like
        # False/True, exactly as the configuration was interpreted before.
        if cond == False:  # noqa: E712
            return False
        if cond == True:  # noqa: E712
            return True
        return bool(_eval_xpath_result_as_scalar(record.xpath(cond)))

    @staticmethod
    def _group_key(record, group):
        """Return the grouping key of ``record``: '*' without grouping, 'null' when empty."""
        if group is None:
            return '*'
        key = _eval_xpath_result_as_scalar(record.xpath(group))
        return 'null' if key is None else key

    @staticmethod
    def _sum_operand(record, expr):
        """Return the numeric value selected by ``expr``; unconvertible values count as 0."""
        val = _eval_xpath_result_as_scalar(record.xpath(expr))
        try:
            num = float(val)
        except (TypeError, ValueError, OverflowError):
            return 0
        # is_integer() is False for NaN/inf, so those stay floats instead of
        # making int() raise.
        return int(num) if num.is_integer() else num

    @staticmethod
    def _sorted_keys(stat):
        """Return the keys of ``stat`` sorted; mixed key types fall back to string order."""
        try:
            return sorted(stat)
        except TypeError:
            # e.g. 'null' next to numeric keys cannot be compared directly.
            return sorted(stat, key=str)

    def _init_swriter(self):
        """Create the statistics writer described by ``statwriter`` and attach its output.

        Without a ``statwriter`` configuration, or when it lacks the 'Module'
        sub-key, no writer is created and statistics are reported via the logger.
        """
        if not self._swconf:
            return
        sw_path = self._swconf.get('Module')
        if not sw_path:
            Log.error("Invalid configuration: there is statwriter defined but no sub-key: Module")
            # Nothing to attach an output to; _write_stat falls back to Log.info.
            return

        sw_mod = import_module(sw_path)
        sw_mod.Log = Log.getChild('statwriter')
        sw_class = getattr(sw_mod, 'DCWriter')
        sw_carg = self._swconf.get('CArg')
        self._swri = sw_class(**sw_carg) if sw_carg else sw_class()

        if self._sfile:
            if dcfile.Log is None:
                dcfile.Log = Log.getChild('statwriter')
            self._swri.setOutput(dcfile.DCConnector(path=self._sfile))
        else:
            if dcstdout.Log is None:
                dcstdout.Log = Log.getChild('statwriter')
            self._swri.setOutput(dcstdout.DCConnector())
        self._swri.writeHeader([])

    def _write_stat(self, sname, key, val):
        """Write one statistic entry as a STAT element, or log it when there is no writer.

        :param sname: statistic name (text of the STAT element).
        :param key: grouping key (text of the KEY sub-element).
        :param val: accumulated value (text of the VAL sub-element).
        """
        if self._swri is not None:
            stag = etree.Element('STAT')
            stag.text = sname
            ktag = etree.SubElement(stag, 'KEY')
            ktag.text = str(key)
            vtag = etree.SubElement(stag, 'VAL')
            vtag.text = str(val)
            self._swri.writeRecord(stag)
        else:
            Log.info("{}[{}]: {}".format(sname, key, val))
def _eval_xpath_result_as_scalar(res): if res is None or res == []: return None if isinstance(res, list) and len(res) > 0: res = res[0] if etree.iselement(res): val = res.text elif isinstance(res, str): val = res elif isinstance(res, float): val = res if int(val) == val: val = int(val) elif isinstance(res, int): val = res elif isinstance(res, bool): val = res else: val = str(res) return val