# -*- coding: utf-8 -*-
"""This module implements Datconv Writer which saves data in form of JSON file.
Supports connectors of type: STRING, OBJECT (dict()), ITERABLE."""
# In Python 2.7 only
from __future__ import print_function
# Standard Python Libs
import sys
import json
import collections
import logging
# Libs installed using pip
from lxml import etree
# Datconv generic modules
from datconv.outconn import STRING, OBJECT, ITERABLE
Log = None
"""Log varaible is automatically set by main datconv script using logging.getLogger method.
Use it for logging messages in need.
"""
class DCWriter:
    """Datconv Writer that serializes records as JSON.

    Please see constructor description for more details.
    """

    def __init__(self, add_header=True, add_footer=True, add_newline=True,
                 convert_values=2, null_text='None', preserve_order=False,
                 text_key='text', text_eliminate=True, with_prop=False,
                 ignore_rectyp=False, json_opt=None):
        """Parameters are usually passed from YAML file as subkeys of Writer:CArg key.

        :param add_header: if True, generic header (as initialized by Reader) is added as first object of output file or stream - only in non-iteration mode.
        :param add_footer: if True, generic footer (as initialized by Reader) is added as last object of output file or stream - only in non-iteration mode.
        :param add_newline: if True, adds newline character after each record.
        :param convert_values: 0 - does not convert (all values are text); 1 - tries to convert values to int or float (do not quote in json file) - little slower; 2 - like 1 but in addition checks if int values can be stored in 64 bits, if not place them as string value.
        :param null_text: text that is converted to JSON null value (apply if convert_values is > 0).
        :param preserve_order: if True, order of keys in json output match order in source.
        :param text_key: name of key to store XML text.
        :param text_eliminate: if True, XML text key will be eliminated if there are no other tag components.
        :param with_prop: if True, XML properties are being saved in JSON file.
        :param ignore_rectyp: if True, XML root tag for records (aka record type) will not be saved in JSON file (simplifies output layout in case there is one record type).
        :param json_opt: dictionary with json.dump() options. See `documentation <https://docs.python.org/3/library/json.html>`_ of json standard Python library.

        For more detailed descriptions see :ref:`conf_template.yaml <writers_conf_template>` file in this module folder.
        """
        assert Log is not None
        self._out = None
        self._out_flags = 0
        # Also (re)set in setOutput(); initialised here so the separator
        # logic never trips over a missing attribute.
        self._first = True
        self._add_header = add_header
        self._add_footer = add_footer
        self._add_newline = add_newline
        self._convert_values = convert_values
        # Integers outside this range are kept as text when convert_values > 1.
        # The range runs from the smallest signed 64-bit value up to the
        # largest unsigned 64-bit value.
        self._max_int = 2**64 - 1
        self._min_int = - 2**63
        self._null_text = null_text
        self._preserve_order = preserve_order
        self._text_key = text_key
        self._text_eliminate = text_eliminate
        self._with_prop = with_prop
        self._ign_rectyp = ignore_rectyp
        self._json_opt = json_opt
        self._header = []
        self._footer = []

    def setOutput(self, out):
        """Attach the output connector and check that it accepts the produced objects.

        :param out: output connector; its ``supportedInterfases()`` flags decide
            whether text (STRING) and/or objects (OBJECT, ITERABLE) are produced.
        :raises Exception: if the connector handles objects but rejects the
            dictionary type this writer is configured to produce.
        """
        self._first = True
        self._out = out
        self._out_flags = out.supportedInterfases()
        if self._out_flags & (OBJECT | ITERABLE):
            if self._preserve_order:
                if not out.tryObject(collections.OrderedDict()):
                    raise Exception('Incompatible OutConnector used, dcjson Writer option requires that connector supports dict objects')
            else:
                if not out.tryObject(dict()):
                    raise Exception('Incompatible OutConnector used, dcjson Writer requires that connector supports dict objects or OutConnector requires dcjson preserve_order option')

    def writeHeader(self, header):
        """Store the header and, if enabled, write its items as leading objects.

        For STRING connectors the opening ``[`` of the JSON array is always
        written, regardless of ``add_header``.

        :param header: iterable of header items (dicts or other values); the
            items themselves are not modified.
        """
        self._header = header
        if self._out_flags & STRING:
            self._out.pushString('[')
        if self._add_header:
            for item in header:
                self._pushSeparator()
                # The '_bra_' key is stripped from tagged header items only.
                self._emit(self._metaToObj(item, 'header', True), True)

    def writeFooter(self, footer):
        """Store the footer and, if enabled, write its items as trailing objects.

        For STRING connectors the closing ``]`` of the JSON array is always
        written, regardless of ``add_footer``.

        :param footer: iterable of footer items (dicts or other values); the
            items themselves are not modified.
        """
        self._footer = footer
        if self._add_footer:
            for item in footer:
                self._pushSeparator()
                self._emit(self._metaToObj(item, 'footer', False), True)
        if self._out_flags & STRING:
            if self._add_newline:
                self._out.pushString('\n')
            self._out.pushString(']')

    def getHeader(self):
        """Return the header most recently passed to writeHeader()."""
        return self._header

    def getFooter(self):
        """Return the footer most recently passed to writeFooter()."""
        return self._footer

    def writeRecord(self, record):
        """Convert one XML record to a JSON-compatible object and output it.

        :param record: XML element holding the record.
        :returns: the object that was written; keyed by the record tag unless
            ``ignore_rectyp`` was set.
        """
        try:
            self._pushSeparator()
            if self._with_prop:
                element2obj = self._element2objP
            else:
                element2obj = self._element2obj
            if self._ign_rectyp:
                obj = element2obj(record)
            else:
                obj = {record.tag: element2obj(record)}
            self._emit(obj, False)
            return obj
        except Exception:
            # Serialising the record is only worth the cost when it is logged.
            if Log.isEnabledFor(logging.DEBUG):
                Log.debug('record=%s', etree.tostring(record, pretty_print=False))
            raise

    #############################################################
    # Internal Methods
    #############################################################
    def _pushSeparator(self):
        """Write the comma/newline that precedes every object in text output."""
        text_mode = self._out_flags & STRING
        if self._first:
            self._first = False
        elif text_mode:
            self._out.pushString(',')
        if self._add_newline and text_mode:
            self._out.pushString('\n')

    def _emit(self, obj, wrap_ordered):
        """Send obj to every interface the connector supports.

        :param obj: JSON-compatible object to output.
        :param wrap_ordered: if True the pushed object is wrapped in an
            OrderedDict (used for header and footer items).
        """
        if self._out_flags & STRING:
            options = self._json_opt or {}
            for stream in self._out.getStreams():
                json.dump(obj, stream, **options)
        if self._out_flags & (OBJECT | ITERABLE):
            if wrap_ordered:
                obj = collections.OrderedDict(obj)
            self._out.pushObject(obj)

    def _metaToObj(self, item, fallback_key, drop_bra):
        """Build the output object for one header/footer item.

        :param item: header/footer item; a dict or any other value.
        :param fallback_key: key under which a non-dict item is stored as text.
        :param drop_bra: if True, the '_bra_' key is removed from tagged dicts.
        :returns: new dict; the caller's item is left untouched.
        """
        if not isinstance(item, dict):
            if self._convert_values > 0:
                item = self._TryConvert(item)
            return {fallback_key: str(item)}
        # Work on a copy: the same item is also handed back by
        # getHeader()/getFooter() and must keep its original content.
        item = item.copy()
        if self._convert_values > 0:
            for key in list(item):
                item[key] = self._TryConvert(item[key])
        if '_tag_' not in item:
            return item
        tag = item.pop('_tag_')
        if drop_bra:
            item.pop('_bra_', None)
        return {tag: item}

    def _convertText(self, value):
        """Return value converted according to the convert_values setting."""
        if self._convert_values > 0:
            return self._TryConvert(value)
        return value

    def _convertElement(self, el, with_prop):
        """Recursively turn an XML element into a JSON-compatible value.

        :param el: XML element.
        :param with_prop: if True, XML properties are stored as keys.
        :returns: converted text for an element without children (and without
            properties when with_prop is set) if text_eliminate is on,
            otherwise a dict (OrderedDict when preserve_order is set).
        """
        props = el.items() if with_prop else ()
        if self._text_eliminate and len(el) == 0 and len(props) == 0:
            return self._convertText(el.text)
        if self._preserve_order:
            ret = collections.OrderedDict()
        else:
            ret = {}
        for (pname, pprop) in props:
            ret[pname] = self._convertText(pprop)
        if el.text is not None:
            ret[self._text_key] = self._convertText(el.text)
        for child in el:
            value = self._convertElement(child, with_prop)
            if child.tag not in ret:
                ret[child.tag] = value
                continue
            # A key that is already present is collected into a list.
            present = ret[child.tag]
            if isinstance(present, list):
                present.append(value)
            else:
                ret[child.tag] = [present, value]
        return ret

    def _element2obj(self, el):
        """XML properties are ignored"""
        return self._convertElement(el, False)

    def _element2objP(self, el):
        """XML properties are saved as keys"""
        return self._convertElement(el, True)

    #############################################################
    # Helper Functions
    #############################################################
    def _TryConvert(self, val):
        """Try to convert val to None, int or float; return val itself otherwise.

        - None and text equal to ``null_text`` become None.
        - float input is returned unchanged (it is not truncated to int).
        - Values accepted by int() become int; with convert_values > 1 an int
          outside the 64-bit range is left as the original value.
        - Values accepted by float() become float, except infinities and NaN
          (in any spelling), which are left as the original value.
        """
        if val is None or val == self._null_text:
            return None
        if isinstance(val, float):
            # Already numeric; int() would silently drop the fractional part.
            return val
        try:
            number = int(val)  # Note: True=>1; False=>0
        except ValueError:
            pass
        except TypeError:
            return val
        else:
            if self._convert_values > 1 and (number > self._max_int or number < self._min_int):
                return val
            return number
        try:
            number = float(val)
        except ValueError:
            return val
        # NaN is the only value not equal to itself; this also catches
        # spellings such as '-nan' or ' NaN '.
        if number != number or number in (float('-Inf'), float('Inf')):
            return val
        return number