# Source code for datconv.writers.dccsv

# -*- coding: utf-8 -*-
"""This module implements Datconv Writer which saves data in form of CSV file.
Supports connectors of type: STRING, LIST, ITERABLE.
"""

# In Python 2.7 only
from __future__ import print_function

# Standard Python Libs
import sys
import csv
import logging

# Libs installed using pip
from lxml import etree

#Datconv classes
from . import dcxpaths
from datconv.outconn import STRING, LIST, ITERABLE


Log = None
"""The Log variable is set automatically by the main datconv script using the logging.getLogger method.
Use it to log messages when needed.
"""

class DCWriter:
    """Datconv Writer that saves every record as one row of CSV output.

    Please see constructor description for more details.
    """

    def __init__(self, columns = None, simple_xpath = False, add_header = False, col_names = True, csv_opt = None):
        """Parameters are usually passed from YAML file as subkeys of Writer:CArg key.

        :param columns: this parameter may be one of 4 possible types or None:

            - if it is a string, it should be the path to a file that contains the specification of columns in the output file;
            - if it is a list, it directly specifies columns in the output file;
            - if it is an integer, columns are added based on the first record;
            - if it is None or a dictionary, columns in the output CSV file are generated automatically based on the contents of the input file. When this option is used the number of columns in different records of the CSV file may vary, because new columns are added when discovered.

            Every explicit column specification is a sequence of at least 4 items, used by this class as: column name, record tag (or ``*`` for any record), xpath, default value. Specifications whose name starts with ``#`` are treated as comments and skipped.
        :param simple_xpath: determines whether simple xpaths are used in column specification. See the dcxpaths Writer for more description.
        :param add_header: if True, generic header (as initialized by Reader) is added as first line of output file.
        :param col_names: if True, line with column names (fields) is added before data or after data (in case of auto option).
        :param csv_opt: dictionary with csv writer options. See `documentation <https://docs.python.org/3/library/csv.html>`_ of csv standard Python library.

        For more detailed descriptions see :ref:`conf_template.yaml <writers_conf_template>` file in this module folder.
        """
        assert Log is not None
        dcxpaths.Log = Log

        self._out = None
        self._out_flags = 0
        self._writers = []
        self._auto_xpw = None           # helper writer that discovers columns in auto modes
        self._auto_cno = 0              # minimal number of cells per row (0 = no padding)
        self._auto_from_first = False   # True when columns are taken from the first record only
        self._col = []

        if columns is None:
            self._auto_xpw = dcxpaths.DCWriter(simple_xpath = simple_xpath)
        elif isinstance(columns, str):
            # Close the specification file deterministically instead of leaking the handle.
            with open(columns) as spec_file:
                self._col = self._usable_columns(csv.reader(spec_file, lineterminator='\n'))
        elif isinstance(columns, dict):
            self._auto_xpw = dcxpaths.DCWriter(simple_xpath = simple_xpath, **columns)
            self._auto_cno = columns.get('colno') or 0
        elif isinstance(columns, list):
            self._col = self._usable_columns(columns)
        elif isinstance(columns, int):
            self._auto_xpw = dcxpaths.DCWriter(simple_xpath = simple_xpath, ignore_rectyp = True)
            self._auto_from_first = True

        self._simple_xpath = simple_xpath
        self._add_header = add_header
        self._col_names = col_names
        self._csv = csv_opt
        self._header = None
        self._footer = None

    @staticmethod
    def _usable_columns(rows):
        """Return, as a list, the rows that are complete, non-comment column specifications.

        A row is kept when it has at least 4 items and its first item does not
        start with ``#``. An empty first item is accepted (it is not a comment).
        """
        # Slicing ([:1]) instead of indexing ([0]) so an empty name does not raise IndexError.
        return [row for row in rows
                if row and len(row) >= 4 and row[0][:1] != '#']

    def setOutput(self, out):
        """Attach the output connector and prepare the writer for a new output.

        :param out: output connector; must support the STRING interface and/or
            LIST / ITERABLE interfaces that accept list objects.
        :raises Exception: if the connector declares LIST or ITERABLE but does not accept lists.
        """
        self._writers = []
        self._out = None
        self._out_flags = out.supportedInterfases()
        if self._out_flags & STRING:
            csv_options = self._csv if self._csv else {}
            for stream in out.getStreams():
                self._writers.append(csv.writer(stream, **csv_options))
        if self._out_flags & (LIST | ITERABLE):
            if not out.tryObject([]):
                raise Exception('Incompatible OutConnector used, dccsv Writer requires that connector supports list objects')
            self._out = out
        if self._auto_xpw:
            # In auto modes the columns are rediscovered for every output.
            self._auto_xpw.resetXPaths()
            self._col = []

    def writeHeader(self, header):
        """Remember the header and write the optional header and column-name rows.

        Column names are written here only when columns were given explicitly;
        in auto modes they are not known yet.
        """
        self._header = header
        if self._add_header:
            # Header text goes into the first cell; remaining cells are left empty.
            self._writeRow([str(header)] + [None] * (len(self._col) - 1))
        if self._col_names and self._auto_xpw is None:
            self._writeRow([c[0] for c in self._col])

    def writeFooter(self, footer):
        """Remember the footer and, in auto mode, write the column-name row after the data."""
        self._footer = footer
        if self._col_names and self._auto_xpw is not None and not self._auto_from_first:
            names = [c[0] for c in self._col]
            if self._auto_cno > 0 and len(names) < self._auto_cno:
                names = names + ['Spare'] * (self._auto_cno - len(names))
            self._writeRow(names)

    def getHeader(self):
        """Return the header passed to the last writeHeader call (None if not called)."""
        return self._header

    def getFooter(self):
        """Return the footer passed to the last writeFooter call (None if not called)."""
        return self._footer

    def writeRecord(self, record):
        """Convert one record into a CSV row, write it and return it.

        :param record: record element; its ``tag`` is matched against column
            specifications and its ``find`` / ``xpath`` method is used to obtain values.
        :return: list of cell values that was written.
        """
        # Bound up-front so the diagnostic handler below never hits an unbound
        # name when the failure happens before any loop has assigned it.
        col = None
        try:
            line = []
            if self._auto_xpw:
                new_col = self._auto_xpw.checkXPath(record, ret_new = True)
                if new_col:
                    first_rec = (len(self._col) == 0)
                    for col in new_col:
                        self._col.append(col)
                    if first_rec and self._auto_from_first:
                        # Columns come from the first record: emit names before its data.
                        self._writeRow([c[0] for c in self._col])
            for col in self._col:
                val = self._columnValue(record, col)
                if val and sys.version_info.major == 2:
                    line.append(val.encode('utf8'))
                else:
                    line.append(val)
            if self._auto_cno > 0 and len(line) < self._auto_cno:
                line = line + [None] * (self._auto_cno - len(line))
            self._writeRow(line)
            return line
        except Exception:
            Log.debug('record=%s', etree.tostring(record, pretty_print = False))
            Log.debug('col=%s', str(col))
            raise

    def _columnValue(self, record, col):
        """Return the cell value of column specification ``col`` for ``record``.

        The default value (``col[3]``) is returned when the column does not
        apply to this record type or the path yields nothing.
        """
        val = col[3]
        if col[1] not in ['*', record.tag]:
            return val
        if self._simple_xpath:
            res = record.find(col[2])
        else:
            res = record.xpath(col[2])
        if res is None:
            return val
        if isinstance(res, list) and len(res) > 0:
            res = res[0]
        if etree.iselement(res):
            val = res.text
        elif isinstance(res, str):
            val = res
        elif not isinstance(res, list):  # exclude empty list
            val = str(res)
        return val

    def _writeRow(self, line):
        """Send one row to every csv stream writer and/or push it to the list connector."""
        if self._out_flags & STRING:
            for wri in self._writers:
                wri.writerow(line)
        if self._out_flags & (LIST | ITERABLE):
            self._out.pushObject(line)