r"""Import :term:`regular` :term:`clan`\s from and export them to CSV data."""
# Copyright Algebraix Data Corporation 2015 - 2017
#
# This file is part of algebraixlib <http://github.com/AlgebraixData/algebraixlib>.
#
# algebraixlib is free software: you can redistribute it and/or modify it under the terms of version
# 3 of the GNU Lesser General Public License as published by the Free Software Foundation.
#
# algebraixlib is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License along with algebraixlib.
# If not, see <http://www.gnu.org/licenses/>.
# --------------------------------------------------------------------------------------------------
import collections as _collections
import collections.abc as _collections_abc
import csv as _csv

import algebraixlib.algebras.clans as _clans
import algebraixlib.algebras.multiclans as _multiclans
import algebraixlib.algebras.relations as _relations
# noinspection PyProtectedMember
import algebraixlib.import_export._util as _util
import algebraixlib.mathobjects as _mo
import algebraixlib.undef as _ud
import algebraixlib.util.miscellaneous as _misc
from ..cache_status import CacheStatus
def export_csv(absolute_clan_or_multiclan, file_or_path, ordered_lefts=None, sort_key=None):
    r"""Export an absolute clan or absolute multiclan as CSV file with header row.

    The :term:`left component`\s of the :term:`clan` or :term:`multiclan` are interpreted as
    column names and are exported as header row. Every :term:`relation` in the input becomes a
    data row in the CSV file.

    :param absolute_clan_or_multiclan: An :term:`absolute clan` or :term:`absolute multiclan`. If
        it is not :term:`regular`, ``ordered_lefts`` must be given.
    :param file_or_path: Either a file path (in this case the CSV data is written to a file at this
        location) or a file object (in this case the CSV data is written to its ``.write()``
        function).
    :param ordered_lefts: (Optional) A ``Sequence`` of :term:`left`\s that are exported in the
        given order. Default is the sorted sequence of the left values of the first relation
        (which, for a regular (multi)clan, is the :term:`left set` shared by all relations); if
        the (multi)clan contains no relations, no columns are exported. This parameter is
        required if ``absolute_clan_or_multiclan`` is not :term:`regular`.
    :param sort_key: (Optional) A key function that receives one row-:term:`relation` and returns
        the value to order by; it is passed as ``key`` to :func:`sorted`. The output is not sorted
        if ``sort_key`` is missing.
    :return: ``True`` if the CSV export succeeded, ``False`` if not (the input is neither an
        absolute clan nor an absolute multiclan, or it is not regular and ``ordered_lefts`` was
        not given).
    """
    if not _clans.is_absolute_member(absolute_clan_or_multiclan) \
            and not _multiclans.is_absolute_member(absolute_clan_or_multiclan):
        return False

    if ordered_lefts is None:
        # The column order has to be derived from the data, which is only possible if the input
        # is regular. (The regularity checks are skipped when the caller provides the columns.)
        regular_clan = _clans.is_member(absolute_clan_or_multiclan) \
            and _clans.is_regular(absolute_clan_or_multiclan)
        regular_mclan = _multiclans.is_member(absolute_clan_or_multiclan) \
            and _multiclans.is_regular(absolute_clan_or_multiclan)
        if not (regular_clan or regular_mclan):
            return False

        # Since this clan is regular, the first relation is enough to acquire the left set. The
        # default guards against an input without relations, where a bare next() would raise
        # StopIteration.
        first_rel = next(iter(absolute_clan_or_multiclan), None)
        if first_rel is None:
            ordered_lefts = []
        else:
            # The lefts are sorted to guarantee a consistent column order.
            ordered_lefts = sorted(left.value for left in first_rel.get_left_set())

    if sort_key is None:
        relations = absolute_clan_or_multiclan
    else:
        relations = sorted(absolute_clan_or_multiclan, key=sort_key)

    # Generate dictionaries that associate left components with their right components for each
    # relation, and write them.
    rows = _convert_clan_to_list_of_dicts(ordered_lefts, relations)
    _csv_dict_writer(file_or_path, ordered_lefts, rows)
    return True
def _csv_dict_writer(file_or_path, ordered_columns: _collections_abc.Sequence,
                     data: _collections_abc.Iterable):
    """Write a CSV file using `csv.DictWriter`.

    :param file_or_path: Either a file path (in this case the CSV data is written to a file at this
        location) or a file object (in this case the CSV data is written to its ``.write()``
        function).
    :param ordered_columns: A `Sequence` of column names (atoms). The columns are
        written in the given order.
    :param data: An iterable of rows (it is traversed exactly once, so a generator is fine), where
        each row is a dictionary, mapping a column name to its value in the given row. Both the
        column name and the value are atoms.
    """
    # NOTE: the annotations use ``collections.abc``; the ABC aliases directly on ``collections``
    # (e.g. ``collections.Sequence``) were removed in Python 3.10 and, because annotations are
    # evaluated when the function is defined, would break importing this module.

    def write_data(out_file):
        """Write the header row followed by all data rows to ``out_file``."""
        writer = _csv.DictWriter(out_file, fieldnames=ordered_columns, dialect='excel')
        writer.writeheader()
        writer.writerows(data)

    # The helper receives the target and the callback that does the writing.
    _misc.write_to_file_or_path(file_or_path, write_data)
def _convert_clan_to_list_of_dicts(ordered_lefts: 'P( A )',
                                   absolute_clan: 'PP(A x A)') -> 'Iterator[dict]':
    """Lazily convert the relations of an absolute clan into dictionaries.

    Despite its name, this function is a generator: it does not build a list but yields one
    dictionary per relation, in the iteration order of ``absolute_clan``.

    :param ordered_lefts: The left components of ``absolute_clan`` that are converted.
    :param absolute_clan: An iterable of the relations of an absolute clan that are converted
        into dictionaries.
    :return: An iterator of dictionaries. Every dictionary represents a single relation in
        ``absolute_clan``. The lefts of the relation become the keys, the rights become the values
        of the dictionary. A left for which the relation provides no right (the lookup results in
        ``Undef``) is omitted from that relation's dictionary.
    """
    undef = _ud.Undef()
    for rel in absolute_clan:
        left_to_right_dict = {}
        for left in ordered_lefts:
            # Get the right component associated with left and add it to our row; skip lefts
            # that this relation does not contain.
            right = _relations.get_right(rel, left)
            if right is not undef:
                left_to_right_dict[left] = right.value
        yield left_to_right_dict
def import_csv(csv_file_or_filepath, types: dict=None, skip_rows: int=0, index_column: str=None,
               has_dup_rows: bool=False, columns: list=None) -> 'PP( A x M )':
    r"""Import the file ``csv_file_or_filepath`` as CSV data and return a clan or multiclan.

    :param csv_file_or_filepath: The file path or file object (for example ``StringIO`` buffer) to
        import. A path is opened as UTF-8 text; bytes that cannot be decoded are ignored.
    :param types: (Optional) A dictionary of type conversions. The keys are the column names; the
        values are functors (or types) that receive the string from the CSV cell and return the
        value to be imported. Example: ``{'foo': int, 'bar': float}``. By default all values are
        interpreted as `string`\s.
    :param skip_rows: (Optional) A number of lines to skip (default 0). Some CSV files have a
        preamble that can be skipped with this option. If the data has fewer lines than
        ``skip_rows``, nothing is imported.
    :param index_column: (Optional) A name for an index column. (No index column is created if this
        argument is not specified.) The index starts with 0. (This option is not compatible with the
        ``has_dup_rows`` option.)
    :param has_dup_rows: (Optional) If ``True``, allow duplicate rows and return a multiclan
        instead of a clan. By default, the value is ``False`` and a clan is returned. (This option
        is not compatible with the option ``index_column``.)
    :param columns: (Optional) A list of column names. If present, this list is used as the
        sequence of columns (and all lines in the data are loaded). If missing, the first line of
        the data must be a header that contains the column names (and this header line is not
        loaded as data).
    :return: A :term:`clan` (if ``has_dup_rows`` is ``False`` or not provided) or a
        :term:`multiclan` (if ``has_dup_rows`` is ``True``).
    :raise AssertionError: If ``index_column`` is given and ``has_dup_rows`` is not ``False``.
    """
    if types is None:
        types = {}

    _util.get_left_cached.left_cache = {}
    # The regularity flag lives on the function object (as before) and is updated while the rows
    # are read. It is set to False if any row is missing one or more values.
    import_csv.regular = True

    assert index_column is None or has_dup_rows is False, \
        "'index_column' and 'has_dup_rows' are mutually exclusive"

    def _import_csv(csv_file):
        """Yield one relation (a set of couplets) per CSV data row of ``csv_file``."""
        # Skip the preamble. A plain next() would raise StopIteration on a short file, which
        # inside a generator surfaces as RuntimeError (PEP 479); use a default instead.
        for _ in range(skip_rows):
            if next(csv_file, None) is None:
                break

        reader = _csv.DictReader(csv_file, fieldnames=columns)
        for row_index, row in enumerate(reader):
            # Remove missing and blank elements from the CSV row.
            filtered_row = {key: val for key, val in row.items()
                            if val is not None and val != ''}
            if import_csv.regular and len(row) != len(filtered_row):
                import_csv.regular = False

            # Apply the requested conversions to the cells that are present.
            for column, convert in types.items():
                if column in filtered_row:
                    filtered_row[column] = convert(filtered_row[column])

            if index_column is not None:
                filtered_row[index_column] = row_index

            yield _mo.Set(
                (_mo.Couplet(left=_util.get_left_cached(left), right=_mo.Atom(right),
                             direct_load=True) for left, right in filtered_row.items()),
                direct_load=True).cache_relation(CacheStatus.IS).cache_functional(CacheStatus.IS)

    def _load(csv_file):
        """Read all rows of ``csv_file`` into a clan or multiclan and cache its properties."""
        rows = _import_csv(csv_file)
        if has_dup_rows:
            result = _mo.Multiset(rows, direct_load=True).cache_multiclan(CacheStatus.IS)
        else:
            result = _mo.Set(rows, direct_load=True).cache_clan(CacheStatus.IS)
        # ``import_csv.regular`` must be read only after the container has been built from
        # ``rows``, because it is updated while the rows are being read.
        return result.cache_functional(CacheStatus.IS).cache_regular(
            CacheStatus.from_bool(import_csv.regular))

    if hasattr(csv_file_or_filepath, "readlines"):  # Support StringIO.
        return _load(csv_file_or_filepath)

    # ``newline=''`` is what the csv module requires for files it reads; without it, line breaks
    # embedded in quoted fields are not handled correctly.
    with open(csv_file_or_filepath, encoding='utf-8', errors='ignore', newline='') as file:
        return _load(file)