Source code for rigidity

'''
Rigidity is a simple wrapper to the built-in csv module that allows for
validation and correction of data being read/written from/to CSV files.

This module allows you to easily construct validation and correction
rulesets to be applied automatically while preserving the csv interface.
This allows you to easily upgrade old software to use new, strict rules.
'''

import rigidity.errors
import rigidity.rules as rules


class Rigidity:
    '''
    A wrapper for CSV readers and writers that applies validation and
    correction rules to every row passing through it.

    Rows read from a wrapped reader are run through each rule's ``read``
    method; rows handed to a wrapped writer are run through each rule's
    ``write`` method. Attributes that are not defined on this wrapper are
    forwarded to the wrapped CSV object, so the wrapper can be used in
    place of the original reader or writer.
    '''

    csvobj = None  # Declare here to prevent getattr/setattr recursion

    #: Do not display output at all.
    DISPLAY_NONE = 0
    #: Display simple warnings when ValueError is raised by a rule.
    DISPLAY_SIMPLE = 1

    def __init__(self, csvobj, rules=None, display=DISPLAY_NONE):
        '''
        :param csvobj: a Reader or Writer object from the csv module;
          any calls to this object's methods will be wrapped to perform
          the specified rigidity checks.
        :param rules: a two dimensional list (or a dict of lists)
          containing rules to be applied to columns moving in/out of
          `csvobj`. The indices (or keys) of this collection match the
          column in the CSV file the list of rules will be applied to.
          Defaults to no rules at all.
        :param int display: When an error is thrown, display the row
          and information about which column caused the error. One of
          :attr:`DISPLAY_NONE` or :attr:`DISPLAY_SIMPLE`.
        '''
        # A fresh list per instance: a literal ``[]`` default would be
        # shared by every Rigidity object created without rules.
        if rules is None:
            rules = []

        self.csvobj = csvobj
        self.rules = rules
        self.display = display

        # Columns are addressed by name for dict rules and by position
        # for list rules.
        if isinstance(rules, dict):
            self.keys = rules.keys()
        else:
            self.keys = range(0, len(rules))

    # Wrapper methods for the `csv` interface

    def writeheader(self):
        '''
        Plain pass-through to the given CSV object. It is assumed that
        header information is already valid when the CSV object is
        constructed.
        '''
        self.csvobj.writeheader()

    def writerow(self, row):
        '''
        Validate and correct the data provided in `row` and raise an
        exception if the validation or correction fails. Then, write the
        row to the CSV file.

        If a rule raises :class:`rigidity.errors.DropRow`, the row is
        silently skipped and nothing is written.
        '''
        try:
            self.csvobj.writerow(self.validate_write(row))
        except rigidity.errors.DropRow:
            return

    def writerows(self, rows):
        '''
        Validate and correct the data provided in every row and raise an
        exception if the validation or correction fails.

        .. note::
          Behavior in the case that the data is invalid and cannot be
          repaired is undefined. For example, the implementation may
          choose to write all valid rows up until the error, or it may
          choose to only conduct the write operation after all rows have
          been verified. Do not depend on the presence or absence of any
          of the rows in `rows` in the event that an exception occurs.
        '''
        for row in rows:
            self.writerow(row)

    # New methods, not part of the `csv` interface

    @staticmethod
    def _as_mutable(row):
        '''Return `row` itself if it is a list or dict, else a list copy.'''
        # Ensure mutability - I'm looking at you, tuples!
        if isinstance(row, (list, dict)):
            return row
        return list(row)

    def _apply_rules(self, row, direction):
        '''
        Run every configured rule over `row` and return the updated row.

        :param row: the row to validate; converted to a list when it is
          neither a list nor a dict so that it can be updated in place.
        :param str direction: name of the rule method to call on each
          rule, either ``'read'`` or ``'write'``.
        :raises ValueError, IndexError: re-raised unchanged from the rule
          after an optional diagnostic has been printed.
        '''
        row = self._as_mutable(row)
        for key in self.keys:
            value = row[key]
            for rule in self.rules[key]:
                try:
                    value = getattr(rule, direction)(value)
                except (ValueError, IndexError) as err:
                    if self.display == self.DISPLAY_SIMPLE:
                        if isinstance(err, ValueError):
                            print('Invalid data encountered in column %s:'
                                  % key)
                        else:
                            print('IndexError raised in column %s:' % key)
                        print(' -', row)
                        print(' - Error raised by rule:', rule)
                        print('')
                    # Bare raise keeps the rule's original traceback.
                    raise
            row[key] = value
        return row

    def validate(self, row):
        '''
        .. warning::
           This method is deprecated and will be removed in a future
           release; it is included only to support old code. It will
           not produce consistent results with bi-directional rules.
           You should use :meth:`validate_read` or
           :meth:`validate_write` instead.

        Validate that the row conforms with the specified rules,
        correcting invalid rows where the rule is able to do so.

        Each rule is applied through its ``apply`` method when it has
        one, and through its ``read`` method otherwise.

        If the row is valid or can be made valid through corrections,
        this method will return a row that can be written to the CSV
        file. If the row is invalid and cannot be corrected, then this
        method will raise an exception.

        :param row: a row object that can be passed to a CSVWriter's
          writerow() method.
        '''
        row = self._as_mutable(row)

        # Iterate through all keys, updating the data
        for key in self.keys:
            value = row[key]
            for rule in self.rules[key]:
                if hasattr(rule, 'apply'):
                    value = rule.apply(value)
                else:
                    # Keep threading the value through the remaining
                    # rules; returning here would hand back a single
                    # cell instead of the row.
                    value = rule.read(value)
            row[key] = value

        # Return the updated data
        return row

    def validate_write(self, row):
        '''
        Validate that the row conforms with the specified rules,
        correcting invalid rows where the rule is able to do so.

        If the row is valid or can be made valid through corrections,
        this method will return a row that can be written to the CSV
        file. If the row is invalid and cannot be corrected, then this
        method will raise an exception.

        :param row: a row object that can be passed to a CSVWriter's
          writerow() method.
        '''
        return self._apply_rules(row, 'write')

    def validate_read(self, row):
        '''
        Validate that the row conforms with the specified rules,
        correcting invalid rows where the rule is able to do so.

        If the row is valid or can be made valid through corrections,
        this method will return the corrected row. If the row is invalid
        and cannot be corrected, then this method will raise an
        exception.

        :param row: a row object as returned by a CSVReader's
          __next__() method.
        '''
        return self._apply_rules(row, 'read')

    def skip(self):
        '''
        Return a row, skipping validation. This is useful when you want
        to skip validation of header information.
        '''
        return next(self.csvobj)

    def __iter__(self):
        '''
        Yield every validated row of the wrapped CSV object, leaving out
        rows for which a rule raised DropRow.
        '''
        for row in iter(self.csvobj):
            try:
                yield self.validate_read(row)
            except rigidity.errors.DropRow:
                continue

    def __next__(self):
        '''
        Call the __next__() method on the given CSV object, validate and
        repair the row it returns, raise an exception if the row cannot
        be repaired, and then return the row.

        Rows for which a rule raised DropRow are skipped.
        '''
        # Loop instead of recursing so that a long run of dropped rows
        # cannot exhaust the interpreter's recursion limit.
        while True:
            try:
                return self.validate_read(next(self.csvobj))
            except rigidity.errors.DropRow:
                continue

    def __getattr__(self, name):
        '''
        Forward lookups of attributes missing on the wrapper to the
        wrapped CSV object.

        :raises AttributeError: if the wrapped object lacks `name` too.
        '''
        try:
            return getattr(self.csvobj, name)
        except AttributeError:
            # `object` defines no __getattr__ to delegate to, so report
            # the missing attribute directly.
            raise AttributeError(
                '%r object has no attribute %r'
                % (type(self).__name__, name)
            ) from None

    def __setattr__(self, name, value):
        '''
        Set `name` on the wrapped CSV object when it already has such an
        attribute; otherwise set it on the wrapper itself.
        '''
        if hasattr(self.csvobj, name):
            return setattr(self.csvobj, name, value)
        super().__setattr__(name, value)

    def __delattr__(self, name):
        '''
        Delete `name` from the wrapped CSV object when it has such an
        attribute; otherwise delete it from the wrapper itself.
        '''
        if hasattr(self.csvobj, name):
            return delattr(self.csvobj, name)
        return super().__delattr__(name)