Source code for rigidity.rules

import ctypes
import rigidity.errors


class Rule:
    '''
    Root of the rule hierarchy.

    The stock behaviour is a pass-through: `apply()` hands back whatever
    it receives, and both `read()` and `write()` delegate to `apply()`.
    '''

    def apply(self, value):
        '''
        Validate and/or transform a value, independent of direction.

        Unless a subclass overrides them, `read()` and `write()` both
        route through this method.

        :param value: the data to be validated.
        :returns: the validated and possibly modified value as documented
          by the rule; this base implementation returns it unchanged.
        :raises rigidity.errors.DropRow: a rule may raise this to cancel
          processing of an entire row. It tells the
          :class:`rigidity.Rigidity` class to discontinue processing the
          row.
        '''
        return value

    def read(self, value):
        '''
        Validate a value that is being read.

        Delegates to `apply()`; override it to make reading behave
        differently from writing.

        :param value: the data to be validated.
        :returns: the validated and possibly modified value as documented
          by the rule.
        :raises rigidity.errors.DropRow: a rule may raise this to cancel
          processing of an entire row. It tells the
          :class:`rigidity.Rigidity` class to discontinue processing the
          row.
        '''
        result = self.apply(value)
        return result

    def write(self, value):
        '''
        Validate a value that is being written.

        Delegates to `apply()`; override it to make writing behave
        differently from reading.

        :param value: the data to be validated.
        :returns: the validated and possibly modified value as documented
          by the rule.
        :raises rigidity.errors.DropRow: a rule may raise this to cancel
          processing of an entire row. It tells the
          :class:`rigidity.Rigidity` class to discontinue processing the
          row.
        '''
        result = self.apply(value)
        return result
class CapitalizeWords(Rule):
    '''
    Capitalize words in a string. By default, words are detected by
    searching for space, tab, new line, and carriage return characters.
    You may override this setting. Also, by default, the first character
    is capitalized automatically.
    '''

    #: Default separator characters. The spelling of this name is part of
    #: the public interface and is therefore left as-is.
    SEPERATORS = ' \t\n\r'

    def __init__(self, seperators=SEPERATORS, cap_first=True):
        '''
        :param str seperators: capitalize any character following a
          character in this string.
        :param bool cap_first: automatically capitalize the first
          character in the string.
        '''
        self.seperators = seperators
        self.cap_first = cap_first

    def apply(self, value):
        '''
        Upper-case the first character (when `cap_first` is set) and every
        character that directly follows a separator.

        :param str value: the text to capitalize.
        :returns: a new string with the selected characters upper-cased.
        '''
        # Work on a list of characters rather than a fixed-width character
        # buffer: a list slot can hold an upper-case form that is longer
        # than one character (e.g. 'ß' -> 'SS'), and joining the list never
        # truncates the text at an embedded NUL character.
        chars = list(value)
        capitalize_next = self.cap_first
        for index, char in enumerate(chars):
            if capitalize_next:
                char = chars[index] = char.upper()
            # The separator test looks at the character as it now stands
            # in the output (i.e. after any capitalization above).
            capitalize_next = char in self.seperators
        return ''.join(chars)
class Cary(Rule):
    '''
    Carry values forward into subsequent rows lacking values in their
    column.
    '''

    #: When an empty cell is encountered and no previous fill value is
    #: available, throw an error.
    ACTION_ERROR = 1
    #: Until a value is encountered, use a default value to fill empty
    #: cells.
    ACTION_DEFAULT = 2
    #: When an empty cell is encountered and no other value is available
    #: to fill the cell, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, action=ACTION_ERROR, default=None):
        '''
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_DEFAULT, or ACTION_DROPROW.
        :param default: the value used to fill empty cells until a real
          value has been seen. It is only used as a fill value when
          `action` is ACTION_DEFAULT.
        '''
        self.action = action
        self.previous = default
        # With ACTION_DEFAULT the default counts as an already-seen value,
        # so empty cells can be filled from the very first row.
        self.previous_available = action == self.ACTION_DEFAULT

    def apply(self, value):
        '''
        Return `value`, or the most recent non-empty value when `value` is
        None or an empty string.

        :param value: the cell content.
        :returns: the value itself when it is non-empty (it is also
          remembered for later rows); otherwise the remembered value.
        :raises ValueError: when the cell is empty, nothing has been
          remembered yet and the action is ACTION_ERROR, or when the
          action is not one of the known constants.
        :raises rigidity.errors.DropRow: when the cell is empty, nothing
          has been remembered yet and the action is ACTION_DROPROW.
        '''
        if value is None or value == '':
            if self.previous_available:
                return self.previous
            if self.action == self.ACTION_ERROR:
                raise ValueError('Empty cell encountered before a value.')
            if self.action == self.ACTION_DROPROW:
                raise rigidity.errors.DropRow()
            # An unrecognised action must not silently turn the empty cell
            # into None; fail loudly like the other rules in this module.
            raise ValueError('Invalid action set')

        self.previous = value
        self.previous_available = True
        return value
class Boolean(Rule):
    '''
    Cast a string as a boolean value.
    '''

    #: When invalid data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When invalid data is encountered, return a set default value.
    ACTION_DEFAULT = 2
    #: When invalid data is encountered, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, allow_null=False, action=ACTION_ERROR, default=None):
        '''
        :param bool allow_null: when true, the texts 'null', 'none' and
          the empty string are converted to None.
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_DEFAULT, or ACTION_DROPROW.
        :param default: the value returned for unrecognised input when
          `action` is ACTION_DEFAULT.
        '''
        self.action = action
        self.default = default
        self.allow_null = allow_null

    def apply(self, value):
        '''
        Convert the lower-cased string form of `value` to a boolean.

        :param value: the data to convert; it is passed through `str()`.
        :returns: True, False, None (only with `allow_null`), or the
          configured default.
        :raises ValueError: when the input is not recognised and the
          action is ACTION_ERROR or an unknown action.
        :raises rigidity.errors.DropRow: when the input is not recognised
          and the action is ACTION_DROPROW.
        '''
        text = str(value).lower()

        if text in ('true', 'yes', 't', '1'):
            return True
        if text in ('false', 'no', 'f', '0'):
            return False
        if self.allow_null and text in ('null', 'none', ''):
            return None

        # Unrecognised input: fall back to the configured action.
        if self.action == self.ACTION_DEFAULT:
            return self.default
        if self.action == self.ACTION_DROPROW:
            raise rigidity.errors.DropRow()
        # ACTION_ERROR and any unknown action end up here.
        raise ValueError('Value was not a boolean value')
class Bytes(Rule):
    '''
    When reading data, encode it as a bytes object using the given
    encoding. When writing data, decode it using the given encoding.
    '''

    def __init__(self, encoding='utf8'):
        '''
        :param str encoding: the codec name handed to `encode()` and
          `decode()`.
        '''
        self.encoding = encoding

    def read(self, value):
        '''
        :param value: the text to encode.
        :returns: the result of `value.encode()` with the configured
          encoding.
        '''
        codec = self.encoding
        return value.encode(codec)

    def write(self, value):
        '''
        :param value: the bytes to decode.
        :returns: the result of `value.decode()` with the configured
          encoding.
        '''
        codec = self.encoding
        return value.decode(codec)
class Contains(Rule):
    '''
    Check that a string field value contains the string (or all strings in
    a list of strings) passed as a parameter to this rule.
    '''

    def __init__(self, string):
        '''
        :param string: a single string, or a list/tuple of strings, that
          every value must contain.
        :raises ValueError: when `string` is not a str, list, or tuple.
        '''
        if isinstance(string, str):
            self.strings = [string]
        elif isinstance(string, (list, tuple)):
            self.strings = string
        else:
            raise ValueError('string must be a string or a list')

    def apply(self, value):
        '''
        :param value: the data to check.
        :returns: `value` unchanged when every required string is found.
        :raises ValueError: naming the first required string that is not
          contained in `value`.
        '''
        for string in self.strings:
            if string not in value:
                # Wrap in a 1-tuple so the message is formatted correctly
                # whatever the type of the missing element.
                raise ValueError('String "%s" not in value' % (string,))
        return value
class Integer(Rule):
    '''
    Cast all data to ints or die trying.
    '''

    #: When invalid data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When invalid data is encountered, return zero.
    ACTION_ZERO = 2
    #: When invalid data is encountered, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, action=ACTION_ERROR):
        '''
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_ZERO, or ACTION_DROPROW.
        '''
        self.action = action

    def apply(self, value):
        '''
        Convert `value` with `int()`.

        :param value: the data to convert.
        :returns: the integer, or 0 when conversion fails and the action
          is ACTION_ZERO.
        :raises ValueError: (or TypeError, for values `int()` cannot
          accept at all, such as None) when conversion fails and the
          action is ACTION_ERROR or an unknown action. The original
          exception is re-raised.
        :raises rigidity.errors.DropRow: when conversion fails and the
          action is ACTION_DROPROW.
        '''
        try:
            return int(value)
        except (TypeError, ValueError):
            # TypeError covers inputs such as None, which are invalid data
            # just like an unparsable string and must honour the action.
            if self.action == self.ACTION_ZERO:
                return 0
            if self.action == self.ACTION_DROPROW:
                raise rigidity.errors.DropRow()
            # ACTION_ERROR and unknown actions: propagate the original
            # error with its traceback intact.
            raise
class Float(Rule):
    '''
    Cast all data to floats or die trying.
    '''

    #: When invalid data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When invalid data is encountered, return zero.
    ACTION_ZERO = 2
    #: When invalid data is encountered, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, action=ACTION_ERROR):
        '''
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_ZERO, or ACTION_DROPROW.
        '''
        self.action = action

    def apply(self, value):
        '''
        Convert `value` with `float()`.

        :param value: the data to convert.
        :returns: the float, or 0.0 when conversion fails and the action
          is ACTION_ZERO.
        :raises ValueError: (or TypeError, for values `float()` cannot
          accept at all, such as None) when conversion fails and the
          action is ACTION_ERROR or an unknown action. The original
          exception is re-raised.
        :raises rigidity.errors.DropRow: when conversion fails and the
          action is ACTION_DROPROW.
        '''
        try:
            return float(value)
        except (TypeError, ValueError):
            # TypeError covers inputs such as None, which are invalid data
            # just like an unparsable string and must honour the action.
            if self.action == self.ACTION_ZERO:
                return 0.0
            if self.action == self.ACTION_DROPROW:
                raise rigidity.errors.DropRow()
            # ACTION_ERROR and unknown actions: propagate the original
            # error with its traceback intact.
            raise
class NoneToEmptyString(Rule):
    '''
    Replace None values with an empty string. This is useful in cases
    where legacy software uses None to create an empty cell, but your
    other checks require a string.
    '''

    def apply(self, value):
        '''
        :param value: the data to normalise.
        :returns: an empty string when `value` is None, otherwise `value`
          unchanged.
        '''
        return '' if value is None else value
class RemoveLinebreaks(Rule):
    '''
    Remove linebreaks from the start and end of field values. These can
    sometimes be introduced into files and create problems for humans
    because they are invisible to human users.
    '''

    def apply(self, value):
        '''
        :param value: the text to clean.
        :returns: `value` with leading and trailing carriage-return and
          line-feed characters removed.
        '''
        linebreaks = '\r\n'
        return value.strip(linebreaks)
class ReplaceValue(Rule):
    '''
    Check if the value has a specified replacement. If it does, replace it
    with that value. If it does not, take one of the following
    configurable actions: pass it through unmodified, drop the row, use a
    default value, or raise an error.
    '''

    #: When no replacement is found, drop the row.
    ACTION_DROPROW = 1
    #: When no replacement is found, return a set default value.
    ACTION_DEFAULT_VALUE = 2
    #: When no replacement is found, allow the original to pass through.
    ACTION_PASSTHROUGH = 3
    #: When no replacement is found, raise an exception.
    ACTION_ERROR = 4
    #: When no replacement is found, return an empty string.
    ACTION_BLANK = 5
    #: .. warning:: ACTION_DROP is deprecated due to the name being similar
    #:   to ACTION_DROPROW. Use ACTION_BLANK instead.
    ACTION_DROP = ACTION_BLANK  # Legacy support for v1.2.0; deprecated

    def __init__(self, replacements=None, missing_action=ACTION_ERROR,
                 default_value=''):
        '''
        :param dict replacements: a mapping between original values and
          replacement values. When omitted, a new empty mapping is
          created for this instance.
        :param missing_action: when a replacement is not found for a
          value, take the behavior specified by the specified value, such
          as ACTION_DROPROW, ACTION_DEFAULT_VALUE, ACTION_PASSTHROUGH,
          ACTION_ERROR, or ACTION_BLANK.
        :param default_value: if ACTION_DEFAULT_VALUE is the missing
          replacement behavior, use this variable as the default
          replacement value.
        '''
        # A fresh dict per instance: a mutable default argument would be
        # shared by every rule constructed without `replacements`.
        self.replacements = {} if replacements is None else replacements
        self.missing_action = missing_action
        self.default_value = default_value
        # ACTION_BLANK is shorthand for "default value of empty string".
        if missing_action == self.ACTION_BLANK:
            self.missing_action = self.ACTION_DEFAULT_VALUE
            self.default_value = ''

    def apply(self, value):
        '''
        :param value: the data to look up in the replacement mapping.
        :returns: the replacement when one exists; otherwise `value`
          itself (ACTION_PASSTHROUGH) or the default value
          (ACTION_DEFAULT_VALUE / ACTION_BLANK).
        :raises rigidity.errors.DropRow: when no replacement exists and
          the action is ACTION_DROPROW.
        :raises IndexError: when no replacement exists and the action is
          ACTION_ERROR or is not a known action.
        '''
        if value in self.replacements:
            return self.replacements[value]
        if self.missing_action == self.ACTION_DROPROW:
            raise rigidity.errors.DropRow()
        if self.missing_action == self.ACTION_PASSTHROUGH:
            return value
        if self.missing_action == self.ACTION_DEFAULT_VALUE:
            return self.default_value
        if self.missing_action == self.ACTION_ERROR:
            raise IndexError('No replacement for value')
        raise IndexError('No replacement for value; invalid default action')
class Static(Rule):
    '''
    Replace a field's value with a static value declared during
    initialization.
    '''

    def __init__(self, value):
        '''
        :param value: the constant that `apply()` will return for every
          input.
        '''
        self.static_value = value

    def apply(self, value):
        '''
        :param value: the incoming data; it is not used.
        :returns: the value given at construction time.
        '''
        constant = self.static_value
        return constant
class Unique(Rule):
    '''
    Only allow unique values to pass. When a repeated value is found, the
    row may be dropped or an error may be raised.
    '''

    #: When repeat data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When repeat data is encountered, drop the row.
    ACTION_DROPROW = 2

    def __init__(self, action=ACTION_ERROR):
        '''
        :param action: Accepts either ACTION_ERROR or ACTION_DROPROW as
          the behavior to be performed when a value is not unique.
        '''
        self.encountered = []
        self.action = action

    def apply(self, value):
        '''
        Check that a value has not been seen before and remember it.

        :param value: the data to check.
        :returns: `value` unchanged when it is seen for the first time.
        :raises ValueError: when the value is a repeat and the action is
          ACTION_ERROR, or when the action is not a known action.
        :raises rigidity.errors.DropRow: when the value is a repeat and
          the action is ACTION_DROPROW.
        '''
        # NOTE(review): membership on a list is a linear scan, so cost
        # grows with the number of distinct values seen so far.
        if value not in self.encountered:
            self.encountered.append(value)
            return value

        if self.action == self.ACTION_ERROR:
            raise ValueError('Value not unique')
        if self.action == self.ACTION_DROPROW:
            raise rigidity.errors.DropRow()
        raise ValueError('Invalid action set')
class Drop(Rule):
    '''
    Drop the data in this column, replacing all data with an empty string
    value.
    '''

    def apply(self, value):
        '''
        :param value: the incoming data; it is not used.
        :returns: an empty string, whatever the input.
        '''
        blank = ''
        return blank
class Strip(Rule):
    '''
    Strip excess white space from the beginning and end of a value.
    '''

    def __init__(self, chars=None):
        '''
        :param chars: the characters to strip. When falsy (None or an
          empty string), `strip()` is called without arguments.
        '''
        # Stored as an argument list so apply() can splat it directly.
        self.strip_args = [chars] if chars else []

    def apply(self, value):
        '''
        :param value: the text to strip.
        :returns: the result of `value.strip()` called with the
          configured arguments.
        '''
        arguments = self.strip_args
        return value.strip(*arguments)
class UpcA(Rule):
    '''
    Validate UPC-A barcode numbers to ensure that they are 12 digits.
    Strict validation of the check digit may also be enabled.
    '''

    def __init__(self, strict=False):
        '''
        :param bool strict: If `true`, raise a ValueError if the given UPC
          code fails the check digit validation.
        '''
        self.strict = strict

    def apply(self, value):
        '''
        Cast the value to a string, then check that it is numeric.
        Afterwards, zero-pad the left side to reach the standard length of
        12 digits.

        :param value: the barcode; it is passed through `str()`.
        :returns: the 12-character, zero-padded code.
        :raises ValueError: when the value is empty or contains anything
          other than the ASCII digits 0-9, when it is longer than 12
          digits, or when strict mode is enabled and the given UPC code
          fails the check digit validation.
        '''
        value = str(value)
        # Only ASCII 0-9 are accepted. str.isdigit() would also admit
        # other Unicode digit characters (superscripts, full-width
        # digits, ...), which are not valid in a UPC-A code.
        if not value or any(char not in '0123456789' for char in value):
            raise ValueError('UPC-A code is not numeric.')
        if len(value) > 12:
            raise ValueError('UPC-A is longer than 12 digits')

        # Some barcodes become truncated by spreadsheet software that
        # treats the column numerically rather than as a string.
        value = value.zfill(12)

        # Verify the UPC check digit
        if self.strict:
            digits = [int(char) for char in value]
            # The 1st, 3rd, ... 11th digits are weighted by three; the
            # 2nd, 4th, ... 10th digits by one. The 12th digit is the
            # check digit that brings the total to a multiple of ten.
            odd = sum(digits[0:11:2]) * 3
            even = sum(digits[1:11:2])
            check = -(odd + even) % 10
            if digits[-1] != check:
                raise ValueError('UPC-A check digit is incorrect')

        return value
class Lower(Rule):
    '''
    Convert a string value to lower-case.
    '''

    def apply(self, value):
        '''
        :param value: the text to convert.
        :returns: the result of `value.lower()`.
        '''
        lowered = value.lower()
        return lowered
class Upper(Rule):
    '''
    Convert a string value to upper-case.
    '''

    def apply(self, value):
        '''
        :param value: the text to convert.
        :returns: the result of `value.upper()`.
        '''
        raised = value.upper()
        return raised