From 0ad8d51f0eb0f7afd60c8a75721a0bb0b957b55e Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Wed, 27 Mar 2019 15:14:47 -0400 Subject: [PATCH] remove base32 crockford in favor of dependency --- archivebox/base32_crockford.py | 172 --------------------------------- 1 file changed, 172 deletions(-) delete mode 100644 archivebox/base32_crockford.py diff --git a/archivebox/base32_crockford.py b/archivebox/base32_crockford.py deleted file mode 100644 index bafb69b4..00000000 --- a/archivebox/base32_crockford.py +++ /dev/null @@ -1,172 +0,0 @@ -""" -base32-crockford -================ - -A Python module implementing the alternate base32 encoding as described -by Douglas Crockford at: http://www.crockford.com/wrmg/base32.html. - -He designed the encoding to: - - * Be human and machine readable - * Be compact - * Be error resistant - * Be pronounceable - -It uses a symbol set of 10 digits and 22 letters, excluding I, L O and -U. Decoding is not case sensitive, and 'i' and 'l' are converted to '1' -and 'o' is converted to '0'. Encoding uses only upper-case characters. - -Hyphens may be present in symbol strings to improve readability, and -are removed when decoding. - -A check symbol can be appended to a symbol string to detect errors -within the string. - -""" - -import re -import sys - -PY3 = sys.version_info[0] == 3 - -if not PY3: - import string as str - - -__all__ = ["encode", "decode", "normalize"] - - -if PY3: - string_types = str, -else: - string_types = basestring, - -# The encoded symbol space does not include I, L, O or U -symbols = '0123456789ABCDEFGHJKMNPQRSTVWXYZ' -# These five symbols are exclusively for checksum values -check_symbols = '*~$=U' - -encode_symbols = dict((i, ch) for (i, ch) in enumerate(symbols + check_symbols)) -decode_symbols = dict((ch, i) for (i, ch) in enumerate(symbols + check_symbols)) -normalize_symbols = str.maketrans('IiLlOo', '111100') -valid_symbols = re.compile('^[%s]+[%s]?$' % (symbols, - re.escape(check_symbols))) - -base = len(symbols) -check_base = len(symbols + check_symbols) - - -def encode(number, checksum=False, split=0): - """Encode an integer into a symbol string. - - A ValueError is raised on invalid input. - - If checksum is set to True, a check symbol will be - calculated and appended to the string. - - If split is specified, the string will be divided into - clusters of that size separated by hyphens. - - The encoded string is returned. - """ - number = int(number) - if number < 0: - raise ValueError("number '%d' is not a positive integer" % number) - - split = int(split) - if split < 0: - raise ValueError("split '%d' is not a positive integer" % split) - - check_symbol = '' - if checksum: - check_symbol = encode_symbols[number % check_base] - - if number == 0: - return '0' + check_symbol - - symbol_string = '' - while number > 0: - remainder = number % base - number //= base - symbol_string = encode_symbols[remainder] + symbol_string - symbol_string = symbol_string + check_symbol - - if split: - chunks = [] - for pos in range(0, len(symbol_string), split): - chunks.append(symbol_string[pos:pos + split]) - symbol_string = '-'.join(chunks) - - return symbol_string - - -def decode(symbol_string, checksum=False, strict=False): - """Decode an encoded symbol string. - - If checksum is set to True, the string is assumed to have a - trailing check symbol which will be validated. If the - checksum validation fails, a ValueError is raised. - - If strict is set to True, a ValueError is raised if the - normalization step requires changes to the string. - - The decoded string is returned. - """ - symbol_string = normalize(symbol_string, strict=strict) - if checksum: - symbol_string, check_symbol = symbol_string[:-1], symbol_string[-1] - - number = 0 - for symbol in symbol_string: - number = number * base + decode_symbols[symbol] - - if checksum: - check_value = decode_symbols[check_symbol] - modulo = number % check_base - if check_value != modulo: - raise ValueError("invalid check symbol '%s' for string '%s'" % - (check_symbol, symbol_string)) - - return number - - -def normalize(symbol_string, strict=False): - """Normalize an encoded symbol string. - - Normalization provides error correction and prepares the - string for decoding. These transformations are applied: - - 1. Hyphens are removed - 2. 'I', 'i', 'L' or 'l' are converted to '1' - 3. 'O' or 'o' are converted to '0' - 4. All characters are converted to uppercase - - A TypeError is raised if an invalid string type is provided. - - A ValueError is raised if the normalized string contains - invalid characters. - - If the strict parameter is set to True, a ValueError is raised - if any of the above transformations are applied. - - The normalized string is returned. - """ - if isinstance(symbol_string, string_types): - if not PY3: - try: - symbol_string = symbol_string.encode('ascii') - except UnicodeEncodeError: - raise ValueError("string should only contain ASCII characters") - else: - raise TypeError("string is of invalid type %s" % - symbol_string.__class__.__name__) - - norm_string = symbol_string.replace('-', '').translate(normalize_symbols).upper() - - if not valid_symbols.match(norm_string): - raise ValueError("string '%s' contains invalid characters" % norm_string) - - if strict and norm_string != symbol_string: - raise ValueError("string '%s' requires normalization" % symbol_string) - - return norm_string