Source code for parsing.library.utils

# Copyright (C) 2017 Semester.ly Technologies, LLC
#
# Semester.ly is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Semester.ly is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

from __future__ import absolute_import, division, print_function

import collections
import dateparser
import os
import re
import simplejson as json

from datetime import datetime

from parsing.library.words import conjunctions_and_prepositions

UNICODE_WHITESPACE = re.compile(r'(?:\u00a0)|(?:\xc2)|(?:\xa0)', re.IGNORECASE)


[docs]def clean(dirt):
    """Recursively clean json-like object.

    `list`::
        - remove `None` elements
        - `None` on empty list

    :obj:`dict`::
        - filter out None valued key, value pairs
        - `None` on empty dict

    `basestring`::
        - convert unicode whitespace to ascii
        - strip extra whitespace
        - None on empty string

    Args:
        dirt: the object to clean

    Returns:
        Cleaned `dict`, cleaned `list`, cleaned `string`, or pass-through.
    """
    cleaned = None

    if isinstance(dirt, dict):
        cleaned = {}
        for k, v in dirt.items():
            cleaned_value = clean(v)
            if cleaned_value is None:
                continue
            cleaned[k] = cleaned_value
    elif isinstance(dirt, list):
        cleaned = filter(
            lambda x: x is not None,
            map(clean, dirt)
        )
    elif isinstance(dirt, basestring):
        cleaned = UNICODE_WHITESPACE.sub(' ', dirt).strip()
    else:
        return dirt

    if len(cleaned) == 0:
        return None
    return cleaned


[docs]def make_list(x=None):
    """Wrap in list if not list already.

    If input is None, will return empty list.

    Args:
        x: Input.

    Returns:
        list: Input wrapped in list.
    """
    if x is None:
        x = []
    if not isinstance(x, list):
        x = [x]
    return x


[docs]class DotDict(dict):
    """Dot notation access for dictionary.

    Supports set, get, and delete.

    Examples:
        >>> d = DotDict({'a': 1, 'b': 2, 'c': {'ca': 31}})
        >>> d.a, d.b
        (1, 2)
        >>> d['a']
        1
        >>> d['a'] = 3
        >>> d.a, d['b']
        (3, 2)
        >>> d.c.ca, d.c['ca']
        (31, 31)
    """

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, dct):
        """Create instance.

        Args:
            dct (dict): Dictionary to create DotDict with.
        """
        for key, value in dct.items():
            if hasattr(value, 'keys'):
                value = DotDict(value)
            self[key] = value

[docs]    def as_dict(self):
        """Return pure dictionary representation of self."""
        def rec(d):
            if isinstance(d, DotDict):
                return d.as_dict()
            return d
        return {
            key: rec(value) for key, value in self.items()
        }


[docs]def pretty_json(obj):
    """Prettify object as JSON.

    Args:
        obj (dict): Serializable object to JSONify.

    Returns:
        str: Prettified JSON.
    """
    return '{}'.format(json.dumps(obj,
                                  sort_keys=True,
                                  indent=2,
                                  separators=(',', ': ')))


[docs]def safe_cast(val, to_type, default=None):
    """Attempt to cast to specified type or return default.

    Args:
        val: Value to cast.
        to_type: Type to cast to.
        default (None, optional): Description

    Returns:
        to_type: Description
    """
    try:
        return to_type(val)
    except (ValueError, TypeError):
        return default


[docs]def update(d, u):
    """Recursive update to dictionary w/o overwriting upper levels.

    Examples:
        >>> update({0: {1: 2, 3: 4}}, {1: 2, 0: {5: 6, 3: 7}})
        {0: {1: 2}}
    """
    for k, v in u.iteritems():
        if isinstance(v, collections.Mapping):
            r = update(d.get(k, {}), v)
            d[k] = r
        else:
            d[k] = u[k]
    return d


[docs]def iterrify(x):
    """Create iterable object if not already.

    Will wrap `str` types in extra iterable eventhough `str` is iterable.

    Examples:
        >>> for i in iterrify(1):
        ...     print(i)
        1
        >>> for i in iterrify([1]):
        ...     print(i)
        1
        >>> for i in iterrify('hello'):
        ...     print(i)
        'hello'
    """
    if isinstance(x, collections.Iterable) and not isinstance(x, basestring):
        return x
    else:
        return (x,)


[docs]def dir_to_dict(path):
    """Recursively create nested dictionary representing directory contents.

    Args:
        path (str): The path of the directory.

    Returns:
        dict: Dictionary representation of the directory.
    """
    d = {'name': os.path.basename(path)}
    if os.path.isdir(path):
        d['kind'] = "directory"
        d['children'] = [
            dir_to_dict(os.path.join(path, x)) for x in os.listdir(path)
        ]
    else:
        d['kind'] = "file"
    return d


[docs]def titlize(name):
    """Format name into pretty title.

    Will uppercase roman numerals.
    Will lowercase conjuctions and prepositions.

    Examples:
        >>> titlize('BIOLOGY OF CANINES II')
        Biology of Canines II
    """
    if name is None:
        return None

    titled = []
    for idx, word in enumerate(name.split()):
        if re.match(r'^[ivx]+$', word.lower()) is not None:
            word = word.upper()
        elif idx == 0:
            word = word.title()
        elif word.lower() in conjunctions_and_prepositions:
            word = word.lower()
        else:
            word = word.title()
        titled.append(word)
    return ' '.join(titled)


[docs]def dict_filter_by_dict(a, b):
    """Filter dictionary a by b.

    dict or set
    Items or keys must be string or regex.
    Filters at arbitrary depth with regex matching.

    Args:
        a (dict): Dictionary to filter.
        b (dict): Dictionary to filter by.

    Returns:
        dict: Filtered dictionary
    """
    if b is None:
        return a

    filtered = {}
    for x, ys in a.items():
        for p, qs in b.items():
            m = re.match(str(p), str(x))
            if m is None:
                continue
            if isinstance(ys, list):
                filtered.setdefault(x, [])
            elif isinstance(ys, dict):
                filtered.setdefault(x, {})
            for y in ys:
                for q in qs:
                    n = re.match(str(q), str(y))
                    if n is None:
                        continue
                    if isinstance(ys, list):
                        filtered[x].append(y)
                    elif isinstance(ys, dict):
                        filtered[x][y] = a[x][y]
    return filtered


[docs]def dict_filter_by_list(a, b):
    if b is None:
        return a
    filtered = None
    if isinstance(a, list):
        filtered = []
    elif isinstance(a, set):
        filtered = set()
    elif isinstance(a, dict):
        filtered = {}
    for x in a:
        for y in b:
            m = re.match(str(y), str(x))
            if m is None:
                continue
            if isinstance(a, list):
                filtered.append(x)
            elif isinstance(a, set):
                filtered.add(x)
            elif isinstance(a, dict):
                filtered[x] = a[x]
    return filtered


[docs]def time24(time):
    """Convert time to 24hr format.

    Args:
        time (str): time in reasonable format

    Returns:
        str: 24hr time in format hh:mm

    Raises:
        ParseError: Unparseable time input.
    """
    from parsing.library.validator import ValidationError

    if isinstance(time, basestring):
        time = dateparser.parse(time)
    if not isinstance(time, datetime):
        raise ValidationError('invalid time input {}'.format(time))
    return time.strftime('%H:%M')


[docs]class SimpleNamespace:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
    def __repr__(self):
        keys = sorted(self.__dict__)
        items = ("{}={!r}".format(k, self.__dict__[k]) for k in keys)
        return "{}({})".format(type(self).__name__, ", ".join(items))
    def __eq__(self, other):
        return self.__dict__ == other.__dict__