# Source code for cvbase.io

import json
import os
import sys
try:
    import cPickle as pickle
except:
    import pickle
import yaml
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper
from multiprocessing import Process, Queue
from os import path


def json_load(file):
    """Deserialize JSON from a filename or a readable file object.

    Args:
        file (str or file-like object): path opened in text mode ``'r'``,
            or any object exposing a ``read`` attribute.

    Returns:
        The Python object decoded by ``json.load``.

    Raises:
        TypeError: if ``file`` is neither a ``str`` nor has ``read``.
    """
    if isinstance(file, str):
        with open(file, 'r') as handle:
            return json.load(handle)
    if hasattr(file, 'read'):
        # Caller owns the file object; it is not closed here.
        return json.load(file)
    raise TypeError('"file" must be a filename str or a file-object')


def json_dump(obj, file=None, **kwargs):
    """Serialize ``obj`` as JSON to a string, a filename or a file object.

    Args:
        obj: the object to serialize.
        file (None or str or file-like object): ``None`` returns the JSON
            text; a ``str`` is opened in mode ``'w'``; anything with a
            ``write`` attribute is written to directly.
        **kwargs: forwarded to ``json.dumps`` / ``json.dump``.

    Returns:
        str when ``file`` is None, otherwise None.

    Raises:
        TypeError: if ``file`` is none of the supported kinds.
    """
    if file is None:
        return json.dumps(obj, **kwargs)
    if isinstance(file, str):
        with open(file, 'w') as sink:
            json.dump(obj, sink, **kwargs)
        return
    if hasattr(file, 'write'):
        # Caller owns the file object; it is not closed here.
        json.dump(obj, file, **kwargs)
        return
    raise TypeError('"file" must be a filename str or a file-object')


def yaml_load(file, **kwargs):
    """Parse YAML from a filename or a readable file object.

    Args:
        file (str or file-like object): path opened in text mode ``'r'``,
            or any object exposing a ``read`` attribute.
        **kwargs: forwarded to ``yaml.load``. ``Loader`` defaults to the
            module-level ``Loader`` (the C loader when available).

    Returns:
        The Python object produced by ``yaml.load``.

    Raises:
        TypeError: if ``file`` is neither a ``str`` nor has ``read``.
    """
    # NOTE(review): the default Loader is PyYAML's full loader, which can
    # construct arbitrary Python objects. Pass Loader=yaml.SafeLoader when
    # the input is not trusted.
    if 'Loader' not in kwargs:
        kwargs['Loader'] = Loader
    if isinstance(file, str):
        with open(file, 'r') as handle:
            return yaml.load(handle, **kwargs)
    if hasattr(file, 'read'):
        return yaml.load(file, **kwargs)
    raise TypeError('"file" must be a filename str or a file-object')


def yaml_dump(obj, file=None, **kwargs):
    """Serialize ``obj`` as YAML to a string, a filename or a file object.

    Args:
        obj: the object to serialize.
        file (None or str or file-like object): ``None`` returns whatever
            ``yaml.dump`` returns without a stream; a ``str`` is opened in
            mode ``'w'``; anything with a ``write`` attribute is written to
            directly.
        **kwargs: forwarded to ``yaml.dump``. ``Dumper`` defaults to the
            module-level ``Dumper`` (the C dumper when available).

    Returns:
        The result of ``yaml.dump(obj, **kwargs)`` when ``file`` is None,
        otherwise None.

    Raises:
        TypeError: if ``file`` is none of the supported kinds.
    """
    if 'Dumper' not in kwargs:
        kwargs['Dumper'] = Dumper
    if file is None:
        return yaml.dump(obj, **kwargs)
    if isinstance(file, str):
        with open(file, 'w') as sink:
            yaml.dump(obj, sink, **kwargs)
        return
    if hasattr(file, 'write'):
        yaml.dump(obj, file, **kwargs)
        return
    raise TypeError('"file" must be a filename str or a file-object')


def pickle_load(file, **kwargs):
    """Unpickle an object from a filename or a readable file object.

    Args:
        file (str or file-like object): path opened in binary mode
            ``'rb'``, or any object exposing a ``read`` attribute.
        **kwargs: forwarded to ``pickle.load``.

    Returns:
        The unpickled object.

    Raises:
        TypeError: if ``file`` is neither a ``str`` nor has ``read``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted data.
    if isinstance(file, str):
        with open(file, 'rb') as handle:
            return pickle.load(handle, **kwargs)
    if hasattr(file, 'read'):
        return pickle.load(file, **kwargs)
    raise TypeError('"file" must be a filename str or a file-object')


def pickle_dump(obj, file=None, **kwargs):
    """Pickle ``obj`` to bytes, a filename or a writable file object.

    Args:
        obj: the object to pickle.
        file (None or str or file-like object): ``None`` returns the
            pickled bytes; a ``str`` is opened in binary mode ``'wb'``;
            anything with a ``write`` attribute is written to directly.
        **kwargs: forwarded to ``pickle.dumps`` / ``pickle.dump``.
            ``protocol`` defaults to 2.

    Returns:
        The pickled data when ``file`` is None, otherwise None.

    Raises:
        TypeError: if ``file`` is none of the supported kinds.
    """
    if 'protocol' not in kwargs:
        kwargs['protocol'] = 2
    if file is None:
        return pickle.dumps(obj, **kwargs)
    if isinstance(file, str):
        with open(file, 'wb') as sink:
            pickle.dump(obj, sink, **kwargs)
        return
    if hasattr(file, 'write'):
        pickle.dump(obj, file, **kwargs)
        return
    raise TypeError('"file" must be a filename str or a file-object')


def load(file, format=None, **kwargs):
    """Load contents from json/yaml/pickle files, and also supports
    custom arguments for each file format.

    This method provides a unified api for loading from serialized files.

    Args:
        file(str or file-like object): filename or the file-like object
        format(None or str): if it is None, file format is inferred from the
                             file extension, otherwise use the specified one.
                             Currently supported formats are "json", "yaml",
                             "yml", "pickle" and "pkl"
        **kwargs: forwarded unchanged to the format-specific loader

    Returns:
        The content from the file

    Raises:
        TypeError: if the format (given or inferred) is not supported. This
            includes passing a file-like object without an explicit format,
            since there is no filename to infer the format from.
    """
    processors = {
        'json': json_load,
        'yaml': yaml_load,
        'yml': yaml_load,
        'pickle': pickle_load,
        'pkl': pickle_load
    }
    if format is None and isinstance(file, str):
        # Text after the last dot; a name without a dot yields the whole
        # name, which is then rejected below as unsupported.
        format = file.split('.')[-1]
    if format not in processors:
        # str.format instead of "+" so that format=None produces a readable
        # message rather than a str/None concatenation error.
        raise TypeError('Unsupported format: {}'.format(format))
    return processors[format](file, **kwargs)


def dump(obj, file=None, format=None, **kwargs):
    """Dump contents to json/yaml/pickle strings or files.

    This method provides a unified api for dumping to files, and also supports
    custom arguments for each file format.

    Args:
        obj(any): the python object to be dumped
        file(None or str or file-like object): if None, then dump to a str,
            otherwise to a file specified by the filename or file-like object
        format(None or str): same as :func:`load`
        **kwargs: forwarded unchanged to the format-specific dumper

    Returns:
        The serialized data when ``file`` is None, otherwise None.

    Raises:
        ValueError: if both ``file`` and ``format`` are None.
        TypeError: if the format (given or inferred) is not supported. This
            includes passing a file-like object without an explicit format.
    """
    processors = {
        'json': json_dump,
        'yaml': yaml_dump,
        'yml': yaml_dump,
        'pickle': pickle_dump,
        'pkl': pickle_dump
    }
    if format is None:
        if isinstance(file, str):
            # Text after the last dot of the filename.
            format = file.split('.')[-1]
        elif file is None:
            raise ValueError('format must be specified')
    if format not in processors:
        # str.format instead of "+" so that format=None (file-like object
        # without explicit format) produces a readable message.
        raise TypeError('Unsupported format: {}'.format(format))
    return processors[format](obj, file, **kwargs)


def list_from_file(filename, prefix='', offset=0, max_num=0):
    """Read a text file into a list of lines.

    Args:
        filename (str): file opened in text mode ``'r'``.
        prefix (str): string prepended to every returned line.
        offset (int): number of leading lines to skip.
        max_num (int): maximum number of lines to return; values ``<= 0``
            mean no limit.

    Returns:
        list[str]: the selected lines with the trailing ``'\\n'`` removed
        and ``prefix`` prepended.
    """
    collected = []
    with open(filename, 'r') as f:
        # Consume the lines that precede the requested window.
        for _ in range(offset):
            f.readline()
        for raw in f:
            if 0 < max_num <= len(collected):
                break
            collected.append(prefix + raw.rstrip('\n'))
    return collected


def dict_from_file(filename, key_type=str):
    """Build a dict from a text file of space-separated records.

    Each line is split on single spaces. The first field, converted with
    ``key_type``, becomes the key. The value is the second field when the
    line has exactly two fields, otherwise the list of remaining fields.
    Later lines overwrite earlier ones that share a key.

    Args:
        filename (str): file opened in text mode ``'r'``.
        key_type (callable): applied to the first field of every line.

    Returns:
        dict: mapping from converted key to ``str`` or ``list[str]``.

    Raises:
        AssertionError: if a line has fewer than two fields.
    """
    result = {}
    with open(filename, 'r') as f:
        for raw in f:
            fields = raw.rstrip('\n').split(' ')
            assert len(fields) >= 2
            head, tail = fields[0], fields[1:]
            key = key_type(head)
            # A single trailing field is stored bare, several as a list.
            result[key] = tail[0] if len(tail) == 1 else tail
    return result


class AsyncDumper(Process):
    """Process that pickles queued objects to files.

    Items are handed over through an internal ``multiprocessing.Queue`` as
    ``(obj, filename)`` pairs. Once the process has been started, ``run``
    consumes the queue and writes each object with ``pickle_dump``.

    An item whose object is ``None`` terminates the loop, so
    ``dump(None, <anything>)`` acts as the stop signal; ``None`` itself can
    therefore not be dumped through this class.
    """

    def __init__(self):
        self._io_queue = Queue()
        super(AsyncDumper, self).__init__()

    def run(self):
        """Consume the queue until an item with ``None`` data arrives."""
        while True:
            data, out_file = self._io_queue.get()
            if data is None:
                # Stop sentinel: leave the loop so the process can exit.
                break
            pickle_dump(data, out_file)

    def dump(self, obj, filename):
        """Queue ``obj`` to be pickled to ``filename`` by ``run``.

        Args:
            obj: object to pickle; ``None`` stops the consumer loop.
            filename: destination handed to ``pickle_dump`` as its ``file``
                argument.
        """
        self._io_queue.put((obj, filename))


def check_file_exist(filename, msg_tmpl='file "{}" not exist:'):
    """Raise an error unless ``filename`` is an existing regular file.

    Args:
        filename (str): path to check with ``os.path.isfile``.
        msg_tmpl (str): ``str.format`` template for the error message; it
            receives ``filename`` as its only positional argument.

    Raises:
        FileNotFoundError: if the file is missing (Python newer than 3.3).
        IOError: if the file is missing on older interpreters.
    """
    if path.isfile(filename):
        return
    message = msg_tmpl.format(filename)
    # The conditional expression is lazy, so FileNotFoundError is never
    # looked up on interpreters that lack it.
    error_cls = FileNotFoundError if sys.version_info > (3, 3) else IOError
    raise error_cls(message)


def mkdir_or_exist(dir_name):
    """Create ``dir_name`` (including parents) unless it already exists.

    Args:
        dir_name (str): directory path. An empty string, as produced by
            ``os.path.dirname('file.txt')``, refers to the current
            directory and is treated as already existing.

    Raises:
        OSError: if the directory cannot be created and does not exist
            afterwards (e.g. the path is an existing regular file).
    """
    if dir_name == '':
        return
    try:
        os.makedirs(dir_name)
    except OSError:
        # Creating first and checking afterwards avoids the race where
        # another process creates the directory between an isdir() test and
        # makedirs(). Any failure other than "already a directory" is
        # re-raised.
        if not path.isdir(dir_name):
            raise


def _scandir_py35(dir_path, suffix=None):
    """Yield names of files directly inside ``dir_path`` via ``os.scandir``.

    Only entries for which ``entry.is_file()`` is true are considered. Bare
    names (not joined with ``dir_path``) are yielded, restricted to those
    ending with ``suffix`` when it is given.
    """
    for entry in os.scandir(dir_path):
        if not entry.is_file():
            continue
        filename = entry.name
        if suffix is None or filename.endswith(suffix):
            yield filename


def _scandir_py(dir_path, suffix=None):
    """Yield names of files directly inside ``dir_path`` via ``os.listdir``.

    Fallback for interpreters without ``os.scandir``; yields the same bare
    names as ``_scandir_py35``.
    """
    for filename in os.listdir(dir_path):
        if not path.isfile(path.join(dir_path, filename)):
            continue
        if suffix is None or filename.endswith(suffix):
            yield filename


def scandir(dir_path, suffix=None):
    """Iterate over the names of the files directly inside a directory.

    Args:
        dir_path (str): directory to scan (not recursive).
        suffix (None or str or tuple of str): when given, only names ending
            with this suffix (or any suffix of the tuple) are produced.

    Returns:
        generator: yields bare file names, in the order reported by the
        underlying OS call.

    Raises:
        TypeError: if ``suffix`` is neither None, a str nor a tuple. This
            is raised at call time, before iteration starts.
    """
    if suffix is not None and not isinstance(suffix, (str, tuple)):
        raise TypeError('"suffix" must be a string or tuple of strings')
    # sys.version is a string, so comparing its characters with integers is
    # always False; sys.version_info gives the numeric tuple needed to pick
    # the os.scandir implementation (available since Python 3.5).
    if sys.version_info >= (3, 5):
        return _scandir_py35(dir_path, suffix)
    else:
        return _scandir_py(dir_path, suffix)