import json
import os
import sys
try:
import cPickle as pickle
except:
import pickle
import yaml
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
from multiprocessing import Process, Queue
from os import path
def json_load(file):
    """Deserialize JSON content from a filename or a readable file object.

    Args:
        file (str or file-like object): path of the JSON file, or an object
            exposing a ``read`` method.

    Returns:
        The Python object decoded by ``json.load``.

    Raises:
        TypeError: if ``file`` is neither a str nor has a ``read`` attribute.
    """
    if isinstance(file, str):
        # A filename: open it ourselves and make sure it gets closed again.
        with open(file, 'r') as stream:
            return json.load(stream)
    if hasattr(file, 'read'):
        # Already an open, readable object; the caller owns its lifetime.
        return json.load(file)
    raise TypeError('"file" must be a filename str or a file-object')
def json_dump(obj, file=None, **kwargs):
    """Serialize ``obj`` as JSON to a string, a filename or a file object.

    Args:
        obj: the Python object to serialize.
        file (None or str or file-like object): ``None`` to get the JSON
            text back, a filename to write to, or an object exposing a
            ``write`` method.
        **kwargs: passed through to ``json.dumps`` / ``json.dump``.

    Returns:
        str when ``file`` is None, otherwise None.

    Raises:
        TypeError: if ``file`` is not None, not a str and has no ``write``
            attribute.
    """
    if file is None:
        return json.dumps(obj, **kwargs)
    if isinstance(file, str):
        with open(file, 'w') as stream:
            json.dump(obj, stream, **kwargs)
        return None
    if not hasattr(file, 'write'):
        raise TypeError('"file" must be a filename str or a file-object')
    json.dump(obj, file, **kwargs)
def yaml_load(file, **kwargs):
    """Parse YAML content from a filename or a readable file object.

    Unless the caller supplies a ``Loader`` keyword, the module-level
    ``Loader`` imported at the top of this file is used.

    NOTE(review): that default is PyYAML's full loader, which is generally
    not safe for untrusted input -- pass ``Loader=yaml.SafeLoader`` when the
    source of the document is not trusted.

    Args:
        file (str or file-like object): path of the YAML file, or an object
            exposing a ``read`` method.
        **kwargs: passed through to ``yaml.load``.

    Returns:
        The Python object produced by ``yaml.load``.

    Raises:
        TypeError: if ``file`` is neither a str nor has a ``read`` attribute.
    """
    if 'Loader' not in kwargs:
        kwargs['Loader'] = Loader
    if isinstance(file, str):
        with open(file, 'r') as stream:
            return yaml.load(stream, **kwargs)
    if hasattr(file, 'read'):
        return yaml.load(file, **kwargs)
    raise TypeError('"file" must be a filename str or a file-object')
def yaml_dump(obj, file=None, **kwargs):
    """Serialize ``obj`` as YAML to a string, a filename or a file object.

    Unless the caller supplies a ``Dumper`` keyword, the module-level
    ``Dumper`` imported at the top of this file is used.

    Args:
        obj: the Python object to serialize.
        file (None or str or file-like object): ``None`` to get the result
            of ``yaml.dump`` back, a filename to write to, or an object
            exposing a ``write`` method.
        **kwargs: passed through to ``yaml.dump``.

    Returns:
        The value of ``yaml.dump(obj, **kwargs)`` when ``file`` is None,
        otherwise None.

    Raises:
        TypeError: if ``file`` is not None, not a str and has no ``write``
            attribute.
    """
    if 'Dumper' not in kwargs:
        kwargs['Dumper'] = Dumper
    if file is None:
        return yaml.dump(obj, **kwargs)
    if isinstance(file, str):
        with open(file, 'w') as stream:
            yaml.dump(obj, stream, **kwargs)
        return None
    if not hasattr(file, 'write'):
        raise TypeError('"file" must be a filename str or a file-object')
    yaml.dump(obj, file, **kwargs)
def pickle_load(file, **kwargs):
    """Unpickle an object from a filename or a readable binary file object.

    NOTE: unpickling can execute arbitrary code; only use this on data from
    a trusted source.

    Args:
        file (str or file-like object): path of the pickle file (opened in
            binary mode), or an object exposing a ``read`` method.
        **kwargs: passed through to ``pickle.load``.

    Returns:
        The unpickled Python object.

    Raises:
        TypeError: if ``file`` is neither a str nor has a ``read`` attribute.
    """
    if isinstance(file, str):
        with open(file, 'rb') as stream:
            return pickle.load(stream, **kwargs)
    if hasattr(file, 'read'):
        return pickle.load(file, **kwargs)
    raise TypeError('"file" must be a filename str or a file-object')
def pickle_dump(obj, file=None, **kwargs):
    """Pickle ``obj`` to bytes, a filename or a writable binary file object.

    Pickle protocol 2 is used unless the caller passes ``protocol``.

    Args:
        obj: the Python object to pickle.
        file (None or str or file-like object): ``None`` to get the pickled
            bytes back, a filename to write to (opened in binary mode), or
            an object exposing a ``write`` method.
        **kwargs: passed through to ``pickle.dumps`` / ``pickle.dump``.

    Returns:
        The pickled bytes when ``file`` is None, otherwise None.

    Raises:
        TypeError: if ``file`` is not None, not a str and has no ``write``
            attribute.
    """
    if 'protocol' not in kwargs:
        kwargs['protocol'] = 2
    if file is None:
        return pickle.dumps(obj, **kwargs)
    if isinstance(file, str):
        with open(file, 'wb') as stream:
            pickle.dump(obj, stream, **kwargs)
        return None
    if not hasattr(file, 'write'):
        raise TypeError('"file" must be a filename str or a file-object')
    pickle.dump(obj, file, **kwargs)
def load(file, format=None, **kwargs):
    """Load contents from json/yaml/pickle files.

    This method provides a unified api for loading from serialized files,
    and also supports custom arguments for each file format.

    Args:
        file (str or file-like object): filename or the file-like object.
        format (None or str): if it is None, the file format is inferred
            from the file extension (only possible when ``file`` is a
            filename str), otherwise the specified one is used.
            Currently supported formats are "json", "yaml", "yml",
            "pickle" and "pkl".
        **kwargs: forwarded unchanged to the format-specific loader.

    Returns:
        The content from the file.

    Raises:
        TypeError: if ``format`` is None and cannot be inferred because
            ``file`` is not a filename str, or if the format is not one of
            the supported ones.
    """
    if format is None:
        # Without a filename there is no extension to infer the format from.
        # Fail with a clear message instead of tripping over
        # ``str + None`` further down.
        if not isinstance(file, str):
            raise TypeError(
                '"format" must be specified when "file" is not a filename str')
        format = file.split('.')[-1]
    processors = {
        'json': json_load,
        'yaml': yaml_load,
        'yml': yaml_load,
        'pickle': pickle_load,
        'pkl': pickle_load
    }
    if format not in processors:
        # ``str.format`` keeps the message well-formed for non-str values.
        raise TypeError('Unsupported format: {}'.format(format))
    return processors[format](file, **kwargs)
def dump(obj, file=None, format=None, **kwargs):
    """Dump contents to json/yaml/pickle strings or files.

    This method provides a unified api for dumping to files, and also
    supports custom arguments for each file format.

    Args:
        obj (any): the python object to be dumped.
        file (None or str or file-like object): if None, then dump to a
            str/bytes value that is returned, otherwise to a file specified
            by the filename or file-like object.
        format (None or str): same as :func:`load`. It can only be omitted
            when ``file`` is a filename str, in which case it is inferred
            from the file extension.
        **kwargs: forwarded unchanged to the format-specific dumper.

    Returns:
        The serialized content when ``file`` is None, otherwise None.

    Raises:
        ValueError: if both ``file`` and ``format`` are None.
        TypeError: if ``format`` is None and ``file`` is a file-like object
            (nothing to infer the format from), or if the format is not one
            of the supported ones.
    """
    if format is None:
        if file is None:
            raise ValueError('format must be specified')
        # A file-like object carries no extension; report that explicitly
        # instead of failing later on ``str + None``.
        if not isinstance(file, str):
            raise TypeError(
                '"format" must be specified when "file" is not a filename str')
        format = file.split('.')[-1]
    processors = {
        'json': json_dump,
        'yaml': yaml_dump,
        'yml': yaml_dump,
        'pickle': pickle_dump,
        'pkl': pickle_dump
    }
    if format not in processors:
        # ``str.format`` keeps the message well-formed for non-str values.
        raise TypeError('Unsupported format: {}'.format(format))
    return processors[format](obj, file, **kwargs)
def list_from_file(filename, prefix='', offset=0, max_num=0):
    """Read a text file into a list of strings, one entry per line.

    Args:
        filename (str): path of the text file.
        prefix (str): string prepended to every returned line.
        offset (int): number of leading lines to skip.
        max_num (int): maximum number of lines to return; values <= 0 mean
            "no limit".

    Returns:
        list[str]: the selected lines with the trailing newline removed and
        ``prefix`` prepended.
    """
    lines = []
    with open(filename, 'r') as stream:
        # Discard the first ``offset`` lines.
        for _ in range(offset):
            stream.readline()
        for raw in stream:
            # Stop as soon as the (optional) quota is filled.
            if 0 < max_num <= len(lines):
                break
            lines.append(prefix + raw.rstrip('\n'))
    return lines
def dict_from_file(filename, key_type=str):
    """Build a dict from a text file of space-separated columns.

    Each line is split on single spaces. The first column, converted with
    ``key_type``, becomes the key. The value is the second column as a str
    when the line has exactly two columns, otherwise the list of all
    remaining columns.

    Args:
        filename (str): path of the text file.
        key_type (callable): conversion applied to the first column.

    Returns:
        dict: mapping built from the lines of the file; later lines
        overwrite earlier ones with the same key.
    """
    result = {}
    with open(filename, 'r') as stream:
        for raw in stream:
            fields = raw.rstrip('\n').split(' ')
            assert len(fields) >= 2
            head, rest = fields[0], fields[1:]
            # A single remaining column is stored as a bare str, not a list.
            result[key_type(head)] = rest if len(rest) > 1 else rest[0]
    return result
class AsyncDumper(Process):
    """Process that pickles queued objects to files.

    Items are handed over with :meth:`dump` and written by :meth:`run` using
    ``pickle_dump``. The loop in :meth:`run` ends when it receives an item
    whose object is ``None``.
    """

    def __init__(self):
        super(AsyncDumper, self).__init__()
        # Queue of ``(obj, filename)`` pairs waiting to be written.
        self._io_queue = Queue()

    def run(self):
        """Write queued items until an item whose object is None arrives."""
        payload, target = self._io_queue.get()
        while payload is not None:
            pickle_dump(payload, target)
            payload, target = self._io_queue.get()

    def dump(self, obj, filename):
        """Queue ``obj`` to be pickled to ``filename``.

        Args:
            obj: the object to pickle; ``None`` makes :meth:`run` stop.
            filename: destination handed to ``pickle_dump``.
        """
        self._io_queue.put((obj, filename))
def check_file_exist(filename, msg_tmpl='file "{}" not exist:'):
    """Raise an error if ``filename`` is not an existing regular file.

    Args:
        filename (str): path to check with ``os.path.isfile``.
        msg_tmpl (str): ``str.format`` template for the error message; it
            receives ``filename`` as its only positional argument.

    Raises:
        FileNotFoundError: on interpreters newer than Python 3.3 when the
            file does not exist.
        IOError: on older interpreters when the file does not exist.
    """
    if path.isfile(filename):
        return
    message = msg_tmpl.format(filename)
    # ``FileNotFoundError`` only exists on Python 3.3+, so the name must not
    # be touched on older interpreters.
    if sys.version_info > (3, 3):
        raise FileNotFoundError(message)
    raise IOError(message)
def mkdir_or_exist(dir_name):
    """Create ``dir_name`` (including parents) unless it already exists.

    Args:
        dir_name (str): path of the directory to create.

    Raises:
        OSError: if the directory cannot be created, e.g. because a regular
            file already occupies the path.
    """
    if path.isdir(dir_name):
        return
    try:
        os.makedirs(dir_name)
    except OSError:
        # Another process/thread may have created the directory between the
        # ``isdir`` check and ``makedirs``; that is the outcome we wanted.
        # Anything else (permissions, a file in the way, ...) is re-raised.
        if not path.isdir(dir_name):
            raise
def _scandir_py35(dir_path, suffix=None):
    """Yield names of regular files in ``dir_path`` using ``os.scandir``."""
    for entry in os.scandir(dir_path):
        if not entry.is_file():
            continue
        filename = entry.name
        if suffix is None or filename.endswith(suffix):
            yield filename


def _scandir_py(dir_path, suffix=None):
    """Yield names of regular files in ``dir_path`` using ``os.listdir``."""
    for filename in os.listdir(dir_path):
        if not path.isfile(path.join(dir_path, filename)):
            continue
        if suffix is None or filename.endswith(suffix):
            yield filename


def scandir(dir_path, suffix=None):
    """Iterate over the names of the regular files directly in a directory.

    Sub-directories are not descended into and are not reported.

    Args:
        dir_path (str): directory to scan.
        suffix (None or str or tuple of str): when given, only file names
            ending with (one of) the suffix(es) are yielded.

    Returns:
        generator: yields file names (not full paths).

    Raises:
        TypeError: if ``suffix`` is neither None, a str nor a tuple.
    """
    if suffix is not None and not isinstance(suffix, (str, tuple)):
        raise TypeError('"suffix" must be a string or tuple of strings')
    # ``sys.version`` is a string, so comparing its characters with ints is
    # always False; ``sys.version_info`` is the comparable tuple. Without
    # this the ``os.scandir`` implementation was never selected.
    if sys.version_info[:2] >= (3, 5):
        return _scandir_py35(dir_path, suffix)
    return _scandir_py(dir_path, suffix)