pv2/util/fileutil.py

85 lines
2.4 KiB
Python
Raw Normal View History

"""
File functions
"""
import hashlib
import os
from typing import Callable

import magic

from pv2.util import error as err
# Public API of the file utilities: the names exported by a star-import of
# this module.
__all__ = [
'filter_files',
'filter_files_inverse',
'get_checksum',
'get_magic_file',
'get_magic_content'
]
def filter_files(directory_path: str, filter_filename: Callable[[str], bool]) -> list:
    """
    Return the paths of directory entries whose name passes the filter.

    Only the immediate contents of the directory are examined; there is no
    recursion into subdirectories.

    Args:
        directory_path: Directory to scan.
        filter_filename: Callable that receives each entry's bare name and
            returns a truthy value for the entries that should be kept.

    Returns:
        A list of paths, each built by joining directory_path with the entry
        name, in the order os.scandir yielded them.

    Raises:
        OSError: Propagated from os.scandir when the directory cannot be read.
    """
    # The context manager closes the scandir handle even when the predicate
    # raises part-way through the listing.
    with os.scandir(directory_path) as entries:
        return [
            os.path.join(directory_path, entry.name)
            for entry in entries
            if filter_filename(entry.name)
        ]
def filter_files_inverse(directory_path: str, filter_filename: Callable[[str], bool]) -> list:
    """
    Return the paths of directory entries whose name does NOT pass the filter.

    Only the immediate contents of the directory are examined; there is no
    recursion into subdirectories.

    Args:
        directory_path: Directory to scan.
        filter_filename: Callable that receives each entry's bare name;
            entries for which it returns a truthy value are left out.

    Returns:
        A list of paths, each built by joining directory_path with the entry
        name, in the order os.scandir yielded them.

    Raises:
        OSError: Propagated from os.scandir when the directory cannot be read.
    """
    # The context manager closes the scandir handle even when the predicate
    # raises part-way through the listing.
    with os.scandir(directory_path) as entries:
        return [
            os.path.join(directory_path, entry.name)
            for entry in entries
            if not filter_filename(entry.name)
        ]
def get_checksum(file_path: str, hashtype: str = 'sha256') -> str:
    """
    Compute the hex digest of a file, reading it in fixed-size chunks.

    The file is consumed 8192 bytes at a time, so it is never loaded into
    memory as a whole. Borrowed from empanadas with some modifications.

    Args:
        file_path: Path of the file to hash.
        hashtype: Algorithm name passed to hashlib.new. 'sha', 'sha1' and
            'md5' are rejected. Defaults to 'sha256'.

    Returns:
        The hexadecimal digest of the file contents.

    Raises:
        err.ProvidedValueError: hashtype is 'sha', 'sha1' or 'md5'.
        err.GenericError: hashlib does not provide hashtype, or the file
            could not be opened or read.
    """
    # We shouldn't be using sha1 or md5.
    if hashtype in ('sha', 'sha1', 'md5'):
        raise err.ProvidedValueError(f'{hashtype} is not allowed.')

    try:
        checksum = hashlib.new(hashtype)
    except ValueError as exc:
        # Report the caught exception, not the ValueError class itself.
        raise err.GenericError(f'hash type not available: {exc}') from exc

    try:
        # The with-statement closes the file; no explicit close() is needed.
        with open(file_path, 'rb') as input_file:
            # iter() with a sentinel stops at the empty read that marks EOF.
            for chunk in iter(lambda: input_file.read(8192), b''):
                checksum.update(chunk)
    except OSError as exc:
        # Chain the cause so the original traceback is preserved.
        raise err.GenericError(
            f'Could not open or process file {file_path}: {exc}'
        ) from exc

    return checksum.hexdigest()
def get_magic_file(file_path: str):
    """
    Look up the magic data for the file at the given path.

    The result is whatever magic.detect_from_filename returns; it is meant to
    carry the mimetype and the other details that running `file` on the path
    would report.
    """
    return magic.detect_from_filename(file_path)
def get_magic_content(data):
    """
    Look up the magic data for content that has already been read.

    Pass the file's data rather than a path. The result is whatever
    magic.detect_from_content returns; it is meant to carry the mimetype and
    the other details that running `file` on the originating file would
    report.
    """
    return magic.detect_from_content(data)