pv2/util/fileutil.py
Louis Abel 185d144567
Add importutil module
Adds the importutil module, which allows targeting a specific source RPM
file, then importing and tagging it. It aims to keep the same structure as
git.centos.org.

Other changes:

* constants.py: New constants added for git and rpm
* error.py: New git and rpm error classes added
* fileutil.py:
  * Add filter_files_inverse (matches everything but the filter)
  * Add get_magic_file (returns magic data from a file)
  * Add get_magic_content (returns magic data from data/content)
* generic.py: Add safe_encoding to return a urlquote string
* processor.py:
  * Add run_proc_foreground_shell to support shell calls
  * Add run_proc_no_output_shell to support shell calls
* rpmutil.py:
  * get_rpm_header now supports verify_signature parameter (default
    false). If set to true and key is not available, raises exception.
  * Add verify_rpm_signature, which allows local rpm verification
    without ingesting the whole header into a usable object.
  * Add add_rpm_key, which enables a user to add a key to the rpm
    keyring.
2023-06-27 17:20:44 -07:00

85 lines
2.4 KiB
Python

"""
File functions
"""
import hashlib
import os
from typing import Callable

import magic

from pv2.util import error as err
# Names exported by this module (file utilities)
__all__ = [
    'filter_files',
    'filter_files_inverse',
    'get_checksum',
    'get_magic_file',
    'get_magic_content',
]
def filter_files(directory_path: str, filter_filename: Callable[[str], bool]) -> list:
    """
    Return the directory entries whose name is accepted by a filter.

    The scan is not recursive: only the direct entries of directory_path
    are considered.

    directory_path: directory to scan.
    filter_filename: callable that receives each entry name and returns a
        truthy value for the entries that should be kept.

    Returns a list of paths built with os.path.join(directory_path, name),
    in the order os.scandir yields them.
    """
    # Use scandir as a context manager so the underlying directory handle is
    # released as soon as we are done instead of waiting for garbage
    # collection.
    with os.scandir(directory_path) as entries:
        return [
            os.path.join(directory_path, entry.name)
            for entry in entries
            if filter_filename(entry.name)
        ]
def filter_files_inverse(directory_path: str, filter_filename: Callable[[str], bool]) -> list:
    """
    Return the directory entries whose name is rejected by a filter.

    This is the inverse selection: every direct entry of directory_path is
    returned except those for which the filter gives a truthy value. The
    scan is not recursive.

    directory_path: directory to scan.
    filter_filename: callable that receives each entry name and returns a
        truthy value for the entries that should be left out.

    Returns a list of paths built with os.path.join(directory_path, name),
    in the order os.scandir yields them.
    """
    # Use scandir as a context manager so the underlying directory handle is
    # released as soon as we are done instead of waiting for garbage
    # collection.
    with os.scandir(directory_path) as entries:
        return [
            os.path.join(directory_path, entry.name)
            for entry in entries
            if not filter_filename(entry.name)
        ]
def get_checksum(file_path: str, hashtype: str = 'sha256') -> str:
    """
    Generates a checksum from the provided path by doing things in chunks.
    Reading in chunks keeps memory use flat regardless of the file size.

    Borrowed from empanadas with some modifications

    file_path: path of the file to hash.
    hashtype: algorithm name passed to hashlib.new (default 'sha256').

    Returns the hexadecimal digest as a string.

    Raises err.ProvidedValueError when a disallowed algorithm (sha, sha1 or
    md5) is requested, and err.GenericError when the algorithm is not
    available or the file cannot be opened or read.
    """
    # We shouldn't be using sha1 or md5. Compare case-insensitively because
    # hashlib.new also accepts spellings such as 'SHA1' and 'MD5'.
    if hashtype.lower() in ('sha', 'sha1', 'md5'):
        raise err.ProvidedValueError(f'{hashtype} is not allowed.')

    try:
        checksum = hashlib.new(hashtype)
    except ValueError as exc:
        # Report the actual failure, not the exception class
        raise err.GenericError(f'hash type not available: {exc}') from exc

    try:
        # The with block closes the file; no explicit close() is needed
        with open(file_path, 'rb') as input_file:
            for chunk in iter(lambda: input_file.read(8192), b''):
                checksum.update(chunk)
    except OSError as exc:
        raise err.GenericError(
            f'Could not open or process file {file_path}: {exc}'
        ) from exc

    return checksum.hexdigest()
def get_magic_file(file_path: str):
    """
    Look up the magic data for the file at file_path.

    This hands the path to magic.detect_from_filename and returns its result
    unchanged, giving the mimetype and other info you would see when running
    `file` on that path.
    """
    return magic.detect_from_filename(file_path)
def get_magic_content(data):
    """
    Look up the magic data for already-read content.

    This hands the data to magic.detect_from_content and returns its result
    unchanged, giving the mimetype and other info you would see when running
    `file` on a file. Pass the data read from a file, not a path.
    """
    return magic.detect_from_content(data)