diff --git a/.github/ISSUE_TEMPLATE/bug-name.md b/.github/ISSUE_TEMPLATE/bug-name.md new file mode 100644 index 0000000..5108818 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-name.md @@ -0,0 +1,25 @@ +--- +name: "[BUG] NAME" +about: Create a bug report to fix up pv2 issues +title: '' +labels: bug +assignees: + - nazunalika + +--- + +**Below, describe the bug** + + +**Console or Terminal Info (terminal output from a script)** + + +**Distribution and Python Version** + - Distribution: + - Python: + +**Any other details, e.g. what were you trying to do?** + + diff --git a/.github/ISSUE_TEMPLATE/rfe-name.md b/.github/ISSUE_TEMPLATE/rfe-name.md new file mode 100644 index 0000000..d51d489 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/rfe-name.md @@ -0,0 +1,25 @@ +--- +name: "[RFE] NAME" +about: Suggest an idea for the pv2 module to make it better +title: '' +labels: enhancement +assignees: + - nazunalika + +--- + +**Preliminary questions** +- [ ] Is there already a PR open for this? +- [ ] Have you ensured that this feature is not already included? + +**Is this RFE related to a problem or a bug? Describe and reference it below.** + + +**What is your proposed idea or solution?** + + +**What are the benefits of this?** + + +**Additional context and information** + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..77f5b69 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ +## Proposed Commit Message + + +``` +Summary: No more than 70 characters + +Put in a description of the change being made here and why it is being made, +only if the summary line doesn't explain it in full. + +Fixes: GH-NNNNN (remove line if there is no related issue) +``` + +## Additional Context + + +## Testing/Verification + diff --git a/README.md b/README.md index 8d2f938..2e79f40 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,27 @@ in the RESF (such as Rocky Linux). 
* GitPython (python3-GitPython or via pip) * lxml (python3-lxml or via pip) * rpm (python3-rpm) + * pycurl (python3-pycurl) + +* rpm macros packages + + * \*-rpm-macros + * \*-srpm-macros ## Example Scripts Example scripts are found in the `examples` directory, which can utilize parts of the pv2 module. + +## Contributing + +If you see a bug or a potential enhancement, we always encourage Pull Requests +to be sent in. When sending in your pull request, make sure it is against the +`development` branch. PRs against `main` will be closed. + +To submit a change, we recommend that you do so on GitHub: + +* Fork the repository as necessary +* Make a new branch based on the `development` branch and ensure that it is up to date +* Make your changes +* Send in the PR for review to our `development` branch diff --git a/pv2/importer/__init__.py b/pv2/importer/__init__.py index 4e08117..64f24d9 100644 --- a/pv2/importer/__init__.py +++ b/pv2/importer/__init__.py @@ -7,4 +7,4 @@ This assists packagers by taking input as srpm or git location, importing and tagging it as appropriate.
""" -from .operation import Import, SrpmImport +from .operation import Import, SrpmImport, GitImport diff --git a/pv2/importer/operation.py b/pv2/importer/operation.py index 2d95f0c..33beec5 100644 --- a/pv2/importer/operation.py +++ b/pv2/importer/operation.py @@ -7,6 +7,7 @@ Importer accessories import os import re import shutil +import string from pv2.util import gitutil, fileutil, rpmutil, processor, generic from pv2.util import error as err from pv2.util import constants as const @@ -52,6 +53,31 @@ class Import: command_to_send = ' '.join(command_to_send) processor.run_proc_no_output_shell(command_to_send) + @staticmethod + def pack_srpm(srpm_dir, spec_file, dist_tag): + """ + Packs an srpm from available sources + """ + command_to_send = [ + 'rpmbuild', + '-bs', + f'{spec_file}', + '--define', + f"'dist {dist_tag}'", + '--define', + f"'_topdir {srpm_dir}'", + '--define', + f"'_sourcedir {srpm_dir}'" + ] + command_to_send = ' '.join(command_to_send) + returned = processor.run_proc_no_output_shell(command_to_send) + wrote_regex = r'Wrote:\s+(.*\.rpm)' + regex_search = re.search(wrote_regex, returned.stdout, re.MULTILINE) + if regex_search: + return regex_search.group(1) + + return None + @staticmethod def generate_metadata(repo_path: str, repo_name: str, file_dict: dict): """ @@ -136,6 +162,53 @@ class Import: source_path = f'{repo_path}/{name}' os.remove(source_path) + @staticmethod + def get_lookaside_template_path(source): + """ + Attempts to return the lookaside template + """ + # This is an extremely hacky way to return the right value. In python + # 3.10, match-case was introduced. However, we need to assume that + # python 3.9 is the lowest used version for this module, so we need to + # be inefficient until we no longer use EL9 as the base line. 
+ return { + 'rocky8': const.GitConstants.ROCKY8_LOOKASIDE_PATH, + 'rocky': const.GitConstants.ROCKY_LOOKASIDE_PATH, + 'centos': const.GitConstants.CENTOS_LOOKASIDE_PATH, + 'stream': const.GitConstants.STREAM_LOOKASIDE_PATH, + 'fedora': const.GitConstants.FEDORA_LOOKASIDE_PATH, + }.get(source, None) + + @staticmethod + def parse_metadata_file(metadata_file) -> dict: + """ + Parses a metadata or sources file into {file: {'hashtype': ..., 'checksum': ...}} + """ + file_dict = {} + # pylint: disable=line-too-long + line_pattern = re.compile(r'^(?P<hashtype>[^ ]+?) \((?P<file>[^ )]+?)\) = (?P<checksum>[^ ]+?)$') + classic_pattern = re.compile(r'^(?P<checksum>[^ ]+?)\s+(?P<file>[^ ]+?)$') + with open(metadata_file, encoding='UTF-8') as metafile: + for line in metafile: + strip = line.strip() + if not strip: + continue + + line_check = line_pattern.match(strip) + classic_check = classic_pattern.match(strip) + if line_check is not None: + file_dict[line_check.group('file')] = { + 'hashtype': line_check.group('hashtype'), + 'checksum': line_check.group('checksum') + } + elif classic_check is not None: + file_dict[classic_check.group('file')] = { + 'hashtype': generic.hash_checker(classic_check.group('checksum')), + 'checksum': classic_check.group('checksum') + } + + return file_dict + # pylint: disable=too-many-instance-attributes class SrpmImport(Import): """ @@ -375,7 +448,7 @@ class GitImport(Import): guess on how to convert it and push it to your git forge with an expected format.
""" - # pylint: disable=too-many-arguments + # pylint: disable=too-many-arguments,too-many-locals def __init__( self, package: str, @@ -384,8 +457,11 @@ class GitImport(Import): git_url_path: str, release: str, branch: str, - upstream_lookaside: str = '', + upstream_lookaside: str, + scl_mode: bool = False, + scl_package: str = '', dest_lookaside: str = '/var/www/html/sources', + source_git_protocol: str = 'https', dest_branch: str = '', distprefix: str = 'el', git_user: str = 'git', @@ -399,7 +475,8 @@ class GitImport(Import): """ self.__rpm = package self.__release = release - self.__source_git_url = f'https://{source_git_url_path}/{source_git_org_path}/{package}.git' + # pylint: disable=line-too-long + self.__source_git_url = f'{source_git_protocol}://{source_git_url_path}/{source_git_org_path}/{package}.git' self.__git_url = f'ssh://{git_user}@{git_url_path}/{org}/{package}.git' self.__dist_prefix = distprefix self.__dist_tag = f'.{distprefix}{release}' @@ -407,10 +484,15 @@ class GitImport(Import): self.__dest_branch = branch self.__dest_lookaside = dest_lookaside self.__upstream_lookaside = upstream_lookaside + self.__upstream_lookaside_url = self.get_lookaside_template_path(upstream_lookaside) if len(dest_branch) > 0: self.__dest_branch = dest_branch + if not self.__upstream_lookaside_url: + raise err.ConfigurationError(f'{upstream_lookaside} is not valid.') + + # pylint: disable=too-many-locals def pkg_import(self, skip_lookaside: bool = False): """ Actually perform the import @@ -419,29 +501,177 @@ class GitImport(Import): than uploaded to lookaside.
""" check_source_repo = gitutil.lsremote(self.source_git_url) - check_dest_repo = gitutil.lsremote(self.source_git_url) + check_dest_repo = gitutil.lsremote(self.dest_git_url) source_git_repo_path = f'/var/tmp/{self.rpm_name}-source' + source_git_repo_spec = f'{source_git_repo_path}/{self.rpm_name}.spec' dest_git_repo_path = f'/var/tmp/{self.rpm_name}' + metadata_file = f'{source_git_repo_path}/.{self.rpm_name}.metadata' + sources_file = f'{source_git_repo_path}/sources' source_branch = self.source_branch dest_branch = self.dest_branch + _dist_tag = self.dist_tag repo_tags = [] + # If the upstream repo doesn't report anything, exit. + if not check_source_repo: + raise err.GitInitError('Upstream git repo does not exist') + + # If the source branch has "stream" in the name, it should be assumed + # it'll be a module. Since this should always be the case, we'll change + # dest_branch to be: {dest_branch}-stream-{stream_name} + if "stream" in source_branch: + dest_branch = self.__get_module_stream_branch_name(source_branch, dest_branch) + _dist_tag = f'.module+{_dist_tag.lstrip(".")}+1010+deadbeef' + + # Do SCL logic here.
+ + # Try to clone first + print(f'Cloning upstream: {self.rpm_name}') + source_repo = gitutil.clone( + git_url_path=self.source_git_url, + repo_name=self.rpm_name_replace, + to_path=source_git_repo_path, + branch=source_branch + ) + + if check_dest_repo: + ref_check = f'refs/heads/{dest_branch}' in check_dest_repo + print(f'Cloning: {self.rpm_name}') + if ref_check: + dest_repo = gitutil.clone( + git_url_path=self.dest_git_url, + repo_name=self.rpm_name_replace, + to_path=dest_git_repo_path, + branch=dest_branch + ) + else: + dest_repo = gitutil.clone( + git_url_path=self.dest_git_url, + repo_name=self.rpm_name_replace, + to_path=dest_git_repo_path, + branch=None + ) + gitutil.checkout(dest_repo, branch=dest_branch, orphan=True) + self.remove_everything(dest_repo.working_dir) + for tag_name in dest_repo.tags: + repo_tags.append(tag_name.name) + else: + print('Repo may not exist or is private. Try to import anyway.') + dest_repo = gitutil.init( + git_url_path=self.dest_git_url, + repo_name=self.rpm_name_replace, + to_path=dest_git_repo_path, + branch=dest_branch + ) + + # Within the confines of the source git repo, we need to find a + # "sources" file or a metadata file. One of these will determine which + # route we take.
+ if os.path.exists(metadata_file): + no_metadata_list = ['stream', 'fedora'] + if any(ignore in self.upstream_lookaside for ignore in no_metadata_list): + # pylint: disable=line-too-long + raise err.ConfigurationError(f'metadata files are not supported with {self.upstream_lookaside}') + metafile_to_use = metadata_file + elif os.path.exists(sources_file): + no_sources_list = ['rocky', 'centos'] + if any(ignore in self.upstream_lookaside for ignore in no_sources_list): + # pylint: disable=line-too-long + raise err.ConfigurationError(f'sources files are not supported with {self.upstream_lookaside}') + metafile_to_use = sources_file + else: + raise err.GenericError('sources or metadata file NOT found') + + sources_dict = self.parse_metadata_file(metafile_to_use) + + # We need to check if there is a SPECS directory and make a SOURCES + # directory if it doesn't exist + if os.path.exists(f'{source_git_repo_path}/SPECS'): + if not os.path.exists(f'{source_git_repo_path}/SOURCES'): + try: + os.makedirs(f'{source_git_repo_path}/SOURCES') + except Exception as exc: + raise err.GenericError(f'Directory could not be created: {exc}') from exc + + for key, value in sources_dict.items(): + download_file = f'{source_git_repo_path}/{key}' + download_hashtype = value['hashtype'] + download_checksum = value['checksum'] + the_url = self.__get_actual_lookaside_url( + download_file.split('/')[-1], + download_hashtype, + download_checksum + ) + + generic.download_file(the_url, download_file, download_checksum, + download_hashtype) + + # attempt to pack up the RPM, get metadata + packed_srpm = self.pack_srpm(source_git_repo_path, source_git_repo_spec, _dist_tag) + if not packed_srpm: + raise err.MissingValueError( + 'The srpm was not written, yet command completed successfully.' + ) + # We can't verify an srpm we just built ourselves.
+ srpm_metadata = self.get_srpm_metadata(packed_srpm, verify=False) + # pylint: disable=line-too-long + srpm_nvr = srpm_metadata['name'] + '-' + srpm_metadata['version'] + '-' + srpm_metadata['release'] + import_tag = generic.safe_encoding(f'imports/{dest_branch}/{srpm_nvr}') + commit_msg = f'import {srpm_nvr}' + # unpack it to new dir, move lookaside if needed, tag and push + if import_tag in repo_tags: + shutil.rmtree(source_git_repo_path) + shutil.rmtree(dest_git_repo_path) + raise err.GitCommitError(f'Git tag already exists: {import_tag}') + + self.unpack_srpm(packed_srpm, dest_git_repo_path) + sources = self.get_dict_of_lookaside_files(dest_git_repo_path) + self.generate_metadata(dest_git_repo_path, self.rpm_name, sources) + self.generate_filesum(dest_git_repo_path, self.rpm_name, "Direct Git Import") + + if skip_lookaside: + self.skip_import_lookaside(dest_git_repo_path, sources) + else: + self.import_lookaside(dest_git_repo_path, self.rpm_name, dest_branch, + sources, self.dest_lookaside) + + gitutil.add_all(dest_repo) + verify = dest_repo.is_dirty() + if verify: + gitutil.commit(dest_repo, commit_msg) + ref = gitutil.tag(dest_repo, import_tag, commit_msg) + gitutil.push(dest_repo, ref=ref) + shutil.rmtree(source_git_repo_path) + shutil.rmtree(dest_git_repo_path) + return True + print('Nothing to push') + shutil.rmtree(source_git_repo_path) + shutil.rmtree(dest_git_repo_path) + return False + + def __get_actual_lookaside_url(self, filename, hashtype, checksum): + """ + Returns the translated URL to obtain sources + """ + dict_template = { + 'PKG_NAME': self.rpm_name, + 'FILENAME': filename, + 'HASH_TYPE': hashtype.lower(), + 'HASH': checksum, + # BRANCH is only referenced by the CentOS (git.centos.org) template + 'BRANCH': self.source_branch + } + + return string.Template(self.upstream_lookaside_url).substitute(dict_template) + @staticmethod - def __get_lookaside_template_path(source): + def __get_module_stream_branch_name(source_branch, dest_branch): """ - Attempts to return the lookaside template + Returns a
branch name for modules (source_branch must contain stream-X-Y; Y is used) """ - # This is an extremely hacky way to return the right value. In python - # 3.10, match-case was introduced. However, we need to assume that - # python 3.9 is the lowest used version for this module, so we need to - # be inefficient until we no longer use EL9 as the base line. - return { - 'rocky8': const.GitConstants.ROCKY8_LOOKASIDE_PATH, - 'rocky': const.GitConstants.ROCKY_LOOKASIDE_PATH, - 'centos': const.GitConstants.CENTOS_LOOKASIDE_PATH, - 'stream': const.GitConstants.STREAM_LOOKASIDE_PATH, - 'fedora': const.GitConstants.FEDORA_LOOKASIDE_PATH, - }.get(source, None) + regex = r'stream-([a-zA-Z0-9_\.]+)-([a-zA-Z0-9_\.]+)' + regex_search = re.search(regex, source_branch) + return f'{dest_branch}-stream-{regex_search.group(2)}' @property def rpm_name(self): @@ -450,6 +680,14 @@ class GitImport(Import): """ return self.__rpm + @property + def rpm_name_replace(self): + """ + Returns the RPM name with every '+' replaced by 'plus' + """ + new_name = self.__rpm.replace('+', 'plus') + return new_name + @property def source_branch(self): """ @@ -485,6 +723,20 @@ class GitImport(Import): """ return self.__dist_tag + @property + def upstream_lookaside(self): + """ + Returns the upstream lookaside name passed to the constructor + """ + return self.__upstream_lookaside + + @property + def upstream_lookaside_url(self): + """ + Returns the upstream lookaside URL template + """ + return self.__upstream_lookaside_url + @property def dest_lookaside(self): """ diff --git a/pv2/util/constants.py b/pv2/util/constants.py index 42f8c71..d81b7d7 100644 --- a/pv2/util/constants.py +++ b/pv2/util/constants.py @@ -55,6 +55,7 @@ class ErrorConstants: ERR_MISSING_VALUE = 9003 ERR_CONFIGURATION = 9004 ERR_NOTFOUND = 9005 + ERR_DOWNLOAD = 9006 # Error in spec file MOCK_ERR_SPEC = 9100 # Error trying to get dependencies for a build @@ -186,7 +187,7 @@ class GitConstants: CENTOS_LOOKASIDE_PATH = 'https://git.centos.org/sources/${PKG_NAME}/${BRANCH}/${HASH}' # pylint: disable=line-too-long - STREAM_LOOKASIDE_PATH =
'https://sources.stream.centos.org/sources/rpms/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${FILENAME}' - FEDORA_LOOKASIDE_PATH = 'https://src.fedoraproject.org/repo/pkgs/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${FILENAME}' + STREAM_LOOKASIDE_PATH = 'https://sources.stream.centos.org/sources/rpms/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${HASH}/${FILENAME}' + FEDORA_LOOKASIDE_PATH = 'https://src.fedoraproject.org/repo/pkgs/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${HASH}/${FILENAME}' ROCKY8_LOOKASIDE_PATH = 'https://rocky-linux-sources-staging.a1.rockylinux.org/${HASH}' ROCKY_LOOKASIDE_PATH = 'https://sources.build.resf.org/${HASH}' diff --git a/pv2/util/error.py b/pv2/util/error.py index 28e82a5..19f955d 100644 --- a/pv2/util/error.py +++ b/pv2/util/error.py @@ -16,6 +16,7 @@ __all__ = [ 'MissingValueError', 'ConfigurationError', 'FileNotFound', + 'DownloadError', 'MockGenericError', 'MockUnexpectedError', 'MockInvalidConfError', @@ -75,6 +76,12 @@ class FileNotFound(GenericError): """ fault_code = errconst.ERR_NOTFOUND +class DownloadError(GenericError): + """ + A file could not be downloaded or failed verification + """ + fault_code = errconst.ERR_DOWNLOAD + class MockGenericError(GenericError): """ Mock error exceptions diff --git a/pv2/util/generic.py b/pv2/util/generic.py index a65d1d3..064c433 100644 --- a/pv2/util/generic.py +++ b/pv2/util/generic.py @@ -1,10 +1,14 @@ """ Generic functions """ +import os +import sys import datetime import hashlib from urllib.parse import quote as urlquote +import pycurl from pv2.util import error as err +from pv2.util import fileutil # General utilities __all__ = [ @@ -14,7 +18,9 @@ __all__ = [ 'generate_password_hash', 'ordered', 'to_unicode', - 'trim_non_empty_string' + 'trim_non_empty_string', + 'hash_checker', + 'download_file' ] def to_unicode(string: str) -> str: @@ -86,3 +92,82 @@ def safe_encoding(data: str) -> str: # the urllib library currently doesn't reserve this quoter = quoter.replace('~', '%7e') return quoter + +def hash_checker(data: str) ->
str: + """ + Returns the type of hash the string possibly is + """ + if len(data) == 128: + hashtype = 'sha512' + elif len(data) == 64: + hashtype = 'sha256' + elif len(data) == 40: + hashtype = 'sha1' + elif len(data) == 32: + hashtype = 'md5' + else: + raise err.GenericError('Data is either invalid or is not a hash.') + + return hashtype + +def download_file(url: str, to_path: str, checksum=None, hashtype=None): + """ + Downloads url to to_path, verifying the checksum when one is given; raises DownloadError on failure + """ + url = url.encode('utf-8') + if os.path.exists(to_path): + if not checksum or not hashtype: + # pylint: disable=line-too-long + raise err.DownloadError(f'File {to_path} already exists, but a checksum was not provided to verify it.') + + file_checksum = fileutil.get_checksum(to_path, hashtype=hashtype) + if file_checksum != checksum: + raise err.DownloadError('File exists, but checksum does not match') + print('File already downloaded and checksum is valid.') + return + + # Assume path doesn't exist, download it. + print(f'Downloading {to_path}') + with open(to_path, 'wb') as dlf: + # todo: add stdout or logging for this + # pylint: disable=c-extension-no-member + curl = pycurl.Curl() + curl.setopt(pycurl.URL, url) + curl.setopt(pycurl.HTTPHEADER, ['Pragma:']) + curl.setopt(pycurl.NOPROGRESS, True) + curl.setopt(pycurl.OPT_FILETIME, True) + curl.setopt(pycurl.WRITEDATA, dlf) + curl.setopt(pycurl.LOW_SPEED_LIMIT, 1000) + curl.setopt(pycurl.LOW_SPEED_TIME, 300) + curl.setopt(pycurl.FOLLOWLOCATION, 1) + + try: + curl.perform() + timestamp = curl.getinfo(pycurl.INFO_FILETIME) + status = curl.getinfo(pycurl.RESPONSE_CODE) + except Exception as exc: + os.remove(to_path) + raise err.DownloadError(exc) from exc + finally: + curl.close() + + if sys.stdout.isatty(): + sys.stdout.write('\n') + sys.stdout.flush() + + if status != 200: + print(f'Removing invalid file {to_path}') + os.remove(to_path) + raise err.DownloadError(f'There was an error downloading: {status}') + + os.utime(to_path, (timestamp, timestamp)) + # verify checksum + if not
checksum or not hashtype: + # pylint: disable=line-too-long + print('checksum and hashtype were not set, skipping verification') + return + + file_checksum = fileutil.get_checksum(to_path, hashtype=hashtype) + if file_checksum != checksum: + os.remove(to_path) + raise err.DownloadError('Checksums do not match for downloaded file') diff --git a/pv2/util/gitutil.py b/pv2/util/gitutil.py index 1729db4..504d7b4 100644 --- a/pv2/util/gitutil.py +++ b/pv2/util/gitutil.py @@ -57,6 +57,7 @@ def clone( clone a repo. if branch is None, it will just clone the repo in general and you'll be expected to checkout. """ + clone_path = to_path if not to_path: clone_path = f'/var/tmp/{repo_name}' diff --git a/pyproject.toml b/pyproject.toml index 6319dbc..6e73e0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pv2" -version = "0.9.1" +version = "0.9.2" description = "PV2 backend framework module" readme = "README.md" authors = [ @@ -16,7 +16,8 @@ requires-python = ">=3.6" dependencies = [ "GitPython >= 3.1.30", "lxml >= 4.6.5", - "file-magic >= 0.4.0" + "file-magic >= 0.4.0", + "pycurl >= 7.43.0.6" ] [project.urls]