Add GitImporter and misc changes

* Add GitImporter class to importer utility

  * Given a pagure or gitlab source, it should be able to import an rpm

* Added utilities to handle GitImporter changes
* Modified README for future github transport
* Added issue templates for future github transport
* Raise micro version
This commit is contained in:
Louis Abel 2023-07-06 15:49:05 -07:00
parent df30fa4c72
commit 3488f7ca28
Signed by: label
GPG key ID: 3331F061D1D9990E
11 changed files with 457 additions and 23 deletions

25
.github/ISSUE_TEMPLATE/bug-name.md vendored Normal file
View file

@ -0,0 +1,25 @@
---
name: "[BUG] NAME"
about: Create a bug report to fix up pv2 issues
title: ''
labels: bug
assignees:
- nazunalika
---
**Below, describe the bug**
<!-- Describe the bug that you ran into -->
**Console or Terminal Info (terminal output from a script)**
<!-- Add any terminal output below using backticks as necessary -->
**Distribution and Python Version**
- Distribution:
- Python:
**Any other details, e.g. what were you trying to do?**
<!-- Include any other information you may believe is applicable
to this report. If you are using a python script that imports this
module, please include it. -->

25
.github/ISSUE_TEMPLATE/rfe-name.md vendored Normal file
View file

@ -0,0 +1,25 @@
---
name: "[RFE] NAME"
about: Suggest an idea for the pv2 module to make it better
title: ''
labels: enhancement
assignees:
- nazunalika
---
**Preliminary questions**
- [ ] Is there already a PR open for this?
- [ ] Have you ensured that this feature is not already included?
**Is this RFE related to a problem or a bug? Describe and reference it below.**
<!-- Describe here, if necessary, if this is related to a problem or reported bug. -->
**What is your proposed idea or solution?**
<!-- Describe what you would like to see as the solution for this RFE -->
**What are the benefits of this?**
<!-- Describe the benefits of this request -->
**Additional context and information**
<!-- Add any other information you feel is appropriate here -->

18
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View file

@ -0,0 +1,18 @@
## Proposed Commit Message
<!-- Include a proposed commit message, as all commits will be squash-merged -->
```
Summary: No more than 70 characters
Put in a description of the change being made here and why it is being made,
only if the summary line doesn't explain it in full.
Fixes: GH-NNNNN (remove line if there is no related issue)
```
## Additional Context
<!-- If relevant, add additional details of why you believe this change is necessary -->
## Testing/Verification
<!-- Include steps necessary to verify your changes. If this is a bug fix,
it is recommended to show reproduction steps before and after. -->

View file

@ -20,8 +20,27 @@ in the RESF (such as Rocky Linux).
* GitPython (python3-GitPython or via pip)
* lxml (python3-lxml or via pip)
* rpm (python3-rpm)
* pycurl (python3-pycurl)
* rpm macros packages
* \*-rpm-macros
* \*-srpm-macros
## Example Scripts
Example scripts are found in the `examples` directory, which can utilize
parts of the pv2 module.
## Contributing
If you see a bug or a potential enhancement, we always encourage Pull Requests
to be sent in. When sending in your pull request, make sure it is against the
`development` branch. PRs to `main` will be closed.
To submit a change, we recommend that you do so on GitHub:
* Fork the repository as necessary
* Make a new branch based on the `development` branch - Ensure that it is up-to-date
* Make your changes
* Send in the PR for review to our `development` branch

View file

@ -7,4 +7,4 @@ This assists packagers by taking input as srpm or git location, importing and
tagging it as appropriate.
"""
from .operation import Import, SrpmImport
from .operation import Import, SrpmImport, GitImport

View file

@ -7,6 +7,7 @@ Importer accessories
import os
import re
import shutil
import string
from pv2.util import gitutil, fileutil, rpmutil, processor, generic
from pv2.util import error as err
from pv2.util import constants as const
@ -52,6 +53,31 @@ class Import:
command_to_send = ' '.join(command_to_send)
processor.run_proc_no_output_shell(command_to_send)
@staticmethod
def pack_srpm(srpm_dir, spec_file, dist_tag):
    """
    Builds a source rpm from a spec file and its sources with rpmbuild

    srpm_dir is used as both _topdir and _sourcedir, and dist_tag is passed
    as the dist macro. Returns the path rpmbuild reported on its "Wrote:"
    line, or None when no such line was found.
    """
    # NOTE(review): assumes the processor helper hands back an object whose
    # stdout attribute holds the command output as text - confirm.
    rpmbuild_args = ['rpmbuild', '-bs', f'{spec_file}']
    macro_definitions = (
        f'dist {dist_tag}',
        f'_topdir {srpm_dir}',
        f'_sourcedir {srpm_dir}',
    )
    for macro in macro_definitions:
        # Each macro is single-quoted because the command goes via a shell
        rpmbuild_args.extend(['--define', f"'{macro}'"])

    result = processor.run_proc_no_output_shell(' '.join(rpmbuild_args))
    written = re.search(r'Wrote:\s+(.*\.rpm)', result.stdout, re.MULTILINE)
    return written.group(1) if written else None
@staticmethod
def generate_metadata(repo_path: str, repo_name: str, file_dict: dict):
"""
@ -136,6 +162,53 @@ class Import:
source_path = f'{repo_path}/{name}'
os.remove(source_path)
@staticmethod
def get_lookaside_template_path(source):
    """
    Maps a lookaside source name to its URL template

    Returns None when the source name is not one of the known keys.
    """
    # A lookup table stands in for match-case (python 3.10+), since python
    # 3.9 has to be assumed as the lowest version while EL9 is the base
    # line for this module.
    known_templates = {
        'rocky8': const.GitConstants.ROCKY8_LOOKASIDE_PATH,
        'rocky': const.GitConstants.ROCKY_LOOKASIDE_PATH,
        'centos': const.GitConstants.CENTOS_LOOKASIDE_PATH,
        'stream': const.GitConstants.STREAM_LOOKASIDE_PATH,
        'fedora': const.GitConstants.FEDORA_LOOKASIDE_PATH,
    }
    return known_templates.get(source)
@staticmethod
def parse_metadata_file(metadata_file) -> dict:
    """
    Reads a metadata or sources file into a dictionary

    Two line formats are understood:

      HASHTYPE (filename) = checksum
      checksum filename

    For the second form the hash type is worked out from the checksum by
    generic.hash_checker. Blank lines and lines matching neither form are
    skipped. The result maps each file name to a dictionary holding its
    'hashtype' and 'checksum'.
    """
    # pylint: disable=line-too-long
    bsd_style = re.compile(r'^(?P<hashtype>[^ ]+?) \((?P<file>[^ )]+?)\) = (?P<checksum>[^ ]+?)$')
    classic_style = re.compile(r'^(?P<checksum>[^ ]+?)\s+(?P<file>[^ ]+?)$')

    parsed = {}
    with open(metadata_file, encoding='UTF-8') as metafile:
        for entry in (raw.strip() for raw in metafile):
            if not entry:
                continue

            # The "HASHTYPE (file) = checksum" form takes precedence
            found = bsd_style.match(entry)
            if found is not None:
                parsed[found.group('file')] = {
                    'hashtype': found.group('hashtype'),
                    'checksum': found.group('checksum')
                }
                continue

            found = classic_style.match(entry)
            if found is not None:
                digest = found.group('checksum')
                parsed[found.group('file')] = {
                    'hashtype': generic.hash_checker(digest),
                    'checksum': digest
                }

    return parsed
# pylint: disable=too-many-instance-attributes
class SrpmImport(Import):
"""
@ -375,7 +448,7 @@ class GitImport(Import):
guess on how to convert it and push it to your git forge with an expected
format.
"""
# pylint: disable=too-many-arguments
# pylint: disable=too-many-arguments,too-many-locals
def __init__(
self,
package: str,
@ -384,8 +457,11 @@ class GitImport(Import):
git_url_path: str,
release: str,
branch: str,
upstream_lookaside: str = '',
upstream_lookaside: str,
scl_mode: bool = False,
scl_package: str = '',
dest_lookaside: str = '/var/www/html/sources',
source_git_protocol: str = 'https',
dest_branch: str = '',
distprefix: str = 'el',
git_user: str = 'git',
@ -399,7 +475,8 @@ class GitImport(Import):
"""
self.__rpm = package
self.__release = release
self.__source_git_url = f'https://{source_git_url_path}/{source_git_org_path}/{package}.git'
# pylint: disable=line-too-long
self.__source_git_url = f'{source_git_protocol}://{source_git_url_path}/{source_git_org_path}/{package}.git'
self.__git_url = f'ssh://{git_user}@{git_url_path}/{org}/{package}.git'
self.__dist_prefix = distprefix
self.__dist_tag = f'.{distprefix}{release}'
@ -407,10 +484,15 @@ class GitImport(Import):
self.__dest_branch = branch
self.__dest_lookaside = dest_lookaside
self.__upstream_lookaside = upstream_lookaside
self.__upstream_lookaside_url = self.get_lookaside_template_path(upstream_lookaside)
if len(dest_branch) > 0:
self.__dest_branch = dest_branch
if not self.__upstream_lookaside:
raise err.ConfigurationError(f'{upstream_lookaside} is not valid.')
# pylint: disable=too-many-locals
def pkg_import(self, skip_lookaside: bool = False):
"""
Actually perform the import
@ -419,29 +501,177 @@ class GitImport(Import):
than uploaded to lookaside.
"""
check_source_repo = gitutil.lsremote(self.source_git_url)
check_dest_repo = gitutil.lsremote(self.source_git_url)
check_dest_repo = gitutil.lsremote(self.dest_git_url)
source_git_repo_path = f'/var/tmp/{self.rpm_name}-source'
source_git_repo_spec = f'{source_git_repo_path}/{self.rpm_name}.spec'
dest_git_repo_path = f'/var/tmp/{self.rpm_name}'
metadata_file = f'{source_git_repo_path}/.{self.rpm_name}.metadata'
sources_file = f'{source_git_repo_path}/sources'
source_branch = self.source_branch
dest_branch = self.dest_branch
_dist_tag = self.dist_tag
repo_tags = []
# If the upstream repo doesn't report anything, exit.
if not check_source_repo:
raise err.GitInitError('Upstream git repo does not exist')
# If the source branch has "stream" in the name, it should be assumed
# it'll be a module. Since this should always be the case, we'll change
# dest_branch to be: {dest_branch}-stream-{stream_name}
if "stream" in source_branch:
dest_branch = self.__get_module_stream_branch_name(source_branch, dest_branch)
_dist_tag = f'.module+{_dist_tag}+1010+deadbeef'
# Do SCL logic here.
# Try to clone first
print(f'Cloning upstream: {self.rpm_name}')
source_repo = gitutil.clone(
git_url_path=self.source_git_url,
repo_name=self.rpm_name_replace,
to_path=source_git_repo_path,
branch=source_branch
)
if check_dest_repo:
ref_check = f'refs/heads/{dest_branch}' in check_dest_repo
print(f'Cloning: {self.rpm_name}')
if ref_check:
dest_repo = gitutil.clone(
git_url_path=self.dest_git_url,
repo_name=self.rpm_name_replace,
to_path=dest_git_repo_path,
branch=dest_branch
)
else:
dest_repo = gitutil.clone(
git_url_path=self.dest_git_url,
repo_name=self.rpm_name_replace,
to_path=dest_git_repo_path,
branch=None
)
gitutil.checkout(dest_repo, branch=dest_branch, orphan=True)
self.remove_everything(dest_repo.working_dir)
for tag_name in dest_repo.tags:
repo_tags.append(tag_name.name)
else:
print('Repo may not exist or is private. Try to import anyway.')
dest_repo = gitutil.init(
git_url_path=self.dest_git_url,
repo_name=self.rpm_name_replace,
to_path=dest_git_repo_path,
branch=dest_branch
)
# Within the confines of the source git repo, we need to find a
# "sources" file or a metadata file. One of these will determine which
# route we take.
if os.path.exists(metadata_file):
no_metadata_list = ['stream', 'fedora']
if any(ignore in self.upstream_lookaside for ignore in no_metadata_list):
# pylint: disable=line-too-long
raise err.ConfigurationError(f'metadata files are not supported with {self.upstream_lookaside}')
metafile_to_use = metadata_file
elif os.path.exists(sources_file):
no_sources_list = ['rocky', 'centos']
if any(ignore in self.upstream_lookaside for ignore in no_sources_list):
# pylint: disable=line-too-long
raise err.ConfigurationError(f'sources files are not supported with {self.upstream_lookaside}')
metafile_to_use = sources_file
else:
raise err.GenericError('sources or metadata file NOT found')
sources_dict = self.parse_metadata_file(metafile_to_use)
# We need to check if there is a SPECS directory and make a SOURCES
# directory if it doesn't exist
if os.path.exists(f'{source_git_repo_path}/SPECS'):
if not os.path.exists(f'{source_git_repo_path}/SOURCES'):
try:
os.makedirs(f'{source_git_repo_path}/SOURCES')
except Exception as exc:
raise err.GenericError(f'Directory could not be created: {exc}')
for key, value in sources_dict.items():
download_file = f'{source_git_repo_path}/{key}'
download_hashtype = sources_dict[key]['hashtype']
download_checksum = sources_dict[key]['checksum']
the_url = self.__get_actual_lookaside_url(
download_file.split('/')[-1],
download_hashtype,
download_checksum
)
generic.download_file(the_url, download_file, download_checksum,
download_hashtype)
# attempt to pack up the RPM, get metadata
packed_srpm = self.pack_srpm(source_git_repo_path, source_git_repo_spec, _dist_tag)
if not packed_srpm:
raise err.MissingValueError(
'The srpm was not written, yet command completed successfully.'
)
# We can't verify an srpm we just built ourselves.
srpm_metadata = self.get_srpm_metadata(packed_srpm, verify=False)
# pylint: disable=line-too-long
srpm_nvr = srpm_metadata['name'] + '-' + srpm_metadata['version'] + '-' + srpm_metadata['release']
import_tag = generic.safe_encoding(f'imports/{dest_branch}/{srpm_nvr}')
commit_msg = f'import {srpm_nvr}'
# unpack it to new dir, move lookaside if needed, tag and push
if import_tag in repo_tags:
shutil.rmtree(source_git_repo_path)
shutil.rmtree(dest_git_repo_path)
raise err.GitCommitError(f'Git tag already exists: {import_tag}')
self.unpack_srpm(packed_srpm, dest_git_repo_path)
sources = self.get_dict_of_lookaside_files(dest_git_repo_path)
self.generate_metadata(dest_git_repo_path, self.rpm_name, sources)
self.generate_filesum(dest_git_repo_path, self.rpm_name, "Direct Git Import")
if skip_lookaside:
self.skip_import_lookaside(dest_git_repo_path, sources)
else:
self.import_lookaside(dest_git_repo_path, self.rpm_name, dest_branch,
sources, self.dest_lookaside)
gitutil.add_all(dest_repo)
verify = dest_repo.is_dirty()
if verify:
gitutil.commit(dest_repo, commit_msg)
ref = gitutil.tag(dest_repo, import_tag, commit_msg)
gitutil.push(dest_repo, ref=ref)
shutil.rmtree(source_git_repo_path)
shutil.rmtree(dest_git_repo_path)
return True
print('Nothing to push')
shutil.rmtree(source_git_repo_path)
shutil.rmtree(dest_git_repo_path)
return False
def __get_actual_lookaside_url(self, filename, hashtype, checksum):
    """
    Returns the translated URL to obtain sources

    The lookaside URL template of this importer is filled in with the
    package name, the source branch, the file name, the lower-cased hash
    type and the checksum. A template only uses the placeholders it needs.

    Raises KeyError if the template has a placeholder not provided here.
    """
    dict_template = {
        'PKG_NAME': self.rpm_name,
        # The centos lookaside template has a ${BRANCH} component; without
        # this key substitute() fails with a KeyError for that lookaside.
        'BRANCH': self.source_branch,
        'FILENAME': filename,
        'HASH_TYPE': hashtype.lower(),
        'HASH': checksum
    }
    template = string.Template(self.upstream_lookaside_url)
    return template.substitute(dict_template)
@staticmethod
def __get_lookaside_template_path(source):
def __get_module_stream_branch_name(source_branch, dest_branch):
"""
Attempts to return the lookaside template
Returns a branch name for modules
"""
# This is an extremely hacky way to return the right value. In python
# 3.10, match-case was introduced. However, we need to assume that
# python 3.9 is the lowest used version for this module, so we need to
# be inefficient until we no longer use EL9 as the base line.
return {
'rocky8': const.GitConstants.ROCKY8_LOOKASIDE_PATH,
'rocky': const.GitConstants.ROCKY_LOOKASIDE_PATH,
'centos': const.GitConstants.CENTOS_LOOKASIDE_PATH,
'stream': const.GitConstants.STREAM_LOOKASIDE_PATH,
'fedora': const.GitConstants.FEDORA_LOOKASIDE_PATH,
}.get(source, None)
regex = r'stream-([a-zA-Z0-9_\.]+)-([a-zA-Z0-9_\.]+)'
regex_search = re.search(regex, source_branch)
return f'{dest_branch}-stream-{regex_search.group(2)}'
@property
def rpm_name(self):
@ -450,6 +680,14 @@ class GitImport(Import):
"""
return self.__rpm
@property
def rpm_name_replace(self):
    """
    Returns the RPM name with every '+' replaced by the word 'plus'
    """
    return self.__rpm.replace('+', 'plus')
@property
def source_branch(self):
"""
@ -485,6 +723,20 @@ class GitImport(Import):
"""
return self.__dist_tag
@property
def upstream_lookaside(self):
    """
    Returns the upstream lookaside name given when the importer was created
    """
    return self.__upstream_lookaside
@property
def upstream_lookaside_url(self):
    """
    Returns the URL template looked up for the upstream lookaside name
    """
    return self.__upstream_lookaside_url
@property
def dest_lookaside(self):
"""

View file

@ -55,6 +55,7 @@ class ErrorConstants:
ERR_MISSING_VALUE = 9003
ERR_CONFIGURATION = 9004
ERR_NOTFOUND = 9005
ERR_DOWNLOAD = 9006
# Error in spec file
MOCK_ERR_SPEC = 9100
# Error trying to get dependencies for a build
@ -186,7 +187,7 @@ class GitConstants:
CENTOS_LOOKASIDE_PATH = 'https://git.centos.org/sources/${PKG_NAME}/${BRANCH}/${HASH}'
# pylint: disable=line-too-long
STREAM_LOOKASIDE_PATH = 'https://sources.stream.centos.org/sources/rpms/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${FILENAME}'
FEDORA_LOOKASIDE_PATH = 'https://src.fedoraproject.org/repo/pkgs/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${FILENAME}'
STREAM_LOOKASIDE_PATH = 'https://sources.stream.centos.org/sources/rpms/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${HASH}/${FILENAME}'
FEDORA_LOOKASIDE_PATH = 'https://src.fedoraproject.org/repo/pkgs/${PKG_NAME}/${FILENAME}/${HASH_TYPE}/${HASH}/${FILENAME}'
ROCKY8_LOOKASIDE_PATH = 'https://rocky-linux-sources-staging.a1.rockylinux.org/${HASH}'
ROCKY_LOOKASIDE_PATH = 'https://sources.build.resf.org/${HASH}'

View file

@ -16,6 +16,7 @@ __all__ = [
'MissingValueError',
'ConfigurationError',
'FileNotFound',
'DownloadError',
'MockGenericError',
'MockUnexpectedError',
'MockInvalidConfError',
@ -75,6 +76,12 @@ class FileNotFound(GenericError):
"""
fault_code = errconst.ERR_NOTFOUND
class DownloadError(GenericError):
    """
    A file could not be downloaded or its checksum could not be verified
    """
    fault_code = errconst.ERR_DOWNLOAD
class MockGenericError(GenericError):
"""
Mock error exceptions

View file

@ -1,10 +1,14 @@
"""
Generic functions
"""
import os
import sys
import datetime
import hashlib
import pycurl
from urllib.parse import quote as urlquote
from pv2.util import error as err
from pv2.util import fileutil
# General utilities
__all__ = [
@ -14,7 +18,9 @@ __all__ = [
'generate_password_hash',
'ordered',
'to_unicode',
'trim_non_empty_string'
'trim_non_empty_string',
'hash_checker',
'download_file'
]
def to_unicode(string: str) -> str:
@ -86,3 +92,82 @@ def safe_encoding(data: str) -> str:
# the urllib library currently doesn't reserve this
quoter = quoter.replace('~', '%7e')
return quoter
def hash_checker(data: str) -> str:
    """
    Guesses the hash type of a string from its length alone

    128, 64, 40 and 32 characters map to sha512, sha256, sha1 and md5. Any
    other length raises a GenericError. The content of the string is not
    inspected.
    """
    types_by_length = {
        128: 'sha512',
        64: 'sha256',
        40: 'sha1',
        32: 'md5',
    }
    hashtype = types_by_length.get(len(data))
    if hashtype is None:
        raise err.GenericError('Data is either invalid or is not a hash.')
    return hashtype
def download_file(url: str, to_path: str, checksum=None, hashtype=None):
    """
    Downloads a file with pycurl and optionally verifies its checksum

    url: location to download from
    to_path: local path the download is written to
    checksum, hashtype: expected checksum and its hash type. Verification
    only happens when both are set.

    If to_path already exists, it is checked against the checksum. A
    matching file is kept and nothing is downloaded.

    Raises DownloadError when:
      * to_path exists but no checksum/hashtype was given to verify it
      * to_path exists and its checksum does not match
      * the transfer fails or the response code is not 200
      * the downloaded file does not match the checksum

    A failed or invalid download is removed from disk before raising.
    """
    url = url.encode('utf-8')

    if os.path.exists(to_path):
        if not checksum or not hashtype:
            # pylint: disable=line-too-long
            raise err.DownloadError(f'File {to_path} already exists, but a checksum was not provided to verify it.')

        file_checksum = fileutil.get_checksum(to_path, hashtype=hashtype)
        if file_checksum != checksum:
            raise err.DownloadError('File exists, but checksum does not match')

        print('File already downloaded and checksum is valid.')
        # Stop here: opening the path for writing below would truncate the
        # verified file and fetch it a second time.
        return

    # Path doesn't exist, download it.
    print(f'Downloading {to_path}')
    with open(to_path, 'wb') as dlf:
        # todo: add stdout or logging for this
        # pylint: disable=c-extension-no-member
        curl = pycurl.Curl()
        curl.setopt(pycurl.URL, url)
        curl.setopt(pycurl.HTTPHEADER, ['Pragma:'])
        curl.setopt(pycurl.NOPROGRESS, True)
        curl.setopt(pycurl.OPT_FILETIME, True)
        curl.setopt(pycurl.WRITEDATA, dlf)
        curl.setopt(pycurl.LOW_SPEED_LIMIT, 1000)
        curl.setopt(pycurl.LOW_SPEED_TIME, 300)
        curl.setopt(pycurl.FOLLOWLOCATION, 1)

        try:
            curl.perform()
            timestamp = curl.getinfo(pycurl.INFO_FILETIME)
            status = curl.getinfo(pycurl.RESPONSE_CODE)
        except Exception as exc:
            os.remove(to_path)
            # Keep the original error attached as the cause
            raise err.DownloadError(exc) from exc
        finally:
            curl.close()

    if sys.stdout.isatty():
        sys.stdout.write('\n')
        sys.stdout.flush()

    if status != 200:
        print(f'Removing invalid file {to_path}')
        os.remove(to_path)
        raise err.DownloadError(f'There was an error downloading: {status}')

    # curl reports -1 when the remote file time is unknown; leave the local
    # modification time alone in that case.
    if timestamp >= 0:
        os.utime(to_path, (timestamp, timestamp))

    # verify checksum
    if not checksum or not hashtype:
        print('checksum and hashtype were not set, skipping verification')
        return

    file_checksum = fileutil.get_checksum(to_path, hashtype=hashtype)
    if file_checksum != checksum:
        os.remove(to_path)
        raise err.DownloadError('Checksums do not match for downloaded file')

View file

@ -57,6 +57,7 @@ def clone(
clone a repo. if branch is None, it will just clone the repo in general and
you'll be expected to checkout.
"""
clone_path = to_path
if not to_path:
clone_path = f'/var/tmp/{repo_name}'

View file

@ -1,6 +1,6 @@
[project]
name = "pv2"
version = "0.9.1"
version = "0.9.2"
description = "PV2 backend framework module"
readme = "README.md"
authors = [
@ -16,7 +16,8 @@ requires-python = ">=3.6"
dependencies = [
"GitPython >= 3.1.30",
"lxml >= 4.6.5",
"file-magic >= 0.4.0"
"file-magic >= 0.4.0",
"pycurl >= 7.43.0.6"
]
[project.urls]