distro-tools/apollo/rpmworker/repomd.py
2023-02-04 09:28:38 +01:00

128 lines
4.1 KiB
Python

import gzip
import lzma
import re
from xml.etree import ElementTree as ET
from urllib.parse import urlparse
from os import path
import aiohttp
import yaml
NVRA_RE = re.compile(
r"^(\S+)-([\w~%.+]+)-(\w+(?:\.[\w~%+]+)+?)(?:\.(\w+))?(?:\.rpm)?$"
)
NEVRA_RE = re.compile(
r"^(\S+)-(\d):([\w~%.+]+)-(\w+(?:\.[\w~%+]+)+?)(?:\.(\w+))?(?:\.rpm)?$"
)
EPOCH_RE = re.compile(r"(\d+):")
DIST_RE = re.compile(r"(\.el\d(?:_\d|))")
MODULE_DIST_RE = re.compile(r"\.module.+$")
def clean_nvra_pkg(matching_pkg: ET.Element) -> str:
name = matching_pkg.find("{http://linux.duke.edu/metadata/common}name").text
version = matching_pkg.find(
"{http://linux.duke.edu/metadata/common}version"
).attrib["ver"]
release = matching_pkg.find(
"{http://linux.duke.edu/metadata/common}version"
).attrib["rel"]
arch = matching_pkg.find("{http://linux.duke.edu/metadata/common}arch").text
clean_release = MODULE_DIST_RE.sub("", DIST_RE.sub("", release))
cleaned = f"{name}-{version}-{clean_release}.{arch}"
if ".module+" in release:
cleaned = f"module.{cleaned}"
return cleaned
def clean_nvra(nvra_raw: str) -> str:
nvra = NVRA_RE.search(nvra_raw)
name = nvra.group(1)
version = nvra.group(2)
release = nvra.group(3)
arch = nvra.group(4)
clean_release = MODULE_DIST_RE.sub("", DIST_RE.sub("", release))
cleaned = f"{name}-{version}-{clean_release}.{arch}"
if ".module+" in release:
cleaned = f"module.{cleaned}"
return cleaned
async def download_xml(
url: str, gz: bool = False, xz: bool = False
) -> ET.Element:
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
if resp.status != 200:
raise Exception(f"Failed to get {url}: {resp.status}")
# Do an in memory gzip decompression if gz is set
if gz:
return ET.fromstring(
gzip.decompress(await resp.read()).decode("utf-8")
)
elif xz:
return ET.fromstring(
lzma.decompress(await resp.read()).decode("utf-8")
)
return ET.fromstring(await resp.text())
async def download_yaml(url: str, gz: bool = False, xz: bool = False) -> any:
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
if resp.status != 200:
raise Exception(f"Failed to get {url}: {resp.status}")
# Do an in memory gzip decompression if gz is set
if gz:
return yaml.full_load_all(
gzip.decompress(await resp.read()).decode("utf-8")
)
elif xz:
return yaml.full_load_all(
lzma.decompress(await resp.read()).decode("utf-8")
)
return yaml.full_load_all(await resp.text())
async def get_data_from_repomd(
url: str,
data_type: str,
el: ET.Element,
is_yaml=False,
):
# There is a top-most repomd element in repomd
# Under there is revision and multiple data elements
# We want the data element with type="data_type"
# Under that is location with href
# That href is the location of the data
for data in el.findall("{http://linux.duke.edu/metadata/repo}data"):
if data.attrib["type"] == data_type:
location = data.find(
"{http://linux.duke.edu/metadata/repo}location"
)
parsed_url = urlparse(url)
new_path = path.abspath(
path.join(parsed_url.path, "../..", location.attrib["href"])
)
data_url = parsed_url._replace(path=new_path).geturl()
if is_yaml:
return await download_yaml(
data_url,
gz=data_url.endswith(".gz"),
xz=data_url.endswith(".xz"),
)
return await download_xml(
data_url,
gz=data_url.endswith(".gz"),
xz=data_url.endswith(".xz"),
)
return None