Merge pull request #7 from skip77/main

This commit is contained in:
Mustafa Gezen 2023-02-24 02:16:54 +01:00 committed by GitHub
commit 2c6d6f0b0a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 212 additions and 54 deletions

View File

@ -1,14 +1,14 @@
# srpmproc # srpmproc
Upstream package importer with auto patching. Reference implementation for OpenPatch Upstream package importer with auto patching. Reference implementation for OpenPatch
# Usage ## Usage
``` ```
Usage: Usage:
srpmproc [flags] srpmproc [flags]
srpmproc [command] srpmproc [command]
Available Commands: Available Commands:
fetch fetch
help Help about any command help Help about any command
Flags: Flags:
@ -16,7 +16,8 @@ Flags:
--basic-username string Basic auth username --basic-username string Basic auth username
--branch-prefix string Branch prefix (replaces import-branch-prefix) (default "r") --branch-prefix string Branch prefix (replaces import-branch-prefix) (default "r")
--branch-suffix string Branch suffix to use for imported branches --branch-suffix string Branch suffix to use for imported branches
--cdn-url string CDN URL to download blobs from (default "https://git.centos.org/sources") --cdn string CDN URL shortcuts for well-known distros, auto-assigns --cdn-url. Valid values: rocky8, rocky, fedora, centos, centos-stream. Setting this overrides --cdn-url
--cdn-url string CDN URL to download blobs from. Simple URL follows default rocky/centos patterns. Can be customized using macros (see docs) (default "https://git.centos.org/sources")
--git-committer-email string Email of committer (default "rockyautomation@rockylinux.org") --git-committer-email string Email of committer (default "rockyautomation@rockylinux.org")
--git-committer-name string Name of committer (default "rockyautomation") --git-committer-name string Name of committer (default "rockyautomation")
-h, --help help for srpmproc -h, --help help for srpmproc
@ -28,6 +29,8 @@ Flags:
--no-dup-mode If enabled, skips already imported tags --no-dup-mode If enabled, skips already imported tags
--no-storage-download If enabled, blobs are always downloaded from upstream --no-storage-download If enabled, blobs are always downloaded from upstream
--no-storage-upload If enabled, blobs are not uploaded to blob storage --no-storage-upload If enabled, blobs are not uploaded to blob storage
--package-release string Package release to fetch
--package-version string Package version to fetch
--rpm-prefix string Where to retrieve SRPM content. Only used when source-rpm is not a local file (default "https://git.centos.org/rpms") --rpm-prefix string Where to retrieve SRPM content. Only used when source-rpm is not a local file (default "https://git.centos.org/rpms")
--single-tag string If set, only this tag is imported --single-tag string If set, only this tag is imported
--source-rpm string Location of RPM to process --source-rpm string Location of RPM to process
@ -35,9 +38,42 @@ Flags:
--ssh-user string SSH User (default "git") --ssh-user string SSH User (default "git")
--storage-addr string Bucket to use as blob storage --storage-addr string Bucket to use as blob storage
--strict-branch-mode If enabled, only branches with the calculated name are imported and not prefix only --strict-branch-mode If enabled, only branches with the calculated name are imported and not prefix only
--taglessmode Tagless mode: If set, pull the latest commit from the branch and determine version numbers from spec file. This is auto-tried if tags aren't found.
--tmpfs-mode string If set, packages are imported to path and patched but not pushed --tmpfs-mode string If set, packages are imported to path and patched but not pushed
--upstream-prefix string Upstream git repository prefix --upstream-prefix string Upstream git repository prefix
--version int Upstream version --version int Upstream version
Use "srpmproc [command] --help" for more information about a command. Use "srpmproc [command] --help" for more information about a command.
``` ```
<br />
## Examples:
1. Import the kernel package from git.centos.org/rpms/ into the local repository /opt/gitroot/rpms/kernel.git/. The lookaside source tarballs are downloaded from the default CentOS file server location to the local folder `/opt/fake_s3/`. We grab branch "c8" (import prefix plus RHEL version), and it is committed as branch "r8" (branch prefix plus RHEL version). This assumes that `/opt/fake_s3` exists, and that `/opt/gitroot/rpms/kernel.git` exists and is a git repository of some kind (even an empty one).
```
srpmproc --branch-prefix "r" --import-branch-prefix "c" --rpm-prefix "https://git.centos.org/rpms" --version 8 --storage-addr file:///opt/fake_s3 --upstream-prefix file:///opt/gitroot --cdn centos --strict-branch-mode --source-rpm kernel
```
<br />
## CDN and --cdn-url
The `--cdn-url` option accepts Go-style template placeholders for crafting complex URL patterns. The available placeholders are: `{{.Name}}` (package name), `{{.Hash}}` (hash of lookaside file), `{{.Hashtype}}` (hash type of file, like "sha256" or "sha512"), `{{.Branch}}` (the branch we are importing), and `{{.Filename}}` (the lookaside file's name as it appears in SOURCES/). You can include any of these in the `--cdn-url` value to craft your lookaside pattern.
For example, to download lookaside files from CentOS Stream 9, include the following in your command:
```
--cdn-url "https://sources.stream.centos.org/sources/rpms/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}"
```
**Default Behavior:** If no templates are used, `--cdn-url` falls back to the traditional RHEL import pattern: `<CDN_URL>/<NAME>/<BRANCH>/<HASH>`. If that fails, one further fallback is attempted: the simple `<CDN_URL>/<HASH>` pattern. Together these cover the common Rocky Linux and RHEL/CentOS imports when only the base lookaside URL is given. If no `--cdn-url` is specified, it defaults to "https://git.centos.org/sources" (for RHEL imports into Rocky Linux).
**CDN Shorthand:** For convenience, lookaside patterns for some popular distros are provided via the `--cdn` option. You can specify this instead of the longer `--cdn-url`. For example, when importing from CentOS Stream 9, you could use `--cdn centos-stream`.

View File

@ -58,7 +58,7 @@ var (
packageVersion string packageVersion string
packageRelease string packageRelease string
taglessMode bool taglessMode bool
altLookAside bool cdn string
) )
var root = &cobra.Command{ var root = &cobra.Command{
@ -96,7 +96,7 @@ func mn(_ *cobra.Command, _ []string) {
PackageVersion: packageVersion, PackageVersion: packageVersion,
PackageRelease: packageRelease, PackageRelease: packageRelease,
TaglessMode: taglessMode, TaglessMode: taglessMode,
AltLookAside: altLookAside, Cdn: cdn,
}) })
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
@ -131,7 +131,7 @@ func main() {
root.Flags().StringVar(&rpmPrefix, "rpm-prefix", "https://git.centos.org/rpms", "Where to retrieve SRPM content. Only used when source-rpm is not a local file") root.Flags().StringVar(&rpmPrefix, "rpm-prefix", "https://git.centos.org/rpms", "Where to retrieve SRPM content. Only used when source-rpm is not a local file")
root.Flags().StringVar(&importBranchPrefix, "import-branch-prefix", "c", "Import branch prefix") root.Flags().StringVar(&importBranchPrefix, "import-branch-prefix", "c", "Import branch prefix")
root.Flags().StringVar(&branchPrefix, "branch-prefix", "r", "Branch prefix (replaces import-branch-prefix)") root.Flags().StringVar(&branchPrefix, "branch-prefix", "r", "Branch prefix (replaces import-branch-prefix)")
root.Flags().StringVar(&cdnUrl, "cdn-url", "https://git.centos.org/sources", "CDN URL to download blobs from") root.Flags().StringVar(&cdnUrl, "cdn-url", "https://git.centos.org/sources", "CDN URL to download blobs from. Simple URL follows default rocky/centos patterns. Can be customized using macros (see docs)")
root.Flags().StringVar(&singleTag, "single-tag", "", "If set, only this tag is imported") root.Flags().StringVar(&singleTag, "single-tag", "", "If set, only this tag is imported")
root.Flags().BoolVar(&noDupMode, "no-dup-mode", false, "If enabled, skips already imported tags") root.Flags().BoolVar(&noDupMode, "no-dup-mode", false, "If enabled, skips already imported tags")
root.Flags().BoolVar(&moduleMode, "module-mode", false, "If enabled, imports a module instead of a package") root.Flags().BoolVar(&moduleMode, "module-mode", false, "If enabled, imports a module instead of a package")
@ -146,8 +146,8 @@ func main() {
root.Flags().StringVar(&basicPassword, "basic-password", "", "Basic auth password") root.Flags().StringVar(&basicPassword, "basic-password", "", "Basic auth password")
root.Flags().StringVar(&packageVersion, "package-version", "", "Package version to fetch") root.Flags().StringVar(&packageVersion, "package-version", "", "Package version to fetch")
root.Flags().StringVar(&packageRelease, "package-release", "", "Package release to fetch") root.Flags().StringVar(&packageRelease, "package-release", "", "Package release to fetch")
root.Flags().BoolVar(&taglessMode, "taglessmode", false, "Tagless mode: If set, pull the latest commit from a branch, and determine version info from spec file (aka upstream versions aren't tagged)") root.Flags().BoolVar(&taglessMode, "taglessmode", false, "Tagless mode: If set, pull the latest commit from the branch and determine version numbers from spec file. This is auto-tried if tags aren't found.")
root.Flags().BoolVar(&altLookAside, "altlookaside", false, "If set, uses the new CentOS Stream lookaside pattern (https://<SITE_PREFIX>/<RPM_NAME>/<FILE_NAME>/<SHA_VERSION>/<SHA_SUM>/<FILE_NAME>)") root.Flags().StringVar(&cdn, "cdn", "", "CDN URL shortcuts for well-known distros, auto-assigns --cdn-url. Valid values: rocky8, rocky, fedora, centos, centos-stream. Setting this overrides --cdn-url")
if err := root.Execute(); err != nil { if err := root.Execute(); err != nil {
log.Fatal(err) log.Fatal(err)

View File

@ -59,5 +59,5 @@ type ProcessData struct {
PackageVersion string PackageVersion string
PackageRelease string PackageRelease string
TaglessMode bool TaglessMode bool
AltLookAside bool Cdn string
} }

View File

@ -21,12 +21,15 @@
package modes package modes
import ( import (
"bytes"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log"
"net/http" "net/http"
"path/filepath" "path/filepath"
"sort" "sort"
"strings" "strings"
"text/template"
"time" "time"
"github.com/go-git/go-git/v5/plumbing/transport" "github.com/go-git/go-git/v5/plumbing/transport"
@ -46,6 +49,15 @@ type remoteTarget struct {
when time.Time when time.Time
} }
// Lookaside holds the values that can be substituted into a CDN URL template
// through {{.Field}} placeholders, e.g. {{.Name}} or {{.Hash}}.
type Lookaside struct {
	Name     string // package name ({{.Name}})
	Branch   string // branch being imported ({{.Branch}})
	Hash     string // hash of the lookaside file ({{.Hash}})
	Hashtype string // hash type of the file, like "sha256" or "sha512" ({{.Hashtype}})
	Filename string // lookaside file name without the "SOURCES/" prefix ({{.Filename}})
}
type remoteTargetSlice []remoteTarget type remoteTargetSlice []remoteTarget
func (p remoteTargetSlice) Len() int { func (p remoteTargetSlice) Len() int {
@ -341,44 +353,66 @@ func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error {
} else { } else {
url := "" url := ""
// Alternate lookaside logic: if enabled, we pull from a new URL pattern
if !pd.AltLookAside { // We need to figure out the hashtype for templating purposes:
url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash) hashType := "sha512"
} else { switch len(hash) {
// We first need the hash algorithm based on length of hash: case 128:
hashType := "sha512" hashType = "sha512"
switch len(hash) { case 64:
case 128: hashType = "sha256"
hashType = "sha512" case 40:
case 64: hashType = "sha1"
hashType = "sha256" case 32:
case 40: hashType = "md5"
hashType = "sha1" }
case 32:
hashType = "md5" // need the name of the file without "SOURCES/":
fileName := strings.Split(path, "/")[1]
// Feed our template info to ProcessUrl and transform to the real values: ( {{.Name}}, {{.Branch}}, {{.Hash}}, {{.Hashtype}}, {{.Filename}} )
url, hasTemplate := ProcessUrl(pd.CdnUrl, md.Name, branchName, hash, hashType, fileName)
var req *http.Request
var resp *http.Response
// Download the --cdn-url given, but *only* if it contains template strings ( {{.Name}} , {{.Hash}} , etc. )
// Otherwise we need to fall back to the traditional cdn-url patterns
if hasTemplate {
pd.Log.Printf("downloading %s", url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("could not create new http request: %v", err)
} }
req.Header.Set("Accept-Encoding", "*")
// need the name of the file without "SOURCES/": resp, err = client.Do(req)
fileName := strings.Split(path, "/")[1] if err != nil {
return fmt.Errorf("could not download dist-git file: %v", err)
// Alt. lookaside url is of the form: <cdn> / <name> / <filename> / <hashtype> / <hash> / <filename> }
url = fmt.Sprintf("%s/%s/%s/%s/%s/%s", pd.CdnUrl, md.Name, fileName, hashType, hash, fileName)
} }
pd.Log.Printf("downloading %s", url) // Default cdn-url: If we don't have a templated download string, try the default <SITE>/<PKG>/<BRANCH>/<HASH> pattern:
if resp == nil || resp.StatusCode != http.StatusOK {
req, err := http.NewRequest("GET", url, nil) url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash)
if err != nil { pd.Log.Printf("Attempting default URL: %s", url)
return fmt.Errorf("could not create new http request: %v", err) req, err = http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("could not create new http request: %v", err)
}
req.Header.Set("Accept-Encoding", "*")
resp, err = client.Do(req)
if err != nil {
return fmt.Errorf("could not download dist-git file: %v", err)
}
} }
req.Header.Set("Accept-Encoding", "*")
resp, err := client.Do(req) // If the default URL fails, we have one more pattern to try. The simple <SITE>/<HASH> pattern
if err != nil { // If this one fails, we are truly lost, and have to bail out w/ an error:
return fmt.Errorf("could not download dist-git file: %v", err) if resp == nil || resp.StatusCode != http.StatusOK {
}
if resp.StatusCode != http.StatusOK {
url = fmt.Sprintf("%s/%s", pd.CdnUrl, hash) url = fmt.Sprintf("%s/%s", pd.CdnUrl, hash)
pd.Log.Printf("Attempting 2nd fallback URL: %s", url)
req, err = http.NewRequest("GET", url, nil) req, err = http.NewRequest("GET", url, nil)
if err != nil { if err != nil {
return fmt.Errorf("could not create new http request: %v", err) return fmt.Errorf("could not create new http request: %v", err)
@ -458,3 +492,24 @@ func (g *GitMode) ImportName(pd *data.ProcessData, md *data.ModeData) string {
return strings.Replace(strings.TrimPrefix(md.TagBranch, "refs/heads/"), "%", "_", -1) return strings.Replace(strings.TrimPrefix(md.TagBranch, "refs/heads/"), "%", "_", -1)
} }
// ProcessUrl expands the template placeholders in cdnUrl ( {{.Name}},
// {{.Branch}}, {{.Hash}}, {{.Hashtype}}, {{.Filename}} ) using the supplied
// values.
//
// It returns the expanded URL and true when cdnUrl contained template actions.
// When cdnUrl contains no template actions, or cannot be parsed as a template,
// cdnUrl is returned unchanged together with false so the caller can fall back
// to the default lookaside URL patterns.
//
// A template that parses but cannot be executed (for example one that
// references a field Lookaside does not have) terminates the program through
// log.Fatalf.
func ProcessUrl(cdnUrl string, name string, branch string, hash string, hashtype string, filename string) (string, bool) {
	// A plain URL is also a valid, text-only template, so a successful parse
	// alone does not prove there was anything to substitute. Without this
	// check a bare base URL would be reported as templated and fetched as if
	// it were the lookaside file itself.
	if !strings.Contains(cdnUrl, "{{") || !strings.Contains(cdnUrl, "}}") {
		return cdnUrl, false
	}

	// If we run into trouble with our template parsing, we'll just return the
	// cdnUrl, exactly as we found it.
	tmpl, err := template.New("").Parse(cdnUrl)
	if err != nil {
		return cdnUrl, false
	}

	values := Lookaside{
		Name:     name,
		Branch:   branch,
		Hash:     hash,
		Hashtype: hashtype,
		Filename: filename,
	}

	var result bytes.Buffer
	if err := tmpl.Execute(&result, values); err != nil {
		log.Fatalf("ERROR: Could not process CDN URL template(s) from URL string: %s: %v\n", cdnUrl, err)
	}

	return result.String(), true
}

View File

@ -101,8 +101,13 @@ type ProcessDataRequest struct {
PackageVersion string PackageVersion string
PackageRelease string PackageRelease string
TaglessMode bool TaglessMode bool
AltLookAside bool Cdn string
}
// LookasidePath pairs a distro shortcut name, as accepted by --cdn, with the
// lookaside URL pattern that is assigned to --cdn-url for that distro.
type LookasidePath struct {
	Distro string // shortcut name, e.g. "centos" or "fedora"
	Url    string // lookaside URL pattern; may contain {{.Field}} placeholders
}
func gitlabify(str string) string { func gitlabify(str string) string {
@ -113,7 +118,57 @@ func gitlabify(str string) string {
return strings.Replace(str, "+", "plus", -1) return strings.Replace(str, "+", "plus", -1)
} }
// StaticLookasides returns the built-in table of well-known distros and their
// lookaside URL patterns. When one of these distro names is passed as --cdn
// (ex: "--cdn fedora"), the matching pattern overrides --cdn-url.
func StaticLookasides() []LookasidePath {
	return []LookasidePath{
		{
			Distro: "centos",
			Url:    "https://git.centos.org/sources/{{.Name}}/{{.Branch}}/{{.Hash}}",
		},
		{
			Distro: "centos-stream",
			Url:    "https://sources.stream.centos.org/sources/rpms/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}",
		},
		{
			Distro: "rocky8",
			Url:    "https://rocky-linux-sources-staging.a1.rockylinux.org/{{.Hash}}",
		},
		{
			Distro: "rocky",
			Url:    "https://sources.build.resf.org/{{.Hash}}",
		},
		{
			Distro: "fedora",
			Url:    "https://src.fedoraproject.org/repo/pkgs/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}",
		},
	}
}
// FindDistro looks up a "--cdn" value such as "centos" in the list returned by
// StaticLookasides and returns the lookaside URL pattern of the matching
// distro together with true. The comparison is made against the lower-cased
// form of cdn. When no distro matches, it returns "" and false, and the caller
// is expected to report the error.
func FindDistro(cdn string) (string, bool) {
	wanted := strings.ToLower(cdn)

	for _, entry := range StaticLookasides() {
		if entry.Distro != wanted {
			continue
		}
		return entry.Url, true
	}

	return "", false
}
func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) { func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
// Build the logger to use for the data import
var writer io.Writer = os.Stdout
if req.LogWriter != nil {
writer = req.LogWriter
}
logger := log.New(writer, "", log.LstdFlags)
// Set defaults // Set defaults
if req.ModulePrefix == "" { if req.ModulePrefix == "" {
req.ModulePrefix = ModulePrefixCentOS req.ModulePrefix = ModulePrefixCentOS
@ -139,13 +194,22 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
if req.BranchPrefix == "" { if req.BranchPrefix == "" {
req.BranchPrefix = "r" req.BranchPrefix = "r"
} }
if req.CdnUrl == "" && !req.AltLookAside { if req.CdnUrl == "" {
req.CdnUrl = "https://git.centos.org/sources" req.CdnUrl = "https://git.centos.org/sources"
} }
// If altlookaside is enabled, and the CdnUrl hasn't been changed, then automatically set it to the default
// CentOS Stream (the new pattern very much won't work with the old git.centos.org/sources site) // If a Cdn distro is defined, we try to find a match from StaticLookasides() array of structs
if (req.CdnUrl == "https://git.centos.org/sources" || req.CdnUrl == "") && req.AltLookAside { // see if we have a match to --cdn (matching values are things like fedora, centos, rocky8, etc.)
req.CdnUrl = "https://sources.stream.centos.org/sources/rpms" // If we match, then we want to short-circuit the CdnUrl to the assigned distro's one
if req.Cdn != "" {
newCdn, foundDistro := FindDistro(req.Cdn)
if !foundDistro {
return nil, fmt.Errorf("Error, distro name given as --cdn argument is not valid.")
}
req.CdnUrl = newCdn
logger.Printf("Discovered --cdn distro: %s . Using override CDN URL Pattern: %s", req.Cdn, req.CdnUrl)
} }
// Validate required // Validate required
@ -214,12 +278,6 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
reqFsCreator = req.FsCreator reqFsCreator = req.FsCreator
} }
var writer io.Writer = os.Stdout
if req.LogWriter != nil {
writer = req.LogWriter
}
logger := log.New(writer, "", log.LstdFlags)
if req.TmpFsMode != "" { if req.TmpFsMode != "" {
logger.Printf("using tmpfs dir: %s", req.TmpFsMode) logger.Printf("using tmpfs dir: %s", req.TmpFsMode)
fsCreator = func(branch string) (billy.Filesystem, error) { fsCreator = func(branch string) (billy.Filesystem, error) {
@ -276,7 +334,7 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
PackageVersion: req.PackageVersion, PackageVersion: req.PackageVersion,
PackageRelease: req.PackageRelease, PackageRelease: req.PackageRelease,
TaglessMode: req.TaglessMode, TaglessMode: req.TaglessMode,
AltLookAside: req.AltLookAside, Cdn: req.Cdn,
}, nil }, nil
} }
@ -334,6 +392,7 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
list, err := remote.List(&git.ListOptions{ list, err := remote.List(&git.ListOptions{
Auth: pd.Authenticator, Auth: pd.Authenticator,
}) })
if err != nil { if err != nil {
log.Println("ignoring no-dup-mode") log.Println("ignoring no-dup-mode")
} else { } else {
@ -367,6 +426,14 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
} }
} }
// If we have no valid branches to consider, then we'll automatically switch to attempt a tagless import:
if len(md.Branches) == 0 {
log.Println("No valid tags (refs/tags/imports/*) found in repository! Switching to perform a tagless import.")
pd.TaglessMode = true
result, err := processRPMTagless(pd)
return result, err
}
for _, branch := range md.Branches { for _, branch := range md.Branches {
md.Repo = &sourceRepo md.Repo = &sourceRepo
md.Worktree = &sourceWorktree md.Worktree = &sourceWorktree