From 32377940714edcf660cd74ffbc907e5cea345383 Mon Sep 17 00:00:00 2001 From: Skip Grube Date: Wed, 22 Feb 2023 23:23:47 -0500 Subject: [PATCH] Updates to add flexibility to lookasides and tagless handling: - Added template "macros" to allow for complex --cdn-url specifications ( {{.Name}}, {{.Branch}}, etc.) - Added --cdn option with pre-set URL patterns to simplify use - Kept default fallback behavior and search patterns of --cdn-url if templates are not used - Removed "--altlookaside" option, as the above features make it redundant - Automatic tagless: If a proper git version/imports tag isn't found, a "tagless-mode" import will be attempted automatically at run-time - taglessmode command line option kept in case user still wants to force a tagless import -Skip Grube --- README.md | 42 +++++++++++++- cmd/srpmproc/main.go | 10 ++-- pkg/data/process.go | 2 +- pkg/modes/git.go | 120 +++++++++++++++++++++++++++++----------- pkg/srpmproc/process.go | 74 ++++++++++++++++++++----- 5 files changed, 194 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 6db1eb6..a68f475 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ # srpmproc Upstream package importer with auto patching. Reference implementation for OpenPatch -# Usage +## Usage ``` Usage: srpmproc [flags] srpmproc [command] Available Commands: - fetch + fetch help Help about any command Flags: @@ -16,7 +16,8 @@ Flags: --basic-username string Basic auth username --branch-prefix string Branch prefix (replaces import-branch-prefix) (default "r") --branch-suffix string Branch suffix to use for imported branches - --cdn-url string CDN URL to download blobs from (default "https://git.centos.org/sources") + --cdn string CDN URL shortcuts for well-known distros, auto-assigns --cdn-url. Valid values: rocky8, rocky, fedora, centos, centos-stream. Setting this overrides --cdn-url + --cdn-url string CDN URL to download blobs from. Simple URL follows default rocky/centos patterns. 
Can be customized using macros (see docs) (default "https://git.centos.org/sources") --git-committer-email string Email of committer (default "rockyautomation@rockylinux.org") --git-committer-name string Name of committer (default "rockyautomation") -h, --help help for srpmproc @@ -28,6 +29,8 @@ Flags: --no-dup-mode If enabled, skips already imported tags --no-storage-download If enabled, blobs are always downloaded from upstream --no-storage-upload If enabled, blobs are not uploaded to blob storage + --package-release string Package release to fetch + --package-version string Package version to fetch --rpm-prefix string Where to retrieve SRPM content. Only used when source-rpm is not a local file (default "https://git.centos.org/rpms") --single-tag string If set, only this tag is imported --source-rpm string Location of RPM to process @@ -35,9 +38,42 @@ Flags: --ssh-user string SSH User (default "git") --storage-addr string Bucket to use as blob storage --strict-branch-mode If enabled, only branches with the calculated name are imported and not prefix only + --taglessmode Tagless mode: If set, pull the latest commit from the branch and determine version numbers from spec file. This is auto-tried if tags aren't found. --tmpfs-mode string If set, packages are imported to path and patched but not pushed --upstream-prefix string Upstream git repository prefix --version int Upstream version Use "srpmproc [command] --help" for more information about a command. ``` + +
+ +## Examples: + +1. Import the kernel package from git.centos.org/rpms/, to local folder /opt/gitroot/rpms/kernel.git/ . Download the lookaside source tarballs from the default CentOS file server location to local folder `/opt/fake_s3/` . We want to grab branch "c8" (import prefix plus RHEL version), and it will be committed as branch "r8" (branch prefix plus RHEL version). This assumes that `/opt/fake_s3` exists, and `/opt/gitroot/rpms/kernel.git` exists and is a git repository of some kind (even an empty one). + +``` +srpmproc --branch-prefix "r" --import-branch-prefix "c" --rpm-prefix "https://git.centos.org/rpms" --version 8 --storage-addr file:///opt/fake_s3 --upstream-prefix file:///opt/gitroot --cdn centos --strict-branch-mode --source-rpm kernel +``` + +
+ +## CDN and --cdn-url +The --cdn-url option allows for Go-style templates to craft complex URL patterns. These templates are: `{{.Name}}` (package name), `{{.Hash}}` (hash of lookaside file), `{{.Hashtype}}` (hash type of file, like "sha256" or "sha512"), `{{.Branch}}` (the branch we are importing), and `{{.Filename}}` (the lookaside file's name as it appears in SOURCES/). You can add these values as part of --cdn-url to craft your lookaside pattern. + + +For example, if I wanted my lookaside downloads to come from CentOS 9 Stream, I would use as part of my command: +``` +--cdn-url "https://sources.stream.centos.org/sources/rpms/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}" +``` + + +**Default Behavior:** If these templates are not used, the default behavior of `--cdn-url` is to fall back on the traditional RHEL import pattern: `<cdn-url>/<name>/<branch>/<hash>`. If that fails, a further fallback is attempted, the simple: `<cdn-url>/<hash>`. These cover the common Rocky Linux and RHEL/CentOS imports if the base lookaside URL is the only thing given. If no `--cdn-url` is specified, it defaults to "https://git.centos.org/sources" (for RHEL imports into Rocky Linux). + + +**CDN Shorthand:** For convenience, some lookaside patterns for popular distros are provided via the `--cdn` option. You can specify this without needing to use the longer `--cdn-url`.
For example, when importing from CentOS 9 Stream, you could use `--cdn centos-stream` + + + + + diff --git a/cmd/srpmproc/main.go b/cmd/srpmproc/main.go index 3855a48..2a9b96c 100644 --- a/cmd/srpmproc/main.go +++ b/cmd/srpmproc/main.go @@ -58,7 +58,7 @@ var ( packageVersion string packageRelease string taglessMode bool - altLookAside bool + cdn string ) var root = &cobra.Command{ @@ -96,7 +96,7 @@ func mn(_ *cobra.Command, _ []string) { PackageVersion: packageVersion, PackageRelease: packageRelease, TaglessMode: taglessMode, - AltLookAside: altLookAside, + Cdn: cdn, }) if err != nil { log.Fatal(err) @@ -131,7 +131,7 @@ func main() { root.Flags().StringVar(&rpmPrefix, "rpm-prefix", "https://git.centos.org/rpms", "Where to retrieve SRPM content. Only used when source-rpm is not a local file") root.Flags().StringVar(&importBranchPrefix, "import-branch-prefix", "c", "Import branch prefix") root.Flags().StringVar(&branchPrefix, "branch-prefix", "r", "Branch prefix (replaces import-branch-prefix)") - root.Flags().StringVar(&cdnUrl, "cdn-url", "https://git.centos.org/sources", "CDN URL to download blobs from") + root.Flags().StringVar(&cdnUrl, "cdn-url", "https://git.centos.org/sources", "CDN URL to download blobs from. Simple URL follows default rocky/centos patterns. 
Can be customized using macros (see docs)") root.Flags().StringVar(&singleTag, "single-tag", "", "If set, only this tag is imported") root.Flags().BoolVar(&noDupMode, "no-dup-mode", false, "If enabled, skips already imported tags") root.Flags().BoolVar(&moduleMode, "module-mode", false, "If enabled, imports a module instead of a package") @@ -146,8 +146,8 @@ func main() { root.Flags().StringVar(&basicPassword, "basic-password", "", "Basic auth password") root.Flags().StringVar(&packageVersion, "package-version", "", "Package version to fetch") root.Flags().StringVar(&packageRelease, "package-release", "", "Package release to fetch") - root.Flags().BoolVar(&taglessMode, "taglessmode", false, "Tagless mode: If set, pull the latest commit from a branch, and determine version info from spec file (aka upstream versions aren't tagged)") - root.Flags().BoolVar(&altLookAside, "altlookaside", false, "If set, uses the new CentOS Stream lookaside pattern (https:///////)") + root.Flags().BoolVar(&taglessMode, "taglessmode", false, "Tagless mode: If set, pull the latest commit from the branch and determine version numbers from spec file. This is auto-tried if tags aren't found.") + root.Flags().StringVar(&cdn, "cdn", "", "CDN URL shortcuts for well-known distros, auto-assigns --cdn-url. Valid values: rocky8, rocky, fedora, centos, centos-stream. 
Setting this overrides --cdn-url") if err := root.Execute(); err != nil { log.Fatal(err) diff --git a/pkg/data/process.go b/pkg/data/process.go index a3797a8..bdd01b9 100644 --- a/pkg/data/process.go +++ b/pkg/data/process.go @@ -59,5 +59,5 @@ type ProcessData struct { PackageVersion string PackageRelease string TaglessMode bool - AltLookAside bool + Cdn string } diff --git a/pkg/modes/git.go b/pkg/modes/git.go index f151aca..cbec5c8 100644 --- a/pkg/modes/git.go +++ b/pkg/modes/git.go @@ -21,12 +21,14 @@ package modes import ( + "bytes" "fmt" "io/ioutil" "net/http" "path/filepath" "sort" "strings" + "text/template" "time" "github.com/go-git/go-git/v5/plumbing/transport" @@ -341,44 +343,69 @@ func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error { } else { url := "" - // Alternate lookaside logic: if enabled, we pull from a new URL pattern - if !pd.AltLookAside { - url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash) - } else { - // We first need the hash algorithm based on length of hash: - hashType := "sha512" - switch len(hash) { - case 128: - hashType = "sha512" - case 64: - hashType = "sha256" - case 40: - hashType = "sha1" - case 32: - hashType = "md5" + + // We need to figure out the hashtype for templating purposes: + hashType := "sha512" + switch len(hash) { + case 128: + hashType = "sha512" + case 64: + hashType = "sha256" + case 40: + hashType = "sha1" + case 32: + hashType = "md5" + } + + // need the name of the file without "SOURCES/": + fileName := strings.Split(path, "/")[1] + + // Feed our template info to ProcessUrl and transform to the real values: ( {{.Name}}, {{.Branch}}, {{.Hash}}, {{.Hashtype}}, {{.Filename}} ) + url, err = ProcessUrl(pd.CdnUrl, md.Name, branchName, hash, hashType, fileName) + if err != nil { + return fmt.Errorf("Could not process CDN URL template(s) in string ( {{ .Variable }} )") + } + + var req *http.Request + var resp *http.Response + + // Download the --cdn-url given, but *only* if 
it contains template strings ( {{.Name}} , {{.Hash}} , etc. ) + // Otherwise we need to fall back to the traditional cdn-url patterns + if strings.Contains(pd.CdnUrl, "{{") && strings.Contains(pd.CdnUrl, "}}") { + pd.Log.Printf("downloading %s", url) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return fmt.Errorf("could not create new http request: %v", err) } + req.Header.Set("Accept-Encoding", "*") - // need the name of the file without "SOURCES/": - fileName := strings.Split(path, "/")[1] - - // Alt. lookaside url is of the form: / / / / / - url = fmt.Sprintf("%s/%s/%s/%s/%s/%s", pd.CdnUrl, md.Name, fileName, hashType, hash, fileName) + resp, err = client.Do(req) + if err != nil { + return fmt.Errorf("could not download dist-git file: %v", err) + } } - pd.Log.Printf("downloading %s", url) - - req, err := http.NewRequest("GET", url, nil) - if err != nil { - return fmt.Errorf("could not create new http request: %v", err) + // Default cdn-url: If we don't have a templated download string, try the default /// pattern: + if resp == nil || resp.StatusCode != http.StatusOK { + url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash) + pd.Log.Printf("Attempting default URL: %s", url) + req, err = http.NewRequest("GET", url, nil) + if err != nil { + return fmt.Errorf("could not create new http request: %v", err) + } + req.Header.Set("Accept-Encoding", "*") + resp, err = client.Do(req) + if err != nil { + return fmt.Errorf("could not download dist-git file: %v", err) + } } - req.Header.Set("Accept-Encoding", "*") - resp, err := client.Do(req) - if err != nil { - return fmt.Errorf("could not download dist-git file: %v", err) - } - if resp.StatusCode != http.StatusOK { + // If the default URL fails, we have one more pattern to try. 
The simple / pattern + // If this one fails, we are truly lost, and have to bail out w/ an error: + if resp == nil || resp.StatusCode != http.StatusOK { url = fmt.Sprintf("%s/%s", pd.CdnUrl, hash) + pd.Log.Printf("Attempting 2nd fallback URL: %s", url) req, err = http.NewRequest("GET", url, nil) if err != nil { return fmt.Errorf("could not create new http request: %v", err) @@ -458,3 +485,34 @@ func (g *GitMode) ImportName(pd *data.ProcessData, md *data.ModeData) string { return strings.Replace(strings.TrimPrefix(md.TagBranch, "refs/heads/"), "%", "_", -1) } + +// Given a cdnUrl string as input, return same string, but with substituted +// template values ( {{.Name}} , {{.Hash}}, {{.Filename}}, etc. ) +func ProcessUrl(cdnUrl string, name string, branch string, hash string, hashtype string, filename string) (string, error) { + + // These 5 {{ .Value }} items are possible in our templated string: + type Lookaside struct { + Name string + Branch string + Hash string + Hashtype string + Filename string + } + + tmpUrl := Lookaside{name, branch, hash, hashtype, filename} + + tmpl, err := template.New("").Parse(cdnUrl) + if err != nil { + panic(err) + } + + var result bytes.Buffer + err = tmpl.Execute(&result, tmpUrl) + + if err != nil { + panic(err) + } + + return result.String(), nil + +} diff --git a/pkg/srpmproc/process.go b/pkg/srpmproc/process.go index 4712f74..5bc47af 100644 --- a/pkg/srpmproc/process.go +++ b/pkg/srpmproc/process.go @@ -101,8 +101,13 @@ type ProcessDataRequest struct { PackageVersion string PackageRelease string - TaglessMode bool - AltLookAside bool + TaglessMode bool + Cdn string +} + +type LookasidePath struct { + Distro string + Url string } func gitlabify(str string) string { @@ -113,7 +118,29 @@ func gitlabify(str string) string { return strings.Replace(str, "+", "plus", -1) } +// List of distros and their lookaside patterns +// If we find one of these passed as --cdn (ex: "--cdn fedora"), then we override, and assign this URL to be our 
--cdn-url +func StaticLookasides() []LookasidePath { + + centos := LookasidePath{Distro: "centos", Url: "https://git.centos.org/sources/{{.Name}}/{{.Branch}}/{{.Hash}}"} + centosStream := LookasidePath{Distro: "centos-stream", Url: "https://sources.stream.centos.org/sources/rpms/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}"} + rocky8 := LookasidePath{Distro: "rocky8", Url: "https://rocky-linux-sources-staging.a1.rockylinux.org/{{.Hash}}"} + rocky := LookasidePath{Distro: "rocky", Url: "https://sources.build.resf.org/{{.Hash}}"} + fedora := LookasidePath{Distro: "fedora", Url: "https://src.fedoraproject.org/repo/pkgs/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}"} + + return []LookasidePath{centos, centosStream, rocky8, rocky, fedora} + +} + func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) { + + // Build the logger to use for the data import + var writer io.Writer = os.Stdout + if req.LogWriter != nil { + writer = req.LogWriter + } + logger := log.New(writer, "", log.LstdFlags) + // Set defaults if req.ModulePrefix == "" { req.ModulePrefix = ModulePrefixCentOS @@ -139,13 +166,29 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) { if req.BranchPrefix == "" { req.BranchPrefix = "r" } - if req.CdnUrl == "" && !req.AltLookAside { + if req.CdnUrl == "" { req.CdnUrl = "https://git.centos.org/sources" } - // If altlookaside is enabled, and the CdnUrl hasn't been changed, then automatically set it to the default - // CentOS Stream (the new pattern very much won't work with the old git.centos.org/sources site) - if (req.CdnUrl == "https://git.centos.org/sources" || req.CdnUrl == "") && req.AltLookAside { - req.CdnUrl = "https://sources.stream.centos.org/sources/rpms" + + // If a Cdn distro is defined, loop through StaticLookasides() array of structs, + // see if we have a match to --cdn (matching values are things like fedora, centos, rocky8, etc.) 
+ // If we match, then we want to short-circuit the CdnUrl to the assigned distro's one + if req.Cdn != "" { + + var foundDistro = false + + for _, distro := range StaticLookasides() { + if distro.Distro == strings.ToLower(req.Cdn) { + foundDistro = true + req.CdnUrl = distro.Url + logger.Printf("Discovered --cdn distro: %s . Using override CDN URL Pattern: %s", distro.Distro, req.CdnUrl) + break + } + } + + if foundDistro == false { + return nil, fmt.Errorf("Error, distro name given as --cdn argument is not valid.") + } } // Validate required @@ -214,12 +257,6 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) { reqFsCreator = req.FsCreator } - var writer io.Writer = os.Stdout - if req.LogWriter != nil { - writer = req.LogWriter - } - logger := log.New(writer, "", log.LstdFlags) - if req.TmpFsMode != "" { logger.Printf("using tmpfs dir: %s", req.TmpFsMode) fsCreator = func(branch string) (billy.Filesystem, error) { @@ -276,7 +313,7 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) { PackageVersion: req.PackageVersion, PackageRelease: req.PackageRelease, TaglessMode: req.TaglessMode, - AltLookAside: req.AltLookAside, + Cdn: req.Cdn, }, nil } @@ -334,6 +371,7 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) { list, err := remote.List(&git.ListOptions{ Auth: pd.Authenticator, }) + if err != nil { log.Println("ignoring no-dup-mode") } else { @@ -367,6 +405,14 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) { } } + // If we have no valid branches to consider, then we'll automatically switch to attempt a tagless import: + if len(md.Branches) <= 0 { + log.Println("No valid tags (refs/tags/imports/*) found in repository! Switching to perform a tagless import.") + pd.TaglessMode = true + result, err := processRPMTagless(pd) + return result, err + } + for _, branch := range md.Branches { md.Repo = &sourceRepo md.Worktree = &sourceWorktree