Updates to add flexibility to lookasides and tagless handling:

- Added template "macros" to allow for complex --cdn-url specifications ( {{.Name}}, {{.Branch}}, etc.)
- Added --cdn <distro> option with pre-set URL patterns to simplify use
- Kept default fallback behavior and search patterns of --cdn-url if templates are not used
- Removed "--altlookaside" option, as the above features make it redundant

- Automatic tagless:  If a proper git version/imports tag isn't found, a "tagless-mode" import will be attempted automatically at run-time
- taglessmode command line option kept in case user still wants to force a tagless import

-Skip Grube
This commit is contained in:
Skip Grube 2023-02-22 23:23:47 -05:00
parent 06ffd1e507
commit 3237794071
No known key found for this signature in database
GPG Key ID: D391F8393BEA6D9C
5 changed files with 194 additions and 54 deletions

View File

@ -1,14 +1,14 @@
# srpmproc
Upstream package importer with auto patching. Reference implementation for OpenPatch
# Usage
## Usage
```
Usage:
srpmproc [flags]
srpmproc [command]
Available Commands:
fetch
fetch
help Help about any command
Flags:
@ -16,7 +16,8 @@ Flags:
--basic-username string Basic auth username
--branch-prefix string Branch prefix (replaces import-branch-prefix) (default "r")
--branch-suffix string Branch suffix to use for imported branches
--cdn-url string CDN URL to download blobs from (default "https://git.centos.org/sources")
--cdn string CDN URL shortcuts for well-known distros, auto-assigns --cdn-url. Valid values: rocky8, rocky, fedora, centos, centos-stream. Setting this overrides --cdn-url
--cdn-url string CDN URL to download blobs from. Simple URL follows default rocky/centos patterns. Can be customized using macros (see docs) (default "https://git.centos.org/sources")
--git-committer-email string Email of committer (default "rockyautomation@rockylinux.org")
--git-committer-name string Name of committer (default "rockyautomation")
-h, --help help for srpmproc
@ -28,6 +29,8 @@ Flags:
--no-dup-mode If enabled, skips already imported tags
--no-storage-download If enabled, blobs are always downloaded from upstream
--no-storage-upload If enabled, blobs are not uploaded to blob storage
--package-release string Package release to fetch
--package-version string Package version to fetch
--rpm-prefix string Where to retrieve SRPM content. Only used when source-rpm is not a local file (default "https://git.centos.org/rpms")
--single-tag string If set, only this tag is imported
--source-rpm string Location of RPM to process
@ -35,9 +38,42 @@ Flags:
--ssh-user string SSH User (default "git")
--storage-addr string Bucket to use as blob storage
--strict-branch-mode If enabled, only branches with the calculated name are imported and not prefix only
--taglessmode Tagless mode: If set, pull the latest commit from the branch and determine version numbers from spec file. This is auto-tried if tags aren't found.
--tmpfs-mode string If set, packages are imported to path and patched but not pushed
--upstream-prefix string Upstream git repository prefix
--version int Upstream version
Use "srpmproc [command] --help" for more information about a command.
```
<br />
## Examples:
1. Import the kernel package from git.centos.org/rpms/, to local folder /opt/gitroot/rpms/kernel.git/ . Download the lookaside source tarballs from the default CentOS file server location to local folder `/opt/fake_s3/` . We want to grab branch "c8" (import prefix plus RHEL version), and it will be committed as branch "r8" (branch prefix plus RHEL version). This assumes that `/opt/fake_s3` exists, and `/opt/gitroot/rpms/kernel.git` exists and is a git repository of some kind (even an empty one).
```
srpmproc --branch-prefix "r" --import-branch-prefix "c" --rpm-prefix "https://git.centos.org/rpms" --version 8 --storage-addr file:///opt/fake_s3 --upstream-prefix file:///opt/gitroot --cdn centos --strict-branch-mode --source-rpm kernel
```
<br />
## CDN and --cdn-url
The --cdn-url option allows for Go-style templates to craft complex URL patterns. These templates are: `{{.Name}}` (package name), `{{.Hash}}` (hash of lookaside file), `{{.Hashtype}}` (hash type of file, like "sha256" or "sha512"), `{{.Branch}}` (the branch we are importing), and `{{.Filename}}` (the lookaside file's name as it appears in SOURCES/). You can add these values as part of --cdn-url to craft your lookaside pattern.
For example, if I wanted my lookaside downloads to come from CentOS 9 Stream, I would use as part of my command:
```
--cdn-url "https://sources.stream.centos.org/sources/rpms/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}"
```
**Default Behavior:** If these templates are not used, the default behavior of `--cdn-url` is to fall back on the traditional RHEL import pattern: `<CDN_URL>/<NAME>/<BRANCH>/<HASH>` . If that fails, a further fallback is attempted, the simple: `<CDN_URL>/<HASH>`. These cover the common Rocky Linux and RHEL/CentOS imports if the base lookaside URL is the only thing given. If no `--cdn-url` is specified, it defaults to "https://git.centos.org/sources" (for RHEL imports into Rocky Linux)
**CDN Shorthand:** For convenience, some lookaside patterns for popular distros are provided via the `--cdn` option. You can specify this without needing to use the longer `--cdn-url`. For example, when importing from CentOS 9 Stream, you could use `--cdn centos-stream`

View File

@ -58,7 +58,7 @@ var (
packageVersion string
packageRelease string
taglessMode bool
altLookAside bool
cdn string
)
var root = &cobra.Command{
@ -96,7 +96,7 @@ func mn(_ *cobra.Command, _ []string) {
PackageVersion: packageVersion,
PackageRelease: packageRelease,
TaglessMode: taglessMode,
AltLookAside: altLookAside,
Cdn: cdn,
})
if err != nil {
log.Fatal(err)
@ -131,7 +131,7 @@ func main() {
root.Flags().StringVar(&rpmPrefix, "rpm-prefix", "https://git.centos.org/rpms", "Where to retrieve SRPM content. Only used when source-rpm is not a local file")
root.Flags().StringVar(&importBranchPrefix, "import-branch-prefix", "c", "Import branch prefix")
root.Flags().StringVar(&branchPrefix, "branch-prefix", "r", "Branch prefix (replaces import-branch-prefix)")
root.Flags().StringVar(&cdnUrl, "cdn-url", "https://git.centos.org/sources", "CDN URL to download blobs from")
root.Flags().StringVar(&cdnUrl, "cdn-url", "https://git.centos.org/sources", "CDN URL to download blobs from. Simple URL follows default rocky/centos patterns. Can be customized using macros (see docs)")
root.Flags().StringVar(&singleTag, "single-tag", "", "If set, only this tag is imported")
root.Flags().BoolVar(&noDupMode, "no-dup-mode", false, "If enabled, skips already imported tags")
root.Flags().BoolVar(&moduleMode, "module-mode", false, "If enabled, imports a module instead of a package")
@ -146,8 +146,8 @@ func main() {
root.Flags().StringVar(&basicPassword, "basic-password", "", "Basic auth password")
root.Flags().StringVar(&packageVersion, "package-version", "", "Package version to fetch")
root.Flags().StringVar(&packageRelease, "package-release", "", "Package release to fetch")
root.Flags().BoolVar(&taglessMode, "taglessmode", false, "Tagless mode: If set, pull the latest commit from a branch, and determine version info from spec file (aka upstream versions aren't tagged)")
root.Flags().BoolVar(&altLookAside, "altlookaside", false, "If set, uses the new CentOS Stream lookaside pattern (https://<SITE_PREFIX>/<RPM_NAME>/<FILE_NAME>/<SHA_VERSION>/<SHA_SUM>/<FILE_NAME>)")
root.Flags().BoolVar(&taglessMode, "taglessmode", false, "Tagless mode: If set, pull the latest commit from the branch and determine version numbers from spec file. This is auto-tried if tags aren't found.")
root.Flags().StringVar(&cdn, "cdn", "", "CDN URL shortcuts for well-known distros, auto-assigns --cdn-url. Valid values: rocky8, rocky, fedora, centos, centos-stream. Setting this overrides --cdn-url")
if err := root.Execute(); err != nil {
log.Fatal(err)

View File

@ -59,5 +59,5 @@ type ProcessData struct {
PackageVersion string
PackageRelease string
TaglessMode bool
AltLookAside bool
Cdn string
}

View File

@ -21,12 +21,14 @@
package modes
import (
"bytes"
"fmt"
"io/ioutil"
"net/http"
"path/filepath"
"sort"
"strings"
"text/template"
"time"
"github.com/go-git/go-git/v5/plumbing/transport"
@ -341,44 +343,69 @@ func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error {
} else {
url := ""
// Alternate lookaside logic: if enabled, we pull from a new URL pattern
if !pd.AltLookAside {
url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash)
} else {
// We first need the hash algorithm based on length of hash:
hashType := "sha512"
switch len(hash) {
case 128:
hashType = "sha512"
case 64:
hashType = "sha256"
case 40:
hashType = "sha1"
case 32:
hashType = "md5"
// We need to figure out the hashtype for templating purposes:
hashType := "sha512"
switch len(hash) {
case 128:
hashType = "sha512"
case 64:
hashType = "sha256"
case 40:
hashType = "sha1"
case 32:
hashType = "md5"
}
// need the name of the file without "SOURCES/":
fileName := strings.Split(path, "/")[1]
// Feed our template info to ProcessUrl and transform to the real values: ( {{.Name}}, {{.Branch}}, {{.Hash}}, {{.Hashtype}}, {{.Filename}} )
url, err = ProcessUrl(pd.CdnUrl, md.Name, branchName, hash, hashType, fileName)
if err != nil {
return fmt.Errorf("Could not process CDN URL template(s) in string ( {{ .Variable }} )")
}
var req *http.Request
var resp *http.Response
// Download the --cdn-url given, but *only* if it contains template strings ( {{.Name}} , {{.Hash}} , etc. )
// Otherwise we need to fall back to the traditional cdn-url patterns
if strings.Contains(pd.CdnUrl, "{{") && strings.Contains(pd.CdnUrl, "}}") {
pd.Log.Printf("downloading %s", url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("could not create new http request: %v", err)
}
req.Header.Set("Accept-Encoding", "*")
// need the name of the file without "SOURCES/":
fileName := strings.Split(path, "/")[1]
// Alt. lookaside url is of the form: <cdn> / <name> / <filename> / <hashtype> / <hash> / <filename>
url = fmt.Sprintf("%s/%s/%s/%s/%s/%s", pd.CdnUrl, md.Name, fileName, hashType, hash, fileName)
resp, err = client.Do(req)
if err != nil {
return fmt.Errorf("could not download dist-git file: %v", err)
}
}
pd.Log.Printf("downloading %s", url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("could not create new http request: %v", err)
// Default cdn-url: If we don't have a templated download string, try the default <SITE>/<PKG>/<BRANCH>/<HASH> pattern:
if resp == nil || resp.StatusCode != http.StatusOK {
url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash)
pd.Log.Printf("Attempting default URL: %s", url)
req, err = http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("could not create new http request: %v", err)
}
req.Header.Set("Accept-Encoding", "*")
resp, err = client.Do(req)
if err != nil {
return fmt.Errorf("could not download dist-git file: %v", err)
}
}
req.Header.Set("Accept-Encoding", "*")
resp, err := client.Do(req)
if err != nil {
return fmt.Errorf("could not download dist-git file: %v", err)
}
if resp.StatusCode != http.StatusOK {
// If the default URL fails, we have one more pattern to try. The simple <SITE>/<HASH> pattern
// If this one fails, we are truly lost, and have to bail out w/ an error:
if resp == nil || resp.StatusCode != http.StatusOK {
url = fmt.Sprintf("%s/%s", pd.CdnUrl, hash)
pd.Log.Printf("Attempting 2nd fallback URL: %s", url)
req, err = http.NewRequest("GET", url, nil)
if err != nil {
return fmt.Errorf("could not create new http request: %v", err)
@ -458,3 +485,34 @@ func (g *GitMode) ImportName(pd *data.ProcessData, md *data.ModeData) string {
return strings.Replace(strings.TrimPrefix(md.TagBranch, "refs/heads/"), "%", "_", -1)
}
// Given a cdnUrl string as input, return same string, but with substituted
// template values ( {{.Name}} , {{.Hash}}, {{.Filename}}, etc. )
func ProcessUrl(cdnUrl string, name string, branch string, hash string, hashtype string, filename string) (string, error) {
// These 5 {{ .Value }} items are possible in our templated string:
type Lookaside struct {
Name string
Branch string
Hash string
Hashtype string
Filename string
}
tmpUrl := Lookaside{name, branch, hash, hashtype, filename}
tmpl, err := template.New("").Parse(cdnUrl)
if err != nil {
panic(err)
}
var result bytes.Buffer
err = tmpl.Execute(&result, tmpUrl)
if err != nil {
panic(err)
}
return result.String(), nil
}

View File

@ -101,8 +101,13 @@ type ProcessDataRequest struct {
PackageVersion string
PackageRelease string
TaglessMode bool
AltLookAside bool
TaglessMode bool
Cdn string
}
type LookasidePath struct {
Distro string
Url string
}
func gitlabify(str string) string {
@ -113,7 +118,29 @@ func gitlabify(str string) string {
return strings.Replace(str, "+", "plus", -1)
}
// List of distros and their lookaside patterns
// If we find one of these passed as --cdn (ex: "--cdn fedora"), then we override, and assign this URL to be our --cdn-url
func StaticLookasides() []LookasidePath {
centos := LookasidePath{Distro: "centos", Url: "https://git.centos.org/sources/{{.Name}}/{{.Branch}}/{{.Hash}}"}
centosStream := LookasidePath{Distro: "centos-stream", Url: "https://sources.stream.centos.org/sources/rpms/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}"}
rocky8 := LookasidePath{Distro: "rocky8", Url: "https://rocky-linux-sources-staging.a1.rockylinux.org/{{.Hash}}"}
rocky := LookasidePath{Distro: "rocky", Url: "https://sources.build.resf.org/{{.Hash}}"}
fedora := LookasidePath{Distro: "fedora", Url: "https://src.fedoraproject.org/repo/pkgs/{{.Name}}/{{.Filename}}/{{.Hashtype}}/{{.Hash}}/{{.Filename}}"}
return []LookasidePath{centos, centosStream, rocky8, rocky, fedora}
}
func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
// Build the logger to use for the data import
var writer io.Writer = os.Stdout
if req.LogWriter != nil {
writer = req.LogWriter
}
logger := log.New(writer, "", log.LstdFlags)
// Set defaults
if req.ModulePrefix == "" {
req.ModulePrefix = ModulePrefixCentOS
@ -139,13 +166,29 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
if req.BranchPrefix == "" {
req.BranchPrefix = "r"
}
if req.CdnUrl == "" && !req.AltLookAside {
if req.CdnUrl == "" {
req.CdnUrl = "https://git.centos.org/sources"
}
// If altlookaside is enabled, and the CdnUrl hasn't been changed, then automatically set it to the default
// CentOS Stream (the new pattern very much won't work with the old git.centos.org/sources site)
if (req.CdnUrl == "https://git.centos.org/sources" || req.CdnUrl == "") && req.AltLookAside {
req.CdnUrl = "https://sources.stream.centos.org/sources/rpms"
// If a Cdn distro is defined, loop through StaticLookasides() array of structs,
// see if we have a match to --cdn (matching values are things like fedora, centos, rocky8, etc.)
// If we match, then we want to short-circuit the CdnUrl to the assigned distro's one
if req.Cdn != "" {
var foundDistro = false
for _, distro := range StaticLookasides() {
if distro.Distro == strings.ToLower(req.Cdn) {
foundDistro = true
req.CdnUrl = distro.Url
logger.Printf("Discovered --cdn distro: %s . Using override CDN URL Pattern: %s", distro.Distro, req.CdnUrl)
break
}
}
if foundDistro == false {
return nil, fmt.Errorf("Error, distro name given as --cdn argument is not valid.")
}
}
// Validate required
@ -214,12 +257,6 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
reqFsCreator = req.FsCreator
}
var writer io.Writer = os.Stdout
if req.LogWriter != nil {
writer = req.LogWriter
}
logger := log.New(writer, "", log.LstdFlags)
if req.TmpFsMode != "" {
logger.Printf("using tmpfs dir: %s", req.TmpFsMode)
fsCreator = func(branch string) (billy.Filesystem, error) {
@ -276,7 +313,7 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
PackageVersion: req.PackageVersion,
PackageRelease: req.PackageRelease,
TaglessMode: req.TaglessMode,
AltLookAside: req.AltLookAside,
Cdn: req.Cdn,
}, nil
}
@ -334,6 +371,7 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
list, err := remote.List(&git.ListOptions{
Auth: pd.Authenticator,
})
if err != nil {
log.Println("ignoring no-dup-mode")
} else {
@ -367,6 +405,14 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
}
}
// If we have no valid branches to consider, then we'll automatically switch to attempt a tagless import:
if len(md.Branches) <= 0 {
log.Println("No valid tags (refs/tags/imports/*) found in repository! Switching to perform a tagless import.")
pd.TaglessMode = true
result, err := processRPMTagless(pd)
return result, err
}
for _, branch := range md.Branches {
md.Repo = &sourceRepo
md.Worktree = &sourceWorktree