From f339965767350d1aca704a142ddf8c87aa8d0b76 Mon Sep 17 00:00:00 2001 From: Skip Grube Date: Thu, 8 Sep 2022 22:45:45 -0400 Subject: [PATCH] More tagless + altlookaside mode work - Completely functional now - pushes and lookaside downloads/uploads work properly - Limitation: no modules (yet, that's next) - Limitation: Tagless imports only pull the latest head from a branch -Skip G. --- pkg/modes/git.go | 134 +++++++++++++++++--------- pkg/srpmproc/process.go | 209 +++++++++++++++++++++++++++++++++------- 2 files changed, 264 insertions(+), 79 deletions(-) diff --git a/pkg/modes/git.go b/pkg/modes/git.go index d0de1a5..94d318a 100644 --- a/pkg/modes/git.go +++ b/pkg/modes/git.go @@ -139,7 +139,13 @@ func (g *GitMode) RetrieveSource(pd *data.ProcessData) (*data.ModeData, error) { _tmpRef := strings.Split(refSpec, "/") _branchName := _tmpRef[(len(_tmpRef) - 1)] - + // In the case of "strict branch mode" on, the branch name must match *exactly* with our prefix-version-suffix (like "c8" cannot also match "c8-beta") + // If it doesn't, bail out without adding this branch + if pd.StrictBranchMode == true && _branchName != fmt.Sprintf("%s%d%s", pd.ImportBranchPrefix, pd.Version, pd.BranchSuffix) { + return nil + } + + latestTags[_branchName] = &remoteTarget{ remote: refSpec, when: tag.Tagger.When, @@ -228,56 +234,73 @@ func (g *GitMode) RetrieveSource(pd *data.ProcessData) (*data.ModeData, error) { } func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error { + remote, err := md.Repo.Remote("upstream") - if err != nil { - return fmt.Errorf("could not get upstream remote: %v", err) + + if err != nil && pd.TaglessMode == false { + return fmt.Errorf("could not get upstream remote: %v", err) } + var refspec config.RefSpec var branchName string - if strings.HasPrefix(md.TagBranch, "refs/heads") { - refspec = config.RefSpec(fmt.Sprintf("+%s:%s", md.TagBranch, md.TagBranch)) - branchName = strings.TrimPrefix(md.TagBranch, "refs/heads/") - } else { - match := misc.GetTagImportRegex(pd).FindStringSubmatch(md.TagBranch) - branchName = match[2] - refspec = config.RefSpec(fmt.Sprintf("+refs/heads/%s:%s", branchName, md.TagBranch)) - } - pd.Log.Printf("checking out upstream refspec %s", refspec) - fetchOpts := &git.FetchOptions{ - Auth: pd.Authenticator, - RemoteName: "upstream", - RefSpecs: []config.RefSpec{refspec}, - Tags: git.AllTags, - Force: true, - } - err = remote.Fetch(fetchOpts) - if err != nil && err != git.NoErrAlreadyUpToDate { - if err == transport.ErrInvalidAuthMethod || err == transport.ErrAuthenticationRequired { - fetchOpts.Auth = nil - err = remote.Fetch(fetchOpts) - if err != nil && err != git.NoErrAlreadyUpToDate { - return fmt.Errorf("could not fetch upstream: %v", err) - } - } else { - return fmt.Errorf("could not fetch upstream: %v", err) - } - } + fmt.Printf("pd.AltLookaside == %v , pd.CdnUrl == %s \n", pd.AltLookAside, pd.CdnUrl) + - err = md.Worktree.Checkout(&git.CheckoutOptions{ - Branch: plumbing.ReferenceName(md.TagBranch), - Force: true, - }) - if err != nil { - return fmt.Errorf("could not checkout source from git: %v", err) - } + // In the case of tagless mode, we already have the transformed repo sitting in the worktree, + // and don't need to perform any checkout or fetch operations + if pd.TaglessMode == false { + if strings.HasPrefix(md.TagBranch, "refs/heads") { + refspec = config.RefSpec(fmt.Sprintf("+%s:%s", md.TagBranch, md.TagBranch)) + branchName = strings.TrimPrefix(md.TagBranch, "refs/heads/") + } else { + match := misc.GetTagImportRegex(pd).FindStringSubmatch(md.TagBranch) + branchName = match[2] + refspec = config.RefSpec(fmt.Sprintf("+refs/heads/%s:%s", branchName, md.TagBranch)) + fmt.Println("Found branchname that does not start w/ refs/heads :: ", branchName) + } + pd.Log.Printf("checking out upstream refspec %s", refspec) + + + fetchOpts := &git.FetchOptions{ + Auth: pd.Authenticator, + RemoteName: "upstream", + RefSpecs: []config.RefSpec{refspec}, + Tags: git.AllTags, + Force: true, + } + err = remote.Fetch(fetchOpts) + if err != nil && err != git.NoErrAlreadyUpToDate { + if err == transport.ErrInvalidAuthMethod || err == transport.ErrAuthenticationRequired { + fetchOpts.Auth = nil + err = remote.Fetch(fetchOpts) + if err != nil && err != git.NoErrAlreadyUpToDate { + return fmt.Errorf("could not fetch upstream: %v", err) + } + } else { + return fmt.Errorf("could not fetch upstream: %v", err) + } + } - _, err = md.Worktree.Add(".") - if err != nil { - return fmt.Errorf("could not add Worktree: %v", err) - } + err = md.Worktree.Checkout(&git.CheckoutOptions{ + Branch: plumbing.ReferenceName(md.TagBranch), + Force: true, + }) + if err != nil { + return fmt.Errorf("could not checkout source from git: %v", err) + } + _, err = md.Worktree.Add(".") + if err != nil { + return fmt.Errorf("could not add Worktree: %v", err) + } + } + + if pd.TaglessMode == true { + branchName = fmt.Sprintf("%s%d%s", pd.ImportBranchPrefix, pd.Version, pd.BranchSuffix) + } + metadataPath := "" ls, err := md.Worktree.Filesystem.ReadDir(".") if err != nil { @@ -335,7 +358,32 @@ func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error { body = fromBlobStorage pd.Log.Printf("downloading %s from blob storage", hash) } else { - url := fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash) + + url := "" + // Alternate lookaside logic: if enabled, we pull from a new URL pattern + if pd.AltLookAside == false { + url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash) + } else { + // We first need the hash algorithm based on length of hash: + hashType := "sha512" + switch len(hash) { + case 128: + hashType = "sha512" + case 64: + hashType = "sha256" + case 40: + hashType = "sha1" + case 32: + hashType = "md5" + } + + // need the name of the file without "SOURCES/": + fileName := strings.Split(path, "/")[1] + + // Alt. lookaside url is of the form: / / / / / + url = fmt.Sprintf("%s/%s/%s/%s/%s/%s", pd.CdnUrl, md.Name, fileName, hashType, hash, fileName) + } + pd.Log.Printf("downloading %s", url) req, err := http.NewRequest("GET", url, nil) diff --git a/pkg/srpmproc/process.go b/pkg/srpmproc/process.go index 1a36abd..68665dd 100644 --- a/pkg/srpmproc/process.go +++ b/pkg/srpmproc/process.go @@ -25,6 +25,7 @@ import ( "fmt" "github.com/go-git/go-billy/v5" "github.com/go-git/go-billy/v5/osfs" + "github.com/go-git/go-git/v5/plumbing/format/gitignore" "github.com/go-git/go-git/v5/plumbing/transport" "github.com/go-git/go-git/v5/plumbing/transport/http" "github.com/go-git/go-git/v5/plumbing/transport/ssh" @@ -137,9 +138,14 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) { if req.BranchPrefix == "" { req.BranchPrefix = "r" } - if req.CdnUrl == "" { + if req.CdnUrl == "" && req.AltLookAside == false { req.CdnUrl = "https://git.centos.org/sources" } + // If altlookaside is enabled, and the CdnUrl hasn't been changed, then automatically set it to the default + // CentOS Stream (the new pattern very much won't work with the old git.centos.org/sources site) + if (req.CdnUrl == "https://git.centos.org/sources" || req.CdnUrl == "") && req.AltLookAside == true { + req.CdnUrl = "https://sources.stream.centos.org/sources/rpms" + } // Validate required if req.Package == "" { @@ -657,12 +663,6 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) { pushRefspecs = append(pushRefspecs, config.RefSpec("HEAD:"+plumbing.NewTagReferenceName(newTag))) - - - fmt.Printf("pushRefspecs == %+v \n", pushRefspecs) - fmt.Println("blah blah blah") - - err = repo.Push(&git.PushOptions{ RemoteName: "origin", @@ -690,25 +690,30 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) { func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) { pd.Log.Println("Tagless mode detected, attempting import of latest commit") + // In tagless mode, we *automatically* set StrictBranchMode to true + // Only the exact branch should be pulled from the source repo + pd.StrictBranchMode = true + + // our return values: a mapping of branches -> commits (1:1) that we're bringing in, + // and a mapping of branches to: version = X, release = Y + latestHashForBranch := map[string]string{} + versionForBranch := map[string]*srpmprocpb.VersionRelease{} + + md, err := pd.Importer.RetrieveSource(pd) if err != nil { pd.Log.Println("Error detected in RetrieveSource!") return nil, err } - log.Printf("%+v\n", md) md.BlobCache = map[string][]byte{} // TODO: add tagless module support - remotePrefix := "rpms" if pd.ModuleMode { remotePrefix = "modules" } - - - // already uploaded blobs are skipped - // var alreadyUploadedBlobs []string + // Set up our remote URL for pushing our repo to var tagIgnoreList []string @@ -745,14 +750,10 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error } - sourceRepo := *md.Repo sourceWorktree := *md.Worktree - - localPath := "" - for _, branch := range md.Branches { md.Repo = &sourceRepo md.Worktree = &sourceWorktree @@ -784,25 +785,29 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error // We want sources to become .PKGNAME.metadata, we want SOURCES and SPECS folders, etc. repoFixed, _ := convertLocalRepo(md.Name, localPath) if !repoFixed { - pd.Log.Println("Error converting repository into SOURCES + SPECS + .package.metadata format") return nil, fmt.Errorf("Error converting repository into SOURCES + SPECS + .package.metadata format") } - rpmVersion := getVersionFromSpec(md.Name, localPath, pd.Version) - if rpmVersion == "" { + nvrString := getVersionFromSpec(md.Name, localPath, pd.Version) + if nvrString == "" { return nil, fmt.Errorf("Error using rpm or rpmbuild to build SRPM and determine version info! (tagless mode)") } + // Set version and release fields we extracted (name|version|release are separated by pipes) + pd.PackageVersion = strings.Split(nvrString, "|")[1] + pd.PackageRelease = strings.Split(nvrString, "|")[2] + + + // Set full rpm version: name-version-release (for tagging properly) + rpmVersion := fmt.Sprintf("%s-%s-%s", md.Name, pd.PackageVersion, pd.PackageRelease) + + pd.Log.Println("Successfully determined version of tagless checkout: ", rpmVersion) - - md.PushBranch = fmt.Sprintf("%s%d%s", pd.BranchPrefix, pd.Version, pd.BranchSuffix) - - - + // Make an initial repo we will use to push to our target pushRepo, err := git.PlainInit(localPath + "_gitpush", false) if err != nil { @@ -858,9 +863,43 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error os.Rename(fmt.Sprintf("%s/.%s.metadata", localPath, md.Name ), fmt.Sprintf("%s_gitpush/.%s.metadata", localPath, md.Name) ) + + md.Repo = pushRepo + md.Worktree = w + + // Download lookaside sources (tarballs) into the push git repo: + err = pd.Importer.WriteSource(pd, md) + if err != nil { + return nil, err + } + + + // Call function to upload source to target lookaside and + // ensure the sources are added to .gitignore + + err = processLookasideSources(pd, md, localPath + "_gitpush") + if err != nil { + return nil, err + } + + + // Apply patch(es) if needed: + if pd.ModuleMode { + err := patchModuleYaml(pd, md) + if err != nil { + return nil, err + } + } else { + err := executePatchesRpm(pd, md) + if err != nil { + return nil, err + } + } + + + err = w.AddWithOptions(&git.AddOptions{All: true} ) if err != nil { - fmt.Printf("ERROR == %v \n", err) return nil, fmt.Errorf("Error adding SOURCES/ , SPECS/ or .metadata file to commit list.") } @@ -891,7 +930,7 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error if newRepo == true { pushRefspecs = append(pushRefspecs, config.RefSpec("*:*")) - pd.Log.Printf("Looks like a new remote repo, committing all local objects to new remote branch") + pd.Log.Printf("New remote repo detected, creating new remote branch") } @@ -952,21 +991,31 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error return nil, fmt.Errorf("could not push to remote: %v", err) } - - - - // Clean up temporary path after succesful import (disabled during development) - /* if err := os.RemoveAll(localPath); err != nil { log.Printf("Error cleaning up temporary git checkout directory %s . Non-fatal, continuing anyway...\n", localPath) } if err := os.RemoveAll(fmt.Sprintf("%s_gitpush", localPath)); err != nil { log.Printf("Error cleaning up temporary git checkout directory %s . Non-fatal, continuing anyway...\n", fmt.Sprintf("%s_gitpush", localPath)) } - */ + + // append our processed branch to the return structures: + latestHashForBranch[md.PushBranch] = obj.Hash.String() + + versionForBranch[md.PushBranch] = &srpmprocpb.VersionRelease{ + Version: pd.PackageVersion, + Release: pd.PackageRelease, + } + } - return nil, nil + fmt.Printf("returning::\n latestHashForBranch == %+v \n\n versionForBranch == %+v\n\n", latestHashForBranch, versionForBranch) + + // return struct with all our branch:commit and branch:version+release mappings + return &srpmprocpb.ProcessResponse{ + BranchCommits: latestHashForBranch, + BranchVersions: versionForBranch, + }, nil + } @@ -994,7 +1043,7 @@ func convertLocalRepo(pkgName string, localRepo string) (bool, error) { for _, file := range files { // We don't want to process SOURCES, SPECS, or any of our .git folders - if file.Name() == "SOURCES" || file.Name() == "SPECS" || strings.HasPrefix(file.Name(), ".git") { + if file.Name() == "SOURCES" || file.Name() == "SPECS" || strings.HasPrefix(file.Name(), ".git") || file.Name() == "." + pkgName + ".metadata" { continue } @@ -1181,7 +1230,7 @@ func getVersionFromSpec(pkgName string, localRepo string, majorVersion int) (str // Call the rpm binary to extract the version-release info out of it, and tack on ".el" at the end: - cmd = exec.Command("rpm", "-qp", "--qf", `%{NAME}-%{VERSION}-%{RELEASE}\n`, fmt.Sprintf("%s/SRPMS/%s", rpmBuildPath, srpmFile) ) + cmd = exec.Command("rpm", "-qp", "--qf", `%{NAME}|%{VERSION}|%{RELEASE}\n`, fmt.Sprintf("%s/SRPMS/%s", rpmBuildPath, srpmFile) ) nvrTmp, err := cmd.CombinedOutput() if err != nil { log.Println("Error running rpm command to extract temporary SRPM name-version-release identifiers.") @@ -1207,6 +1256,94 @@ func getVersionFromSpec(pkgName string, localRepo string, majorVersion int) (str +// We need to loop through the lookaside blob files ("SourcesToIgnore"), +// and upload them to our target storage (usually an S3 bucket, but could be a local folder) +// +// We also need to add the source paths to .gitignore in the git repo, so we don't accidentally commit + push them +func processLookasideSources(pd *data.ProcessData, md *data.ModeData, localDir string) (error) { + + w := md.Worktree + metadata, err := w.Filesystem.Create(fmt.Sprintf(".%s.metadata", md.Name) ) + if err != nil { + return fmt.Errorf("could not create metadata file: %v", err) + } + + // Keep track of files we've already uploaded - don't want duplicates! + var alreadyUploadedBlobs []string + + + gitIgnore, err := os.OpenFile(fmt.Sprintf("%s/.gitignore", localDir), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + + + for _, source := range md.SourcesToIgnore { + + sourcePath := source.Name + _, err := w.Filesystem.Stat(sourcePath) + if source.Expired || err != nil { + continue + } + + sourceFile, err := w.Filesystem.Open(sourcePath) + if err != nil { + return fmt.Errorf("could not open ignored source file %s: %v", sourcePath, err) + } + sourceFileBts, err := ioutil.ReadAll(sourceFile) + if err != nil { + return fmt.Errorf("could not read the whole of ignored source file: %v", err) + } + + source.HashFunction.Reset() + _, err = source.HashFunction.Write(sourceFileBts) + if err != nil { + return fmt.Errorf("could not write bytes to hash function: %v", err) + } + checksum := hex.EncodeToString(source.HashFunction.Sum(nil)) + checksumLine := fmt.Sprintf("%s %s\n", checksum, sourcePath) + _, err = metadata.Write([]byte(checksumLine)) + if err != nil { + return fmt.Errorf("could not write to metadata file: %v", err) + } + + if data.StrContains(alreadyUploadedBlobs, checksum) { + continue + } + exists, err := pd.BlobStorage.Exists(checksum) + if err != nil { + return err + } + if !exists && !pd.NoStorageUpload { + err := pd.BlobStorage.Write(checksum, sourceFileBts) + if err != nil { + return err + } + pd.Log.Printf("wrote %s to blob storage", checksum) + } + alreadyUploadedBlobs = append(alreadyUploadedBlobs, checksum) + + // Add this SOURCES/ lookaside file to be excluded + w.Excludes = append(w.Excludes, gitignore.ParsePattern(sourcePath, nil) ) + + // Append the SOURCES/ path to .gitignore: + _, err = gitIgnore.Write([]byte(fmt.Sprintf("%s\n", sourcePath)) ) + if err != nil { + return err + } + + } + + + err = gitIgnore.Close() + if err != nil { + return err + } + + return nil + +} +