More tagless + altlookaside mode work

- Completely functional now - pushes and lookaside downloads/uploads work properly
- Limitation: no module support yet (that's next)
- Limitation: tagless imports only pull the latest head from a branch

-Skip G.
Skip Grube 2022-09-08 22:45:45 -04:00
parent d99f126761
commit f339965767
No known key found for this signature in database
GPG Key ID: D391F8393BEA6D9C
2 changed files with 264 additions and 79 deletions
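For orientation, here is a minimal, self-contained sketch of the alternate lookaside URL scheme this commit wires up, using a hypothetical package name, file, and checksum; the URL pattern, hash-length detection, and default CDN mirror the changes to GitMode.WriteSource and NewProcessData in the diffs below.

package main

import (
    "fmt"
    "strings"
)

// hashTypeFromLen mirrors the length-based hash detection added in WriteSource.
func hashTypeFromLen(hash string) string {
    switch len(hash) {
    case 128:
        return "sha512"
    case 64:
        return "sha256"
    case 40:
        return "sha1"
    case 32:
        return "md5"
    }
    return "sha512"
}

func main() {
    cdn := "https://sources.stream.centos.org/sources/rpms" // default CDN when alt-lookaside is enabled
    name := "bash"                                          // hypothetical package name
    path := "SOURCES/bash-5.1.8.tar.gz"                     // hypothetical lookaside file path
    hash := strings.Repeat("ab", 32)                        // hypothetical 64-character (sha256) checksum

    fileName := strings.Split(path, "/")[1] // file name without the "SOURCES/" prefix
    // Alt. lookaside URL form: <cdn>/<name>/<filename>/<hashtype>/<hash>/<filename>
    url := fmt.Sprintf("%s/%s/%s/%s/%s/%s", cdn, name, fileName, hashTypeFromLen(hash), hash, fileName)
    fmt.Println(url)
}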

View File

@@ -139,7 +139,13 @@ func (g *GitMode) RetrieveSource(pd *data.ProcessData) (*data.ModeData, error) {
         _tmpRef := strings.Split(refSpec, "/")
         _branchName := _tmpRef[(len(_tmpRef) - 1)]
 
+        // In the case of "strict branch mode" on, the branch name must match *exactly* with our prefix-version-suffix (like "c8" cannot also match "c8-beta")
+        // If it doesn't, bail out without adding this branch
+        if pd.StrictBranchMode == true && _branchName != fmt.Sprintf("%s%d%s", pd.ImportBranchPrefix, pd.Version, pd.BranchSuffix) {
+            return nil
+        }
+
         latestTags[_branchName] = &remoteTarget{
             remote: refSpec,
             when:   tag.Tagger.When,
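A small sketch of what the strict-branch-mode check above means in practice, assuming the hypothetical prefix "c", version 8, and an empty suffix (so "c8" is imported while "c8-beta" and "c8s" are skipped):

package main

import "fmt"

func main() {
    // Hypothetical settings: ImportBranchPrefix "c", Version 8, empty BranchSuffix.
    prefix, version, suffix := "c", 8, ""
    want := fmt.Sprintf("%s%d%s", prefix, version, suffix) // "c8"

    for _, branch := range []string{"c8", "c8-beta", "c8s"} {
        if branch != want {
            fmt.Printf("strict branch mode: skipping %q\n", branch)
            continue
        }
        fmt.Printf("strict branch mode: importing %q\n", branch)
    }
}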
@@ -228,56 +234,73 @@ func (g *GitMode) RetrieveSource(pd *data.ProcessData) (*data.ModeData, error) {
 }
 
 func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error {
     remote, err := md.Repo.Remote("upstream")
-    if err != nil {
+    if err != nil && pd.TaglessMode == false {
         return fmt.Errorf("could not get upstream remote: %v", err)
     }
 
     var refspec config.RefSpec
     var branchName string
 
-    if strings.HasPrefix(md.TagBranch, "refs/heads") {
-        refspec = config.RefSpec(fmt.Sprintf("+%s:%s", md.TagBranch, md.TagBranch))
-        branchName = strings.TrimPrefix(md.TagBranch, "refs/heads/")
-    } else {
-        match := misc.GetTagImportRegex(pd).FindStringSubmatch(md.TagBranch)
-        branchName = match[2]
-        refspec = config.RefSpec(fmt.Sprintf("+refs/heads/%s:%s", branchName, md.TagBranch))
-    }
+    fmt.Printf("pd.AltLookaside == %v , pd.CdnUrl == %s \n", pd.AltLookAside, pd.CdnUrl)
 
-    pd.Log.Printf("checking out upstream refspec %s", refspec)
-    fetchOpts := &git.FetchOptions{
-        Auth:       pd.Authenticator,
-        RemoteName: "upstream",
-        RefSpecs:   []config.RefSpec{refspec},
-        Tags:       git.AllTags,
-        Force:      true,
-    }
-    err = remote.Fetch(fetchOpts)
-    if err != nil && err != git.NoErrAlreadyUpToDate {
-        if err == transport.ErrInvalidAuthMethod || err == transport.ErrAuthenticationRequired {
-            fetchOpts.Auth = nil
-            err = remote.Fetch(fetchOpts)
-            if err != nil && err != git.NoErrAlreadyUpToDate {
-                return fmt.Errorf("could not fetch upstream: %v", err)
-            }
-        } else {
-            return fmt.Errorf("could not fetch upstream: %v", err)
-        }
-    }
+    // In the case of tagless mode, we already have the transformed repo sitting in the worktree,
+    // and don't need to perform any checkout or fetch operations
+    if pd.TaglessMode == false {
+        if strings.HasPrefix(md.TagBranch, "refs/heads") {
+            refspec = config.RefSpec(fmt.Sprintf("+%s:%s", md.TagBranch, md.TagBranch))
+            branchName = strings.TrimPrefix(md.TagBranch, "refs/heads/")
+        } else {
+            match := misc.GetTagImportRegex(pd).FindStringSubmatch(md.TagBranch)
+            branchName = match[2]
+            refspec = config.RefSpec(fmt.Sprintf("+refs/heads/%s:%s", branchName, md.TagBranch))
+            fmt.Println("Found branchname that does not start w/ refs/heads :: ", branchName)
+        }
 
-    err = md.Worktree.Checkout(&git.CheckoutOptions{
-        Branch: plumbing.ReferenceName(md.TagBranch),
-        Force:  true,
-    })
-    if err != nil {
-        return fmt.Errorf("could not checkout source from git: %v", err)
-    }
+        pd.Log.Printf("checking out upstream refspec %s", refspec)
+        fetchOpts := &git.FetchOptions{
+            Auth:       pd.Authenticator,
+            RemoteName: "upstream",
+            RefSpecs:   []config.RefSpec{refspec},
+            Tags:       git.AllTags,
+            Force:      true,
+        }
+        err = remote.Fetch(fetchOpts)
+        if err != nil && err != git.NoErrAlreadyUpToDate {
+            if err == transport.ErrInvalidAuthMethod || err == transport.ErrAuthenticationRequired {
+                fetchOpts.Auth = nil
+                err = remote.Fetch(fetchOpts)
+                if err != nil && err != git.NoErrAlreadyUpToDate {
+                    return fmt.Errorf("could not fetch upstream: %v", err)
+                }
+            } else {
+                return fmt.Errorf("could not fetch upstream: %v", err)
+            }
+        }
 
-    _, err = md.Worktree.Add(".")
-    if err != nil {
-        return fmt.Errorf("could not add Worktree: %v", err)
-    }
+        err = md.Worktree.Checkout(&git.CheckoutOptions{
+            Branch: plumbing.ReferenceName(md.TagBranch),
+            Force:  true,
+        })
+        if err != nil {
+            return fmt.Errorf("could not checkout source from git: %v", err)
+        }
+
+        _, err = md.Worktree.Add(".")
+        if err != nil {
+            return fmt.Errorf("could not add Worktree: %v", err)
+        }
+    }
+
+    if pd.TaglessMode == true {
+        branchName = fmt.Sprintf("%s%d%s", pd.ImportBranchPrefix, pd.Version, pd.BranchSuffix)
+    }
 
     metadataPath := ""
     ls, err := md.Worktree.Filesystem.ReadDir(".")
     if err != nil {
@@ -335,7 +358,32 @@ func (g *GitMode) WriteSource(pd *data.ProcessData, md *data.ModeData) error {
             body = fromBlobStorage
             pd.Log.Printf("downloading %s from blob storage", hash)
         } else {
-            url := fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash)
+            url := ""
+
+            // Alternate lookaside logic: if enabled, we pull from a new URL pattern
+            if pd.AltLookAside == false {
+                url = fmt.Sprintf("%s/%s/%s/%s", pd.CdnUrl, md.Name, branchName, hash)
+            } else {
+                // We first need the hash algorithm based on length of hash:
+                hashType := "sha512"
+                switch len(hash) {
+                case 128:
+                    hashType = "sha512"
+                case 64:
+                    hashType = "sha256"
+                case 40:
+                    hashType = "sha1"
+                case 32:
+                    hashType = "md5"
+                }
+
+                // need the name of the file without "SOURCES/":
+                fileName := strings.Split(path, "/")[1]
+
+                // Alt. lookaside url is of the form: <cdn> / <name> / <filename> / <hashtype> / <hash> / <filename>
+                url = fmt.Sprintf("%s/%s/%s/%s/%s/%s", pd.CdnUrl, md.Name, fileName, hashType, hash, fileName)
+            }
+
             pd.Log.Printf("downloading %s", url)
 
             req, err := http.NewRequest("GET", url, nil)

View File

@@ -25,6 +25,7 @@ import (
     "fmt"
     "github.com/go-git/go-billy/v5"
     "github.com/go-git/go-billy/v5/osfs"
+    "github.com/go-git/go-git/v5/plumbing/format/gitignore"
     "github.com/go-git/go-git/v5/plumbing/transport"
     "github.com/go-git/go-git/v5/plumbing/transport/http"
     "github.com/go-git/go-git/v5/plumbing/transport/ssh"
@@ -137,9 +138,14 @@ func NewProcessData(req *ProcessDataRequest) (*data.ProcessData, error) {
     if req.BranchPrefix == "" {
         req.BranchPrefix = "r"
     }
-    if req.CdnUrl == "" {
+    if req.CdnUrl == "" && req.AltLookAside == false {
         req.CdnUrl = "https://git.centos.org/sources"
     }
 
+    // If altlookaside is enabled, and the CdnUrl hasn't been changed, then automatically set it to the default
+    // CentOS Stream (the new pattern very much won't work with the old git.centos.org/sources site)
+    if (req.CdnUrl == "https://git.centos.org/sources" || req.CdnUrl == "") && req.AltLookAside == true {
+        req.CdnUrl = "https://sources.stream.centos.org/sources/rpms"
+    }
+
     // Validate required
     if req.Package == "" {
@@ -657,12 +663,6 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
     pushRefspecs = append(pushRefspecs, config.RefSpec("HEAD:"+plumbing.NewTagReferenceName(newTag)))
 
-    fmt.Printf("pushRefspecs == %+v \n", pushRefspecs)
-    fmt.Println("blah blah blah")
-
     err = repo.Push(&git.PushOptions{
         RemoteName: "origin",
@@ -690,25 +690,30 @@ func ProcessRPM(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
 func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error) {
     pd.Log.Println("Tagless mode detected, attempting import of latest commit")
 
+    // In tagless mode, we *automatically* set StrictBranchMode to true
+    // Only the exact <PREFIX><VERSION><SUFFIX> branch should be pulled from the source repo
+    pd.StrictBranchMode = true
+
+    // our return values: a mapping of branches -> commits (1:1) that we're bringing in,
+    // and a mapping of branches to: version = X, release = Y
+    latestHashForBranch := map[string]string{}
+    versionForBranch := map[string]*srpmprocpb.VersionRelease{}
+
     md, err := pd.Importer.RetrieveSource(pd)
     if err != nil {
         pd.Log.Println("Error detected in RetrieveSource!")
         return nil, err
     }
 
-    log.Printf("%+v\n", md)
-
     md.BlobCache = map[string][]byte{}
 
     // TODO: add tagless module support
     remotePrefix := "rpms"
     if pd.ModuleMode {
         remotePrefix = "modules"
     }
 
-    // already uploaded blobs are skipped
-    // var alreadyUploadedBlobs []string
-
     // Set up our remote URL for pushing our repo to
     var tagIgnoreList []string
@@ -745,14 +750,10 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error
     }
 
     sourceRepo := *md.Repo
     sourceWorktree := *md.Worktree
     localPath := ""
 
     for _, branch := range md.Branches {
         md.Repo = &sourceRepo
         md.Worktree = &sourceWorktree
@@ -784,25 +785,29 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error
         // We want sources to become .PKGNAME.metadata, we want SOURCES and SPECS folders, etc.
         repoFixed, _ := convertLocalRepo(md.Name, localPath)
         if !repoFixed {
-            pd.Log.Println("Error converting repository into SOURCES + SPECS + .package.metadata format")
             return nil, fmt.Errorf("Error converting repository into SOURCES + SPECS + .package.metadata format")
         }
 
-        rpmVersion := getVersionFromSpec(md.Name, localPath, pd.Version)
-        if rpmVersion == "" {
+        nvrString := getVersionFromSpec(md.Name, localPath, pd.Version)
+        if nvrString == "" {
             return nil, fmt.Errorf("Error using rpm or rpmbuild to build SRPM and determine version info! (tagless mode)")
         }
 
+        // Set version and release fields we extracted (name|version|release are separated by pipes)
+        pd.PackageVersion = strings.Split(nvrString, "|")[1]
+        pd.PackageRelease = strings.Split(nvrString, "|")[2]
+
+        // Set full rpm version: name-version-release (for tagging properly)
+        rpmVersion := fmt.Sprintf("%s-%s-%s", md.Name, pd.PackageVersion, pd.PackageRelease)
+
         pd.Log.Println("Successfully determined version of tagless checkout: ", rpmVersion)
 
         md.PushBranch = fmt.Sprintf("%s%d%s", pd.BranchPrefix, pd.Version, pd.BranchSuffix)
 
         // Make an initial repo we will use to push to our target
         pushRepo, err := git.PlainInit(localPath + "_gitpush", false)
         if err != nil {
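A quick sketch of the pipe-separated name|version|release string handled above, using a hypothetical value for nvrString; getVersionFromSpec now emits this format via the rpm --qf change further below.

package main

import (
    "fmt"
    "strings"
)

func main() {
    name := "bash"                  // hypothetical package name
    nvrString := "bash|5.1.8|6.el9" // hypothetical getVersionFromSpec output (name|version|release)

    version := strings.Split(nvrString, "|")[1] // "5.1.8"
    release := strings.Split(nvrString, "|")[2] // "6.el9"

    // Full name-version-release string, used for tagging
    rpmVersion := fmt.Sprintf("%s-%s-%s", name, version, release)
    fmt.Println(rpmVersion) // bash-5.1.8-6.el9
}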
@@ -858,9 +863,43 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error
         os.Rename(fmt.Sprintf("%s/.%s.metadata", localPath, md.Name), fmt.Sprintf("%s_gitpush/.%s.metadata", localPath, md.Name))
 
+        md.Repo = pushRepo
+        md.Worktree = w
+
+        // Download lookaside sources (tarballs) into the push git repo:
+        err = pd.Importer.WriteSource(pd, md)
+        if err != nil {
+            return nil, err
+        }
+
+        // Call function to upload source to target lookaside and
+        // ensure the sources are added to .gitignore
+        err = processLookasideSources(pd, md, localPath + "_gitpush")
+        if err != nil {
+            return nil, err
+        }
+
+        // Apply patch(es) if needed:
+        if pd.ModuleMode {
+            err := patchModuleYaml(pd, md)
+            if err != nil {
+                return nil, err
+            }
+        } else {
+            err := executePatchesRpm(pd, md)
+            if err != nil {
+                return nil, err
+            }
+        }
+
         err = w.AddWithOptions(&git.AddOptions{All: true})
         if err != nil {
+            fmt.Printf("ERROR == %v \n", err)
             return nil, fmt.Errorf("Error adding SOURCES/ , SPECS/ or .metadata file to commit list.")
         }
@@ -891,7 +930,7 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error
         if newRepo == true {
             pushRefspecs = append(pushRefspecs, config.RefSpec("*:*"))
-            pd.Log.Printf("Looks like a new remote repo, committing all local objects to new remote branch")
+            pd.Log.Printf("New remote repo detected, creating new remote branch")
         }
@@ -952,21 +991,31 @@ func processRPMTagless(pd *data.ProcessData) (*srpmprocpb.ProcessResponse, error
             return nil, fmt.Errorf("could not push to remote: %v", err)
         }
 
+        // Clean up temporary path after succesful import (disabled during development)
+        /*
         if err := os.RemoveAll(localPath); err != nil {
             log.Printf("Error cleaning up temporary git checkout directory %s . Non-fatal, continuing anyway...\n", localPath)
         }
 
         if err := os.RemoveAll(fmt.Sprintf("%s_gitpush", localPath)); err != nil {
             log.Printf("Error cleaning up temporary git checkout directory %s . Non-fatal, continuing anyway...\n", fmt.Sprintf("%s_gitpush", localPath))
         }
+        */
+
+        // append our processed branch to the return structures:
+        latestHashForBranch[md.PushBranch] = obj.Hash.String()
+        versionForBranch[md.PushBranch] = &srpmprocpb.VersionRelease{
+            Version: pd.PackageVersion,
+            Release: pd.PackageRelease,
+        }
     }
 
-    return nil, nil
+    fmt.Printf("returning::\n latestHashForBranch == %+v \n\n versionForBranch == %+v\n\n", latestHashForBranch, versionForBranch)
+
+    // return struct with all our branch:commit and branch:version+release mappings
+    return &srpmprocpb.ProcessResponse{
+        BranchCommits:  latestHashForBranch,
+        BranchVersions: versionForBranch,
+    }, nil
 }
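A rough sketch of the branch-to-commit and branch-to-version/release mappings returned above, using plain maps, a stand-in struct for srpmprocpb.VersionRelease, and hypothetical branch, hash, and version values:

package main

import "fmt"

// versionRelease is a stand-in for srpmprocpb.VersionRelease in this sketch.
type versionRelease struct {
    Version string
    Release string
}

func main() {
    // Hypothetical result for a single imported branch.
    latestHashForBranch := map[string]string{
        "r9": "1db8dcbbfa6e2065c9e5745c4fcbfe6cbc0e2ad7", // hypothetical commit hash
    }
    versionForBranch := map[string]*versionRelease{
        "r9": {Version: "5.1.8", Release: "6.el9"}, // hypothetical version/release
    }

    fmt.Println(latestHashForBranch["r9"], versionForBranch["r9"].Version, versionForBranch["r9"].Release)
}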
@@ -994,7 +1043,7 @@ func convertLocalRepo(pkgName string, localRepo string) (bool, error) {
     for _, file := range files {
         // We don't want to process SOURCES, SPECS, or any of our .git folders
-        if file.Name() == "SOURCES" || file.Name() == "SPECS" || strings.HasPrefix(file.Name(), ".git") {
+        if file.Name() == "SOURCES" || file.Name() == "SPECS" || strings.HasPrefix(file.Name(), ".git") || file.Name() == "." + pkgName + ".metadata" {
             continue
         }
@@ -1181,7 +1230,7 @@ func getVersionFromSpec(pkgName string, localRepo string, majorVersion int) (str
     // Call the rpm binary to extract the version-release info out of it, and tack on ".el<VERSION>" at the end:
-    cmd = exec.Command("rpm", "-qp", "--qf", `%{NAME}-%{VERSION}-%{RELEASE}\n`, fmt.Sprintf("%s/SRPMS/%s", rpmBuildPath, srpmFile))
+    cmd = exec.Command("rpm", "-qp", "--qf", `%{NAME}|%{VERSION}|%{RELEASE}\n`, fmt.Sprintf("%s/SRPMS/%s", rpmBuildPath, srpmFile))
     nvrTmp, err := cmd.CombinedOutput()
     if err != nil {
         log.Println("Error running rpm command to extract temporary SRPM name-version-release identifiers.")
@@ -1207,6 +1256,94 @@ func getVersionFromSpec(pkgName string, localRepo string, majorVersion int) (str
+// We need to loop through the lookaside blob files ("SourcesToIgnore"),
+// and upload them to our target storage (usually an S3 bucket, but could be a local folder)
+//
+// We also need to add the source paths to .gitignore in the git repo, so we don't accidentally commit + push them
+func processLookasideSources(pd *data.ProcessData, md *data.ModeData, localDir string) (error) {
+
+    w := md.Worktree
+    metadata, err := w.Filesystem.Create(fmt.Sprintf(".%s.metadata", md.Name))
+    if err != nil {
+        return fmt.Errorf("could not create metadata file: %v", err)
+    }
+
+    // Keep track of files we've already uploaded - don't want duplicates!
+    var alreadyUploadedBlobs []string
+
+    gitIgnore, err := os.OpenFile(fmt.Sprintf("%s/.gitignore", localDir), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+    if err != nil {
+        return err
+    }
+
+    for _, source := range md.SourcesToIgnore {
+
+        sourcePath := source.Name
+        _, err := w.Filesystem.Stat(sourcePath)
+        if source.Expired || err != nil {
+            continue
+        }
+
+        sourceFile, err := w.Filesystem.Open(sourcePath)
+        if err != nil {
+            return fmt.Errorf("could not open ignored source file %s: %v", sourcePath, err)
+        }
+        sourceFileBts, err := ioutil.ReadAll(sourceFile)
+        if err != nil {
+            return fmt.Errorf("could not read the whole of ignored source file: %v", err)
+        }
+
+        source.HashFunction.Reset()
+        _, err = source.HashFunction.Write(sourceFileBts)
+        if err != nil {
+            return fmt.Errorf("could not write bytes to hash function: %v", err)
+        }
+        checksum := hex.EncodeToString(source.HashFunction.Sum(nil))
+        checksumLine := fmt.Sprintf("%s %s\n", checksum, sourcePath)
+        _, err = metadata.Write([]byte(checksumLine))
+        if err != nil {
+            return fmt.Errorf("could not write to metadata file: %v", err)
+        }
+
+        if data.StrContains(alreadyUploadedBlobs, checksum) {
+            continue
+        }
+        exists, err := pd.BlobStorage.Exists(checksum)
+        if err != nil {
+            return err
+        }
+        if !exists && !pd.NoStorageUpload {
+            err := pd.BlobStorage.Write(checksum, sourceFileBts)
+            if err != nil {
+                return err
+            }
+            pd.Log.Printf("wrote %s to blob storage", checksum)
+        }
+        alreadyUploadedBlobs = append(alreadyUploadedBlobs, checksum)
+
+        // Add this SOURCES/ lookaside file to be excluded
+        w.Excludes = append(w.Excludes, gitignore.ParsePattern(sourcePath, nil))
+
+        // Append the SOURCES/<file> path to .gitignore:
+        _, err = gitIgnore.Write([]byte(fmt.Sprintf("%s\n", sourcePath)))
+        if err != nil {
+            return err
+        }
+
+    }
+
+    err = gitIgnore.Close()
+    if err != nil {
+        return err
+    }
+
+    return nil
+}
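A tiny sketch of the lines processLookasideSources appends to the .<package>.metadata file and to .gitignore, using a hypothetical checksum and source path:

package main

import "fmt"

func main() {
    checksum := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" // hypothetical sha256 checksum
    sourcePath := "SOURCES/bash-5.1.8.tar.gz"                                      // hypothetical lookaside file

    metadataLine := fmt.Sprintf("%s %s\n", checksum, sourcePath) // appended to .<package>.metadata
    gitignoreLine := fmt.Sprintf("%s\n", sourcePath)             // appended to .gitignore

    fmt.Print(metadataLine, gitignoreLine)
}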