mirror of
https://github.com/rocky-linux/srpmproc.git
synced 2024-06-02 08:10:17 +00:00
make srpmproc more efficient with downloads and uploads. use files from blob storage when possible
This commit is contained in:
parent
a7cb786293
commit
1d5719b234
|
@ -2,4 +2,6 @@ package blob
|
||||||
|
|
||||||
type Storage interface {
|
type Storage interface {
|
||||||
Write(path string, content []byte)
|
Write(path string, content []byte)
|
||||||
|
Read(path string) []byte
|
||||||
|
Exists(path string) bool
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package gcs
|
||||||
import (
|
import (
|
||||||
"cloud.google.com/go/storage"
|
"cloud.google.com/go/storage"
|
||||||
"context"
|
"context"
|
||||||
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -37,3 +38,27 @@ func (g *GCS) Write(path string, content []byte) {
|
||||||
log.Fatalf("could not close gcs writer to source: %v", err)
|
log.Fatalf("could not close gcs writer to source: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (g *GCS) Read(path string) []byte {
|
||||||
|
ctx := context.Background()
|
||||||
|
obj := g.bucket.Object(path)
|
||||||
|
|
||||||
|
r, err := obj.NewReader(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(r)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *GCS) Exists(path string) bool {
|
||||||
|
ctx := context.Background()
|
||||||
|
obj := g.bucket.Object(path)
|
||||||
|
_, err := obj.NewReader(ctx)
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
|
@ -4,7 +4,9 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"github.com/aws/aws-sdk-go/aws"
|
"github.com/aws/aws-sdk-go/aws"
|
||||||
"github.com/aws/aws-sdk-go/aws/session"
|
"github.com/aws/aws-sdk-go/aws/session"
|
||||||
|
"github.com/aws/aws-sdk-go/service/s3"
|
||||||
"github.com/aws/aws-sdk-go/service/s3/s3manager"
|
"github.com/aws/aws-sdk-go/service/s3/s3manager"
|
||||||
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -35,3 +37,28 @@ func (s *S3) Write(path string, content []byte) {
|
||||||
log.Fatalf("failed to upload file to s3, %v", err)
|
log.Fatalf("failed to upload file to s3, %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *S3) Read(path string) []byte {
|
||||||
|
obj, err := s.uploader.S3.GetObject(&s3.GetObjectInput{
|
||||||
|
Bucket: aws.String(s.bucket),
|
||||||
|
Key: aws.String(path),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(obj.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *S3) Exists(path string) bool {
|
||||||
|
_, err := s.uploader.S3.GetObject(&s3.GetObjectInput{
|
||||||
|
Bucket: aws.String(s.bucket),
|
||||||
|
Key: aws.String(path),
|
||||||
|
})
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
|
@ -101,7 +101,7 @@ func (g *GitMode) RetrieveSource(pd *ProcessData) *modeData {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *GitMode) WriteSource(md *modeData) {
|
func (g *GitMode) WriteSource(pd *ProcessData, md *modeData) {
|
||||||
remote, err := md.repo.Remote("upstream")
|
remote, err := md.repo.Remote("upstream")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("could not get upstream remote: %v", err)
|
log.Fatalf("could not get upstream remote: %v", err)
|
||||||
|
@ -168,27 +168,42 @@ func (g *GitMode) WriteSource(md *modeData) {
|
||||||
hash := strings.TrimSpace(lineInfo[0])
|
hash := strings.TrimSpace(lineInfo[0])
|
||||||
path := strings.TrimSpace(lineInfo[1])
|
path := strings.TrimSpace(lineInfo[1])
|
||||||
|
|
||||||
url := fmt.Sprintf("https://git.centos.org/sources/%s/%s/%s", md.rpmFile.Name(), branchName, hash)
|
var body []byte
|
||||||
log.Printf("downloading %s", url)
|
|
||||||
|
|
||||||
req, err := http.NewRequest("GET", url, nil)
|
if md.blobCache[hash] != nil {
|
||||||
if err != nil {
|
body = md.blobCache[hash]
|
||||||
log.Fatalf("could not create new http request: %v", err)
|
log.Printf("retrieving %s from cache", hash)
|
||||||
}
|
} else {
|
||||||
req.Header.Set("Accept-Encoding", "*")
|
fromBlobStorage := pd.BlobStorage.Read(hash)
|
||||||
|
if fromBlobStorage != nil {
|
||||||
|
body = fromBlobStorage
|
||||||
|
log.Printf("downloading %s from blob storage", hash)
|
||||||
|
} else {
|
||||||
|
url := fmt.Sprintf("https://git.centos.org/sources/%s/%s/%s", md.rpmFile.Name(), branchName, hash)
|
||||||
|
log.Printf("downloading %s", url)
|
||||||
|
|
||||||
resp, err := client.Do(req)
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("could not download dist-git file: %v", err)
|
log.Fatalf("could not create new http request: %v", err)
|
||||||
}
|
}
|
||||||
|
req.Header.Set("Accept-Encoding", "*")
|
||||||
|
|
||||||
body, err := ioutil.ReadAll(resp.Body)
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("could not read the whole dist-git file: %v", err)
|
log.Fatalf("could not download dist-git file: %v", err)
|
||||||
}
|
}
|
||||||
err = resp.Body.Close()
|
|
||||||
if err != nil {
|
body, err = ioutil.ReadAll(resp.Body)
|
||||||
log.Fatalf("could not close body handle: %v", err)
|
if err != nil {
|
||||||
|
log.Fatalf("could not read the whole dist-git file: %v", err)
|
||||||
|
}
|
||||||
|
err = resp.Body.Close()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("could not close body handle: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
md.blobCache[hash] = body
|
||||||
}
|
}
|
||||||
|
|
||||||
f, err := md.worktree.Filesystem.Create(path)
|
f, err := md.worktree.Filesystem.Create(path)
|
||||||
|
|
|
@ -2,7 +2,7 @@ package internal
|
||||||
|
|
||||||
type ImportMode interface {
|
type ImportMode interface {
|
||||||
RetrieveSource(pd *ProcessData) *modeData
|
RetrieveSource(pd *ProcessData) *modeData
|
||||||
WriteSource(md *modeData)
|
WriteSource(pd *ProcessData, md *modeData)
|
||||||
PostProcess(md *modeData)
|
PostProcess(md *modeData)
|
||||||
ImportName(pd *ProcessData, md *modeData) string
|
ImportName(pd *ProcessData, md *modeData) string
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package internal
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/cavaliercoder/go-rpm"
|
"github.com/cavaliercoder/go-rpm"
|
||||||
"github.com/go-git/go-billy/v5/memfs"
|
"github.com/go-git/go-billy/v5/memfs"
|
||||||
|
@ -15,6 +16,7 @@ import (
|
||||||
"hash"
|
"hash"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -57,6 +59,7 @@ type modeData struct {
|
||||||
pushBranch string
|
pushBranch string
|
||||||
branches []string
|
branches []string
|
||||||
sourcesToIgnore []*ignoredSource
|
sourcesToIgnore []*ignoredSource
|
||||||
|
blobCache map[string][]byte
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProcessRPM checks the RPM specs and discards any remote files
|
// ProcessRPM checks the RPM specs and discards any remote files
|
||||||
|
@ -69,12 +72,18 @@ type modeData struct {
|
||||||
func ProcessRPM(pd *ProcessData) {
|
func ProcessRPM(pd *ProcessData) {
|
||||||
tagImportRegex = regexp.MustCompile(fmt.Sprintf("refs/tags/(imports/(%s.|%s.-.+)/(.*))", pd.ImportBranchPrefix, pd.ImportBranchPrefix))
|
tagImportRegex = regexp.MustCompile(fmt.Sprintf("refs/tags/(imports/(%s.|%s.-.+)/(.*))", pd.ImportBranchPrefix, pd.ImportBranchPrefix))
|
||||||
md := pd.Importer.RetrieveSource(pd)
|
md := pd.Importer.RetrieveSource(pd)
|
||||||
|
md.blobCache = map[string][]byte{}
|
||||||
|
|
||||||
remotePrefix := "rpms"
|
remotePrefix := "rpms"
|
||||||
if pd.ModuleMode {
|
if pd.ModuleMode {
|
||||||
remotePrefix = "modules"
|
remotePrefix = "modules"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var pushedHashes []string
|
||||||
|
|
||||||
|
// already uploaded blobs are skipped
|
||||||
|
var alreadyUploadedBlobs []string
|
||||||
|
|
||||||
// if no-dup-mode is enabled then skip already imported versions
|
// if no-dup-mode is enabled then skip already imported versions
|
||||||
var tagIgnoreList []string
|
var tagIgnoreList []string
|
||||||
if pd.NoDupMode {
|
if pd.NoDupMode {
|
||||||
|
@ -113,6 +122,7 @@ func ProcessRPM(pd *ProcessData) {
|
||||||
sourceWorktree := *md.worktree
|
sourceWorktree := *md.worktree
|
||||||
|
|
||||||
for _, branch := range md.branches {
|
for _, branch := range md.branches {
|
||||||
|
md.sourcesToIgnore = []*ignoredSource{}
|
||||||
md.repo = &sourceRepo
|
md.repo = &sourceRepo
|
||||||
md.worktree = &sourceWorktree
|
md.worktree = &sourceWorktree
|
||||||
md.tagBranch = branch
|
md.tagBranch = branch
|
||||||
|
@ -202,7 +212,7 @@ func ProcessRPM(pd *ProcessData) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pd.Importer.WriteSource(md)
|
pd.Importer.WriteSource(pd, md)
|
||||||
|
|
||||||
copyFromFs(md.worktree.Filesystem, w.Filesystem, ".")
|
copyFromFs(md.worktree.Filesystem, w.Filesystem, ".")
|
||||||
md.repo = repo
|
md.repo = repo
|
||||||
|
@ -214,8 +224,6 @@ func ProcessRPM(pd *ProcessData) {
|
||||||
executePatchesRpm(pd, md)
|
executePatchesRpm(pd, md)
|
||||||
}
|
}
|
||||||
|
|
||||||
// already uploaded blobs are skipped
|
|
||||||
var alreadyUploadedBlobs []string
|
|
||||||
// get ignored files hash and add to .{name}.metadata
|
// get ignored files hash and add to .{name}.metadata
|
||||||
metadataFile := fmt.Sprintf(".%s.metadata", rpmFile.Name())
|
metadataFile := fmt.Sprintf(".%s.metadata", rpmFile.Name())
|
||||||
metadata, err := w.Filesystem.Create(metadataFile)
|
metadata, err := w.Filesystem.Create(metadataFile)
|
||||||
|
@ -255,8 +263,10 @@ func ProcessRPM(pd *ProcessData) {
|
||||||
if strContains(alreadyUploadedBlobs, path) {
|
if strContains(alreadyUploadedBlobs, path) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
pd.BlobStorage.Write(path, sourceFileBts)
|
if !pd.BlobStorage.Exists(path) {
|
||||||
log.Printf("wrote %s to blob storage", path)
|
pd.BlobStorage.Write(path, sourceFileBts)
|
||||||
|
log.Printf("wrote %s to blob storage", path)
|
||||||
|
}
|
||||||
alreadyUploadedBlobs = append(alreadyUploadedBlobs, path)
|
alreadyUploadedBlobs = append(alreadyUploadedBlobs, path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -341,5 +351,13 @@ func ProcessRPM(pd *ProcessData) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("could not push to remote: %v", err)
|
log.Fatalf("could not push to remote: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hashString := obj.Hash.String()
|
||||||
|
pushedHashes = append(pushedHashes, fmt.Sprintf("%s:%s", md.pushBranch, hashString))
|
||||||
|
}
|
||||||
|
|
||||||
|
err := json.NewEncoder(os.Stdout).Encode(pushedHashes)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("could not print hashes")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user