logs go brrrrr
- fix deadlock in waitgroups - batch inserts - fix request_time mapping
This commit is contained in:
parent
649b754be6
commit
77c266c43e
4 changed files with 243 additions and 122 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,3 +1,3 @@
|
||||||
fastly_log_processor_errors.log
|
process_fastly_logs_errors.log
|
||||||
logs.db
|
logs.db
|
||||||
logs.db-journal
|
logs.db-journal
|
||||||
|
|
|
@ -29,6 +29,7 @@ func CreateLogTable() error {
|
||||||
service_id TEXT,
|
service_id TEXT,
|
||||||
client_ip TEXT,
|
client_ip TEXT,
|
||||||
request_method TEXT,
|
request_method TEXT,
|
||||||
|
request_time DATETIME,
|
||||||
request_url TEXT,
|
request_url TEXT,
|
||||||
protocol TEXT,
|
protocol TEXT,
|
||||||
response_status INTEGER,
|
response_status INTEGER,
|
||||||
|
|
358
main.go
358
main.go
|
@ -2,187 +2,305 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
"io"
|
|
||||||
"context"
|
|
||||||
|
|
||||||
"github.com/urfave/cli/v3"
|
"github.com/urfave/cli/v3"
|
||||||
|
|
||||||
"git.resf.org/infrastructure/process-fastly-logs/db"
|
"git.resf.org/infrastructure/process-fastly-logs/db"
|
||||||
"git.resf.org/infrastructure/process-fastly-logs/parser"
|
|
||||||
"git.resf.org/infrastructure/process-fastly-logs/models"
|
"git.resf.org/infrastructure/process-fastly-logs/models"
|
||||||
|
"git.resf.org/infrastructure/process-fastly-logs/parser"
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
logFile, err := os.OpenFile("process_fastly_logs_errors.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
|
logFile, err := os.OpenFile("process_fastly_logs_errors.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println("Failed to open error log file:", err)
|
log.Println("Failed to open error log file:", err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
log.SetOutput(io.MultiWriter(os.Stdout, logFile))
|
log.SetOutput(io.MultiWriter(os.Stdout, logFile))
|
||||||
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
|
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
cwd, err := os.Getwd()
|
cwd, err := os.Getwd()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd := &cli.Command{
|
cmd := &cli.Command{
|
||||||
Flags: []cli.Flag{
|
Flags: []cli.Flag{
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "logDir",
|
Name: "logDir",
|
||||||
Value: cwd,
|
Value: cwd,
|
||||||
Usage: "directory containing logs to process",
|
Usage: "directory containing logs to process",
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "outDir",
|
Name: "outDir",
|
||||||
Value: cwd,
|
Value: cwd,
|
||||||
Usage: "directory to ouput database and program logs",
|
Usage: "directory to ouput database and program logs",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Action: fastlyLogProcessor,
|
Action: fastlyLogProcessor,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Run(context.Background(), os.Args); err != nil {
|
if err := cmd.Run(context.Background(), os.Args); err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// - Anonymize requests
|
// - Anonymize requests
|
||||||
// - Spit out in format Brian can use
|
// - Spit out in format Brian can use
|
||||||
// - Profile? lol
|
// - Profile? lol
|
||||||
// - More tests
|
// - More tests
|
||||||
func fastlyLogProcessor(ctx context.Context, cmd *cli.Command) error {
|
func fastlyLogProcessor(ctx context.Context, cmd *cli.Command) error {
|
||||||
if cmd.NArg() < 1 {
|
if cmd.NArg() < 1 {
|
||||||
return cli.Exit("Must pass at least one directory to process logs of", 1)
|
return cli.Exit("Must pass at least one directory to process logs of", 1)
|
||||||
}
|
}
|
||||||
month := cmd.Args().Get(0)
|
month := cmd.Args().Get(0)
|
||||||
logDir := cmd.String("logDir")
|
logDir := cmd.String("logDir")
|
||||||
outDir := cmd.String("outDir")
|
outDir := cmd.String("outDir")
|
||||||
|
|
||||||
// Initialize the database
|
// Initialize the database
|
||||||
err := db.InitDB(fmt.Sprintf(filepath.Join(outDir, "logs.db")))
|
err := db.InitDB(fmt.Sprintf(filepath.Join(outDir, "logs.db")))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
defer db.DB.Close()
|
defer db.DB.Close()
|
||||||
|
|
||||||
// Create logs table if not exists
|
// Create logs table if not exists
|
||||||
err = db.CreateLogTable()
|
err = db.CreateLogTable()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
walkLogDir := fmt.Sprintf(filepath.Join(logDir, month))
|
walkLogDir := fmt.Sprintf(filepath.Join(logDir, month))
|
||||||
|
|
||||||
// Create a WaitGroup
|
// Create a WaitGroup
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
// Define a semaphore channel to limit concurrency
|
// Define a semaphore channel to limit concurrency
|
||||||
maxGoroutines := 10 // Adjust based on your system capabilities
|
maxGoroutines := 10 // Adjust based on your system capabilities
|
||||||
semaphore := make(chan struct{}, maxGoroutines)
|
semaphore := make(chan struct{}, maxGoroutines)
|
||||||
|
|
||||||
// Walk through the directory and process each log file concurrently
|
// Walk through the directory and process each log file concurrently
|
||||||
err = filepath.Walk(walkLogDir, func(path string, info os.FileInfo, err error) error {
|
err = filepath.Walk(walkLogDir, func(path string, info os.FileInfo, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !info.IsDir() && filepath.Ext(path) == ".log" {
|
if !info.IsDir() && filepath.Ext(path) == ".log" {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
// Acquire a token
|
// Acquire a token
|
||||||
semaphore <- struct{}{}
|
semaphore <- struct{}{}
|
||||||
go func(p string) {
|
go func(p string) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
defer func() { <-semaphore }() // Release the token
|
defer func() { <-semaphore }() // Release the token
|
||||||
err := processLogFile(p)
|
err := processLogFile(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Error processing file %s: %v\n", p, err)
|
log.Printf("Error processing file %s: %v\n", p, err)
|
||||||
}
|
}
|
||||||
}(path)
|
}(path)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for all Goroutines to finish
|
// Wait for all Goroutines to finish
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func processLogFile(filePath string) error {
|
func processLogFile(filePath string) error {
|
||||||
file, err := os.Open(filePath)
|
file, err := os.Open(filePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
// Create channels
|
// Create channels
|
||||||
linesChan := make(chan string, 100)
|
linesChan := make(chan string, 1000)
|
||||||
entriesChan := make(chan *models.LogEntry, 100)
|
entriesChan := make(chan *models.LogEntry, 1000)
|
||||||
errorsChan := make(chan error, 100)
|
errorsChan := make(chan error, 1000)
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wgParsers sync.WaitGroup
|
||||||
|
|
||||||
// Start Goroutines to parse lines
|
// Start Goroutines to parse lines
|
||||||
numParsers := 5 // Adjust as needed
|
numParsers := 100 // Adjust as needed
|
||||||
for i := 0; i < numParsers; i++ {
|
for i := 0; i < numParsers; i++ {
|
||||||
wg.Add(1)
|
wgParsers.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wgParsers.Done()
|
||||||
for line := range linesChan {
|
for line := range linesChan {
|
||||||
entry, err := parser.ParseLogLine(line)
|
log.Println("parsing line")
|
||||||
if err != nil {
|
entry, err := parser.ParseLogLine(line)
|
||||||
errorsChan <- fmt.Errorf("Error parsing line: %v", err)
|
if err != nil {
|
||||||
continue
|
errorsChan <- fmt.Errorf("Error parsing line: %v", err)
|
||||||
}
|
continue
|
||||||
entriesChan <- entry
|
}
|
||||||
}
|
entriesChan <- entry
|
||||||
}()
|
}
|
||||||
}
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
// Start a Goroutine to save entries to the database
|
// Start a Goroutine to save entries to the database
|
||||||
wg.Add(1)
|
var wgSaver sync.WaitGroup
|
||||||
|
wgSaver.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wgSaver.Done()
|
||||||
|
const batchSize = 1000 // Adjust the batch size as needed
|
||||||
|
var batch []*models.LogEntry
|
||||||
|
batch = make([]*models.LogEntry, 0, batchSize)
|
||||||
|
|
||||||
for entry := range entriesChan {
|
for entry := range entriesChan {
|
||||||
err := entry.Save()
|
batch = append(batch, entry)
|
||||||
if err != nil {
|
if len(batch) >= batchSize {
|
||||||
errorsChan <- fmt.Errorf("Error saving entry: %v", err)
|
if err := saveBatch(batch); err != nil {
|
||||||
|
errorsChan <- fmt.Errorf("Error saving batch: %v", err)
|
||||||
|
}
|
||||||
|
batch = batch[:0] // Reset the batch
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save any remaining entries in the batch
|
||||||
|
if len(batch) > 0 {
|
||||||
|
if err := saveBatch(batch); err != nil {
|
||||||
|
errorsChan <- fmt.Errorf("Error saving batch: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Read lines from the file and send to linesChan
|
// Error handling
|
||||||
scanner := bufio.NewScanner(file)
|
var errorList []error
|
||||||
for scanner.Scan() {
|
var errorMutex sync.Mutex
|
||||||
linesChan <- scanner.Text()
|
var wgErrors sync.WaitGroup
|
||||||
}
|
|
||||||
close(linesChan)
|
|
||||||
|
|
||||||
// Wait for parsing and saving to finish
|
wgErrors.Add(1)
|
||||||
wg.Wait()
|
go func() {
|
||||||
close(entriesChan)
|
defer wgErrors.Done()
|
||||||
close(errorsChan)
|
|
||||||
|
|
||||||
if len(errorsChan) > 0 {
|
|
||||||
log.Printf("Encountered errors while processing file %s:\n", filePath)
|
|
||||||
for err := range errorsChan {
|
for err := range errorsChan {
|
||||||
|
log.Println("handling error")
|
||||||
|
errorMutex.Lock()
|
||||||
|
errorList = append(errorList, err)
|
||||||
|
errorMutex.Unlock()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Read lines from the file and send to linesChan
|
||||||
|
scanner := bufio.NewScanner(file)
|
||||||
|
for scanner.Scan() {
|
||||||
|
linesChan <- scanner.Text()
|
||||||
|
}
|
||||||
|
close(linesChan) // Close linesChan after all lines have been sent
|
||||||
|
|
||||||
|
// Wait for parsing Goroutines to finish
|
||||||
|
wgParsers.Wait()
|
||||||
|
|
||||||
|
// Close entriesChan since no more entries will be sent
|
||||||
|
close(entriesChan)
|
||||||
|
|
||||||
|
// Wait for the saving Goroutine to finish
|
||||||
|
wgSaver.Wait()
|
||||||
|
|
||||||
|
// Close errorsChan after all senders have finished
|
||||||
|
close(errorsChan)
|
||||||
|
|
||||||
|
// Wait for the error handling Goroutine to finish
|
||||||
|
wgErrors.Wait()
|
||||||
|
|
||||||
|
// Log the errors
|
||||||
|
if len(errorList) > 0 {
|
||||||
|
log.Printf("Encountered errors while processing file %s:\n", filePath)
|
||||||
|
for _, err := range errorList {
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := scanner.Err(); err != nil {
|
if err := scanner.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// saveBatch saves a batch of log entries to the database using a transaction
|
||||||
|
func saveBatch(entries []*models.LogEntry) error {
|
||||||
|
tx, err := db.DB.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt, err := tx.Prepare(`
|
||||||
|
INSERT INTO logs(
|
||||||
|
priority,
|
||||||
|
timestamp,
|
||||||
|
cache_server,
|
||||||
|
service_id,
|
||||||
|
client_ip,
|
||||||
|
request_method,
|
||||||
|
request_time,
|
||||||
|
request_url,
|
||||||
|
protocol,
|
||||||
|
response_status,
|
||||||
|
response_body_bytes,
|
||||||
|
host,
|
||||||
|
user_agent,
|
||||||
|
datacenter,
|
||||||
|
geo_city,
|
||||||
|
geo_continent_code,
|
||||||
|
geo_region,
|
||||||
|
start_time,
|
||||||
|
elapsed_time_usec,
|
||||||
|
is_hit,
|
||||||
|
cache_result
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
tx.Rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer stmt.Close()
|
||||||
|
|
||||||
|
for _, entry := range entries {
|
||||||
|
_, err := stmt.Exec(
|
||||||
|
entry.Priority,
|
||||||
|
entry.Timestamp,
|
||||||
|
entry.CacheServer,
|
||||||
|
entry.ServiceID,
|
||||||
|
entry.ClientIP,
|
||||||
|
entry.RequestMethod,
|
||||||
|
entry.RequestTime,
|
||||||
|
entry.RequestURL,
|
||||||
|
entry.Protocol,
|
||||||
|
entry.ResponseStatus,
|
||||||
|
entry.ResponseBodyBytes,
|
||||||
|
entry.Host,
|
||||||
|
entry.UserAgent,
|
||||||
|
entry.Datacenter,
|
||||||
|
entry.GeoCity,
|
||||||
|
entry.GeoContinentCode,
|
||||||
|
entry.GeoRegion,
|
||||||
|
entry.StartTime,
|
||||||
|
entry.ElapsedTimeUsec,
|
||||||
|
entry.IsHit,
|
||||||
|
entry.CacheResult,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
tx.Rollback()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := tx.Commit(); err != nil {
|
||||||
|
tx.Rollback()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -37,6 +37,7 @@ func (entry *LogEntry) Save() error {
|
||||||
service_id,
|
service_id,
|
||||||
client_ip,
|
client_ip,
|
||||||
request_method,
|
request_method,
|
||||||
|
request_time,
|
||||||
request_url,
|
request_url,
|
||||||
protocol,
|
protocol,
|
||||||
response_status,
|
response_status,
|
||||||
|
@ -51,7 +52,7 @@ func (entry *LogEntry) Save() error {
|
||||||
elapsed_time_usec,
|
elapsed_time_usec,
|
||||||
is_hit,
|
is_hit,
|
||||||
cache_result
|
cache_result
|
||||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
||||||
|
|
||||||
statement, err := db.DB.Prepare(insertSQL)
|
statement, err := db.DB.Prepare(insertSQL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -64,6 +65,7 @@ func (entry *LogEntry) Save() error {
|
||||||
entry.ServiceID,
|
entry.ServiceID,
|
||||||
entry.ClientIP,
|
entry.ClientIP,
|
||||||
entry.RequestMethod,
|
entry.RequestMethod,
|
||||||
|
entry.RequestTime,
|
||||||
entry.RequestURL,
|
entry.RequestURL,
|
||||||
entry.Protocol,
|
entry.Protocol,
|
||||||
entry.ResponseStatus,
|
entry.ResponseStatus,
|
||||||
|
|
Loading…
Reference in a new issue