logs go brrrrr

- fix deadlock in waitgroups
- batch inserts
- fix request_time mapping
This commit is contained in:
Neil Hanlon 2025-01-04 17:56:51 -05:00
parent 649b754be6
commit 77c266c43e
Signed by: neil
GPG key ID: 705BC21EC3C70F34
4 changed files with 243 additions and 122 deletions

2
.gitignore vendored
View file

@@ -1,3 +1,3 @@
fastly_log_processor_errors.log
process_fastly_logs_errors.log
logs.db
logs.db-journal

View file

@@ -29,6 +29,7 @@ func CreateLogTable() error {
service_id TEXT,
client_ip TEXT,
request_method TEXT,
request_time DATETIME,
request_url TEXT,
protocol TEXT,
response_status INTEGER,

358
main.go
View file

@@ -2,187 +2,305 @@ package main
import (
"bufio"
"context"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sync"
"io"
"context"
"github.com/urfave/cli/v3"
"github.com/urfave/cli/v3"
"git.resf.org/infrastructure/process-fastly-logs/db"
"git.resf.org/infrastructure/process-fastly-logs/parser"
"git.resf.org/infrastructure/process-fastly-logs/models"
"git.resf.org/infrastructure/process-fastly-logs/parser"
)
func init() {
logFile, err := os.OpenFile("process_fastly_logs_errors.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
log.Println("Failed to open error log file:", err)
os.Exit(1)
}
log.SetOutput(io.MultiWriter(os.Stdout, logFile))
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
logFile, err := os.OpenFile("process_fastly_logs_errors.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
log.Println("Failed to open error log file:", err)
os.Exit(1)
}
log.SetOutput(io.MultiWriter(os.Stdout, logFile))
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
}
func main() {
cwd, err := os.Getwd()
if err != nil {
log.Println(err)
log.Println(err)
}
cmd := &cli.Command{
Flags: []cli.Flag{
&cli.StringFlag{
Name: "logDir",
Value: cwd,
Usage: "directory containing logs to process",
},
&cli.StringFlag{
Name: "outDir",
Value: cwd,
Usage: "directory to ouput database and program logs",
},
},
Action: fastlyLogProcessor,
}
cmd := &cli.Command{
Flags: []cli.Flag{
&cli.StringFlag{
Name: "logDir",
Value: cwd,
Usage: "directory containing logs to process",
},
&cli.StringFlag{
Name: "outDir",
Value: cwd,
Usage: "directory to ouput database and program logs",
},
},
Action: fastlyLogProcessor,
}
if err := cmd.Run(context.Background(), os.Args); err != nil {
log.Fatal(err)
}
if err := cmd.Run(context.Background(), os.Args); err != nil {
log.Fatal(err)
}
}
// TODO:
// - Anonymize requests
// - Spit out in format Brian can use
// - Profile? lol
// - More tests
// - Anonymize requests
// - Spit out in format Brian can use
// - Profile? lol
// - More tests
func fastlyLogProcessor(ctx context.Context, cmd *cli.Command) error {
if cmd.NArg() < 1 {
return cli.Exit("Must pass at least one directory to process logs of", 1)
return cli.Exit("Must pass at least one directory to process logs of", 1)
}
month := cmd.Args().Get(0)
logDir := cmd.String("logDir")
outDir := cmd.String("outDir")
// Initialize the database
// Initialize the database
err := db.InitDB(fmt.Sprintf(filepath.Join(outDir, "logs.db")))
if err != nil {
log.Fatal(err)
}
defer db.DB.Close()
if err != nil {
log.Fatal(err)
}
defer db.DB.Close()
// Create logs table if not exists
err = db.CreateLogTable()
if err != nil {
log.Fatal(err)
}
// Create logs table if not exists
err = db.CreateLogTable()
if err != nil {
log.Fatal(err)
}
walkLogDir := fmt.Sprintf(filepath.Join(logDir, month))
// Create a WaitGroup
var wg sync.WaitGroup
// Create a WaitGroup
var wg sync.WaitGroup
// Define a semaphore channel to limit concurrency
maxGoroutines := 10 // Adjust based on your system capabilities
semaphore := make(chan struct{}, maxGoroutines)
// Define a semaphore channel to limit concurrency
maxGoroutines := 10 // Adjust based on your system capabilities
semaphore := make(chan struct{}, maxGoroutines)
// Walk through the directory and process each log file concurrently
err = filepath.Walk(walkLogDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() && filepath.Ext(path) == ".log" {
wg.Add(1)
// Acquire a token
semaphore <- struct{}{}
go func(p string) {
defer wg.Done()
defer func() { <-semaphore }() // Release the token
err := processLogFile(p)
if err != nil {
log.Printf("Error processing file %s: %v\n", p, err)
}
}(path)
}
return nil
})
if err != nil {
return err
}
// Walk through the directory and process each log file concurrently
err = filepath.Walk(walkLogDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() && filepath.Ext(path) == ".log" {
wg.Add(1)
// Acquire a token
semaphore <- struct{}{}
go func(p string) {
defer wg.Done()
defer func() { <-semaphore }() // Release the token
err := processLogFile(p)
if err != nil {
log.Printf("Error processing file %s: %v\n", p, err)
}
}(path)
}
return nil
})
if err != nil {
return err
}
// Wait for all Goroutines to finish
wg.Wait()
// Wait for all Goroutines to finish
wg.Wait()
return nil
}
func processLogFile(filePath string) error {
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
// Create channels
linesChan := make(chan string, 100)
entriesChan := make(chan *models.LogEntry, 100)
errorsChan := make(chan error, 100)
// Create channels
linesChan := make(chan string, 1000)
entriesChan := make(chan *models.LogEntry, 1000)
errorsChan := make(chan error, 1000)
var wg sync.WaitGroup
var wgParsers sync.WaitGroup
// Start Goroutines to parse lines
numParsers := 5 // Adjust as needed
for i := 0; i < numParsers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for line := range linesChan {
entry, err := parser.ParseLogLine(line)
if err != nil {
errorsChan <- fmt.Errorf("Error parsing line: %v", err)
continue
}
entriesChan <- entry
}
}()
}
// Start Goroutines to parse lines
numParsers := 100 // Adjust as needed
for i := 0; i < numParsers; i++ {
wgParsers.Add(1)
go func() {
defer wgParsers.Done()
for line := range linesChan {
log.Println("parsing line")
entry, err := parser.ParseLogLine(line)
if err != nil {
errorsChan <- fmt.Errorf("Error parsing line: %v", err)
continue
}
entriesChan <- entry
}
}()
}
// Start a Goroutine to save entries to the database
wg.Add(1)
var wgSaver sync.WaitGroup
wgSaver.Add(1)
go func() {
defer wg.Done()
defer wgSaver.Done()
const batchSize = 1000 // Adjust the batch size as needed
var batch []*models.LogEntry
batch = make([]*models.LogEntry, 0, batchSize)
for entry := range entriesChan {
err := entry.Save()
if err != nil {
errorsChan <- fmt.Errorf("Error saving entry: %v", err)
batch = append(batch, entry)
if len(batch) >= batchSize {
if err := saveBatch(batch); err != nil {
errorsChan <- fmt.Errorf("Error saving batch: %v", err)
}
batch = batch[:0] // Reset the batch
}
}
// Save any remaining entries in the batch
if len(batch) > 0 {
if err := saveBatch(batch); err != nil {
errorsChan <- fmt.Errorf("Error saving batch: %v", err)
}
}
}()
// Read lines from the file and send to linesChan
scanner := bufio.NewScanner(file)
for scanner.Scan() {
linesChan <- scanner.Text()
}
close(linesChan)
// Error handling
var errorList []error
var errorMutex sync.Mutex
var wgErrors sync.WaitGroup
// Wait for parsing and saving to finish
wg.Wait()
close(entriesChan)
close(errorsChan)
if len(errorsChan) > 0 {
log.Printf("Encountered errors while processing file %s:\n", filePath)
wgErrors.Add(1)
go func() {
defer wgErrors.Done()
for err := range errorsChan {
log.Println("handling error")
errorMutex.Lock()
errorList = append(errorList, err)
errorMutex.Unlock()
}
}()
// Read lines from the file and send to linesChan
scanner := bufio.NewScanner(file)
for scanner.Scan() {
linesChan <- scanner.Text()
}
close(linesChan) // Close linesChan after all lines have been sent
// Wait for parsing Goroutines to finish
wgParsers.Wait()
// Close entriesChan since no more entries will be sent
close(entriesChan)
// Wait for the saving Goroutine to finish
wgSaver.Wait()
// Close errorsChan after all senders have finished
close(errorsChan)
// Wait for the error handling Goroutine to finish
wgErrors.Wait()
// Log the errors
if len(errorList) > 0 {
log.Printf("Encountered errors while processing file %s:\n", filePath)
for _, err := range errorList {
log.Println(err)
}
}
if err := scanner.Err(); err != nil {
if err := scanner.Err(); err != nil {
return err
}
return nil
}
// saveBatch saves a batch of log entries to the database using a transaction
func saveBatch(entries []*models.LogEntry) error {
tx, err := db.DB.Begin()
if err != nil {
return err
}
stmt, err := tx.Prepare(`
INSERT INTO logs(
priority,
timestamp,
cache_server,
service_id,
client_ip,
request_method,
request_time,
request_url,
protocol,
response_status,
response_body_bytes,
host,
user_agent,
datacenter,
geo_city,
geo_continent_code,
geo_region,
start_time,
elapsed_time_usec,
is_hit,
cache_result
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`)
if err != nil {
tx.Rollback()
return err
}
defer stmt.Close()
for _, entry := range entries {
_, err := stmt.Exec(
entry.Priority,
entry.Timestamp,
entry.CacheServer,
entry.ServiceID,
entry.ClientIP,
entry.RequestMethod,
entry.RequestTime,
entry.RequestURL,
entry.Protocol,
entry.ResponseStatus,
entry.ResponseBodyBytes,
entry.Host,
entry.UserAgent,
entry.Datacenter,
entry.GeoCity,
entry.GeoContinentCode,
entry.GeoRegion,
entry.StartTime,
entry.ElapsedTimeUsec,
entry.IsHit,
entry.CacheResult,
)
if err != nil {
tx.Rollback()
return err
}
}
if err := tx.Commit(); err != nil {
tx.Rollback()
return err
}
return nil

View file

@@ -37,6 +37,7 @@ func (entry *LogEntry) Save() error {
service_id,
client_ip,
request_method,
request_time,
request_url,
protocol,
response_status,
@@ -51,7 +52,7 @@ func (entry *LogEntry) Save() error {
elapsed_time_usec,
is_hit,
cache_result
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
statement, err := db.DB.Prepare(insertSQL)
if err != nil {
@@ -64,6 +65,7 @@ func (entry *LogEntry) Save() error {
entry.ServiceID,
entry.ClientIP,
entry.RequestMethod,
entry.RequestTime,
entry.RequestURL,
entry.Protocol,
entry.ResponseStatus,