# Chapter 9: Log File Analysis and Monitoring

> "The difference between a good sysadmin and a great one? The great one automated their log analysis before the outage." - Anonymous

Logs are the heartbeat of your systems. They tell you what happened, when it happened, and sometimes even why. But with modern systems generating gigabytes of logs daily, manual analysis is impossible. This chapter shows you how to build log analysis tools that find needles in haystacks, detect anomalies, and alert you before things go wrong.

## Understanding Log Formats

### Common Log Formats

```perl
#!/usr/bin/env perl
use Modern::Perl '2018';
use feature 'signatures';
no warnings 'experimental::signatures';

# Apache Combined Log Format
#
# Parses one access-log line. Returns a hashref with the keys ip, identity,
# user, timestamp, method, path, protocol, status, size, referrer and
# user_agent, or undef when the line does not match the format.
# A '-' placeholder becomes undef for identity/user/referrer and 0 for size.
sub parse_apache_log($line) {
    my $regex = qr/
        ^(\S+)\s+           # IP address
        (\S+)\s+            # Identity
        (\S+)\s+            # User
        \[([^\]]+)\]\s+     # Timestamp
        "([^"]+)"\s+        # Request
        (\d{3})\s+          # Status code
        (\d+|-)\s*          # Size
        "([^"]*)"\s*        # Referrer
        "([^"]*)"           # User agent
    /x;

    # Copy the captures into named lexicals straight away so that nothing
    # below depends on $1..$9 surviving later regex operations.
    my ($ip, $identity, $user, $timestamp, $request,
        $status, $size, $referrer, $user_agent) = $line =~ $regex
        or return undef;

    # The request field is "METHOD PATH PROTOCOL".
    my ($method, $path, $protocol) = split /\s+/, $request;

    return {
        ip         => $ip,
        identity   => $identity eq '-' ? undef : $identity,
        user       => $user eq '-' ? undef : $user,
        timestamp  => $timestamp,
        method     => $method,
        path       => $path,
        protocol   => $protocol,
        status     => $status,
        size       => $size eq '-' ? 0 : $size,
        referrer   => $referrer eq '-' ? undef : $referrer,
        user_agent => $user_agent,
    };
}

# Syslog Format
#
# Parses one traditional syslog line ("Mon DD HH:MM:SS host program[pid]: msg").
# Returns a hashref with timestamp, hostname, program, pid and message,
# or undef when the line does not match. pid is undef when the line has no
# "[pid]" part.
sub parse_syslog($line) {
    my $regex = qr/
        ^(\w+\s+\d+\s+\d{2}:\d{2}:\d{2})\s+   # Timestamp
        (\S+)\s+                               # Hostname
        ([^:\[]+)                              # Program
        (?:\[(\d+)\])?                         # PID (optional)
        :\s*                                   # Separator
        (.+)$                                  # Message
    /x;

    if ($line =~ $regex) {
        return {
            timestamp => $1,
            hostname  => $2,
            program   => $3,
            pid       => $4,
            message   => $5,
        };
    }
    return undef;
}

# JSON Logs (structured logging)
use JSON::XS;

# Decodes one JSON-formatted log line.
# Returns the decoded data structure, or undef (after a warning) when the
# line is not valid JSON.
sub parse_json_log($line) {
    # Build the decoder once instead of once per log line.
    state $json = JSON::XS->new->utf8;

    # A `return` inside eval { } only leaves the eval block, not the sub,
    # so capture the decoded value and return it explicitly afterwards.
    my $entry;
    my $ok = eval { $entry = $json->decode($line); 1 };
    unless ($ok) {
        warn "Failed to parse JSON log: $@";
        return undef;
    }
    return $entry;
}

# Custom Application Logs
#
# Matches $line against a caller-supplied pattern and returns a hashref of
# its named captures (a copy of %+), or undef when the pattern does not match.
sub parse_custom_log($line, $pattern) {
    if ($line =~ $pattern) {
        return { %+ };    # Return named captures
    }
    return undef;
}
```

## Real-Time Log Monitoring

### Tail Follow Implementation

```perl
use File::Tail;
use IO::Select;

# Monitor single file
#
# Follows $filename and calls $callback->($line) for every line read,
# with the trailing newline removed.
sub tail_file {
    my ($filename, $callback) = @_;

    my $file = File::Tail->new(
        name        => $filename,
        interval    => 1,
        maxinterval => 5,
        adjustafter => 10,
        resetafter  => 30,
        tail        => 0,    # Start from end of file
    );

    while (defined(my $line = $file->read)) {
        chomp $line;
        $callback->($line);
    }
}

# Monitor multiple files
#
# Follows every file named in the arrayref $files and calls
# $callback->($filename, $line) for each new line. Loops forever.
sub tail_multiple {
    my ($files, $callback) = @_;

    my @tailfiles = map {
        File::Tail->new(
            name     => $_,
            interval => 1,
            tail     => 0,
        )
    } @$files;

    while (1) {
        # Wait up to 10 seconds; @pending stays empty on timeout.
        my ($nfound, $timeleft, @pending) =
            File::Tail::select(undef, undef, undef, 10, @tailfiles);

        foreach my $file (@pending) {
            my $line = $file->read;
            next unless defined $line;
            chomp $line;
            # File::Tail keeps the file name under the "input" key
            # (the "name" constructor argument is not stored as {name}).
            $callback->($file->{input}, $line);
        }
    }
}

# Real-time log processor
sub monitor_logs {
    my ($logfile) = @_;

    tail_file($logfile, sub {
        my ($line) = @_;

        # Parse log line
        my $entry = parse_apache_log($line);
        return unless $entry;

        # Check for errors
        if ($entry->{status} >= 500) {
            alert("Server error: $entry->{path} returned $entry->{status}");
        }

        # Check for attacks
        if ($entry->{path} =~ /\.\.[\/\\]|