#!/usr/bin/perl
#
# log-grep-recent - select only recent messages from a mass-check log
#                   [optionally only select messages with set=x]


                                                        sub usage {
                                                          # Abort with the command-line synopsis.  The message ends in a
                                                          # newline, so die() does not append its "at FILE line N" suffix.
                                                          my $synopsis = 'usage: log-grep-recent --months num_months [--set x] < full.log > recent.log';

                                                          # The message closes with a line that holds only whitespace.
                                                          my $padding = ' ' x 56;

                                                          die "\n\n$synopsis\n\n$padding\n";
                                                        }

# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;
use warnings;

# Command-line state.  --months is mandatory and must be positive; --set
# optionally restricts processing to lines whose "set=" field equals it.
my $months = 0;
my $set;
use Getopt::Long qw(:config bundling auto_help);
# NOTE(review): --set is parsed as a float ("=f") although the log pattern
# below only ever captures a single digit; "=i" may have been intended.
GetOptions(
           "m|months=f" => \$months,
           "s|set=f"    => \$set
         ) or usage();

usage() unless ($months > 0);

my $roughly1month = (24 * 60 * 60 * 30);
my $now = time;

# The cutoff lies one second short of ($months + 1) thirty-day periods ago,
# so for a whole-number --months the printed lines are exactly those that
# fall into age buckets 0 .. $months of the report below.
my $cutoff = $now - (($roughly1month * $months) + ($roughly1month - 1));

my %buckets = ();     # age in whole 30-day months => number of lines seen
my $numtotal = 0;     # number of lines counted into %buckets

while (<>) {
  # A result line starts with "Y" or ".", then whitespace and an optionally
  # negative integer, and later carries time=<digits> followed by
  # set=<single digit>.
  if (/^[Y\.]\s+-?\d+.*?\btime=(\d+)\b.*?\bset=(\d)\b/) {
    my ($t, $s) = ($1, $2);

    # Lines from other sets are neither counted nor printed.
    next if (defined $set && $set != $s);

    my $monthsago = int (($now - $t) / $roughly1month);
    $buckets{$monthsago}++;
    $numtotal++;

    print if ($t >= $cutoff);
  }
  elsif (/^#/) {
    next;           # comments
  }
  else {
    warn "unparseable line: $_";
  }
}

# Report the age distribution on STDERR: cumulative count, cumulative
# percentage, per-bucket count and the bucket's age range.
warn "Month distribution:\n\n";
my $cumul = 0;
foreach my $m (sort { $a <=> $b } keys %buckets) {
  $cumul += $buckets{$m};
  # Multiply before dividing: ($cumul / $numtotal) * 100 can land just below
  # a whole number (29 of 100 gives 28.999...), which int() would then
  # truncate to one percent too few.
  warn sprintf ("%8d (%3d%%)  %8d  %2d-%d months old\n",
            $cumul, int(($cumul * 100) / ($numtotal || 1)),
            $buckets{$m}, $m, $m+1);
}

