#!/usr/bin/perl
#
# check_backuppc: a Nagios plugin to check the status of BackupPC
#
# Tested against BackupPC 3.2.1 and Nagios 3
#   <http://backuppc.sourceforge.net>
#   <http://nagios.org>
#
# AUTHORS
#   Benjamin Renard  <brenard@easter-eggs.com>
#   Jonas Meurer     <jmeurer@inet.de>
#
# Forked from check_backuppc 1.1.0, written by Seneca Cunningham
# <tetragon@users.sourceforge.net>.
#
# 2014.01.21 jmeurer:
# - slightly improve script output
# - add option to output performance data
#
# COPYRIGHT
#   Copyright (C) 2013       Easter-eggs
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

use strict;
no utf8;

# Nagios
#use lib "/usr/lib/nagios/plugins";
use lib qw(/usr/lib/nagios/plugins /usr/lib64/nagios/plugins /usr/local/nagios/libexec);
use utils qw(%ERRORS $TIMEOUT);
use POSIX qw(strftime difftime);
use Getopt::Long;
Getopt::Long::Configure('bundling');
use List::Util qw[max];

# BackupPC
use lib "/usr/share/backuppc/lib";
use BackupPC::Lib;

# Plugin version, shown by --version and at the top of the help text.
my $version = '1.1.1';

# Age thresholds (in days) for the last good backup of a host.
my ($warnDaysOld, $critDaysOld) = (2, 7);

# Output switches: verbosity counter and perfdata flag.
my ($verbose, $perfdata) = (0, 0);

# --version / --help flags and the result of option parsing.
my ($opt_V, $opt_h) = (0, 0);
my $goodOpt = 0;

# Oldest "last good backup" age (in days) seen across the checked hosts.
my $maxdays = 0;

# Filters collected from the command line (each option may be repeated).
my (@ownerOnly, @hostsDesired, @hostsExcluded);

# Process options: every switch has a short and a long spelling that
# write to the same variable.
my @optionSpec = (
	'v+'         => \$verbose,
	'verbose+'   => \$verbose,
	'p'          => \$perfdata,
	'perfdata'   => \$perfdata,
	'c=f'        => \$critDaysOld,
	'critical=f' => \$critDaysOld,
	'w=f'        => \$warnDaysOld,
	'warning=f'  => \$warnDaysOld,
	'o=s'        => \@ownerOnly,
	'owner=s'    => \@ownerOnly,
	'V'          => \$opt_V,
	'version'    => \$opt_V,
	'h'          => \$opt_h,
	'help'       => \$opt_h,
	'H=s'        => \@hostsDesired,
	'hostname=s' => \@hostsDesired,
	'x=s'        => \@hostsExcluded,
	'exclude=s'  => \@hostsExcluded,
);
$goodOpt = GetOptions(@optionSpec);

# --version: print the version banner and leave with OK.
if ($opt_V)
{
	print "check_backuppc - $version\n";
	exit $ERRORS{'OK'};
}

# --help, or a command line that failed to parse: print the usage text.
# An explicit help request exits OK, a parse failure exits UNKNOWN.
if ($opt_h or not $goodOpt)
{
	my @usage = (
		"A Nagios plugin to check on BackupPC backup status.",
		"",
		"Options:",
		"  --hostname,-H      only check the specified host",
		"  --exclude,-x       do not check the specified host",
		"  --owner,-o         do only hosts of specified user",
		"  --warning,-w       days old of last good backup to cause a warning",
		"  --critical,-c      days old of last good backup to be critical",
		"  --verbose,-v       increase verbosity",
		"  --perfdata,-p      print perfdata",
		"  --version,-V       display plugin version",
		"  --help,-h          display this message",
		"",
	);
	print "check_backuppc - $version\n";
	print join("\n", @usage), "\n";
	exit($goodOpt ? $ERRORS{'OK'} : $ERRORS{'UNKNOWN'});
}

# The warning threshold may not be larger than the critical one.
if ($warnDaysOld > $critDaysOld)
{
	print "BACKUPPC UNKNOWN - Warning threshold must be <= critical\n";
	exit $ERRORS{'UNKNOWN'};
}

# Connect to BackupPC
# Create the BackupPC library handle; a false result is reported as CRITICAL.
my $server = BackupPC::Lib->new;
unless ($server)
{
	print "BACKUPPC CRITICAL - Couldn't connect to BackupPC\n";
	exit $ERRORS{'CRITICAL'};
}

# Configuration as returned by the library; ServerHost/ServerPort are read
# from it below.
my %Conf = $server->Conf();

# NOTE(review): presumably resets per-process state before talking to the
# server - confirm against BackupPC::Lib.
$server->ChildInit();

# ServerConnect returns a true value (used as the error text) on failure.
my $err = $server->ServerConnect($Conf{ServerHost}, $Conf{ServerPort});
if ($err)
{
	print "BACKUPPC UNKNOWN - Can't connect to server ($err)\n";
	exit $ERRORS{'UNKNOWN'};
}

# Filled in by the eval below: the server reply is Perl source that
# assigns to %Status, so this lexical must keep exactly this name.
my %Status;

# query the BackupPC server for host status
my $status_raw = $server->ServerMesg('status hosts');
my $hosts_infos = $server->HostInfoRead();

# Without a reply there is nothing to evaluate; carrying on would leave
# %Status empty and make the plugin report OK without checking any host.
if (not defined $status_raw or $status_raw eq '')
{
	print "BACKUPPC UNKNOWN - No status reply from BackupPC server\n";
	exit $ERRORS{'UNKNOWN'};
}

# undump the output... BackupPC uses Data::Dumper
# NOTE(review): this is a string eval of data received from the BackupPC
# server; it is only safe as long as that server is trusted.
eval $status_raw;
if ($@)
{
	# Collapse the error onto one line so the plugin output stays single-line.
	(my $evalErr = $@) =~ s/\s+/ /g;
	print "BACKUPPC UNKNOWN - Can't parse BackupPC status reply ($evalErr)\n";
	exit $ERRORS{'UNKNOWN'};
}

# check the dumped output
my $hostCount = 0;
my $errorLevel='OK';

# Every host named with --hostname/--exclude must be known to BackupPC.
# The name is looked up literally: treating it as a regex would let
# "web" pass because "web1" exists, and the host loop would then match
# nothing and report OK without having checked anything.
foreach my $host (@hostsDesired, @hostsExcluded)
{
	next if exists $Status{$host};
	print("BACKUPPC UNKNOWN - Unknown host ($host)\n");
	exit $ERRORS{'UNKNOWN'};
}

# Problem descriptions collected per severity; used to build the output.
my @problems_c;
my @problems_w;

# Lookup tables for the command line filters. Names are compared
# literally: matching them as regular expressions made "-x db-old" also
# exclude a host called "db", and an empty owner produced an empty
# pattern, which Perl does not treat as a plain "match anything".
my %hostWanted   = map { $_ => 1 } @hostsDesired;
my %hostExcluded = map { $_ => 1 } @hostsExcluded;
# Owners may also be given as a comma separated list ("-o alice,bob"),
# which the former substring match happened to accept.
my %ownerWanted  = map { $_ => 1 } map { split(/,/, $_) } @ownerOnly;

# Host errors that are not reported as critical on their own; for these
# the age of the last good backup decides.
my %toleratedError = map { $_ => 1 }
	('ping too slow', 'no ping response', 'host not found');

# host status checks
foreach my $host (sort(keys(%Status)))
{
	# Keys starting with a space are not hosts.
	next if $host =~ /^ /;

	# Cheap command line filters first, so that the per-host configuration
	# is only read for hosts that are actually going to be checked.
	next if (@hostsDesired and not $hostWanted{$host});
	next if ($hostExcluded{$host});
	my $owner = $hosts_infos->{$host}->{user};
	if (@ownerOnly)
	{
		next if (not defined $owner or not $ownerWanted{$owner});
	}
	next if ($Status{$host}{'type'} eq 'archive');

	# Skip hosts whose configuration has BackupsDisable set.
	# NOTE(review): a result that is not a hash reference is treated as
	# "not disabled" instead of aborting the plugin - confirm this is the
	# desired fallback.
	my $host_conf = $server->ConfigDataRead($host);
	next if (ref($host_conf) eq 'HASH' and $host_conf->{BackupsDisable});

	$hostCount++;

	my $error  = $Status{$host}{'error'};
	my $reason = $Status{$host}{'reason'};

	# Debug (any verbosity of two or more)
	if ($verbose >= 2)
	{
		print "Host $host state " . $Status{$host}{'state'} . "\n";
		print "  with reason: " . $reason . "\n";
		print "  with error: " . $error . "\n";
		print "  with owner: $owner\n\n";
	}

	# Check host error: anything that is neither a tolerated connectivity
	# error nor a failed restore is critical right away.
	if ($error and not $toleratedError{$error}
	    and $reason !~ /Reason_restore_failed/)
	{
		push @problems_c, "$host (error: ".$error." / ".$reason.")";
		$errorLevel='CRITICAL';
		next;
	}

	# Check last good backup time
	my $lastGood = $Status{$host}{'lastGoodBackupTime'};
	if (not $lastGood)
	{
		push @problems_c, "$host: (no backups)";
		$errorLevel='CRITICAL';
		next;
	}

	# Age of the last good backup, in seconds and in days.
	my $difftime = difftime(time(), $lastGood);
	my $diffdays = $difftime / (3600 * 24);
	$maxdays = max($maxdays, $diffdays);

	if ($difftime > ($critDaysOld * 3600 * 24))
	{
		push @problems_c, "$host: (backup age ".sprintf("%.1f",$diffdays)." days)";
		$errorLevel='CRITICAL';
	}
	elsif ($difftime > ($warnDaysOld * 3600 * 24))
	{
		push @problems_w, "$host: (backup age ".sprintf("%.1f",$diffdays)." days)";
		# Never downgrade an already critical result.
		$errorLevel='WARNING' if ($errorLevel eq 'OK');
	}
}

# Build the human readable summary that follows the status keyword.
#  - verbose: list every problem, grouped by severity
#  - default: only the number of problems per severity
#  - nothing collected: "(no problems)"
my $problemTxt="";
my $problemTxt_c="";
my $problemTxt_w="";
if (@problems_c or @problems_w) {
	if ($verbose > 0) {
		$problemTxt_c = "(Critical: ".join(", ", @problems_c).")" if (@problems_c);
		$problemTxt_w = "(Warning: ".join(", ", @problems_w).")" if (@problems_w);
		# Join only the groups that exist, so that a missing group does not
		# leave a stray leading or trailing blank in the output.
		$problemTxt = join(" ", grep { $_ ne "" } ($problemTxt_c, $problemTxt_w));
	}
	else {
		my @counts;
		push @counts, scalar(@problems_c)." critical problems" if (@problems_c);
		push @counts, scalar(@problems_w)." warning problems" if (@problems_w);
		$problemTxt = "(".join(", ", @counts).")";
	}
}
else {
	$problemTxt = "(no problems)";
}

# Performance data: number of critical and warning hosts plus the oldest
# backup age in days. Only built and printed when --perfdata was given.
my $perfTxt="";
if ($perfdata) {
	$perfTxt = sprintf("critHosts=%d warnHosts=%d maxAge=%.1f",
		scalar(@problems_c), scalar(@problems_w), $maxdays);
}

# Status line first, then the optional perfdata line; the exit code is
# the Nagios code for the final error level.
print "BACKUPPC $errorLevel - $problemTxt\n";
if ($perfdata) {
	print "| $perfTxt\n";
}
exit $ERRORS{$errorLevel};
