#!/usr/local/bin/perl
#
# NAME
#  ntservice.monitor - monitor NT service status with the Empire
#  SystemEdge SNMP agent
#
#
# SYNOPSIS
#  ntservice.monitor [-vn] [-c community] [-t timeout]
#      [-s "service string"] [-u {automatic|manual|disabled}] host...
#
#
# ARGUMENTS
#  -v  Runs in verbose mode, shows all SNMP output collected.
#      Unsuitable for presentation to Mon but possibly useful
#      for development/debugging.
#
#  -c  SNMP community string. Default is "public".
#
#  -t  SNMP timeout, in seconds. Default is 5 seconds.
#
#  -n  Negate option. Instead of testing if the service is running,
#      test to make sure that the service is NOT running and/or
#      not installed.
#
#  -s  Service string name. Case insensitive name of service to look for.
#      This is the string that appears in the "Service" column of
#      the WinNT "Services" control panel. The string is matched
#      literally (regex metacharacters have no special meaning).
#
#  -u  Service startup type. Will be either "manual", "automatic", or
#      "disabled" (case insensitive). This is the string that appears in
#      the "Startup" column of the WinNT "Services" control panel. If this
#      option is not set, the service startup type is ignored.
#
#  host...
#      Space separated list of hosts to monitor.
#
#
# EXAMPLE
#  Check the MS SQL Server service on the hosts "ntdb1" and "ntdb2".
#  Report an error if the service is not running or if its startup
#  type is not set to "automatic".
#
#  ntservice.monitor -c secret -s "MSSQLService" -u automatic ntdb1 ntdb2
#
#
# DESCRIPTION
#  ntservice.monitor monitors WinNT service status via the Empire
#  SystemEdge SNMP agent. It is designed to be used as a monitor
#  for the Mon package.
#
#  As such if any monitoring condition is not met, the script will report
#  a non-zero error code and output 2+ lines of text. The first line of
#  text is a comma-separated (", ") list of hosts which are in error,
#  second and subsequent lines are detail output about exactly what
#  went wrong (one error per line).
#
#  This script relies on several things:
#  1) You must have the Empire SNMP agent set up and running on every
#     machine that you want this script to check, and have that agent
#     configured to be monitoring NT services.
#  2) The script queries the ntServiceTable by numeric OID and sets
#     MIBS to the empty string, so the Empire MIB file itself should not
#     be needed. (The MIB is included with every package of Empire
#     SystemEdge if you want to rewrite this with symbolic names.)
#
#
# EXIT STATUS
#  0  The command completed successfully.
#  1  At least one hard failure (e.g. service is definitely in error)
#     was detected, or the command line was invalid.
#  2  At least one soft failure (e.g., timeout, OID not found) was detected.
#
#
# SEE ALSO
#  http://www.kernel.org/pub/software/admin/mon/html/
#  http://www.empire.com/
#
#
# AUTHORS
#  Andrew Ryan
#  $Id: diskspace.monitor,v 1.16 2000/02/17 21:06:14 andrewr Exp $
#
#
use strict;
use SNMP;
use Getopt::Std;
use vars qw($opt_v $opt_n $opt_c $opt_t $opt_s $opt_u);

getopts('vnc:t:s:u:');

my $community = $opt_c || 'public';                     # default SNMP community string
my $timeout   = ($opt_t || 0) * 1000 * 1000 || 5000000; # microseconds; default is 5 secs.

# Name of the service to look for. An absent -s behaves like an empty string.
my $service_string = defined $opt_s ? $opt_s : '';

# Numeric ntServiceStartType codes and their control-panel names.
# (This translation would be better done by the MIB, but we don't
# rely on having that around.)
my %service_startups = (
    1 => "automatic",
    2 => "manual",
    3 => "disabled",
);

# Numeric startup code requested with -u; stays undefined when -u is not given.
my $wanted_startup;
if ($opt_u) {
    # Look the name up case-insensitively so that "-u Automatic" is
    # translated exactly like "-u automatic".
    my %code_for = reverse %service_startups;
    $wanted_startup = $code_for{ lc $opt_u };
    unless ( defined $wanted_startup ) {
        print "\n$0: Usage error. Invalid service startup type \"$opt_u\"\n";
        print "Service startup type string must be either automatic, manual, or disabled";
        exit 1;
    }
}

my $exit_status = 0;    # Default exit status is 0

$ENV{"MIBS"}             = '';
$SNMP::use_long_names    = 1;
$SNMP::use_sprint_value  = 1;

# Columns of the Empire ntServiceTable
# (.iso(1).org(3).dod(6).internet(1).private(4).enterprises(1).empire(546)
#  .nt(5).ntServices(4).ntServiceTable(1)), addressed numerically.
my $ntservice_name_var    = ".1.3.6.1.4.1.546.5.4.1.1.2";
my $ntservice_startup_var = ".1.3.6.1.4.1.546.5.4.1.1.4";
my $ntservice_status_var  = ".1.3.6.1.4.1.546.5.4.1.1.6";

# Pattern tested against the returned tag to make sure that we are still
# inside the name column while walking. It is deliberately left unescaped,
# exactly as before, so the walk terminates under the same conditions.
my $test_string = $ntservice_name_var;

# ntServiceState value that constitutes a failure: "1" (running) when -n
# is given, "2" (notRunning) otherwise.
my $failing_state = $opt_n ? "1" : "2";

my ( @failures, @details );

foreach my $host (@ARGV) {
    my $got_data      = 0;    # set to 1 once getnext returns a value
    my $found_service = 0;    # set to 1 once the requested service is seen
    my ( $val, $service_index );

    print "performing query on $host\n" if $opt_v;
    my $session = SNMP::Session->new(
        DestHost    => $host,
        Timeout     => $timeout,
        RetryNoSuch => 1,
        Retries     => 3,
        Community   => $community,
    );
    print "SNMP timeout for host $host is $timeout\n" if $opt_v;

    # Without a session object there is nothing to query; report it as a
    # soft failure instead of dying on the first method call.
    unless ( defined $session ) {
        push @failures, $host;
        push @details,  "$host: unknown error creating SNMP session";
        $exit_status = 2 unless $exit_status == 1;
        next;
    }

    # The varbind must be created afresh for every host, otherwise
    # multiple hosts don't work. Start at instance 0 so that GETNEXT,
    # which returns the entry strictly after the one given, can also
    # return a service stored at index 1.
    my $ntservice_var = SNMP::Varbind->new( [ "$ntservice_name_var", 0 ] );

    # Walk the service-name column until the service is found, the walk
    # leaves the column, or the session reports an error.
    do {
        $val = $session->getnext($ntservice_var);
        $got_data = 1 if defined $val;

        # The last dotted component of the returned tag is used as the
        # row index for the follow-up GETs below.
        # NOTE(review): this relies on the tag layout produced with
        # MIBS='' and use_long_names set -- confirm against the agent.
        my @oidname = split( /\./, $ntservice_var->[$SNMP::Varbind::tag_f] );
        $service_index = $oidname[-1];

        # $val holds the quoted english name of the service,
        # e.g. "Net Logon"; \Q...\E keeps the -s string literal.
        $found_service = 1
            if defined $val && $val =~ m/\"\Q$service_string\E\"/i;
    } until ( ( $ntservice_var->[$SNMP::Varbind::tag_f] !~ /$test_string/ )
        || ( $session->{ErrorStr} )
        || ( $found_service == 1 ) );

    # Now do the specific gets on the service we're interested in.
    if ($found_service) {
        my $service_descr = $val;

        my $service_status =
            $session->get( [ ["$ntservice_status_var.$service_index"] ] );
        print "index $service_index ($service_descr) has status $service_status\n"
            if $opt_v;

        my $service_startup =
            $session->get( [ ["$ntservice_startup_var.$service_index"] ] );
        print "index $service_index ($service_descr) has startup param $service_startup ($service_startups{$service_startup})\n"
            if $opt_v;

        if ( defined $service_status && $service_status eq $failing_state ) {
            push @failures, $host;
            push @details, $opt_n
                ? "$host: service $service_descr is running (shouldn't be)"
                : "$host: service $service_descr is not running (should be)";
            $exit_status = 1;
        }

        if ( $opt_u && ( $service_startup != $wanted_startup ) ) {
            # service has the wrong start type
            push @failures, $host;
            push @details,
                "$host: service $service_descr startType is $service_startups{$service_startup}, should be $service_startups{$wanted_startup}";
            $exit_status = 1;
        }
    }

    # After checking the table, classify what happened at the SNMP level:
    # 1) timeout                                  -> soft failure
    # 2) any other SNMP error                     -> hard failure
    # 3) no value came back at all (OID missing?) -> soft failure
    # 4) service not in the table although it
    #    should be (not installed?)               -> hard failure
    if ( defined( $session->{ErrorStr} ) && $session->{ErrorStr} eq "Timeout" ) {
        push @failures, "$host";
        push @details,  "$host: $session->{ErrorStr}";
        $exit_status = 2 unless $exit_status == 1;
    }
    elsif ( $session->{ErrorNum} ) {
        push @failures, "$host";
        push @details,  "$host: '$session->{ErrorStr}'";
        $exit_status = 1;
    }
    elsif ( !$got_data ) {
        # ucd-snmpd 4.0.1 reports this behavior when an OID is not found
        push @failures, "$host";
        push @details,  "$host: OID for exit value is null (OID doesn't exist?)";
        $exit_status = 2 unless $exit_status == 1;
    }
    elsif ( !$opt_n && !$found_service ) {
        push @failures, $host;
        push @details,
            "$host: A service matching string \"$service_string\" not found in table (service not installed?)";
        $exit_status = 1;
    }
}

# Uniq the array of failures, so multiple failures on a single host
# are reported in the details section (lines #2-infinity) but not
# in the summary (line #1). First-seen order is kept so the summary
# line is stable from run to run.
my %seen;
@failures = grep { !$seen{$_}++ } @failures;

print "failures: " if $opt_v;
print join( ", ", @failures );
print "\n" if @failures;
print "\ndetails: " if $opt_v;
print join( "\n", @details );

exit( @failures ? $exit_status : 0 );