--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+use OpenSRF::System;
+use OpenSRF::AppSession;
+use OpenSRF::EX qw(:try);
+
+# Sane-ish default
+my $opt_osrf_config = '/openils/conf/opensrf_core.xml';
+
+# For storing the list of supposedly active services
+my @services;
+# For storing our list of routers to check
+my @routers;
+
+GetOptions(
+ 'osrf-config=s' => \$opt_osrf_config,
+);
+
+# If we can't bootstrap then something is horribly wrong!
+# Probably "ejabberd isn't running"
+try {
+ OpenSRF::System->bootstrap_client(config_file => $opt_osrf_config);
+} otherwise {
+ print "Bootstrap failed\n";
+ exit 2;
+};
+
+# This gets the list of supposedly active services
+sub prep_service_list {
+ # Using settings directly, as I don't know how to ask with pre-existing classes
+ my $session = OpenSRF::AppSession->create('opensrf.settings');
+ try {
+ $session->connect;
+ } otherwise {
+ print "Settings Connect Failed\n";
+ exit 2;
+ };
+ # This xpath is "Find every instace of an appname node under an activeapps node, anywhere"
+ # It should grab every app configured to run on any drone
+ # If your config contains apps that are not run on real drones you will get errors ;)
+ my $req = $session->request('opensrf.settings.xpath.get', '//activeapps/appname');
+ my $list = $req->recv;
+
+ if(UNIVERSAL::isa($list,"Error")) {
+ print "Active Apps List Failed\n";
+ exit 2;
+ }
+
+ $req->finish;
+ # Quick and dirty de-dupe
+ my %u_list = map { ($_ => 1) } @{$list->content};
+ # And save for later
+ @services = keys(%u_list);
+
+ $session->finish;
+ $session->disconnect;
+}
+
+# This gets the list of supposedly active routers
+# This relies on the bootstrap being accurate in that regard
+sub prep_routers_list {
+ # First, we grab our (hopefully) cached config
+ my $config = OpenSRF::Utils::Config->current;
+ # Loop over it quick
+ foreach(@{$config->bootstrap->routers}) {
+ # And make entries for each router
+ my $router = {};
+ $router->{name} = $_->{name};
+ $router->{domain} = $_->{domain};
+ # If we don't have a services list assume all active ones (aka, private router)
+ $router->{services} = \@services unless $_->{services};
+ # Otherwise, make note of what we are supposed to be running (aka, public router)
+ $router->{services} = $_->{services}->{service} if $_->{services};
+ # And tack it onto the list
+ push @routers, $router;
+ }
+}
+
+# This does the actual checking of routers/services
+sub check_routers {
+ # Shortcut
+ my $conf = OpenSRF::Utils::Config->current;
+ foreach my $router (@routers) {
+ # HACK WARNING - This changes the router we will be querying
+ # This basically edits the cached bootstrap file. This is not guaranteed to keep working.
+ # This does NOT change what domain we are querying from
+ $conf->bootstrap->router_name($router->{name});
+ $conf->bootstrap->domain($router->{domain});
+ # Assume things failed unless they didn't.
+ my $failed = 1;
+ # First, check the router to see what it claims to have active services-wise
+ my $session = OpenSRF::AppSession->create('router');
+ try {
+ $failed = 0 if $session->connect;
+ } otherwise {
+ $failed = 1;
+ };
+ if($session->state != $session->CONNECTED || $failed) {
+ $router->{online} = 0;
+ next;
+ }
+ # Yay router commands! This should give us all services with at least one listener
+ my $req = $session->request('opensrf.router.info.class.list');
+ my $class_list = $req->recv;
+ $req->finish;
+
+ if(UNIVERSAL::isa($class_list,"Error")) {
+ $session->finish;
+ $session->disconnect;
+ $router->{online} = 0;
+ next;
+ }
+
+ # If we got an answer then this router is online!
+ $router->{online} = 1;
+ # Counters and storage for services checks
+ $router->{checked} = 0;
+ $router->{pass} = 0;
+ $router->{failed} = [];
+ # Quick reference of what the router told us it has
+ my %online_services = map { ($_ => 1) } @{$class_list->content};
+ foreach my $service (@{$router->{services}}) {
+ # This skips services not in the active list. Mainly for routers with explicit lists (aka, public routers) that not all may be configured to run.
+ next unless grep { $service eq $_ } @services;
+ # Assume we did not pass until proven otherwise
+ my $passed = 0;
+ $router->{checked} += 1;
+ if($online_services{$service}) {
+ # Check the service, even if a listener is registered it may be dead
+ my $session2 = OpenSRF::AppSession->create($service);
+ try {
+ $session2->connect;
+ };
+ if($session2->state == $session2->CONNECTED) {
+ # To my knowledge, EVERY service should have atomic echo available
+ my $req2 = $session2->request('opensrf.system.echo.atomic','Test');
+ my $testresult = $req2->recv;
+ if(!UNIVERSAL::isa($testresult,"Error")) {
+ # If we got back what we passed in the service is working! Ish. Not a flawless test.
+ $passed = 1 if @{$testresult->content}[0] eq 'Test';
+ }
+ $req2->finish;
+ $session2->finish;
+ $session2->disconnect;
+ }
+ }
+ if($passed) {
+ # Looks like it works, make note!
+ $router->{pass} += 1;
+ } else {
+ # Doesn't work! Save for later reporting.
+ push @{$router->{failed}}, $service;
+ }
+ }
+ $session->finish;
+ $session->disconnect;
+ }
+}
+
+# This outputs the result for Nagios
+sub output_result {
+ # Counters/storage
+ my $checked_services = 0;
+ my $up_services = 0;
+ my @down_services;
+ my @down_routers;
+ # Assume all is good until proven otherwise
+ my $retcode = 0;
+ foreach my $router (@routers) {
+ # If the router isn't online then we don't need to look at services - We didn't check any!
+ if(!$router->{online}) {
+ push @down_routers, $router->{domain};
+ next;
+ }
+ # Otherwise increment our counters as needed
+ $checked_services += $router->{checked};
+ $up_services += $router->{pass};
+ foreach (@{$router->{failed}}) {
+ # Keep track of any down services for reporting in a minute
+ push @down_services, $router->{domain} . ':' . $_;
+ }
+ }
+ if(@down_routers) {
+ # Down routers are really bad. Chances are there will only ever be one here (public), but join with commas anyway.
+ print "Router(s) Offline: " . join(', ', @down_routers) . "\n";
+ $retcode = 2;
+ } elsif ($checked_services != $up_services) {
+ # Non-responsive services are also really bad
+ print "Service(s) not responding\n";
+ $retcode = 2;
+ } else {
+ # But if we have nothing then things are good!
+ print "Routers/Services OK\n";
+ }
+ # If there are down services then spit them out as additional information.
+ print "$_\n" foreach (@down_services);
+ # And return our response code
+ exit $retcode;
+}
+
+# CHEAT - We need SettingsClient to have cached stuff
+try {
+ OpenSRF::Utils::SettingsClient->new()->config_value('none');
+} otherwise {
+ print "Settings Fetch Failed\n";
+ exit 2;
+};
+# And run all of the above functions
+prep_service_list();
+prep_routers_list();
+check_routers();
+output_result();
+
+# This code should NEVER run, as the only way out of output_result is an exit statement
+print "What? I shouldn't have reached here.";
+exit 3;