2 # ---------------------------------------------------------------
3 # Copyright (C) 2008-2013 Georgia Public Library Service
4 # Copyright (C) 2013 Equinox Software, Inc
5 # Bill Erickson <berick@esilibrary.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 # ---------------------------------------------------------------
17 use strict; use warnings;
19 use Net::Domain qw/hostfqdn/;
20 use POSIX qw/setsid :sys_wait_h/;
21 use OpenSRF::Utils::Logger q/$logger/;
23 use OpenSRF::Transport::PeerHandle;
24 use OpenSRF::Utils::SettingsClient;
25 use OpenSRF::Transport::Listener;
27 use OpenSRF::Utils::Config;
29 my $opt_service = undef;
30 my $opt_config = "@CONF_DIR@/opensrf_core.xml";
31 my $opt_pid_dir = "@PID_DIR@/run/opensrf";
32 my $opt_no_daemon = 0;
33 my $opt_settings_pause = 0;
34 my $opt_localhost = 0;
36 my $opt_shutdown_graceful = 0;
37 my $opt_shutdown_fast = 0;
38 my $opt_shutdown_immediate = 0;
39 my $opt_shutdown_graceful_all = 0;
40 my $opt_shutdown_fast_all = 0;
41 my $opt_shutdown_immediate_all = 0;
42 my $opt_kill_with_fire = 0;
43 my $opt_signal = ''; # signal name
44 my $opt_signal_all = 0;
45 my $opt_signal_timeout = 30;
49 my $opt_start_all = 0;
51 my $opt_restart_all = 0;
52 my $opt_start_services = 0;
53 my $opt_stop_services = 0;
54 my $opt_restart_services = 0;
55 my $opt_force_clean_process = 0;
56 my $opt_router_de_register = 0;
57 my $opt_router_de_register_all = 0;
58 my $opt_router_re_register = 0;
59 my $opt_router_re_register_all = 0;
61 my $opt_reload_all = 0;
63 my $opt_diagnostic = 0;
67 my $hostname = $ENV{OSRF_HOSTNAME} || hostfqdn();
# Command-line option specs, each bound to its $opt_* variable.
# NOTE(review): presumably the argument list of a Getopt::Long GetOptions(...)
# call whose opening and closing lines are not visible here.
# Spec syntax: a bare name is a boolean flag, "=s" takes a string value,
# "=i" takes an integer value.
70 'service=s' => \$opt_service,
71 'config=s' => \$opt_config,
72 'pid-dir=s' => \$opt_pid_dir,
73 'no-daemon' => \$opt_no_daemon,
74 'settings-startup-pause=i' => \$opt_settings_pause,
75 'localhost' => \$opt_localhost,
77 'quiet' => \$opt_quiet,
78 'graceful-shutdown' => \$opt_shutdown_graceful,
79 'fast-shutdown' => \$opt_shutdown_fast,
80 'immediate-shutdown' => \$opt_shutdown_immediate,
81 'graceful-shutdown-all' => \$opt_shutdown_graceful_all,
82 'fast-shutdown-all' => \$opt_shutdown_fast_all,
83 'immediate-shutdown-all' => \$opt_shutdown_immediate_all,
84 'kill-with-fire' => \$opt_kill_with_fire,
85 'force-clean-process' => \$opt_force_clean_process,
86 'signal-timeout=i' => \$opt_signal_timeout, # integer seconds; without "=i" this parsed as a flag and forced the timeout to 1
87 'signal=s' => \$opt_signal,
88 'signal-all' => \$opt_signal_all,
89 'start' => \$opt_start,
91 'start-all' => \$opt_start_all,
92 'stop-all' => \$opt_stop_all,
93 'restart' => \$opt_restart,
94 'restart-all' => \$opt_restart_all,
95 'start-services' => \$opt_start_services,
96 'stop-services' => \$opt_stop_services,
97 'restart-services' => \$opt_restart_services,
98 'router-de-register' => \$opt_router_de_register,
99 'router-de-register-all' => \$opt_router_de_register_all,
100 'router-re-register' => \$opt_router_re_register,
101 'router-re-register-all' => \$opt_router_re_register_all,
102 'reload' => \$opt_reload,
103 'reload-all' => \$opt_reload_all,
104 'diagnostic' => \$opt_diagnostic
107 if ($opt_localhost) {
108 $hostname = 'localhost';
109 $ENV{OSRF_HOSTNAME} = $hostname;
112 my $C_COMMAND = "opensrf-c -c $opt_config -x opensrf -p $opt_pid_dir -h $hostname";
113 my $PY_COMMAND = "opensrf.py -f $opt_config -p $opt_pid_dir ". ($opt_localhost ? '-l' : '');
115 sub verify_services {
117 return 1 if $service and $service eq 'router';
118 my @services = (@perl_services, map {$_->{service}} @nonperl_services);
120 return 1 unless $service;
121 return 1 if grep { $_ eq $service } @services;
122 msg("$service is not configured to run on $hostname");
124 msg("No services are configured to run on $hostname");
126 msg("Perhaps you meant to use --localhost?") unless $opt_localhost;
134 my @pids = get_service_pids_from_file($service);
137 # no PID files exist. see if the service is running anyway
139 @pids = get_service_pids_from_ps($service);
141 msg("cannot signal $service : no pid file or running process");
146 for my $pid (@pids) {
147 if (kill($signal, $pid) == 0) { # no process was signaled.
148 msg("cannot signal $service: process $pid is not running");
149 my $pidfile = get_pid_file($service);
150 unlink $pidfile if $pidfile;
154 msg("sending $signal signal to pid=$pid $service");
160 # returns 2 if a process should have gone away but did not
161 # in the case of multiple PIDs (e.g. router), return the
162 # status of any failures, but not the successes.
165 my @pids = get_service_pids_from_file($service);
168 for my $pid (@pids) {
170 # to determine whether a process has died, we have to send
171 # a no-op signal to the PID and check the success of that signal
173 for my $i (1..$opt_signal_timeout) {
174 $sig_count = kill(0, $pid);
175 last unless $sig_count;
180 msg("timed out waiting on $service pid=$pid to die");
185 # cleanup successful. remove the PID file
186 my $pidfile = get_pid_file($service);
187 unlink $pidfile if $pidfile;
195 return "$opt_pid_dir/$service.pid";
198 # services usually only have 1 pid, but the router will have at least 2
# Reads the PID entries stored in a service's PID file.
# The path comes from get_pid_file($service); an empty list is returned
# when that file does not exist.
# NOTE(review): $service is presumably unpacked from @_, and @pids returned,
# on lines not visible here -- confirm.
199 sub get_service_pids_from_file {
201 my $pid_file = get_pid_file($service);
202 return () unless -e $pid_file;
# one array element per line of the PID file
203 my @pids = `cat $pid_file`;
# strip leading whitespace and newlines from every entry
204 s/^\s*|\n//g for @pids;
# Builds a shell pipeline that looks a service up in `ps ax` output.
# The router is matched by the text 'OpenSRF Router'; any other service by
# 'OpenSRF Listener [<service>]'.
# NOTE(review): the line that runs $ps and fills @pids is not visible here,
# nor is the unpacking of $service -- confirm.
208 sub get_service_pids_from_ps {
211 my $ps = ($service eq 'router') ?
212 "ps ax | grep 'OpenSRF Router'" :
213 "ps ax | grep 'OpenSRF Listener \\[$service\\]'";
# exclude the grep process itself, trim leading blanks, keep the first
# space-delimited field of each remaining line
215 $ps .= " | grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1";
# strip leading whitespace and newlines from every entry
217 s/^\s*|\n//g for @pids;
224 my $alive = do_init(1);
226 my @services = get_service_list_from_files(1);
229 @conf_services = (@perl_services,
230 map {$_->{service}} @nonperl_services);
231 push(@services, @conf_services);
236 for my $svc (@services) {
237 $len = length($svc) if length($svc) > $len;
241 for my $svc (sort keys %services) {
242 my @pf_pids = get_service_pids_from_file($svc);
243 my @ps_pids = get_service_pids_from_ps($svc);
244 my $svc_str = sprintf("%-${len}s ", $svc);
247 unless(@ps_pids or @pf_pids) {
248 msg("$svc_str is not running");
252 for my $pid (@ps_pids) {
255 my $str = "$svc_str [$pid] ";
256 my $times = `ps -o etime=,cputime= $pid`;
257 $times =~ s/^\s+|\s+$//g;
258 my @times = split(/ /, $times);
259 $str .= sprintf("uptime=%-11s cputime=%-11s ", $times[0], $times[1]);
261 if ($svc eq 'router') {
264 my @drones = `pgrep -f "Drone \\[$svc\\]"`;
265 $str .= "#drones=".scalar(@drones);
267 msg("\tERR $svc has no running drones.") unless @drones;
270 msg("\tERR $svc [$pid] NOT configured for this host.")
271 unless grep {$_ eq $svc} @conf_services
274 msg("\tERR $svc [$pid] NOT found in PID file.")
275 unless grep {$_ eq $pid} @pf_pids;
278 for my $pid (@pf_pids) {
280 msg("\tERR $svc Has PID file entry [$pid], ".
281 "which matches no running $svc processes");
# Launches the router, then looks up the resulting process IDs and opens
# the router PID file for writing.
# NOTE(review): the loop that writes each PID to PF and the close of PF
# are presumably on lines not visible here -- confirm.
288 sub do_start_router {
# run the opensrf_router program with the config file and the "routers" argument
289 `opensrf_router $opt_config routers`;
291 sleep 2; # give the router time to fork
# PIDs of every process whose command name is opensrf_router
292 my @pids = `ps -C opensrf_router -o pid=`;
# strip leading whitespace and newlines from every entry
293 s/^\s*|\n//g for @pids;
295 my $pidfile = get_pid_file('router');
# '>' truncates any previous PID file; failure to open is fatal
296 open(PF, '>', $pidfile) or die "Cannot open $pidfile: $!\n";
# $_ here is presumably the loop variable of a hidden loop over @pids
299 msg("starting service pid=$_ router");
305 # stop a specific service
307 my ($service, @signals) = @_;
308 @signals = qw/TERM INT KILL/ unless @signals;
309 for my $sig (@signals) {
310 last unless do_signal($service, $sig) == 2;
318 OpenSRF::System->bootstrap_client(config_file => $opt_config);
320 if (!OpenSRF::Transport::PeerHandle->retrieve) {
321 return 0 if $fail_ok;
322 die "Unable to bootstrap client for requests\n";
325 load_settings(); # load the settings config if we can
327 my $sclient = OpenSRF::Utils::SettingsClient->new;
328 my $apps = $sclient->config_value("activeapps", "appname");
330 # disconnect the top-level network handle
331 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
334 $apps = [$apps] unless ref $apps;
335 for my $app (@$apps) {
336 if (!$sclient->config_value('apps', $app)) {
337 msg("Service '$app' is listed for this host, ".
338 "but there is no configuration for it in $opt_config");
341 my $lang = $sclient->config_value('apps', $app, 'language') || '';
342 if ($lang =~ /perl/i) {
343 push(@perl_services, $app);
345 push(@nonperl_services, {service => $app, lang => $lang});
352 # start a specific service
356 my @pf_pids = get_service_pids_from_file($service);
357 my @ps_pids = get_service_pids_from_ps($service);
359 if (@pf_pids) { # had pidfile
362 msg("service $service already running : @ps_pids");
365 } else { # stale pidfile
367 my $pidfile = get_pid_file($service);
368 msg("removing stale pid file $pidfile");
372 } elsif (@ps_pids) { # orphan process
374 if ($opt_force_clean_process) {
375 msg("service $service pid=@ps_pids is running with no pidfile");
376 do_signal($service, 'KILL');
378 msg("service $service pid=@ps_pids is running with no pidfile! ".
379 "use --force-clean-process to automatically kill orphan processes");
384 return do_start_router() if $service eq 'router';
386 load_settings() if $service eq 'opensrf.settings';
388 if(grep { $_ eq $service } @perl_services) {
389 return unless do_daemon($service);
390 OpenSRF::System->run_service($service, $opt_pid_dir);
393 # note: we don't daemonize non-perl services, but instead
394 # assume the controller for other languages manages that.
395 my ($svc) = grep { $_->{service} eq $service } @nonperl_services;
397 if ($svc->{lang} =~ /c/i) {
398 system("$C_COMMAND -a start -s $service");
400 } elsif ($svc->{lang} =~ /python/i) {
401 system("$PY_COMMAND -a start -s $service");
407 # should not get here
413 msg("starting router and services for $hostname");
415 return do_start_services();
# Starts every configured service for this host via do_start(), in order:
# opensrf.settings first (only if it is among the Perl services), then the
# remaining Perl services, then each non-Perl service. The router is not
# started here.
418 sub do_start_services {
419 msg("starting services for $hostname");
421 if(grep {$_ eq 'opensrf.settings'} @perl_services) {
422 do_start('opensrf.settings');
423 # in batch mode, give opensrf.settings plenty of time to start
424 # before any non-Perl services try to connect
# the pause is skipped entirely when $opt_settings_pause is 0 (its default)
425 sleep $opt_settings_pause if $opt_settings_pause;
428 # start Perl services
429 for my $service (@perl_services) {
# opensrf.settings was already started above
430 do_start($service) unless $service eq 'opensrf.settings';
433 # start each non-perl service individually instead of using the native
434 # start-all command. this allows us to test for existing pid files
435 # and/or running processes on each service before starting.
436 # it also means each service has to connect-fetch_setting-disconnect
437 # from jabber, which makes startup slightly slower than native start-all
438 do_start($_->{service}) for @nonperl_services;
443 # signal a single service
447 return do_signal_all($signal, $service);
450 # returns the list of running services based on presence of PID files.
451 # the 'router' service is not included by default, since it's
452 # usually treated special.
# Parameter: $include_router -- when true, 'router' is kept in the result.
# Returns: service names, one per *.pid file found in $opt_pid_dir.
453 sub get_service_list_from_files {
454 my $include_router = shift;
# list the PID files; ls errors (e.g. no matching files) are discarded
455 my @services = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from every entry
456 s/^\s*|\n//g for @services;
# reduce each path to the file's base name without the .pid suffix
457 s|.*/(.*)\.pid$|$1| for @services;
458 return @services if $include_router;
459 return grep { $_ ne 'router' } @services;
463 my ($signal, @services) = @_;
464 @services = get_service_list_from_files() unless @services;
466 do_signal_send($_, $signal) for @services;
468 # if user passed a known non-shutdown signal, we're done.
469 return if $signal =~ /HUP|USR1|USR2/;
471 do_signal_wait($_) for @services;
474 # pull all opensrf listener and drone PIDs from 'ps',
475 # kill them all, and remove all pid files
# NOTE(review): the loop headers, the kill call and the unlink call are
# presumably on lines not visible here -- confirm.
476 sub do_kill_with_fire {
477 msg("killing with fire");
# PIDs come from the process table, not from PID files
479 my @pids = get_running_pids();
# skip entries that contain no digit
481 next unless $_ =~ /\d+/;
# look up the command line of the PID, used only in the log message below
482 my $proc = `ps -p $_ -o cmd=`;
484 msg("killing with fire pid=$_ $proc");
488 # remove all of the pid files
# ls errors (e.g. no PID files present) are discarded
489 my @files = `ls $opt_pid_dir/*.pid 2> /dev/null`;
# strip leading whitespace and newlines from every entry
490 s/^\s*|\n//g for @files;
492 msg("removing pid file $_");
# Collects PIDs of OpenSRF processes found in `ps ax` output, matching the
# texts 'OpenSRF Listener', 'OpenSRF Drone' and 'OpenSRF Router' in turn.
# NOTE(review): the declarations of @pids / @greps, the line that runs each
# pipeline into @spids, and the return are not visible here -- confirm.
497 sub get_running_pids {
500 # start with the listeners, then drones, then routers
502 "ps ax | grep 'OpenSRF Listener' ",
503 "ps ax | grep 'OpenSRF Drone' ",
504 "ps ax | grep 'OpenSRF Router' "
# append the same tail to every pipeline: exclude the grep process itself,
# trim leading blanks, keep the first space-delimited field
507 $_ .= "| grep -v grep | sed 's/^\\s*//' | cut -d' ' -f1" for @greps;
509 for my $grep (@greps) {
# strip leading whitespace and newlines from every entry
511 s/^\s*|\n//g for @spids;
# accumulate in pipeline order (listeners, drones, routers)
512 push (@pids, @spids);
# Compares PIDs recorded in PID files against PIDs found in the process
# table and reports PID files whose entry matches no running process.
# NOTE(review): the actual removal of the file is presumably on a line not
# visible here -- confirm.
518 sub clear_stale_pids {
# every service with a PID file, router included (argument 1)
519 my @pidfile_services = get_service_list_from_files(1);
520 my @running_pids = get_running_pids();
522 for my $svc (@pidfile_services) {
523 my @pids = get_service_pids_from_file($svc);
524 for my $pid (@pids) {
# this PID is still running, so its entry is not stale
525 next if grep { $_ eq $pid } @running_pids;
526 my $pidfile = get_pid_file($svc);
527 msg("removing stale pid file $pidfile");
# Stops all services that have a PID file, excluding the router, by sending
# an escalating list of signals.
# NOTE(review): @signals is presumably taken from @_, and @redo declared and
# copied back into @services, on lines not visible here -- confirm.
533 sub do_stop_services {
# default escalation order when the caller supplies no signals
535 @signals = qw/TERM INT KILL/ unless @signals;
537 msg("stopping services for $hostname");
# called without an argument, so 'router' is filtered out of the list
538 my @services = get_service_list_from_files();
540 for my $signal (@signals) {
543 # send the signal to all PIDs
544 do_signal_send($_, $signal) for @services;
546 # then wait for them to go away
547 for my $service (@services) {
# a return of 2 from do_signal_wait queues the service for the next signal
548 push(@redo, $service) if do_signal_wait($service) == 2;
# stop escalating once no services remain
552 last unless @services;
560 @signals = qw/TERM INT KILL/ unless @signals;
562 do_stop_services(@signals);
564 # graceful shutdown requires the presence of the router, so stop the
565 # router last. See if it's running first to avoid unnecessary warnings.
566 do_stop('router', $signals[0]) if get_service_pids_from_file('router');
571 # daemonize us. return true if we're the child, false if parent
573 return 1 if $opt_no_daemon;
575 my $pid_file = get_pid_file($service);
576 my $pid = OpenSRF::Utils::safe_fork();
578 msg("starting service pid=$pid $service");
586 open STDIN, '</dev/null';
587 open STDOUT, '>/dev/null';
588 open STDERR, '>/dev/null';
589 `echo $$ > $pid_file`;
593 # parses the local settings file
595 my $conf = OpenSRF::Utils::Config->current;
596 my $cfile = $conf->bootstrap->settings_config;
597 return unless $cfile;
598 my $parser = OpenSRF::Utils::SettingsParser->new();
599 $parser->initialize( $cfile );
600 $OpenSRF::Utils::SettingsClient::host_config =
601 $parser->get_server_config($conf->env->hostname);
606 print "* $m\n" unless $opt_quiet;
612 Usage: $0 --localhost --start-all
614 --config <file> [default: @CONF_DIR@/opensrf_core.xml]
615 OpenSRF configuration file
617 --pid-dir <dir> [default: @PID_DIR@/run/opensrf]
618 Directory where process-specific PID files are kept
620 --settings-startup-pause
621 How long to give the opensrf.settings server to start up when running
622 in batch mode (start_all). The purpose is to give plenty of time for
623 the settings server to be up and active before any non-Perl services
627 Force the hostname to be 'localhost', instead of the fully qualified
628 domain name for the machine.
631 Specifies which OpenSRF service to control
634 Do not print informational messages to STDOUT
637 Do not detach and run as a daemon process. Useful for debugging.
638 Only works for Perl services and only when starting a single service.
641 Print this help message
644 Print information about running services
646 ==== starting services =====
649 Start the router and all services
652 Start the service specified by --service
655 Start all services but do not start any routers
658 Restart the router and all services
661 Restart the service specified by --service
664 Restart all services but do not restart any routers
666 --force-clean-process
667 When starting a service, if a service process is already running
668 but no pidfile exists, kill the service process before starting
671 ==== stopping services =====
674 Stop the router and all services. Services are sent the TERM signal,
675 followed by the INT signal, followed by the KILL signal. With each
676 iteration, the script pauses up to --signal-timeout seconds waiting
677 for each process to die before sending the next signal.
680 Stop the service specified by --service. See also --stop-all.
681 If the requested service does not have a matching PID file, an
682 attempt to locate the PID via 'ps' will be made.
685 Stop all services but do not stop any routers. See also --stop-all.
687 --graceful-shutdown-all
688 Send TERM signal to all services + router
691 Send TERM signal to the service specified by --service
694 Send INT signal to all services + router
697 Send INT signal to the service specified by --service
699 --immediate-shutdown-all
700 Send KILL signal to all services + router
703 Send KILL signal to the service specified by --service
706 Send KILL signal to all running services + routers, regardless of
707 the presence of a PID file, and remove all PID files indiscriminately.
709 ==== signaling services =====
712 Send signal to all services
715 Name of signal to send. If --signal-all is not specified, the
716 signal will be sent to the service specified by --service.
719 Seconds to wait for a process to die after sending a shutdown signal.
720 All signals except HUP, USR1, and USR2 are assumed to be shutdown signals.
722 ==== special signals ====
725 --router-de-register-all
726 Sends a SIGUSR1 signal to the selected service(s), which causes each
727 service's listener process to send an "unregister" command to all
728 registered routers. The --all variant sends the signal to all
729 running listeners. The non-(--all) variant requires a --service.
732 --router-re-register-all
733 Sends a SIGUSR2 signal to the selected service(s), which causes each
734 service's listener process to send a "register" command to all
735 configured routers. The --all variant sends the signal to all
736 running listeners. The non-(--all) variant requires a --service.
740 Sends a SIGHUP signal to the selected service(s). SIGHUP causes
741 each listener process to reload its opensrf_core.xml config file
742 and gracefully re-launch drone processes. The -all variant sends
743 the signal to all services. The non-(-all) variant requires a
749 # we do not verify services for stop/signal actions, since those may
750 # legitimately be used against services not (or no longer) configured
751 # to run on the selected host.
752 do_init() and verify_services($opt_service) if
755 $opt_start_services or
758 $opt_restart_services) and $opt_service ne 'router';
760 # starting services. do_init() handled above
761 do_start($opt_service) if $opt_start;
762 do_stop($opt_service) and do_start($opt_service) if $opt_restart;
763 do_start_all() if $opt_start_all;
764 do_start_services() if $opt_start_services;
765 do_stop_all() and do_start_all() if $opt_restart_all;
766 do_stop_services() and do_start_services() if $opt_restart_services;
769 do_stop($opt_service) if $opt_stop;
770 do_stop_all() if $opt_stop_all;
771 do_stop_services() if $opt_stop_services;
772 do_stop($opt_service, 'TERM') if $opt_shutdown_graceful;
773 do_stop($opt_service, 'INT') if $opt_shutdown_fast;
774 do_stop($opt_service, 'KILL') if $opt_shutdown_immediate;
775 do_stop_all('TERM') if $opt_shutdown_graceful_all;
776 do_stop_all('INT') if $opt_shutdown_fast_all;
777 do_stop_all('KILL') if $opt_shutdown_immediate_all;
778 do_kill_with_fire() if $opt_kill_with_fire;
781 $opt_signal = 'USR1' if $opt_router_de_register or $opt_router_de_register_all;
782 $opt_signal = 'USR2' if $opt_router_re_register or $opt_router_re_register_all;
783 $opt_signal = 'HUP' if $opt_reload or $opt_reload_all;
785 do_signal($opt_service, $opt_signal) if $opt_signal and $opt_service;
786 do_signal_all($opt_signal) if
789 $opt_router_de_register_all or
790 $opt_router_re_register_all;
793 do_diagnostic() if $opt_diagnostic;
796 # show help if no action was requested
797 do_help() if $opt_help or not (
800 $opt_start_services or
803 $opt_stop_services or
806 $opt_restart_services or
809 $opt_shutdown_graceful or
810 $opt_shutdown_graceful_all or
811 $opt_shutdown_fast or
812 $opt_shutdown_fast_all or
813 $opt_shutdown_immediate or
814 $opt_shutdown_immediate_all or
815 $opt_kill_with_fire or