1 # ----------------------------------------------------------------
2 # Copyright (C) 2010 Equinox Software, Inc.
3 # Bill Erickson <erickson@esilibrary.com>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 # ----------------------------------------------------------------
15 package OpenSRF::Server;
18 use OpenSRF::Transport;
19 use OpenSRF::Application;
20 use OpenSRF::Utils::Config;
21 use OpenSRF::Transport::PeerHandle;
22 use OpenSRF::Utils::SettingsClient;
23 use OpenSRF::Utils::Logger qw($logger);
24 use OpenSRF::Transport::SlimJabber::Client;
26 use POSIX qw/:sys_wait_h :errno_h/;
27 use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
28 use Time::HiRes qw/usleep/;
31 our $chatty = 1; # disable for production
33 use constant STATUS_PIPE_DATA_SIZE => 12;
34 use constant WRITE_PIPE_DATA_SIZE => 12;
37 my($class, $service, %args) = @_;
38 my $self = bless(\%args, $class);
40 $self->{service} = $service; # service name
41 $self->{num_children} = 0; # number of child processes
42 $self->{osrf_handle} = undef; # xmpp handle
43 $self->{routers} = []; # list of registered routers
44 $self->{active_list} = []; # list of active children
45 $self->{idle_list} = []; # list of idle children
46 $self->{sighup_pending} = [];
47 $self->{pid_map} = {}; # map of child pid to child for cleaner access
48 $self->{sig_pipe} = 0; # true if last syswrite failed
50 $self->{stderr_log} = $self->{stderr_log_path} . "/${service}_stderr.log"
51 if $self->{stderr_log_path};
53 $self->{min_spare_children} ||= 0;
54 $self->{max_backlog_queue} ||= 1000;
56 $self->{max_spare_children} = $self->{min_spare_children} + 1 if
57 $self->{max_spare_children} and
58 $self->{max_spare_children} <= $self->{min_spare_children};
63 # ----------------------------------------------------------------
64 # Disconnects from routers and waits for child processes to exit.
65 # ----------------------------------------------------------------
71 $logger->info("server: shutting down and cleaning up...");
74 $self->unregister_routers;
77 # graceful shutdown waits for all active
78 # children to complete their in-process tasks.
80 while (@{$self->{active_list}}) {
81 $logger->info("server: graceful shutdown with ".
82 @{$self->{active_list}}." active children...");
84 # block until a child is becomes available
85 $self->check_status(1);
87 $logger->info("server: all clear for graceful shutdown");
90 # don't get sidetracked by signals while we're cleaning up.
91 # it could result in unexpected behavior with list traversal
92 $SIG{CHLD} = 'IGNORE';
94 # terminate the child processes
95 $self->kill_child($_) for
96 (@{$self->{idle_list}}, @{$self->{active_list}});
98 $self->{osrf_handle}->disconnect;
100 # clean up our dead children
101 $self->reap_children(1);
103 exit(0) unless $no_exit;
106 # ----------------------------------------------------------------
107 # SIGHUP handler. Kill all idle children. Copy list of active
108 # children into sighup_pending list for later cleanup.
109 # ----------------------------------------------------------------
112 $logger->info("server: caught SIGHUP; reloading children");
114 # reload the opensrf config
115 # note: calling ::Config->load() results in ever-growing
116 # package names, which eventually causes an exception
117 OpenSRF::Utils::Config->current->_load(
119 config_file => OpenSRF::Utils::Config->current->FILE
122 # force-reload the logger config
123 OpenSRF::Utils::Logger::set_config(1);
125 # copy active list into pending list for later cleanup
126 $self->{sighup_pending} = [ @{$self->{active_list}} ];
128 # idle_list will be modified as children are reaped.
129 my @idle = @{$self->{idle_list}};
131 # idle children are the reaper's plaything
132 $self->kill_child($_) for @idle;
135 # ----------------------------------------------------------------
136 # Waits on the jabber socket for inbound data from the router.
137 # Each new message is passed off to a child process for handling.
138 # At regular intervals, wake up for min/max spare child maintenance
139 # ----------------------------------------------------------------
143 $logger->set_service($self->{service});
145 $SIG{$_} = sub { $self->cleanup; } for (qw/INT QUIT/);
146 $SIG{TERM} = sub { $self->cleanup(0, 1); };
147 $SIG{CHLD} = sub { $self->reap_children(); };
148 $SIG{HUP} = sub { $self->handle_sighup(); };
149 $SIG{USR1} = sub { $self->unregister_routers; };
150 $SIG{USR2} = sub { $self->register_routers; };
152 $self->spawn_children;
153 $self->build_osrf_handle;
154 $self->register_routers;
157 my @max_children_msg_queue;
161 my $from_network = 0;
164 $self->{child_died} = 0;
166 my $msg = shift(@max_children_msg_queue);
168 # no pending message, so wait for the next one forever
169 $from_network = $wait_time = -1 if (!$msg);
170 $msg ||= $self->{osrf_handle}->process($wait_time);
172 !$from_network and $chatty and $logger->debug("server: attempting to process previously queued message");
173 $from_network and $chatty and $logger->internal("server: no queued messages, processing due to network or signal");
175 # we woke up for any reason, reset the wait time to allow
176 # for idle maintenance as necessary
181 if ($msg->type and $msg->type eq 'error') {
182 $logger->info("server: Listener received an XMPP error ".
183 "message. Likely a bounced message. sender=".$msg->from);
185 } elsif(my $child = pop(@{$self->{idle_list}})) {
187 # we have an idle child to handle the request
188 $chatty and $logger->internal("server: passing request to idle child $child");
189 push(@{$self->{active_list}}, $child);
190 $self->write_child($child, $msg);
192 } elsif($self->{num_children} < $self->{max_children}) {
194 # spawning a child to handle the request
195 $chatty and $logger->internal("server: spawning child to handle request");
196 $self->write_child($self->spawn_child(1), $msg);
199 $logger->warn("server: no children available, waiting... consider increasing " .
200 "max_children for this application higher than $self->{max_children} ".
201 "in the OpenSRF configuration if this message occurs frequently");
204 $chatty and $logger->debug("server: queuing new message");
205 push @max_children_msg_queue, $msg;
207 $chatty and $logger->debug("server: re-queuing old message");
208 unshift @max_children_msg_queue, $msg;
211 $logger->warn("server: backlog queue size is now ". scalar(@max_children_msg_queue));
213 if (@max_children_msg_queue < $self->{max_backlog_queue}) {
214 # We still have room on the queue. Set the wait time to
215 # 1s, waiting for a drone to be freed up and reprocess
216 # this (and any other) queued messages.
218 if (!$from_network) {
219 # if we got here, we had retrieved a message from the queue
220 # but couldn't process it... but also hadn't fetched any
221 # additional messages from the network. Doing so now,
222 # as otherwise only one message will ever get queued
223 $msg = $self->{osrf_handle}->process($wait_time);
225 $chatty and $logger->debug("server: queuing new message after a re-queue");
226 push @max_children_msg_queue, $msg;
230 # We'll just have to wait
231 $self->check_status(1); # block until child is available
237 # don't perform idle maint immediately when woken by SIGCHLD
238 unless($self->{child_died}) {
240 # when we hit equilibrium, there's no need for regular
241 # maintenance, so set wait_time to 'forever'
243 !$self->perform_idle_maintenance and # no maintenance performed this time
244 @{$self->{active_list}} == 0; # no active children
250 # ----------------------------------------------------------------
251 # Launch a new spare child or kill an extra spare child. To
252 # prevent large-scale spawning or die-offs, spawn or kill only
253 # 1 process per idle maintenance loop.
254 # Returns true if any idle maintenance occurred, 0 otherwise
255 # ----------------------------------------------------------------
256 sub perform_idle_maintenance {
259 $chatty and $logger->internal(
261 "server: %d idle, %d active, %d min_spare, %d max_spare in idle maintenance",
262 scalar(@{$self->{idle_list}}),
263 scalar(@{$self->{active_list}}),
264 $self->{min_spare_children},
265 $self->{max_spare_children}
269 # spawn 1 spare child per maintenance loop if necessary
270 if( $self->{min_spare_children} and
271 $self->{num_children} < $self->{max_children} and
272 scalar(@{$self->{idle_list}}) < $self->{min_spare_children} ) {
274 $chatty and $logger->internal("server: spawning spare child");
278 # kill 1 excess spare child per maintenance loop if necessary
279 } elsif($self->{max_spare_children} and
280 $self->{num_children} > $self->{min_children} and
281 scalar(@{$self->{idle_list}}) > $self->{max_spare_children} ) {
283 $chatty and $logger->internal("server: killing spare child");
293 my $child = shift || pop(@{$self->{idle_list}}) or return;
294 $chatty and $logger->internal("server: killing child $child");
295 kill('TERM', $child->{pid});
298 # ----------------------------------------------------------------
299 # Jabber connection inbound message arrive on.
300 # ----------------------------------------------------------------
301 sub build_osrf_handle {
304 my $conf = OpenSRF::Utils::Config->current;
305 my $username = $conf->bootstrap->username;
306 my $password = $conf->bootstrap->passwd;
307 my $domain = $conf->bootstrap->domain;
308 my $port = $conf->bootstrap->port;
309 my $resource = $self->{service} . '_listener_' . $conf->env->hostname;
311 $logger->debug("server: inbound connecting as $username\@$domain/$resource on port $port");
313 $self->{osrf_handle} =
314 OpenSRF::Transport::SlimJabber::Client->new(
315 username => $username,
316 resource => $resource,
317 password => $password,
322 $self->{osrf_handle}->initialize;
326 # ----------------------------------------------------------------
327 # Sends request data to a child process
328 # ----------------------------------------------------------------
330 my($self, $child, $msg) = @_;
331 my $xml = encode_utf8(decode_utf8($msg->to_xml));
333 # tell the child how much data to expect, minus the header
335 {use bytes; $write_size = length($xml)}
336 $write_size = sprintf("%*s", WRITE_PIPE_DATA_SIZE, $write_size);
340 $self->{sig_pipe} = 0;
341 local $SIG{'PIPE'} = sub { $self->{sig_pipe} = 1; };
343 # In rare cases a child can die between creation and first
344 # write, typically a result of a jabber connect error. Before
345 # sending data to each child, confirm it's still alive. If it's
346 # not, log the error and drop the message to prevent the parent
347 # process from dying.
348 # When a child dies, all of its attributes are deleted,
349 # so the lack of a pid means the child is dead.
350 if (!$child->{pid}) {
351 $logger->error("server: child is dead in write_child(). ".
352 "unable to send message: $xml");
353 return; # avoid syswrite crash
356 # send message to child data pipe
357 syswrite($child->{pipe_to_child}, $write_size . $xml);
359 last unless $self->{sig_pipe};
360 $logger->error("server: got SIGPIPE writing to $child, retrying...");
361 usleep(50000); # 50 msec
364 $logger->error("server: unable to send request message to child $child") if $self->{sig_pipe};
367 # ----------------------------------------------------------------
368 # Checks to see if any child process has reported its availability
369 # In blocking mode, blocks until a child has reported.
370 # ----------------------------------------------------------------
372 my($self, $block) = @_;
374 return unless @{$self->{active_list}};
380 # if can_read or sysread is interrupted while bloking, go back and
381 # wait again until we have at least 1 free child
383 # refresh the read_set handles in case we lost a child in the previous iteration
384 my $read_set = IO::Select->new;
385 $read_set->add($_->{pipe_to_child}) for @{$self->{active_list}};
387 if(my @handles = $read_set->can_read(($block) ? undef : 0)) {
389 for my $pipe (@handles) {
390 sysread($pipe, $pid, STATUS_PIPE_DATA_SIZE) or next;
391 push(@pids, int($pid));
395 last unless $block and !@pids;
400 $chatty and $logger->internal("server: ".scalar(@pids)." children reporting for duty: (@pids)");
405 # move the children from the active list to the idle list
406 for my $proc (@{$self->{active_list}}) {
407 if(grep { $_ == $proc->{pid} } @pids) {
408 push(@{$self->{idle_list}}, $proc);
410 push(@new_actives, $proc);
414 $self->{active_list} = [@new_actives];
416 $chatty and $logger->internal(sprintf(
417 "server: %d idle and %d active children after status update",
418 scalar(@{$self->{idle_list}}), scalar(@{$self->{active_list}})));
420 # some children just went from active to idle. let's see
421 # if any of them need to be killed from a previous sighup.
423 for my $child (@{$self->{sighup_pending}}) {
424 if (grep {$_ == $child->{pid}} @pids) {
426 $chatty and $logger->internal(
427 "server: killing previously-active ".
428 "child after receiving SIGHUP: $child");
430 # remove the pending child
431 $self->{sighup_pending} = [
432 grep {$_->{pid} != $child->{pid}}
433 @{$self->{sighup_pending}}
436 # kill the pending child
437 $self->kill_child($child)
442 # ----------------------------------------------------------------
443 # Cleans up any child processes that have exited.
444 # In shutdown mode, block until all children have washed ashore
445 # ----------------------------------------------------------------
447 my($self, $shutdown) = @_;
448 $self->{child_died} = 1;
452 my $pid = waitpid(-1, ($shutdown) ? 0 : WNOHANG);
455 $chatty and $logger->internal("server: reaping child $pid");
457 my $child = $self->{pid_map}->{$pid};
459 close($child->{pipe_to_parent});
460 close($child->{pipe_to_child});
462 $self->{active_list} = [ grep { $_->{pid} != $pid } @{$self->{active_list}} ];
463 $self->{idle_list} = [ grep { $_->{pid} != $pid } @{$self->{idle_list}} ];
465 $self->{num_children}--;
466 delete $self->{pid_map}->{$pid};
467 delete $child->{$_} for keys %$child; # destroy with a vengeance
470 $self->spawn_children unless $shutdown;
472 $chatty and $logger->internal(sprintf(
473 "server: %d idle and %d active children after reap_children",
474 scalar(@{$self->{idle_list}}), scalar(@{$self->{active_list}})));
477 # ----------------------------------------------------------------
478 # Spawn up to max_children processes
479 # ----------------------------------------------------------------
482 $self->spawn_child while $self->{num_children} < $self->{min_children};
485 # ----------------------------------------------------------------
486 # Spawns a new child. If $active is set, the child goes directly
487 # into the active_list.
488 # ----------------------------------------------------------------
490 my($self, $active) = @_;
492 my $child = OpenSRF::Server::Child->new($self);
494 # socket for sending message data to the child
496 $child->{pipe_to_child},
497 $child->{pipe_to_parent},
498 AF_UNIX, SOCK_STREAM, PF_UNSPEC)) {
499 $logger->error("server: error creating data socketpair: $!");
503 $child->{pipe_to_child}->autoflush(1);
504 $child->{pipe_to_parent}->autoflush(1);
506 $child->{pid} = fork();
508 if($child->{pid}) { # parent process
509 $self->{num_children}++;
510 $self->{pid_map}->{$child->{pid}} = $child;
513 push(@{$self->{active_list}}, $child);
515 push(@{$self->{idle_list}}, $child);
518 $chatty and $logger->internal("server: server spawned child $child with ".$self->{num_children}." total children");
522 } else { # child process
524 # recover default handling for any signal whose handler
525 # may have been adopted from the parent process.
526 $SIG{$_} = 'DEFAULT' for qw/TERM INT QUIT HUP CHLD USR1 USR2/;
528 if($self->{stderr_log}) {
530 $chatty and $logger->internal("server: redirecting STDERR to " . $self->{stderr_log});
533 unless( open(STDERR, '>>' . $self->{stderr_log}) ) {
534 $logger->error("server: unable to open STDERR log file: " . $self->{stderr_log} . " : $@");
535 open STDERR, '>/dev/null'; # send it back to /dev/null
543 OpenSRF::Transport::PeerHandle->retrieve->disconnect;
545 $logger->error("server: child process died: $@") if $@;
550 # ----------------------------------------------------------------
551 # Sends the register command to the configured routers
552 # ----------------------------------------------------------------
553 sub register_routers {
556 my $conf = OpenSRF::Utils::Config->current;
557 my $routers = $conf->bootstrap->routers;
558 my $router_name = $conf->bootstrap->router_name;
561 for my $router (@$routers) {
564 if( !$router->{services} ||
565 !$router->{services}->{service} ||
567 ref($router->{services}->{service}) eq 'ARRAY' and
568 grep { $_ eq $self->{service} } @{$router->{services}->{service}}
569 ) || $router->{services}->{service} eq $self->{service}) {
571 my $name = $router->{name};
572 my $domain = $router->{domain};
573 push(@targets, "$name\@$domain/router");
577 push(@targets, "$router_name\@$router/router");
582 $logger->info("server: registering with router $_");
583 $self->{osrf_handle}->send(
585 body => 'registering',
586 router_command => 'register',
587 router_class => $self->{service}
591 $self->{routers} = \@targets;
594 # ----------------------------------------------------------------
595 # Sends the unregister command to any routers we have registered
597 # ----------------------------------------------------------------
598 sub unregister_routers {
600 return unless $self->{osrf_handle}->tcp_connected;
602 for my $router (@{$self->{routers}}) {
603 $logger->info("server: disconnecting from router $router");
604 $self->{osrf_handle}->send(
606 body => "unregistering",
607 router_command => "unregister",
608 router_class => $self->{service}
614 package OpenSRF::Server::Child;
617 use OpenSRF::Transport;
618 use OpenSRF::Application;
619 use OpenSRF::Transport::PeerHandle;
620 use OpenSRF::Transport::SlimJabber::XMPPMessage;
621 use OpenSRF::Utils::Logger qw($logger);
622 use OpenSRF::DomainObject::oilsResponse qw/:status/;
623 use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
624 use Time::HiRes qw(time usleep);
625 use POSIX qw/:sys_wait_h :errno_h/;
627 use overload '""' => sub { return '[' . shift()->{pid} . ']'; };
630 my($class, $parent) = @_;
631 my $self = bless({}, $class);
632 $self->{pid} = 0; # my process ID
633 $self->{parent} = $parent; # Controller parent process
634 $self->{num_requests} = 0; # total serviced requests
635 $self->{sig_pipe} = 0; # true if last syswrite failed
641 my $flags = fcntl($fh, F_GETFL, 0);
642 fcntl($fh, F_SETFL, $flags | O_NONBLOCK);
647 my $flags = fcntl($fh, F_GETFL, 0);
648 $flags &= ~O_NONBLOCK;
649 fcntl($fh, F_SETFL, $flags);
652 # ----------------------------------------------------------------
653 # Connects to Jabber and runs the application child_init
654 # ----------------------------------------------------------------
657 my $service = $self->{parent}->{service};
658 $0 = "OpenSRF Drone [$service]";
659 OpenSRF::Transport::PeerHandle->construct($service);
660 OpenSRF::Application->application_implementation->child_init
661 if (OpenSRF::Application->application_implementation->can('child_init'));
664 # ----------------------------------------------------------------
665 # Waits for messages from the parent process, handles the message,
666 # then goes into the keepalive loop if this is a stateful session.
667 # When max_requests is hit, the process exits.
668 # ----------------------------------------------------------------
671 my $network = OpenSRF::Transport::PeerHandle->retrieve;
673 # main child run loop. Ends when this child hits max requests.
676 my $data = $self->wait_for_request or next;
678 # Update process name to show activity
682 # Discard extraneous data from the jabber socket
683 if(!$network->flush_socket()) {
684 $logger->error("server: network disconnected! child dropping request and exiting: $data");
688 my $session = OpenSRF::Transport->handler(
689 $self->{parent}->{service},
690 OpenSRF::Transport::SlimJabber::XMPPMessage->new(xml => $data)
693 my $recycle = $self->keepalive_loop($session);
695 last if ++$self->{num_requests} == $self->{parent}->{max_requests};
698 $chatty && $logger->internal(
699 "server: child exiting early on force_recycle");
703 # Tell the parent process we are available to process requests
706 # Repair process name
710 $chatty and $logger->internal("server: child process shutting down after reaching max_requests");
712 OpenSRF::Application->application_implementation->child_exit
713 if (OpenSRF::Application->application_implementation->can('child_exit'));
716 # ----------------------------------------------------------------
717 # waits for a request data on the parent pipe and returns it.
718 # ----------------------------------------------------------------
719 sub wait_for_request {
722 my $data = ''; # final request data
723 my $buf_size = 4096; # default linux pipe_buf (atomic window, not total size)
724 my $read_pipe = $self->{pipe_to_parent};
725 my $bytes_needed; # size of the data we are about to receive
726 my $bytes_recvd; # number of bytes read so far
727 my $first_read = 1; # true for first loop iteration
732 # wait for some data to start arriving
733 my $read_set = IO::Select->new;
734 $read_set->add($read_pipe);
737 # if can_read is interrupted while blocking,
738 # go back and wait again until it succeeds.
739 last if $read_set->can_read;
742 # parent started writing, let's start reading
743 $self->set_nonblock($read_pipe);
746 # read all of the available data
749 my $nbytes = sysread($self->{pipe_to_parent}, $buf, $buf_size);
751 unless(defined $nbytes) {
753 $logger->error("server: error reading data from parent: $!. ".
754 "bytes_needed=$bytes_needed; bytes_recvd=$bytes_recvd; data=$data");
760 last if $nbytes <= 0; # no more data available for reading
762 $bytes_recvd += $nbytes;
766 $self->set_block($self->{pipe_to_parent});
767 return undef if $read_error;
769 # extract the data size and remove the header from the final data
771 my $wps_size = OpenSRF::Server::WRITE_PIPE_DATA_SIZE;
772 $bytes_needed = int(substr($data, 0, $wps_size)) + $wps_size;
773 $data = substr($data, $wps_size);
778 if ($bytes_recvd == $bytes_needed) {
779 # we've read all the data. Nothing left to do
783 $logger->info("server: child process read all available pipe data. ".
784 "waiting for more data from parent. bytes_needed=$bytes_needed; bytes_recvd=$bytes_recvd");
791 # ----------------------------------------------------------------
792 # If this is a stateful opensrf session, wait up to $keepalive
793 # seconds for subsequent requests from the client
794 # ----------------------------------------------------------------
796 my($self, $session) = @_;
797 my $keepalive = $self->{parent}->{keepalive};
799 while($session->state and $session->state == $session->CONNECTED) {
801 unless( $session->queue_wait($keepalive) ) {
803 # client failed to disconnect before timeout
804 $logger->info("server: no request was received in $keepalive seconds, exiting stateful session");
806 my $res = OpenSRF::DomainObject::oilsConnectStatus->new(
807 status => "Disconnected on timeout",
808 statusCode => STATUS_TIMEOUT
811 $session->status($res);
812 $session->state($session->DISCONNECTED);
817 $chatty and $logger->internal("server: child done with request(s)");
819 # Capture the recycle option value before it's clobbered.
820 # The option may be set at any point along the life of the
821 # session. Once set, it remains set unless
822 # $session->force_recycle(0) is explicitly called.
823 my $recycle = $session->force_recycle;
829 # ----------------------------------------------------------------
830 # Report our availability to our parent process
831 # ----------------------------------------------------------------
837 $self->{sig_pipe} = 0;
838 local $SIG{'PIPE'} = sub { $self->{sig_pipe} = 1; };
841 $self->{pipe_to_parent},
842 sprintf("%*s", OpenSRF::Server::STATUS_PIPE_DATA_SIZE, $self->{pid})
845 last unless $self->{sig_pipe};
846 $logger->error("server: $self got SIGPIPE writing status to parent, retrying...");
847 usleep(50000); # 50 msec
850 $logger->error("server: $self unable to send status to parent") if $self->{sig_pipe};