LP#1729610: Add some debug/internal logging to backlog queue
[opensrf-equinox.git] / src / perl / lib / OpenSRF / Server.pm
index 849e606..0d31fcf 100644 (file)
@@ -51,6 +51,7 @@ sub new {
         if $self->{stderr_log_path};
 
     $self->{min_spare_children} ||= 0;
+    $self->{max_backlog_queue} ||= 1000;
 
     $self->{max_spare_children} = $self->{min_spare_children} + 1 if
         $self->{max_spare_children} and
@@ -146,19 +147,30 @@ sub run {
     $SIG{CHLD} = sub { $self->reap_children(); };
     $SIG{HUP} = sub { $self->handle_sighup(); };
     $SIG{USR1} = sub { $self->unregister_routers; };
+    $SIG{USR2} = sub { $self->register_routers; };
 
     $self->spawn_children;
     $self->build_osrf_handle;
     $self->register_routers;
     my $wait_time = 1;
 
+    my @max_children_msg_queue;
+
     # main server loop
     while(1) {
+        my $from_network = 0;
 
         $self->check_status;
         $self->{child_died} = 0;
 
-        my $msg = $self->{osrf_handle}->process($wait_time);
+        my $msg = shift(@max_children_msg_queue);
+
+        # no pending message, so wait for the next one forever
+        $from_network = $wait_time = -1 if (!$msg);
+        $msg ||= $self->{osrf_handle}->process($wait_time);
+
+        !$from_network and $chatty and $logger->debug("server: attempting to process previously queued message");
+        $from_network and $chatty and $logger->internal("server: no queued messages, processing due to network or signal");
 
         # we woke up for any reason, reset the wait time to allow
         # for idle maintenance as necessary
@@ -166,7 +178,11 @@ sub run {
 
         if($msg) {
 
-            if(my $child = pop(@{$self->{idle_list}})) {
+            if ($msg->type and $msg->type eq 'error') {
+                $logger->info("server: Listener received an XMPP error ".
+                    "message.  Likely a bounced message. sender=".$msg->from);
+
+            } elsif(my $child = pop(@{$self->{idle_list}})) {
 
                 # we have an idle child to handle the request
                 $chatty and $logger->internal("server: passing request to idle child $child");
@@ -183,11 +199,37 @@ sub run {
                 $logger->warn("server: no children available, waiting... consider increasing " .
                     "max_children for this application higher than $self->{max_children} ".
                     "in the OpenSRF configuration if this message occurs frequently");
-                $self->check_status(1); # block until child is available
 
-                my $child = pop(@{$self->{idle_list}});
-                push(@{$self->{active_list}}, $child);
-                $self->write_child($child, $msg);
+                if ($from_network) {
+                    $chatty and $logger->debug("server: queuing new message");
+                    push @max_children_msg_queue, $msg;
+                } else {
+                    $chatty and $logger->debug("server: re-queuing old message");
+                    unshift @max_children_msg_queue, $msg;
+                }
+
+                $logger->warn("server: backlog queue size is now ". scalar(@max_children_msg_queue));
+
+                if (@max_children_msg_queue < $self->{max_backlog_queue}) {
+                    # We still have room on the queue. Set the wait time to
+                    # 1s, waiting for a drone to be freed up and reprocess
+                    # this (and any other) queued messages.
+                    $wait_time = 1;
+                    if (!$from_network) {
+                        # if we got here, we had retrieved a message from the queue
+                        # but couldn't process it... but also hadn't fetched any
+                        # additional messages from the network. Doing so now,
+                        # as otherwise only one message will ever get queued
+                        $msg = $self->{osrf_handle}->process($wait_time);
+                        if ($msg) {
+                            $chatty and $logger->debug("server: queuing new message after a re-queue");
+                            push @max_children_msg_queue, $msg;
+                        }
+                    }
+                } else {
+                    # We'll just have to wait
+                    $self->check_status(1); # block until child is available
+                }
             }
 
         } else {
@@ -298,6 +340,19 @@ sub write_child {
         $self->{sig_pipe} = 0;
         local $SIG{'PIPE'} = sub { $self->{sig_pipe} = 1; };
 
+        # In rare cases a child can die between creation and first
+        # write, typically a result of a jabber connect error.  Before
+        # sending data to each child, confirm it's still alive.  If it's
+        # not, log the error and drop the message to prevent the parent
+        # process from dying.
+        # When a child dies, all of its attributes are deleted,
+        # so the lack of a pid means the child is dead.
+        if (!$child->{pid}) {
+            $logger->error("server: child is dead in write_child(). ".
+                "unable to send message: $xml");
+            return; # avoid syswrite crash
+        }
+
         # send message to child data pipe
         syswrite($child->{pipe_to_child}, $write_size . $xml);
 
@@ -635,10 +690,16 @@ sub run {
             OpenSRF::Transport::SlimJabber::XMPPMessage->new(xml => $data)
         );
 
-        $self->keepalive_loop($session);
+        my $recycle = $self->keepalive_loop($session);
 
         last if ++$self->{num_requests} == $self->{parent}->{max_requests};
 
+        if ($recycle) {
+            $chatty && $logger->internal(
+                "server: child exiting early on force_recycle");
+            last;
+        }
+
         # Tell the parent process we are available to process requests
         $self->send_status;
 
@@ -754,7 +815,15 @@ sub keepalive_loop {
     }
 
     $chatty and $logger->internal("server: child done with request(s)");
+
+    # Capture the recycle option value before it's clobbered.
+    # The option may be set at any point along the life of the 
+    # session.  Once set, it remains set unless 
+    # $session->force_recycle(0) is explicitly called.
+    my $recycle = $session->force_recycle;
+
     $session->kill_me;
+    return $recycle;
 }
 
 # ----------------------------------------------------------------