Tighten TAP tests' tracking of postmaster state some more.

author Tom Lane
Thu, 20 Jan 2022 22:28:07 +0000 (17:28 -0500)
committer Tom Lane
Thu, 20 Jan 2022 22:28:07 +0000 (17:28 -0500)
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm

index 11227408c3b3f2bda3bc0f8ff75d49116ff0b1a8..b85068e0da1aaabec2cf55e08f30fcf8d5226494 100644 (file)
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -831,20 +831,37 @@ Note: if the node is already known stopped, this does nothing.
  However, if we think it's running and it's not, it's important for
  this to fail.  Otherwise, tests might fail to detect server crashes.
  
+With optional extra param fail_ok => 1, returns 0 for failure
+instead of bailing out.
+
  =cut
  
  sub stop
  {
-   my ($self, $mode) = @_;
-   my $port   = $self->port;
+   my ($self, $mode, %params) = @_;
     my $pgdata = $self->data_dir;
     my $name   = $self->name;
+   my $ret;
     $mode = 'fast' unless defined $mode;
-   return unless defined $self->{_pid};
+   return 1 unless defined $self->{_pid};
+
     print "### Stopping node \"$name\" using mode $mode\n";
-   TestLib::system_or_bail('pg_ctl', '-D', $pgdata, '-m', $mode, 'stop');
+   $ret = TestLib::system_log('pg_ctl', '-D', $pgdata,
+       '-m', $mode, 'stop');
+
+   if ($ret != 0)
+   {
+       print "# pg_ctl stop failed: $ret\n";
+
+       # Check to see if we still have a postmaster or not.
+       $self->_update_pid(-1);
+
+       BAIL_OUT("pg_ctl stop failed") unless $params{fail_ok};
+       return 0;
+   }
+
     $self->_update_pid(0);
-   return;
+   return 1;
  }
  
  =pod
@@ -1066,9 +1083,20 @@ sub _update_pid
     if (open my $pidfile, '<', $self->data_dir . "/postmaster.pid")
     {
         chomp($self->{_pid} = <$pidfile>);
-       print "# Postmaster PID for node \"$name\" is $self->{_pid}\n";
         close $pidfile;
  
+       # If we aren't sure what to expect, validate the PID using kill().
+       # This protects against stale PID files left by crashed postmasters.
+       if ($is_running == -1 && kill(0, $self->{_pid}) == 0)
+       {
+           print
+             "# Stale postmaster.pid file for node \"$name\": PID $self->{_pid} no longer exists\n";
+           $self->{_pid} = undef;
+           return;
+       }
+
+       print "# Postmaster PID for node \"$name\" is $self->{_pid}\n";
+
         # If we found a pidfile when there shouldn't be one, complain.
         BAIL_OUT("postmaster.pid unexpectedly present") if $is_running == 0;
         return;
diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl

index dc0dcd3ca275a9017fe7ffa9e41d2782e35344f0..ba6410a7aa396f354a341c2a81df792568692d65 100644 (file)
--- a/src/test/recovery/t/017_shm.pl
+++ b/src/test/recovery/t/017_shm.pl
@@ -204,6 +204,9 @@ sub poll_start
         # Wait 0.1 second before retrying.
         usleep(100_000);
  
+       # Clean up in case the start attempt just timed out or some such.
+       $node->stop('fast', fail_ok => 1);
+
         $attempts++;
     }
author	Tom Lane
	Thu, 20 Jan 2022 22:28:07 +0000 (17:28 -0500)
committer	Tom Lane
	Thu, 20 Jan 2022 22:28:07 +0000 (17:28 -0500)
src/test/perl/PostgresNode.pm		patch | blob | blame | history
src/test/recovery/t/017_shm.pl		patch | blob | blame | history