Browse Source

Throttle restarts.

Back off from restarting the application immediately if it starts
successfully but exits too soon.

Handle resume messages from the service console to restart the
application immediately even if it is throttled.

Use service status wait hints to tell the operating system how long
start, stop and resume actions are likely to take.
Iain Patterson 11 years ago
parent
commit
63363fbc93
5 changed files with 97 additions and 4 deletions
  1. 10 2
      README.txt
  2. 15 0
      messages.mc
  3. 9 0
      nssm.h
  4. 62 2
      service.cpp
  5. 1 0
      service.h

+ 10 - 2
README.txt

@@ -68,8 +68,15 @@ action if/when the application dies.
 
 With no configuration from you, NSSM will try to restart itself if it notices
 that the application died but you didn't send it a stop signal.  NSSM will
-keep trying, pausing 30 seconds between each attempt, until the service is
-successfully started or you send it a stop signal.
+keep trying, pausing between each attempt, until the service is successfully
+started or you send it a stop signal.
+
+NSSM will pause an increasingly longer time between subsequent restart attempts
+if the service fails to start in a timely manner, up to about four minutes.
+This is so it does not consume an excessive amount of CPU time trying to start
+a failed application over and over again.  If you identify the cause of the
+failure and don't want to wait you can use the Windows service console to
+send a continue signal to NSSM and it will retry within a few seconds.
 
 NSSM will look in the registry under
 HKLM\SYSTEM\CurrentControlSet\Services\<service>\Parameters\AppExit for
@@ -161,6 +168,7 @@ Thanks to Joel Reingold for spotting a command line truncation bug.
 Thanks to Arve Knudsen for spotting that child processes of the monitored
 application could be left running on service shutdown, and that a missing
 registry value for AppDirectory confused NSSM.
+Thanks to Peter Wagemans and Laszlo Kereszt for suggesting throttling restarts.
 
 Licence
 -------

+ 15 - 0
messages.mc

@@ -259,3 +259,18 @@ Failed to enumerate running threads when terminating service %1:
 %2
 .
 
+MessageId = +1
+SymbolicName = NSSM_EVENT_THROTTLED
+Severity = Warning
+Language = English
+Service %1 ran for less than %2 milliseconds.
+Restart will be delayed by %3 milliseconds.
+.
+
+MessageId = +1
+SymbolicName = NSSM_EVENT_RESET_THROTTLE
+Severity = Informational
+Language = English
+Request to resume service %1.  Throttling of restart attempts will be reset.
+.
+

+ 9 - 0
nssm.h

@@ -32,6 +32,12 @@ int str_equiv(const char *, const char *);
 #define VALUE_LENGTH 16383
 #define SERVICE_NAME_LENGTH KEY_LENGTH - 55
 
+/*
+  Throttle the restart of the service if it stops before this many
+  milliseconds have elapsed since startup.
+*/
+#define NSSM_RESET_THROTTLE_RESTART 1500
+
 /*
   How many milliseconds to wait for the application to die after posting to
   its windows' message queues.
@@ -43,4 +49,7 @@ int str_equiv(const char *, const char *);
 */
 #define NSSM_KILL_THREADS_GRACE_PERIOD 1500
 
+/* Margin of error for service status wait hints in milliseconds. */
+#define NSSM_WAITHINT_MARGIN 2000
+
 #endif

+ 62 - 2
service.cpp

@@ -10,10 +10,20 @@ char exe[EXE_LENGTH];
 char flags[CMD_LENGTH];
 char dir[MAX_PATH];
 bool stopping;
+CRITICAL_SECTION throttle_section;
+CONDITION_VARIABLE throttle_condition;
 
 static enum { NSSM_EXIT_RESTART, NSSM_EXIT_IGNORE, NSSM_EXIT_REALLY, NSSM_EXIT_UNCLEAN } exit_actions;
 static const char *exit_action_strings[] = { "Restart", "Ignore", "Exit", "Suicide", 0 };
 
+static unsigned long throttle;
+
+static inline int throttle_milliseconds() {
+  /*
+    Return the restart delay, in milliseconds, for the current value of the
+    file-scope throttle counter: 1000 when throttle is 0 or 1, doubling for
+    each further step (2000 at 2, 4000 at 3 and so on).
+    The doubling is done with an integer loop because pow() operates on
+    doubles.
+  */
+  int ret = 1; for (unsigned long i = 1; i < throttle; i++) ret *= 2;
+  return ret * 1000;
+}
+
 /* Connect to the service manager */
 SC_HANDLE open_service_manager() {
   SC_HANDLE ret = OpenSCManager(0, SERVICES_ACTIVE_DATABASE, SC_MANAGER_ALL_ACCESS);
@@ -148,11 +158,11 @@ void WINAPI service_main(unsigned long argc, char **argv) {
   /* Initialise status */
   ZeroMemory(&service_status, sizeof(service_status));
   service_status.dwServiceType = SERVICE_WIN32_OWN_PROCESS | SERVICE_INTERACTIVE_PROCESS;
-  service_status.dwControlsAccepted = SERVICE_ACCEPT_SHUTDOWN | SERVICE_ACCEPT_STOP;
+  service_status.dwControlsAccepted = SERVICE_ACCEPT_SHUTDOWN | SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_PAUSE_CONTINUE;
   service_status.dwWin32ExitCode = NO_ERROR;
   service_status.dwServiceSpecificExitCode = 0;
   service_status.dwCheckPoint = 0;
-  service_status.dwWaitHint = 1000;
+  service_status.dwWaitHint = NSSM_WAITHINT_MARGIN;
 
   /* Signal we AREN'T running the server */
   process_handle = 0;
@@ -177,6 +187,7 @@ void WINAPI service_main(unsigned long argc, char **argv) {
   }
 
   service_status.dwCurrentState = SERVICE_START_PENDING;
+  service_status.dwWaitHint = NSSM_RESET_THROTTLE_RESTART + NSSM_WAITHINT_MARGIN;
   SetServiceStatus(service_handle, &service_status);
 
   /* Try to create the exit action parameters; we don't care if it fails */
@@ -184,6 +195,9 @@ void WINAPI service_main(unsigned long argc, char **argv) {
 
   set_service_recovery(service_name);
 
+  /* Used for signalling a resume if the service pauses when throttled. */
+  InitializeCriticalSection(&throttle_section);
+
   monitor_service();
 }
 
@@ -230,6 +244,22 @@ unsigned long WINAPI service_control_handler(unsigned long control, unsigned lon
     case SERVICE_CONTROL_STOP:
       stop_service(0, true, true);
       return NO_ERROR;
+
+    case SERVICE_CONTROL_CONTINUE:
+      throttle = 0;
+      WakeConditionVariable(&throttle_condition);
+      service_status.dwCurrentState = SERVICE_CONTINUE_PENDING;
+      service_status.dwWaitHint = throttle_milliseconds() + NSSM_WAITHINT_MARGIN;
+      log_event(EVENTLOG_INFORMATION_TYPE, NSSM_EVENT_RESET_THROTTLE, service_name, 0);
+      SetServiceStatus(service_handle, &service_status);
+      return NO_ERROR;
+
+    case SERVICE_CONTROL_PAUSE:
+      /*
+        We don't accept pause messages but it isn't possible to register
+        only for continue messages so we have to handle this case.
+      */
+      return ERROR_CALL_NOT_IMPLEMENTED;
   }
 
   /* Unknown control */
@@ -257,6 +287,9 @@ int start_service() {
     log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_OUT_OF_MEMORY, "command line", "start_service", 0);
     return stop_service(2, true, true);
   }
+
+  throttle_restart();
+
   if (! CreateProcess(0, cmd, 0, 0, false, 0, 0, dir, &si, &pi)) {
     log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_CREATEPROCESS_FAILED, service_name, exe, GetLastError(), 0);
     return stop_service(3, true, true);
@@ -268,6 +301,9 @@ int start_service() {
   service_status.dwCurrentState = SERVICE_RUNNING;
   SetServiceStatus(service_handle, &service_status);
 
+  /* Wait for a clean startup. */
+  if (WaitForSingleObject(process_handle, NSSM_RESET_THROTTLE_RESTART) == WAIT_TIMEOUT) throttle = 0;
+
   return 0;
 }
 
@@ -281,6 +317,7 @@ int stop_service(unsigned long exitcode, bool graceful, bool default_action) {
   /* Signal we are stopping */
   if (graceful) {
     service_status.dwCurrentState = SERVICE_STOP_PENDING;
+    service_status.dwWaitHint = NSSM_KILL_WINDOW_GRACE_PERIOD + NSSM_KILL_THREADS_GRACE_PERIOD + NSSM_WAITHINT_MARGIN;
     SetServiceStatus(service_handle, &service_status);
   }
 
@@ -383,3 +420,26 @@ void CALLBACK end_service(void *arg, unsigned char why) {
     break;
   }
 }
+
+void throttle_restart() {
+  /*
+    Delay the start of the application when earlier attempts exited too
+    soon, sleeping until the delay elapses or a continue request wakes us.
+
+    throttle is zero on the first start and after it has been reset (the
+    application stayed up for NSSM_RESET_THROTTLE_RESTART milliseconds, or a
+    continue control was received).  In that case just count this attempt
+    and return without delaying.
+  */
+  if (! throttle++) return;
+
+  int ms = throttle_milliseconds(); /* Uses the incremented, not yet capped, counter. */
+
+  if (throttle > 7) throttle = 8; /* Cap the counter so the delay stops growing. */
+
+  char threshold[8], milliseconds[8]; /* Decimal strings passed to log_event(). */
+  _snprintf(threshold, sizeof(threshold), "%d", NSSM_RESET_THROTTLE_RESTART);
+  _snprintf(milliseconds, sizeof(milliseconds), "%d", ms);
+  log_event(EVENTLOG_WARNING_TYPE, NSSM_EVENT_THROTTLED, service_name, threshold, milliseconds, 0);
+
+  EnterCriticalSection(&throttle_section); /* Held for the condition variable wait below. */
+
+  service_status.dwCurrentState = SERVICE_PAUSED; /* Report the service as paused while waiting. */
+  SetServiceStatus(service_handle, &service_status);
+
+  SleepConditionVariableCS(&throttle_condition, &throttle_section, ms); /* Ends after ms or when the continue handler wakes throttle_condition; result is not checked. */
+
+  LeaveCriticalSection(&throttle_section);
+}

+ 1 - 0
service.h

@@ -16,5 +16,6 @@ int monitor_service();
 int start_service();
 int stop_service(unsigned long, bool, bool);
 void CALLBACK end_service(void *, unsigned char);
+void throttle_restart();
 
 #endif