smartmontools  SVN Rev 4430
Utility to control and monitor storage systems with "S.M.A.R.T."
smartd.cpp
Go to the documentation of this file.
1 /*
2  * Home page of code is: http://www.smartmontools.org
3  *
4  * Copyright (C) 2002-11 Bruce Allen
5  * Copyright (C) 2008-17 Christian Franke
6  * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7  * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2, or (at your option)
12  * any later version.
13  *
14  * You should have received a copy of the GNU General Public License
15  * (for example COPYING); If not, see <http://www.gnu.org/licenses/>.
16  *
17  * This code was originally developed as a Senior Thesis by Michael Cornwell
18  * at the Concurrent Systems Laboratory (now part of the Storage Systems
19  * Research Center), Jack Baskin School of Engineering, University of
20  * California, Santa Cruz. http://ssrc.soe.ucsc.edu/
21  *
22  */
23 
24 #include "config.h"
25 #include "int64.h"
26 
27 // unconditionally included files
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h> // umask
31 #include <signal.h>
32 #include <fcntl.h>
33 #include <string.h>
34 #include <syslog.h>
35 #include <stdarg.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <time.h>
39 #include <limits.h>
40 #include <getopt.h>
41 
42 #include <stdexcept>
43 #include <string>
44 #include <vector>
45 #include <algorithm> // std::replace()
46 
47 // conditionally included files
48 #ifndef _WIN32
49 #include <sys/wait.h>
50 #endif
51 #ifdef HAVE_UNISTD_H
52 #include <unistd.h>
53 #endif
54 
55 #ifdef _WIN32
56 #ifdef _MSC_VER
57 #pragma warning(disable:4761) // "conversion supplied"
58 typedef unsigned short mode_t;
59 typedef int pid_t;
60 #endif
61 #include <io.h> // umask()
62 #include <process.h> // getpid()
63 #endif // _WIN32
64 
65 #ifdef __CYGWIN__
66 #include <io.h> // setmode()
67 #endif // __CYGWIN__
68 
69 #ifdef HAVE_LIBCAP_NG
70 #include <cap-ng.h>
71 #endif // LIBCAP_NG
72 
73 // locally included files
74 #include "atacmds.h"
75 #include "dev_interface.h"
76 #include "knowndrives.h"
77 #include "scsicmds.h"
78 #include "nvmecmds.h"
79 #include "utility.h"
80 
81 // This is for solaris, where signal() resets the handler to SIG_DFL
82 // after the first signal is caught.
83 #ifdef HAVE_SIGSET
84 #define SIGNALFN sigset
85 #else
86 #define SIGNALFN signal
87 #endif
88 
89 #ifdef _WIN32
90 // fork()/signal()/initd simulation for native Windows
91 #include "daemon_win32.h" // daemon_main/detach/signal()
92 #undef SIGNALFN
93 #define SIGNALFN daemon_signal
94 #define strsignal daemon_strsignal
95 #define sleep daemon_sleep
96 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
97 #define SIGQUIT SIGBREAK
98 #define SIGQUIT_KEYNAME "CONTROL-Break"
99 #else // _WIN32
100 #define SIGQUIT_KEYNAME "CONTROL-\\"
101 #endif // _WIN32
102 
103 const char * smartd_cpp_cvsid = "$Id: smartd.cpp 4413 2017-03-27 20:47:24Z chrfranke $"
104  CONFIG_H_CVSID;
105 
106 using namespace smartmontools;
107 
108 // smartd exit codes
109 #define EXIT_BADCMD 1 // command line did not parse
110 #define EXIT_BADCONF 2 // syntax error in config file
111 #define EXIT_STARTUP 3 // problem forking daemon
112 #define EXIT_PID 4 // problem creating pid file
113 #define EXIT_NOCONF 5 // config file does not exist
114 #define EXIT_READCONF 6 // config file exists but cannot be read
115 
116 #define EXIT_NOMEM 8 // out of memory
117 #define EXIT_BADCODE 10 // internal error - should NEVER happen
118 
119 #define EXIT_BADDEV 16 // we can't monitor this device
120 #define EXIT_NODEV 17 // no devices to monitor
121 
122 #define EXIT_SIGNAL 254 // abort on signal
123 
124 
125 // command-line: 1=debug mode, 2=print presets
126 static unsigned char debugmode = 0;
127 
128 // command-line: how long to sleep between checks
129 #define CHECKTIME 1800
130 static int checktime=CHECKTIME;
131 
132 // command-line: name of PID file (empty for no pid file)
133 static std::string pid_file;
134 
135 // command-line: path prefix of persistent state file, empty if no persistence.
136 static std::string state_path_prefix
137 #ifdef SMARTMONTOOLS_SAVESTATES
138  = SMARTMONTOOLS_SAVESTATES
139 #endif
140  ;
141 
142 // command-line: path prefix of attribute log file, empty if no logs.
143 static std::string attrlog_path_prefix
144 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
145  = SMARTMONTOOLS_ATTRIBUTELOG
146 #endif
147  ;
148 
149 // configuration file name
150 static const char * configfile;
151 // configuration file "name" if read from stdin
152 static const char * const configfile_stdin = "<stdin>";
153 // path of alternate configuration file
154 static std::string configfile_alt;
155 
156 // warning script file
157 static std::string warning_script;
158 
159 // command-line: when should we exit?
160 enum quit_t {
163 };
165 
166 // command-line; this is the default syslog(3) log facility to use.
167 static int facility=LOG_DAEMON;
168 
169 #ifndef _WIN32
170 // command-line: fork into background?
171 static bool do_fork=true;
172 #endif
173 
174 #ifdef HAVE_LIBCAP_NG
175 // command-line: enable capabilities?
176 static bool enable_capabilities = false;
177 #endif
178 
179 // TODO: This smartctl only variable is also used in os_win32.cpp
180 unsigned char failuretest_permissive = 0;
181 
182 // set to one if we catch a USR1 (check devices now)
183 static volatile int caughtsigUSR1=0;
184 
185 #ifdef _WIN32
186 // set to one if we catch a USR2 (toggle debug mode)
187 static volatile int caughtsigUSR2=0;
188 #endif
189 
190 // set to one if we catch a HUP (reload config file). In debug mode,
191 // set to two, if we catch INT (also reload config file).
192 static volatile int caughtsigHUP=0;
193 
194 // set to signal value if we catch INT, QUIT, or TERM
195 static volatile int caughtsigEXIT=0;
196 
197 // This function prints either to stdout or to the syslog as needed.
198 static void PrintOut(int priority, const char *fmt, ...)
200 
201 // Attribute monitoring flags.
202 // See monitor_attr_flags below.
203 enum {
207  MONITOR_RAW = 0x08,
210 };
211 
212 // Array of flags for each attribute.
214 {
215 public:
217  { memset(m_flags, 0, sizeof(m_flags)); }
218 
219  bool is_set(int id, unsigned char flag) const
220  { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }
221 
222  void set(int id, unsigned char flags)
223  {
224  if (0 < id && id < (int)sizeof(m_flags))
225  m_flags[id] |= flags;
226  }
227 
228 private:
229  unsigned char m_flags[256];
230 };
231 
232 
233 /// Configuration data for a device. Read from smartd.conf.
234 /// Supports copy & assignment and is compatible with STL containers.
236 {
237  int lineno; // Line number of entry in file
238  std::string name; // Device name (with optional extra info)
239  std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
240  std::string dev_type; // Device type argument from -d directive, empty if none
241  std::string dev_idinfo; // Device identify info for warning emails
242  std::string state_file; // Path of the persistent state file, empty if none
243  std::string attrlog_file; // Path of the persistent attrlog file, empty if none
244  bool ignore; // Ignore this entry
245  bool smartcheck; // Check SMART status
246  bool usagefailed; // Check for failed Usage Attributes
247  bool prefail; // Track changes in Prefail Attributes
248  bool usage; // Track changes in Usage Attributes
249  bool selftest; // Monitor number of selftest errors
250  bool errorlog; // Monitor number of ATA errors
251  bool xerrorlog; // Monitor number of ATA errors (Extended Comprehensive error log)
252  bool offlinests; // Monitor changes in offline data collection status
253  bool offlinests_ns; // Disable auto standby if in progress
254  bool selfteststs; // Monitor changes in self-test execution status
255  bool selfteststs_ns; // Disable auto standby if in progress
256  bool permissive; // Ignore failed SMART commands
257  char autosave; // 1=disable, 2=enable Autosave Attributes
258  char autoofflinetest; // 1=disable, 2=enable Auto Offline Test
259  firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
260  bool ignorepresets; // Ignore database of -v options
261  bool showpresets; // Show database entry for this device
262  bool removable; // Device may disappear (not be present)
263  char powermode; // skip check, if disk in idle or standby mode
264  bool powerquiet; // skip powermode 'skipping checks' message
265  int powerskipmax; // how many times can be check skipped
266  unsigned char tempdiff; // Track Temperature changes >= this limit
267  unsigned char tempinfo, tempcrit; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
268  regular_expression test_regex; // Regex for scheduled testing
269 
270  // Configuration of email warning messages
271  std::string emailcmdline; // script to execute, empty if no messages
272  std::string emailaddress; // email address, or empty
273  unsigned char emailfreq; // Emails once (1) daily (2) diminishing (3)
274  bool emailtest; // Send test email?
275 
276  // ATA ONLY
277  int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
278  int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
279  int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
280  int set_lookahead; // disable(-1), enable(1) read look-ahead
281  int set_standby; // set(1..255->0..254) standby timer
282  bool set_security_freeze; // Freeze ATA security
283  int set_wcache; // disable(-1), enable(1) write cache
284  int set_dsn; // disable(0x2), enable(0x1) DSN
285 
286  bool sct_erc_set; // set SCT ERC to:
287  unsigned short sct_erc_readtime; // ERC read time (deciseconds)
288  unsigned short sct_erc_writetime; // ERC write time (deciseconds)
289 
290  unsigned char curr_pending_id; // ID of current pending sector count, 0 if none
291  unsigned char offl_pending_id; // ID of offline uncorrectable sector count, 0 if none
292  bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
293  bool curr_pending_set, offl_pending_set; // True if '-C', '-U' set in smartd.conf
294 
295  attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
296 
298 
299  dev_config();
300 };
301 
303 : lineno(0),
304  ignore(false),
305  smartcheck(false),
306  usagefailed(false),
307  prefail(false),
308  usage(false),
309  selftest(false),
310  errorlog(false),
311  xerrorlog(false),
312  offlinests(false), offlinests_ns(false),
313  selfteststs(false), selfteststs_ns(false),
314  permissive(false),
315  autosave(0),
316  autoofflinetest(0),
317  ignorepresets(false),
318  showpresets(false),
319  removable(false),
320  powermode(0),
321  powerquiet(false),
322  powerskipmax(0),
323  tempdiff(0),
324  tempinfo(0), tempcrit(0),
325  emailfreq(0),
326  emailtest(false),
327  dev_rpm(0),
328  set_aam(0), set_apm(0),
329  set_lookahead(0),
330  set_standby(0),
331  set_security_freeze(false),
332  set_wcache(0), set_dsn(0),
333  sct_erc_set(false),
334  sct_erc_readtime(0), sct_erc_writetime(0),
335  curr_pending_id(0), offl_pending_id(0),
336  curr_pending_incr(false), offl_pending_incr(false),
337  curr_pending_set(false), offl_pending_set(false)
338 {
339 }
340 
341 
342 // Number of allowed mail message types
343 static const int SMARTD_NMAIL = 13;
344 // Type for '-M test' mails (state not persistent)
345 static const int MAILTYPE_TEST = 0;
346 // TODO: Add const or enum for all mail types.
347 
348 struct mailinfo {
349  int logged;// number of times an email has been sent
350  time_t firstsent;// time first email was sent, as defined by time(2)
351  time_t lastsent; // time last email was sent, as defined by time(2)
352 
354  : logged(0), firstsent(0), lastsent(0) { }
355 };
356 
357 /// Persistent state data for a device.
359 {
360  unsigned char tempmin, tempmax; // Min/Max Temperatures
361 
362  unsigned char selflogcount; // total number of self-test errors
363  unsigned short selfloghour; // lifetime hours of last self-test error
364 
365  time_t scheduled_test_next_check; // Time of next check for scheduled self-tests
366 
367  uint64_t selective_test_last_start; // Start LBA of last scheduled selective self-test
368  uint64_t selective_test_last_end; // End LBA of last scheduled selective self-test
369 
370  mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
371 
372  // ATA ONLY
373  int ataerrorcount; // Total number of ATA errors
374 
375  // Persistent part of ata_smart_values:
376  struct ata_attribute {
377  unsigned char id;
378  unsigned char val;
379  unsigned char worst; // Byte needed for 'raw64' attribute only.
381  unsigned char resvd;
382 
383  ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
384  };
386 
387  // SCSI ONLY
388 
391  unsigned char found;
392  scsi_error_counter_t() : found(0)
393  { memset(&errCounter, 0, sizeof(errCounter)); }
394  };
396 
399  unsigned char found;
401  { memset(&nme, 0, sizeof(nme)); }
402  };
404 
405  // NVMe only
407 
409 };
410 
412 : tempmin(0), tempmax(0),
413  selflogcount(0),
414  selfloghour(0),
415  scheduled_test_next_check(0),
416  selective_test_last_start(0),
417  selective_test_last_end(0),
418  ataerrorcount(0),
419  nvme_err_log_entries(0)
420 {
421 }
422 
423 /// Non-persistent state data for a device.
425 {
426  bool must_write; // true if persistent part should be written
427 
428  bool not_cap_offline; // true == not capable of offline testing
433 
434  unsigned char temperature; // last recorded Temperature (in Celsius)
435  time_t tempmin_delay; // time where Min Temperature tracking will start
436 
437  bool removed; // true if open() failed for removable device
438 
439  bool powermodefail; // true if power mode check failed
440  int powerskipcnt; // Number of checks skipped due to idle or standby mode
441  int lastpowermodeskipped; // the last power mode that was skipped
442 
443  // SCSI ONLY
444  unsigned char SmartPageSupported; // has log sense IE page (0x2f)
445  unsigned char TempPageSupported; // has log sense temperature page (0xd)
450  unsigned char SuppressReport; // minimize nuisance reports
451  unsigned char modese_len; // mode sense/select cmd len: 0 (don't
452  // know yet) 6 or 10
453  // ATA ONLY
454  uint64_t num_sectors; // Number of sectors
455  ata_smart_values smartval; // SMART data
457  bool offline_started; // true if offline data collection was started
458  bool selftest_started; // true if self-test was started
459 
460  temp_dev_state();
461 };
462 
464 : must_write(false),
465  not_cap_offline(false),
466  not_cap_conveyance(false),
467  not_cap_short(false),
468  not_cap_long(false),
469  not_cap_selective(false),
470  temperature(0),
471  tempmin_delay(0),
472  removed(false),
473  powermodefail(false),
474  powerskipcnt(0),
475  lastpowermodeskipped(0),
476  SmartPageSupported(false),
477  TempPageSupported(false),
478  ReadECounterPageSupported(false),
479  WriteECounterPageSupported(false),
480  VerifyECounterPageSupported(false),
481  NonMediumErrorPageSupported(false),
482  SuppressReport(false),
483  modese_len(0),
484  num_sectors(0),
485  offline_started(false),
486  selftest_started(false)
487 {
488  memset(&smartval, 0, sizeof(smartval));
489  memset(&smartthres, 0, sizeof(smartthres));
490 }
491 
492 /// Runtime state data for a device.
493 struct dev_state
494 : public persistent_dev_state,
495  public temp_dev_state
496 {
498  void update_temp_state();
499 };
500 
501 /// Container for configuration info for each device.
502 typedef std::vector<dev_config> dev_config_vector;
503 
504 /// Container for state info for each device.
505 typedef std::vector<dev_state> dev_state_vector;
506 
507 // Copy ATA attributes to persistent state.
509 {
510  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
512  ata_attribute & pa = ata_attributes[i];
513  pa.id = ta.id;
514  if (ta.id == 0) {
515  pa.val = pa.worst = 0; pa.raw = 0;
516  continue;
517  }
518  pa.val = ta.current;
519  pa.worst = ta.worst;
520  pa.raw = ta.raw[0]
521  | ( ta.raw[1] << 8)
522  | ( ta.raw[2] << 16)
523  | ((uint64_t)ta.raw[3] << 24)
524  | ((uint64_t)ta.raw[4] << 32)
525  | ((uint64_t)ta.raw[5] << 40);
526  pa.resvd = ta.reserv;
527  }
528 }
529 
530 // Copy ATA from persistent to temp state.
532 {
533  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
534  const ata_attribute & pa = ata_attributes[i];
536  ta.id = pa.id;
537  if (pa.id == 0) {
538  ta.current = ta.worst = 0;
539  memset(ta.raw, 0, sizeof(ta.raw));
540  continue;
541  }
542  ta.current = pa.val;
543  ta.worst = pa.worst;
544  ta.raw[0] = (unsigned char) pa.raw;
545  ta.raw[1] = (unsigned char)(pa.raw >> 8);
546  ta.raw[2] = (unsigned char)(pa.raw >> 16);
547  ta.raw[3] = (unsigned char)(pa.raw >> 24);
548  ta.raw[4] = (unsigned char)(pa.raw >> 32);
549  ta.raw[5] = (unsigned char)(pa.raw >> 40);
550  ta.reserv = pa.resvd;
551  }
552 }
553 
554 // Parse a line from a state file.
555 static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
556 {
557  static const regular_expression regex(
558  "^ *"
559  "((temperature-min)" // (1 (2)
560  "|(temperature-max)" // (3)
561  "|(self-test-errors)" // (4)
562  "|(self-test-last-err-hour)" // (5)
563  "|(scheduled-test-next-check)" // (6)
564  "|(selective-test-last-start)" // (7)
565  "|(selective-test-last-end)" // (8)
566  "|(ata-error-count)" // (9)
567  "|(mail\\.([0-9]+)\\." // (10 (11)
568  "((count)" // (12 (13)
569  "|(first-sent-time)" // (14)
570  "|(last-sent-time)" // (15)
571  ")" // 12)
572  ")" // 10)
573  "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
574  "((id)" // (18 (19)
575  "|(val)" // (20)
576  "|(worst)" // (21)
577  "|(raw)" // (22)
578  "|(resvd)" // (23)
579  ")" // 18)
580  ")" // 16)
581  "|(nvme-err-log-entries)" // (24)
582  ")" // 1)
583  " *= *([0-9]+)[ \n]*$", // (25)
584  REG_EXTENDED
585  );
586 
587  const int nmatch = 1+25;
588  regmatch_t match[nmatch];
589  if (!regex.execute(line, nmatch, match))
590  return false;
591  if (match[nmatch-1].rm_so < 0)
592  return false;
593 
594  uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
595 
596  int m = 1;
597  if (match[++m].rm_so >= 0)
598  state.tempmin = (unsigned char)val;
599  else if (match[++m].rm_so >= 0)
600  state.tempmax = (unsigned char)val;
601  else if (match[++m].rm_so >= 0)
602  state.selflogcount = (unsigned char)val;
603  else if (match[++m].rm_so >= 0)
604  state.selfloghour = (unsigned short)val;
605  else if (match[++m].rm_so >= 0)
606  state.scheduled_test_next_check = (time_t)val;
607  else if (match[++m].rm_so >= 0)
608  state.selective_test_last_start = val;
609  else if (match[++m].rm_so >= 0)
610  state.selective_test_last_end = val;
611  else if (match[++m].rm_so >= 0)
612  state.ataerrorcount = (int)val;
613  else if (match[m+=2].rm_so >= 0) {
614  int i = atoi(line+match[m].rm_so);
615  if (!(0 <= i && i < SMARTD_NMAIL))
616  return false;
617  if (i == MAILTYPE_TEST) // Don't suppress test mails
618  return true;
619  if (match[m+=2].rm_so >= 0)
620  state.maillog[i].logged = (int)val;
621  else if (match[++m].rm_so >= 0)
622  state.maillog[i].firstsent = (time_t)val;
623  else if (match[++m].rm_so >= 0)
624  state.maillog[i].lastsent = (time_t)val;
625  else
626  return false;
627  }
628  else if (match[m+=5+1].rm_so >= 0) {
629  int i = atoi(line+match[m].rm_so);
630  if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
631  return false;
632  if (match[m+=2].rm_so >= 0)
633  state.ata_attributes[i].id = (unsigned char)val;
634  else if (match[++m].rm_so >= 0)
635  state.ata_attributes[i].val = (unsigned char)val;
636  else if (match[++m].rm_so >= 0)
637  state.ata_attributes[i].worst = (unsigned char)val;
638  else if (match[++m].rm_so >= 0)
639  state.ata_attributes[i].raw = val;
640  else if (match[++m].rm_so >= 0)
641  state.ata_attributes[i].resvd = (unsigned char)val;
642  else
643  return false;
644  }
645  else if (match[m+7].rm_so >= 0)
646  state.nvme_err_log_entries = val;
647  else
648  return false;
649  return true;
650 }
651 
652 // Read a state file.
653 static bool read_dev_state(const char * path, persistent_dev_state & state)
654 {
655  stdio_file f(path, "r");
656  if (!f) {
657  if (errno != ENOENT)
658  pout("Cannot read state file \"%s\"\n", path);
659  return false;
660  }
661 #ifdef __CYGWIN__
662  setmode(fileno(f), O_TEXT); // Allow files with \r\n
663 #endif
664 
665  persistent_dev_state new_state;
666  int good = 0, bad = 0;
667  char line[256];
668  while (fgets(line, sizeof(line), f)) {
669  const char * s = line + strspn(line, " \t");
670  if (!*s || *s == '#')
671  continue;
672  if (!parse_dev_state_line(line, new_state))
673  bad++;
674  else
675  good++;
676  }
677 
678  if (bad) {
679  if (!good) {
680  pout("%s: format error\n", path);
681  return false;
682  }
683  pout("%s: %d invalid line(s) ignored\n", path, bad);
684  }
685 
686  // This sets the values missing in the file to 0.
687  state = new_state;
688  return true;
689 }
690 
// Write "NAME = VAL" to F.  Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
696 
// Write "NAME1.ID.NAME2 = VAL" to F.  Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
702 
703 // Write a state file
704 static bool write_dev_state(const char * path, const persistent_dev_state & state)
705 {
706  // Rename old "file" to "file~"
707  std::string pathbak = path; pathbak += '~';
708  unlink(pathbak.c_str());
709  rename(path, pathbak.c_str());
710 
711  stdio_file f(path, "w");
712  if (!f) {
713  pout("Cannot create state file \"%s\"\n", path);
714  return false;
715  }
716 
717  fprintf(f, "# smartd state file\n");
718  write_dev_state_line(f, "temperature-min", state.tempmin);
719  write_dev_state_line(f, "temperature-max", state.tempmax);
720  write_dev_state_line(f, "self-test-errors", state.selflogcount);
721  write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
722  write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
723  write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
724  write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
725 
726  int i;
727  for (i = 0; i < SMARTD_NMAIL; i++) {
728  if (i == MAILTYPE_TEST) // Don't suppress test mails
729  continue;
730  const mailinfo & mi = state.maillog[i];
731  if (!mi.logged)
732  continue;
733  write_dev_state_line(f, "mail", i, "count", mi.logged);
734  write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
735  write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
736  }
737 
738  // ATA ONLY
739  write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
740 
741  for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
743  if (!pa.id)
744  continue;
745  write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
746  write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
747  write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
748  write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
749  write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
750  }
751 
752  // NVMe only
753  write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
754 
755  return true;
756 }
757 
758 // Write to the attrlog file
759 static bool write_dev_attrlog(const char * path, const dev_state & state)
760 {
761  stdio_file f(path, "a");
762  if (!f) {
763  pout("Cannot create attribute log file \"%s\"\n", path);
764  return false;
765  }
766 
767 
768  time_t now = time(0);
769  struct tm * tms = gmtime(&now);
770  fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
771  1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
772  tms->tm_hour, tms->tm_min, tms->tm_sec);
773  // ATA ONLY
774  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
776  if (!pa.id)
777  continue;
778  fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
779  }
780  // SCSI ONLY
781  const struct scsiErrorCounter * ecp;
782  const char * pageNames[3] = {"read", "write", "verify"};
783  for (int k = 0; k < 3; ++k) {
784  if ( !state.scsi_error_counters[k].found ) continue;
785  ecp = &state.scsi_error_counters[k].errCounter;
786  fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
787  "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
788  "\t%s-corr-by-retry;%" PRIu64 ";"
789  "\t%s-total-err-corrected;%" PRIu64 ";"
790  "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
791  "\t%s-gb-processed;%.3f;"
792  "\t%s-total-unc-errors;%" PRIu64 ";",
793  pageNames[k], ecp->counter[0],
794  pageNames[k], ecp->counter[1],
795  pageNames[k], ecp->counter[2],
796  pageNames[k], ecp->counter[3],
797  pageNames[k], ecp->counter[4],
798  pageNames[k], (ecp->counter[5] / 1000000000.0),
799  pageNames[k], ecp->counter[6]);
800  }
802  fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
803  }
804  // write SCSI current temperature if it is monitored
805  if (state.temperature)
806  fprintf(f, "\ttemperature;%d;", state.temperature);
807  // end of line
808  fprintf(f, "\n");
809  return true;
810 }
811 
812 // Write all state files. If write_always is false, don't write
813 // unless must_write is set.
814 static void write_all_dev_states(const dev_config_vector & configs,
815  dev_state_vector & states,
816  bool write_always = true)
817 {
818  for (unsigned i = 0; i < states.size(); i++) {
819  const dev_config & cfg = configs.at(i);
820  if (cfg.state_file.empty())
821  continue;
822  dev_state & state = states[i];
823  if (!write_always && !state.must_write)
824  continue;
825  if (!write_dev_state(cfg.state_file.c_str(), state))
826  continue;
827  state.must_write = false;
828  if (write_always || debugmode)
829  PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
830  cfg.name.c_str(), cfg.state_file.c_str());
831  }
832 }
833 
834 // Write to all attrlog files
835 static void write_all_dev_attrlogs(const dev_config_vector & configs,
836  dev_state_vector & states)
837 {
838  for (unsigned i = 0; i < states.size(); i++) {
839  const dev_config & cfg = configs.at(i);
840  if (cfg.attrlog_file.empty())
841  continue;
842  dev_state & state = states[i];
843  write_dev_attrlog(cfg.attrlog_file.c_str(), state);
844  }
845 }
846 
847 // remove the PID file
848 static void RemovePidFile()
849 {
850  if (!pid_file.empty()) {
851  if (unlink(pid_file.c_str()))
852  PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
853  pid_file.c_str(), strerror(errno));
854  pid_file.clear();
855  }
856  return;
857 }
858 
859 extern "C" { // signal handlers require C-linkage
860 
861 // Note if we catch a SIGUSR1
862 static void USR1handler(int sig)
863 {
864  if (SIGUSR1==sig)
865  caughtsigUSR1=1;
866  return;
867 }
868 
869 #ifdef _WIN32
870 // Note if we catch a SIGUSR2
871 static void USR2handler(int sig)
872 {
873  if (SIGUSR2==sig)
874  caughtsigUSR2=1;
875  return;
876 }
877 #endif
878 
879 // Note if we catch a HUP (or INT in debug mode)
880 static void HUPhandler(int sig)
881 {
882  if (sig==SIGHUP)
883  caughtsigHUP=1;
884  else
885  caughtsigHUP=2;
886  return;
887 }
888 
889 // signal handler for TERM, QUIT, and INT (if not in debug mode)
890 static void sighandler(int sig)
891 {
892  if (!caughtsigEXIT)
893  caughtsigEXIT=sig;
894  return;
895 }
896 
897 } // extern "C"
898 
899 // Cleanup, print Goodbye message and remove pidfile
900 static int Goodbye(int status)
901 {
902  // delete PID file, if one was created
903  RemovePidFile();
904 
905  // and this should be the final output from smartd before it exits
906  PrintOut(status?LOG_CRIT:LOG_INFO, "smartd is exiting (exit status %d)\n", status);
907 
908  return status;
909 }
910 
911 // a replacement for setenv() which is not available on all platforms.
912 // Note that the string passed to putenv must not be freed or made
913 // invalid, since a pointer to it is kept by putenv(). This means that
914 // it must either be a static buffer or allocated off the heap. The
915 // string can be freed if the environment variable is redefined via
916 // another call to putenv(). There is no portable way to unset a variable
917 // with putenv(). So we manage the buffer in a static object.
918 // Using setenv() if available is not considered because some
919 // implementations may produce memory leaks.
920 
922 {
923 public:
925  : m_buf((char *)0) { }
926 
927  void set(const char * name, const char * value);
928 
929 private:
930  char * m_buf;
931 
932  env_buffer(const env_buffer &);
933  void operator=(const env_buffer &);
934 };
935 
936 void env_buffer::set(const char * name, const char * value)
937 {
938  int size = strlen(name) + 1 + strlen(value) + 1;
939  char * newbuf = new char[size];
940  snprintf(newbuf, size, "%s=%s", name, value);
941 
942  if (putenv(newbuf))
943  throw std::runtime_error("putenv() failed");
944 
945  // This assumes that the same NAME is passed on each call
946  delete [] m_buf;
947  m_buf = newbuf;
948 }
949 
950 #define EBUFLEN 1024
951 
952 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
954 
955 // If either address or executable path is non-null then send and log
956 // a warning email, or execute executable
957 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
958 {
959  static const char * const whichfail[] = {
960  "EmailTest", // 0
961  "Health", // 1
962  "Usage", // 2
963  "SelfTest", // 3
964  "ErrorCount", // 4
965  "FailedHealthCheck", // 5
966  "FailedReadSmartData", // 6
967  "FailedReadSmartErrorLog", // 7
968  "FailedReadSmartSelfTestLog", // 8
969  "FailedOpenDevice", // 9
970  "CurrentPendingSector", // 10
971  "OfflineUncorrectableSector", // 11
972  "Temperature" // 12
973  };
974 
975  // See if user wants us to send mail
976  if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
977  return;
978 
979  std::string address = cfg.emailaddress;
980  const char * executable = cfg.emailcmdline.c_str();
981 
982  // which type of mail are we sending?
983  mailinfo * mail=(state.maillog)+which;
984 
985  // checks for sanity
986  if (cfg.emailfreq<1 || cfg.emailfreq>3) {
987  PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
988  return;
989  }
990  if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
991  PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
992  which, (int)sizeof(whichfail));
993  return;
994  }
995 
996  // Return if a single warning mail has been sent.
997  if ((cfg.emailfreq==1) && mail->logged)
998  return;
999 
1000  // Return if this is an email test and one has already been sent.
1001  if (which == 0 && mail->logged)
1002  return;
1003 
1004  // To decide if to send mail, we need to know what time it is.
1005  time_t epoch = time(0);
1006 
1007  // Return if less than one day has gone by
1008  const int day = 24*3600;
1009  if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1010  return;
1011 
1012  // Return if less than 2^(logged-1) days have gone by
1013  if (cfg.emailfreq==3 && mail->logged) {
1014  int days = 0x01 << (mail->logged - 1);
1015  days*=day;
1016  if (epoch<(mail->lastsent+days))
1017  return;
1018  }
1019 
1020 #ifdef HAVE_LIBCAP_NG
1021  if (enable_capabilities) {
1022  PrintOut(LOG_ERR, "Sending a mail was supressed. "
1023  "Mails can't be send when capabilites are enabled\n");
1024  return;
1025  }
1026 #endif
1027 
1028  // record the time of this mail message, and the first mail message
1029  if (!mail->logged)
1030  mail->firstsent=epoch;
1031  mail->lastsent=epoch;
1032 
1033  // print warning string into message
1034  char message[256];
1035  va_list ap;
1036  va_start(ap, fmt);
1037  vsnprintf(message, sizeof(message), fmt, ap);
1038  va_end(ap);
1039 
1040  // replace commas by spaces to separate recipients
1041  std::replace(address.begin(), address.end(), ',', ' ');
1042 
1043  // Export information in environment variables that will be useful
1044  // for user scripts
1045  static env_buffer env[12];
1046  env[0].set("SMARTD_MAILER", executable);
1047  env[1].set("SMARTD_MESSAGE", message);
1048  char dates[DATEANDEPOCHLEN];
1049  snprintf(dates, sizeof(dates), "%d", mail->logged);
1050  env[2].set("SMARTD_PREVCNT", dates);
1051  dateandtimezoneepoch(dates, mail->firstsent);
1052  env[3].set("SMARTD_TFIRST", dates);
1053  snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1054  env[4].set("SMARTD_TFIRSTEPOCH", dates);
1055  env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1056  env[6].set("SMARTD_ADDRESS", address.c_str());
1057  env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1058 
1059  // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1060  env[8].set("SMARTD_DEVICETYPE",
1061  (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1062  env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1063 
1064  env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1065  dates[0] = 0;
1066  if (which) switch (cfg.emailfreq) {
1067  case 2: dates[0] = '1'; dates[1] = 0; break;
1068  case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1069  }
1070  env[11].set("SMARTD_NEXTDAYS", dates);
1071 
1072  // now construct a command to send this as EMAIL
1073  if (!*executable)
1074  executable = "<mail>";
1075  const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1076  const char * newwarn = (which? "Warning via" : "Test of");
1077 
1078 #ifndef _WIN32
1079  char command[2048];
1080  snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1081 
1082  // tell SYSLOG what we are about to do...
1083  PrintOut(LOG_INFO,"%s %s to %s ...\n",
1084  which?"Sending warning via":"Executing test of", executable, newadd);
1085 
1086  // issue the command to send mail or to run the user's executable
1087  errno=0;
1088  FILE * pfp;
1089  if (!(pfp=popen(command, "r")))
1090  // failed to popen() mail process
1091  PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1092  newwarn, executable, newadd, errno?strerror(errno):"");
1093  else {
1094  // pipe suceeded!
1095  int len, status;
1096  char buffer[EBUFLEN];
1097 
1098  // if unexpected output on stdout/stderr, null terminate, print, and flush
1099  if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1100  int count=0;
1101  int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1102  buffer[newlen]='\0';
1103  PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1104  newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1105 
1106  // flush pipe if needed
1107  while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1108  count++;
1109 
1110  // tell user that pipe was flushed, or that something is really wrong
1111  if (count && count<EBUFLEN)
1112  PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1113  newwarn, executable, newadd);
1114  else if (count)
1115  PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1116  newwarn, executable, newadd);
1117  }
1118 
1119  // if something went wrong with mail process, print warning
1120  errno=0;
1121  if (-1==(status=pclose(pfp)))
1122  PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1123  errno?strerror(errno):"");
1124  else {
1125  // mail process apparently succeeded. Check and report exit status
1126  if (WIFEXITED(status)) {
1127  // exited 'normally' (but perhaps with nonzero status)
1128  int status8 = WEXITSTATUS(status);
1129  if (status8>128)
1130  PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1131  newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1132  else if (status8)
1133  PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1134  newwarn, executable, newadd, status, status8);
1135  else
1136  PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1137  }
1138 
1139  if (WIFSIGNALED(status))
1140  PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1141  newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1142 
1143  // this branch is probably not possible. If subprocess is
1144  // stopped then pclose() should not return.
1145  if (WIFSTOPPED(status))
1146  PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1147  newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1148 
1149  }
1150  }
1151 
1152 #else // _WIN32
1153  {
1154  char command[2048];
1155  snprintf(command, sizeof(command), "cmd /c \"%s\"", warning_script.c_str());
1156 
1157  char stdoutbuf[800]; // < buffer in syslog_win32::vsyslog()
1158  int rc;
1159  // run command
1160  PrintOut(LOG_INFO,"%s %s to %s ...\n",
1161  (which?"Sending warning via":"Executing test of"), executable, newadd);
1162  rc = daemon_spawn(command, "", 0, stdoutbuf, sizeof(stdoutbuf));
1163  if (rc >= 0 && stdoutbuf[0])
1164  PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%d bytes) to STDOUT/STDERR:\n%s\n",
1165  newwarn, executable, newadd, (int)strlen(stdoutbuf), stdoutbuf);
1166  if (rc != 0)
1167  PrintOut(LOG_CRIT,"%s %s to %s: failed, exit status %d\n",
1168  newwarn, executable, newadd, rc);
1169  else
1170  PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1171  }
1172 
1173 #endif // _WIN32
1174 
1175  // increment mail sent counter
1176  mail->logged++;
1177 }
1178 
1179 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1181 
1182 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1183 {
1184  if (!(0 <= which && which < SMARTD_NMAIL))
1185  return;
1186 
1187  // Return if no mail sent yet
1188  mailinfo & mi = state.maillog[which];
1189  if (!mi.logged)
1190  return;
1191 
1192  // Format & print message
1193  char msg[256];
1194  va_list ap;
1195  va_start(ap, fmt);
1196  vsnprintf(msg, sizeof(msg), fmt, ap);
1197  va_end(ap);
1198 
1199  PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1200  msg, mi.logged, (mi.logged==1 ? "" : "s"));
1201 
1202  // Clear mail counter and timestamps
1203  mi = mailinfo();
1204  state.must_write = true;
1205 }
1206 
1207 #ifndef _WIN32
1208 
1209 // Output multiple lines via separate syslog(3) calls.
1210 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1211 {
1212  char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1213  vsnprintf(buf, sizeof(buf), fmt, ap);
1214 
1215  for (char * p = buf, * q; p && *p; p = q) {
1216  if ((q = strchr(p, '\n')))
1217  *q++ = 0;
1218  if (*p)
1219  syslog(priority, "%s\n", p);
1220  }
1221 }
1222 
1223 #else // _WIN32
1224 // os_win32/syslog_win32.cpp supports multiple lines.
1225 #define vsyslog_lines vsyslog
1226 #endif // _WIN32
1227 
1228 // Printing function for watching ataprint commands, or losing them
1229 // [From GLIBC Manual: Since the prototype doesn't specify types for
1230 // optional arguments, in a call to a variadic function the default
1231 // argument promotions are performed on the optional argument
1232 // values. This means the objects of type char or short int (whether
1233 // signed or not) are promoted to either int or unsigned int, as
1234 // appropriate.]
1235 void pout(const char *fmt, ...){
1236  va_list ap;
1237 
1238  // get the correct time in syslog()
1240  // initialize variable argument list
1241  va_start(ap,fmt);
1242  // in debugmode==1 mode we will print the output from the ataprint.o functions!
1243  if (debugmode && debugmode != 2) {
1244  FILE * f = stdout;
1245 #ifdef _WIN32
1246  if (facility == LOG_LOCAL1) // logging to stdout
1247  f = stderr;
1248 #endif
1249  vfprintf(f, fmt, ap);
1250  fflush(f);
1251  }
1252  // in debugmode==2 mode we print output from knowndrives.o functions
1253  else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1254  openlog("smartd", LOG_PID, facility);
1255  vsyslog_lines(LOG_INFO, fmt, ap);
1256  closelog();
1257  }
1258  va_end(ap);
1259  return;
1260 }
1261 
1262 // This function prints either to stdout or to the syslog as needed.
1263 static void PrintOut(int priority, const char *fmt, ...){
1264  va_list ap;
1265 
1266  // get the correct time in syslog()
1268  // initialize variable argument list
1269  va_start(ap,fmt);
1270  if (debugmode) {
1271  FILE * f = stdout;
1272 #ifdef _WIN32
1273  if (facility == LOG_LOCAL1) // logging to stdout
1274  f = stderr;
1275 #endif
1276  vfprintf(f, fmt, ap);
1277  fflush(f);
1278  }
1279  else {
1280  openlog("smartd", LOG_PID, facility);
1281  vsyslog_lines(priority, fmt, ap);
1282  closelog();
1283  }
1284  va_end(ap);
1285  return;
1286 }
1287 
1288 // Used to warn users about invalid checksums. Called from atacmds.cpp.
1289 void checksumwarning(const char * string)
1290 {
1291  pout("Warning! %s error: invalid SMART checksum.\n", string);
1292 }
1293 
1294 #ifndef _WIN32
1295 
1296 // Wait for the pid file to show up, this makes sure a calling program knows
1297 // that the daemon is really up and running and has a pid to kill it
1298 static bool WaitForPidFile()
1299 {
1300  int waited, max_wait = 10;
1301  struct stat stat_buf;
1302 
1303  if (pid_file.empty() || debugmode)
1304  return true;
1305 
1306  for(waited = 0; waited < max_wait; ++waited) {
1307  if (!stat(pid_file.c_str(), &stat_buf)) {
1308  return true;
1309  } else
1310  sleep(1);
1311  }
1312  return false;
1313 }
1314 
1315 #endif // _WIN32
1316 
1317 // Forks new process, closes ALL file descriptors, redirects stdin,
1318 // stdout, and stderr. Not quite daemon(). See
1319 // http://www.linuxjournal.com/article/2335
1320 // for a good description of why we do things this way.
1321 static void DaemonInit()
1322 {
1323 #ifndef _WIN32
1324  pid_t pid;
1325  int i;
1326 
1327  // flush all buffered streams. Else we might get two copies of open
1328  // streams since both parent and child get copies of the buffers.
1329  fflush(NULL);
1330 
1331  if (do_fork) {
1332  if ((pid=fork()) < 0) {
1333  // unable to fork!
1334  PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1335  EXIT(EXIT_STARTUP);
1336  }
1337  else if (pid) {
1338  // we are the parent process, wait for pid file, then exit cleanly
1339  if(!WaitForPidFile()) {
1340  PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1341  EXIT(EXIT_STARTUP);
1342  } else
1343  EXIT(0);
1344  }
1345 
1346  // from here on, we are the child process.
1347  setsid();
1348 
1349  // Fork one more time to avoid any possibility of having terminals
1350  if ((pid=fork()) < 0) {
1351  // unable to fork!
1352  PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1353  EXIT(EXIT_STARTUP);
1354  }
1355  else if (pid)
1356  // we are the parent process -- exit cleanly
1357  EXIT(0);
1358 
1359  // Now we are the child's child...
1360  }
1361 
1362  // close any open file descriptors
1363  for (i=getdtablesize();i>=0;--i)
1364  close(i);
1365 
1366 #define NO_warn_unused_result(cmd) { if (cmd) {} ; }
1367 
1368  // redirect any IO attempts to /dev/null for stdin
1369  i=open("/dev/null",O_RDWR);
1370  if (i>=0) {
1371  // stdout
1372  NO_warn_unused_result(dup(i));
1373  // stderr
1374  NO_warn_unused_result(dup(i));
1375  };
1376  umask(0022);
1377  NO_warn_unused_result(chdir("/"));
1378 
1379  if (do_fork)
1380  PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1381 
1382 #else // _WIN32
1383 
1384  // No fork() on native Win32
1385  // Detach this process from console
1386  fflush(NULL);
1387  if (daemon_detach("smartd")) {
1388  PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1389  EXIT(EXIT_STARTUP);
1390  }
1391  // stdin/out/err now closed if not redirected
1392 
1393 #endif // _WIN32
1394  return;
1395 }
1396 
1397 // create a PID file containing the current process id
1398 static void WritePidFile()
1399 {
1400  if (!pid_file.empty()) {
1401  pid_t pid = getpid();
1402  mode_t old_umask;
1403 #ifndef __CYGWIN__
1404  old_umask = umask(0077); // rwx------
1405 #else
1406  // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1407  old_umask = umask(0033); // rwxr--r--
1408 #endif
1409 
1410  stdio_file f(pid_file.c_str(), "w");
1411  umask(old_umask);
1412  if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1413  PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1414  EXIT(EXIT_PID);
1415  }
1416  PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1417  }
1418 }
1419 
1420 // Prints header identifying version of code and home
1421 static void PrintHead()
1422 {
1423  PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1424 }
1425 
1426 // prints help info for configuration file Directives
1427 static void Directives()
1428 {
1429  PrintOut(LOG_INFO,
1430  "Configuration file (%s) Directives (after device name):\n"
1431  " -d TYPE Set the device type: auto, ignore, removable,\n"
1432  " %s\n"
1433  " -T TYPE Set the tolerance to one of: normal, permissive\n"
1434  " -o VAL Enable/disable automatic offline tests (on/off)\n"
1435  " -S VAL Enable/disable attribute autosave (on/off)\n"
1436  " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1437  " -H Monitor SMART Health Status, report if failed\n"
1438  " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1439  " -l TYPE Monitor SMART log or self-test status:\n"
1440  " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1441  " -l scterc,R,W Set SCT Error Recovery Control\n"
1442  " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1443  " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1444  " -f Monitor 'Usage' Attributes, report failures\n"
1445  " -m ADD Send email warning to address ADD\n"
1446  " -M TYPE Modify email warning behavior (see man page)\n"
1447  " -p Report changes in 'Prefailure' Attributes\n"
1448  " -u Report changes in 'Usage' Attributes\n"
1449  " -t Equivalent to -p and -u Directives\n"
1450  " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1451  " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1452  " -i ID Ignore Attribute ID for -f Directive\n"
1453  " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1454  " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1455  " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1456  " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1457  " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1458  " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1459  " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1460  " -F TYPE Use firmware bug workaround:\n"
1461  " %s\n"
1462  " # Comment: text after a hash sign is ignored\n"
1463  " \\ Line continuation character\n"
1464  "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1465  "Use ID = 0 to turn off -C and/or -U Directives\n"
1466  "Example: /dev/sda -a\n",
1467  configfile,
1468  smi()->get_valid_dev_types_str().c_str(),
1470 }
1471 
/* Returns a pointer to a static string describing the valid arguments
   of command line option opt, or NULL if opt has no entry. */
static const char *GetValidArgList(char opt)
{
  // One entry per option letter; options sharing a description are
  // listed separately.
  static const struct {
    char option;
    const char * args;
  } arg_lists[] = {
    { 'A', "<PATH_PREFIX>" },
    { 's', "<PATH_PREFIX>" },
    { 'B', "[+]<FILE_NAME>" },
    { 'c', "<FILE_NAME>, -" },
    { 'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { 'q', "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { 'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]" },
    { 'p', "<FILE_NAME>" },
    { 'w', "<FILE_NAME>" },
    { 'i', "<INTEGER_SECONDS>" }
  };

  const unsigned count = sizeof(arg_lists) / sizeof(arg_lists[0]);
  for (unsigned k = 0; k < count; k++) {
    if (arg_lists[k].option == opt)
      return arg_lists[k].args;
  }
  return NULL;
}
1499 
1500 /* prints help information for command syntax */
1501 static void Usage()
1502 {
1503  PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1504  PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1505  PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1506 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1507  PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1508 #endif
1509  PrintOut(LOG_INFO,"\n");
1510  PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1511  PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1512  PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1513 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1514  PrintOut(LOG_INFO,"\n");
1515  PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1516 #endif
1517  PrintOut(LOG_INFO,"]\n\n");
1518  PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1519  PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1520  PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1521 #ifdef HAVE_LIBCAP_NG
1522  PrintOut(LOG_INFO," -C, --capabilities\n");
1523  PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1524  " Warning: Mail notification does not work when used.\n\n");
1525 #endif
1526  PrintOut(LOG_INFO," -d, --debug\n");
1527  PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1528  PrintOut(LOG_INFO," -D, --showdirectives\n");
1529  PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1530  PrintOut(LOG_INFO," -h, --help, --usage\n");
1531  PrintOut(LOG_INFO," Display this help and exit\n\n");
1532  PrintOut(LOG_INFO," -i N, --interval=N\n");
1533  PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1534  PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1535 #ifndef _WIN32
1536  PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1537 #else
1538  PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1539 #endif
1540 #ifndef _WIN32
1541  PrintOut(LOG_INFO," -n, --no-fork\n");
1542  PrintOut(LOG_INFO," Do not fork into background\n\n");
1543 #endif // _WIN32
1544  PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1545  PrintOut(LOG_INFO," Write PID file NAME\n\n");
1546  PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1547  PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1548  PrintOut(LOG_INFO," -r, --report=TYPE\n");
1549  PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1550  PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1551  PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1552 #ifdef SMARTMONTOOLS_SAVESTATES
1553  PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1554 #endif
1555  PrintOut(LOG_INFO,"\n");
1556  PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1557  PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1558 #ifndef _WIN32
1559  PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1560 #else
1561  PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1562 #endif
1563 #ifdef _WIN32
1564  PrintOut(LOG_INFO," --service\n");
1565  PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1566  PrintOut(LOG_INFO," smartd install [options]\n");
1567  PrintOut(LOG_INFO," Remove service with:\n");
1568  PrintOut(LOG_INFO," smartd remove\n\n");
1569 #endif // _WIN32
1570  PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1571  PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1572 }
1573 
1574 static int CloseDevice(smart_device * device, const char * name)
1575 {
1576  if (!device->close()){
1577  PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1578  return 1;
1579  }
1580  // device sucessfully closed
1581  return 0;
1582 }
1583 
// Return true if character c is not allowed in a state file name.
// Only the ASCII digits and the ASCII letters A-Z and a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1591 
1592 // Read error count from Summary or Extended Comprehensive SMART error log
1593 // Return -1 on error
1594 static int read_ata_error_count(ata_device * device, const char * name,
1595  firmwarebug_defs firmwarebugs, bool extended)
1596 {
1597  if (!extended) {
1598  ata_smart_errorlog log;
1599  if (ataReadErrorLog(device, &log, firmwarebugs)){
1600  PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1601  return -1;
1602  }
1603  return (log.error_log_pointer ? log.ata_error_count : 0);
1604  }
1605  else {
1606  ata_smart_exterrlog logx;
1607  if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1608  PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1609  return -1;
1610  }
1611  // Some disks use the reserved byte as index, see ataprint.cpp.
1612  return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1613  }
1614 }
1615 
1616 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1617 // error count, and top bits are the power-on hours of the last error.
1618 static int SelfTestErrorCount(ata_device * device, const char * name,
1619  firmwarebug_defs firmwarebugs)
1620 {
1621  struct ata_smart_selftestlog log;
1622 
1623  if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1624  PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1625  return -1;
1626  }
1627 
1628  // return current number of self-test errors
1629  return ataPrintSmartSelfTestlog(&log, false, firmwarebugs);
1630 }
1631 
// Decode the combined value returned by SelfTestErrorCount():
// bits 0-7 hold the self-test error count, bits 8-23 the power-on hours
// of the last error. The argument is parenthesized so that expressions
// such as (a | b) are decoded as a whole.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1634 
// Check offline data collection status: true if the status code (the
// status byte without its top bit 0x80) is 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned char code = status & 0x7f;
  return (code == 0x03);
}
1640 
// Check self-test execution status: true if the upper four bits of the
// status byte are all set (0xf).
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1646 
1647 // Log offline data collection status
1648 static void log_offline_data_coll_status(const char * name, unsigned char status)
1649 {
1650  const char * msg;
1651  switch (status & 0x7f) {
1652  case 0x00: msg = "was never started"; break;
1653  case 0x02: msg = "was completed without error"; break;
1654  case 0x03: msg = "is in progress"; break;
1655  case 0x04: msg = "was suspended by an interrupting command from host"; break;
1656  case 0x05: msg = "was aborted by an interrupting command from host"; break;
1657  case 0x06: msg = "was aborted by the device with a fatal error"; break;
1658  default: msg = 0;
1659  }
1660 
1661  if (msg)
1662  PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1663  "Device: %s, offline data collection %s%s\n", name, msg,
1664  ((status & 0x80) ? " (auto:on)" : ""));
1665  else
1666  PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1667  name, status);
1668 }
1669 
1670 // Log self-test execution status
1671 static void log_self_test_exec_status(const char * name, unsigned char status)
1672 {
1673  const char * msg;
1674  switch (status >> 4) {
1675  case 0x0: msg = "completed without error"; break;
1676  case 0x1: msg = "was aborted by the host"; break;
1677  case 0x2: msg = "was interrupted by the host with a reset"; break;
1678  case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1679  case 0x4: msg = "completed with error (unknown test element)"; break;
1680  case 0x5: msg = "completed with error (electrical test element)"; break;
1681  case 0x6: msg = "completed with error (servo/seek test element)"; break;
1682  case 0x7: msg = "completed with error (read test element)"; break;
1683  case 0x8: msg = "completed with error (handling damage?)"; break;
1684  default: msg = 0;
1685  }
1686 
1687  if (msg)
1688  PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1689  "Device: %s, previous self-test %s\n", name, msg);
1690  else if ((status >> 4) == 0xf)
1691  PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1692  name, status & 0x0f);
1693  else
1694  PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1695  name, status);
1696 }
1697 
1698 // Check pending sector count id (-C, -U directives).
1699 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1700  unsigned char id, const char * msg)
1701 {
1702  // Check attribute index
1703  int i = ata_find_attr_index(id, state.smartval);
1704  if (i < 0) {
1705  PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1706  cfg.name.c_str(), msg, id);
1707  return false;
1708  }
1709 
1710  // Check value
1712  cfg.attribute_defs);
1713  if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1714  PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1715  cfg.name.c_str(), msg, id, rawval, rawval);
1716  return false;
1717  }
1718 
1719  return true;
1720 }
1721 
1722 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1723 static void finish_device_scan(dev_config & cfg, dev_state & state)
1724 {
1725  // Set cfg.emailfreq if user hasn't set it
1726  if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1727  // Avoid that emails are suppressed forever due to state persistence
1728  if (cfg.state_file.empty())
1729  cfg.emailfreq = 1; // '-M once'
1730  else
1731  cfg.emailfreq = 2; // '-M daily'
1732  }
1733 
1734  // Start self-test regex check now if time was not read from state file
1735  if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1736  state.scheduled_test_next_check = time(0);
1737 }
1738 
1739 // Common function to format result message for ATA setting
1740 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1741  int set_option = 0, bool has_value = false)
1742 {
1743  if (!msg.empty())
1744  msg += ", ";
1745  msg += name;
1746  if (!ok)
1747  msg += ":--";
1748  else if (set_option < 0)
1749  msg += ":off";
1750  else if (has_value)
1751  msg += strprintf(":%d", set_option-1);
1752  else if (set_option > 0)
1753  msg += ":on";
1754 }
1755 
1756 
// Flag passed to ata_read_identity() by ATADeviceScan(). It is fixed to
// false for now.
// TODO: Add '-F swapid' directive
const bool fix_swapped_id = false;
1759 
1760 // scan to see what ata devices there are, and if they support SMART
1761 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev)
1762 {
1763  int supported=0;
1764  struct ata_identify_device drive;
1765  const char *name = cfg.name.c_str();
1766  int retid;
1767 
1768  // Device must be open
1769 
1770  // Get drive identity structure
1771  if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1772  if (retid<0)
1773  // Unable to read Identity structure
1774  PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1775  else
1776  PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1777  name, packetdevicetype(retid-1));
1778  CloseDevice(atadev, name);
1779  return 2;
1780  }
1781 
1782  // Get drive identity, size and rotation rate (HDD/SSD)
1783  char model[40+1], serial[20+1], firmware[8+1];
1784  ata_format_id_string(model, drive.model, sizeof(model)-1);
1785  ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1786  ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1787 
1788  ata_size_info sizes;
1789  ata_get_size_info(&drive, sizes);
1790  state.num_sectors = sizes.sectors;
1791  cfg.dev_rpm = ata_get_rotation_rate(&drive);
1792 
1793  char wwn[30]; wwn[0] = 0;
1794  unsigned oui = 0; uint64_t unique_id = 0;
1795  int naa = ata_get_wwn(&drive, oui, unique_id);
1796  if (naa >= 0)
1797  snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1798 
1799  // Format device id string for warning emails
1800  char cap[32];
1801  cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1802  format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1803 
1804  PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1805 
1806  // Show if device in database, and use preset vendor attribute
1807  // options unless user has requested otherwise.
1808  if (cfg.ignorepresets)
1809  PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1810  else {
1811  // Apply vendor specific presets, print warning if present
1812  const drive_settings * dbentry = lookup_drive_apply_presets(
1813  &drive, cfg.attribute_defs, cfg.firmwarebugs);
1814  if (!dbentry)
1815  PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1816  else {
1817  PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1818  name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1819  if (*dbentry->warningmsg)
1820  PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1821  }
1822  }
1823 
1824  // Check for ATA Security LOCK
1825  unsigned short word128 = drive.words088_255[128-88];
1826  bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1827  if (locked)
1828  PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1829 
1830  // Set default '-C 197[+]' if no '-C ID' is specified.
1831  if (!cfg.curr_pending_set)
1833  // Set default '-U 198[+]' if no '-U ID' is specified.
1834  if (!cfg.offl_pending_set)
1836 
1837  // If requested, show which presets would be used for this drive
1838  if (cfg.showpresets) {
1839  int savedebugmode=debugmode;
1840  PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
1841  if (!debugmode)
1842  debugmode=2;
1843  show_presets(&drive);
1844  debugmode=savedebugmode;
1845  }
1846 
1847  // see if drive supports SMART
1848  supported=ataSmartSupport(&drive);
1849  if (supported!=1) {
1850  if (supported==0)
1851  // drive does NOT support SMART
1852  PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
1853  else
1854  // can't tell if drive supports SMART
1855  PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
1856 
1857  // should we proceed anyway?
1858  if (cfg.permissive) {
1859  PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
1860  }
1861  else {
1862  PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
1863  CloseDevice(atadev, name);
1864  return 2;
1865  }
1866  }
1867 
1868  if (ataEnableSmart(atadev)) {
1869  // Enable SMART command has failed
1870  PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
1871 
1872  if (ataIsSmartEnabled(&drive) <= 0) {
1873  CloseDevice(atadev, name);
1874  return 2;
1875  }
1876  PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
1877  }
1878 
1879  // disable device attribute autosave...
1880  if (cfg.autosave==1) {
1881  if (ataDisableAutoSave(atadev))
1882  PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
1883  else
1884  PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
1885  }
1886 
1887  // or enable device attribute autosave
1888  if (cfg.autosave==2) {
1889  if (ataEnableAutoSave(atadev))
1890  PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
1891  else
1892  PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
1893  }
1894 
1895  // capability check: SMART status
1896  if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
1897  PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
1898  cfg.smartcheck = false;
1899  }
1900 
1901  // capability check: Read smart values and thresholds. Note that
1902  // smart values are ALSO needed even if we ONLY want to know if the
1903  // device is self-test log or error-log capable! After ATA-5, this
1904  // information was ALSO reproduced in the IDENTIFY DEVICE response,
1905  // but sadly not for ATA-5. Sigh.
1906 
1907  // do we need to get SMART data?
1908  bool smart_val_ok = false;
1909  if ( cfg.autoofflinetest || cfg.selftest
1910  || cfg.errorlog || cfg.xerrorlog
1911  || cfg.offlinests || cfg.selfteststs
1912  || cfg.usagefailed || cfg.prefail || cfg.usage
1913  || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
1914  || cfg.curr_pending_id || cfg.offl_pending_id ) {
1915 
1916  if (ataReadSmartValues(atadev, &state.smartval)) {
1917  PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
1918  cfg.usagefailed = cfg.prefail = cfg.usage = false;
1919  cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1920  cfg.curr_pending_id = cfg.offl_pending_id = 0;
1921  }
1922  else {
1923  smart_val_ok = true;
1924  if (ataReadSmartThresholds(atadev, &state.smartthres)) {
1925  PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
1926  name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
1927  cfg.usagefailed = false;
1928  // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
1929  memset(&state.smartthres, 0, sizeof(state.smartthres));
1930  }
1931  }
1932 
1933  // see if the necessary Attribute is there to monitor offline or
1934  // current pending sectors or temperature
1935  if ( cfg.curr_pending_id
1936  && !check_pending_id(cfg, state, cfg.curr_pending_id,
1937  "Current_Pending_Sector"))
1938  cfg.curr_pending_id = 0;
1939 
1940  if ( cfg.offl_pending_id
1941  && !check_pending_id(cfg, state, cfg.offl_pending_id,
1942  "Offline_Uncorrectable"))
1943  cfg.offl_pending_id = 0;
1944 
1945  if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
1947  PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
1948  name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
1949  cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
1950  }
1951 
1952  // Report ignored '-r' or '-R' directives
1953  for (int id = 1; id <= 255; id++) {
1955  char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
1956  const char * excl = (cfg.monitor_attr_flags.is_set(id,
1957  (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
1958 
1959  int idx = ata_find_attr_index(id, state.smartval);
1960  if (idx < 0)
1961  PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
1962  else {
1963  bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
1964  if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
1965  PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
1966  (prefail ? "Prefailure" : "Usage"), opt, id, excl);
1967  }
1968  }
1969  }
1970  }
1971 
1972  // enable/disable automatic on-line testing
1973  if (cfg.autoofflinetest) {
1974  // is this an enable or disable request?
1975  const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
1976  if (!smart_val_ok)
1977  PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
1978  else {
1979  // if command appears unsupported, issue a warning...
1980  if (!isSupportAutomaticTimer(&state.smartval))
1981  PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
1982  // ... but then try anyway
1983  if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
1984  PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
1985  else
1986  PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
1987  }
1988  }
1989 
1990  // Read log directories if required for capability check
1991  ata_smart_log_directory smart_logdir, gp_logdir;
1992  bool smart_logdir_ok = false, gp_logdir_ok = false;
1993 
1994  if ( isGeneralPurposeLoggingCapable(&drive)
1995  && (cfg.errorlog || cfg.selftest)
1996  && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
1997  if (!ataReadLogDirectory(atadev, &smart_logdir, false))
1998  smart_logdir_ok = true;
1999  }
2000 
2001  if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2002  if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2003  gp_logdir_ok = true;
2004  }
2005 
2006  // capability check: self-test-log
2007  state.selflogcount = 0; state.selfloghour = 0;
2008  if (cfg.selftest) {
2009  int retval;
2010  if (!( cfg.permissive
2011  || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2012  || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2013  PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2014  cfg.selftest = false;
2015  }
2016  else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2017  PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2018  cfg.selftest = false;
2019  }
2020  else {
2021  state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2022  state.selfloghour =SELFTEST_ERRORHOURS(retval);
2023  }
2024  }
2025 
2026  // capability check: ATA error log
2027  state.ataerrorcount = 0;
2028  if (cfg.errorlog) {
2029  int errcnt1;
2030  if (!( cfg.permissive
2031  || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2032  || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2033  PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2034  cfg.errorlog = false;
2035  }
2036  else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2037  PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2038  cfg.errorlog = false;
2039  }
2040  else
2041  state.ataerrorcount = errcnt1;
2042  }
2043 
2044  if (cfg.xerrorlog) {
2045  int errcnt2;
2046  if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2047  || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2048  PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2049  name);
2050  cfg.xerrorlog = false;
2051  }
2052  else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2053  PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2054  cfg.xerrorlog = false;
2055  }
2056  else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2057  PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2058  name, state.ataerrorcount, errcnt2);
2059  // Record max error count
2060  if (errcnt2 > state.ataerrorcount)
2061  state.ataerrorcount = errcnt2;
2062  }
2063  else
2064  state.ataerrorcount = errcnt2;
2065  }
2066 
2067  // capability check: self-test and offline data collection status
2068  if (cfg.offlinests || cfg.selfteststs) {
2069  if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2070  if (cfg.offlinests)
2071  PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2072  if (cfg.selfteststs)
2073  PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2074  cfg.offlinests = cfg.selfteststs = false;
2075  }
2076  }
2077 
2078  // capabilities check -- does it support powermode?
2079  if (cfg.powermode) {
2080  int powermode = ataCheckPowerMode(atadev);
2081 
2082  if (-1 == powermode) {
2083  PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2084  cfg.powermode=0;
2085  }
2086  else if (powermode!=0x00 && powermode!=0x01
2087  && powermode!=0x40 && powermode!=0x41
2088  && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2089  && powermode!=0xff) {
2090  PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2091  name, powermode);
2092  cfg.powermode=0;
2093  }
2094  }
2095 
2096  // Apply ATA settings
2097  std::string msg;
2098 
2099  if (cfg.set_aam)
2100  format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2101  ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2102  ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2103 
2104  if (cfg.set_apm)
2105  format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2106  ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2107  ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2108 
2109  if (cfg.set_lookahead)
2110  format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2112  cfg.set_lookahead);
2113 
2114  if (cfg.set_wcache)
2115  format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2117 
2118  if (cfg.set_dsn)
2119  format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2120  ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2121 
2122  if (cfg.set_security_freeze)
2123  format_set_result_msg(msg, "Security freeze",
2125 
2126  if (cfg.set_standby)
2127  format_set_result_msg(msg, "Standby",
2128  ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2129 
2130  // Report as one log entry
2131  if (!msg.empty())
2132  PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2133 
2134  // set SCT Error Recovery Control if requested
2135  if (cfg.sct_erc_set) {
2136  if (!isSCTErrorRecoveryControlCapable(&drive))
2137  PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2138  name);
2139  else if (locked)
2140  PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2141  name);
2142  else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2144  PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2145  else
2146  PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2147  name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2148  }
2149 
2150  // If no tests available or selected, return
2151  if (!( cfg.smartcheck || cfg.selftest
2152  || cfg.errorlog || cfg.xerrorlog
2153  || cfg.offlinests || cfg.selfteststs
2154  || cfg.usagefailed || cfg.prefail || cfg.usage
2155  || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2156  CloseDevice(atadev, name);
2157  return 3;
2158  }
2159 
2160  // tell user we are registering device
2161  PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2162 
2163  // close file descriptor
2164  CloseDevice(atadev, name);
2165 
2166  if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2167  // Build file name for state file
2168  std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2169  std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2170  if (!state_path_prefix.empty()) {
2171  cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2172  // Read previous state
2173  if (read_dev_state(cfg.state_file.c_str(), state)) {
2174  PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2175  // Copy ATA attribute values to temp state
2176  state.update_temp_state();
2177  }
2178  }
2179  if (!attrlog_path_prefix.empty())
2180  cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2181  }
2182 
2183  finish_device_scan(cfg, state);
2184 
2185  return 0;
2186 }
2187 
2188 // on success, return 0. On failure, return >0. Never return <0,
2189 // please.
//
// Startup probe of one SCSI device. In order, the code below:
//   - issues a standard INQUIRY (36 bytes, retried with 64 bytes) and rejects
//     short responses and peripheral device types other than 0, 4, 5, 7, 0xe;
//   - reads VPD pages for the logical unit id and the unit serial number,
//     reads the capacity, and formats cfg.dev_idinfo;
//   - requires Test Unit Ready to succeed and the IEC mode page to be
//     readable (SIMPLE_ERR_BAD_FIELD is tolerated) with exception control enabled;
//   - records which log pages are supported in 'state', checks scsiCheckIE(),
//     the self-test log and applies the autosave (GLTSD) setting;
//   - closes the device, builds the state/attrlog file names and calls
//     finish_device_scan().
// Return values visible below: 0 on success, 2 for INQUIRY / device type /
// Test Unit Ready failures, 3 for IEC mode page problems.
// NOTE(review): the three early 'return 2' paths before the Test Unit Ready
// check return without calling CloseDevice(), unlike the later failure paths;
// confirm that the caller closes the device in those cases.
// NOTE(review): this text comes from a numbered listing; listing lines 2235,
// 2240, 2332, 2335, 2338 and 2341 are absent (see notes at each gap).
2190 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev)
2191 {
2192  int err, req_len, avail_len, version, len;
2193  const char *device = cfg.name.c_str();
2194  struct scsi_iec_mode_page iec;
2195  UINT8 tBuf[64];
2196  UINT8 inqBuf[96];
2197  UINT8 vpdBuf[252];
2198  char lu_id[64], serial[256], vendor[40], model[40];
2199 
2200  // Device must be open
2201  memset(inqBuf, 0, 96);
2202  req_len = 36;
2203  if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2204  /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2205  req_len = 64;
2206  if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2207  PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2208  "skip device\n", device);
2209  return 2;
2210  }
2211  }
2212  version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2213 
  // Usable response length: the smaller of what the device says it returned
  // (byte 4 plus 5) and what was requested.
2214  avail_len = inqBuf[4] + 5;
2215  len = (avail_len < req_len) ? avail_len : req_len;
2216  if (len < 36) {
2217  PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2218  "skip device\n", device);
2219  return 2;
2220  }
2221 
  // Peripheral device type: low 5 bits of INQUIRY byte 0
2222  int pdt = inqBuf[0] & 0x1f;
2223 
2224  if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2225  (0xe == pdt))) {
2226  PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2227  "skip\n", device, pdt);
2228  return 2;
2229  }
2230 
  // Drop any supported-VPD-pages object left over from a previous device
2231  if (supported_vpd_pages_p) {
2232  delete supported_vpd_pages_p;
2233  supported_vpd_pages_p = NULL;
2234  }
  // NOTE(review): listing line 2235 is missing here. As shown,
  // supported_vpd_pages_p stays NULL; presumably it is re-created for this
  // device at this point -- verify against the upstream source.
2236 
2237  lu_id[0] = '\0';
2238  if ((version >= 0x3) && (version < 0x8)) {
2239  /* SPC to SPC-5 */
  // NOTE(review): listing line 2240 is missing; the next line is only the tail
  // of a call (presumably a VPD inquiry for the device identification page,
  // guarded by an 'if') -- verify against the upstream source.
2241  vpdBuf, sizeof(vpdBuf))) {
  // NOTE(review): len comes from a single device-supplied byte (0..255) while
  // vpdBuf holds 252 bytes; vpdBuf + 4 with this length is not clamped here.
2242  len = vpdBuf[3];
2243  scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2244  }
2245  }
2246  serial[0] = '\0';
2247  if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2248  vpdBuf, sizeof(vpdBuf))) {
  // NOTE(review): vpdBuf[3] may be up to 255, so 4 + len can index past the
  // end of the 252-byte vpdBuf; no bound check is visible here.
2249  len = vpdBuf[3];
2250  vpdBuf[4 + len] = '\0';
2251  scsi_format_id_string(serial, (const unsigned char *)&vpdBuf[4], len);
2252  }
2253 
2254  unsigned int lb_size;
2255  char si_str[64];
2256  uint64_t capacity = scsiGetSize(scsidev, &lb_size, NULL);
2257 
  // A capacity of 0 leaves the size string empty so it is omitted below
2258  if (capacity)
2259  format_capacity(si_str, sizeof(si_str), capacity, ".");
2260  else
2261  si_str[0] = '\0';
2262 
2263  // Format device id string for warning emails
  // (INQUIRY bytes 8-15, 16-31 and 32-35, followed by the optional
  // lu id, serial number and size parts)
2264  cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2265  (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2266  (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2267  (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2268  (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2269 
2270  // format "model" string
2271  scsi_format_id_string(vendor, (const unsigned char *)&inqBuf[8], 8);
2272  scsi_format_id_string(model, (const unsigned char *)&inqBuf[16], 16);
2273  PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2274 
2275  // check that device is ready for commands. IE stores its stuff on
2276  // the media.
2277  if ((err = scsiTestUnitReady(scsidev))) {
2278  if (SIMPLE_ERR_NOT_READY == err)
2279  PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2280  else if (SIMPLE_ERR_NO_MEDIUM == err)
2281  PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2282  else if (SIMPLE_ERR_BECOMING_READY == err)
2283  PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2284  else
2285  PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2286  CloseDevice(scsidev, device);
2287  return 2;
2288  }
2289 
2290  // Badly-conforming USB storage devices may fail this check.
2291  // The response to the following IE mode page fetch (current and
2292  // changeable values) is carefully examined. It has been found
2293  // that various USB devices that malform the response will lock up
2294  // if asked for a log page (e.g. temperature) so it is best to
2295  // bail out now.
2296  if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2297  state.modese_len = iec.modese_len;
2298  else if (SIMPLE_ERR_BAD_FIELD == err)
2299  ; /* continue since it is reasonable not to support IE mpage */
2300  else { /* any other error (including malformed response) unreasonable */
2301  PrintOut(LOG_INFO,
2302  "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2303  device, err);
2304  CloseDevice(scsidev, device);
2305  return 3;
2306  }
2307 
2308  // N.B. The following is passive (i.e. it doesn't attempt to turn on
2309  // smart if it is off). This may change to be the same as the ATA side.
2310  if (!scsi_IsExceptionControlEnabled(&iec)) {
2311  PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2312  "Try 'smartctl -s on %s' to turn on SMART features\n",
2313  device, device);
2314  CloseDevice(scsidev, device);
2315  return 3;
2316  }
2317 
2318  // Flag that certain log pages are supported (information may be
2319  // available from other sources).
  // NOTE(review): the retry passes 68 as last argument although tBuf is only
  // 64 bytes; the meaning of that argument is not visible here -- confirm.
2320  if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2321  0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2322  /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2323  {
  // NOTE(review): the loop bound is taken from the device-supplied byte
  // tBuf[3] and is not clamped to sizeof(tBuf) (64) in the visible code.
2324  for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2325  switch (tBuf[k]) {
2326  case TEMPERATURE_LPAGE:
2327  state.TempPageSupported = 1;
2328  break;
2329  case IE_LPAGE:
2330  state.SmartPageSupported = 1;
2331  break;
  // NOTE(review): listing lines 2332, 2335, 2338 and 2341 are missing; each
  // presumably held the 'case' label for the assignment that follows it
  // (read / write / verify error counter and non-medium error pages).
2333  state.ReadECounterPageSupported = 1;
2334  break;
2336  state.WriteECounterPageSupported = 1;
2337  break;
2339  state.VerifyECounterPageSupported = 1;
2340  break;
2342  state.NonMediumErrorPageSupported = 1;
2343  break;
2344  default:
2345  break;
2346  }
2347  }
2348  }
2349 
2350  // Check if scsiCheckIE() is going to work
2351  {
2352  UINT8 asc = 0;
2353  UINT8 ascq = 0;
2354  UINT8 currenttemp = 0;
2355  UINT8 triptemp = 0;
2356 
2357  if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2358  &asc, &ascq, &currenttemp, &triptemp)) {
2359  PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2360  state.SuppressReport = 1;
2361  }
  // Temperature directives are dropped if the check failed or no current
  // temperature (currenttemp still 0) was obtained
2362  if ( (state.SuppressReport || !currenttemp)
2363  && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2364  PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2365  device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2366  cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2367  }
2368  }
2369 
2370  // capability check: self-test-log
2371  if (cfg.selftest){
2372  int retval = scsiCountFailedSelfTests(scsidev, 0);
2373  if (retval<0) {
2374  // no self-test log, turn off monitoring
2375  PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2376  cfg.selftest = false;
2377  state.selflogcount = 0;
2378  state.selfloghour = 0;
2379  }
2380  else {
2381  // register starting values to watch for changes
2382  state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2383  state.selfloghour =SELFTEST_ERRORHOURS(retval);
2384  }
2385  }
2386 
2387  // disable autosave (set GLTSD bit)
2388  if (cfg.autosave==1){
2389  if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2390  PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2391  else
2392  PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2393  }
2394 
2395  // or enable autosave (clear GLTSD bit)
2396  if (cfg.autosave==2){
2397  if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2398  PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2399  else
2400  PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2401  }
2402 
2403  // tell user we are registering device
2404  PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2405 
2406  // Make sure that init_standby_check() ignores SCSI devices
2407  cfg.offlinests_ns = cfg.selfteststs_ns = false;
2408 
2409  // close file descriptor
2410  CloseDevice(scsidev, device);
2411 
2412  if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2413  // Build file name for state file
  // NOTE(review): model and serial are sanitized for use in a file name,
  // vendor is used below without the same treatment -- confirm intended.
2414  std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2415  std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2416  if (!state_path_prefix.empty()) {
2417  cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2418  // Read previous state
2419  if (read_dev_state(cfg.state_file.c_str(), state)) {
2420  PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2421  // Copy ATA attribute values to temp state
2422  state.update_temp_state();
2423  }
2424  }
2425  if (!attrlog_path_prefix.empty())
2426  cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2427  }
2428 
2429  finish_device_scan(cfg, state);
2430 
2431  return 0;
2432 }
2433 
// Convert 128 bit LE integer to uint64_t or its max value on overflow.
//
// val: 16 bytes, least significant byte first (val[0] is bits 0-7).
// Returns the value if bytes 8..15 are all zero, otherwise ~(uint64_t)0
// because the number does not fit into 64 bits.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Any nonzero byte in the upper half means the value exceeds 64 bits.
  for (int i = 8; i < 16; i++) {
    if (val[i])
      return ~(uint64_t)0;
  }

  // Assemble the lower 64 bits starting with the most significant byte.
  uint64_t lo = 0;
  for (int i = 7; i >= 0; i--)
    lo = (lo << 8) | val[i];
  return lo;
}
2447 
2448 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
2449 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2450 {
2451  int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2452  for (int i = 0; i < 8; i++) {
2453  if (smart_log.temp_sensor[i] > k)
2454  k = smart_log.temp_sensor[i];
2455  }
2456  return k;
2457 }
2458 
2459 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev)
2460 {
2461  const char *name = cfg.name.c_str();
2462 
2463  // Device must be open
2464 
2465  // Get ID Controller
2466  nvme_id_ctrl id_ctrl;
2467  if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2468  PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2469  CloseDevice(nvmedev, name);
2470  return 2;
2471  }
2472 
2473  // Get drive identity
2474  char model[40+1], serial[20+1], firmware[8+1];
2475  format_char_array(model, id_ctrl.mn);
2476  format_char_array(serial, id_ctrl.sn);
2477  format_char_array(firmware, id_ctrl.fr);
2478 
2479  // Format device id string for warning emails
2480  char nsstr[32] = "", capstr[32] = "";
2481  unsigned nsid = nvmedev->get_nsid();
2482  if (nsid != 0xffffffff)
2483  snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2484  uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2485  if (capacity)
2486  format_capacity(capstr, sizeof(capstr), capacity, ".");
2487  cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2488  nsstr, (capstr[0] ? ", " : ""), capstr);
2489 
2490  PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2491 
2492  // Read SMART/Health log
2493  nvme_smart_log smart_log;
2494  if (!nvme_read_smart_log(nvmedev, smart_log)) {
2495  PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2496  CloseDevice(nvmedev, name);
2497  return 2;
2498  }
2499 
2500  // Check temperature sensor support
2501  if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2502  if (!nvme_get_max_temp_kelvin(smart_log)) {
2503  PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2504  name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2505  cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2506  }
2507  }
2508 
2509  // Init total error count
2510  if (cfg.errorlog || cfg.xerrorlog) {
2511  state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2512  }
2513 
2514  // If no supported tests selected, return
2515  if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2516  || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2517  CloseDevice(nvmedev, name);
2518  return 3;
2519  }
2520 
2521  // Tell user we are registering device
2522  PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2523 
2524  // Make sure that init_standby_check() ignores NVMe devices
2525  cfg.offlinests_ns = cfg.selfteststs_ns = false;
2526 
2527  CloseDevice(nvmedev, name);
2528 
2529  if (!state_path_prefix.empty()) {
2530  // Build file name for state file
2531  std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2532  std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2533  nsstr[0] = 0;
2534  if (nsid != 0xffffffff)
2535  snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2536  cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2537  // Read previous state
2538  if (read_dev_state(cfg.state_file.c_str(), state))
2539  PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2540  }
2541 
2542  finish_device_scan(cfg, state);
2543 
2544  return 0;
2545 }
2546 
2547 // Open device for next check, return false on error
2548 static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2549  const char * type)
2550 {
2551  const char * name = cfg.name.c_str();
2552 
2553  // If user has asked, test the email warning system
2554  if (cfg.emailtest)
2555  MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2556 
2557  // User may have requested (with the -n Directive) to leave the disk
2558  // alone if it is in idle or standby mode. In this case check the
2559  // power mode first before opening the device for full access,
2560  // and exit without check if disk is reported in standby.
2561  if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2562  // Note that 'is_powered_down()' handles opening the device itself, and
2563  // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2564  if (device->is_powered_down())
2565  {
2566  // skip at most powerskipmax checks
2567  if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2568  // report first only except if state has changed, avoid waking up system disk
2569  if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2570  PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2571  state.lastpowermodeskipped = -1;
2572  }
2573  state.powerskipcnt++;
2574  return false;
2575  }
2576  }
2577  }
2578 
2579  // if we can't open device, fail gracefully rather than hard --
2580  // perhaps the next time around we'll be able to open it
2581  if (!device->open()) {
2582  // For removable devices, print error message only once and suppress email
2583  if (!cfg.removable) {
2584  PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2585  MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2586  }
2587  else if (!state.removed) {
2588  PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2589  state.removed = true;
2590  }
2591  else if (debugmode)
2592  PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2593  return false;
2594  }
2595 
2596  if (debugmode)
2597  PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2598 
2599  if (!cfg.removable)
2600  reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2601  else if (state.removed) {
2602  PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2603  state.removed = false;
2604  }
2605 
2606  return true;
2607 }
2608 
2609 // If the self-test log has got more self-test errors (or more recent
2610 // self-test errors) recorded, then notify user.
2611 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2612 {
2613  const char * name = cfg.name.c_str();
2614 
2615  if (newi<0)
2616  // command failed
2617  MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2618  else {
2619  reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2620 
2621  // old and new error counts
2622  int oldc=state.selflogcount;
2623  int newc=SELFTEST_ERRORCOUNT(newi);
2624 
2625  // old and new error timestamps in hours
2626  int oldh=state.selfloghour;
2627  int newh=SELFTEST_ERRORHOURS(newi);
2628 
2629  if (oldc<newc) {
2630  // increase in error count
2631  PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2632  name, oldc, newc);
2633  MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2634  name, oldc, newc);
2635  state.must_write = true;
2636  }
2637  else if (newc > 0 && oldh != newh) {
2638  // more recent error
2639  // a 'more recent' error might actually be a smaller hour number,
2640  // if the hour number has wrapped.
2641  // There's still a bug here. You might just happen to run a new test
2642  // exactly 32768 hours after the previous failure, and have run exactly
2643  // 20 tests between the two, in which case smartd will miss the
2644  // new failure.
2645  PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2646  name, newh);
2647  MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2648  name, newh);
2649  state.must_write = true;
2650  }
2651 
2652  // Print info if error entries have disappeared
2653  // or newer successful successful extended self-test exits
2654  if (oldc > newc) {
2655  PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2656  name, oldc, newc);
2657  if (newc == 0)
2658  reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2659  }
2660 
2661  // Needed since self-test error count may DECREASE. Hour might
2662  // also have changed.
2663  state.selflogcount= newc;
2664  state.selfloghour = newh;
2665  }
2666  return;
2667 }
2668 
// Test types, ordered by priority.
// One character per test type; index 0 has the highest priority.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating NUL of the string is not counted).
static const unsigned num_test_types = sizeof(test_type_chars)-1;
2672 
2673 // returns test type if time to do test of type testtype,
2674 // 0 if not time to do test.
2675 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2676 {
2677  // check that self-testing has been requested
2678  if (cfg.test_regex.empty())
2679  return 0;
2680 
2681  // Exit if drive not capable of any test
2682  if ( state.not_cap_long && state.not_cap_short &&
2683  (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2684  return 0;
2685 
2686  // since we are about to call localtime(), be sure glibc is informed
2687  // of any timezone changes we make.
2688  if (!usetime)
2690 
2691  // Is it time for next check?
2692  time_t now = (!usetime ? time(0) : usetime);
2693  if (now < state.scheduled_test_next_check)
2694  return 0;
2695 
2696  // Limit time check interval to 90 days
2697  if (state.scheduled_test_next_check + (3600L*24*90) < now)
2698  state.scheduled_test_next_check = now - (3600L*24*90);
2699 
2700  // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2701  char testtype = 0;
2702  time_t testtime = 0; int testhour = 0;
2703  int maxtest = num_test_types-1;
2704 
2705  for (time_t t = state.scheduled_test_next_check; ; ) {
2706  struct tm * tms = localtime(&t);
2707  // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2708  int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2709  for (int i = 0; i <= maxtest; i++) {
2710  // Skip if drive not capable of this test
2711  switch (test_type_chars[i]) {
2712  case 'L': if (state.not_cap_long) continue; break;
2713  case 'S': if (state.not_cap_short) continue; break;
2714  case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2715  case 'O': if (scsi || state.not_cap_offline) continue; break;
2716  case 'c': case 'n':
2717  case 'r': if (scsi || state.not_cap_selective) continue; break;
2718  default: continue;
2719  }
2720  // Try match of "T/MM/DD/d/HH"
2721  char pattern[16];
2722  snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2723  test_type_chars[i], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2724  if (cfg.test_regex.full_match(pattern)) {
2725  // Test found
2726  testtype = pattern[0];
2727  testtime = t; testhour = tms->tm_hour;
2728  // Limit further matches to higher priority self-tests
2729  maxtest = i-1;
2730  break;
2731  }
2732  }
2733  // Exit if no tests left or current time reached
2734  if (maxtest < 0)
2735  break;
2736  if (t >= now)
2737  break;
2738  // Check next hour
2739  if ((t += 3600) > now)
2740  t = now;
2741  }
2742 
2743  // Do next check not before next hour.
2744  struct tm * tmnow = localtime(&now);
2745  state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2746 
2747  if (testtype) {
2748  state.must_write = true;
2749  // Tell user if an old test was found.
2750  if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2751  char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2752  PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2753  cfg.name.c_str(), testtype, datebuf);
2754  }
2755  }
2756 
2757  return testtype;
2758 }
2759 
2760 // Print a list of future tests.
2761 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2762 {
2763  unsigned numdev = configs.size();
2764  if (!numdev)
2765  return;
2766  std::vector<int> testcnts(numdev * num_test_types, 0);
2767 
2768  PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2769 
2770  // FixGlibcTimeZoneBug(); // done in PrintOut()
2771  time_t now = time(0);
2772  char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
2773  dateandtimezoneepoch(datenow, now);
2774 
2775  long seconds;
2776  for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
2777  // Check for each device whether a test will be run
2778  time_t testtime = now + seconds;
2779  for (unsigned i = 0; i < numdev; i++) {
2780  const dev_config & cfg = configs.at(i);
2781  dev_state & state = states.at(i);
2782  const char * p;
2783  char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
2784  if (testtype && (p = strchr(test_type_chars, testtype))) {
2785  unsigned t = (p - test_type_chars);
2786  // Report at most 5 tests of each type
2787  if (++testcnts[i*num_test_types + t] <= 5) {
2788  dateandtimezoneepoch(date, testtime);
2789  PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
2790  testcnts[i*num_test_types + t], testtype, date);
2791  }
2792  }
2793  }
2794  }
2795 
2796  // Report totals
2797  dateandtimezoneepoch(date, now+seconds);
2798  PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
2799  for (unsigned i = 0; i < numdev; i++) {
2800  const dev_config & cfg = configs.at(i);
2801  bool scsi = devices.at(i)->is_scsi();
2802  for (unsigned t = 0; t < num_test_types; t++) {
2803  int cnt = testcnts[i*num_test_types + t];
2804  if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
2805  continue;
2806  PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
2807  cnt, (cnt==1?"":"s"), test_type_chars[t]);
2808  }
2809  }
2810 
2811 }
2812 
2813 // Return zero on success, nonzero on failure. Perform offline (background)
2814 // short or long (extended) self test on given scsi device.
2815 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
2816 {
2817  int retval = 0;
2818  const char *testname = 0;
2819  const char *name = cfg.name.c_str();
2820  int inProgress;
2821 
2822  if (scsiSelfTestInProgress(device, &inProgress)) {
2823  PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
2824  state.not_cap_short = state.not_cap_long = true;
2825  return 1;
2826  }
2827 
2828  if (1 == inProgress) {
2829  PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
2830  "progress.\n", name);
2831  return 1;
2832  }
2833 
2834  switch (testtype) {
2835  case 'S':
2836  testname = "Short Self";
2837  retval = scsiSmartShortSelfTest(device);
2838  break;
2839  case 'L':
2840  testname = "Long Self";
2841  retval = scsiSmartExtendSelfTest(device);
2842  break;
2843  }
2844  // If we can't do the test, exit
2845  if (NULL == testname) {
2846  PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
2847  testtype);
2848  return 1;
2849  }
2850  if (retval) {
2851  if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
2852  (SIMPLE_ERR_BAD_FIELD == retval)) {
2853  PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
2854  testname);
2855  if ('L'==testtype)
2856  state.not_cap_long = true;
2857  else
2858  state.not_cap_short = true;
2859 
2860  return 1;
2861  }
2862  PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
2863  testname, retval);
2864  return 1;
2865  }
2866 
2867  PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
2868 
2869  return 0;
2870 }
2871 
2872 // Do an offline immediate or self-test. Return zero on success,
2873 // nonzero on failure.
2874 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
2875 {
2876  const char *name = cfg.name.c_str();
2877 
2878  // Read current smart data and check status/capability
2879  struct ata_smart_values data;
2880  if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
2881  PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
2882  return 1;
2883  }
2884 
2885  // Check for capability to do the test
2886  int dotest = -1, mode = 0;
2887  const char *testname = 0;
2888  switch (testtype) {
2889  case 'O':
2890  testname="Offline Immediate ";
2892  dotest=OFFLINE_FULL_SCAN;
2893  else
2894  state.not_cap_offline = true;
2895  break;
2896  case 'C':
2897  testname="Conveyance Self-";
2898  if (isSupportConveyanceSelfTest(&data))
2899  dotest=CONVEYANCE_SELF_TEST;
2900  else
2901  state.not_cap_conveyance = true;
2902  break;
2903  case 'S':
2904  testname="Short Self-";
2905  if (isSupportSelfTest(&data))
2906  dotest=SHORT_SELF_TEST;
2907  else
2908  state.not_cap_short = true;
2909  break;
2910  case 'L':
2911  testname="Long Self-";
2912  if (isSupportSelfTest(&data))
2913  dotest=EXTEND_SELF_TEST;
2914  else
2915  state.not_cap_long = true;
2916  break;
2917 
2918  case 'c': case 'n': case 'r':
2919  testname = "Selective Self-";
2920  if (isSupportSelectiveSelfTest(&data)) {
2921  dotest = SELECTIVE_SELF_TEST;
2922  switch (testtype) {
2923  case 'c': mode = SEL_CONT; break;
2924  case 'n': mode = SEL_NEXT; break;
2925  case 'r': mode = SEL_REDO; break;
2926  }
2927  }
2928  else
2929  state.not_cap_selective = true;
2930  break;
2931  }
2932 
2933  // If we can't do the test, exit
2934  if (dotest<0) {
2935  PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
2936  return 1;
2937  }
2938 
2939  // If currently running a self-test, do not interrupt it to start another.
2940  if (15==(data.self_test_exec_status >> 4)) {
2941  if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
2942  PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
2943  "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
2944  } else {
2945  PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
2946  name, testname, (int)(data.self_test_exec_status & 0x0f));
2947  return 1;
2948  }
2949  }
2950 
2951  if (dotest == SELECTIVE_SELF_TEST) {
2952  // Set test span
2953  ata_selective_selftest_args selargs, prev_args;
2954  selargs.num_spans = 1;
2955  selargs.span[0].mode = mode;
2956  prev_args.num_spans = 1;
2957  prev_args.span[0].start = state.selective_test_last_start;
2958  prev_args.span[0].end = state.selective_test_last_end;
2959  if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
2960  PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
2961  return 1;
2962  }
2963  uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
2964  PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
2965  name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
2966  start, end, end - start + 1,
2967  (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
2968  (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
2969  state.selective_test_last_start = start;
2970  state.selective_test_last_end = end;
2971  }
2972 
2973  // execute the test, and return status
2974  int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
2975  if (retval) {
2976  PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
2977  return retval;
2978  }
2979 
2980  // Report recent test start to do_disable_standby_check()
2981  // and force log of next test status
2982  if (testtype == 'O')
2983  state.offline_started = true;
2984  else
2985  state.selftest_started = true;
2986 
2987  PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
2988  return 0;
2989 }
2990 
2991 // Check pending sector count attribute values (-C, -U directives).
2992 static void check_pending(const dev_config & cfg, dev_state & state,
2993  unsigned char id, bool increase_only,
2994  const ata_smart_values & smartval,
2995  int mailtype, const char * msg)
2996 {
2997  // Find attribute index
2998  int i = ata_find_attr_index(id, smartval);
2999  if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3000  return;
3001 
3002  // No report if no sectors pending.
3004  if (rawval == 0) {
3005  reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3006  return;
3007  }
3008 
3009  // If attribute is not reset, report only sector count increases.
3011  if (!(!increase_only || prev_rawval < rawval))
3012  return;
3013 
3014  // Format message.
3015  std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3016  if (prev_rawval > 0 && rawval != prev_rawval)
3017  s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3018 
3019  PrintOut(LOG_CRIT, "%s\n", s.c_str());
3020  MailWarning(cfg, state, mailtype, "%s", s.c_str());
3021  state.must_write = true;
3022 }
3023 
// Format a temperature value for log messages.
// Returns "??" if x is zero (value not set), otherwise the decimal
// representation of x which is written to buf.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", (unsigned)x);
    return buf;
  }
  // unset
  return "??";
}
3032 
3033 // Check Temperature limits
3034 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3035 {
3036  if (!(0 < currtemp && currtemp < 255)) {
3037  PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3038  return;
3039  }
3040 
3041  // Update Max Temperature
3042  const char * minchg = "", * maxchg = "";
3043  if (currtemp > state.tempmax) {
3044  if (state.tempmax)
3045  maxchg = "!";
3046  state.tempmax = currtemp;
3047  state.must_write = true;
3048  }
3049 
3050  char buf[20];
3051  if (!state.temperature) {
3052  // First check
3053  if (!state.tempmin || currtemp < state.tempmin)
3054  // Delay Min Temperature update by ~ 30 minutes.
3055  state.tempmin_delay = time(0) + CHECKTIME - 60;
3056  PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3057  cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3058  if (triptemp)
3059  PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3060  state.temperature = currtemp;
3061  }
3062  else {
3063  if (state.tempmin_delay) {
3064  // End Min Temperature update delay if ...
3065  if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3066  || (state.tempmin_delay <= time(0))) { // or delay time is over.
3067  state.tempmin_delay = 0;
3068  if (!state.tempmin)
3069  state.tempmin = 255;
3070  }
3071  }
3072 
3073  // Update Min Temperature
3074  if (!state.tempmin_delay && currtemp < state.tempmin) {
3075  state.tempmin = currtemp;
3076  state.must_write = true;
3077  if (currtemp != state.temperature)
3078  minchg = "!";
3079  }
3080 
3081  // Track changes
3082  if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3083  PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3084  cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3085  state.temperature = currtemp;
3086  }
3087  }
3088 
3089  // Check limits
3090  if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3091  PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3092  cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3093  MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3094  cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3095  }
3096  else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3097  PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3098  cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3099  }
3100  else if (cfg.tempcrit) {
3101  unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3102  if (currtemp < limit)
3103  reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3104  }
3105 }
3106 
3107 // Check normalized and raw attribute values.
3108 static void check_attribute(const dev_config & cfg, dev_state & state,
3109  const ata_smart_attribute & attr,
3110  const ata_smart_attribute & prev,
3111  int attridx,
3112  const ata_smart_threshold_entry * thresholds)
3113 {
3114  // Check attribute and threshold
3115  ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3116  if (attrstate == ATTRSTATE_NON_EXISTING)
3117  return;
3118 
3119  // If requested, check for usage attributes that have failed.
3120  if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3122  std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3123  PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3124  MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3125  state.must_write = true;
3126  }
3127 
3128  // Return if we're not tracking this type of attribute
3129  bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3130  if (!( ( prefail && cfg.prefail)
3131  || (!prefail && cfg.usage )))
3132  return;
3133 
3134  // Return if '-I ID' was specified
3135  if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3136  return;
3137 
3138  // Issue warning if they don't have the same ID in all structures.
3139  if (attr.id != prev.id) {
3140  PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3141  cfg.name.c_str(), attr.id, prev.id);
3142  return;
3143  }
3144 
3145  // Compare normalized values if valid.
3146  bool valchanged = false;
3147  if (attrstate > ATTRSTATE_NO_NORMVAL) {
3148  if (attr.current != prev.current)
3149  valchanged = true;
3150  }
3151 
3152  // Compare raw values if requested.
3153  bool rawchanged = false;
3154  if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3155  if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
3156  != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3157  rawchanged = true;
3158  }
3159 
3160  // Return if no change
3161  if (!(valchanged || rawchanged))
3162  return;
3163 
3164  // Format value strings
3165  std::string currstr, prevstr;
3166  if (attrstate == ATTRSTATE_NO_NORMVAL) {
3167  // Print raw values only
3168  currstr = strprintf("%s (Raw)",
3169  ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3170  prevstr = strprintf("%s (Raw)",
3171  ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3172  }
3173  else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3174  // Print normalized and raw values
3175  currstr = strprintf("%d [Raw %s]", attr.current,
3176  ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3177  prevstr = strprintf("%d [Raw %s]", prev.current,
3178  ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3179  }
3180  else {
3181  // Print normalized values only
3182  currstr = strprintf("%d", attr.current);
3183  prevstr = strprintf("%d", prev.current);
3184  }
3185 
3186  // Format message
3187  std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3188  cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3189  ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3190  prevstr.c_str(), currstr.c_str());
3191 
3192  // Report this change as critical ?
3193  if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3194  || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3195  PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3196  MailWarning(cfg, state, 2, "%s", msg.c_str());
3197  }
3198  else {
3199  PrintOut(LOG_INFO, "%s\n", msg.c_str());
3200  }
3201  state.must_write = true;
3202 }
3203 
3204 
3205 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3206  bool firstpass, bool allow_selftests)
3207 {
3208  if (!open_device(cfg, state, atadev, "ATA"))
3209  return 1;
3210 
3211  const char * name = cfg.name.c_str();
3212 
3213  // user may have requested (with the -n Directive) to leave the disk
3214  // alone if it is in idle or sleeping mode. In this case check the
3215  // power mode and exit without check if needed
3216  if (cfg.powermode && !state.powermodefail) {
3217  int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3218  const char * mode = 0;
3219  if (0 <= powermode && powermode < 0xff) {
3220  // wait for possible spin up and check again
3221  int powermode2;
3222  sleep(5);
3223  powermode2 = ataCheckPowerMode(atadev);
3224  if (powermode2 > powermode)
3225  PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3226  powermode = powermode2;
3227  }
3228 
3229  switch (powermode){
3230  case -1:
3231  // SLEEP
3232  mode="SLEEP";
3233  if (cfg.powermode>=1)
3234  dontcheck=1;
3235  break;
3236  case 0x00:
3237  // STANDBY
3238  mode="STANDBY";
3239  if (cfg.powermode>=2)
3240  dontcheck=1;
3241  break;
3242  case 0x01:
3243  // STANDBY_Y
3244  mode="STANDBY_Y";
3245  if (cfg.powermode>=2)
3246  dontcheck=1;
3247  break;
3248  case 0x80:
3249  // IDLE
3250  mode="IDLE";
3251  if (cfg.powermode>=3)
3252  dontcheck=1;
3253  break;
3254  case 0x81:
3255  // IDLE_A
3256  mode="IDLE_A";
3257  if (cfg.powermode>=3)
3258  dontcheck=1;
3259  break;
3260  case 0x82:
3261  // IDLE_B
3262  mode="IDLE_B";
3263  if (cfg.powermode>=3)
3264  dontcheck=1;
3265  break;
3266  case 0x83:
3267  // IDLE_C
3268  mode="IDLE_C";
3269  if (cfg.powermode>=3)
3270  dontcheck=1;
3271  break;
3272  case 0xff:
3273  // ACTIVE/IDLE
3274  case 0x40:
3275  // ACTIVE
3276  case 0x41:
3277  // ACTIVE
3278  mode="ACTIVE or IDLE";
3279  break;
3280  default:
3281  // UNKNOWN
3282  PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3283  name, powermode);
3284  state.powermodefail = true;
3285  break;
3286  }
3287 
3288  // if we are going to skip a check, return now
3289  if (dontcheck){
3290  // skip at most powerskipmax checks
3291  if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3292  CloseDevice(atadev, name);
3293  // report first only except if state has changed, avoid waking up system disk
3294  if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3295  PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3296  state.lastpowermodeskipped = powermode;
3297  }
3298  state.powerskipcnt++;
3299  return 0;
3300  }
3301  else {
3302  PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3303  name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3304  }
3305  state.powerskipcnt = 0;
3306  state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3307  }
3308  else if (state.powerskipcnt) {
3309  PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3310  name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3311  state.powerskipcnt = 0;
3312  state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3313  }
3314  }
3315 
3316  // check smart status
3317  if (cfg.smartcheck) {
3318  int status=ataSmartStatus2(atadev);
3319  if (status==-1){
3320  PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3321  MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3322  state.must_write = true;
3323  }
3324  else if (status==1){
3325  PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3326  MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3327  state.must_write = true;
3328  }
3329  }
3330 
3331  // Check everything that depends upon SMART Data (eg, Attribute values)
3332  if ( cfg.usagefailed || cfg.prefail || cfg.usage
3333  || cfg.curr_pending_id || cfg.offl_pending_id
3334  || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3335  || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3336 
3337  // Read current attribute values.
3338  ata_smart_values curval;
3339  if (ataReadSmartValues(atadev, &curval)){
3340  PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3341  MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3342  state.must_write = true;
3343  }
3344  else {
3345  reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3346 
3347  // look for current or offline pending sectors
3348  if (cfg.curr_pending_id)
3349  check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3350  (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3351  : "Total unreadable (pending) sectors" ));
3352 
3353  if (cfg.offl_pending_id)
3354  check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3355  (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3356  : "Total offline uncorrectable sectors"));
3357 
3358  // check temperature limits
3359  if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3360  CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3361 
3362  // look for failed usage attributes, or track usage or prefail attributes
3363  if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3364  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3365  check_attribute(cfg, state,
3366  curval.vendor_attributes[i],
3367  state.smartval.vendor_attributes[i],
3368  i, state.smartthres.thres_entries);
3369  }
3370  }
3371 
3372  // Log changes of offline data collection status
3373  if (cfg.offlinests) {
3374  if ( curval.offline_data_collection_status
3376  || state.offline_started // test was started in previous call
3377  || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3379  }
3380 
3381  // Log changes of self-test execution status
3382  if (cfg.selfteststs) {
3384  || state.selftest_started // test was started in previous call
3385  || (firstpass && (debugmode || curval.self_test_exec_status != 0x00)))
3387  }
3388 
3389  // Save the new values for the next time around
3390  state.smartval = curval;
3391  }
3392  }
3393  state.offline_started = state.selftest_started = false;
3394 
3395  // check if number of selftest errors has increased (note: may also DECREASE)
3396  if (cfg.selftest)
3397  CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3398 
3399  // check if number of ATA errors has increased
3400  if (cfg.errorlog || cfg.xerrorlog) {
3401 
3402  int errcnt1 = -1, errcnt2 = -1;
3403  if (cfg.errorlog)
3404  errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3405  if (cfg.xerrorlog)
3406  errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3407 
3408  // new number of errors is max of both logs
3409  int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3410 
3411  // did command fail?
3412  if (newc<0)
3413  // lack of PrintOut here is INTENTIONAL
3414  MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3415 
3416  // has error count increased?
3417  int oldc = state.ataerrorcount;
3418  if (newc>oldc){
3419  PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3420  name, oldc, newc);
3421  MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3422  name, oldc, newc);
3423  state.must_write = true;
3424  }
3425 
3426  if (newc>=0)
3427  state.ataerrorcount=newc;
3428  }
3429 
3430  // if the user has asked, and device is capable (or we're not yet
3431  // sure) check whether a self test should be done now.
3432  if (allow_selftests && !cfg.test_regex.empty()) {
3433  char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3434  if (testtype)
3435  DoATASelfTest(cfg, state, atadev, testtype);
3436  }
3437 
3438  // Don't leave device open -- the OS/user may want to access it
3439  // before the next smartd cycle!
3440  CloseDevice(atadev, name);
3441 
3442  // Copy ATA attribute values to persistent state
3443  state.update_persistent_state();
3444 
3445  return 0;
3446 }
3447 
3448 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3449 {
3450  if (!open_device(cfg, state, scsidev, "SCSI"))
3451  return 1;
3452 
3453  const char * name = cfg.name.c_str();
3454 
3455  UINT8 asc = 0, ascq = 0;
3456  UINT8 currenttemp = 0, triptemp = 0;
3457  if (!state.SuppressReport) {
3458  if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3459  &asc, &ascq, &currenttemp, &triptemp)) {
3460  PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3461  name);
3462  MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3463  state.SuppressReport = 1;
3464  }
3465  }
3466  if (asc > 0) {
3467  const char * cp = scsiGetIEString(asc, ascq);
3468  if (cp) {
3469  PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3470  MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3471  } else if (asc == 4 && ascq == 9) {
3472  PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3473  } else if (debugmode)
3474  PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3475  name, (int)asc, (int)ascq);
3476  } else if (debugmode)
3477  PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3478 
3479  // check temperature limits
3480  if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3481  CheckTemperature(cfg, state, currenttemp, triptemp);
3482 
3483  // check if number of selftest errors has increased (note: may also DECREASE)
3484  if (cfg.selftest)
3485  CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3486 
3487  if (allow_selftests && !cfg.test_regex.empty()) {
3488  char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3489  if (testtype)
3490  DoSCSISelfTest(cfg, state, scsidev, testtype);
3491  }
3492  if (!cfg.attrlog_file.empty()){
3493  // saving error counters to state
3494  UINT8 tBuf[252];
3495  if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3496  READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3498  state.scsi_error_counters[0].found=1;
3499  }
3500  if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3501  WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3503  state.scsi_error_counters[1].found=1;
3504  }
3505  if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3506  VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3508  state.scsi_error_counters[2].found=1;
3509  }
3510  if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3511  NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3513  state.scsi_nonmedium_error.found=1;
3514  }
3515  // store temperature if not done by CheckTemperature() above
3516  if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3517  state.temperature = currenttemp;
3518  }
3519  CloseDevice(scsidev, name);
3520  return 0;
3521 }
3522 
3523 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3524 {
3525  if (!open_device(cfg, state, nvmedev, "NVMe"))
3526  return 1;
3527 
3528  const char * name = cfg.name.c_str();
3529 
3530  // Read SMART/Health log
3531  nvme_smart_log smart_log;
3532  if (!nvme_read_smart_log(nvmedev, smart_log)) {
3533  PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3534  MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3535  state.must_write = true;
3536  return 0;
3537  }
3538 
3539  // Check Critical Warning bits
3540  if (cfg.smartcheck && smart_log.critical_warning) {
3541  unsigned char w = smart_log.critical_warning;
3542  std::string msg;
3543  static const char * const wnames[] =
3544  {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3545 
3546  for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3547  if (!(w & (1 << b)))
3548  continue;
3549  if (cnt)
3550  msg += ", ";
3551  if (++cnt > 3) {
3552  msg += "..."; break;
3553  }
3554  if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3555  msg += "*Unknown*"; break;
3556  }
3557  msg += wnames[b];
3558  }
3559 
3560  PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3561  MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3562  state.must_write = true;
3563  }
3564 
3565  // Check temperature limits
3566  if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3567  int k = nvme_get_max_temp_kelvin(smart_log);
3568  // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3569  int c = k - 273;
3570  if (c < 1)
3571  c = 1;
3572  else if (c > 0xff)
3573  c = 0xff;
3574  CheckTemperature(cfg, state, c, 0);
3575  }
3576 
3577  // Check if number of errors has increased
3578  if (cfg.errorlog || cfg.xerrorlog) {
3579  uint64_t oldcnt = state.nvme_err_log_entries;
3580  uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3581  if (newcnt > oldcnt) {
3582  PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3583  name, oldcnt, newcnt);
3584  MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3585  name, oldcnt, newcnt);
3586  state.must_write = true;
3587  }
3588  state.nvme_err_log_entries = newcnt;
3589  }
3590 
3591  CloseDevice(nvmedev, name);
3592  return 0;
3593 }
3594 
3595 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
// State of the system auto standby handling for ',ns' self-test directives.
// It is set to 0 or 1 when the configuration is (re)checked and moves between
// 1, 2 and 3 in do_disable_standby_check(), which logs each state change once.
3596 static int standby_disable_state = 0;
3597 
3599 {
 // NOTE(review): the line with the signature of this function (listing line
 // 3598) is missing from this listing; the body uses a container 'configs'
 // of dev_config entries.
 //
 // Purpose of the body: find out whether any device requests ',ns' (disable
 // auto standby during tests), probe whether the OS interface supports it,
 // and initialize standby_disable_state accordingly.
3600  // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3601  bool sts1 = false, sts2 = false;
 // The loop stops at the first entry that sets either flag, so at most the
 // flags of that single entry are recorded in sts1/sts2.
3602  for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3603  const dev_config & cfg = configs.at(i);
3604  if (cfg.offlinests_ns)
3605  sts1 = true;
3606  if (cfg.selfteststs_ns)
3607  sts2 = true;
3608  }
3609 
3610  // Check for support of disable auto standby
3611  // Reenable standby if smartd.conf was reread
 // State 3 means standby is still disabled from before; the call with
 // 'false' serves both as the support probe and as the re-enable.
3612  if (sts1 || sts2 || standby_disable_state == 3) {
3613  if (!smi()->disable_system_auto_standby(false)) {
3614  if (standby_disable_state == 3)
3615  PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3616  if (sts1 || sts2) {
3617  PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3618  (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
 // Forget the requests so that the state below becomes 0 (not used)
3619  sts1 = sts2 = false;
3620  }
3621  }
3622  }
3623 
 // 1 = feature in use and standby currently not disabled, 0 = feature not used
3624  standby_disable_state = (sts1 || sts2 ? 1 : 0);
3625 }
3626 
// Disable system auto standby while a test requested with ',ns' is running
// on any device and re-enable it afterwards.  Does nothing if
// standby_disable_state is 0.  Each change of standby_disable_state is
// logged once.
// configs and states are indexed in parallel (same index = same device).
3627 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3628 {
3629  if (!standby_disable_state)
3630  return;
3631 
3632  // Check for just started or still running self-tests
3633  bool running = false;
3634  for (unsigned i = 0; i < configs.size() && !running; i++) {
3635  const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3636 
 // NOTE(review): listing lines 3639 and 3642 (the second operand of each
 // inner '||' and the closing parentheses) are missing from this listing,
 // so the condition below is incomplete as shown.
3637  if ( ( cfg.offlinests_ns
3638  && (state.offline_started ||
3640  || ( cfg.selfteststs_ns
3641  && (state.selftest_started ||
3643  running = true;
3644  // state.offline/selftest_started will be reset after next logging of test status
3645  }
3646 
3647  // Disable/enable auto standby and log state changes
3648  if (!running) {
 // No test running: re-enable standby unless the state is already 1
3649  if (standby_disable_state != 1) {
3650  if (!smi()->disable_system_auto_standby(false))
3651  PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3652  smi()->get_errmsg());
3653  else
3654  PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
 // The state becomes 1 even if the enable call failed
3655  standby_disable_state = 1;
3656  }
3657  }
 // A test is running: the disable request is issued on every call, only
 // the log message is limited to state changes.
3658  else if (!smi()->disable_system_auto_standby(true)) {
3659  if (standby_disable_state != 2) {
3660  PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3661  smi()->get_errmsg());
3662  standby_disable_state = 2;
3663  }
3664  }
3665  else {
3666  if (standby_disable_state != 3) {
3667  PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3668  standby_disable_state = 3;
3669  }
3670  }
3671 }
3672 
3673 // Checks the SMART status of all ATA and SCSI devices
3674 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3675  smart_device_list & devices, bool firstpass, bool allow_selftests)
3676 {
3677  for (unsigned i = 0; i < configs.size(); i++) {
3678  const dev_config & cfg = configs.at(i);
3679  dev_state & state = states.at(i);
3680  smart_device * dev = devices.at(i);
3681  if (dev->is_ata())
3682  ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3683  else if (dev->is_scsi())
3684  SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3685  else if (dev->is_nvme())
3686  NVMeCheckDevice(cfg, state, dev->to_nvme());
3687  }
3688 
3689  do_disable_standby_check(configs, states);
3690 }
3691 
3692 // Set if Initialize() was called
// Initialize() sets this flag as its first action; according to the comment
// there, it marks that Goodbye() is to be called on exit.
3693 static bool is_initialized = false;
3694 
3695 // Does initialization right after fork to daemon mode
3696 static void Initialize(time_t *wakeuptime)
3697 {
3698  // Call Goodbye() on exit
3699  is_initialized = true;
3700 
3701  // write PID file
3702  if (!debugmode)
3703  WritePidFile();
3704 
3705  // install signal handlers. On Solaris, can't use signal() because
3706  // it resets the handler to SIG_DFL after each call. So use sigset()
3707  // instead. So SIGNALFN()==signal() or SIGNALFN()==sigset().
3708 
3709  // normal and abnormal exit
3710  if (SIGNALFN(SIGTERM, sighandler)==SIG_IGN)
3711  SIGNALFN(SIGTERM, SIG_IGN);
3712  if (SIGNALFN(SIGQUIT, sighandler)==SIG_IGN)
3713  SIGNALFN(SIGQUIT, SIG_IGN);
3714 
3715  // in debug mode, <CONTROL-C> ==> HUP
3716  if (SIGNALFN(SIGINT, debugmode?HUPhandler:sighandler)==SIG_IGN)
3717  SIGNALFN(SIGINT, SIG_IGN);
3718 
3719  // Catch HUP and USR1
3720  if (SIGNALFN(SIGHUP, HUPhandler)==SIG_IGN)
3721  SIGNALFN(SIGHUP, SIG_IGN);
3722  if (SIGNALFN(SIGUSR1, USR1handler)==SIG_IGN)
3723  SIGNALFN(SIGUSR1, SIG_IGN);
3724 #ifdef _WIN32
3725  if (SIGNALFN(SIGUSR2, USR2handler)==SIG_IGN)
3726  SIGNALFN(SIGUSR2, SIG_IGN);
3727 #endif
3728 
3729  // initialize wakeup time to CURRENT time
3730  *wakeuptime=time(NULL);
3731 
3732  return;
3733 }
3734 
3735 #ifdef _WIN32
3736 // Toggle debug mode implemented for native windows only
3737 // (there is no easy way to reopen tty on *nix)
3738 static void ToggleDebugMode()
3739 {
3740  if (!debugmode) {
3741  PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3742  if (!daemon_enable_console("smartd [Debug]")) {
3743  debugmode = 1;
3744  daemon_signal(SIGINT, HUPhandler);
3745  PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3746  }
3747  else
3748  PrintOut(LOG_INFO,"enable console failed\n");
3749  }
3750  else if (debugmode == 1) {
3751  daemon_disable_console();
3752  debugmode = 0;
3753  daemon_signal(SIGINT, sighandler);
3754  PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3755  }
3756  else
3757  PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3758 }
3759 #endif
3760 
3761 static time_t dosleep(time_t wakeuptime, bool & sigwakeup)
3762 {
3763  // If past wake-up-time, compute next wake-up-time
3764  time_t timenow=time(NULL);
3765  while (wakeuptime<=timenow){
3766  int intervals=1+(timenow-wakeuptime)/checktime;
3767  wakeuptime+=intervals*checktime;
3768  }
3769 
3770  // sleep until we catch SIGUSR1 or have completed sleeping
3771  int addtime = 0;
3772  while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3773 
3774  // protect user again system clock being adjusted backwards
3775  if (wakeuptime>timenow+checktime){
3776  PrintOut(LOG_CRIT, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3777  wakeuptime=timenow+checktime;
3778  }
3779 
3780  // Exit sleep when time interval has expired or a signal is received
3781  sleep(wakeuptime+addtime-timenow);
3782 
3783 #ifdef _WIN32
3784  // toggle debug mode?
3785  if (caughtsigUSR2) {
3786  ToggleDebugMode();
3787  caughtsigUSR2 = 0;
3788  }
3789 #endif
3790 
3791  timenow=time(NULL);
3792 
3793  // Actual sleep time too long?
3794  if (!addtime && timenow > wakeuptime+60) {
3795  if (debugmode)
3796  PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
3797  (int)(timenow-wakeuptime));
3798  // Wait another 20 seconds to avoid I/O errors during disk spin-up
3799  addtime = timenow-wakeuptime+20;
3800  // Use next wake-up-time if close
3801  int nextcheck = checktime - addtime % checktime;
3802  if (nextcheck <= 20)
3803  addtime += nextcheck;
3804  }
3805  }
3806 
3807  // if we caught a SIGUSR1 then print message and clear signal
3808  if (caughtsigUSR1){
3809  PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
3810  wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
3811  caughtsigUSR1=0;
3812  sigwakeup = true;
3813  }
3814 
3815  // return adjusted wakeuptime
3816  return wakeuptime;
3817 }
3818 
3819 // Print out a list of valid arguments for the Directive d
3820 static void printoutvaliddirectiveargs(int priority, char d)
3821 {
3822  switch (d) {
3823  case 'n':
3824  PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
3825  break;
3826  case 's':
3827  PrintOut(priority, "valid_regular_expression");
3828  break;
3829  case 'd':
3830  PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
3831  break;
3832  case 'T':
3833  PrintOut(priority, "normal, permissive");
3834  break;
3835  case 'o':
3836  case 'S':
3837  PrintOut(priority, "on, off");
3838  break;
3839  case 'l':
3840  PrintOut(priority, "error, selftest");
3841  break;
3842  case 'M':
3843  PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
3844  break;
3845  case 'v':
3846  PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
3847  break;
3848  case 'P':
3849  PrintOut(priority, "use, ignore, show, showall");
3850  break;
3851  case 'F':
3852  PrintOut(priority, "%s", get_valid_firmwarebug_args());
3853  break;
3854  case 'e':
3855  PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
3856  "security-freeze, standby,[N|off], wcache,[on|off]");
3857  break;
3858  }
3859 }
3860 
3861 // exits with an error message, or returns integer value of token
3862 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3863  int min, int max, char * suffix = 0)
3864 {
3865  // make sure argument is there
3866  if (!arg) {
3867  PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
3868  cfgfile, lineno, name, token, min, max);
3869  return -1;
3870  }
3871 
3872  // get argument value (base 10), check that it's integer, and in-range
3873  char *endptr;
3874  int val = strtol(arg,&endptr,10);
3875 
3876  // optional suffix present?
3877  if (suffix) {
3878  if (!strcmp(endptr, suffix))
3879  endptr += strlen(suffix);
3880  else
3881  *suffix = 0;
3882  }
3883 
3884  if (!(!*endptr && min <= val && val <= max)) {
3885  PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
3886  cfgfile, lineno, name, token, arg, min, max);
3887  return -1;
3888  }
3889 
3890  // all is well; return value
3891  return val;
3892 }
3893 
3894 
3895 // Get 1-3 small integer(s) for '-W' directive
3896 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
3897  unsigned char *val1, unsigned char *val2, unsigned char *val3)
3898 {
3899  unsigned v1 = 0, v2 = 0, v3 = 0;
3900  int n1 = -1, n2 = -1, n3 = -1, len;
3901  if (!arg) {
3902  PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
3903  cfgfile, lineno, name, token);
3904  return -1;
3905  }
3906 
3907  len = strlen(arg);
3908  if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
3909  && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
3910  PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
3911  cfgfile, lineno, name, token, arg);
3912  return -1;
3913  }
3914  *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
3915  return 0;
3916 }
3917 
3918 
3919 #ifdef _WIN32
3920 
// Get the next strtok() token, concatenating tokens if quoted with "...".
//
// Continues the strtok() scan that the caller has started.
// - Returns NULL if no token is left.
// - Returns the token unchanged if it does not start with '"'.
// - Otherwise returns the text between the opening quote and a closing quote
//   at the end of the same or a later token.  Tokens are joined with a single
//   space, whatever the delimiters between them were.
// - Returns "\"" (a string consisting of one quote) if the closing quote is
//   missing.
// The dequoted result lives in a static buffer that is overwritten by the
// next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;

  // Opening and closing quote in the same token: the quoted string contains
  // no delimiter, e.g. "C:\tools\warn.cmd".  An empty pair of quotes is not
  // handled here and is treated as an opening quote as before.
  size_t len = strlen(t);
  if (len > 2 && t[len-1] == '"') {
    token.assign(t+1, len-2);
    return token.c_str();
  }

  // Collect further tokens until one ends with the closing quote
  token = t+1;
  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // closing quote missing
    token += ' ';
    len = strlen(t);
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
3944 
3945 #endif // _WIN32
3946 
3947 
3948 // This function returns 1 if it has correctly parsed one token (and
3949 // any arguments), else zero if no tokens remain. It returns -1 if an
3950 // error was encountered.
// NOTE(review): in the code visible here only the values 1 and -1 are
// returned; no path returns 0.
//
// token:      Directive token of the form '-X', or a token starting with '#'.
// cfg:        configuration entry that receives the parsed settings.
// scan_types: list that collects the '-d TYPE' arguments.
// Arguments of a Directive are fetched with strtok(NULL, ...), i.e. the
// function continues the strtok() scan started by the caller.
3951 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
3952 {
3953  char sym;
3954  const char * name = cfg.name.c_str();
3955  int lineno=cfg.lineno;
3956  const char *delim = " \n\t";
 // badarg/missingarg are set by the cases below and reported together
 // behind the switch
3957  int badarg = 0;
3958  int missingarg = 0;
3959  const char *arg = 0;
3960 
3961  // is the rest of the line a comment
3962  if (*token=='#')
3963  return 1;
3964 
3965  // is the token not recognized?
3966  if (*token!='-' || strlen(token)!=2) {
3967  PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
3968  configfile, lineno, name, token);
3969  PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
3970  return -1;
3971  }
3972 
3973  // token we will be parsing:
3974  sym=token[1];
3975 
3976  // parse the token and swallow its argument
3977  int val;
 // Writable suffix strings for GetInteger(), which clears the first
 // character if the argument does not end with the suffix
3978  char plus[] = "+", excl[] = "!";
3979 
3980  switch (sym) {
3981  case 'C':
3982  // monitor current pending sector count (default 197)
3983  if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3984  return -1;
3985  cfg.curr_pending_id = (unsigned char)val;
3986  cfg.curr_pending_incr = (*plus == '+');
3987  cfg.curr_pending_set = true;
3988  break;
3989  case 'U':
3990  // monitor offline uncorrectable sectors (default 198)
3991  if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
3992  return -1;
3993  cfg.offl_pending_id = (unsigned char)val;
3994  cfg.offl_pending_incr = (*plus == '+');
3995  cfg.offl_pending_set = true;
3996  break;
3997  case 'T':
3998  // Set tolerance level for SMART command failures
3999  if ((arg = strtok(NULL, delim)) == NULL) {
4000  missingarg = 1;
4001  } else if (!strcmp(arg, "normal")) {
4002  // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4003  // not on failure of an optional S.M.A.R.T. command.
4004  // This is the default so we don't need to actually do anything here.
4005  cfg.permissive = false;
4006  } else if (!strcmp(arg, "permissive")) {
4007  // Permissive mode; ignore errors from Mandatory SMART commands
4008  cfg.permissive = true;
4009  } else {
4010  badarg = 1;
4011  }
4012  break;
4013  case 'd':
4014  // specify the device type
 // Any argument other than "ignore", "removable" and "auto" is taken as
 // a device type without validation here
4015  if ((arg = strtok(NULL, delim)) == NULL) {
4016  missingarg = 1;
4017  } else if (!strcmp(arg, "ignore")) {
4018  cfg.ignore = true;
4019  } else if (!strcmp(arg, "removable")) {
4020  cfg.removable = true;
4021  } else if (!strcmp(arg, "auto")) {
4022  cfg.dev_type = "";
4023  scan_types.clear();
4024  } else {
4025  cfg.dev_type = arg;
4026  scan_types.push_back(arg);
4027  }
4028  break;
4029  case 'F':
4030  // fix firmware bug
4031  if (!(arg = strtok(0, delim)))
4032  missingarg = 1;
4033  else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4034  badarg = 1;
4035  break;
4036  case 'H':
4037  // check SMART status
4038  cfg.smartcheck = true;
4039  break;
4040  case 'f':
4041  // check for failure of usage attributes
4042  cfg.usagefailed = true;
4043  break;
4044  case 't':
4045  // track changes in all vendor attributes
4046  cfg.prefail = true;
4047  cfg.usage = true;
4048  break;
4049  case 'p':
4050  // track changes in prefail vendor attributes
4051  cfg.prefail = true;
4052  break;
4053  case 'u':
4054  // track changes in usage vendor attributes
4055  cfg.usage = true;
4056  break;
4057  case 'l':
4058  // track changes in SMART logs
4059  if ((arg = strtok(NULL, delim)) == NULL) {
4060  missingarg = 1;
4061  } else if (!strcmp(arg, "selftest")) {
4062  // track changes in self-test log
4063  cfg.selftest = true;
4064  } else if (!strcmp(arg, "error")) {
4065  // track changes in ATA error log
4066  cfg.errorlog = true;
4067  } else if (!strcmp(arg, "xerror")) {
4068  // track changes in Extended Comprehensive SMART error log
4069  cfg.xerrorlog = true;
4070  } else if (!strcmp(arg, "offlinests")) {
4071  // track changes in offline data collection status
4072  cfg.offlinests = true;
4073  } else if (!strcmp(arg, "offlinests,ns")) {
4074  // track changes in offline data collection status, disable auto standby
4075  cfg.offlinests = cfg.offlinests_ns = true;
4076  } else if (!strcmp(arg, "selfteststs")) {
4077  // track changes in self-test execution status
4078  cfg.selfteststs = true;
4079  } else if (!strcmp(arg, "selfteststs,ns")) {
4080  // track changes in self-test execution status, disable auto standby
4081  cfg.selfteststs = cfg.selfteststs_ns = true;
4082  } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4083  // set SCT Error Recovery Control
 // Both values must be present (nc is only set if the whole pattern
 // matched), the argument must end behind them, and each value must
 // be at most 999
4084  unsigned rt = ~0, wt = ~0; int nc = -1;
4085  sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4086  if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4087  cfg.sct_erc_set = true;
4088  cfg.sct_erc_readtime = rt;
4089  cfg.sct_erc_writetime = wt;
4090  }
4091  else
4092  badarg = 1;
4093  } else {
4094  badarg = 1;
4095  }
4096  break;
4097  case 'a':
4098  // monitor everything
4099  cfg.smartcheck = true;
4100  cfg.prefail = true;
4101  cfg.usagefailed = true;
4102  cfg.usage = true;
4103  cfg.selftest = true;
4104  cfg.errorlog = true;
4105  cfg.selfteststs = true;
4106  break;
4107  case 'o':
4108  // automatic offline testing enable/disable
 // 2 = on, 1 = off
4109  if ((arg = strtok(NULL, delim)) == NULL) {
4110  missingarg = 1;
4111  } else if (!strcmp(arg, "on")) {
4112  cfg.autoofflinetest = 2;
4113  } else if (!strcmp(arg, "off")) {
4114  cfg.autoofflinetest = 1;
4115  } else {
4116  badarg = 1;
4117  }
4118  break;
4119  case 'n':
4120  // skip disk check if in idle or standby mode
 // Argument format: MODE[,N][,q]
4121  if (!(arg = strtok(NULL, delim)))
4122  missingarg = 1;
4123  else {
4124  char *endptr = NULL;
4125  char *next = strchr(const_cast<char*>(arg), ',');
4126 
4127  cfg.powerquiet = false;
4128  cfg.powerskipmax = 0;
4129 
 // Cut the argument at the first ',' so that arg holds only the mode.
 // This modifies the token buffer, so an error message printed
 // later shows only this first part.
4130  if (next!=NULL) *next='\0';
4131  if (!strcmp(arg, "never"))
4132  cfg.powermode = 0;
4133  else if (!strcmp(arg, "sleep"))
4134  cfg.powermode = 1;
4135  else if (!strcmp(arg, "standby"))
4136  cfg.powermode = 2;
4137  else if (!strcmp(arg, "idle"))
4138  cfg.powermode = 3;
4139  else
4140  badarg = 1;
4141 
4142  // if optional arguments are present
4143  if (!badarg && next!=NULL) {
4144  next++;
4145  cfg.powerskipmax = strtol(next, &endptr, 10);
 // No number found: the rest is checked for 'q' below
4146  if (endptr == next)
4147  cfg.powerskipmax = 0;
4148  else {
 // Skip the one character behind the number (if any); the
 // number itself must be positive
4149  next = endptr + (*endptr != '\0');
4150  if (cfg.powerskipmax <= 0)
4151  badarg = 1;
4152  }
4153  if (*next != '\0') {
4154  if (!strcmp("q", next))
4155  cfg.powerquiet = true;
4156  else {
4157  badarg = 1;
4158  }
4159  }
4160  }
4161  }
4162  break;
4163  case 'S':
4164  // automatic attribute autosave enable/disable
 // 2 = on, 1 = off
4165  if ((arg = strtok(NULL, delim)) == NULL) {
4166  missingarg = 1;
4167  } else if (!strcmp(arg, "on")) {
4168  cfg.autosave = 2;
4169  } else if (!strcmp(arg, "off")) {
4170  cfg.autosave = 1;
4171  } else {
4172  badarg = 1;
4173  }
4174  break;
4175  case 's':
4176  // warn user, and delete any previously given -s REGEXP Directives
4177  if (!cfg.test_regex.empty()){
4178  PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4179  configfile, lineno, name, cfg.test_regex.get_pattern());
 // NOTE(review): listing line 4180 is missing here; the statement that
 // deletes the previous regular expression is not visible.
4181  }
4182  // check for missing argument
4183  if (!(arg = strtok(NULL, delim))) {
4184  missingarg = 1;
4185  }
4186  // Compile regex
4187  else {
4188  if (!cfg.test_regex.compile(arg, REG_EXTENDED)) {
4189  // not a valid regular expression!
4190  PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4191  configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4192  return -1;
4193  }
4194  // Do a bit of sanity checking and warn user if we think that
4195  // their regexp is "strange". User probably confused about shell
4196  // glob(3) syntax versus regular expression syntax regexp(7).
 // val becomes the index of the first character that is not in the
 // list of expected characters; only a warning is printed then
4197  if (arg[(val = strspn(arg, "0123456789/.-+*|()?^$[]SLCOcnr"))])
4198  PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, character %d (%c) looks odd in extended regular expression %s\n",
4199  configfile, lineno, name, val+1, arg[val], arg);
4200  }
4201  break;
4202  case 'm':
4203  // send email to address that follows
4204  if (!(arg = strtok(NULL,delim)))
4205  missingarg = 1;
4206  else {
4207  if (!cfg.emailaddress.empty())
4208  PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4209  configfile, lineno, name, cfg.emailaddress.c_str());
4210 #ifdef _WIN32 // TODO: Remove after smartmontools 6.5
4211  if ( !strcmp(arg, "msgbox") || !strcmp(arg, "sysmsgbox")
4212  || str_starts_with(arg, "msgbox,") || str_starts_with(arg, "sysmsgbox,")) {
4213  PrintOut(LOG_CRIT, "File %s line %d (drive %s): -m %s is no longer supported, use -m console[,...] instead\n",
4214  configfile, lineno, name, arg);
4215  return -1;
4216  }
4217 #endif
4218  cfg.emailaddress = arg;
4219  }
4220  break;
4221  case 'M':
4222  // email warning options
4223  if (!(arg = strtok(NULL, delim)))
4224  missingarg = 1;
4225  else if (!strcmp(arg, "once"))
4226  cfg.emailfreq = 1;
4227  else if (!strcmp(arg, "daily"))
4228  cfg.emailfreq = 2;
4229  else if (!strcmp(arg, "diminishing"))
4230  cfg.emailfreq = 3;
4231  else if (!strcmp(arg, "test"))
4232  cfg.emailtest = 1;
4233  else if (!strcmp(arg, "exec")) {
4234  // Get the next argument (the command line)
4235 #ifdef _WIN32
4236  // Allow "/path name/with spaces/..." on Windows
 // strtok_dequote() returns a string starting with '"' if the
 // closing quote is missing
4237  arg = strtok_dequote(delim);
4238  if (arg && arg[0] == '"') {
4239  PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4240  configfile, lineno, name, token);
4241  return -1;
4242  }
4243 #else
4244  arg = strtok(0, delim);
4245 #endif
4246  if (!arg) {
4247  PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4248  configfile, lineno, name, token);
4249  return -1;
4250  }
4251  // Free the last cmd line given if any, and copy new one
4252  if (!cfg.emailcmdline.empty())
4253  PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4254  configfile, lineno, name, cfg.emailcmdline.c_str());
4255  cfg.emailcmdline = arg;
4256  }
4257  else
4258  badarg = 1;
4259  break;
4260  case 'i':
4261  // ignore failure of usage attribute
4262  if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4263  return -1;
 // NOTE(review): listing line 4264 is missing here; the statement that
 // uses val is not visible.
4265  break;
4266  case 'I':
4267  // ignore attribute for tracking purposes
4268  if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4269  return -1;
 // NOTE(review): listing line 4270 is missing here; the statement that
 // uses val is not visible.
4271  break;
4272  case 'r':
4273  // print raw value when tracking
4274  if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4275  return -1;
 // NOTE(review): listing lines 4276 and 4278 are missing here, i.e. the
 // statement that uses val and the statement controlled by the 'if'
 // below are not visible.
4277  if (*excl == '!') // attribute change is critical
4279  break;
4280  case 'R':
4281  // track changes in raw value (forces printing of raw value)
4282  if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4283  return -1;
 // NOTE(review): listing lines 4284 and 4286 are missing here, i.e. the
 // statement that uses val and the statement controlled by the 'if'
 // below are not visible.
4285  if (*excl == '!') // raw value change is critical
4287  break;
4288  case 'W':
4289  // track Temperature
4290  if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4291  &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4292  return -1;
4293  break;
4294  case 'v':
4295  // non-default vendor-specific attribute meaning
4296  if (!(arg=strtok(NULL,delim))) {
4297  missingarg = 1;
4298  } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4299  badarg = 1;
4300  }
4301  break;
4302  case 'P':
4303  // Define use of drive-specific presets.
4304  if (!(arg = strtok(NULL, delim))) {
4305  missingarg = 1;
4306  } else if (!strcmp(arg, "use")) {
4307  cfg.ignorepresets = false;
4308  } else if (!strcmp(arg, "ignore")) {
4309  cfg.ignorepresets = true;
4310  } else if (!strcmp(arg, "show")) {
4311  cfg.showpresets = true;
4312  } else if (!strcmp(arg, "showall")) {
4313  showallpresets();
4314  } else {
4315  badarg = 1;
4316  }
4317  break;
4318 
4319  case 'e':
4320  // Various ATA settings
4321  if (!(arg = strtok(NULL, delim))) {
4322  missingarg = true;
4323  }
4324  else {
 // Split the argument into a name of at most 16 characters (arg2),
 // a separator ',' or '=' and an optional unsigned value:
 // n1 = offset behind the name, n2 = offset behind the separator,
 // n3 = offset behind the value; each stays -1 if that part did not
 // match.
4325  char arg2[16+1]; unsigned val;
4326  int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4327  if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4328  && (n1 == len || n2 > 0)) {
4329  bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4330  bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
 // No number that reaches to the end of the argument: use a value
 // that fails all range checks below
4331  if (n3 != len)
4332  val = ~0U;
4333 
 // The numeric settings are stored as value + 1, or as -1 for "off"
 // (standby: "off" is stored like value 0)
4334  if (!strcmp(arg2, "aam")) {
4335  if (off)
4336  cfg.set_aam = -1;
4337  else if (val <= 254)
4338  cfg.set_aam = val + 1;
4339  else
4340  badarg = true;
4341  }
4342  else if (!strcmp(arg2, "apm")) {
4343  if (off)
4344  cfg.set_apm = -1;
4345  else if (1 <= val && val <= 254)
4346  cfg.set_apm = val + 1;
4347  else
4348  badarg = true;
4349  }
4350  else if (!strcmp(arg2, "lookahead")) {
4351  if (off)
4352  cfg.set_lookahead = -1;
4353  else if (on)
4354  cfg.set_lookahead = 1;
4355  else
4356  badarg = true;
4357  }
 // Compared against the whole argument, so nothing may follow the name
4358  else if (!strcmp(arg, "security-freeze")) {
4359  cfg.set_security_freeze = true;
4360  }
4361  else if (!strcmp(arg2, "standby")) {
4362  if (off)
4363  cfg.set_standby = 0 + 1;
4364  else if (val <= 255)
4365  cfg.set_standby = val + 1;
4366  else
4367  badarg = true;
4368  }
4369  else if (!strcmp(arg2, "wcache")) {
4370  if (off)
4371  cfg.set_wcache = -1;
4372  else if (on)
4373  cfg.set_wcache = 1;
4374  else
4375  badarg = true;
4376  }
4377  else if (!strcmp(arg2, "dsn")) {
4378  if (off)
4379  cfg.set_dsn = -1;
4380  else if (on)
4381  cfg.set_dsn = 1;
4382  else
4383  badarg = true;
4384  }
4385  else
4386  badarg = true;
4387  }
4388  else
4389  badarg = true;
4390  }
4391  break;
4392 
4393  default:
4394  // Directive not recognized
4395  PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4396  configfile, lineno, name, token);
4397  Directives();
4398  return -1;
4399  }
 // Common error reporting for the cases that only set a flag above
4400  if (missingarg) {
4401  PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4402  configfile, lineno, name, token);
4403  }
4404  if (badarg) {
4405  PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4406  configfile, lineno, name, token, arg);
4407  }
4408  if (missingarg || badarg) {
4409  PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4410  printoutvaliddirectiveargs(LOG_CRIT, sym);
4411  PrintOut(LOG_CRIT, "\n");
4412  return -1;
4413  }
4414 
4415  return 1;
4416 }
4417 
4418 // Scan directive for configuration file
4419 #define SCANDIRECTIVE "DEVICESCAN"
4420 
4421 // This is the routine that adds things to the conf_entries list.
4422 //
4423 // Return values are:
4424 // 1: parsed a normal line
4425 // 0: found DEFAULT setting or comment or blank line
4426 // -1: found SCANDIRECTIVE line
4427 // -2: found an error
4428 //
4429 // Note: this routine modifies *line from the caller!
4430 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4431  smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4432 {
4433  const char *delim = " \n\t";
4434 
4435  // get first token: device name. If a comment, skip line
4436  const char * name = strtok(line, delim);
4437  if (!name || *name == '#')
4438  return 0;
4439 
4440  // Check device name for DEFAULT or DEVICESCAN
4441  int retval;
4442  if (!strcmp("DEFAULT", name)) {
4443  retval = 0;
4444  // Restart with empty defaults
4445  default_conf = dev_config();
4446  }
4447  else {
4448  retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4449  // Init new entry with current defaults
4450  conf_entries.push_back(default_conf);
4451  }
4452  dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4453 
4454  cfg.name = name; // Later replaced by dev->get_info().info_name
4455  cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4456  cfg.lineno = lineno;
4457 
4458  // parse tokens one at a time from the file.
4459  while (char * token = strtok(0, delim)) {
4460  int rc = ParseToken(token, cfg, scan_types);
4461  if (rc < 0)
4462  // error found on the line
4463  return -2;
4464 
4465  if (rc == 0)
4466  // No tokens left
4467  break;
4468 
4469  // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4470  }
4471 
4472  // Check for multiple -d TYPE directives
4473  if (retval != -1 && scan_types.size() > 1) {
4474  PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4475  cfg.name.c_str(), cfg.lineno, configfile);
4476  return -2;
4477  }
4478 
4479  // Don't perform checks below for DEFAULT entries
4480  if (retval == 0)
4481  return retval;
4482 
4483  // If NO monitoring directives are set, then set all of them.
4484  if (!( cfg.smartcheck || cfg.selftest
4485  || cfg.errorlog || cfg.xerrorlog
4486  || cfg.offlinests || cfg.selfteststs
4487  || cfg.usagefailed || cfg.prefail || cfg.usage
4488  || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4489 
4490  PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4491  cfg.name.c_str(), cfg.lineno, configfile);
4492 
4493  cfg.smartcheck = true;
4494  cfg.usagefailed = true;
4495  cfg.prefail = true;
4496  cfg.usage = true;
4497  cfg.selftest = true;
4498  cfg.errorlog = true;
4499  cfg.selfteststs = true;
4500  }
4501 
4502  // additional sanity check. Has user set -M options without -m?
4503  if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4504  PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4505  cfg.name.c_str(), cfg.lineno, configfile);
4506  return -2;
4507  }
4508 
4509  // has the user has set <nomailer>?
4510  if (cfg.emailaddress == "<nomailer>") {
4511  // check that -M exec is also set
4512  if (cfg.emailcmdline.empty()){
4513  PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4514  cfg.name.c_str(), cfg.lineno, configfile);
4515  return -2;
4516  }
4517  // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4518  cfg.emailaddress.clear();
4519  }
4520 
4521  return retval;
4522 }
4523 
4524 // Parses a configuration file. Return values are:
4525 // N=>0: found N entries
4526 // -1: syntax error in config file
4527 // -2: config file does not exist
4528 // -3: config file exists but cannot be read
4529 //
4530 // In the case where the return value is 0, there are three
4531 // possiblities:
4532 // Empty configuration file ==> conf_entries.empty()
4533 // No configuration file ==> conf_entries[0].lineno == 0
4534 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4535 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4536 {
4537  // maximum line length in configuration file
4538  const int MAXLINELEN = 256;
4539  // maximum length of a continued line in configuration file
4540  const int MAXCONTLINE = 1023;
4541 
4542  stdio_file f;
4543  // Open config file, if it exists and is not <stdin>
4544  if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4545  if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4546  // file exists but we can't read it or it should exist due to '-c' option
4547  int ret = (errno!=ENOENT ? -3 : -2);
4548  PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4549  strerror(errno),configfile);
4550  return ret;
4551  }
4552  }
4553  else // read from stdin ('-c -' option)
4554  f.open(stdin);
4555 
4556  // Start with empty defaults
4557  dev_config default_conf;
4558 
4559  // No configuration file found -- use fake one
4560  int entry = 0;
4561  if (!f) {
4562  char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4563 
4564  if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4565  throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4566  return 0;
4567  }
4568 
4569 #ifdef __CYGWIN__
4570  setmode(fileno(f), O_TEXT); // Allow files with \r\n
4571 #endif
4572 
4573  // configuration file exists
4574  PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4575 
4576  // parse config file line by line
4577  int lineno = 1, cont = 0, contlineno = 0;
4578  char line[MAXLINELEN+2];
4579  char fullline[MAXCONTLINE+1];
4580 
4581  for (;;) {
4582  int len=0,scandevice;
4583  char *lastslash;
4584  char *comment;
4585  char *code;
4586 
4587  // make debugging simpler
4588  memset(line,0,sizeof(line));
4589 
4590  // get a line
4591  code=fgets(line, MAXLINELEN+2, f);
4592 
4593  // are we at the end of the file?
4594  if (!code){
4595  if (cont) {
4596  scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4597  // See if we found a SCANDIRECTIVE directive
4598  if (scandevice==-1)
4599  return 0;
4600  // did we find a syntax error
4601  if (scandevice==-2)
4602  return -1;
4603  // the final line is part of a continuation line
4604  entry+=scandevice;
4605  }
4606  break;
4607  }
4608 
4609  // input file line number
4610  contlineno++;
4611 
4612  // See if line is too long
4613  len=strlen(line);
4614  if (len>MAXLINELEN){
4615  const char *warn;
4616  if (line[len-1]=='\n')
4617  warn="(including newline!) ";
4618  else
4619  warn="";
4620  PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4621  (int)contlineno,configfile,warn,(int)MAXLINELEN);
4622  return -1;
4623  }
4624 
4625  // Ignore anything after comment symbol
4626  if ((comment=strchr(line,'#'))){
4627  *comment='\0';
4628  len=strlen(line);
4629  }
4630 
4631  // is the total line (made of all continuation lines) too long?
4632  if (cont+len>MAXCONTLINE){
4633  PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4634  lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4635  return -1;
4636  }
4637 
4638  // copy string so far into fullline, and increment length
4639  snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4640  cont+=len;
4641 
4642  // is this a continuation line. If so, replace \ by space and look at next line
4643  if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4644  *(fullline+(cont-len)+(lastslash-line))=' ';
4645  continue;
4646  }
4647 
4648  // Not a continuation line. Parse it
4649  scan_types.clear();
4650  scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4651 
4652  // did we find a scandevice directive?
4653  if (scandevice==-1)
4654  return 0;
4655  // did we find a syntax error
4656  if (scandevice==-2)
4657  return -1;
4658 
4659  entry+=scandevice;
4660  lineno++;
4661  cont=0;
4662  }
4663 
4664  // note -- may be zero if syntax of file OK, but no valid entries!
4665  return entry;
4666 }
4667 
4668 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4669  <LIST> is the list of valid arguments for option opt. */
4670 static void PrintValidArgs(char opt)
4671 {
4672  const char *s;
4673 
4674  PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4675  if (!(s = GetValidArgList(opt)))
4676  PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4677  else
4678  PrintOut(LOG_CRIT, "%s", (char *)s);
4679  PrintOut(LOG_CRIT, " <=======\n");
4680 }
4681 
4682 #ifndef _WIN32
4683 // Report error and exit if specified path is not absolute.
4684 static void check_abs_path(char option, const std::string & path)
4685 {
4686  if (path.empty() || path[0] == '/')
4687  return;
4688 
4689  debugmode = 1;
4690  PrintHead();
4691  PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4692  PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4693  EXIT(EXIT_BADCMD);
4694 }
4695 #endif // !_WIN32
4696 
4697 // Parses input line, prints usage message and
4698 // version/license/copyright messages
4699 static void ParseOpts(int argc, char **argv)
4700 {
4701  // Init default path names
4702 #ifndef _WIN32
4703  configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4704  warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4705 #else
4706  std::string exedir = get_exe_dir();
4707  static std::string configfile_str = exedir + "/smartd.conf";
4708  configfile = configfile_str.c_str();
4709  warning_script = exedir + "/smartd_warning.cmd";
4710 #endif
4711 
4712  // Please update GetValidArgList() if you edit shortopts
4713  static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4714 #ifdef HAVE_LIBCAP_NG
4715  "C"
4716 #endif
4717  ;
4718  // Please update GetValidArgList() if you edit longopts
4719  struct option longopts[] = {
4720  { "configfile", required_argument, 0, 'c' },
4721  { "logfacility", required_argument, 0, 'l' },
4722  { "quit", required_argument, 0, 'q' },
4723  { "debug", no_argument, 0, 'd' },
4724  { "showdirectives", no_argument, 0, 'D' },
4725  { "interval", required_argument, 0, 'i' },
4726 #ifndef _WIN32
4727  { "no-fork", no_argument, 0, 'n' },
4728 #else
4729  { "service", no_argument, 0, 'n' },
4730 #endif
4731  { "pidfile", required_argument, 0, 'p' },
4732  { "report", required_argument, 0, 'r' },
4733  { "savestates", required_argument, 0, 's' },
4734  { "attributelog", required_argument, 0, 'A' },
4735  { "drivedb", required_argument, 0, 'B' },
4736  { "warnexec", required_argument, 0, 'w' },
4737  { "version", no_argument, 0, 'V' },
4738  { "license", no_argument, 0, 'V' },
4739  { "copyright", no_argument, 0, 'V' },
4740  { "help", no_argument, 0, 'h' },
4741  { "usage", no_argument, 0, 'h' },
4742 #ifdef HAVE_LIBCAP_NG
4743  { "capabilities", no_argument, 0, 'C' },
4744 #endif
4745  { 0, 0, 0, 0 }
4746  };
4747 
4748  opterr=optopt=0;
4749  bool badarg = false;
4750  bool use_default_db = true; // set false on '-B FILE'
4751 
4752  // Parse input options.
4753  int optchar;
4754  while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4755  char *arg;
4756  char *tailptr;
4757  long lchecktime;
4758 
4759  switch(optchar) {
4760  case 'q':
4761  // when to quit
4762  if (!strcmp(optarg, "nodev"))
4763  quit = QUIT_NODEV;
4764  else if (!strcmp(optarg, "nodevstartup"))
4766  else if (!strcmp(optarg, "never"))
4767  quit = QUIT_NEVER;
4768  else if (!strcmp(optarg, "onecheck")) {
4769  quit = QUIT_ONECHECK;
4770  debugmode = 1;
4771  }
4772  else if (!strcmp(optarg, "showtests")) {
4773  quit = QUIT_SHOWTESTS;
4774  debugmode = 1;
4775  }
4776  else if (!strcmp(optarg, "errors"))
4777  quit = QUIT_ERRORS;
4778  else
4779  badarg = true;
4780  break;
4781  case 'l':
4782  // set the log facility level
4783  if (!strcmp(optarg, "daemon"))
4784  facility=LOG_DAEMON;
4785  else if (!strcmp(optarg, "local0"))
4786  facility=LOG_LOCAL0;
4787  else if (!strcmp(optarg, "local1"))
4788  facility=LOG_LOCAL1;
4789  else if (!strcmp(optarg, "local2"))
4790  facility=LOG_LOCAL2;
4791  else if (!strcmp(optarg, "local3"))
4792  facility=LOG_LOCAL3;
4793  else if (!strcmp(optarg, "local4"))
4794  facility=LOG_LOCAL4;
4795  else if (!strcmp(optarg, "local5"))
4796  facility=LOG_LOCAL5;
4797  else if (!strcmp(optarg, "local6"))
4798  facility=LOG_LOCAL6;
4799  else if (!strcmp(optarg, "local7"))
4800  facility=LOG_LOCAL7;
4801  else
4802  badarg = true;
4803  break;
4804  case 'd':
4805  // enable debug mode
4806  debugmode = 1;
4807  break;
4808  case 'n':
4809  // don't fork()
4810 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
4811  do_fork = false;
4812 #endif
4813  break;
4814  case 'D':
4815  // print summary of all valid directives
4816  debugmode = 1;
4817  Directives();
4818  EXIT(0);
4819  break;
4820  case 'i':
4821  // Period (time interval) for checking
4822  // strtol will set errno in the event of overflow, so we'll check it.
4823  errno = 0;
4824  lchecktime = strtol(optarg, &tailptr, 10);
4825  if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
4826  debugmode=1;
4827  PrintHead();
4828  PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
4829  PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
4830  PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4831  EXIT(EXIT_BADCMD);
4832  }
4833  checktime = (int)lchecktime;
4834  break;
4835  case 'r':
4836  // report IOCTL transactions
4837  {
4838  int n1 = -1, n2 = -1, len = strlen(optarg);
4839  char s[9+1]; unsigned i = 1;
4840  sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
4841  if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
4842  badarg = true;
4843  } else if (!strcmp(s,"ioctl")) {
4845  } else if (!strcmp(s,"ataioctl")) {
4846  ata_debugmode = i;
4847  } else if (!strcmp(s,"scsiioctl")) {
4848  scsi_debugmode = i;
4849  } else if (!strcmp(s,"nvmeioctl")) {
4850  nvme_debugmode = i;
4851  } else {
4852  badarg = true;
4853  }
4854  }
4855  break;
4856  case 'c':
4857  // alternate configuration file
4858  if (strcmp(optarg,"-"))
4859  configfile = (configfile_alt = optarg).c_str();
4860  else // read from stdin
4862  break;
4863  case 'p':
4864  // output file with PID number
4865  pid_file = optarg;
4866  break;
4867  case 's':
4868  // path prefix of persistent state file
4869  state_path_prefix = optarg;
4870  break;
4871  case 'A':
4872  // path prefix of attribute log file
4873  attrlog_path_prefix = optarg;
4874  break;
4875  case 'B':
4876  {
4877  const char * path = optarg;
4878  if (*path == '+' && path[1])
4879  path++;
4880  else
4881  use_default_db = false;
4882  unsigned char savedebug = debugmode; debugmode = 1;
4883  if (!read_drive_database(path))
4884  EXIT(EXIT_BADCMD);
4885  debugmode = savedebug;
4886  }
4887  break;
4888  case 'w':
4889  warning_script = optarg;
4890  break;
4891  case 'V':
4892  // print version and CVS info
4893  debugmode = 1;
4894  PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
4895  EXIT(0);
4896  break;
4897 #ifdef HAVE_LIBCAP_NG
4898  case 'C':
4899  // enable capabilities
4900  enable_capabilities = true;
4901  break;
4902 #endif
4903  case 'h':
4904  // help: print summary of command-line options
4905  debugmode=1;
4906  PrintHead();
4907  Usage();
4908  EXIT(0);
4909  break;
4910  case '?':
4911  default:
4912  // unrecognized option
4913  debugmode=1;
4914  PrintHead();
4915  // Point arg to the argument in which this option was found.
4916  arg = argv[optind-1];
4917  // Check whether the option is a long option that doesn't map to -h.
4918  if (arg[1] == '-' && optchar != 'h') {
4919  // Iff optopt holds a valid option then argument must be missing.
4920  if (optopt && (strchr(shortopts, optopt) != NULL)) {
4921  PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
4922  PrintValidArgs(optopt);
4923  } else {
4924  PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
4925  }
4926  PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
4927  EXIT(EXIT_BADCMD);
4928  }
4929  if (optopt) {
4930  // Iff optopt holds a valid option then argument must be missing.
4931  if (strchr(shortopts, optopt) != NULL){
4932  PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
4933  PrintValidArgs(optopt);
4934  } else {
4935  PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
4936  }
4937  PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4938  EXIT(EXIT_BADCMD);
4939  }
4940  Usage();
4941  EXIT(0);
4942  }
4943 
4944  // Check to see if option had an unrecognized or incorrect argument.
4945  if (badarg) {
4946  debugmode=1;
4947  PrintHead();
4948  // It would be nice to print the actual option name given by the user
4949  // here, but we just print the short form. Please fix this if you know
4950  // a clean way to do it.
4951  PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
4952  PrintValidArgs(optchar);
4953  PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4954  EXIT(EXIT_BADCMD);
4955  }
4956  }
4957 
4958  // non-option arguments are not allowed
4959  if (argc > optind) {
4960  debugmode=1;
4961  PrintHead();
4962  PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
4963  PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
4964  EXIT(EXIT_BADCMD);
4965  }
4966 
4967  // no pidfile in debug mode
4968  if (debugmode && !pid_file.empty()) {
4969  debugmode=1;
4970  PrintHead();
4971  PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
4972  PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
4973  EXIT(EXIT_BADCMD);
4974  }
4975 
4976 #ifndef _WIN32
4977  if (!debugmode) {
4978  // absolute path names are required due to chdir('/') after fork().
4979  check_abs_path('p', pid_file);
4982  }
4983 #endif
4984 
4985  // Read or init drive database
4986  {
4987  unsigned char savedebug = debugmode; debugmode = 1;
4988  if (!init_drive_database(use_default_db))
4989  EXIT(EXIT_BADCMD);
4990  debugmode = savedebug;
4991  }
4992 
4993  // print header
4994  PrintHead();
4995 }
4996 
4997 // Function we call if no configuration file was found or if the
4998 // SCANDIRECTIVE Directive was found. It makes entries for device
4999 // names returned by scan_smart_devices() in os_OSNAME.cpp
5000 static int MakeConfigEntries(const dev_config & base_cfg,
5001  dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5002  const smart_devtype_list & types)
5003 {
5004  // make list of devices
5005  smart_device_list devlist;
5006  if (!smi()->scan_smart_devices(devlist, types)) {
5007  PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
5008  return 0;
5009  }
5010 
5011  // if no devices, return
5012  if (devlist.size() <= 0)
5013  return 0;
5014 
5015  // add empty device slots for existing config entries
5016  while (scanned_devs.size() < conf_entries.size())
5017  scanned_devs.push_back((smart_device *)0);
5018 
5019  // loop over entries to create
5020  for (unsigned i = 0; i < devlist.size(); i++) {
5021  // Move device pointer
5022  smart_device * dev = devlist.release(i);
5023  scanned_devs.push_back(dev);
5024 
5025  // Copy configuration, update device and type name
5026  conf_entries.push_back(base_cfg);
5027  dev_config & cfg = conf_entries.back();
5028  cfg.name = dev->get_info().info_name;
5029  cfg.dev_name = dev->get_info().dev_name;
5030  cfg.dev_type = dev->get_info().dev_type;
5031  }
5032 
5033  return devlist.size();
5034 }
5035 
5036 static void CanNotRegister(const char *name, const char *type, int line, bool scandirective)
5037 {
5038  if (!debugmode && scandirective)
5039  return;
5040  if (line)
5041  PrintOut(scandirective?LOG_INFO:LOG_CRIT,
5042  "Unable to register %s device %s at line %d of file %s\n",
5043  type, name, line, configfile);
5044  else
5045  PrintOut(LOG_INFO,"Unable to register %s device %s\n",
5046  type, name);
5047  return;
5048 }
5049 
5050 // Returns negative value (see ParseConfigFile()) if config file
5051 // had errors, else number of entries which may be zero or positive.
5052 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5053 {
5054  // parse configuration file configfile (normally /etc/smartd.conf)
5055  smart_devtype_list scan_types;
5056  int entries = ParseConfigFile(conf_entries, scan_types);
5057 
5058  if (entries < 0) {
5059  // There was an error reading the configuration file.
5060  conf_entries.clear();
5061  if (entries == -1)
5062  PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5063  return entries;
5064  }
5065 
5066  // no error parsing config file.
5067  if (entries) {
5068  // we did not find a SCANDIRECTIVE and did find valid entries
5069  PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5070  }
5071  else if (!conf_entries.empty()) {
5072  // we found a SCANDIRECTIVE or there was no configuration file so
5073  // scan. Configuration file's last entry contains all options
5074  // that were set
5075  dev_config first = conf_entries.back();
5076  conf_entries.pop_back();
5077 
5078  if (first.lineno)
5079  PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5080  else
5081  PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5082 
5083  // make config list of devices to search for
5084  MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5085 
5086  // warn user if scan table found no devices
5087  if (conf_entries.empty())
5088  PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5089  }
5090  else
5091  PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5092 
5093  return conf_entries.size();
5094 }
5095 
5096 // Return true if TYPE contains a RAID drive number
5097 static bool is_raid_type(const char * type)
5098 {
5099  if (str_starts_with(type, "sat,"))
5100  return false;
5101  int i;
5102  if (sscanf(type, "%*[^,],%d", &i) != 1)
5103  return false;
5104  return true;
5105 }
5106 
5107 // Return true if DEV is already in DEVICES[0..NUMDEVS) or IGNORED[*]
5108 static bool is_duplicate_device(const smart_device * dev,
5109  const smart_device_list & devices, unsigned numdevs,
5110  const dev_config_vector & ignored)
5111 {
5112  const smart_device::device_info & info1 = dev->get_info();
5113  bool is_raid1 = is_raid_type(info1.dev_type.c_str());
5114 
5115  for (unsigned i = 0; i < numdevs; i++) {
5116  const smart_device::device_info & info2 = devices.at(i)->get_info();
5117  // -d TYPE options must match if RAID drive number is specified
5118  if ( info1.dev_name == info2.dev_name
5119  && ( info1.dev_type == info2.dev_type
5120  || !is_raid1 || !is_raid_type(info2.dev_type.c_str())))
5121  return true;
5122  }
5123 
5124  for (unsigned i = 0; i < ignored.size(); i++) {
5125  const dev_config & cfg2 = ignored.at(i);
5126  if ( info1.dev_name == cfg2.dev_name
5127  && ( info1.dev_type == cfg2.dev_type
5128  || !is_raid1 || !is_raid_type(cfg2.dev_type.c_str())))
5129  return true;
5130  }
5131  return false;
5132 }
5133 
5134 // Register one device, return false on error
5135 static bool register_device(dev_config & cfg, dev_state & state, smart_device_auto_ptr & dev)
5136 {
5137  bool scanning;
5138  if (!dev) {
5139  // Get device of appropriate type
5140  dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5141  if (!dev) {
5142  if (cfg.dev_type.empty())
5143  PrintOut(LOG_INFO, "Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5144  else
5145  PrintOut(LOG_INFO, "Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5146  return false;
5147  }
5148  scanning = false;
5149  }
5150  else {
5151  // Use device from device scan
5152  scanning = true;
5153  }
5154 
5155  // Save old info
5156  smart_device::device_info oldinfo = dev->get_info();
5157 
5158  // Open with autodetect support, may return 'better' device
5159  dev.replace( dev->autodetect_open() );
5160 
5161  // Report if type has changed
5162  if (oldinfo.dev_type != dev->get_dev_type())
5163  PrintOut(LOG_INFO, "Device: %s, type changed from '%s' to '%s'\n",
5164  cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5165 
5166  // Return if autodetect_open() failed
5167  if (!dev->is_open()) {
5168  if (debugmode || !scanning)
5169  PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5170  return false;
5171  }
5172 
5173  // Update informal name
5174  cfg.name = dev->get_info().info_name;
5175  PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5176 
5177  // register ATA device
5178  if (dev->is_ata()){
5179  if (ATADeviceScan(cfg, state, dev->to_ata())) {
5180  CanNotRegister(cfg.name.c_str(), "ATA", cfg.lineno, scanning);
5181  return false;
5182  }
5183  }
5184  // or register SCSI device
5185  else if (dev->is_scsi()){
5186  if (SCSIDeviceScan(cfg, state, dev->to_scsi())) {
5187  CanNotRegister(cfg.name.c_str(), "SCSI", cfg.lineno, scanning);
5188  return false;
5189  }
5190  }
5191  // or register NVMe device
5192  else if (dev->is_nvme()) {
5193  if (NVMeDeviceScan(cfg, state, dev->to_nvme())) {
5194  CanNotRegister(cfg.name.c_str(), "NVMe", cfg.lineno, scanning);
5195  return false;
5196  }
5197  }
5198  else {
5199  PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5200  return false;
5201  }
5202 
5203  return true;
5204 }
5205 
5206 // This function tries devices from conf_entries. Each one that can be
5207 // registered is moved onto the [ata|scsi]devices lists and removed
5208 // from the conf_entries list.
5209 static void RegisterDevices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5211 {
5212  // start by clearing lists/memory of ALL existing devices
5213  configs.clear();
5214  devices.clear();
5215  states.clear();
5216 
5217  // Register entries
5218  dev_config_vector ignored_entries;
5219  unsigned numnoscan = 0;
5220  for (unsigned i = 0; i < conf_entries.size(); i++){
5221 
5222  dev_config cfg = conf_entries[i];
5223 
5224  if (cfg.ignore) {
5225  // Store for is_duplicate_device() check and ignore
5226  PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5227  (!cfg.dev_type.empty() ? " [" : ""),
5228  cfg.dev_type.c_str(),
5229  (!cfg.dev_type.empty() ? "]" : ""));
5230  ignored_entries.push_back(cfg);
5231  continue;
5232  }
5233 
5235 
5236  // Device may already be detected during devicescan
5237  bool scanning = false;
5238  if (i < scanned_devs.size()) {
5239  dev = scanned_devs.release(i);
5240  if (dev) {
5241  // Check for a preceding non-DEVICESCAN entry for the same device
5242  if ( (numnoscan || !ignored_entries.empty())
5243  && is_duplicate_device(dev.get(), devices, numnoscan, ignored_entries)) {
5244  PrintOut(LOG_INFO, "Device: %s, duplicate, ignored\n", dev->get_info_name());
5245  continue;
5246  }
5247  scanning = true;
5248  }
5249  }
5250 
5251  // Register device
5252  dev_state state;
5253  if (!register_device(cfg, state, dev)) {
5254  // if device is explictly listed and we can't register it, then
5255  // exit unless the user has specified that the device is removable
5256  if (!scanning) {
5257  if (!(cfg.removable || quit == QUIT_NEVER)) {
5258  PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n", cfg.name.c_str());
5259  EXIT(EXIT_BADDEV);
5260  }
5261  PrintOut(LOG_INFO, "Device: %s, not available\n", cfg.name.c_str());
5262  // Prevent retry of registration
5263  ignored_entries.push_back(cfg);
5264  }
5265  continue;
5266  }
5267 
5268  // move onto the list of devices
5269  configs.push_back(cfg);
5270  states.push_back(state);
5271  devices.push_back(dev);
5272  if (!scanning)
5273  numnoscan = devices.size();
5274  }
5275 
5276  init_disable_standby_check(configs);
5277 }
5278 
5279 
5280 // Main program without exception handling
5281 static int main_worker(int argc, char **argv)
5282 {
5283  // Initialize interface
5285  if (!smi())
5286  return 1;
5287 
5288  // is it our first pass through?
5289  bool firstpass = true;
5290 
5291  // next time to wake up
5292  time_t wakeuptime = 0;
5293 
5294  // parse input and print header and usage info if needed
5295  ParseOpts(argc,argv);
5296 
5297  // Configuration for each device
5298  dev_config_vector configs;
5299  // Device states
5300  dev_state_vector states;
5301  // Devices to monitor
5303 
5304  bool write_states_always = true;
5305 
5306 #ifdef HAVE_LIBCAP_NG
5307  // Drop capabilities
5308  if (enable_capabilities) {
5309  capng_clear(CAPNG_SELECT_BOTH);
5310  capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
5311  CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
5312  capng_apply(CAPNG_SELECT_BOTH);
5313  }
5314 #endif
5315 
5316  // the main loop of the code
5317  for (;;) {
5318 
5319  // are we exiting from a signal?
5320  if (caughtsigEXIT) {
5321  // are we exiting with SIGTERM?
5322  int isterm=(caughtsigEXIT==SIGTERM);
5323  int isquit=(caughtsigEXIT==SIGQUIT);
5324  int isok=debugmode?isterm || isquit:isterm;
5325 
5326  PrintOut(isok?LOG_INFO:LOG_CRIT, "smartd received signal %d: %s\n",
5327  caughtsigEXIT, strsignal(caughtsigEXIT));
5328 
5329  if (!isok)
5330  return EXIT_SIGNAL;
5331 
5332  // Write state files
5333  if (!state_path_prefix.empty())
5334  write_all_dev_states(configs, states);
5335 
5336  return 0;
5337  }
5338 
5339  // Should we (re)read the config file?
5340  if (firstpass || caughtsigHUP){
5341  if (!firstpass) {
5342  // Write state files
5343  if (!state_path_prefix.empty())
5344  write_all_dev_states(configs, states);
5345 
5346  PrintOut(LOG_INFO,
5347  caughtsigHUP==1?
5348  "Signal HUP - rereading configuration file %s\n":
5349  "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5350  configfile);
5351  }
5352 
5353  {
5354  dev_config_vector conf_entries; // Entries read from smartd.conf
5355  smart_device_list scanned_devs; // Devices found during scan
5356  // (re)reads config file, makes >=0 entries
5357  int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5358 
5359  if (entries>=0) {
5360  // checks devices, then moves onto ata/scsi list or deallocates.
5361  RegisterDevices(conf_entries, scanned_devs, configs, states, devices);
5362  if (!(configs.size() == devices.size() && configs.size() == states.size()))
5363  throw std::logic_error("Invalid result from RegisterDevices");
5364  }
5365  else if ( quit == QUIT_NEVER
5366  || ((quit == QUIT_NODEV || quit == QUIT_NODEVSTARTUP) && !firstpass)) {
5367  // user has asked to continue on error in configuration file
5368  if (!firstpass)
5369  PrintOut(LOG_INFO,"Reusing previous configuration\n");
5370  }
5371  else {
5372  // exit with configuration file error status
5373  return (entries==-3 ? EXIT_READCONF : entries==-2 ? EXIT_NOCONF : EXIT_BADCONF);
5374  }
5375  }
5376 
5377  // Log number of devices we are monitoring...
5378  if (devices.size() > 0 || quit == QUIT_NEVER || (quit == QUIT_NODEVSTARTUP && !firstpass)) {
5379  int numata = 0, numscsi = 0;
5380  for (unsigned i = 0; i < devices.size(); i++) {
5381  const smart_device * dev = devices.at(i);
5382  if (dev->is_ata())
5383  numata++;
5384  else if (dev->is_scsi())
5385  numscsi++;
5386  }
5387  PrintOut(LOG_INFO,"Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5388  numata, numscsi, (int)devices.size() - numata - numscsi);
5389  }
5390  else {
5391  PrintOut(LOG_INFO,"Unable to monitor any SMART enabled devices. Try debug (-d) option. Exiting...\n");
5392  return EXIT_NODEV;
5393  }
5394 
5395  if (quit == QUIT_SHOWTESTS) {
5396  // user has asked to print test schedule
5397  PrintTestSchedule(configs, states, devices);
5398  return 0;
5399  }
5400 
5401 #ifdef HAVE_LIBCAP_NG
5402  if (enable_capabilities) {
5403  for (unsigned i = 0; i < configs.size(); i++) {
5404  if (!configs[i].emailaddress.empty() || !configs[i].emailcmdline.empty()) {
5405  PrintOut(LOG_WARNING, "Mail can't be enabled together with --capabilities. All mail will be suppressed.\n");
5406  break;
5407  }
5408  }
5409  }
5410 #endif
5411 
5412  // reset signal
5413  caughtsigHUP=0;
5414 
5415  // Always write state files after (re)configuration
5416  write_states_always = true;
5417  }
5418 
5419  // check all devices once,
5420  // self tests are not started in first pass unless '-q onecheck' is specified
5421  CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit ==