smartmontools SVN Rev 5554
Utility to control and monitor storage systems with "S.M.A.R.T."
smartd.cpp
Go to the documentation of this file.
1/*
2 * Home page of code is: https://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-23 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12#include "config.h"
13#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15// unconditionally included files
16#include <inttypes.h>
17#include <stdio.h>
18#include <sys/types.h>
19#include <sys/stat.h> // umask
20#include <signal.h>
21#include <fcntl.h>
22#include <string.h>
23#include <syslog.h>
24#include <stdarg.h>
25#include <stdlib.h>
26#include <errno.h>
27#include <time.h>
28#include <limits.h>
29#include <getopt.h>
30
31#include <algorithm> // std::replace()
32#include <map>
33#include <stdexcept>
34#include <string>
35#include <vector>
36
37// conditionally included files
38#ifndef _WIN32
39#include <sys/wait.h>
40#endif
41#ifdef HAVE_UNISTD_H
42#include <unistd.h>
43#endif
44
45#ifdef _WIN32
46#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
47#ifdef _MSC_VER
48#pragma warning(disable:4761) // "conversion supplied"
49typedef unsigned short mode_t;
50typedef int pid_t;
51#endif
52#include <io.h> // umask()
53#include <process.h> // getpid()
54#endif // _WIN32
55
56#ifdef __CYGWIN__
57#include <io.h> // setmode()
58#endif // __CYGWIN__
59
60#ifdef HAVE_LIBCAP_NG
61#include <cap-ng.h>
62#endif // LIBCAP_NG
63
64#ifdef HAVE_LIBSYSTEMD
65#include <systemd/sd-daemon.h>
66#endif // HAVE_LIBSYSTEMD
67
68// locally included files
69#include "atacmds.h"
70#include "dev_interface.h"
71#include "knowndrives.h"
72#include "scsicmds.h"
73#include "nvmecmds.h"
74#include "utility.h"
75
76#ifdef HAVE_POSIX_API
77#include "popen_as_ugid.h"
78#endif
79
80#ifdef _WIN32
81// fork()/signal()/initd simulation for native Windows
82#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
83#define strsignal daemon_strsignal
84#define sleep daemon_sleep
85// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
86#define SIGQUIT SIGBREAK
87#define SIGQUIT_KEYNAME "CONTROL-Break"
88#else // _WIN32
89#define SIGQUIT_KEYNAME "CONTROL-\\"
90#endif // _WIN32
91
92const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5519 2023-07-24 15:57:54Z chrfranke $"
93 CONFIG_H_CVSID;
94
95extern "C" {
96 typedef void (*signal_handler_type)(int);
97}
98
100{
101#if defined(_WIN32)
102 // signal() emulation
103 daemon_signal(sig, handler);
104
105#elif defined(HAVE_SIGACTION)
106 // SVr4, POSIX.1-2001, POSIX.1-2008
107 struct sigaction sa;
108 sa.sa_handler = SIG_DFL;
109 sigaction(sig, (struct sigaction *)0, &sa);
110 if (sa.sa_handler == SIG_IGN)
111 return;
112
113 memset(&sa, 0, sizeof(sa));
114 sa.sa_handler = handler;
115 sa.sa_flags = SA_RESTART; // BSD signal() semantics
116 sigaction(sig, &sa, (struct sigaction *)0);
117
118#elif defined(HAVE_SIGSET)
119 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
120 if (sigset(sig, handler) == SIG_IGN)
121 sigset(sig, SIG_IGN);
122
123#else
124 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
125 // Important: BSD semantics is required. Traditional signal()
126 // resets the handler to SIG_DFL after the first signal is caught.
127 if (signal(sig, handler) == SIG_IGN)
128 signal(sig, SIG_IGN);
129#endif
130}
131
132using namespace smartmontools;
133
134static const int scsiLogRespLen = 252;
135
136// smartd exit codes
137#define EXIT_BADCMD 1 // command line did not parse
138#define EXIT_BADCONF 2 // syntax error in config file
139#define EXIT_STARTUP 3 // problem forking daemon
140#define EXIT_PID 4 // problem creating pid file
141#define EXIT_NOCONF 5 // config file does not exist
142#define EXIT_READCONF 6 // config file exists but cannot be read
143
144#define EXIT_NOMEM 8 // out of memory
145#define EXIT_BADCODE 10 // internal error - should NEVER happen
146
147#define EXIT_BADDEV 16 // we can't monitor this device
148#define EXIT_NODEV 17 // no devices to monitor
149
150#define EXIT_SIGNAL 254 // abort on signal
151
152
153// command-line: 1=debug mode, 2=print presets
154static unsigned char debugmode = 0;
155
156// command-line: how long to sleep between checks
157static constexpr int default_checktime = 1800;
159static int checktime_min = 0; // Minimum individual check time, 0 if none
160
161// command-line: name of PID file (empty for no pid file)
162static std::string pid_file;
163
164// command-line: path prefix of persistent state file, empty if no persistence.
165static std::string state_path_prefix
166#ifdef SMARTMONTOOLS_SAVESTATES
167 = SMARTMONTOOLS_SAVESTATES
168#endif
169 ;
170
171// command-line: path prefix of attribute log file, empty if no logs.
172static std::string attrlog_path_prefix
173#ifdef SMARTMONTOOLS_ATTRIBUTELOG
174 = SMARTMONTOOLS_ATTRIBUTELOG
175#endif
176 ;
177
178// configuration file name
179static const char * configfile;
180// configuration file "name" if read from stdin
181static const char * const configfile_stdin = "<stdin>";
182// path of alternate configuration file
183static std::string configfile_alt;
184
185// warning script file
186static std::string warning_script;
187
188#ifdef HAVE_POSIX_API
189// run warning script as non-privileged user
190static bool warn_as_user;
191static uid_t warn_uid;
192static gid_t warn_gid;
193static std::string warn_uname, warn_gname;
194#elif defined(_WIN32)
195// run warning script as restricted user
196static bool warn_as_restr_user;
197#endif
198
199// command-line: when should we exit?
200enum quit_t {
205static bool quit_nodev0 = false;
206
207// command-line; this is the default syslog(3) log facility to use.
208static int facility=LOG_DAEMON;
209
210#ifndef _WIN32
211// command-line: fork into background?
212static bool do_fork=true;
213#endif
214
215// TODO: This smartctl only variable is also used in some os_*.cpp
216unsigned char failuretest_permissive = 0;
217
218// set to one if we catch a USR1 (check devices now)
219static volatile int caughtsigUSR1=0;
220
221#ifdef _WIN32
222// set to one if we catch a USR2 (toggle debug mode)
223static volatile int caughtsigUSR2=0;
224#endif
225
226// set to one if we catch a HUP (reload config file). In debug mode,
227// set to two, if we catch INT (also reload config file).
228static volatile int caughtsigHUP=0;
229
230// set to signal value if we catch INT, QUIT, or TERM
231static volatile int caughtsigEXIT=0;
232
233// This function prints either to stdout or to the syslog as needed.
234static void PrintOut(int priority, const char *fmt, ...)
236
237#ifdef HAVE_LIBSYSTEMD
238// systemd notify support
239
240static bool notify_enabled = false;
241static bool notify_ready = false;
242
243static inline void notify_init()
244{
245 if (!getenv("NOTIFY_SOCKET"))
246 return;
247 notify_enabled = true;
248}
249
250static inline bool notify_post_init()
251{
252 if (!notify_enabled)
253 return true;
254 if (do_fork) {
255 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
256 return false;
257 }
258 return true;
259}
260
261static inline void notify_extend_timeout()
262{
263 if (!notify_enabled)
264 return;
265 if (notify_ready)
266 return;
267 const char * notify = "EXTEND_TIMEOUT_USEC=20000000"; // typical drive spinup time is 20s tops
268 if (debugmode) {
269 pout("sd_notify(0, \"%s\")\n", notify);
270 return;
271 }
272 sd_notify(0, notify);
273}
274
275static void notify_msg(const char * msg, bool ready = false)
276{
277 if (!notify_enabled)
278 return;
279 if (debugmode) {
280 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
281 return;
282 }
283 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
284}
285
286static void notify_check(int numdev)
287{
288 if (!notify_enabled)
289 return;
290 char msg[32];
291 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
292 numdev, (numdev != 1 ? "s" : ""));
293 notify_msg(msg);
294}
295
296static void notify_wait(time_t wakeuptime, int numdev)
297{
298 if (!notify_enabled)
299 return;
300 char ts[16] = ""; struct tm tmbuf;
301 strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
302 char msg[64];
303 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
304 numdev, (numdev != 1 ? "s" : ""), ts);
305 notify_msg(msg, !notify_ready); // first call notifies READY=1
306 notify_ready = true;
307}
308
309static void notify_exit(int status)
310{
311 if (!notify_enabled)
312 return;
313 const char * msg;
314 switch (status) {
315 case 0: msg = "Exiting ..."; break;
316 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
317 case EXIT_BADCONF: case EXIT_NOCONF:
318 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
319 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
320 case EXIT_NODEV: msg = "No devices to monitor"; break;
321 default: msg = "Error (see SYSLOG)"; break;
322 }
323 // Ensure that READY=1 is notified before 'exit(0)' because otherwise
324 // systemd will report a service (protocol) failure
325 notify_msg(msg, (!status && !notify_ready));
326}
327
328#else // HAVE_LIBSYSTEMD
329// No systemd notify support
330
331static inline bool notify_post_init()
332{
333#ifdef __linux__
334 if (getenv("NOTIFY_SOCKET")) {
335 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
336 return false;
337 }
338#endif
339 return true;
340}
341
// Empty replacements used if systemd notify support is not available.
// The signatures accept the same calls as the variants with support,
// including the optional 'ready' argument of notify_msg().
static inline void notify_init() { }
static inline void notify_extend_timeout() { }
static inline void notify_msg(const char *, bool = false) { }
static inline void notify_check(int) { }
static inline void notify_wait(time_t, int) { }
static inline void notify_exit(int) { }
348
349#endif // HAVE_LIBSYSTEMD
350
351// Email frequencies
352enum class emailfreqs : unsigned char {
354};
355
356// Attribute monitoring flags.
357// See monitor_attr_flags below.
358enum {
365};
366
367// Array of flags for each attribute.
369{
370public:
371 bool is_set(int id, unsigned char flag) const
372 { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }
373
374 void set(int id, unsigned char flags)
375 {
376 if (0 < id && id < (int)sizeof(m_flags))
377 m_flags[id] |= flags;
378 }
379
380private:
381 unsigned char m_flags[256]{};
382};
383
384
385/// Configuration data for a device. Read from smartd.conf.
386/// Supports copy & assignment and is compatible with STL containers.
388{
389 int lineno{}; // Line number of entry in file
390 std::string name; // Device name (with optional extra info)
391 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
392 std::string dev_type; // Device type argument from -d directive, empty if none
393 std::string dev_idinfo; // Device identify info for warning emails
394 std::string state_file; // Path of the persistent state file, empty if none
395 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
396 int checktime{}; // Individual check interval, 0 if none
397 bool ignore{}; // Ignore this entry
398 bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
399 bool smartcheck{}; // Check SMART status
400 bool usagefailed{}; // Check for failed Usage Attributes
401 bool prefail{}; // Track changes in Prefail Attributes
402 bool usage{}; // Track changes in Usage Attributes
403 bool selftest{}; // Monitor number of selftest errors
404 bool errorlog{}; // Monitor number of ATA errors
405 bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
406 bool offlinests{}; // Monitor changes in offline data collection status
407 bool offlinests_ns{}; // Disable auto standby if in progress
408 bool selfteststs{}; // Monitor changes in self-test execution status
409 bool selfteststs_ns{}; // Disable auto standby if in progress
410 bool permissive{}; // Ignore failed SMART commands
411 char autosave{}; // 1=disable, 2=enable Autosave Attributes
412 char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
413 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
414 bool ignorepresets{}; // Ignore database of -v options
415 bool showpresets{}; // Show database entry for this device
416 bool removable{}; // Device may disappear (not be present)
417 char powermode{}; // skip check, if disk in idle or standby mode
418 bool powerquiet{}; // skip powermode 'skipping checks' message
419 int powerskipmax{}; // how many times can be check skipped
420 unsigned char tempdiff{}; // Track Temperature changes >= this limit
421 unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
422 regular_expression test_regex; // Regex for scheduled testing
423 unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
424
425 // Configuration of email warning messages
426 std::string emailcmdline; // script to execute, empty if no messages
427 std::string emailaddress; // email address, or empty
428 emailfreqs emailfreq{}; // Send emails once, daily, diminishing
429 bool emailtest{}; // Send test email?
430
431 // ATA ONLY
432 int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
433 int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
434 int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
435 int set_lookahead{}; // disable(-1), enable(1) read look-ahead
436 int set_standby{}; // set(1..255->0..254) standby timer
437 bool set_security_freeze{}; // Freeze ATA security
438 int set_wcache{}; // disable(-1), enable(1) write cache
439 int set_dsn{}; // disable(0x2), enable(0x1) DSN
440
441 bool sct_erc_set{}; // set SCT ERC to:
442 unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
443 unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
444
445 unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
446 unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
447 bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
448 bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
449
450 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
451
453
454 // NVMe only
455 unsigned nvme_err_log_max_entries{}; // size of error log
456};
457
458// Number of allowed mail message types
459static const int SMARTD_NMAIL = 13;
460// Type for '-M test' mails (state not persistent)
461static const int MAILTYPE_TEST = 0;
462// TODO: Add const or enum for all mail types.
463
// Bookkeeping of the warning emails sent for one mail type.
// All members are zero in a new object.
struct mailinfo {
  // number of times an email has been sent
  int logged = 0;
  // time first email was sent, as defined by time(2)
  time_t firstsent = 0;
  // time last email was sent, as defined by time(2)
  time_t lastsent = 0;
};
469
470/// Persistent state data for a device.
472{
473 unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
474
475 unsigned char selflogcount{}; // total number of self-test errors
476 unsigned short selfloghour{}; // lifetime hours of last self-test error
477
478 time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
479
480 uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
481 uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
482
483 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
484
485 // ATA ONLY
486 int ataerrorcount{}; // Total number of ATA errors
487
488 // Persistent part of ata_smart_values:
490 unsigned char id{};
491 unsigned char val{};
492 unsigned char worst{}; // Byte needed for 'raw64' attribute only.
493 uint64_t raw{};
494 unsigned char resvd{};
495 };
497
498 // SCSI ONLY
499
502 unsigned char found{};
503 };
505
508 unsigned char found{};
509 };
511
512 // NVMe only
514};
515
516/// Non-persistent state data for a device.
518{
519 bool must_write{}; // true if persistent part should be written
520
521 bool skip{}; // skip during next check cycle
522 time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
523
524 bool not_cap_offline{}; // true == not capable of offline testing
529
530 unsigned char temperature{}; // last recorded Temperature (in Celsius)
531 time_t tempmin_delay{}; // time where Min Temperature tracking will start
532
533 bool removed{}; // true if open() failed for removable device
534
535 bool powermodefail{}; // true if power mode check failed
536 int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
537 int lastpowermodeskipped{}; // the last power mode that was skipped
538
539 bool attrlog_dirty{}; // true if persistent part has new attr values that
540 // need to be written to attrlog
541
542 // SCSI ONLY
543 // TODO: change to bool
544 unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
545 unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
550 unsigned char SuppressReport{}; // minimize nuisance reports
551 unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
552 // know yet) 6 or 10
553 // ATA ONLY
554 uint64_t num_sectors{}; // Number of sectors
555 ata_smart_values smartval{}; // SMART data
557 bool offline_started{}; // true if offline data collection was started
558 bool selftest_started{}; // true if self-test was started
559};
560
561/// Runtime state data for a device.
563: public persistent_dev_state,
564 public temp_dev_state
565{
567 void update_temp_state();
568};
569
570/// Container for configuration info for each device.
571typedef std::vector<dev_config> dev_config_vector;
572
573/// Container for state info for each device.
574typedef std::vector<dev_state> dev_state_vector;
575
576// Copy ATA attributes to persistent state.
578{
579 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
582 pa.id = ta.id;
583 if (ta.id == 0) {
584 pa.val = pa.worst = 0; pa.raw = 0;
585 continue;
586 }
587 pa.val = ta.current;
588 pa.worst = ta.worst;
589 pa.raw = ta.raw[0]
590 | ( ta.raw[1] << 8)
591 | ( ta.raw[2] << 16)
592 | ((uint64_t)ta.raw[3] << 24)
593 | ((uint64_t)ta.raw[4] << 32)
594 | ((uint64_t)ta.raw[5] << 40);
595 pa.resvd = ta.reserv;
596 }
597}
598
599// Copy ATA from persistent to temp state.
601{
602 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
603 const ata_attribute & pa = ata_attributes[i];
605 ta.id = pa.id;
606 if (pa.id == 0) {
607 ta.current = ta.worst = 0;
608 memset(ta.raw, 0, sizeof(ta.raw));
609 continue;
610 }
611 ta.current = pa.val;
612 ta.worst = pa.worst;
613 ta.raw[0] = (unsigned char) pa.raw;
614 ta.raw[1] = (unsigned char)(pa.raw >> 8);
615 ta.raw[2] = (unsigned char)(pa.raw >> 16);
616 ta.raw[3] = (unsigned char)(pa.raw >> 24);
617 ta.raw[4] = (unsigned char)(pa.raw >> 32);
618 ta.raw[5] = (unsigned char)(pa.raw >> 40);
619 ta.reserv = pa.resvd;
620 }
621}
622
623// Parse a line from a state file.
624static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
625{
626 static const regular_expression regex(
627 "^ *"
628 "((temperature-min)" // (1 (2)
629 "|(temperature-max)" // (3)
630 "|(self-test-errors)" // (4)
631 "|(self-test-last-err-hour)" // (5)
632 "|(scheduled-test-next-check)" // (6)
633 "|(selective-test-last-start)" // (7)
634 "|(selective-test-last-end)" // (8)
635 "|(ata-error-count)" // (9)
636 "|(mail\\.([0-9]+)\\." // (10 (11)
637 "((count)" // (12 (13)
638 "|(first-sent-time)" // (14)
639 "|(last-sent-time)" // (15)
640 ")" // 12)
641 ")" // 10)
642 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
643 "((id)" // (18 (19)
644 "|(val)" // (20)
645 "|(worst)" // (21)
646 "|(raw)" // (22)
647 "|(resvd)" // (23)
648 ")" // 18)
649 ")" // 16)
650 "|(nvme-err-log-entries)" // (24)
651 ")" // 1)
652 " *= *([0-9]+)[ \n]*$" // (25)
653 );
654
655 const int nmatch = 1+25;
657 if (!regex.execute(line, nmatch, match))
658 return false;
659 if (match[nmatch-1].rm_so < 0)
660 return false;
661
662 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
663
664 int m = 1;
665 if (match[++m].rm_so >= 0)
666 state.tempmin = (unsigned char)val;
667 else if (match[++m].rm_so >= 0)
668 state.tempmax = (unsigned char)val;
669 else if (match[++m].rm_so >= 0)
670 state.selflogcount = (unsigned char)val;
671 else if (match[++m].rm_so >= 0)
672 state.selfloghour = (unsigned short)val;
673 else if (match[++m].rm_so >= 0)
674 state.scheduled_test_next_check = (time_t)val;
675 else if (match[++m].rm_so >= 0)
676 state.selective_test_last_start = val;
677 else if (match[++m].rm_so >= 0)
678 state.selective_test_last_end = val;
679 else if (match[++m].rm_so >= 0)
680 state.ataerrorcount = (int)val;
681 else if (match[m+=2].rm_so >= 0) {
682 int i = atoi(line+match[m].rm_so);
683 if (!(0 <= i && i < SMARTD_NMAIL))
684 return false;
685 if (i == MAILTYPE_TEST) // Don't suppress test mails
686 return true;
687 if (match[m+=2].rm_so >= 0)
688 state.maillog[i].logged = (int)val;
689 else if (match[++m].rm_so >= 0)
690 state.maillog[i].firstsent = (time_t)val;
691 else if (match[++m].rm_so >= 0)
692 state.maillog[i].lastsent = (time_t)val;
693 else
694 return false;
695 }
696 else if (match[m+=5+1].rm_so >= 0) {
697 int i = atoi(line+match[m].rm_so);
698 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
699 return false;
700 if (match[m+=2].rm_so >= 0)
701 state.ata_attributes[i].id = (unsigned char)val;
702 else if (match[++m].rm_so >= 0)
703 state.ata_attributes[i].val = (unsigned char)val;
704 else if (match[++m].rm_so >= 0)
705 state.ata_attributes[i].worst = (unsigned char)val;
706 else if (match[++m].rm_so >= 0)
707 state.ata_attributes[i].raw = val;
708 else if (match[++m].rm_so >= 0)
709 state.ata_attributes[i].resvd = (unsigned char)val;
710 else
711 return false;
712 }
713 else if (match[m+7].rm_so >= 0)
714 state.nvme_err_log_entries = val;
715 else
716 return false;
717 return true;
718}
719
720// Read a state file.
721static bool read_dev_state(const char * path, persistent_dev_state & state)
722{
723 stdio_file f(path, "r");
724 if (!f) {
725 if (errno != ENOENT)
726 pout("Cannot read state file \"%s\"\n", path);
727 return false;
728 }
729#ifdef __CYGWIN__
730 setmode(fileno(f), O_TEXT); // Allow files with \r\n
731#endif
732
733 persistent_dev_state new_state;
734 int good = 0, bad = 0;
735 char line[256];
736 while (fgets(line, sizeof(line), f)) {
737 const char * s = line + strspn(line, " \t");
738 if (!*s || *s == '#')
739 continue;
740 if (!parse_dev_state_line(line, new_state))
741 bad++;
742 else
743 good++;
744 }
745
746 if (bad) {
747 if (!good) {
748 pout("%s: format error\n", path);
749 return false;
750 }
751 pout("%s: %d invalid line(s) ignored\n", path, bad);
752 }
753
754 // This sets the values missing in the file to 0.
755 state = new_state;
756 return true;
757}
758
// Write line "NAME = VAL" to file F.
// Nothing is written if VAL is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
764
// Write line "NAME1.ID.NAME2 = VAL" to file F.
// Nothing is written if VAL is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
770
771// Write a state file
772static bool write_dev_state(const char * path, const persistent_dev_state & state)
773{
774 // Rename old "file" to "file~"
775 std::string pathbak = path; pathbak += '~';
776 unlink(pathbak.c_str());
777 rename(path, pathbak.c_str());
778
779 stdio_file f(path, "w");
780 if (!f) {
781 pout("Cannot create state file \"%s\"\n", path);
782 return false;
783 }
784
785 fprintf(f, "# smartd state file\n");
786 write_dev_state_line(f, "temperature-min", state.tempmin);
787 write_dev_state_line(f, "temperature-max", state.tempmax);
788 write_dev_state_line(f, "self-test-errors", state.selflogcount);
789 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
790 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
791 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
792 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
793
794 for (int i = 0; i < SMARTD_NMAIL; i++) {
795 if (i == MAILTYPE_TEST) // Don't suppress test mails
796 continue;
797 const mailinfo & mi = state.maillog[i];
798 if (!mi.logged)
799 continue;
800 write_dev_state_line(f, "mail", i, "count", mi.logged);
801 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
802 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
803 }
804
805 // ATA ONLY
806 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
807
808 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
809 const auto & pa = state.ata_attributes[i];
810 if (!pa.id)
811 continue;
812 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
813 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
814 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
815 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
816 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
817 }
818
819 // NVMe only
820 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
821
822 return true;
823}
824
825// Write to the attrlog file
826static bool write_dev_attrlog(const char * path, const dev_state & state)
827{
828 stdio_file f(path, "a");
829 if (!f) {
830 pout("Cannot create attribute log file \"%s\"\n", path);
831 return false;
832 }
833
834
835 time_t now = time(nullptr);
836 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
837 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
838 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
839 tms->tm_hour, tms->tm_min, tms->tm_sec);
840 // ATA ONLY
841 for (const auto & pa : state.ata_attributes) {
842 if (!pa.id)
843 continue;
844 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
845 }
846 // SCSI ONLY
847 const struct scsiErrorCounter * ecp;
848 const char * pageNames[3] = {"read", "write", "verify"};
849 for (int k = 0; k < 3; ++k) {
850 if ( !state.scsi_error_counters[k].found ) continue;
851 ecp = &state.scsi_error_counters[k].errCounter;
852 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
853 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
854 "\t%s-corr-by-retry;%" PRIu64 ";"
855 "\t%s-total-err-corrected;%" PRIu64 ";"
856 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
857 "\t%s-gb-processed;%.3f;"
858 "\t%s-total-unc-errors;%" PRIu64 ";",
859 pageNames[k], ecp->counter[0],
860 pageNames[k], ecp->counter[1],
861 pageNames[k], ecp->counter[2],
862 pageNames[k], ecp->counter[3],
863 pageNames[k], ecp->counter[4],
864 pageNames[k], (ecp->counter[5] / 1000000000.0),
865 pageNames[k], ecp->counter[6]);
866 }
867 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
868 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
869 }
870 // write SCSI current temperature if it is monitored
871 if (state.temperature)
872 fprintf(f, "\ttemperature;%d;", state.temperature);
873 // end of line
874 fprintf(f, "\n");
875 return true;
876}
877
878// Write all state files. If write_always is false, don't write
879// unless must_write is set.
880static void write_all_dev_states(const dev_config_vector & configs,
881 dev_state_vector & states,
882 bool write_always = true)
883{
884 for (unsigned i = 0; i < states.size(); i++) {
885 const dev_config & cfg = configs.at(i);
886 if (cfg.state_file.empty())
887 continue;
888 dev_state & state = states[i];
889 if (!write_always && !state.must_write)
890 continue;
891 if (!write_dev_state(cfg.state_file.c_str(), state))
892 continue;
893 state.must_write = false;
894 if (write_always || debugmode)
895 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
896 cfg.name.c_str(), cfg.state_file.c_str());
897 }
898}
899
900// Write to all attrlog files
901static void write_all_dev_attrlogs(const dev_config_vector & configs,
902 dev_state_vector & states)
903{
904 for (unsigned i = 0; i < states.size(); i++) {
905 const dev_config & cfg = configs.at(i);
906 if (cfg.attrlog_file.empty())
907 continue;
908 dev_state & state = states[i];
909 if (state.attrlog_dirty) {
910 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
911 state.attrlog_dirty = false;
912 }
913 }
914}
915
916extern "C" { // signal handlers require C-linkage
917
918// Note if we catch a SIGUSR1
919static void USR1handler(int sig)
920{
921 if (SIGUSR1==sig)
923 return;
924}
925
926#ifdef _WIN32
927// Note if we catch a SIGUSR2
928static void USR2handler(int sig)
929{
930 if (SIGUSR2==sig)
931 caughtsigUSR2=1;
932 return;
933}
934#endif
935
936// Note if we catch a HUP (or INT in debug mode)
937static void HUPhandler(int sig)
938{
939 if (sig==SIGHUP)
940 caughtsigHUP=1;
941 else
942 caughtsigHUP=2;
943 return;
944}
945
946// signal handler for TERM, QUIT, and INT (if not in debug mode)
947static void sighandler(int sig)
948{
949 if (!caughtsigEXIT)
950 caughtsigEXIT=sig;
951 return;
952}
953
954} // extern "C"
955
956#ifdef HAVE_LIBCAP_NG
957// capabilities(7) support
958
959static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
960
961static void capabilities_drop_now()
962{
963 if (!capabilities_mode)
964 return;
965 capng_clear(CAPNG_SELECT_BOTH);
966 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
967 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
968 if (warn_as_user && (warn_uid || warn_gid)) {
969 // For popen_as_ugid()
970 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
971 CAP_SETGID, CAP_SETUID, -1);
972 }
973 if (capabilities_mode > 1) {
974 // For exim MTA
975 capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
976 CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
977 }
978 capng_apply(CAPNG_SELECT_BOTH);
979}
980
981static void capabilities_log_error_hint()
982{
983 if (!capabilities_mode)
984 return;
985 PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
986 (capabilities_mode == 1 ? "', try '--capabilities=mail'"
987 : "=mail', please inform " PACKAGE_BUGREPORT));
988}
989
990#else // HAVE_LIBCAP_NG
991// No capabilities(7) support
992
// Empty replacements used if capabilities(7) support is not available.
static inline void capabilities_drop_now()
{
}

static inline void capabilities_log_error_hint()
{
}
995
996#endif // HAVE_LIBCAP_NG
997
998// a replacement for setenv() which is not available on all platforms.
999// Note that the string passed to putenv must not be freed or made
1000// invalid, since a pointer to it is kept by putenv(). This means that
1001// it must either be a static buffer or allocated off the heap. The
1002// string can be freed if the environment variable is redefined via
1003// another call to putenv(). There is no portable way to unset a variable
1004// with putenv(). So we manage the buffer in a static object.
1005// Using setenv() if available is not considered because some
1006// implementations may produce memory leaks.
1007
1009{
1010public:
1011 env_buffer() = default;
1012 env_buffer(const env_buffer &) = delete;
1013 void operator=(const env_buffer &) = delete;
1014
1015 void set(const char * name, const char * value);
1016private:
1017 char * m_buf = nullptr;
1018};
1019
1020void env_buffer::set(const char * name, const char * value)
1021{
1022 int size = strlen(name) + 1 + strlen(value) + 1;
1023 char * newbuf = new char[size];
1024 snprintf(newbuf, size, "%s=%s", name, value);
1025
1026 if (putenv(newbuf))
1027 throw std::runtime_error("putenv() failed");
1028
1029 // This assumes that the same NAME is passed on each call
1030 delete [] m_buf;
1031 m_buf = newbuf;
1032}
1033
1034#define EBUFLEN 1024
1035
1036static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1038
1039// If either address or executable path is non-null then send and log
1040// a warning email, or execute executable
1041static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1042{
1043 // See if user wants us to send mail
1044 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1045 return;
1046
1047 // Which type of mail are we sending?
1048 static const char * const whichfail[] = {
1049 "EmailTest", // 0
1050 "Health", // 1
1051 "Usage", // 2
1052 "SelfTest", // 3
1053 "ErrorCount", // 4
1054 "FailedHealthCheck", // 5
1055 "FailedReadSmartData", // 6
1056 "FailedReadSmartErrorLog", // 7
1057 "FailedReadSmartSelfTestLog", // 8
1058 "FailedOpenDevice", // 9
1059 "CurrentPendingSector", // 10
1060 "OfflineUncorrectableSector", // 11
1061 "Temperature" // 12
1062 };
1063 STATIC_ASSERT(sizeof(whichfail) == SMARTD_NMAIL * sizeof(whichfail[0]));
1064
1065 if (!(0 <= which && which < SMARTD_NMAIL)) {
1066 PrintOut(LOG_CRIT, "Internal error in MailWarning(): which=%d\n", which);
1067 return;
1068 }
1069 mailinfo * mail = state.maillog + which;
1070
1071 // Calc current and next interval for warning reminder emails
1072 int days, nextdays;
1073 if (which == 0)
1074 days = nextdays = -1; // EmailTest
1075 else switch (cfg.emailfreq) {
1076 case emailfreqs::once:
1077 days = nextdays = -1; break;
1078 case emailfreqs::always:
1079 days = nextdays = 0; break;
1080 case emailfreqs::daily:
1081 days = nextdays = 1; break;
1083 // 0, 1, 2, 3, 4, 5, 6, 7, ... => 1, 2, 4, 8, 16, 32, 32, 32, ...
1084 nextdays = 1 << ((unsigned)mail->logged <= 5 ? mail->logged : 5);
1085 // 0, 1, 2, 3, 4, 5, 6, 7, ... => 0, 1, 2, 4, 8, 16, 32, 32, ... (0 not used below)
1086 days = ((unsigned)mail->logged <= 5 ? nextdays >> 1 : nextdays);
1087 break;
1088 default:
1089 PrintOut(LOG_CRIT, "Internal error in MailWarning(): cfg.emailfreq=%d\n", (int)cfg.emailfreq);
1090 return;
1091 }
1092
1093 time_t now = time(nullptr);
1094 if (mail->logged) {
1095 // Return if no warning reminder email needs to be sent (now)
1096 if (days < 0)
1097 return; // '-M once' or EmailTest
1098 if (days > 0 && now < mail->lastsent + days * 24 * 3600)
1099 return; // '-M daily/diminishing' and too early
1100 }
1101 else {
1102 // Record the time of this first email message
1103 mail->firstsent = now;
1104 }
1105
1106 // Record the time of this email message
1107 mail->lastsent = now;
1108
1109 // print warning string into message
1110 // Note: Message length may reach ~300 characters as device names may be
1111 // very long on certain platforms (macOS ~230 characters).
1112 // Message length must not exceed email line length limit, see RFC 5322:
1113 // "... MUST be no more than 998 characters, ... excluding the CRLF."
1114 char message[512];
1115 va_list ap;
1116 va_start(ap, fmt);
1117 vsnprintf(message, sizeof(message), fmt, ap);
1118 va_end(ap);
1119
1120 // replace commas by spaces to separate recipients
1121 std::string address = cfg.emailaddress;
1122 std::replace(address.begin(), address.end(), ',', ' ');
1123
1124 // Export information in environment variables that will be useful
1125 // for user scripts
1126 const char * executable = cfg.emailcmdline.c_str();
1127 static env_buffer env[13];
1128 env[0].set("SMARTD_MAILER", executable);
1129 env[1].set("SMARTD_MESSAGE", message);
1130 char dates[DATEANDEPOCHLEN];
1131 snprintf(dates, sizeof(dates), "%d", mail->logged);
1132 env[2].set("SMARTD_PREVCNT", dates);
1133 dateandtimezoneepoch(dates, mail->firstsent);
1134 env[3].set("SMARTD_TFIRST", dates);
1135 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1136 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1137 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1138 env[6].set("SMARTD_ADDRESS", address.c_str());
1139 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1140
1141 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1142 env[8].set("SMARTD_DEVICETYPE",
1143 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1144 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1145
1146 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1147 dates[0] = 0;
1148 if (nextdays >= 0)
1149 snprintf(dates, sizeof(dates), "%d", nextdays);
1150 env[11].set("SMARTD_NEXTDAYS", dates);
1151 // Avoid false positive recursion detection by smartd_warning.{sh,cmd}
1152 env[12].set("SMARTD_SUBJECT", "");
1153
1154 // now construct a command to send this as EMAIL
1155 if (!*executable)
1156 executable = "<mail>";
1157 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1158 const char * newwarn = (which? "Warning via" : "Test of");
1159
1160 char command[256];
1161#ifdef _WIN32
1162 // Path may contain spaces
1163 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1164#else
1165 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1166#endif
1167
1168 // tell SYSLOG what we are about to do...
1169 PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
1170 (which ? "Sending warning via" : "Executing test of"), executable, newadd,
1171 (
1172#ifdef HAVE_POSIX_API
1173 warn_as_user ?
1174 strprintf(" (uid=%u(%s) gid=%u(%s))",
1175 (unsigned)warn_uid, warn_uname.c_str(),
1176 (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
1177#elif defined(_WIN32)
1178 warn_as_restr_user ? " (restricted user)" :
1179#endif
1180 ""
1181 )
1182 );
1183
1184 // issue the command to send mail or to run the user's executable
1185 errno=0;
1186 FILE * pfp;
1187
1188#ifdef HAVE_POSIX_API
1189 if (warn_as_user) {
1190 pfp = popen_as_ugid(command, "r", warn_uid, warn_gid);
1191 } else
1192#endif
1193 {
1194#ifdef _WIN32
1195 pfp = popen_as_restr_user(command, "r", warn_as_restr_user);
1196#else
1197 pfp = popen(command, "r");
1198#endif
1199 }
1200
1201 if (!pfp)
1202 // failed to popen() mail process
1203 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1204 newwarn, executable, newadd, errno?strerror(errno):"");
1205 else {
1206 // pipe succeeded!
1207 int len;
1208 char buffer[EBUFLEN];
1209
1210 // if unexpected output on stdout/stderr, null terminate, print, and flush
1211 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1212 int count=0;
1213 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1214 buffer[newlen]='\0';
1215 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1216 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1217
1218 // flush pipe if needed
1219 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1220 count++;
1221
1222 // tell user that pipe was flushed, or that something is really wrong
1223 if (count && count<EBUFLEN)
1224 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1225 newwarn, executable, newadd);
1226 else if (count)
1227 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1228 newwarn, executable, newadd);
1229 }
1230
1231 // if something went wrong with mail process, print warning
1232 errno=0;
1233 int status;
1234
1235#ifdef HAVE_POSIX_API
1236 if (warn_as_user) {
1237 status = pclose_as_ugid(pfp);
1238 } else
1239#endif
1240 {
1241 status = pclose(pfp);
1242 }
1243
1244 if (status == -1)
1245 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1246 errno?strerror(errno):"");
1247 else {
1248 // mail process apparently succeeded. Check and report exit status
1249 if (WIFEXITED(status)) {
1250 // exited 'normally' (but perhaps with nonzero status)
1251 int status8 = WEXITSTATUS(status);
1252 if (status8>128)
1253 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1254 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1255 else if (status8) {
1256 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1257 newwarn, executable, newadd, status, status8);
1259 }
1260 else
1261 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1262 }
1263
1264 if (WIFSIGNALED(status))
1265 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1266 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1267
1268 // this branch is probably not possible. If subprocess is
1269 // stopped then pclose() should not return.
1270 if (WIFSTOPPED(status))
1271 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1272 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1273
1274 }
1275 }
1276
1277 // increment mail sent counter
1278 mail->logged++;
1279}
1280
1281static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1283
1284static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1285{
1286 if (!(0 <= which && which < SMARTD_NMAIL))
1287 return;
1288
1289 // Return if no mail sent yet
1290 mailinfo & mi = state.maillog[which];
1291 if (!mi.logged)
1292 return;
1293
1294 // Format & print message
1295 char msg[256];
1296 va_list ap;
1297 va_start(ap, fmt);
1298 vsnprintf(msg, sizeof(msg), fmt, ap);
1299 va_end(ap);
1300
1301 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1302 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1303
1304 // Clear mail counter and timestamps
1305 mi = mailinfo();
1306 state.must_write = true;
1307}
1308
1309#ifndef _WIN32
1310
1311// Output multiple lines via separate syslog(3) calls.
1313static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1314{
1315 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1316 vsnprintf(buf, sizeof(buf), fmt, ap);
1317
1318 for (char * p = buf, * q; p && *p; p = q) {
1319 if ((q = strchr(p, '\n')))
1320 *q++ = 0;
1321 if (*p)
1322 syslog(priority, "%s\n", p);
1323 }
1324}
1325
1326#else // _WIN32
1327// os_win32/syslog_win32.cpp supports multiple lines.
1328#define vsyslog_lines vsyslog
1329#endif // _WIN32
1330
1331// Printing function for watching ataprint commands, or losing them
1332// [From GLIBC Manual: Since the prototype doesn't specify types for
1333// optional arguments, in a call to a variadic function the default
1334// argument promotions are performed on the optional argument
1335// values. This means the objects of type char or short int (whether
1336// signed or not) are promoted to either int or unsigned int, as
1337// appropriate.]
1338void pout(const char *fmt, ...){
1339 va_list ap;
1340
1341 // get the correct time in syslog()
1343 // initialize variable argument list
1344 va_start(ap,fmt);
1345 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1346 if (debugmode && debugmode != 2) {
1347 FILE * f = stdout;
1348#ifdef _WIN32
1349 if (facility == LOG_LOCAL1) // logging to stdout
1350 f = stderr;
1351#endif
1352 vfprintf(f, fmt, ap);
1353 fflush(f);
1354 }
1355 // in debugmode==2 mode we print output from knowndrives.o functions
1356 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1357 openlog("smartd", LOG_PID, facility);
1358 vsyslog_lines(LOG_INFO, fmt, ap);
1359 closelog();
1360 }
1361 va_end(ap);
1362 return;
1363}
1364
1365// This function prints either to stdout or to the syslog as needed.
1366static void PrintOut(int priority, const char *fmt, ...){
1367 va_list ap;
1368
1369 // get the correct time in syslog()
1371 // initialize variable argument list
1372 va_start(ap,fmt);
1373 if (debugmode) {
1374 FILE * f = stdout;
1375#ifdef _WIN32
1376 if (facility == LOG_LOCAL1) // logging to stdout
1377 f = stderr;
1378#endif
1379 vfprintf(f, fmt, ap);
1380 fflush(f);
1381 }
1382 else {
1383 openlog("smartd", LOG_PID, facility);
1384 vsyslog_lines(priority, fmt, ap);
1385 closelog();
1386 }
1387 va_end(ap);
1388 return;
1389}
1390
1391// Used to warn users about invalid checksums. Called from atacmds.cpp.
1392void checksumwarning(const char * string)
1393{
1394 pout("Warning! %s error: invalid SMART checksum.\n", string);
1395}
1396
1397#ifndef _WIN32
1398
1399// Wait for the pid file to show up, this makes sure a calling program knows
1400// that the daemon is really up and running and has a pid to kill it
1401static bool WaitForPidFile()
1402{
1403 int waited, max_wait = 10;
1404 struct stat stat_buf;
1405
1406 if (pid_file.empty() || debugmode)
1407 return true;
1408
1409 for(waited = 0; waited < max_wait; ++waited) {
1410 if (!stat(pid_file.c_str(), &stat_buf)) {
1411 return true;
1412 } else
1413 sleep(1);
1414 }
1415 return false;
1416}
1417
1418#endif // _WIN32
1419
1420// Forks new process if needed, closes ALL file descriptors,
1421// redirects stdin, stdout, and stderr. Not quite daemon().
1422// See https://www.linuxjournal.com/article/2335
1423// for a good description of why we do things this way.
1424static int daemon_init()
1425{
1426#ifndef _WIN32
1427
1428 // flush all buffered streams. Else we might get two copies of open
1429 // streams since both parent and child get copies of the buffers.
1430 fflush(nullptr);
1431
1432 if (do_fork) {
1433 pid_t pid;
1434 if ((pid=fork()) < 0) {
1435 // unable to fork!
1436 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1437 return EXIT_STARTUP;
1438 }
1439 if (pid) {
1440 // we are the parent process, wait for pid file, then exit cleanly
1441 if(!WaitForPidFile()) {
1442 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1443 return EXIT_STARTUP;
1444 }
1445 return 0;
1446 }
1447
1448 // from here on, we are the child process.
1449 setsid();
1450
1451 // Fork one more time to avoid any possibility of having terminals
1452 if ((pid=fork()) < 0) {
1453 // unable to fork!
1454 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1455 return EXIT_STARTUP;
1456 }
1457 if (pid)
1458 // we are the parent process -- exit cleanly
1459 return 0;
1460
1461 // Now we are the child's child...
1462 }
1463
1464 // close any open file descriptors
1465 for (int i = sysconf(_SC_OPEN_MAX); --i >= 0; )
1466 close(i);
1467
1468 // redirect any IO attempts to /dev/null and change to root directory
1469 int fd = open("/dev/null", O_RDWR);
1470 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1471 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1472 return EXIT_STARTUP;
1473 }
1474 umask(0022);
1475
1476 if (do_fork)
1477 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1478
1479#else // _WIN32
1480
1481 // No fork() on native Win32
1482 // Detach this process from console
1483 fflush(nullptr);
1484 if (daemon_detach("smartd")) {
1485 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1486 return EXIT_STARTUP;
1487 }
1488 // stdin/out/err now closed if not redirected
1489
1490#endif // _WIN32
1491
1492 // No error, continue in main_worker()
1493 return -1;
1494}
1495
1496// create a PID file containing the current process id
1497static bool write_pid_file()
1498{
1499 if (!pid_file.empty()) {
1500 pid_t pid = getpid();
1501 mode_t old_umask;
1502#ifndef __CYGWIN__
1503 old_umask = umask(0077); // rwx------
1504#else
1505 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1506 old_umask = umask(0033); // rwxr--r--
1507#endif
1508
1509 stdio_file f(pid_file.c_str(), "w");
1510 umask(old_umask);
1511 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1512 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1513 return false;
1514 }
1515 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1516 }
1517 return true;
1518}
1519
1520// Prints header identifying version of code and home
1521static void PrintHead()
1522{
1523 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1524}
1525
1526// prints help info for configuration file Directives
1527static void Directives()
1528{
1529 PrintOut(LOG_INFO,
1530 "Configuration file (%s) Directives (after device name):\n"
1531 " -d TYPE Set the device type: auto, ignore, removable,\n"
1532 " %s\n"
1533 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1534 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1535 " -S VAL Enable/disable attribute autosave (on/off)\n"
1536 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1537 " -H Monitor SMART Health Status, report if failed\n"
1538 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1539 " -l TYPE Monitor SMART log or self-test status:\n"
1540 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1541 " -l scterc,R,W Set SCT Error Recovery Control\n"
1542 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1543 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1544 " -f Monitor 'Usage' Attributes, report failures\n"
1545 " -m ADD Send email warning to address ADD\n"
1546 " -M TYPE Modify email warning behavior (see man page)\n"
1547 " -p Report changes in 'Prefailure' Attributes\n"
1548 " -u Report changes in 'Usage' Attributes\n"
1549 " -t Equivalent to -p and -u Directives\n"
1550 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1551 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1552 " -i ID Ignore Attribute ID for -f Directive\n"
1553 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1554 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1555 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1556 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1557 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1558 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1559 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1560 " -F TYPE Use firmware bug workaround:\n"
1561 " %s\n"
1562 " -c i=N Set interval between disk checks to N seconds\n"
1563 " # Comment: text after a hash sign is ignored\n"
1564 " \\ Line continuation character\n"
1565 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1566 "Use ID = 0 to turn off -C and/or -U Directives\n"
1567 "Example: /dev/sda -a\n",
1568 configfile,
1569 smi()->get_valid_dev_types_str().c_str(),
1571}
1572
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or nullptr on failure. */
static const char *GetValidArgList(char opt)
{
  // Remains nullptr for options without an argument list
  const char * args = nullptr;

  switch (opt) {
    case 'A':
    case 's':
      args = "<PATH_PREFIX>, -";
      break;
    case 'B':
      args = "[+]<FILE_NAME>";
      break;
    case 'c':
      args = "<FILE_NAME>, -";
      break;
    case 'l':
      args = "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
      break;
    case 'q':
      args = "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests";
      break;
    case 'r':
      args = "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
      break;
    case 'p':
    case 'w':
      args = "<FILE_NAME>";
      break;
    case 'i':
      args = "<INTEGER_SECONDS>";
      break;
#ifdef HAVE_POSIX_API
    case 'u':
      args = "<USER>[:<GROUP>], -";
      break;
#elif defined(_WIN32)
    case 'u':
      args = "restricted, unchanged";
      break;
#endif
#ifdef HAVE_LIBCAP_NG
    case 'C':
      args = "mail, <no_argument>";
      break;
#endif
    default:
      break;
  }

  return args;
}
1611
1612/* prints help information for command syntax */
1613static void Usage()
1614{
1615 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1616#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1617 PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
1618#else
1619 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1620#endif
1621 PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
1622#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1623 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
1624#endif
1625 PrintOut(LOG_INFO,"\n");
1626 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1627 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1628 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1629#ifdef SMARTMONTOOLS_DRIVEDBDIR
1630 PrintOut(LOG_INFO,"\n");
1631 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1632#endif
1633 PrintOut(LOG_INFO,"]\n\n");
1634 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1635 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1636 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1637#ifdef HAVE_LIBCAP_NG
1638 PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
1639 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1640 " Warning: Mail notification may not work when used.\n\n");
1641#endif
1642 PrintOut(LOG_INFO," -d, --debug\n");
1643 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1644 PrintOut(LOG_INFO," -D, --showdirectives\n");
1645 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1646 PrintOut(LOG_INFO," -h, --help, --usage\n");
1647 PrintOut(LOG_INFO," Display this help and exit\n\n");
1648 PrintOut(LOG_INFO," -i N, --interval=N\n");
1649 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1650 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1651#ifndef _WIN32
1652 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1653#else
1654 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1655#endif
1656#ifndef _WIN32
1657 PrintOut(LOG_INFO," -n, --no-fork\n");
1658 PrintOut(LOG_INFO," Do not fork into background\n");
1659#ifdef HAVE_LIBSYSTEMD
1660 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1661#endif // HAVE_LIBSYSTEMD
1662 PrintOut(LOG_INFO,"\n");
1663#endif // WIN32
1664 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1665 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1666 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1667 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1668 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1669 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1670#ifdef SMARTMONTOOLS_SAVESTATES
1671 PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
1672#else
1673 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1674#endif
1675 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1676#ifdef SMARTMONTOOLS_SAVESTATES
1677 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1678#endif
1679 PrintOut(LOG_INFO,"\n");
1680 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1681 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1682#ifndef _WIN32
1683 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1684#else
1685 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1686#endif
1687#ifdef HAVE_POSIX_API
1688 PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
1689 PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
1690#elif defined(_WIN32)
1691 PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
1692 PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
1693#endif
1694#ifdef _WIN32
1695 PrintOut(LOG_INFO," --service\n");
1696 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1697 PrintOut(LOG_INFO," smartd install [options]\n");
1698 PrintOut(LOG_INFO," Remove service with:\n");
1699 PrintOut(LOG_INFO," smartd remove\n\n");
1700#endif // _WIN32
1701 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1702 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1703}
1704
1705static int CloseDevice(smart_device * device, const char * name)
1706{
1707 if (!device->close()){
1708 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1709 return 1;
1710 }
1711 // device successfully closed
1712 return 0;
1713}
1714
// Replace invalid characters in cfg.dev_idinfo.
// Each character outside of ' '..'~' and a '~' at the start of the
// string is replaced by '?'.  Returns true if S was changed.
static bool sanitize_dev_idinfo(std::string & s)
{
  static_assert(' ' == 0x20 && '~' == 0x07e, "ASCII character set assumed");

  bool changed = false;
  for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
    const char ch = s[pos];
    const bool printable = (' ' <= ch && ch <= '~');
    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
    const bool leading_tilde = (pos == 0 && ch == '~');
    if (!printable || leading_tilde) {
      s[pos] = '?';
      changed = true;
    }
  }
  return changed;
}
1730
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and letters are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1738
1739// Read error count from Summary or Extended Comprehensive SMART error log
1740// Return -1 on error
1741static int read_ata_error_count(ata_device * device, const char * name,
1742 firmwarebug_defs firmwarebugs, bool extended)
1743{
1744 if (!extended) {
1746 if (ataReadErrorLog(device, &log, firmwarebugs)){
1747 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1748 return -1;
1749 }
1750 return (log.error_log_pointer ? log.ata_error_count : 0);
1751 }
1752 else {
1754 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1755 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1756 return -1;
1757 }
1758 // Some disks use the reserved byte as index, see ataprint.cpp.
1759 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1760 }
1761}
1762
1763// returns <0 if problem. Otherwise, bottom 8 bits are the self test
1764// error count, and top bits are the power-on hours of the last error.
1765static int SelfTestErrorCount(ata_device * device, const char * name,
1766 firmwarebug_defs firmwarebugs)
1767{
1768 struct ata_smart_selftestlog log;
1769
1770 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1771 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1772 return -1;
1773 }
1774
1775 if (!log.mostrecenttest)
1776 // No tests logged
1777 return 0;
1778
1779 // Count failed self-tests
1780 int errcnt = 0, hours = 0;
1781 for (int i = 20; i >= 0; i--) {
1782 int j = (i + log.mostrecenttest) % 21;
1784 if (!nonempty(&entry, sizeof(entry)))
1785 continue;
1786
1787 int status = entry.selfteststatus >> 4;
1788 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1789 // First successful extended self-test, stop count
1790 break;
1791
1792 if (0x3 <= status && status <= 0x8) {
1793 // Self-test showed an error
1794 errcnt++;
1795 // Keep track of time of most recent error
1796 if (!hours)
1797 hours = entry.timestamp;
1798 }
1799 }
1800
1801 return ((hours << 8) | errcnt);
1802}
1803
// Extract the error count (bits 0-7) and the power-on hours (bits 8-23)
// from a value which packs both as ((hours << 8) | count).
// The argument is parenthesized such that expressions with operators of
// lower precedence than '&' and '>>' are handled correctly.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1806
// Check offline data collection status.
// Returns true if the lower 7 bits of STATUS are 0x03, bit 7 is ignored.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7f;
  return (code == 0x03);
}
1812
// Check self-test execution status.
// Returns true if the upper 4 bits of STATUS are all set,
// the lower 4 bits are ignored.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1818
1819// Log offline data collection status
1820static void log_offline_data_coll_status(const char * name, unsigned char status)
1821{
1822 const char * msg;
1823 switch (status & 0x7f) {
1824 case 0x00: msg = "was never started"; break;
1825 case 0x02: msg = "was completed without error"; break;
1826 case 0x03: msg = "is in progress"; break;
1827 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1828 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1829 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1830 default: msg = nullptr;
1831 }
1832
1833 if (msg)
1834 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1835 "Device: %s, offline data collection %s%s\n", name, msg,
1836 ((status & 0x80) ? " (auto:on)" : ""));
1837 else
1838 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1839 name, status);
1840}
1841
1842// Log self-test execution status
1843static void log_self_test_exec_status(const char * name, unsigned char status)
1844{
1845 const char * msg;
1846 switch (status >> 4) {
1847 case 0x0: msg = "completed without error"; break;
1848 case 0x1: msg = "was aborted by the host"; break;
1849 case 0x2: msg = "was interrupted by the host with a reset"; break;
1850 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1851 case 0x4: msg = "completed with error (unknown test element)"; break;
1852 case 0x5: msg = "completed with error (electrical test element)"; break;
1853 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1854 case 0x7: msg = "completed with error (read test element)"; break;
1855 case 0x8: msg = "completed with error (handling damage?)"; break;
1856 default: msg = nullptr;
1857 }
1858
1859 if (msg)
1860 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1861 "Device: %s, previous self-test %s\n", name, msg);
1862 else if ((status >> 4) == 0xf)
1863 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1864 name, status & 0x0f);
1865 else
1866 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1867 name, status);
1868}
1869
1870// Check pending sector count id (-C, -U directives).
1871static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1872 unsigned char id, const char * msg)
1873{
1874 // Check attribute index
1875 int i = ata_find_attr_index(id, state.smartval);
1876 if (i < 0) {
1877 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1878 cfg.name.c_str(), msg, id);
1879 return false;
1880 }
1881
1882 // Check value
1883 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1884 cfg.attribute_defs);
1885 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1886 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1887 cfg.name.c_str(), msg, id, rawval, rawval);
1888 return false;
1889 }
1890
1891 return true;
1892}
1893
1894// Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1895static void finish_device_scan(dev_config & cfg, dev_state & state)
1896{
1897 // Set cfg.emailfreq if user hasn't set it
1898 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && cfg.emailfreq == emailfreqs::unknown) {
1899 // Avoid that emails are suppressed forever due to state persistence
1900 if (cfg.state_file.empty())
1902 else
1904 }
1905
1906 // Start self-test regex check now if time was not read from state file
1907 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1908 state.scheduled_test_next_check = time(nullptr);
1909}
1910
// Common function to format result message for ATA setting.
// Appends NAME and a result suffix to MSG, separated by ", " from
// existing text:
//   ":--"   if OK is false,
//   ":off"  if SET_OPTION is negative,
//   ":N"    with N = SET_OPTION-1 if HAS_VALUE is true,
//   ":on"   if SET_OPTION is positive,
//   nothing otherwise.
static void format_set_result_msg(std::string & msg, const char * name, bool ok,
                                  int set_option = 0, bool has_value = false)
{
  if (!msg.empty())
    msg += ", ";
  msg += name;

  if (!ok) {
    msg += ":--";
    return;
  }
  if (set_option < 0) {
    msg += ":off";
    return;
  }
  if (has_value) {
    msg += ':';
    msg += std::to_string(set_option-1);
    return;
  }
  if (set_option > 0)
    msg += ":on";
}
1927
1928// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1929static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1930{
1931 if (!cfg.id_is_unique)
1932 return false;
1933
1934 for (const auto & prev_cfg : prev_cfgs) {
1935 if (!prev_cfg.id_is_unique)
1936 continue;
1937 if (cfg.dev_idinfo != prev_cfg.dev_idinfo)
1938 continue;
1939
1940 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1941 cfg.dev_name.c_str(), prev_cfg.dev_name.c_str());
1942 return true;
1943 }
1944
1945 return false;
1946}
1947
// TODO: Add '-F swapid' directive
// Passed to ata_read_identity() by ATADeviceScan(); currently always off.
static constexpr bool fix_swapped_id = false;
1950
// ATADeviceScan(): capability check of an already opened ATA device.
// Reads the identity, applies presets and requested settings, and clears
// every monitoring option in CFG which the device turns out not to support.
// Return values as visible in this listing:
//   0 - device accepted: state/attrlog file names are set (if prefixes are
//       configured) and finish_device_scan() was called
//   1 - identity already present in PREV_CFGS (duplicate)
//   2 - IDENTIFY DEVICE failed, packet device, or SMART not supported /
//       not enabled and '-T permissive' not given
//   3 - none of the selected checks is left enabled
// CloseDevice() is called before each of these returns.
// NOTE(review): this copy of the function lacks several listing lines (the
// embedded numbering jumps); each gap is marked below.
1951// scan to see what ata devices there are, and if they support SMART
1952static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1953 const dev_config_vector * prev_cfgs)
1954{
1955 int supported=0;
1956 struct ata_identify_device drive;
1957 const char *name = cfg.name.c_str();
1958 int retid;
1959
1960 // Device must be open
1961
1962 // Get drive identity structure
1963 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1964 if (retid<0)
1965 // Unable to read Identity structure
1966 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1967 else
1968 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1969 name, packetdevicetype(retid-1));
1970 CloseDevice(atadev, name);
1971 return 2;
1972 }
1973
1974 // Get drive identity, size and rotation rate (HDD/SSD)
1975 char model[40+1], serial[20+1], firmware[8+1];
1976 ata_format_id_string(model, drive.model, sizeof(model)-1);
1977 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1978 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1979
1980 ata_size_info sizes;
1981 ata_get_size_info(&drive, sizes);
1982 state.num_sectors = sizes.sectors;
1983 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1984
// wwn stays an empty string if no WWN is reported (naa < 0)
1985 char wwn[64]; wwn[0] = 0;
1986 unsigned oui = 0; uint64_t unique_id = 0;
1987 int naa = ata_get_wwn(&drive, oui, unique_id);
1988 if (naa >= 0)
1989 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1990
1991 // Format device id string for warning emails
1992 char cap[32];
1993 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1994 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1995 cfg.id_is_unique = true; // TODO: Check serial?
// NOTE(review): listing line 1996 is absent; as shown here the flag set
// above is reset unconditionally - the guarding condition is not visible.
1997 cfg.id_is_unique = false;
1998
1999 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2000
2001 // Check for duplicates
2002 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2003 CloseDevice(atadev, name);
2004 return 1;
2005 }
2006
2007 // Show if device in database, and use preset vendor attribute
2008 // options unless user has requested otherwise.
2009 if (cfg.ignorepresets)
2010 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
2011 else {
2012 // Apply vendor specific presets, print warning if present
2013 std::string dbversion;
// NOTE(review): listing line 2014 is absent; the declaration of 'dbentry'
// and the start of the call receiving the arguments below are not visible.
2015 &drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
2016 if (!dbentry)
2017 PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
2018 (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
2019 else {
2020 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
2021 name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
2022 (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
2023 if (*dbentry->warningmsg)
2024 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
2025 }
2026 }
2027
2028 // Check for ATA Security LOCK
// 'locked' is used again below to skip the SCT Error Recovery Control setup
2029 unsigned short word128 = drive.words088_255[128-88];
2030 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
2031 if (locked)
2032 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
2033
2034 // Set default '-C 197[+]' if no '-C ID' is specified.
2035 if (!cfg.curr_pending_set)
// NOTE(review): listing line 2036 (statement controlled by the 'if' above)
// is absent.
2037 // Set default '-U 198[+]' if no '-U ID' is specified.
2038 if (!cfg.offl_pending_set)
// NOTE(review): listing line 2039 (statement controlled by the 'if' above)
// is absent.
2040
2041 // If requested, show which presets would be used for this drive
2042 if (cfg.showpresets) {
// debugmode is raised to 2 temporarily if it is 0 and restored afterwards
2043 int savedebugmode=debugmode;
2044 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
2045 if (!debugmode)
2046 debugmode=2;
2047 show_presets(&drive);
2048 debugmode=savedebugmode;
2049 }
2050
2051 // see if drive supports SMART
2052 supported=ataSmartSupport(&drive);
2053 if (supported!=1) {
2054 if (supported==0)
2055 // drive does NOT support SMART
2056 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2057 else
2058 // can't tell if drive supports SMART
2059 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2060
2061 // should we proceed anyway?
2062 if (cfg.permissive) {
2063 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2064 }
2065 else {
2066 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2067 CloseDevice(atadev, name);
2068 return 2;
2069 }
2070 }
2071
// A failed enable command is tolerated if the identify data reports SMART
// as already enabled, or if '-T permissive' is given
2072 if (ataEnableSmart(atadev)) {
2073 // Enable SMART command has failed
2074 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2075
2076 if (ataIsSmartEnabled(&drive) <= 0) {
2077 if (!cfg.permissive) {
2078 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2079 CloseDevice(atadev, name);
2080 return 2;
2081 }
2082 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2083 }
2084 else {
2085 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2086 }
2087 }
2088
2089 // disable device attribute autosave...
2090 if (cfg.autosave==1) {
2091 if (ataDisableAutoSave(atadev))
2092 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2093 else
2094 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2095 }
2096
2097 // or enable device attribute autosave
2098 if (cfg.autosave==2) {
2099 if (ataEnableAutoSave(atadev))
2100 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2101 else
2102 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2103 }
2104
2105 // capability check: SMART status
2106 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2107 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2108 cfg.smartcheck = false;
2109 }
2110
2111 // capability check: Read smart values and thresholds. Note that
2112 // smart values are ALSO needed even if we ONLY want to know if the
2113 // device is self-test log or error-log capable! After ATA-5, this
2114 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2115 // but sadly not for ATA-5. Sigh.
2116
2117 // do we need to get SMART data?
2118 bool smart_val_ok = false;
2119 if ( cfg.autoofflinetest || cfg.selftest
2120 || cfg.errorlog || cfg.xerrorlog
2121 || cfg.offlinests || cfg.selfteststs
2122 || cfg.usagefailed || cfg.prefail || cfg.usage
2123 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2124 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2125
2126 if (ataReadSmartValues(atadev, &state.smartval)) {
// Without attribute values all attribute based checks are switched off
2127 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2128 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2129 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2130 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2131 }
2132 else {
2133 smart_val_ok = true;
2134 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2135 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2136 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2137 cfg.usagefailed = false;
2138 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2139 memset(&state.smartthres, 0, sizeof(state.smartthres));
2140 }
2141 }
2142
2143 // see if the necessary Attribute is there to monitor offline or
2144 // current pending sectors or temperature
2145 if ( cfg.curr_pending_id
2146 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2147 "Current_Pending_Sector"))
2148 cfg.curr_pending_id = 0;
2149
2150 if ( cfg.offl_pending_id
2151 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2152 "Offline_Uncorrectable"))
2153 cfg.offl_pending_id = 0;
2154
2155 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
// NOTE(review): listing line 2156 (remainder of the condition and the
// opening brace) is absent.
2157 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2158 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2159 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2160 }
2161
2162 // Report ignored '-r' or '-R' directives
2163 for (int id = 1; id <= 255; id++) {
// NOTE(review): listing line 2164 is absent; it opens the inner block which
// is closed before the end of this loop.
2165 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2166 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2167 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2168
2169 int idx = ata_find_attr_index(id, state.smartval);
2170 if (idx < 0)
2171 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2172 else {
2173 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2174 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2175 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2176 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2177 }
2178 }
2179 }
2180 }
2181
2182 // enable/disable automatic on-line testing
2183 if (cfg.autoofflinetest) {
2184 // is this an enable or disable request?
2185 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2186 if (!smart_val_ok)
2187 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2188 else {
2189 // if command appears unsupported, issue a warning...
2190 if (!isSupportAutomaticTimer(&state.smartval))
2191 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2192 // ... but then try anyway
2193 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2194 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2195 else
2196 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2197 }
2198 }
2199
2200 // Read log directories if required for capability check
2201 ata_smart_log_directory smart_logdir, gp_logdir;
2202 bool smart_logdir_ok = false, gp_logdir_ok = false;
2203
// NOTE(review): listing line 2204 (start of this 'if' condition) is absent.
2205 && (cfg.errorlog || cfg.selftest)
2206 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2207 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2208 smart_logdir_ok = true;
2209 }
2210
2211 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2212 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2213 gp_logdir_ok = true;
2214 }
2215
2216 // capability check: self-test-log
// The log is assumed present if '-T permissive' is given, if the SMART log
// directory has a nonzero size for entry [0x06-1], or - without a directory -
// if isSmartTestLogCapable() says so.
2217 state.selflogcount = 0; state.selfloghour = 0;
2218 if (cfg.selftest) {
2219 int retval;
2220 if (!( cfg.permissive
2221 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2222 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2223 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2224 cfg.selftest = false;
2225 }
2226 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2227 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2228 cfg.selftest = false;
2229 }
2230 else {
2231 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2232 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2233 }
2234 }
2235
2236 // capability check: ATA error log
2237 state.ataerrorcount = 0;
2238 if (cfg.errorlog) {
2239 int errcnt1;
2240 if (!( cfg.permissive
2241 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2242 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2243 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2244 cfg.errorlog = false;
2245 }
2246 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2247 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2248 cfg.errorlog = false;
2249 }
2250 else
2251 state.ataerrorcount = errcnt1;
2252 }
2253
// Extended error log: if both logs are monitored and report different
// counts, the larger count is kept as start value
2254 if (cfg.xerrorlog) {
2255 int errcnt2;
2256 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2257 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2258 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2259 name);
2260 cfg.xerrorlog = false;
2261 }
2262 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2263 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2264 cfg.xerrorlog = false;
2265 }
2266 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2267 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2268 name, state.ataerrorcount, errcnt2);
2269 // Record max error count
2270 if (errcnt2 > state.ataerrorcount)
2271 state.ataerrorcount = errcnt2;
2272 }
2273 else
2274 state.ataerrorcount = errcnt2;
2275 }
2276
2277 // capability check: self-test and offline data collection status
2278 if (cfg.offlinests || cfg.selfteststs) {
2279 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2280 if (cfg.offlinests)
2281 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2282 if (cfg.selfteststs)
2283 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2284 cfg.offlinests = cfg.selfteststs = false;
2285 }
2286 }
2287
2288 // capabilities check -- does it support powermode?
2289 if (cfg.powermode) {
2290 int powermode = ataCheckPowerMode(atadev);
2291
2292 if (-1 == powermode) {
2293 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2294 cfg.powermode=0;
2295 }
// Any value outside the list below is treated as non-compliant
2296 else if (powermode!=0x00 && powermode!=0x01
2297 && powermode!=0x40 && powermode!=0x41
2298 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2299 && powermode!=0xff) {
2300 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2301 name, powermode);
2302 cfg.powermode=0;
2303 }
2304 }
2305
2306 // Apply ATA settings
// Each requested setting appends one entry to 'msg' via
// format_set_result_msg(); the combined text is logged once below.
2307 std::string msg;
2308
2309 if (cfg.set_aam)
2310 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2311 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2312 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2313
2314 if (cfg.set_apm)
2315 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2316 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2317 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2318
2319 if (cfg.set_lookahead)
2320 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
// NOTE(review): listing line 2321 (feature argument of the call) is absent.
2322 cfg.set_lookahead);
2323
2324 if (cfg.set_wcache)
2325 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
// NOTE(review): listing line 2326 (remaining arguments of the call) is absent.
2327
2328 if (cfg.set_dsn)
2329 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2330 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2331
2332 if (cfg.set_security_freeze)
2333 format_set_result_msg(msg, "Security freeze",
// NOTE(review): listing line 2334 (command argument of the call) is absent.
2335
2336 if (cfg.set_standby)
2337 format_set_result_msg(msg, "Standby",
2338 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2339
2340 // Report as one log entry
2341 if (!msg.empty())
2342 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2343
2344 // set SCT Error Recovery Control if requested
2345 if (cfg.sct_erc_set) {
// NOTE(review): listing line 2346 (first 'if' of this chain) is absent.
2347 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2348 name);
2349 else if (locked)
2350 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2351 name);
2352 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
2353 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
2354 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2355 else
2356 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2357 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2358 }
2359
2360 // If no tests available or selected, return
2361 if (!( cfg.smartcheck || cfg.selftest
2362 || cfg.errorlog || cfg.xerrorlog
2363 || cfg.offlinests || cfg.selfteststs
2364 || cfg.usagefailed || cfg.prefail || cfg.usage
2365 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2366 CloseDevice(atadev, name);
2367 return 3;
2368 }
2369
2370 // tell user we are registering device
2371 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2372
2373 // close file descriptor
2374 CloseDevice(atadev, name);
2375
2376 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2377 // Build file name for state file
// model and serial are modified in place: characters rejected by
// not_allowed_in_filename are replaced with '_'
2378 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2379 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2380 if (!state_path_prefix.empty()) {
2381 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2382 // Read previous state
2383 if (read_dev_state(cfg.state_file.c_str(), state)) {
2384 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2385 // Copy ATA attribute values to temp state
2386 state.update_temp_state();
2387 }
2388 }
2389 if (!attrlog_path_prefix.empty())
2390 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2391 }
2392
2393 finish_device_scan(cfg, state);
2394
2395 return 0;
2396}
2397
// SCSIDeviceScan(): capability check of an already opened SCSI device.
// Return values as visible in this listing:
//   0 - device accepted: state/attrlog file names are set (if prefixes are
//       configured) and finish_device_scan() was called
//   1 - identity already present in PREV_CFGS (duplicate)
//   2 - INQUIRY failed or too short, not a disk like device, or
//       TEST UNIT READY failed
//   3 - bad IEC mode page, or IE (SMART) not enabled
// NOTE(review): this copy of the function lacks several listing lines (the
// embedded numbering jumps); each gap is marked below.
2398// on success, return 0. On failure, return >0. Never return <0,
2399// please.
2400static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2401 const dev_config_vector * prev_cfgs)
2402{
2403 int err, req_len, avail_len, version, len;
2404 const char *device = cfg.name.c_str();
2405 struct scsi_iec_mode_page iec;
2406 uint8_t tBuf[64];
2407 uint8_t inqBuf[96];
2408 uint8_t vpdBuf[252];
2409 char lu_id[64], serial[256], vendor[40], model[40];
2410
2411 // Device must be open
2412 memset(inqBuf, 0, 96);
2413 req_len = 36;
2414 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2415 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2416 req_len = 64;
2417 int err64;
2418 if ((err64 = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2419 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2420 "skip device [err=%d, %d]\n", device, err, err64);
2421 return 2;
2422 }
2423 }
2424 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2425
// Usable response length is the smaller of reported and requested length
2426 avail_len = inqBuf[4] + 5;
2427 len = (avail_len < req_len) ? avail_len : req_len;
2428 if (len < 36) {
2429 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2430 "skip device\n", device);
2431 return 2;
2432 }
2433
2434 int pdt = inqBuf[0] & 0x1f;
2435
2436 switch (pdt) {
// NOTE(review): listing line 2437 (first case label) is absent.
2438 case SCSI_PT_WO:
2439 case SCSI_PT_CDROM:
2440 case SCSI_PT_OPTICAL:
2441 case SCSI_PT_RBC: /* Reduced Block commands */
2442 case SCSI_PT_HOST_MANAGED: /* Zoned disk */
2443 break;
2444 default:
2445 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2446 "skip\n", device, pdt);
2447 return 2;
2448 }
// NOTE(review): the three 'return 2' paths above do not call CloseDevice(),
// unlike the later error paths - confirm that this is intended.
2449
// NOTE(review): listing lines 2450 and 2454 are absent; only the release of
// the previous 'supported_vpd_pages_p' object is visible here.
2451 delete supported_vpd_pages_p;
2452 supported_vpd_pages_p = nullptr;
2453 }
2455
2456 lu_id[0] = '\0';
2457 if (version >= 0x3) {
2458 /* SPC to SPC-5, assume SPC-6 is version==8 or higher */
// NOTE(review): listing line 2459 (start of the VPD inquiry call) is absent.
2460 vpdBuf, sizeof(vpdBuf))) {
2461 len = vpdBuf[3];
2462 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
2463 }
2464 }
2465 serial[0] = '\0';
// NOTE(review): listing line 2466 (start of the VPD inquiry call) is absent.
2467 vpdBuf, sizeof(vpdBuf))) {
2468 len = vpdBuf[3];
2469 vpdBuf[4 + len] = '\0';
2470 scsi_format_id_string(serial, &vpdBuf[4], len);
2471 }
2472
// si_str stays an empty string if no capacity is reported
2473 char si_str[64];
2474 struct scsi_readcap_resp srr;
2475 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2476
2477 if (capacity)
2478 format_capacity(si_str, sizeof(si_str), capacity, ".");
2479 else
2480 si_str[0] = '\0';
2481
2482 // Format device id string for warning emails
2483 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2484 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2485 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2486 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2487 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2488 cfg.id_is_unique = (lu_id[0] || serial[0]);
// NOTE(review): listing line 2489 is absent; as shown here the flag set
// above is reset unconditionally - the guarding condition is not visible.
2490 cfg.id_is_unique = false;
2491
2492 // format "model" string
2493 scsi_format_id_string(vendor, &inqBuf[8], 8);
2494 scsi_format_id_string(model, &inqBuf[16], 16);
2495 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2496
2497 // Check for duplicates
2498 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2499 CloseDevice(scsidev, device);
2500 return 1;
2501 }
2502
2503 // check that device is ready for commands. IE stores its stuff on
2504 // the media.
2505 if ((err = scsiTestUnitReady(scsidev))) {
2506 if (SIMPLE_ERR_NOT_READY == err)
2507 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2508 else if (SIMPLE_ERR_NO_MEDIUM == err)
2509 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2510 else if (SIMPLE_ERR_BECOMING_READY == err)
2511 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2512 else
2513 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2514 CloseDevice(scsidev, device);
2515 return 2;
2516 }
2517
2518 // Badly-conforming USB storage devices may fail this check.
2519 // The response to the following IE mode page fetch (current and
2520 // changeable values) is carefully examined. It has been found
2521 // that various USB devices that malform the response will lock up
2522 // if asked for a log page (e.g. temperature) so it is best to
2523 // bail out now.
2524 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2525 state.modese_len = iec.modese_len;
2526 else if (SIMPLE_ERR_BAD_FIELD == err)
2527 ; /* continue since it is reasonable not to support IE mpage */
2528 else { /* any other error (including malformed response) unreasonable */
2529 PrintOut(LOG_INFO,
2530 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2531 device, err);
2532 CloseDevice(scsidev, device);
2533 return 3;
2534 }
2535
2536 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2537 // smart if it is off). This may change to be the same as the ATA side.
2538 if (!scsi_IsExceptionControlEnabled(&iec)) {
2539 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2540 "Try 'smartctl -s on %s' to turn on SMART features\n",
2541 device, device);
2542 CloseDevice(scsidev, device);
2543 return 3;
2544 }
2545
2546 // Flag that certain log pages are supported (information may be
2547 // available from other sources).
2548 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2549 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2550 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2551 {
// Page codes follow the page header; tBuf[3] is used as their count
2552 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2553 switch (tBuf[k]) {
2554 case TEMPERATURE_LPAGE:
2555 state.TempPageSupported = 1;
2556 break;
2557 case IE_LPAGE:
2558 state.SmartPageSupported = 1;
2559 break;
// NOTE(review): listing lines 2560, 2563-2564, 2566-2567 and 2569-2570 are
// absent; the case labels and flag assignments of the following entries are
// only partly visible.
2561 state.ReadECounterPageSupported = 1;
2562 break;
2565 break;
2568 break;
2571 break;
2572 default:
2573 break;
2574 }
2575 }
2576 }
2577
2578 // Check if scsiCheckIE() is going to work
2579 {
2580 uint8_t asc = 0;
2581 uint8_t ascq = 0;
2582 uint8_t currenttemp = 0;
2583 uint8_t triptemp = 0;
2584
2585 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2586 &asc, &ascq, &currenttemp, &triptemp)) {
2587 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2588 state.SuppressReport = 1;
2589 }
// Temperature monitoring is dropped if the check failed or no current
// temperature was returned
2590 if ( (state.SuppressReport || !currenttemp)
2591 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2592 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2593 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2594 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2595 }
2596 }
2597
2598 // capability check: self-test-log
2599 if (cfg.selftest){
2600 int retval = scsiCountFailedSelfTests(scsidev, 0);
2601 if (retval<0) {
2602 // no self-test log, turn off monitoring
2603 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2604 cfg.selftest = false;
2605 state.selflogcount = 0;
2606 state.selfloghour = 0;
2607 }
2608 else {
2609 // register starting values to watch for changes
2610 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2611 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2612 }
2613 }
2614
2615 // disable autosave (set GLTSD bit)
2616 if (cfg.autosave==1){
2617 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2618 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2619 else
2620 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2621 }
2622
2623 // or enable autosave (clear GLTSD bit)
2624 if (cfg.autosave==2){
2625 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2626 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2627 else
2628 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2629 }
2630
2631 // tell user we are registering device
2632 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2633
2634 // Make sure that init_standby_check() ignores SCSI devices
2635 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2636
2637 // close file descriptor
2638 CloseDevice(scsidev, device);
2639
2640 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2641 // Build file name for state file
// NOTE(review): only model and serial are sanitized here; 'vendor' is used
// in the file names below without this replacement - confirm.
2642 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2643 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2644 if (!state_path_prefix.empty()) {
2645 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2646 // Read previous state
2647 if (read_dev_state(cfg.state_file.c_str(), state)) {
2648 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2649 // Copy ATA attribute values to temp state
2650 state.update_temp_state();
2651 }
2652 }
2653 if (!attrlog_path_prefix.empty())
2654 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2655 }
2656
2657 finish_device_scan(cfg, state);
2658
2659 return 0;
2660}
2661
// Convert 128 bit LE integer to uint64_t or its max value on overflow.
// VAL[0] is the least significant byte.  If any of the upper eight bytes
// (VAL[8]..VAL[15]) is nonzero, the value does not fit into 64 bits and
// all-ones is returned instead.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  uint64_t result = 0;
  // Walk from the most significant byte down to the least significant one
  for (int pos = 16; pos-- > 0; ) {
    if (pos >= 8) {
      if (val[pos] != 0)
        return ~static_cast<uint64_t>(0); // overflow
    }
    else
      result = (result << 8) | val[pos];
  }
  return result;
}
2675
2676// Get max temperature in Kelvin reported in NVMe SMART/Health log.
2677static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2678{
2679 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2680 for (auto s : smart_log.temp_sensor) {
2681 if (s > k)
2682 k = s; // cppcheck-suppress useStlAlgorithm
2683 }
2684 return k;
2685}
2686
2687// Check the NVMe Error Information log for device related errors.
2688static bool check_nvme_error_log(const dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2689 uint64_t newcnt = 0)
2690{
2691 // Limit transfer size to one page (64 entries) to avoid problems with
2692 // limits of NVMe pass-through layer or too low MDTS values.
2693 unsigned want_entries = 64;
2694 if (want_entries > cfg.nvme_err_log_max_entries)
2695 want_entries = cfg.nvme_err_log_max_entries;
2696 raw_buffer error_log_buf(want_entries * sizeof(nvme_error_log_page));
2697 nvme_error_log_page * error_log =
2698 reinterpret_cast<nvme_error_log_page *>(error_log_buf.data());
2699 unsigned read_entries = nvme_read_error_log(nvmedev, error_log, want_entries, false /*!lpo_sup*/);
2700 if (!read_entries) {
2701 PrintOut(LOG_INFO, "Device: %s, Read %u entries from Error Information Log failed\n",
2702 cfg.name.c_str(), want_entries);
2703 return false;
2704 }
2705
2706 if (!newcnt)
2707 return true; // Support check only
2708
2709 // Scan log, find device related errors
2710 uint64_t oldcnt = state.nvme_err_log_entries, mincnt = newcnt;
2711 int err = 0, ign = 0;
2712 for (unsigned i = 0; i < read_entries; i++) {
2713 const nvme_error_log_page & e = error_log[i];
2714 if (!e.error_count)
2715 continue; // unused
2716 if (e.error_count <= oldcnt)
2717 break; // stop on first old entry
2718 if (e.error_count < mincnt)
2719 mincnt = e.error_count; // min known error
2720 if (e.error_count > newcnt)
2721 newcnt = e.error_count; // adjust maximum
2722 uint16_t status = e.status_field >> 1;
2723 if (!nvme_status_is_error(status) || nvme_status_to_errno(status) == EINVAL) {
2724 ign++; // Not a device related error
2725 continue;
2726 }
2727
2728 // Log the most recent 8 errors
2729 if (++err > 8)
2730 continue;
2731 char buf[64];
2732 PrintOut(LOG_INFO, "Device: %s, NVMe error [%u], count %" PRIu64 ", status 0x%04x: %s\n",
2733 cfg.name.c_str(), i, e.error_count, e.status_field,
2735 }
2736
2737 std::string msg = strprintf("Device: %s, NVMe error count increased from %" PRIu64 " to %" PRIu64
2738 " (%d new, %d ignored, %" PRIu64 " unknown)",
2739 cfg.name.c_str(), oldcnt, newcnt, err, ign,
2740 (mincnt > oldcnt + 1 ? mincnt - oldcnt - 1 : 0));
2741 // LOG_CRIT only if device related errors are found
2742 if (!err) {
2743 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2744 }
2745 else {
2746 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2747 MailWarning(cfg, state, 4, "%s", msg.c_str());
2748 }
2749
2750 state.nvme_err_log_entries = newcnt;
2751 state.must_write = true;
2752 return true;
2753}
2754
2755static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2756 const dev_config_vector * prev_cfgs)
2757{
2758 const char *name = cfg.name.c_str();
2759
2760 // Device must be open
2761
2762 // Get ID Controller
2763 nvme_id_ctrl id_ctrl;
2764 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2765 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2766 CloseDevice(nvmedev, name);
2767 return 2;
2768 }
2769
2770 // Get drive identity
2771 char model[40+1], serial[20+1], firmware[8+1];
2772 format_char_array(model, id_ctrl.mn);
2773 format_char_array(serial, id_ctrl.sn);
2774 format_char_array(firmware, id_ctrl.fr);
2775
2776 // Format device id string for warning emails
2777 char nsstr[32] = "", capstr[32] = "";
2778 unsigned nsid = nvmedev->get_nsid();
2779 if (nsid != 0xffffffff)
2780 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2781 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2782 if (capacity)
2783 format_capacity(capstr, sizeof(capstr), capacity, ".");
2784 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2785 nsstr, (capstr[0] ? ", " : ""), capstr);
2786 cfg.id_is_unique = true; // TODO: Check serial?
2788 cfg.id_is_unique = false;
2789
2790 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2791
2792 // Check for duplicates
2793 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2794 CloseDevice(nvmedev, name);
2795 return 1;
2796 }
2797
2798 // Read SMART/Health log
2799 nvme_smart_log smart_log;
2800 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2801 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2802 CloseDevice(nvmedev, name);
2803 return 2;
2804 }
2805
2806 // Check temperature sensor support
2807 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2808 if (!nvme_get_max_temp_kelvin(smart_log)) {
2809 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2810 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2811 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2812 }
2813 }
2814
2815 // Init total error count
2816 cfg.nvme_err_log_max_entries = id_ctrl.elpe + 1; // 0's based value
2817 if (cfg.errorlog || cfg.xerrorlog) {
2818 if (!check_nvme_error_log(cfg, state, nvmedev)) {
2819 PrintOut(LOG_INFO, "Device: %s, Error Information unavailable, ignoring -l [x]error\n", name);
2820 cfg.errorlog = cfg.xerrorlog = false;
2821 }
2822 else
2824 }
2825
2826 // If no supported tests selected, return
2827 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2828 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2829 CloseDevice(nvmedev, name);
2830 return 3;
2831 }
2832
2833 // Tell user we are registering device
2834 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2835
2836 // Make sure that init_standby_check() ignores NVMe devices
2837 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2838
2839 CloseDevice(nvmedev, name);
2840
2841 if (!state_path_prefix.empty()) {
2842 // Build file name for state file
2843 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2844 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2845 nsstr[0] = 0;
2846 if (nsid != 0xffffffff)
2847 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2848 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2849 // Read previous state
2850 if (read_dev_state(cfg.state_file.c_str(), state))
2851 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2852 }
2853
2854 finish_device_scan(cfg, state);
2855
2856 return 0;
2857}
2858
2859// Open device for next check, return false on error
2860static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2861 const char * type)
2862{
2863 const char * name = cfg.name.c_str();
2864
2865 // If user has asked, test the email warning system
2866 if (cfg.emailtest)
2867 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2868
2869 // User may have requested (with the -n Directive) to leave the disk
2870 // alone if it is in idle or standby mode. In this case check the
2871 // power mode first before opening the device for full access,
2872 // and exit without check if disk is reported in standby.
2873 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2874 // Note that 'is_powered_down()' handles opening the device itself, and
2875 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2876 if (device->is_powered_down())
2877 {
2878 // skip at most powerskipmax checks
2879 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2880 // report first only except if state has changed, avoid waking up system disk
2881 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2882 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2883 state.lastpowermodeskipped = -1;
2884 }
2885 state.powerskipcnt++;
2886 return false;
2887 }
2888 }
2889 }
2890
2891 // if we can't open device, fail gracefully rather than hard --
2892 // perhaps the next time around we'll be able to open it
2893 if (!device->open()) {
2894 // For removable devices, print error message only once and suppress email
2895 if (!cfg.removable) {
2896 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2897 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2898 }
2899 else if (!state.removed) {
2900 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2901 state.removed = true;
2902 }
2903 else if (debugmode)
2904 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2905 return false;
2906 }
2907
2908 if (debugmode)
2909 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2910
2911 if (!cfg.removable)
2912 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2913 else if (state.removed) {
2914 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2915 state.removed = false;
2916 }
2917
2918 return true;
2919}
2920
2921// If the self-test log has got more self-test errors (or more recent
2922// self-test errors) recorded, then notify user.
2923static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2924{
2925 const char * name = cfg.name.c_str();
2926
2927 if (newi<0)
2928 // command failed
2929 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2930 else {
2931 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2932
2933 // old and new error counts
2934 int oldc=state.selflogcount;
2935 int newc=SELFTEST_ERRORCOUNT(newi);
2936
2937 // old and new error timestamps in hours
2938 int oldh=state.selfloghour;
2939 int newh=SELFTEST_ERRORHOURS(newi);
2940
2941 if (oldc<newc) {
2942 // increase in error count
2943 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2944 name, oldc, newc);
2945 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2946 name, oldc, newc);
2947 state.must_write = true;
2948 }
2949 else if (newc > 0 && oldh != newh) {
2950 // more recent error
2951 // a 'more recent' error might actually be a smaller hour number,
2952 // if the hour number has wrapped.
2953 // There's still a bug here. You might just happen to run a new test
2954 // exactly 32768 hours after the previous failure, and have run exactly
2955 // 20 tests between the two, in which case smartd will miss the
2956 // new failure.
2957 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2958 name, newh);
2959 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2960 name, newh);
2961 state.must_write = true;
2962 }
2963
2964 // Print info if error entries have disappeared
2965 // or newer successful successful extended self-test exits
2966 if (oldc > newc) {
2967 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2968 name, oldc, newc);
2969 if (newc == 0)
2970 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2971 }
2972
2973 // Needed since self-test error count may DECREASE. Hour might
2974 // also have changed.
2975 state.selflogcount= newc;
2976 state.selfloghour = newh;
2977 }
2978 return;
2979}
2980
// Characters identifying the self-test types, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types, the terminating null character is not counted.
static const unsigned num_test_types = sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2984
2985// returns test type if time to do test of type testtype,
2986// 0 if not time to do test.
2987static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2988{
2989 // check that self-testing has been requested
2990 if (cfg.test_regex.empty())
2991 return 0;
2992
2993 // Exit if drive not capable of any test
2994 if ( state.not_cap_long && state.not_cap_short &&
2995 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2996 return 0;
2997
2998 // since we are about to call localtime(), be sure glibc is informed
2999 // of any timezone changes we make.
3000 if (!usetime)
3002
3003 // Is it time for next check?
3004 time_t now = (!usetime ? time(nullptr) : usetime);
3005 if (now < state.scheduled_test_next_check) {
3006 if (state.scheduled_test_next_check <= now + 3600)
3007 return 0; // Next check within one hour
3008 // More than one hour, assume system clock time adjusted to the past
3009 state.scheduled_test_next_check = now;
3010 }
3011 else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
3012 // Limit time check interval to 90 days
3013 state.scheduled_test_next_check = now - (3600L*24*90);
3014 }
3015
3016 // Find ':NNN[-LLL]' in regex for possible offsets and limits
3017 const unsigned max_offsets = 1 + num_test_types;
3018 unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
3019 unsigned num_offsets = 1; // offsets/limits[0] == 0 always
3020 for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
3021 const char * q = strchr(p, ':');
3022 if (!q)
3023 break;
3024 p = q + 1;
3025 unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
3026 sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
3027 if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
3028 continue;
3029 offsets[num_offsets] = offset; limits[num_offsets] = limit;
3030 num_offsets++;
3031 p += (n3 > 0 ? n3 : n1);
3032 }
3033
3034 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
3035 char testtype = 0;
3036 time_t testtime = 0; int testhour = 0;
3037 int maxtest = num_test_types-1;
3038
3039 for (time_t t = state.scheduled_test_next_check; ; ) {
3040 // Check offset 0 and then all offsets for ':NNN' found above
3041 for (unsigned i = 0; i < num_offsets; i++) {
3042 unsigned offset = offsets[i], limit = limits[i];
3043 unsigned delay = cfg.test_offset_factor * offset;
3044 if (0 < limit && limit < delay)
3045 delay %= limit + 1;
3046 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
3047
3048 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
3049 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
3050 for (int j = 0; j <= maxtest; j++) {
3051 // Skip if drive not capable of this test
3052 switch (test_type_chars[j]) {
3053 case 'L': if (state.not_cap_long) continue; break;
3054 case 'S': if (state.not_cap_short) continue; break;
3055 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
3056 case 'O': if (scsi || state.not_cap_offline) continue; break;
3057 case 'c': case 'n':
3058 case 'r': if (scsi || state.not_cap_selective) continue; break;
3059 default: continue;
3060 }
3061 // Try match of "T/MM/DD/d/HH[:NNN]"
3062 char pattern[64];
3063 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
3064 test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
3065 if (i > 0) {
3066 const unsigned len = sizeof("S/01/01/1/01") - 1;
3067 snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
3068 if (limit > 0)
3069 snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
3070 }
3071 if (cfg.test_regex.full_match(pattern)) {
3072 // Test found
3073 testtype = pattern[0];
3074 testtime = t; testhour = tms->tm_hour;
3075 // Limit further matches to higher priority self-tests
3076 maxtest = j-1;
3077 break;
3078 }
3079 }
3080 }
3081
3082 // Exit if no tests left or current time reached
3083 if (maxtest < 0)
3084 break;
3085 if (t >= now)
3086 break;
3087 // Check next hour
3088 if ((t += 3600) > now)
3089 t = now;
3090 }
3091
3092 // Do next check not before next hour.
3093 struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
3094 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
3095
3096 if (testtype) {
3097 state.must_write = true;
3098 // Tell user if an old test was found.
3099 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
3100 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
3101 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
3102 cfg.name.c_str(), testtype, datebuf);
3103 }
3104 }
3105
3106 return testtype;
3107}
3108
3109// Print a list of future tests.
3111{
3112 unsigned numdev = configs.size();
3113 if (!numdev)
3114 return;
3115 std::vector<int> testcnts(numdev * num_test_types, 0);
3116
3117 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
3118
3119 // FixGlibcTimeZoneBug(); // done in PrintOut()
3120 time_t now = time(nullptr);
3121 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3122 dateandtimezoneepoch(datenow, now);
3123
3124 long seconds;
3125 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3126 // Check for each device whether a test will be run
3127 time_t testtime = now + seconds;
3128 for (unsigned i = 0; i < numdev; i++) {
3129 const dev_config & cfg = configs.at(i);
3130 dev_state & state = states.at(i);
3131 const char * p;
3132 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3133 if (testtype && (p = strchr(test_type_chars, testtype))) {
3134 unsigned t = (p - test_type_chars);
3135 // Report at most 5 tests of each type
3136 if (++testcnts[i*num_test_types + t] <= 5) {
3137 dateandtimezoneepoch(date, testtime);
3138 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3139 testcnts[i*num_test_types + t], testtype, date);
3140 }
3141 }
3142 }
3143 }
3144
3145 // Report totals
3146 dateandtimezoneepoch(date, now+seconds);
3147 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3148 for (unsigned i = 0; i < numdev; i++) {
3149 const dev_config & cfg = configs.at(i);
3150 bool scsi = devices.at(i)->is_scsi();
3151 for (unsigned t = 0; t < num_test_types; t++) {
3152 int cnt = testcnts[i*num_test_types + t];
3153 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3154 continue;
3155 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3156 cnt, (cnt==1?"":"s"), test_type_chars[t]);
3157 }
3158 }
3159
3160}
3161
3162// Return zero on success, nonzero on failure. Perform offline (background)
3163// short or long (extended) self test on given scsi device.
3164static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3165{
3166 int retval = 0;
3167 const char *testname = nullptr;
3168 const char *name = cfg.name.c_str();
3169 int inProgress;
3170
3171 if (scsiSelfTestInProgress(device, &inProgress)) {
3172 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3173 state.not_cap_short = state.not_cap_long = true;
3174 return 1;
3175 }
3176
3177 if (1 == inProgress) {
3178 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3179 "progress.\n", name);
3180 return 1;
3181 }
3182
3183 switch (testtype) {
3184 case 'S':
3185 testname = "Short Self";
3186 retval = scsiSmartShortSelfTest(device);
3187 break;
3188 case 'L':
3189 testname = "Long Self";
3190 retval = scsiSmartExtendSelfTest(device);
3191 break;
3192 }
3193 // If we can't do the test, exit
3194 if (!testname) {
3195 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3196 testtype);
3197 return 1;
3198 }
3199 if (retval) {
3200 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3201 (SIMPLE_ERR_BAD_FIELD == retval)) {
3202 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3203 testname);
3204 if ('L'==testtype)
3205 state.not_cap_long = true;
3206 else
3207 state.not_cap_short = true;
3208
3209 return 1;
3210 }
3211 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3212 testname, retval);
3213 return 1;
3214 }
3215
3216 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3217
3218 return 0;
3219}
3220
3221// Do an offline immediate or self-test. Return zero on success,
3222// nonzero on failure.
3223static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3224{
3225 const char *name = cfg.name.c_str();
3226
3227 // Read current smart data and check status/capability
3228 struct ata_smart_values data;
3229 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3230 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3231 return 1;
3232 }
3233
3234 // Check for capability to do the test
3235 int dotest = -1, mode = 0;
3236 const char *testname = nullptr;
3237 switch (testtype) {
3238 case 'O':
3239 testname="Offline Immediate ";
3241 dotest=OFFLINE_FULL_SCAN;
3242 else
3243 state.not_cap_offline = true;
3244 break;
3245 case 'C':
3246 testname="Conveyance Self-";
3248 dotest=CONVEYANCE_SELF_TEST;
3249 else
3250 state.not_cap_conveyance = true;
3251 break;
3252 case 'S':
3253 testname="Short Self-";
3254 if (isSupportSelfTest(&data))
3255 dotest=SHORT_SELF_TEST;
3256 else
3257 state.not_cap_short = true;
3258 break;
3259 case 'L':
3260 testname="Long Self-";
3261 if (isSupportSelfTest(&data))
3262 dotest=EXTEND_SELF_TEST;
3263 else
3264 state.not_cap_long = true;
3265 break;
3266
3267 case 'c': case 'n': case 'r':
3268 testname = "Selective Self-";
3270 dotest = SELECTIVE_SELF_TEST;
3271 switch (testtype) {
3272 case 'c': mode = SEL_CONT; break;
3273 case 'n': mode = SEL_NEXT; break;
3274 case 'r': mode = SEL_REDO; break;
3275 }
3276 }
3277 else
3278 state.not_cap_selective = true;
3279 break;
3280 }
3281
3282 // If we can't do the test, exit
3283 if (dotest<0) {
3284 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3285 return 1;
3286 }
3287
3288 // If currently running a self-test, do not interrupt it to start another.
3289 if (15==(data.self_test_exec_status >> 4)) {
3290 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3291 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3292 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3293 } else {
3294 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3295 name, testname, (int)(data.self_test_exec_status & 0x0f));
3296 return 1;
3297 }
3298 }
3299
3300 if (dotest == SELECTIVE_SELF_TEST) {
3301 // Set test span
3302 ata_selective_selftest_args selargs, prev_args;
3303 selargs.num_spans = 1;
3304 selargs.span[0].mode = mode;
3305 prev_args.num_spans = 1;
3306 prev_args.span[0].start = state.selective_test_last_start;
3307 prev_args.span[0].end = state.selective_test_last_end;
3308 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3309 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3310 return 1;
3311 }
3312 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3313 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3314 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3315 start, end, end - start + 1,
3316 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3317 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3318 state.selective_test_last_start = start;
3319 state.selective_test_last_end = end;
3320 }
3321
3322 // execute the test, and return status
3323 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, nullptr);
3324 if (retval) {
3325 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3326 return retval;
3327 }
3328
3329 // Report recent test start to do_disable_standby_check()
3330 // and force log of next test status
3331 if (testtype == 'O')
3332 state.offline_started = true;
3333 else
3334 state.selftest_started = true;
3335
3336 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3337 return 0;
3338}
3339
3340// Check pending sector count attribute values (-C, -U directives).
3341static void check_pending(const dev_config & cfg, dev_state & state,
3342 unsigned char id, bool increase_only,
3343 const ata_smart_values & smartval,
3344 int mailtype, const char * msg)
3345{
3346 // Find attribute index
3347 int i = ata_find_attr_index(id, smartval);
3348 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3349 return;
3350
3351 // No report if no sectors pending.
3352 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3353 if (rawval == 0) {
3354 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3355 return;
3356 }
3357
3358 // If attribute is not reset, report only sector count increases.
3359 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3360 if (!(!increase_only || prev_rawval < rawval))
3361 return;
3362
3363 // Format message.
3364 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3365 if (prev_rawval > 0 && rawval != prev_rawval)
3366 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3367
3368 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3369 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3370 state.must_write = true;
3371}
3372
// Format a temperature value for log messages.
// Returns "??" if the value is unset (zero), otherwise the decimal number
// written to 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x == 0)
    return "??"; // unset

  snprintf(buf, sizeof(buf), "%u", static_cast<unsigned>(x));
  return buf;
}
3381
3382// Check Temperature limits
3383static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3384{
3385 if (!(0 < currtemp && currtemp < 255)) {
3386 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3387 return;
3388 }
3389
3390 // Update Max Temperature
3391 const char * minchg = "", * maxchg = "";
3392 if (currtemp > state.tempmax) {
3393 if (state.tempmax)
3394 maxchg = "!";
3395 state.tempmax = currtemp;
3396 state.must_write = true;
3397 }
3398
3399 char buf[20];
3400 if (!state.temperature) {
3401 // First check
3402 if (!state.tempmin || currtemp < state.tempmin)
3403 // Delay Min Temperature update by ~ 30 minutes.
3404 state.tempmin_delay = time(nullptr) + default_checktime - 60;
3405 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3406 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3407 if (triptemp)
3408 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3409 state.temperature = currtemp;
3410 }
3411 else {
3412 if (state.tempmin_delay) {
3413 // End Min Temperature update delay if ...
3414 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3415 || (state.tempmin_delay <= time(nullptr))) { // or delay time is over.
3416 state.tempmin_delay = 0;
3417 if (!state.tempmin)
3418 state.tempmin = 255;
3419 }
3420 }
3421
3422 // Update Min Temperature
3423 if (!state.tempmin_delay && currtemp < state.tempmin) {
3424 state.tempmin = currtemp;
3425 state.must_write = true;
3426 if (currtemp != state.temperature)
3427 minchg = "!";
3428 }
3429
3430 // Track changes
3431 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3432 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3433 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3434 state.temperature = currtemp;
3435 }
3436 }
3437
3438 // Check limits
3439 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3440 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3441 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3442 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3443 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3444 }
3445 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3446 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3447 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3448 }
3449 else if (cfg.tempcrit) {
3450 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3451 if (currtemp < limit)
3452 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3453 }
3454}
3455
3456// Check normalized and raw attribute values.
3457static void check_attribute(const dev_config & cfg, dev_state & state,
3458 const ata_smart_attribute & attr,
3459 const ata_smart_attribute & prev,
3460 int attridx,
3461 const ata_smart_threshold_entry * thresholds)
3462{
3463 // Check attribute and threshold
3464 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3465 if (attrstate == ATTRSTATE_NON_EXISTING)
3466 return;
3467
3468 // If requested, check for usage attributes that have failed.
3469 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3471 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3472 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3473 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3474 state.must_write = true;
3475 }
3476
3477 // Return if we're not tracking this type of attribute
3478 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3479 if (!( ( prefail && cfg.prefail)
3480 || (!prefail && cfg.usage )))
3481 return;
3482
3483 // Return if '-I ID' was specified
3485 return;
3486
3487 // Issue warning if they don't have the same ID in all structures.
3488 if (attr.id != prev.id) {
3489 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3490 cfg.name.c_str(), attr.id, prev.id);
3491 return;
3492 }
3493
3494 // Compare normalized values if valid.
3495 bool valchanged = false;
3496 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3497 if (attr.current != prev.current)
3498 valchanged = true;
3499 }
3500
3501 // Compare raw values if requested.
3502 bool rawchanged = false;
3503 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3506 rawchanged = true;
3507 }
3508
3509 // Return if no change
3510 if (!(valchanged || rawchanged))
3511 return;
3512
3513 // Format value strings
3514 std::string currstr, prevstr;
3515 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3516 // Print raw values only
3517 currstr = strprintf("%s (Raw)",
3518 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3519 prevstr = strprintf("%s (Raw)",
3520 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3521 }
3522 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3523 // Print normalized and raw values
3524 currstr = strprintf("%d [Raw %s]", attr.current,
3525 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3526 prevstr = strprintf("%d [Raw %s]", prev.current,
3527 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3528 }
3529 else {
3530 // Print normalized values only
3531 currstr = strprintf("%d", attr.current);
3532 prevstr = strprintf("%d", prev.current);
3533 }
3534
3535 // Format message
3536 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3537 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3538 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3539 prevstr.c_str(), currstr.c_str());
3540
3541 // Report this change as critical ?
3542 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3543 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3544 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3545 MailWarning(cfg, state, 2, "%s", msg.c_str());
3546 }
3547 else {
3548 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3549 }
3550 state.must_write = true;
3551}
3552
3553
3554static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3555 bool firstpass, bool allow_selftests)
3556{
3557 if (!open_device(cfg, state, atadev, "ATA"))
3558 return 1;
3559
3560 const char * name = cfg.name.c_str();
3561
3562 // user may have requested (with the -n Directive) to leave the disk
3563 // alone if it is in idle or sleeping mode. In this case check the
3564 // power mode and exit without check if needed
3565 if (cfg.powermode && !state.powermodefail) {
3566 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3567 const char * mode = 0;
3568 if (0 <= powermode && powermode < 0xff) {
3569 // wait for possible spin up and check again
3570 int powermode2;
3571 sleep(5);
3572 powermode2 = ataCheckPowerMode(atadev);
3573 if (powermode2 > powermode)
3574 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3575 powermode = powermode2;
3576 }
3577
3578 switch (powermode){
3579 case -1:
3580 // SLEEP
3581 mode="SLEEP";
3582 if (cfg.powermode>=1)
3583 dontcheck=1;
3584 break;
3585 case 0x00:
3586 // STANDBY
3587 mode="STANDBY";
3588 if (cfg.powermode>=2)
3589 dontcheck=1;
3590 break;
3591 case 0x01:
3592 // STANDBY_Y
3593 mode="STANDBY_Y";
3594 if (cfg.powermode>=2)
3595 dontcheck=1;
3596 break;
3597 case 0x80:
3598 // IDLE
3599 mode="IDLE";
3600 if (cfg.powermode>=3)
3601 dontcheck=1;
3602 break;
3603 case 0x81:
3604 // IDLE_A
3605 mode="IDLE_A";
3606 if (cfg.powermode>=3)
3607 dontcheck=1;
3608 break;
3609 case 0x82:
3610 // IDLE_B
3611 mode="IDLE_B";
3612 if (cfg.powermode>=3)
3613 dontcheck=1;
3614 break;
3615 case 0x83:
3616 // IDLE_C
3617 mode="IDLE_C";
3618 if (cfg.powermode>=3)
3619 dontcheck=1;
3620 break;
3621 case 0xff:
3622 // ACTIVE/IDLE
3623 case 0x40:
3624 // ACTIVE
3625 case 0x41:
3626 // ACTIVE
3627 mode="ACTIVE or IDLE";
3628 break;
3629 default:
3630 // UNKNOWN
3631 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3632 name, powermode);
3633 state.powermodefail = true;
3634 break;
3635 }
3636
3637 // if we are going to skip a check, return now
3638 if (dontcheck){
3639 // skip at most powerskipmax checks
3640 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3641 CloseDevice(atadev, name);
3642 // report first only except if state has changed, avoid waking up system disk
3643 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3644 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3645 state.lastpowermodeskipped = powermode;
3646 }
3647 state.powerskipcnt++;
3648 return 0;
3649 }
3650 else {
3651 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3652 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3653 }
3654 state.powerskipcnt = 0;
3655 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3656 }
3657 else if (state.powerskipcnt) {
3658 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3659 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3660 state.powerskipcnt = 0;
3661 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3662 }
3663 }
3664
3665 // check smart status
3666 if (cfg.smartcheck) {
3667 int status=ataSmartStatus2(atadev);
3668 if (status==-1){
3669 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3670 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3671 state.must_write = true;
3672 }
3673 else if (status==1){
3674 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3675 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3676 state.must_write = true;
3677 }
3678 }
3679
3680 // Check everything that depends upon SMART Data (eg, Attribute values)
3681 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3682 || cfg.curr_pending_id || cfg.offl_pending_id
3683 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3684 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3685
3686 // Read current attribute values.
3687 ata_smart_values curval;
3688 if (ataReadSmartValues(atadev, &curval)){
3689 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3690 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3691 state.must_write = true;
3692 }
3693 else {
3694 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3695
3696 // look for current or offline pending sectors
3697 if (cfg.curr_pending_id)
3698 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3699 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3700 : "Total unreadable (pending) sectors" ));
3701
3702 if (cfg.offl_pending_id)
3703 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3704 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3705 : "Total offline uncorrectable sectors"));
3706
3707 // check temperature limits
3708 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3709 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3710
3711 // look for failed usage attributes, or track usage or prefail attributes
3712 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3713 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3714 check_attribute(cfg, state,
3715 curval.vendor_attributes[i],
3716 state.smartval.vendor_attributes[i],
3717 i, state.smartthres.thres_entries);
3718 }
3719 }
3720
3721 // Log changes of offline data collection status
3722 if (cfg.offlinests) {
3725 || state.offline_started // test was started in previous call
3726 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3728 }
3729
3730 // Log changes of self-test execution status
3731 if (cfg.selfteststs) {
3733 || state.selftest_started // test was started in previous call
3734 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3736 }
3737
3738 // Save the new values for the next time around
3739 state.smartval = curval;
3741 state.attrlog_dirty = true;
3742 }
3743 }
3744 state.offline_started = state.selftest_started = false;
3745
3746 // check if number of selftest errors has increased (note: may also DECREASE)
3747 if (cfg.selftest)
3748 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3749
3750 // check if number of ATA errors has increased
3751 if (cfg.errorlog || cfg.xerrorlog) {
3752
3753 int errcnt1 = -1, errcnt2 = -1;
3754 if (cfg.errorlog)
3755 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3756 if (cfg.xerrorlog)
3757 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3758
3759 // new number of errors is max of both logs
3760 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3761
3762 // did command fail?
3763 if (newc<0)
3764 // lack of PrintOut here is INTENTIONAL
3765 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3766
3767 // has error count increased?
3768 int oldc = state.ataerrorcount;
3769 if (newc>oldc){
3770 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3771 name, oldc, newc);
3772 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3773 name, oldc, newc);
3774 state.must_write = true;
3775 }
3776
3777 if (newc>=0)
3778 state.ataerrorcount=newc;
3779 }
3780
3781 // if the user has asked, and device is capable (or we're not yet
3782 // sure) check whether a self test should be done now.
3783 if (allow_selftests && !cfg.test_regex.empty()) {
3784 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3785 if (testtype)
3786 DoATASelfTest(cfg, state, atadev, testtype);
3787 }
3788
3789 // Don't leave device open -- the OS/user may want to access it
3790 // before the next smartd cycle!
3791 CloseDevice(atadev, name);
3792 return 0;
3793}
3794
// Run one monitoring pass over a SCSI device.
// Opens the device, queries scsiCheckIE() for asc/ascq and temperatures,
// applies the temperature and self-test log checks selected in CFG, starts a
// self-test if next_scheduled_test() returns a test type and ALLOW_SELFTESTS
// is set, and - if an attribute log file is configured - reads the error
// counter log pages. The device is closed again before returning.
// Returns 1 if the device could not be opened, 0 otherwise.
3795static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3796{
3797 if (!open_device(cfg, state, scsidev, "SCSI"))
3798 return 1;
3799
3800 const char * name = cfg.name.c_str();
3801
3802 uint8_t asc = 0, ascq = 0;
3803 uint8_t currenttemp = 0, triptemp = 0;
// The query is skipped once a previous failure has set SuppressReport;
// asc/ascq and both temperatures then keep their zero defaults.
3804 if (!state.SuppressReport) {
3805 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3806 &asc, &ascq, &currenttemp, &triptemp)) {
3807 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3808 name);
3809 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3810 state.SuppressReport = 1;
3811 }
3812 }
// Nonzero asc: report a failure if scsiGetIEString() knows a message for
// asc/ascq; asc=4, ascq=9 is logged as a running self-test instead.
3813 if (asc > 0) {
3814 char b[128];
3815 const char * cp = scsiGetIEString(asc, ascq, b, sizeof(b));
3816
3817 if (cp) {
3818 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3819 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3820 } else if (asc == 4 && ascq == 9) {
3821 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3822 } else if (debugmode)
3823 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3824 name, (int)asc, (int)ascq);
3825 } else if (debugmode)
3826 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3827
3828 // check temperature limits
3829 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3830 CheckTemperature(cfg, state, currenttemp, triptemp);
3831
3832 // check if number of selftest errors has increased (note: may also DECREASE)
3833 if (cfg.selftest)
3834 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3835
// Start a scheduled self-test only if the caller allows it and a '-s' regex is set
3836 if (allow_selftests && !cfg.test_regex.empty()) {
3837 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3838 if (testtype)
3839 DoSCSISelfTest(cfg, state, scsidev, testtype);
3840 }
3841 if (!cfg.attrlog_file.empty()){
3842 // saving error counters to state
// NOTE(review): the listing numbers jump inside each of the four branches
// below (e.g. 3845 -> 3848), so the statements that evaluate tBuf after a
// successful scsiLogSense() appear to be missing here - confirm against the
// original source before editing this section.
3843 uint8_t tBuf[252];
3844 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3845 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3848 state.scsi_error_counters[0].found=1;
3849 }
3850 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3851 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3854 state.scsi_error_counters[1].found=1;
3855 }
3856 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3857 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3860 state.scsi_error_counters[2].found=1;
3861 }
3862 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3863 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3867 }
3868 // store temperature if not done by CheckTemperature() above
3869 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3870 state.temperature = currenttemp;
3871 }
3872 CloseDevice(scsidev, name);
// Flag the attribute log as needing an update after every completed pass
3873 state.attrlog_dirty = true;
3874 return 0;
3875}
3876
3877static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3878{
3879 if (!open_device(cfg, state, nvmedev, "NVMe"))
3880 return 1;
3881
3882 const char * name = cfg.name.c_str();
3883
3884 // Read SMART/Health log
3885 nvme_smart_log smart_log;
3886 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3887 CloseDevice(nvmedev, name);
3888 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3889 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3890 state.must_write = true;
3891 return 0;
3892 }
3893
3894 // Check Critical Warning bits
3895 if (cfg.smartcheck && smart_log.critical_warning) {
3896 unsigned char w = smart_log.critical_warning;
3897 std::string msg;
3898 static const char * const wnames[] =
3899 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3900
3901 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3902 if (!(w & (1 << b)))
3903 continue;
3904 if (cnt)
3905 msg += ", ";
3906 if (++cnt > 3) {
3907 msg += "..."; break;
3908 }
3909 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3910 msg += "*Unknown*"; break;
3911 }
3912 msg += wnames[b];
3913 }
3914
3915 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3916 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3917 state.must_write = true;
3918 }
3919
3920 // Check temperature limits
3921 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3922 int k = nvme_get_max_temp_kelvin(smart_log);
3923 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3924 int c = k - 273;
3925 if (c < 1)
3926 c = 1;
3927 else if (c > 0xff)
3928 c = 0xff;
3929 CheckTemperature(cfg, state, c, 0);
3930 }
3931
3932 // Check if number of errors has increased
3933 if (cfg.errorlog || cfg.xerrorlog) {
3934 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3935 if (newcnt > state.nvme_err_log_entries) {
3936 // Warn only if device related errors are found
3937 check_nvme_error_log(cfg, state, nvmedev, newcnt);
3938 }
3939 // else // TODO: Handle decrease of count?
3940 }
3941
3942 CloseDevice(nvmedev, name);
3943 state.attrlog_dirty = true;
3944 return 0;
3945}
3946
3947// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
// NOTE(review): these values presumably describe standby_disable_state, which
// is used below; its declaration is not visible in this listing.
3949
// Set up the auto standby handling from the device configurations.
// Leaves standby_disable_state at 1 if at least one ',ns' directive is in
// effect and the platform accepted the probe call, else at 0.
// NOTE(review): the signature line of this function is not visible in this
// listing; the body only shows that it iterates a container named 'configs'.
3951{
3952 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3953 bool sts1 = false, sts2 = false;
3954 for (const auto & cfg : configs) {
3955 if (cfg.offlinests_ns)
3956 sts1 = true;
3957 if (cfg.selfteststs_ns)
3958 sts2 = true;
3959 }
3960
3961 // Check for support of disable auto standby
3962 // Reenable standby if smartd.conf was reread
3963 if (sts1 || sts2 || standby_disable_state == 3) {
// disable_system_auto_standby(false) serves as probe and as re-enable request.
// If it fails, the ',ns' requests are dropped for this configuration.
3964 if (!smi()->disable_system_auto_standby(false)) {
3965 if (standby_disable_state == 3)
3966 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3967 if (sts1 || sts2) {
3968 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3969 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3970 sts1 = sts2 = false;
3971 }
3972 }
3973 }
3974
3975 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3976}
3977
// Disable system auto standby while a test is running on any device
// configured with '-l offlinests,ns' or '-l selfteststs,ns' and enable it
// again afterwards. Messages are logged depending on standby_disable_state.
// NOTE(review): several lines of this function are missing from this listing
// (gaps in the listing numbers): the condition guarding the early return, the
// second operand of both '||' expressions in the loop, and the statements
// following each log message (presumably the updates of
// standby_disable_state) - confirm against the original source.
3978static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3979{
3981 return;
3982
3983 // Check for just started or still running self-tests
3984 bool running = false;
// The loop stops at the first device for which a test is detected
3985 for (unsigned i = 0; i < configs.size() && !running; i++) {
3986 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3987
3988 if ( ( cfg.offlinests_ns
3989 && (state.offline_started ||
3991 || ( cfg.selfteststs_ns
3992 && (state.selftest_started ||
3994 running = true;
3995 // state.offline/selftest_started will be reset after next logging of test status
3996 }
3997
3998 // Disable/enable auto standby and log state changes
3999 if (!running) {
// No test running: re-enable standby unless the state is already 1 (not disabled)
4000 if (standby_disable_state != 1) {
4001 if (!smi()->disable_system_auto_standby(false))
4002 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
4003 smi()->get_errmsg());
4004 else
4005 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
4007 }
4008 }
// Test running: the disable request is issued on every call, but a rejection
// is logged only if the state is not already 2 (disable rejected by OS)
4009 else if (!smi()->disable_system_auto_standby(true)) {
4010 if (standby_disable_state != 2) {
4011 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
4012 smi()->get_errmsg());
4014 }
4015 }
4016 else {
// Disable request succeeded: log only if the state is not already 3 (disabled)
4017 if (standby_disable_state != 3) {
4018 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
4020 }
4021 }
4022}
4023
4024// Checks the SMART status of all ATA, SCSI and NVMe devices
// Runs the type specific check function for each entry of DEVICES whose
// state is not marked 'skip', then updates the auto standby handling.
// CONFIGS, STATES and DEVICES are indexed in parallel.
// The return values of the check functions are ignored here.
4025static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
4026 smart_device_list & devices, bool firstpass, bool allow_selftests)
4027{
4028 for (unsigned i = 0; i < configs.size(); i++) {
4029 const dev_config & cfg = configs.at(i);
4030 dev_state & state = states.at(i);
4031 if (state.skip) {
4032 if (debugmode)
// A device specific check interval takes precedence over the global 'checktime'
4033 PrintOut(LOG_INFO, "Device: %s, skipped (interval=%d)\n", cfg.name.c_str(),
4034 (cfg.checktime ? cfg.checktime : checktime));
4035 continue;
4036 }
4037
4038 smart_device * dev = devices.at(i);
4039 if (dev->is_ata())
4040 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
4041 else if (dev->is_scsi())
4042 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
4043 else if (dev->is_nvme())
4044 NVMeCheckDevice(cfg, state, dev->to_nvme());
4045
4046 // Prevent systemd unit startup timeout when checking many devices on startup
// NOTE(review): the statement belonging to the comment above is missing from
// this listing (listing number 4047) - confirm against the original source.
4048 }
4049
4050 do_disable_standby_check(configs, states);
4051}
4052
4053// Install all signal handlers
// NOTE(review): the signature and most statements of this function are
// missing from this listing (gaps in the listing numbers after each of the
// comments below); only the Windows specific USR2 registration is visible.
// Confirm against the original source before editing.
4055{
4056 // normal and abnormal exit
4059
4060 // in debug mode, <CONTROL-C> ==> HUP
4062
4063 // Catch HUP and USR1
4066#ifdef _WIN32
// USR2 is handled on Windows builds only
4067 set_signal_if_not_ignored(SIGUSR2, USR2handler);
4068#endif
4069}
4070
#ifdef _WIN32
// Switch debug mode on or off at runtime. Available on native Windows only
// (there is no easy way to reopen tty on *nix).
// Only the debug modes 0 and 1 are toggled, other modes are left unchanged.
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      // Off -> on: needs a console, <CONTROL-C> is then handled like HUP
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      // On -> off: drop the console and restore the regular SIGINT handler
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
4096
// Return the next wake-up time for a check interval of CT seconds.
// A WAKEUPTIME which still lies in the future is returned unchanged.
// Otherwise the result is the first time after TIMENOW which is a whole
// number of intervals after WAKEUPTIME.
time_t calc_next_wakeuptime(time_t wakeuptime, time_t timenow, int ct)
{
  if (wakeuptime > timenow)
    return wakeuptime;

  // Seconds already elapsed in the interval which is currently running
  const time_t into_interval = (timenow - wakeuptime) % ct;
  return timenow + ct - into_interval;
}
4103
4104static time_t dosleep(time_t wakeuptime, const dev_config_vector & configs,
4105 dev_state_vector & states, bool & sigwakeup)
4106{
4107 // If past wake-up-time, compute next wake-up-time
4108 time_t timenow = time(nullptr);
4109 unsigned n = configs.size();
4110 int ct;
4111 if (!checktime_min) {
4112 // Same for all devices
4113 wakeuptime = calc_next_wakeuptime(wakeuptime, timenow, checktime);
4114 ct = checktime;
4115 }
4116 else {
4117 // Determine wakeuptime of next device(s)
4118 wakeuptime = 0;
4119 for (unsigned i = 0; i < n; i++) {
4120 const dev_config & cfg = configs.at(i);
4121 dev_state & state = states.at(i);
4122 if (!state.skip)
4123 state.wakeuptime = calc_next_wakeuptime((state.wakeuptime ? state.wakeuptime : timenow),
4124 timenow, (cfg.checktime ? cfg.checktime : checktime));
4125 if (!wakeuptime || state.wakeuptime < wakeuptime)
4126 wakeuptime = state.wakeuptime;
4127 }
4128 ct = checktime_min;
4129 }
4130
4131 notify_wait(wakeuptime, n);
4132
4133 // Sleep until we catch a signal or have completed sleeping
4134 bool no_skip = false;
4135 int addtime = 0;
4136 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
4137 // Restart if system clock has been adjusted to the past
4138 if (wakeuptime > timenow + ct) {
4139 PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
4140 wakeuptime = timenow + ct;
4141 for (auto & state : states)
4142 state.wakeuptime = 0;
4143 no_skip = true;
4144 }
4145
4146 // Exit sleep when time interval has expired or a signal is received
4147 sleep(wakeuptime+addtime-timenow);
4148
4149#ifdef _WIN32
4150 // toggle debug mode?
4151 if (caughtsigUSR2) {
4152 ToggleDebugMode();
4153 caughtsigUSR2 = 0;
4154 }
4155#endif
4156
4157 timenow = time(nullptr);
4158
4159 // Actual sleep time too long?
4160 if (!addtime && timenow > wakeuptime+60) {
4161 if (debugmode)
4162 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4163 (int)(timenow-wakeuptime));
4164 // Wait another 20 seconds to avoid I/O errors during disk spin-up
4165 addtime = timenow-wakeuptime+20;
4166 // Use next wake-up-time if close
4167 int nextcheck = ct - addtime % ct;
4168 if (nextcheck <= 20)
4169 addtime += nextcheck;
4170 }
4171 }
4172
4173 // if we caught a SIGUSR1 then print message and clear signal
4174 if (caughtsigUSR1){
4175 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4176 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4177 caughtsigUSR1=0;
4178 sigwakeup = no_skip = true;
4179 }
4180
4181 // Check which devices must be skipped in this cycle
4182 if (checktime_min) {
4183 for (auto & state : states)
4184 state.skip = (!no_skip && timenow < state.wakeuptime);
4185 }
4186
4187 // return adjusted wakeuptime
4188 return wakeuptime;
4189}
4190
4191// Print out a list of valid arguments for the Directive d
4192static void printoutvaliddirectiveargs(int priority, char d)
4193{
4194 switch (d) {
4195 case 'n':
4196 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4197 break;
4198 case 's':
4199 PrintOut(priority, "valid_regular_expression");
4200 break;
4201 case 'd':
4202 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4203 break;
4204 case 'T':
4205 PrintOut(priority, "normal, permissive");
4206 break;
4207 case 'o':
4208 case 'S':
4209 PrintOut(priority, "on, off");
4210 break;
4211 case 'l':
4212 PrintOut(priority, "error, selftest");
4213 break;
4214 case 'M':
4215 PrintOut(priority, "\"once\", \"always\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4216 break;
4217 case 'v':
4218 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4219 break;
4220 case 'P':
4221 PrintOut(priority, "use, ignore, show, showall");
4222 break;
4223 case 'F':
4224 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4225 break;
4226 case 'e':
4227 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4228 "security-freeze, standby,[N|off], wcache,[on|off]");
4229 break;
4230 case 'c':
4231 PrintOut(priority, "i=N, interval=N");
4232 break;
4233 }
4234}
4235
4236// exits with an error message, or returns integer value of token
4237static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4238 int min, int max, char * suffix = 0)
4239{
4240 // make sure argument is there
4241 if (!arg) {
4242 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4243 cfgfile, lineno, name, token, min, max);
4244 return -1;
4245 }
4246
4247 // get argument value (base 10), check that it's integer, and in-range
4248 char *endptr;
4249 int val = strtol(arg,&endptr,10);
4250
4251 // optional suffix present?
4252 if (suffix) {
4253 if (!strcmp(endptr, suffix))
4254 endptr += strlen(suffix);
4255 else
4256 *suffix = 0;
4257 }
4258
4259 if (!(!*endptr && min <= val && val <= max)) {
4260 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4261 cfgfile, lineno, name, token, arg, min, max);
4262 return -1;
4263 }
4264
4265 // all is well; return value
4266 return val;
4267}
4268
4269
4270// Get 1-3 small integer(s) for '-W' directive
4271static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4272 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4273{
4274 unsigned v1 = 0, v2 = 0, v3 = 0;
4275 int n1 = -1, n2 = -1, n3 = -1, len;
4276 if (!arg) {
4277 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4278 cfgfile, lineno, name, token);
4279 return -1;
4280 }
4281
4282 len = strlen(arg);
4283 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4284 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4285 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4286 cfgfile, lineno, name, token, arg);
4287 return -1;
4288 }
4289 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4290 return 0;
4291}
4292
4293
4294#ifdef _WIN32
4295
// Get the next strtok() token and concatenate strtok() results if quoted
// with "...".
// If the token starts with '"', it is joined with the following tokens
// (separated by single spaces) up to and including the first token which
// ends with '"'. The text between the quotes is returned.
// Returns nullptr if no token remains and the string "\"" if the closing
// quote is missing. A dequoted result is kept in a static buffer which is
// overwritten by the next call which finds a quoted token.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(nullptr, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // The closing quote may already be part of the first token ("word").
  // Without this check, a quoted argument without spaces would swallow the
  // following tokens or would be reported as 'missing closing quote'.
  if (!token.empty() && token.back() == '"') {
    token.pop_back();
    return token.c_str();
  }

  for (;;) {
    t = strtok(nullptr, delimiters);
    if (!t || !*t)
      return "\""; // No more tokens: closing quote is missing
    token += ' ';
    const size_t len = strlen(t); // > 0 due to the check above
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4319
4320#endif // _WIN32
4321
4322
4323// This function returns 1 if it has correctly parsed one token (and
4324// any arguments), else zero if no tokens remain. It returns -1 if an
4325// error was encountered.
4326static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4327{
4328 char sym;
4329 const char * name = cfg.name.c_str();
4330 int lineno=cfg.lineno;
4331 const char *delim = " \n\t";
4332 int badarg = 0;
4333 int missingarg = 0;
4334 const char *arg = 0;
4335
4336 // is the rest of the line a comment
4337 if (*token=='#')
4338 return 1;
4339
4340 // is the token not recognized?
4341 if (*token!='-' || strlen(token)!=2) {
4342 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4343 configfile, lineno, name, token);
4344 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4345 return -1;
4346 }
4347
4348 // token we will be parsing:
4349 sym=token[1];
4350
4351 // parse the token and swallow its argument
4352 int val;
4353 char plus[] = "+", excl[] = "!";
4354
4355 switch (sym) {
4356 case 'C':
4357 // monitor current pending sector count (default 197)
4358 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4359 return -1;
4360 cfg.curr_pending_id = (unsigned char)val;
4361 cfg.curr_pending_incr = (*plus == '+');
4362 cfg.curr_pending_set = true;
4363 break;
4364 case 'U':
4365 // monitor offline uncorrectable sectors (default 198)
4366 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4367 return -1;
4368 cfg.offl_pending_id = (unsigned char)val;
4369 cfg.offl_pending_incr = (*plus == '+');
4370 cfg.offl_pending_set = true;
4371 break;
4372 case 'T':
4373 // Set tolerance level for SMART command failures
4374 if (!(arg = strtok(nullptr, delim))) {
4375 missingarg = 1;
4376 } else if (!strcmp(arg, "normal")) {
4377 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4378 // not on failure of an optional S.M.A.R.T. command.
4379 // This is the default so we don't need to actually do anything here.
4380 cfg.permissive = false;
4381 } else if (!strcmp(arg, "permissive")) {
4382 // Permissive mode; ignore errors from Mandatory SMART commands
4383 cfg.permissive = true;
4384 } else {
4385 badarg = 1;
4386 }
4387 break;
4388 case 'd':
4389 // specify the device type
4390 if (!(arg = strtok(nullptr, delim))) {
4391 missingarg = 1;
4392 } else if (!strcmp(arg, "ignore")) {
4393 cfg.ignore = true;
4394 } else if (!strcmp(arg, "removable")) {
4395 cfg.removable = true;
4396 } else if (!strcmp(arg, "auto")) {
4397 cfg.dev_type = "";
4398 scan_types.clear();
4399 } else {
4400 cfg.dev_type = arg;
4401 scan_types.push_back(arg);
4402 }
4403 break;
4404 case 'F':
4405 // fix firmware bug
4406 if (!(arg = strtok(nullptr, delim)))
4407 missingarg = 1;
4408 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4409 badarg = 1;
4410 break;
4411 case 'H':
4412 // check SMART status
4413 cfg.smartcheck = true;
4414 break;
4415 case 'f':
4416 // check for failure of usage attributes
4417 cfg.usagefailed = true;
4418 break;
4419 case 't':
4420 // track changes in all vendor attributes
4421 cfg.prefail = true;
4422 cfg.usage = true;
4423 break;
4424 case 'p':
4425 // track changes in prefail vendor attributes
4426 cfg.prefail = true;
4427 break;
4428 case 'u':
4429 // track changes in usage vendor attributes
4430 cfg.usage = true;
4431 break;
4432 case 'l':
4433 // track changes in SMART logs
4434 if (!(arg = strtok(nullptr, delim))) {
4435 missingarg = 1;
4436 } else if (!strcmp(arg, "selftest")) {
4437 // track changes in self-test log
4438 cfg.selftest = true;
4439 } else if (!strcmp(arg, "error")) {
4440 // track changes in ATA error log
4441 cfg.errorlog = true;
4442 } else if (!strcmp(arg, "xerror")) {
4443 // track changes in Extended Comprehensive SMART error log
4444 cfg.xerrorlog = true;
4445 } else if (!strcmp(arg, "offlinests")) {
4446 // track changes in offline data collection status
4447 cfg.offlinests = true;
4448 } else if (!strcmp(arg, "offlinests,ns")) {
4449 // track changes in offline data collection status, disable auto standby
4450 cfg.offlinests = cfg.offlinests_ns = true;
4451 } else if (!strcmp(arg, "selfteststs")) {
4452 // track changes in self-test execution status
4453 cfg.selfteststs = true;
4454 } else if (!strcmp(arg, "selfteststs,ns")) {
4455 // track changes in self-test execution status, disable auto standby
4456 cfg.selfteststs = cfg.selfteststs_ns = true;
4457 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4458 // set SCT Error Recovery Control
4459 unsigned rt = ~0, wt = ~0; int nc = -1;
4460 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4461 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4462 cfg.sct_erc_set = true;
4463 cfg.sct_erc_readtime = rt;
4464 cfg.sct_erc_writetime = wt;
4465 }
4466 else
4467 badarg = 1;
4468 } else {
4469 badarg = 1;
4470 }
4471 break;
4472 case 'a':
4473 // monitor everything
4474 cfg.smartcheck = true;
4475 cfg.prefail = true;
4476 cfg.usagefailed = true;
4477 cfg.usage = true;
4478 cfg.selftest = true;
4479 cfg.errorlog = true;
4480 cfg.selfteststs = true;
4481 break;
4482 case 'o':
4483 // automatic offline testing enable/disable
4484 if (!(arg = strtok(nullptr, delim))) {
4485 missingarg = 1;
4486 } else if (!strcmp(arg, "on")) {
4487 cfg.autoofflinetest = 2;
4488 } else if (!strcmp(arg, "off")) {
4489 cfg.autoofflinetest = 1;
4490 } else {
4491 badarg = 1;
4492 }
4493 break;
4494 case 'n':
4495 // skip disk check if in idle or standby mode
4496 if (!(arg = strtok(nullptr, delim)))
4497 missingarg = 1;
4498 else {
4499 char *endptr = nullptr;
4500 char *next = strchr(const_cast<char*>(arg), ',');
4501
4502 cfg.powerquiet = false;
4503 cfg.powerskipmax = 0;
4504
4505 if (next)
4506 *next = '\0';
4507 if (!strcmp(arg, "never"))
4508 cfg.powermode = 0;
4509 else if (!strcmp(arg, "sleep"))
4510 cfg.powermode = 1;
4511 else if (!strcmp(arg, "standby"))
4512 cfg.powermode = 2;
4513 else if (!strcmp(arg, "idle"))
4514 cfg.powermode = 3;
4515 else
4516 badarg = 1;
4517
4518 // if optional arguments are present
4519 if (!badarg && next) {
4520 next++;
4521 cfg.powerskipmax = strtol(next, &endptr, 10);
4522 if (endptr == next)
4523 cfg.powerskipmax = 0;
4524 else {
4525 next = endptr + (*endptr != '\0');
4526 if (cfg.powerskipmax <= 0)
4527 badarg = 1;
4528 }
4529 if (*next != '\0') {
4530 if (!strcmp("q", next))
4531 cfg.powerquiet = true;
4532 else {
4533 badarg = 1;
4534 }
4535 }
4536 }
4537 }
4538 break;
4539 case 'S':
4540 // automatic attribute autosave enable/disable
4541 if (!(arg = strtok(nullptr, delim))) {
4542 missingarg = 1;
4543 } else if (!strcmp(arg, "on")) {
4544 cfg.autosave = 2;
4545 } else if (!strcmp(arg, "off")) {
4546 cfg.autosave = 1;
4547 } else {
4548 badarg = 1;
4549 }
4550 break;
4551 case 's':
4552 // warn user, and delete any previously given -s REGEXP Directives
4553 if (!cfg.test_regex.empty()){
4554 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4555 configfile, lineno, name, cfg.test_regex.get_pattern());
4557 }
4558 // check for missing argument
4559 if (!(arg = strtok(nullptr, delim))) {
4560 missingarg = 1;
4561 }
4562 // Compile regex
4563 else {
4564 if (!cfg.test_regex.compile(arg)) {
4565 // not a valid regular expression!
4566 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4567 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4568 return -1;
4569 }
4570 // Do a bit of sanity checking and warn user if we think that
4571 // their regexp is "strange". User probably confused about shell
4572 // glob(3) syntax versus regular expression syntax regexp(7).
4573 // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4574 static const regular_expression syntax_check(
4575 "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4576 ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4577 ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4578 );
4580 if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4581 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4582 "extended regular expression \"%s\"\n",
4583 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4584 }
4585 break;
4586 case 'm':
4587 // send email to address that follows
4588 if (!(arg = strtok(nullptr, delim)))
4589 missingarg = 1;
4590 else {
4591 if (!cfg.emailaddress.empty())
4592 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4593 configfile, lineno, name, cfg.emailaddress.c_str());
4594 cfg.emailaddress = arg;
4595 }
4596 break;
4597 case 'M':
4598 // email warning options
4599 if (!(arg = strtok(nullptr, delim)))
4600 missingarg = 1;
4601 else if (!strcmp(arg, "once"))
4603 else if (!strcmp(arg, "always"))
4605 else if (!strcmp(arg, "daily"))
4607 else if (!strcmp(arg, "diminishing"))
4609 else if (!strcmp(arg, "test"))
4610 cfg.emailtest = true;
4611 else if (!strcmp(arg, "exec")) {
4612 // Get the next argument (the command line)
4613#ifdef _WIN32
4614 // Allow "/path name/with spaces/..." on Windows
4615 arg = strtok_dequote(delim);
4616 if (arg && arg[0] == '"') {
4617 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4618 configfile, lineno, name, token);
4619 return -1;
4620 }
4621#else
4622 arg = strtok(nullptr, delim);
4623#endif
4624 if (!arg) {
4625 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4626 configfile, lineno, name, token);
4627 return -1;
4628 }
4629 // Free the last cmd line given if any, and copy new one
4630 if (!cfg.emailcmdline.empty())
4631 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4632 configfile, lineno, name, cfg.emailcmdline.c_str());
4633 cfg.emailcmdline = arg;
4634 }
4635 else
4636 badarg = 1;
4637 break;
4638 case 'i':
4639 // ignore failure of usage attribute
4640 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4641 return -1;
4643 break;
4644 case 'I':
4645 // ignore attribute for tracking purposes
4646 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4647 return -1;
4649 break;
4650 case 'r':
4651 // print raw value when tracking
4652 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4653 return -1;
4655 if (*excl == '!') // attribute change is critical
4657 break;
4658 case 'R':
4659 // track changes in raw value (forces printing of raw value)
4660 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4661 return -1;
4663 if (*excl == '!') // raw value change is critical
4665 break;
4666 case 'W':
4667 // track Temperature
4668 if (Get3Integers((arg = strtok(nullptr, delim)), name, token, lineno, configfile,
4669 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4670 return -1;
4671 break;
4672 case 'v':
4673 // non-default vendor-specific attribute meaning
4674 if (!(arg = strtok(nullptr, delim))) {
4675 missingarg = 1;
4676 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4677 badarg = 1;
4678 }
4679 break;
4680 case 'P':
4681 // Define use of drive-specific presets.
4682 if (!(arg = strtok(nullptr, delim))) {
4683 missingarg = 1;
4684 } else if (!strcmp(arg, "use")) {
4685 cfg.ignorepresets = false;
4686 } else if (!strcmp(arg, "ignore")) {
4687 cfg.ignorepresets = true;
4688 } else if (!strcmp(arg, "show")) {
4689 cfg.showpresets = true;
4690 } else if (!strcmp(arg, "showall")) {
4692 } else {
4693 badarg = 1;
4694 }
4695 break;
4696
4697 case 'e':
4698 // Various ATA settings
4699 if (!(arg = strtok(nullptr, delim))) {
4700 missingarg = true;
4701 }
4702 else {
4703 char arg2[16+1]; unsigned uval;
4704 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4705 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &uval, &n3) >= 1
4706 && (n1 == len || n2 > 0)) {
4707 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4708 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4709 if (n3 != len)
4710 uval = ~0U;
4711
4712 if (!strcmp(arg2, "aam")) {
4713 if (off)
4714 cfg.set_aam = -1;
4715 else if (uval <= 254)
4716 cfg.set_aam = uval + 1;
4717 else
4718 badarg = true;
4719 }
4720 else if (!strcmp(arg2, "apm")) {
4721 if (off)
4722 cfg.set_apm = -1;
4723 else if (1 <= uval && uval <= 254)
4724 cfg.set_apm = uval + 1;
4725 else
4726 badarg = true;
4727 }
4728 else if (!strcmp(arg2, "lookahead")) {
4729 if (off)
4730 cfg.set_lookahead = -1;
4731 else if (on)
4732 cfg.set_lookahead = 1;
4733 else
4734 badarg = true;
4735 }
4736 else if (!strcmp(arg, "security-freeze")) {
4737 cfg.set_security_freeze = true;
4738 }
4739 else if (!strcmp(arg2, "standby")) {
4740 if (off)
4741 cfg.set_standby = 0 + 1;
4742 else if (uval <= 255)
4743 cfg.set_standby = uval + 1;
4744 else
4745 badarg = true;
4746 }
4747 else if (!strcmp(arg2, "wcache")) {
4748 if (off)
4749 cfg.set_wcache = -1;
4750 else if (on)
4751 cfg.set_wcache = 1;
4752 else
4753 badarg = true;
4754 }
4755 else if (!strcmp(arg2, "dsn")) {
4756 if (off)
4757 cfg.set_dsn = -1;
4758 else if (on)
4759 cfg.set_dsn = 1;
4760 else
4761 badarg = true;
4762 }
4763 else
4764 badarg = true;
4765 }
4766 else
4767 badarg = true;
4768 }
4769 break;
4770
4771 case 'c':
4772 // Override command line options
4773 {
4774 if (!(arg = strtok(nullptr, delim))) {
4775 missingarg = true;
4776 break;
4777 }
4778 int n = 0, nc = -1, len = strlen(arg);
4779 if ( ( sscanf(arg, "i=%d%n", &n, &nc) == 1
4780 || sscanf(arg, "interval=%d%n", &n, &nc) == 1)
4781 && nc == len && n >= 10)
4782 cfg.checktime = n;
4783 else
4784 badarg = true;
4785 }
4786 break;
4787
4788 default:
4789 // Directive not recognized
4790 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4791 configfile, lineno, name, token);
4792 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4793 return -1;
4794 }
4795 if (missingarg) {
4796 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4797 configfile, lineno, name, token);
4798 }
4799 if (badarg) {
4800 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4801 configfile, lineno, name, token, arg);
4802 }
4803 if (missingarg || badarg) {
4804 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4805 printoutvaliddirectiveargs(LOG_CRIT, sym);
4806 PrintOut(LOG_CRIT, "\n");
4807 return -1;
4808 }
4809
4810 return 1;
4811}
4812
4813// Scan directive for configuration file
4814#define SCANDIRECTIVE "DEVICESCAN"
4815
4816// This is the routine that adds things to the conf_entries list.
4817//
4818// Return values are:
4819// 1: parsed a normal line
4820// 0: found DEFAULT setting or comment or blank line
4821// -1: found SCANDIRECTIVE line
4822// -2: found an error
4823//
4824// Note: this routine modifies *line from the caller!
4825static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4826 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4827{
4828 const char *delim = " \n\t";
4829
4830 // get first token: device name. If a comment, skip line
4831 const char * name = strtok(line, delim);
4832 if (!name || *name == '#')
4833 return 0;
4834
4835 // Check device name for DEFAULT or DEVICESCAN
4836 int retval;
4837 if (!strcmp("DEFAULT", name)) {
4838 retval = 0;
4839 // Restart with empty defaults
4840 default_conf = dev_config();
4841 }
4842 else {
4843 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4844 // Init new entry with current defaults
4845 conf_entries.push_back(default_conf);
4846 }
4847 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4848
4849 cfg.name = name; // Later replaced by dev->get_info().info_name
4850 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4851 cfg.lineno = lineno;
4852
4853 // parse tokens one at a time from the file.
4854 while (char * token = strtok(nullptr, delim)) {
4855 int rc = ParseToken(token, cfg, scan_types);
4856 if (rc < 0)
4857 // error found on the line
4858 return -2;
4859
4860 if (rc == 0)
4861 // No tokens left
4862 break;
4863
4864 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4865 }
4866
4867 // Check for multiple -d TYPE directives
4868 if (retval != -1 && scan_types.size() > 1) {
4869 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4870 cfg.name.c_str(), cfg.lineno, configfile);
4871 return -2;
4872 }
4873
4874 // Don't perform checks below for DEFAULT entries
4875 if (retval == 0)
4876 return retval;
4877
4878 // If NO monitoring directives are set, then set all of them.
4879 if (!( cfg.smartcheck || cfg.selftest
4880 || cfg.errorlog || cfg.xerrorlog
4881 || cfg.offlinests || cfg.selfteststs
4882 || cfg.usagefailed || cfg.prefail || cfg.usage
4883 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4884
4885 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4886 cfg.name.c_str(), cfg.lineno, configfile);
4887
4888 cfg.smartcheck = true;
4889 cfg.usagefailed = true;
4890 cfg.prefail = true;
4891 cfg.usage = true;
4892 cfg.selftest = true;
4893 cfg.errorlog = true;
4894 cfg.selfteststs = true;
4895 }
4896
4897 // additional sanity check. Has user set -M options without -m?
4898 if ( cfg.emailaddress.empty()
4899 && (!cfg.emailcmdline.empty() || cfg.emailfreq != emailfreqs::unknown || cfg.emailtest)) {
4900 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4901 cfg.name.c_str(), cfg.lineno, configfile);
4902 return -2;
4903 }
4904
4905 // has the user has set <nomailer>?
4906 if (cfg.emailaddress == "<nomailer>") {
4907 // check that -M exec is also set
4908 if (cfg.emailcmdline.empty()){
4909 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4910 cfg.name.c_str(), cfg.lineno, configfile);
4911 return -2;
4912 }
4913 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4914 cfg.emailaddress.clear();
4915 }
4916
4917 return retval;
4918}
4919
4920// Parses a configuration file. Return values are:
4921// N=>0: found N entries
4922// -1: syntax error in config file
4923// -2: config file does not exist
4924// -3: config file exists but cannot be read
4925//
4926// In the case where the return value is 0, there are three
4927// possibilities:
4928// Empty configuration file ==> conf_entries.empty()
4929// No configuration file ==> conf_entries[0].lineno == 0
4930// SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4931static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4932{
4933 // maximum line length in configuration file
4934 const int MAXLINELEN = 256;
4935 // maximum length of a continued line in configuration file
4936 const int MAXCONTLINE = 1023;
4937
4938 stdio_file f;
4939 // Open config file, if it exists and is not <stdin>
4940 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4941 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4942 // file exists but we can't read it or it should exist due to '-c' option
4943 int ret = (errno!=ENOENT ? -3 : -2);
4944 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4945 strerror(errno),configfile);
4946 return ret;
4947 }
4948 }
4949 else // read from stdin ('-c -' option)
4950 f.open(stdin);
4951
4952 // Start with empty defaults
4953 dev_config default_conf;
4954
4955 // No configuration file found -- use fake one
4956 int entry = 0;
4957 if (!f) {
4958 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4959
4960 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4961 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4962 return 0;
4963 }
4964
4965#ifdef __CYGWIN__
4966 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4967#endif
4968
4969 // configuration file exists
4970 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4971
4972 // parse config file line by line
4973 int lineno = 1, cont = 0, contlineno = 0;
4974 char line[MAXLINELEN+2];
4975 char fullline[MAXCONTLINE+1];
4976
4977 for (;;) {
4978 int len=0,scandevice;
4979 char *lastslash;
4980 char *comment;
4981 char *code;
4982
4983 // make debugging simpler
4984 memset(line,0,sizeof(line));
4985
4986 // get a line
4987 code=fgets(line, MAXLINELEN+2, f);
4988
4989 // are we at the end of the file?
4990 if (!code){
4991 if (cont) {
4992 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4993 // See if we found a SCANDIRECTIVE directive
4994 if (scandevice==-1)
4995 return 0;
4996 // did we find a syntax error
4997 if