smartmontools SVN Rev 5613
Utility to control and monitor storage systems with "S.M.A.R.T."
smartd.cpp
Go to the documentation of this file.
1/*
2 * Home page of code is: https://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-24 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12#include "config.h"
13#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15// unconditionally included files
16#include <inttypes.h>
17#include <stdio.h>
18#include <sys/types.h>
19#include <sys/stat.h> // umask
20#include <signal.h>
21#include <fcntl.h>
22#include <string.h>
23#include <syslog.h>
24#include <stdarg.h>
25#include <stdlib.h>
26#include <errno.h>
27#include <time.h>
28#include <limits.h>
29#include <getopt.h>
30
31#include <algorithm> // std::replace()
32#include <map>
33#include <stdexcept>
34#include <string>
35#include <vector>
36
37// conditionally included files
38#ifndef _WIN32
39#include <sys/wait.h>
40#endif
41#ifdef HAVE_UNISTD_H
42#include <unistd.h>
43#endif
44
45#ifdef _WIN32
46#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
47#ifdef _MSC_VER
48#pragma warning(disable:4761) // "conversion supplied"
49typedef unsigned short mode_t;
50typedef int pid_t;
51#endif
52#include <io.h> // umask()
53#include <process.h> // getpid()
54#endif // _WIN32
55
56#ifdef __CYGWIN__
57#include <io.h> // setmode()
58#endif // __CYGWIN__
59
60#ifdef HAVE_LIBCAP_NG
61#include <cap-ng.h>
62#endif // LIBCAP_NG
63
64#ifdef HAVE_LIBSYSTEMD
65#include <systemd/sd-daemon.h>
66#endif // HAVE_LIBSYSTEMD
67
68// locally included files
69#include "atacmds.h"
70#include "dev_interface.h"
71#include "knowndrives.h"
72#include "scsicmds.h"
73#include "nvmecmds.h"
74#include "utility.h"
75
76#ifdef HAVE_POSIX_API
77#include "popen_as_ugid.h"
78#endif
79
80#ifdef _WIN32
81// fork()/signal()/initd simulation for native Windows
82#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
83#define strsignal daemon_strsignal
84#define sleep daemon_sleep
85// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
86#define SIGQUIT SIGBREAK
87#define SIGQUIT_KEYNAME "CONTROL-Break"
88#else // _WIN32
89#define SIGQUIT_KEYNAME "CONTROL-\\"
90#endif // _WIN32
91
92const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5613 2024-05-08 13:46:51Z chrfranke $"
93 CONFIG_H_CVSID;
94
95extern "C" {
96 typedef void (*signal_handler_type)(int);
97}
98
100{
101#if defined(_WIN32)
102 // signal() emulation
103 daemon_signal(sig, handler);
104
105#elif defined(HAVE_SIGACTION)
106 // SVr4, POSIX.1-2001, POSIX.1-2008
107 struct sigaction sa;
108 sa.sa_handler = SIG_DFL;
109 sigaction(sig, (struct sigaction *)0, &sa);
110 if (sa.sa_handler == SIG_IGN)
111 return;
112
113 memset(&sa, 0, sizeof(sa));
114 sa.sa_handler = handler;
115 sa.sa_flags = SA_RESTART; // BSD signal() semantics
116 sigaction(sig, &sa, (struct sigaction *)0);
117
118#elif defined(HAVE_SIGSET)
119 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
120 if (sigset(sig, handler) == SIG_IGN)
121 sigset(sig, SIG_IGN);
122
123#else
124 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
125 // Important: BSD semantics is required. Traditional signal()
126 // resets the handler to SIG_DFL after the first signal is caught.
127 if (signal(sig, handler) == SIG_IGN)
128 signal(sig, SIG_IGN);
129#endif
130}
131
132using namespace smartmontools;
133
134static const int scsiLogRespLen = 252;
135
136// smartd exit codes
137#define EXIT_BADCMD 1 // command line did not parse
138#define EXIT_BADCONF 2 // syntax error in config file
139#define EXIT_STARTUP 3 // problem forking daemon
140#define EXIT_PID 4 // problem creating pid file
141#define EXIT_NOCONF 5 // config file does not exist
142#define EXIT_READCONF 6 // config file exists but cannot be read
143
144#define EXIT_NOMEM 8 // out of memory
145#define EXIT_BADCODE 10 // internal error - should NEVER happen
146
147#define EXIT_BADDEV 16 // we can't monitor this device
148#define EXIT_NODEV 17 // no devices to monitor
149
150#define EXIT_SIGNAL 254 // abort on signal
151
152
153// command-line: 1=debug mode, 2=print presets
154static unsigned char debugmode = 0;
155
156// command-line: how long to sleep between checks
157static constexpr int default_checktime = 1800;
159static int checktime_min = 0; // Minimum individual check time, 0 if none
160
161// command-line: name of PID file (empty for no pid file)
162static std::string pid_file;
163
164// command-line: path prefix of persistent state file, empty if no persistence.
165static std::string state_path_prefix
166#ifdef SMARTMONTOOLS_SAVESTATES
167 = SMARTMONTOOLS_SAVESTATES
168#endif
169 ;
170
171// command-line: path prefix of attribute log file, empty if no logs.
172static std::string attrlog_path_prefix
173#ifdef SMARTMONTOOLS_ATTRIBUTELOG
174 = SMARTMONTOOLS_ATTRIBUTELOG
175#endif
176 ;
177
178// configuration file name
179static const char * configfile;
180// configuration file "name" if read from stdin
181static const char * const configfile_stdin = "<stdin>";
182// path of alternate configuration file
183static std::string configfile_alt;
184
185// warning script file
186static std::string warning_script;
187
188#ifdef HAVE_POSIX_API
189// run warning script as non-privileged user
190static bool warn_as_user;
191static uid_t warn_uid;
192static gid_t warn_gid;
193static std::string warn_uname, warn_gname;
194#elif defined(_WIN32)
195// run warning script as restricted user
196static bool warn_as_restr_user;
197#endif
198
199// command-line: when should we exit?
200enum quit_t {
205static bool quit_nodev0 = false;
206
207// command-line; this is the default syslog(3) log facility to use.
208static int facility=LOG_DAEMON;
209
210#ifndef _WIN32
211// command-line: fork into background?
212static bool do_fork=true;
213#endif
214
215// TODO: This smartctl only variable is also used in some os_*.cpp
216unsigned char failuretest_permissive = 0;
217
218// set to one if we catch a USR1 (check devices now)
219static volatile int caughtsigUSR1=0;
220
221#ifdef _WIN32
222// set to one if we catch a USR2 (toggle debug mode)
223static volatile int caughtsigUSR2=0;
224#endif
225
226// set to one if we catch a HUP (reload config file). In debug mode,
227// set to two, if we catch INT (also reload config file).
228static volatile int caughtsigHUP=0;
229
230// set to signal value if we catch INT, QUIT, or TERM
231static volatile int caughtsigEXIT=0;
232
233// This function prints either to stdout or to the syslog as needed.
234static void PrintOut(int priority, const char *fmt, ...)
236
237#ifdef HAVE_LIBSYSTEMD
238// systemd notify support
239
240static bool notify_enabled = false;
241static bool notify_ready = false;
242
243static inline void notify_init()
244{
245 if (!getenv("NOTIFY_SOCKET"))
246 return;
247 notify_enabled = true;
248}
249
250static inline bool notify_post_init()
251{
252 if (!notify_enabled)
253 return true;
254 if (do_fork) {
255 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
256 return false;
257 }
258 return true;
259}
260
261static inline void notify_extend_timeout()
262{
263 if (!notify_enabled)
264 return;
265 if (notify_ready)
266 return;
267 const char * notify = "EXTEND_TIMEOUT_USEC=20000000"; // typical drive spinup time is 20s tops
268 if (debugmode) {
269 pout("sd_notify(0, \"%s\")\n", notify);
270 return;
271 }
272 sd_notify(0, notify);
273}
274
275static void notify_msg(const char * msg, bool ready = false)
276{
277 if (!notify_enabled)
278 return;
279 if (debugmode) {
280 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
281 return;
282 }
283 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
284}
285
286static void notify_check(int numdev)
287{
288 if (!notify_enabled)
289 return;
290 char msg[32];
291 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
292 numdev, (numdev != 1 ? "s" : ""));
293 notify_msg(msg);
294}
295
296static void notify_wait(time_t wakeuptime, int numdev)
297{
298 if (!notify_enabled)
299 return;
300 char ts[16] = ""; struct tm tmbuf;
301 strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
302 char msg[64];
303 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
304 numdev, (numdev != 1 ? "s" : ""), ts);
305 notify_msg(msg, !notify_ready); // first call notifies READY=1
306 notify_ready = true;
307}
308
309static void notify_exit(int status)
310{
311 if (!notify_enabled)
312 return;
313 const char * msg;
314 switch (status) {
315 case 0: msg = "Exiting ..."; break;
316 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
317 case EXIT_BADCONF: case EXIT_NOCONF:
318 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
319 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
320 case EXIT_NODEV: msg = "No devices to monitor"; break;
321 default: msg = "Error (see SYSLOG)"; break;
322 }
323 // Ensure that READY=1 is notified before 'exit(0)' because otherwise
324 // systemd will report a service (protocol) failure
325 notify_msg(msg, (!status && !notify_ready));
326}
327
328#else // HAVE_LIBSYSTEMD
329// No systemd notify support
330
331static inline bool notify_post_init()
332{
333#ifdef __linux__
334 if (getenv("NOTIFY_SOCKET")) {
335 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
336 return false;
337 }
338#endif
339 return true;
340}
341
// Empty replacements used when notify support is not compiled in.
// Each one accepts the same arguments as the callers pass and does nothing.
static inline void notify_init()
{
}

static inline void notify_extend_timeout()
{
}

static inline void notify_msg(const char *)
{
}

static inline void notify_check(int)
{
}

static inline void notify_wait(time_t, int)
{
}

static inline void notify_exit(int)
{
}
348
349#endif // HAVE_LIBSYSTEMD
350
351// Email frequencies
352enum class emailfreqs : unsigned char {
354};
355
356// Attribute monitoring flags.
357// See monitor_attr_flags below.
358enum {
365};
366
367// Array of flags for each attribute.
369{
370public:
371 bool is_set(int id, unsigned char flag) const
372 { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }
373
374 void set(int id, unsigned char flags)
375 {
376 if (0 < id && id < (int)sizeof(m_flags))
377 m_flags[id] |= flags;
378 }
379
380private:
381 unsigned char m_flags[256]{};
382};
383
384
385/// Configuration data for a device. Read from smartd.conf.
386/// Supports copy & assignment and is compatible with STL containers.
388{
389 int lineno{}; // Line number of entry in file
390 std::string name; // Device name (with optional extra info)
391 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
392 std::string dev_type; // Device type argument from -d directive, empty if none
393 std::string dev_idinfo; // Device identify info for warning emails
394 std::string state_file; // Path of the persistent state file, empty if none
395 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
396 int checktime{}; // Individual check interval, 0 if none
397 bool ignore{}; // Ignore this entry
398 bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
399 bool smartcheck{}; // Check SMART status
400 bool usagefailed{}; // Check for failed Usage Attributes
401 bool prefail{}; // Track changes in Prefail Attributes
402 bool usage{}; // Track changes in Usage Attributes
403 bool selftest{}; // Monitor number of selftest errors
404 bool errorlog{}; // Monitor number of ATA errors
405 bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
406 bool offlinests{}; // Monitor changes in offline data collection status
407 bool offlinests_ns{}; // Disable auto standby if in progress
408 bool selfteststs{}; // Monitor changes in self-test execution status
409 bool selfteststs_ns{}; // Disable auto standby if in progress
410 bool permissive{}; // Ignore failed SMART commands
411 char autosave{}; // 1=disable, 2=enable Autosave Attributes
412 char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
413 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
414 bool ignorepresets{}; // Ignore database of -v options
415 bool showpresets{}; // Show database entry for this device
416 bool removable{}; // Device may disappear (not be present)
417 char powermode{}; // skip check, if disk in idle or standby mode
418 bool powerquiet{}; // skip powermode 'skipping checks' message
419 int powerskipmax{}; // how many times can be check skipped
420 unsigned char tempdiff{}; // Track Temperature changes >= this limit
421 unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
422 regular_expression test_regex; // Regex for scheduled testing
423 unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
424
425 // Configuration of email warning messages
426 std::string emailcmdline; // script to execute, empty if no messages
427 std::string emailaddress; // email address, or empty
428 emailfreqs emailfreq{}; // Send emails once, daily, diminishing
429 bool emailtest{}; // Send test email?
430
431 // ATA ONLY
432 int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
433 int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
434 int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
435 int set_lookahead{}; // disable(-1), enable(1) read look-ahead
436 int set_standby{}; // set(1..255->0..254) standby timer
437 bool set_security_freeze{}; // Freeze ATA security
438 int set_wcache{}; // disable(-1), enable(1) write cache
439 int set_dsn{}; // disable(0x2), enable(0x1) DSN
440
441 bool sct_erc_set{}; // set SCT ERC to:
442 unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
443 unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
444
445 unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
446 unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
447 bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
448 bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
449
450 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
451
453
454 // NVMe only
455 unsigned nvme_err_log_max_entries{}; // size of error log
456};
457
458// Number of allowed mail message types
459static const int SMARTD_NMAIL = 13;
460// Type for '-M test' mails (state not persistent)
461static const int MAILTYPE_TEST = 0;
462// TODO: Add const or enum for all mail types.
463
// Bookkeeping for one type of warning mail. All members start at zero.
struct mailinfo {
  // number of times an email has been sent
  int logged = 0;
  // time first email was sent, as defined by time(2)
  time_t firstsent = 0;
  // time last email was sent, as defined by time(2)
  time_t lastsent = 0;
};
469
470/// Persistent state data for a device.
472{
473 unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
474
475 unsigned char selflogcount{}; // total number of self-test errors
476 unsigned short selfloghour{}; // lifetime hours of last self-test error
477
478 time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
479
480 uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
481 uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
482
483 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
484
485 // ATA ONLY
486 int ataerrorcount{}; // Total number of ATA errors
487
488 // Persistent part of ata_smart_values:
490 unsigned char id{};
491 unsigned char val{};
492 unsigned char worst{}; // Byte needed for 'raw64' attribute only.
493 uint64_t raw{};
494 unsigned char resvd{};
495 };
497
498 // SCSI ONLY
499
502 unsigned char found{};
503 };
505
508 unsigned char found{};
509 };
511
512 // NVMe only
514};
515
516/// Non-persistent state data for a device.
518{
519 bool must_write{}; // true if persistent part should be written
520
521 bool skip{}; // skip during next check cycle
522 time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
523
524 bool not_cap_offline{}; // true == not capable of offline testing
529
530 unsigned char temperature{}; // last recorded Temperature (in Celsius)
531 time_t tempmin_delay{}; // time where Min Temperature tracking will start
532
533 bool removed{}; // true if open() failed for removable device
534
535 bool powermodefail{}; // true if power mode check failed
536 int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
537 int lastpowermodeskipped{}; // the last power mode that was skipped
538
539 bool attrlog_dirty{}; // true if persistent part has new attr values that
540 // need to be written to attrlog
541
542 // SCSI ONLY
543 // TODO: change to bool
544 unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
545 unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
550 unsigned char SuppressReport{}; // minimize nuisance reports
551 unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
552 // know yet) 6 or 10
553 // ATA ONLY
554 uint64_t num_sectors{}; // Number of sectors
555 ata_smart_values smartval{}; // SMART data
557 bool offline_started{}; // true if offline data collection was started
558 bool selftest_started{}; // true if self-test was started
559};
560
561/// Runtime state data for a device.
563: public persistent_dev_state,
564 public temp_dev_state
565{
567 void update_temp_state();
568};
569
570/// Container for configuration info for each device.
571typedef std::vector<dev_config> dev_config_vector;
572
573/// Container for state info for each device.
574typedef std::vector<dev_state> dev_state_vector;
575
576// Copy ATA attributes to persistent state.
578{
579 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
582 pa.id = ta.id;
583 if (ta.id == 0) {
584 pa.val = pa.worst = 0; pa.raw = 0;
585 continue;
586 }
587 pa.val = ta.current;
588 pa.worst = ta.worst;
589 pa.raw = ta.raw[0]
590 | ( ta.raw[1] << 8)
591 | ( ta.raw[2] << 16)
592 | ((uint64_t)ta.raw[3] << 24)
593 | ((uint64_t)ta.raw[4] << 32)
594 | ((uint64_t)ta.raw[5] << 40);
595 pa.resvd = ta.reserv;
596 }
597}
598
599// Copy ATA from persistent to temp state.
601{
602 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
603 const ata_attribute & pa = ata_attributes[i];
605 ta.id = pa.id;
606 if (pa.id == 0) {
607 ta.current = ta.worst = 0;
608 memset(ta.raw, 0, sizeof(ta.raw));
609 continue;
610 }
611 ta.current = pa.val;
612 ta.worst = pa.worst;
613 ta.raw[0] = (unsigned char) pa.raw;
614 ta.raw[1] = (unsigned char)(pa.raw >> 8);
615 ta.raw[2] = (unsigned char)(pa.raw >> 16);
616 ta.raw[3] = (unsigned char)(pa.raw >> 24);
617 ta.raw[4] = (unsigned char)(pa.raw >> 32);
618 ta.raw[5] = (unsigned char)(pa.raw >> 40);
619 ta.reserv = pa.resvd;
620 }
621}
622
623// Parse a line from a state file.
624static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
625{
626 static const regular_expression regex(
627 "^ *"
628 "((temperature-min)" // (1 (2)
629 "|(temperature-max)" // (3)
630 "|(self-test-errors)" // (4)
631 "|(self-test-last-err-hour)" // (5)
632 "|(scheduled-test-next-check)" // (6)
633 "|(selective-test-last-start)" // (7)
634 "|(selective-test-last-end)" // (8)
635 "|(ata-error-count)" // (9)
636 "|(mail\\.([0-9]+)\\." // (10 (11)
637 "((count)" // (12 (13)
638 "|(first-sent-time)" // (14)
639 "|(last-sent-time)" // (15)
640 ")" // 12)
641 ")" // 10)
642 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
643 "((id)" // (18 (19)
644 "|(val)" // (20)
645 "|(worst)" // (21)
646 "|(raw)" // (22)
647 "|(resvd)" // (23)
648 ")" // 18)
649 ")" // 16)
650 "|(nvme-err-log-entries)" // (24)
651 ")" // 1)
652 " *= *([0-9]+)[ \n]*$" // (25)
653 );
654
655 const int nmatch = 1+25;
657 if (!regex.execute(line, nmatch, match))
658 return false;
659 if (match[nmatch-1].rm_so < 0)
660 return false;
661
662 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
663
664 int m = 1;
665 if (match[++m].rm_so >= 0)
666 state.tempmin = (unsigned char)val;
667 else if (match[++m].rm_so >= 0)
668 state.tempmax = (unsigned char)val;
669 else if (match[++m].rm_so >= 0)
670 state.selflogcount = (unsigned char)val;
671 else if (match[++m].rm_so >= 0)
672 state.selfloghour = (unsigned short)val;
673 else if (match[++m].rm_so >= 0)
674 state.scheduled_test_next_check = (time_t)val;
675 else if (match[++m].rm_so >= 0)
676 state.selective_test_last_start = val;
677 else if (match[++m].rm_so >= 0)
678 state.selective_test_last_end = val;
679 else if (match[++m].rm_so >= 0)
680 state.ataerrorcount = (int)val;
681 else if (match[m+=2].rm_so >= 0) {
682 int i = atoi(line+match[m].rm_so);
683 if (!(0 <= i && i < SMARTD_NMAIL))
684 return false;
685 if (i == MAILTYPE_TEST) // Don't suppress test mails
686 return true;
687 if (match[m+=2].rm_so >= 0)
688 state.maillog[i].logged = (int)val;
689 else if (match[++m].rm_so >= 0)
690 state.maillog[i].firstsent = (time_t)val;
691 else if (match[++m].rm_so >= 0)
692 state.maillog[i].lastsent = (time_t)val;
693 else
694 return false;
695 }
696 else if (match[m+=5+1].rm_so >= 0) {
697 int i = atoi(line+match[m].rm_so);
698 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
699 return false;
700 if (match[m+=2].rm_so >= 0)
701 state.ata_attributes[i].id = (unsigned char)val;
702 else if (match[++m].rm_so >= 0)
703 state.ata_attributes[i].val = (unsigned char)val;
704 else if (match[++m].rm_so >= 0)
705 state.ata_attributes[i].worst = (unsigned char)val;
706 else if (match[++m].rm_so >= 0)
707 state.ata_attributes[i].raw = val;
708 else if (match[++m].rm_so >= 0)
709 state.ata_attributes[i].resvd = (unsigned char)val;
710 else
711 return false;
712 }
713 else if (match[m+7].rm_so >= 0)
714 state.nvme_err_log_entries = val;
715 else
716 return false;
717 return true;
718}
719
720// Read a state file.
721static bool read_dev_state(const char * path, persistent_dev_state & state)
722{
723 stdio_file f(path, "r");
724 if (!f) {
725 if (errno != ENOENT)
726 pout("Cannot read state file \"%s\"\n", path);
727 return false;
728 }
729#ifdef __CYGWIN__
730 setmode(fileno(f), O_TEXT); // Allow files with \r\n
731#endif
732
733 persistent_dev_state new_state;
734 int good = 0, bad = 0;
735 char line[256];
736 while (fgets(line, sizeof(line), f)) {
737 const char * s = line + strspn(line, " \t");
738 if (!*s || *s == '#')
739 continue;
740 if (!parse_dev_state_line(line, new_state))
741 bad++;
742 else
743 good++;
744 }
745
746 if (bad) {
747 if (!good) {
748 pout("%s: format error\n", path);
749 return false;
750 }
751 pout("%s: %d invalid line(s) ignored\n", path, bad);
752 }
753
754 // This sets the values missing in the file to 0.
755 state = new_state;
756 return true;
757}
758
// Write one "NAME = VAL" line to F. Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (!val)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
764
// Write one "NAME1.ID.NAME2 = VAL" line to F. Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (!val)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
770
771// Write a state file
772static bool write_dev_state(const char * path, const persistent_dev_state & state)
773{
774 // Rename old "file" to "file~"
775 std::string pathbak = path; pathbak += '~';
776 unlink(pathbak.c_str());
777 rename(path, pathbak.c_str());
778
779 stdio_file f(path, "w");
780 if (!f) {
781 pout("Cannot create state file \"%s\"\n", path);
782 return false;
783 }
784
785 fprintf(f, "# smartd state file\n");
786 write_dev_state_line(f, "temperature-min", state.tempmin);
787 write_dev_state_line(f, "temperature-max", state.tempmax);
788 write_dev_state_line(f, "self-test-errors", state.selflogcount);
789 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
790 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
791 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
792 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
793
794 for (int i = 0; i < SMARTD_NMAIL; i++) {
795 if (i == MAILTYPE_TEST) // Don't suppress test mails
796 continue;
797 const mailinfo & mi = state.maillog[i];
798 if (!mi.logged)
799 continue;
800 write_dev_state_line(f, "mail", i, "count", mi.logged);
801 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
802 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
803 }
804
805 // ATA ONLY
806 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
807
808 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
809 const auto & pa = state.ata_attributes[i];
810 if (!pa.id)
811 continue;
812 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
813 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
814 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
815 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
816 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
817 }
818
819 // NVMe only
820 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
821
822 return true;
823}
824
825// Write to the attrlog file
826static bool write_dev_attrlog(const char * path, const dev_state & state)
827{
828 stdio_file f(path, "a");
829 if (!f) {
830 pout("Cannot create attribute log file \"%s\"\n", path);
831 return false;
832 }
833
834
835 time_t now = time(nullptr);
836 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
837 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
838 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
839 tms->tm_hour, tms->tm_min, tms->tm_sec);
840 // ATA ONLY
841 for (const auto & pa : state.ata_attributes) {
842 if (!pa.id)
843 continue;
844 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
845 }
846 // SCSI ONLY
847 const struct scsiErrorCounter * ecp;
848 const char * pageNames[3] = {"read", "write", "verify"};
849 for (int k = 0; k < 3; ++k) {
850 if ( !state.scsi_error_counters[k].found ) continue;
851 ecp = &state.scsi_error_counters[k].errCounter;
852 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
853 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
854 "\t%s-corr-by-retry;%" PRIu64 ";"
855 "\t%s-total-err-corrected;%" PRIu64 ";"
856 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
857 "\t%s-gb-processed;%.3f;"
858 "\t%s-total-unc-errors;%" PRIu64 ";",
859 pageNames[k], ecp->counter[0],
860 pageNames[k], ecp->counter[1],
861 pageNames[k], ecp->counter[2],
862 pageNames[k], ecp->counter[3],
863 pageNames[k], ecp->counter[4],
864 pageNames[k], (ecp->counter[5] / 1000000000.0),
865 pageNames[k], ecp->counter[6]);
866 }
867 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
868 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
869 }
870 // write SCSI current temperature if it is monitored
871 if (state.temperature)
872 fprintf(f, "\ttemperature;%d;", state.temperature);
873 // end of line
874 fprintf(f, "\n");
875 return true;
876}
877
878// Write all state files. If write_always is false, don't write
879// unless must_write is set.
880static void write_all_dev_states(const dev_config_vector & configs,
881 dev_state_vector & states,
882 bool write_always = true)
883{
884 for (unsigned i = 0; i < states.size(); i++) {
885 const dev_config & cfg = configs.at(i);
886 if (cfg.state_file.empty())
887 continue;
888 dev_state & state = states[i];
889 if (!write_always && !state.must_write)
890 continue;
891 if (!write_dev_state(cfg.state_file.c_str(), state))
892 continue;
893 state.must_write = false;
894 if (write_always || debugmode)
895 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
896 cfg.name.c_str(), cfg.state_file.c_str());
897 }
898}
899
900// Write to all attrlog files
901static void write_all_dev_attrlogs(const dev_config_vector & configs,
902 dev_state_vector & states)
903{
904 for (unsigned i = 0; i < states.size(); i++) {
905 const dev_config & cfg = configs.at(i);
906 if (cfg.attrlog_file.empty())
907 continue;
908 dev_state & state = states[i];
909 if (state.attrlog_dirty) {
910 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
911 state.attrlog_dirty = false;
912 }
913 }
914}
915
916extern "C" { // signal handlers require C-linkage
917
918// Note if we catch a SIGUSR1
919static void USR1handler(int sig)
920{
921 if (SIGUSR1==sig)
923 return;
924}
925
926#ifdef _WIN32
927// Note if we catch a SIGUSR2
928static void USR2handler(int sig)
929{
930 if (SIGUSR2==sig)
931 caughtsigUSR2=1;
932 return;
933}
934#endif
935
936// Note if we catch a HUP (or INT in debug mode)
937static void HUPhandler(int sig)
938{
939 if (sig==SIGHUP)
940 caughtsigHUP=1;
941 else
942 caughtsigHUP=2;
943 return;
944}
945
946// signal handler for TERM, QUIT, and INT (if not in debug mode)
947static void sighandler(int sig)
948{
949 if (!caughtsigEXIT)
950 caughtsigEXIT=sig;
951 return;
952}
953
954} // extern "C"
955
956#ifdef HAVE_LIBCAP_NG
957// capabilities(7) support
958
959static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
960
961static void capabilities_drop_now()
962{
963 if (!capabilities_mode)
964 return;
965 capng_clear(CAPNG_SELECT_BOTH);
966 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
967 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
968 if (warn_as_user && (warn_uid || warn_gid)) {
969 // For popen_as_ugid()
970 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
971 CAP_SETGID, CAP_SETUID, -1);
972 }
973 if (capabilities_mode > 1) {
974 // For exim MTA
975 capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
976 CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
977 }
978 capng_apply(CAPNG_SELECT_BOTH);
979}
980
981static void capabilities_log_error_hint()
982{
983 if (!capabilities_mode)
984 return;
985 PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
986 (capabilities_mode == 1 ? "', try '--capabilities=mail'"
987 : "=mail', please inform " PACKAGE_BUGREPORT));
988}
989
990#else // HAVE_LIBCAP_NG
991// No capabilities(7) support
992
// Empty replacements used when capabilities support is not compiled in.
static inline void capabilities_drop_now()
{
}

static inline void capabilities_log_error_hint()
{
}
995
996#endif // HAVE_LIBCAP_NG
997
998// a replacement for setenv() which is not available on all platforms.
999// Note that the string passed to putenv must not be freed or made
1000// invalid, since a pointer to it is kept by putenv(). This means that
1001// it must either be a static buffer or allocated off the heap. The
1002// string can be freed if the environment variable is redefined via
1003// another call to putenv(). There is no portable way to unset a variable
1004// with putenv(). So we manage the buffer in a static object.
1005// Using setenv() if available is not considered because some
1006// implementations may produce memory leaks.
1007
1009{
1010public:
1011 env_buffer() = default;
1012 env_buffer(const env_buffer &) = delete;
1013 void operator=(const env_buffer &) = delete;
1014
1015 void set(const char * name, const char * value);
1016private:
1017 char * m_buf = nullptr;
1018};
1019
1020void env_buffer::set(const char * name, const char * value)
1021{
1022 int size = strlen(name) + 1 + strlen(value) + 1;
1023 char * newbuf = new char[size];
1024 snprintf(newbuf, size, "%s=%s", name, value);
1025
1026 if (putenv(newbuf))
1027 throw std::runtime_error("putenv() failed");
1028
1029 // This assumes that the same NAME is passed on each call
1030 delete [] m_buf;
1031 m_buf = newbuf;
1032}
1033
1034#define EBUFLEN 1024
1035
1036static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1038
1039// If either address or executable path is non-null then send and log
1040// a warning email, or execute executable
1041static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1042{
1043 // See if user wants us to send mail
1044 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1045 return;
1046
1047 // Which type of mail are we sending?
1048 static const char * const whichfail[] = {
1049 "EmailTest", // 0
1050 "Health", // 1
1051 "Usage", // 2
1052 "SelfTest", // 3
1053 "ErrorCount", // 4
1054 "FailedHealthCheck", // 5
1055 "FailedReadSmartData", // 6
1056 "FailedReadSmartErrorLog", // 7
1057 "FailedReadSmartSelfTestLog", // 8
1058 "FailedOpenDevice", // 9
1059 "CurrentPendingSector", // 10
1060 "OfflineUncorrectableSector", // 11
1061 "Temperature" // 12
1062 };
1063 STATIC_ASSERT(sizeof(whichfail) == SMARTD_NMAIL * sizeof(whichfail[0]));
1064
1065 if (!(0 <= which && which < SMARTD_NMAIL)) {
1066 PrintOut(LOG_CRIT, "Internal error in MailWarning(): which=%d\n", which);
1067 return;
1068 }
1069 mailinfo * mail = state.maillog + which;
1070
1071 // Calc current and next interval for warning reminder emails
1072 int days, nextdays;
1073 if (which == 0)
1074 days = nextdays = -1; // EmailTest
1075 else switch (cfg.emailfreq) {
1076 case emailfreqs::once:
1077 days = nextdays = -1; break;
1078 case emailfreqs::always:
1079 days = nextdays = 0; break;
1080 case emailfreqs::daily:
1081 days = nextdays = 1; break;
1083 // 0, 1, 2, 3, 4, 5, 6, 7, ... => 1, 2, 4, 8, 16, 32, 32, 32, ...
1084 nextdays = 1 << ((unsigned)mail->logged <= 5 ? mail->logged : 5);
1085 // 0, 1, 2, 3, 4, 5, 6, 7, ... => 0, 1, 2, 4, 8, 16, 32, 32, ... (0 not used below)
1086 days = ((unsigned)mail->logged <= 5 ? nextdays >> 1 : nextdays);
1087 break;
1088 default:
1089 PrintOut(LOG_CRIT, "Internal error in MailWarning(): cfg.emailfreq=%d\n", (int)cfg.emailfreq);
1090 return;
1091 }
1092
1093 time_t now = time(nullptr);
1094 if (mail->logged) {
1095 // Return if no warning reminder email needs to be sent (now)
1096 if (days < 0)
1097 return; // '-M once' or EmailTest
1098 if (days > 0 && now < mail->lastsent + days * 24 * 3600)
1099 return; // '-M daily/diminishing' and too early
1100 }
1101 else {
1102 // Record the time of this first email message
1103 mail->firstsent = now;
1104 }
1105
1106 // Record the time of this email message
1107 mail->lastsent = now;
1108
1109 // print warning string into message
1110 // Note: Message length may reach ~300 characters as device names may be
1111 // very long on certain platforms (macOS ~230 characters).
1112 // Message length must not exceed email line length limit, see RFC 5322:
1113 // "... MUST be no more than 998 characters, ... excluding the CRLF."
1114 char message[512];
1115 va_list ap;
1116 va_start(ap, fmt);
1117 vsnprintf(message, sizeof(message), fmt, ap);
1118 va_end(ap);
1119
1120 // replace commas by spaces to separate recipients
1121 std::string address = cfg.emailaddress;
1122 std::replace(address.begin(), address.end(), ',', ' ');
1123
1124 // Export information in environment variables that will be useful
1125 // for user scripts
1126 const char * executable = cfg.emailcmdline.c_str();
1127 static env_buffer env[13];
1128 env[0].set("SMARTD_MAILER", executable);
1129 env[1].set("SMARTD_MESSAGE", message);
1130 char dates[DATEANDEPOCHLEN];
1131 snprintf(dates, sizeof(dates), "%d", mail->logged);
1132 env[2].set("SMARTD_PREVCNT", dates);
1133 dateandtimezoneepoch(dates, mail->firstsent);
1134 env[3].set("SMARTD_TFIRST", dates);
1135 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1136 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1137 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1138 env[6].set("SMARTD_ADDRESS", address.c_str());
1139 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1140
1141 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1142 env[8].set("SMARTD_DEVICETYPE",
1143 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1144 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1145
1146 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1147 dates[0] = 0;
1148 if (nextdays >= 0)
1149 snprintf(dates, sizeof(dates), "%d", nextdays);
1150 env[11].set("SMARTD_NEXTDAYS", dates);
1151 // Avoid false positive recursion detection by smartd_warning.{sh,cmd}
1152 env[12].set("SMARTD_SUBJECT", "");
1153
1154 // now construct a command to send this as EMAIL
1155 if (!*executable)
1156 executable = "<mail>";
1157 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1158 const char * newwarn = (which? "Warning via" : "Test of");
1159
1160 char command[256];
1161#ifdef _WIN32
1162 // Path may contain spaces
1163 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1164#else
1165 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1166#endif
1167
1168 // tell SYSLOG what we are about to do...
1169 PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
1170 (which ? "Sending warning via" : "Executing test of"), executable, newadd,
1171 (
1172#ifdef HAVE_POSIX_API
1173 warn_as_user ?
1174 strprintf(" (uid=%u(%s) gid=%u(%s))",
1175 (unsigned)warn_uid, warn_uname.c_str(),
1176 (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
1177#elif defined(_WIN32)
1178 warn_as_restr_user ? " (restricted user)" :
1179#endif
1180 ""
1181 )
1182 );
1183
1184 // issue the command to send mail or to run the user's executable
1185 errno=0;
1186 FILE * pfp;
1187
1188#ifdef HAVE_POSIX_API
1189 if (warn_as_user) {
1190 pfp = popen_as_ugid(command, "r", warn_uid, warn_gid);
1191 } else
1192#endif
1193 {
1194#ifdef _WIN32
1195 pfp = popen_as_restr_user(command, "r", warn_as_restr_user);
1196#else
1197 pfp = popen(command, "r");
1198#endif
1199 }
1200
1201 if (!pfp)
1202 // failed to popen() mail process
1203 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1204 newwarn, executable, newadd, errno?strerror(errno):"");
1205 else {
1206 // pipe succeeded!
1207 int len;
1208 char buffer[EBUFLEN];
1209
1210 // if unexpected output on stdout/stderr, null terminate, print, and flush
1211 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1212 int count=0;
1213 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1214 buffer[newlen]='\0';
1215 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1216 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1217
1218 // flush pipe if needed
1219 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1220 count++;
1221
1222 // tell user that pipe was flushed, or that something is really wrong
1223 if (count && count<EBUFLEN)
1224 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1225 newwarn, executable, newadd);
1226 else if (count)
1227 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1228 newwarn, executable, newadd);
1229 }
1230
1231 // if something went wrong with mail process, print warning
1232 errno=0;
1233 int status;
1234
1235#ifdef HAVE_POSIX_API
1236 if (warn_as_user) {
1237 status = pclose_as_ugid(pfp);
1238 } else
1239#endif
1240 {
1241 status = pclose(pfp);
1242 }
1243
1244 if (status == -1)
1245 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1246 errno?strerror(errno):"");
1247 else {
1248 // mail process apparently succeeded. Check and report exit status
1249 if (WIFEXITED(status)) {
1250 // exited 'normally' (but perhaps with nonzero status)
1251 int status8 = WEXITSTATUS(status);
1252 if (status8>128)
1253 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1254 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1255 else if (status8) {
1256 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1257 newwarn, executable, newadd, status, status8);
1259 }
1260 else
1261 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1262 }
1263
1264 if (WIFSIGNALED(status))
1265 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1266 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1267
1268 // this branch is probably not possible. If subprocess is
1269 // stopped then pclose() should not return.
1270 if (WIFSTOPPED(status))
1271 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1272 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1273
1274 }
1275 }
1276
1277 // increment mail sent counter
1278 mail->logged++;
1279}
1280
1281static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1283
1284static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1285{
1286 if (!(0 <= which && which < SMARTD_NMAIL))
1287 return;
1288
1289 // Return if no mail sent yet
1290 mailinfo & mi = state.maillog[which];
1291 if (!mi.logged)
1292 return;
1293
1294 // Format & print message
1295 char msg[256];
1296 va_list ap;
1297 va_start(ap, fmt);
1298 vsnprintf(msg, sizeof(msg), fmt, ap);
1299 va_end(ap);
1300
1301 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1302 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1303
1304 // Clear mail counter and timestamps
1305 mi = mailinfo();
1306 state.must_write = true;
1307}
1308
1309#ifndef _WIN32
1310
1311// Output multiple lines via separate syslog(3) calls.
1313static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1314{
1315 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1316 vsnprintf(buf, sizeof(buf), fmt, ap);
1317
1318 for (char * p = buf, * q; p && *p; p = q) {
1319 if ((q = strchr(p, '\n')))
1320 *q++ = 0;
1321 if (*p)
1322 syslog(priority, "%s\n", p);
1323 }
1324}
1325
1326#else // _WIN32
1327// os_win32/syslog_win32.cpp supports multiple lines.
1328#define vsyslog_lines vsyslog
1329#endif // _WIN32
1330
1331// Printing function for watching ataprint commands, or losing them
1332// [From GLIBC Manual: Since the prototype doesn't specify types for
1333// optional arguments, in a call to a variadic function the default
1334// argument promotions are performed on the optional argument
1335// values. This means the objects of type char or short int (whether
1336// signed or not) are promoted to either int or unsigned int, as
1337// appropriate.]
1338void pout(const char *fmt, ...){
1339 va_list ap;
1340
1341 // get the correct time in syslog()
1343 // initialize variable argument list
1344 va_start(ap,fmt);
1345 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1346 if (debugmode && debugmode != 2) {
1347 FILE * f = stdout;
1348#ifdef _WIN32
1349 if (facility == LOG_LOCAL1) // logging to stdout
1350 f = stderr;
1351#endif
1352 vfprintf(f, fmt, ap);
1353 fflush(f);
1354 }
1355 // in debugmode==2 mode we print output from knowndrives.o functions
1356 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1357 openlog("smartd", LOG_PID, facility);
1358 vsyslog_lines(LOG_INFO, fmt, ap);
1359 closelog();
1360 }
1361 va_end(ap);
1362 return;
1363}
1364
1365// This function prints either to stdout or to the syslog as needed.
1366static void PrintOut(int priority, const char *fmt, ...){
1367 va_list ap;
1368
1369 // get the correct time in syslog()
1371 // initialize variable argument list
1372 va_start(ap,fmt);
1373 if (debugmode) {
1374 FILE * f = stdout;
1375#ifdef _WIN32
1376 if (facility == LOG_LOCAL1) // logging to stdout
1377 f = stderr;
1378#endif
1379 vfprintf(f, fmt, ap);
1380 fflush(f);
1381 }
1382 else {
1383 openlog("smartd", LOG_PID, facility);
1384 vsyslog_lines(priority, fmt, ap);
1385 closelog();
1386 }
1387 va_end(ap);
1388 return;
1389}
1390
1391// Used to warn users about invalid checksums. Called from atacmds.cpp.
1392void checksumwarning(const char * string)
1393{
1394 pout("Warning! %s error: invalid SMART checksum.\n", string);
1395}
1396
1397#ifndef _WIN32
1398
1399// Wait for the pid file to show up, this makes sure a calling program knows
1400// that the daemon is really up and running and has a pid to kill it
1401static bool WaitForPidFile()
1402{
1403 int waited, max_wait = 10;
1404 struct stat stat_buf;
1405
1406 if (pid_file.empty() || debugmode)
1407 return true;
1408
1409 for(waited = 0; waited < max_wait; ++waited) {
1410 if (!stat(pid_file.c_str(), &stat_buf)) {
1411 return true;
1412 } else
1413 sleep(1);
1414 }
1415 return false;
1416}
1417
1418#endif // _WIN32
1419
1420// Forks new process if needed, closes ALL file descriptors,
1421// redirects stdin, stdout, and stderr. Not quite daemon().
1422// See https://www.linuxjournal.com/article/2335
1423// for a good description of why we do things this way.
1424static int daemon_init()
1425{
1426#ifndef _WIN32
1427
1428 // flush all buffered streams. Else we might get two copies of open
1429 // streams since both parent and child get copies of the buffers.
1430 fflush(nullptr);
1431
1432 if (do_fork) {
1433 pid_t pid;
1434 if ((pid=fork()) < 0) {
1435 // unable to fork!
1436 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1437 return EXIT_STARTUP;
1438 }
1439 if (pid) {
1440 // we are the parent process, wait for pid file, then exit cleanly
1441 if(!WaitForPidFile()) {
1442 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1443 return EXIT_STARTUP;
1444 }
1445 return 0;
1446 }
1447
1448 // from here on, we are the child process.
1449 setsid();
1450
1451 // Fork one more time to avoid any possibility of having terminals
1452 if ((pid=fork()) < 0) {
1453 // unable to fork!
1454 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1455 return EXIT_STARTUP;
1456 }
1457 if (pid)
1458 // we are the parent process -- exit cleanly
1459 return 0;
1460
1461 // Now we are the child's child...
1462 }
1463
1464 // close any open file descriptors
1465 int open_max = sysconf(_SC_OPEN_MAX);
1466#ifdef HAVE_CLOSE_RANGE
1467 if (close_range(0, open_max - 1, 0))
1468#endif
1469 {
1470 // Limit number of unneeded close() calls under the assumption that
1471 // there are no large gaps between open FDs
1472 for (int i = 0, failed = 0; i < open_max && failed < 1024; i++)
1473 failed = (!close(i) ? 0 : failed + 1);
1474 }
1475
1476 // redirect any IO attempts to /dev/null and change to root directory
1477 int fd = open("/dev/null", O_RDWR);
1478 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1479 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1480 return EXIT_STARTUP;
1481 }
1482 umask(0022);
1483
1484 if (do_fork)
1485 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1486
1487#else // _WIN32
1488
1489 // No fork() on native Win32
1490 // Detach this process from console
1491 fflush(nullptr);
1492 if (daemon_detach("smartd")) {
1493 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1494 return EXIT_STARTUP;
1495 }
1496 // stdin/out/err now closed if not redirected
1497
1498#endif // _WIN32
1499
1500 // No error, continue in main_worker()
1501 return -1;
1502}
1503
1504// create a PID file containing the current process id
1505static bool write_pid_file()
1506{
1507 if (!pid_file.empty()) {
1508 pid_t pid = getpid();
1509 mode_t old_umask;
1510#ifndef __CYGWIN__
1511 old_umask = umask(0077); // rwx------
1512#else
1513 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1514 old_umask = umask(0033); // rwxr--r--
1515#endif
1516
1517 stdio_file f(pid_file.c_str(), "w");
1518 umask(old_umask);
1519 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1520 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1521 return false;
1522 }
1523 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1524 }
1525 return true;
1526}
1527
1528// Prints header identifying version of code and home
1529static void PrintHead()
1530{
1531 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1532}
1533
1534// prints help info for configuration file Directives
1535static void Directives()
1536{
1537 PrintOut(LOG_INFO,
1538 "Configuration file (%s) Directives (after device name):\n"
1539 " -d TYPE Set the device type: auto, ignore, removable,\n"
1540 " %s\n"
1541 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1542 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1543 " -S VAL Enable/disable attribute autosave (on/off)\n"
1544 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1545 " -H Monitor SMART Health Status, report if failed\n"
1546 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1547 " -l TYPE Monitor SMART log or self-test status:\n"
1548 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1549 " -l scterc,R,W Set SCT Error Recovery Control\n"
1550 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1551 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1552 " -f Monitor 'Usage' Attributes, report failures\n"
1553 " -m ADD Send email warning to address ADD\n"
1554 " -M TYPE Modify email warning behavior (see man page)\n"
1555 " -p Report changes in 'Prefailure' Attributes\n"
1556 " -u Report changes in 'Usage' Attributes\n"
1557 " -t Equivalent to -p and -u Directives\n"
1558 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1559 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1560 " -i ID Ignore Attribute ID for -f Directive\n"
1561 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1562 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1563 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1564 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1565 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1566 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1567 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1568 " -F TYPE Use firmware bug workaround:\n"
1569 " %s\n"
1570 " -c i=N Set interval between disk checks to N seconds\n"
1571 " # Comment: text after a hash sign is ignored\n"
1572 " \\ Line continuation character\n"
1573 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1574 "Use ID = 0 to turn off -C and/or -U Directives\n"
1575 "Example: /dev/sda -a\n",
1576 configfile,
1577 smi()->get_valid_dev_types_str().c_str(),
1579}
1580
/* Returns a pointer to a static string listing the valid arguments of the
   command line option opt, or nullptr if the option is not known here. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>, -";
  if (opt == 'B')
    return "[+]<FILE_NAME>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
  if (opt == 'p' || opt == 'w')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";
#ifdef HAVE_POSIX_API
  if (opt == 'u')
    return "<USER>[:<GROUP>], -";
#elif defined(_WIN32)
  if (opt == 'u')
    return "restricted, unchanged";
#endif
#ifdef HAVE_LIBCAP_NG
  if (opt == 'C')
    return "mail, <no_argument>";
#endif
  // Unknown option
  return nullptr;
}
1619
1620/* prints help information for command syntax */
1621static void Usage()
1622{
1623 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1624#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1625 PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
1626#else
1627 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1628#endif
1629 PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
1630#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1631 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
1632#endif
1633 PrintOut(LOG_INFO,"\n");
1634 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1635 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1636 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1637#ifdef SMARTMONTOOLS_DRIVEDBDIR
1638 PrintOut(LOG_INFO,"\n");
1639 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1640#endif
1641 PrintOut(LOG_INFO,"]\n\n");
1642 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1643 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1644 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1645#ifdef HAVE_LIBCAP_NG
1646 PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
1647 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1648 " Warning: Mail notification may not work when used.\n\n");
1649#endif
1650 PrintOut(LOG_INFO," -d, --debug\n");
1651 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1652 PrintOut(LOG_INFO," -D, --showdirectives\n");
1653 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1654 PrintOut(LOG_INFO," -h, --help, --usage\n");
1655 PrintOut(LOG_INFO," Display this help and exit\n\n");
1656 PrintOut(LOG_INFO," -i N, --interval=N\n");
1657 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1658 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1659#ifndef _WIN32
1660 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1661#else
1662 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1663#endif
1664#ifndef _WIN32
1665 PrintOut(LOG_INFO," -n, --no-fork\n");
1666 PrintOut(LOG_INFO," Do not fork into background\n");
1667#ifdef HAVE_LIBSYSTEMD
1668 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1669#endif // HAVE_LIBSYSTEMD
1670 PrintOut(LOG_INFO,"\n");
1671#endif // WIN32
1672 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1673 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1674 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1675 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1676 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1677 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1678#ifdef SMARTMONTOOLS_SAVESTATES
1679 PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
1680#else
1681 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1682#endif
1683 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1684#ifdef SMARTMONTOOLS_SAVESTATES
1685 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1686#endif
1687 PrintOut(LOG_INFO,"\n");
1688 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1689 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1690#ifndef _WIN32
1691 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1692#else
1693 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1694#endif
1695#ifdef HAVE_POSIX_API
1696 PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
1697 PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
1698#elif defined(_WIN32)
1699 PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
1700 PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
1701#endif
1702#ifdef _WIN32
1703 PrintOut(LOG_INFO," --service\n");
1704 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1705 PrintOut(LOG_INFO," smartd install [options]\n");
1706 PrintOut(LOG_INFO," Remove service with:\n");
1707 PrintOut(LOG_INFO," smartd remove\n\n");
1708#endif // _WIN32
1709 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1710 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1711}
1712
1713static int CloseDevice(smart_device * device, const char * name)
1714{
1715 if (!device->close()){
1716 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1717 return 1;
1718 }
1719 // device successfully closed
1720 return 0;
1721}
1722
// Replace invalid characters in cfg.dev_idinfo by '?'.
// Invalid are all characters outside the printable ASCII range ' '..'~'
// and a '~' as the first character.
// Returns true if at least one character was replaced.
static bool sanitize_dev_idinfo(std::string & s)
{
  static_assert(' ' == 0x20 && '~' == 0x07e, "Assume ASCII");

  bool modified = false;
  bool at_start = true;
  for (char & ch : s) {
    const bool printable = (' ' <= ch && ch <= '~');
    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
    const bool leading_tilde = (at_start && ch == '~');
    at_start = false;

    if (!printable || leading_tilde) {
      ch = '?';
      modified = true;
    }
  }
  return modified;
}
1738
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and letters are allowed.
static bool not_allowed_in_filename(char c)
{
  const bool is_digit = ('0' <= c && c <= '9');
  const bool is_upper = ('A' <= c && c <= 'Z');
  const bool is_lower = ('a' <= c && c <= 'z');
  return !(is_digit || is_upper || is_lower);
}
1746
1747// Read error count from Summary or Extended Comprehensive SMART error log
1748// Return -1 on error
1749static int read_ata_error_count(ata_device * device, const char * name,
1750 firmwarebug_defs firmwarebugs, bool extended)
1751{
1752 if (!extended) {
1754 if (ataReadErrorLog(device, &log, firmwarebugs)){
1755 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1756 return -1;
1757 }
1758 return (log.error_log_pointer ? log.ata_error_count : 0);
1759 }
1760 else {
1762 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1763 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1764 return -1;
1765 }
1766 // Some disks use the reserved byte as index, see ataprint.cpp.
1767 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1768 }
1769}
1770
1771// returns <0 if problem. Otherwise, bottom 8 bits are the self test
1772// error count, and top bits are the power-on hours of the last error.
1773static int SelfTestErrorCount(ata_device * device, const char * name,
1774 firmwarebug_defs firmwarebugs)
1775{
1776 struct ata_smart_selftestlog log;
1777
1778 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1779 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1780 return -1;
1781 }
1782
1783 if (!log.mostrecenttest)
1784 // No tests logged
1785 return 0;
1786
1787 // Count failed self-tests
1788 int errcnt = 0, hours = 0;
1789 for (int i = 20; i >= 0; i--) {
1790 int j = (i + log.mostrecenttest) % 21;
1792 if (!nonempty(&entry, sizeof(entry)))
1793 continue;
1794
1795 int status = entry.selfteststatus >> 4;
1796 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1797 // First successful extended self-test, stop count
1798 break;
1799
1800 if (0x3 <= status && status <= 0x8) {
1801 // Self-test showed an error
1802 errcnt++;
1803 // Keep track of time of most recent error
1804 if (!hours)
1805 hours = entry.timestamp;
1806 }
1807 }
1808
1809 return ((hours << 8) | errcnt);
1810}
1811
// Extract the parts of the value returned by SelfTestErrorCount():
// bottom 8 bits are the error count, the next 16 bits are the hours.
// The argument is parenthesized so that compound expressions work.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1814
// Check offline data collection status: returns true if the lower
// 7 bits of STATUS are 0x03 (bit 7 is ignored).
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7f;
  return (code == 0x03);
}
1820
// Check self-test execution status: returns true if the upper
// 4 bits of STATUS are all set (the lower 4 bits are ignored).
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1826
1827// Log offline data collection status
1828static void log_offline_data_coll_status(const char * name, unsigned char status)
1829{
1830 const char * msg;
1831 switch (status & 0x7f) {
1832 case 0x00: msg = "was never started"; break;
1833 case 0x02: msg = "was completed without error"; break;
1834 case 0x03: msg = "is in progress"; break;
1835 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1836 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1837 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1838 default: msg = nullptr;
1839 }
1840
1841 if (msg)
1842 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1843 "Device: %s, offline data collection %s%s\n", name, msg,
1844 ((status & 0x80) ? " (auto:on)" : ""));
1845 else
1846 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1847 name, status);
1848}
1849
1850// Log self-test execution status
1851static void log_self_test_exec_status(const char * name, unsigned char status)
1852{
1853 const char * msg;
1854 switch (status >> 4) {
1855 case 0x0: msg = "completed without error"; break;
1856 case 0x1: msg = "was aborted by the host"; break;
1857 case 0x2: msg = "was interrupted by the host with a reset"; break;
1858 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1859 case 0x4: msg = "completed with error (unknown test element)"; break;
1860 case 0x5: msg = "completed with error (electrical test element)"; break;
1861 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1862 case 0x7: msg = "completed with error (read test element)"; break;
1863 case 0x8: msg = "completed with error (handling damage?)"; break;
1864 default: msg = nullptr;
1865 }
1866
1867 if (msg)
1868 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1869 "Device: %s, previous self-test %s\n", name, msg);
1870 else if ((status >> 4) == 0xf)
1871 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1872 name, status & 0x0f);
1873 else
1874 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1875 name, status);
1876}
1877
1878// Check pending sector count id (-C, -U directives).
1879static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1880 unsigned char id, const char * msg)
1881{
1882 // Check attribute index
1883 int i = ata_find_attr_index(id, state.smartval);
1884 if (i < 0) {
1885 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1886 cfg.name.c_str(), msg, id);
1887 return false;
1888 }
1889
1890 // Check value
1891 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1892 cfg.attribute_defs);
1893 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1894 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1895 cfg.name.c_str(), msg, id, rawval, rawval);
1896 return false;
1897 }
1898
1899 return true;
1900}
1901
1902// Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1903static void finish_device_scan(dev_config & cfg, dev_state & state)
1904{
1905 // Set cfg.emailfreq if user hasn't set it
1906 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && cfg.emailfreq == emailfreqs::unknown) {
1907 // Avoid that emails are suppressed forever due to state persistence
1908 if (cfg.state_file.empty())
1910 else
1912 }
1913
1914 // Start self-test regex check now if time was not read from state file
1915 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1916 state.scheduled_test_next_check = time(nullptr);
1917}
1918
// Common function to format result message for ATA setting.
// Appends NAME and a result suffix to MSG, separated from previous
// content by ", ".  The suffix is ":--" if !OK, ":off" if SET_OPTION
// is negative, ":<SET_OPTION-1>" if HAS_VALUE, ":on" if SET_OPTION is
// positive, and empty otherwise.
static void format_set_result_msg(std::string & msg, const char * name, bool ok,
                                  int set_option = 0, bool has_value = false)
{
  std::string suffix;
  if (!ok) {
    suffix = ":--";
  }
  else if (set_option < 0) {
    suffix = ":off";
  }
  else if (has_value) {
    suffix = ":";
    suffix += std::to_string(set_option - 1);
  }
  else if (set_option > 0) {
    suffix = ":on";
  }

  if (!msg.empty())
    msg += ", ";
  msg += name;
  msg += suffix;
}
1935
1936// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1937static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1938{
1939 if (!cfg.id_is_unique)
1940 return false;
1941
1942 for (const auto & prev_cfg : prev_cfgs) {
1943 if (!prev_cfg.id_is_unique)
1944 continue;
1945 if (cfg.dev_idinfo != prev_cfg.dev_idinfo)
1946 continue;
1947
1948 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1949 cfg.dev_name.c_str(), prev_cfg.dev_name.c_str());
1950 return true;
1951 }
1952
1953 return false;
1954}
1955
// TODO: Add '-F swapid' directive
// Until then the swapped-ID fix is always disabled.
const bool fix_swapped_id = false;
1958
1959// scan to see what ata devices there are, and if they support SMART
 // Probes the already opened ATA device ATADEV, logs what it finds and
 // switches off every monitoring option in CFG which the device cannot
 // provide.  Initial counters are stored in STATE.
 //
 // Return values visible in this function:
 //   0 - device accepted, state/attrlog file names set up,
 //       finish_device_scan() called
 //   1 - identity already present in PREV_CFGS (duplicate)
 //   2 - IDENTIFY DEVICE failed, packet device, or SMART not
 //       supported / not enabled and '-T permissive' not given
 //   3 - no monitoring option is left enabled
 // On every return path visible here the device was closed with
 // CloseDevice() before returning.
 //
 // NOTE(review): listing lines 2004, 2022, 2044, 2047, 2164, 2172, 2212,
 // 2329, 2334, 2342 and 2354 are not present in this excerpt; the
 // statements there cannot be documented from here.
1960static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1961 const dev_config_vector * prev_cfgs)
1962{
1963 int supported=0;
1964 struct ata_identify_device drive;
1965 const char *name = cfg.name.c_str();
1966 int retid;
1967
1968 // Device must be open
1969
1970 // Get drive identity structure
 // retid < 0: no identity data, retid > 0: packet device (type is retid-1)
1971 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1972 if (retid<0)
1973 // Unable to read Identity structure
1974 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1975 else
1976 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1977 name, packetdevicetype(retid-1));
1978 CloseDevice(atadev, name);
1979 return 2;
1980 }
1981
1982 // Get drive identity, size and rotation rate (HDD/SSD)
1983 char model[40+1], serial[20+1], firmware[8+1];
1984 ata_format_id_string(model, drive.model, sizeof(model)-1);
1985 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1986 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1987
1988 ata_size_info sizes;
1989 ata_get_size_info(&drive, sizes);
1990 state.num_sectors = sizes.sectors;
1991 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1992
 // WWN is only included in the id string if ata_get_wwn() returns >= 0
1993 char wwn[64]; wwn[0] = 0;
1994 unsigned oui = 0; uint64_t unique_id = 0;
1995 int naa = ata_get_wwn(&drive, oui, unique_id);
1996 if (naa >= 0)
1997 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1998
1999 // Format device id string for warning emails
2000 char cap[32];
2001 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
2002 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
2003 cfg.id_is_unique = true; // TODO: Check serial?
 // NOTE(review): listing line 2004 is missing here; presumably it holds the
 // condition which guards the following assignment - confirm.
2005 cfg.id_is_unique = false;
2006
2007 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2008
2009 // Check for duplicates
2010 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2011 CloseDevice(atadev, name);
2012 return 1;
2013 }
2014
2015 // Show if device in database, and use preset vendor attribute
2016 // options unless user has requested otherwise.
2017 if (cfg.ignorepresets)
2018 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
2019 else {
2020 // Apply vendor specific presets, print warning if present
2021 std::string dbversion;
 // NOTE(review): the declaration of 'dbentry' and the start of the call
 // (listing line 2022) are not visible in this excerpt.
2023 &drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
2024 if (!dbentry)
2025 PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
2026 (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
2027 else {
2028 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
2029 name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
2030 (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
2031 if (*dbentry->warningmsg)
2032 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
2033 }
2034 }
2035
2036 // Check for ATA Security LOCK
 // 'locked' is used again below to skip the SCT Error Recovery Control setup
2037 unsigned short word128 = drive.words088_255[128-88];
2038 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
2039 if (locked)
2040 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
2041
2042 // Set default '-C 197[+]' if no '-C ID' is specified.
2043 if (!cfg.curr_pending_set)
2045 // Set default '-U 198[+]' if no '-U ID' is specified.
2046 if (!cfg.offl_pending_set)
2048
2049 // If requested, show which presets would be used for this drive
2050 if (cfg.showpresets) {
 // debugmode is temporarily raised to 2 (if it was 0) around show_presets()
 // and restored afterwards
2051 int savedebugmode=debugmode;
2052 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
2053 if (!debugmode)
2054 debugmode=2;
2055 show_presets(&drive);
2056 debugmode=savedebugmode;
2057 }
2058
2059 // see if drive supports SMART
2060 supported=ataSmartSupport(&drive);
2061 if (supported!=1) {
2062 if (supported==0)
2063 // drive does NOT support SMART
2064 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2065 else
2066 // can't tell if drive supports SMART
2067 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2068
2069 // should we proceed anyway?
2070 if (cfg.permissive) {
2071 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2072 }
2073 else {
2074 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2075 CloseDevice(atadev, name);
2076 return 2;
2077 }
2078 }
2079
 // A failed enable command is only fatal if the identify data does not
 // report SMART as already enabled and '-T permissive' is not given
2080 if (ataEnableSmart(atadev)) {
2081 // Enable SMART command has failed
2082 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2083
2084 if (ataIsSmartEnabled(&drive) <= 0) {
2085 if (!cfg.permissive) {
2086 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2087 CloseDevice(atadev, name);
2088 return 2;
2089 }
2090 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2091 }
2092 else {
2093 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2094 }
2095 }
2096
2097 // disable device attribute autosave...
2098 if (cfg.autosave==1) {
2099 if (ataDisableAutoSave(atadev))
2100 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2101 else
2102 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2103 }
2104
2105 // or enable device attribute autosave
2106 if (cfg.autosave==2) {
2107 if (ataEnableAutoSave(atadev))
2108 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2109 else
2110 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2111 }
2112
2113 // capability check: SMART status
2114 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2115 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2116 cfg.smartcheck = false;
2117 }
2118
2119 // capability check: Read smart values and thresholds. Note that
2120 // smart values are ALSO needed even if we ONLY want to know if the
2121 // device is self-test log or error-log capable! After ATA-5, this
2122 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2123 // but sadly not for ATA-5. Sigh.
2124
2125 // do we need to get SMART data?
 // smart_val_ok records whether state.smartval was read successfully; the
 // later capability checks depend on it
2126 bool smart_val_ok = false;
2127 if ( cfg.autoofflinetest || cfg.selftest
2128 || cfg.errorlog || cfg.xerrorlog
2129 || cfg.offlinests || cfg.selfteststs
2130 || cfg.usagefailed || cfg.prefail || cfg.usage
2131 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2132 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2133
2134 if (ataReadSmartValues(atadev, &state.smartval)) {
 // Without SMART values all attribute based checks are switched off
2135 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2136 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2137 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2138 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2139 }
2140 else {
2141 smart_val_ok = true;
2142 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2143 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2144 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2145 cfg.usagefailed = false;
2146 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2147 memset(&state.smartthres, 0, sizeof(state.smartthres));
2148 }
2149 }
2150
2151 // see if the necessary Attribute is there to monitor offline or
2152 // current pending sectors or temperature
2153 if ( cfg.curr_pending_id
2154 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2155 "Current_Pending_Sector"))
2156 cfg.curr_pending_id = 0;
2157
2158 if ( cfg.offl_pending_id
2159 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2160 "Offline_Uncorrectable"))
2161 cfg.offl_pending_id = 0;
2162
 // NOTE(review): the second half of this condition (listing line 2164) is
 // not visible in this excerpt.
2163 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2165 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2166 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2167 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2168 }
2169
2170 // Report ignored '-r' or '-R' directives
2171 for (int id = 1; id <= 255; id++) {
 // NOTE(review): listing line 2172 (start of the block closed at listing
 // line 2186) is not visible in this excerpt.
 // 'opt' is the directive letter to report, 'excl' the optional '!' suffix
2173 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2174 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2175 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2176
2177 int idx = ata_find_attr_index(id, state.smartval);
2178 if (idx < 0)
2179 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2180 else {
 // The directive is also ignored if the class of the Attribute
 // (Prefailure or Usage) is not monitored at all
2181 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2182 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2183 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2184 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2185 }
2186 }
2187 }
2188 }
2189
2190 // enable/disable automatic on-line testing
2191 if (cfg.autoofflinetest) {
2192 // is this an enable or disable request?
 // 'what' is also used with a 'd' appended to form "disabled"/"enabled"
2193 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2194 if (!smart_val_ok)
2195 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2196 else {
2197 // if command appears unsupported, issue a warning...
2198 if (!isSupportAutomaticTimer(&state.smartval))
2199 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2200 // ... but then try anyway
2201 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2202 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2203 else
2204 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2205 }
2206 }
2207
2208 // Read log directories if required for capability check
2209 ata_smart_log_directory smart_logdir, gp_logdir;
2210 bool smart_logdir_ok = false, gp_logdir_ok = false;
2211
 // NOTE(review): the start of this condition (listing line 2212) is not
 // visible in this excerpt.
2213 && (cfg.errorlog || cfg.selftest)
2214 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2215 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2216 smart_logdir_ok = true;
2217 }
2218
2219 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2220 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2221 gp_logdir_ok = true;
2222 }
2223
2224 // capability check: self-test-log
2225 state.selflogcount = 0; state.selfloghour = 0;
2226 if (cfg.selftest) {
2227 int retval;
 // Log is assumed to exist if '-T permissive' is given, if the SMART log
 // directory lists log 0x06, or (without log directory) if the SMART
 // values / identify data report self-test log capability
2228 if (!( cfg.permissive
2229 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2230 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2231 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2232 cfg.selftest = false;
2233 }
2234 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2235 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2236 cfg.selftest = false;
2237 }
2238 else {
2239 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2240 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2241 }
2242 }
2243
2244 // capability check: ATA error log
2245 state.ataerrorcount = 0;
2246 if (cfg.errorlog) {
2247 int errcnt1;
 // Same scheme as for the self-test log, but for log 0x01
2248 if (!( cfg.permissive
2249 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2250 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2251 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2252 cfg.errorlog = false;
2253 }
2254 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2255 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2256 cfg.errorlog = false;
2257 }
2258 else
2259 state.ataerrorcount = errcnt1;
2260 }
2261
2262 if (cfg.xerrorlog) {
2263 int errcnt2;
 // The extended log is tried if '-T permissive' is given, if the log
 // directory is not used due to BUG_NOLOGDIR, or if the GP log directory
 // lists log 0x03
2264 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2265 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2266 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2267 name);
2268 cfg.xerrorlog = false;
2269 }
2270 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2271 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2272 cfg.xerrorlog = false;
2273 }
2274 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2275 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2276 name, state.ataerrorcount, errcnt2);
2277 // Record max error count
2278 if (errcnt2 > state.ataerrorcount)
2279 state.ataerrorcount = errcnt2;
2280 }
2281 else
2282 state.ataerrorcount = errcnt2;
2283 }
2284
2285 // capability check: self-test and offline data collection status
2286 if (cfg.offlinests || cfg.selfteststs) {
2287 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2288 if (cfg.offlinests)
2289 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2290 if (cfg.selfteststs)
2291 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2292 cfg.offlinests = cfg.selfteststs = false;
2293 }
2294 }
2295
2296 // capabilities check -- does it support powermode?
2297 if (cfg.powermode) {
2298 int powermode = ataCheckPowerMode(atadev);
2299
2300 if (-1 == powermode) {
2301 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2302 cfg.powermode=0;
2303 }
 // Any value outside of this list is treated as non-compliant
2304 else if (powermode!=0x00 && powermode!=0x01
2305 && powermode!=0x40 && powermode!=0x41
2306 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2307 && powermode!=0xff) {
2308 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2309 name, powermode);
2310 cfg.powermode=0;
2311 }
2312 }
2313
2314 // Apply ATA settings
 // The results of all settings are collected in 'msg' by
 // format_set_result_msg() and logged together below
2315 std::string msg;
2316
2317 if (cfg.set_aam)
2318 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2319 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2320 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2321
2322 if (cfg.set_apm)
2323 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2324 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2325 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2326
2327 if (cfg.set_lookahead)
2328 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2330 cfg.set_lookahead);
2331
2332 if (cfg.set_wcache)
2333 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2335
2336 if (cfg.set_dsn)
2337 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2338 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2339
2340 if (cfg.set_security_freeze)
2341 format_set_result_msg(msg, "Security freeze",
2343
2344 if (cfg.set_standby)
2345 format_set_result_msg(msg, "Standby",
2346 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2347
2348 // Report as one log entry
2349 if (!msg.empty())
2350 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2351
2352 // set SCT Error Recovery Control if requested
2353 if (cfg.sct_erc_set) {
 // NOTE(review): the first condition of this chain (listing line 2354) is
 // not visible in this excerpt.
2355 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2356 name);
2357 else if (locked)
2358 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2359 name);
2360 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
2361 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
2362 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2363 else
2364 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2365 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2366 }
2367
2368 // If no tests available or selected, return
2369 if (!( cfg.smartcheck || cfg.selftest
2370 || cfg.errorlog || cfg.xerrorlog
2371 || cfg.offlinests || cfg.selfteststs
2372 || cfg.usagefailed || cfg.prefail || cfg.usage
2373 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2374 CloseDevice(atadev, name);
2375 return 3;
2376 }
2377
2378 // tell user we are registering device
2379 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2380
2381 // close file descriptor
2382 CloseDevice(atadev, name);
2383
2384 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2385 // Build file name for state file
 // model and serial are modified in place: characters rejected by
 // not_allowed_in_filename are replaced with '_'
2386 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2387 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2388 if (!state_path_prefix.empty()) {
2389 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2390 // Read previous state
2391 if (read_dev_state(cfg.state_file.c_str(), state)) {
2392 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2393 // Copy ATA attribute values to temp state
2394 state.update_temp_state();
2395 }
2396 }
2397 if (!attrlog_path_prefix.empty())
2398 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2399 }
2400
2401 finish_device_scan(cfg, state);
2402
2403 return 0;
2404}
2405
2406// on success, return 0. On failure, return >0. Never return <0,
2407// please.
 // Probes the already opened SCSI device SCSIDEV, logs what it finds and
 // switches off monitoring options in CFG which the device cannot provide.
 //
 // Return values visible in this function:
 //   0 - device accepted, state/attrlog file names set up,
 //       finish_device_scan() called
 //   1 - identity already present in PREV_CFGS (duplicate)
 //   2 - INQUIRY failed or too short, not a disk like device, or
 //       TEST UNIT READY failed
 //   3 - bad IEC mode page or IE (SMART) not enabled
 //
 // NOTE(review): listing lines 2445, 2458, 2462, 2467, 2474, 2497, 2568,
 // 2571-2572, 2574-2575 and 2577-2578 are not present in this excerpt; the
 // statements there cannot be documented from here.
2408static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2409 const dev_config_vector * prev_cfgs)
2410{
2411 int err, req_len, avail_len, version, len;
2412 const char *device = cfg.name.c_str();
2413 struct scsi_iec_mode_page iec;
2414 uint8_t tBuf[64];
2415 uint8_t inqBuf[96];
2416 uint8_t vpdBuf[252];
2417 char lu_id[64], serial[256], vendor[40], model[40];
2418
2419 // Device must be open
 // NOTE(review): the three 'return 2' paths up to the PDT check do not call
 // CloseDevice(), unlike all later error paths - confirm that the caller
 // closes the device in these cases.
2420 memset(inqBuf, 0, 96);
2421 req_len = 36;
2422 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2423 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2424 req_len = 64;
2425 int err64;
2426 if ((err64 = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2427 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2428 "skip device [err=%d, %d]\n", device, err, err64);
2429 return 2;
2430 }
2431 }
2432 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2433
 // Usable response length is the smaller of the length reported by the
 // device (additional length byte + 5) and the requested length
2434 avail_len = inqBuf[4] + 5;
2435 len = (avail_len < req_len) ? avail_len : req_len;
2436 if (len < 36) {
2437 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2438 "skip device\n", device);
2439 return 2;
2440 }
2441
 // Peripheral device type is in the lower 5 bits of byte 0
2442 int pdt = inqBuf[0] & 0x1f;
2443
2444 switch (pdt) {
2446 case SCSI_PT_WO:
2447 case SCSI_PT_CDROM:
2448 case SCSI_PT_OPTICAL:
2449 case SCSI_PT_RBC: /* Reduced Block commands */
2450 case SCSI_PT_HOST_MANAGED: /* Zoned disk */
2451 break;
2452 default:
2453 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2454 "skip\n", device, pdt);
2455 return 2;
2456 }
2457
 // NOTE(review): the statements around the reset of supported_vpd_pages_p
 // (listing lines 2458 and 2462) are not visible in this excerpt.
2459 delete supported_vpd_pages_p;
2460 supported_vpd_pages_p = nullptr;
2461 }
2463
2464 lu_id[0] = '\0';
2465 if (version >= 0x3) {
2466 /* SPC to SPC-5, assume SPC-6 is version==8 or higher */
2468 vpdBuf, sizeof(vpdBuf))) {
2469 len = vpdBuf[3];
2470 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
2471 }
2472 }
2473 serial[0] = '\0';
2475 vpdBuf, sizeof(vpdBuf))) {
 // Page payload starts at offset 4, its length is in byte 3
2476 len = vpdBuf[3];
2477 vpdBuf[4 + len] = '\0';
2478 scsi_format_id_string(serial, &vpdBuf[4], len);
2479 }
2480
2481 char si_str[64];
2482 struct scsi_readcap_resp srr;
2483 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2484
 // A zero capacity is omitted from the id string
2485 if (capacity)
2486 format_capacity(si_str, sizeof(si_str), capacity, ".");
2487 else
2488 si_str[0] = '\0';
2489
2490 // Format device id string for warning emails
 // Vendor (8), product (16) and revision (4) are taken directly from the
 // INQUIRY response; lu id, serial and capacity are appended if available
2491 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2492 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2493 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2494 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2495 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2496 cfg.id_is_unique = (lu_id[0] || serial[0]);
 // NOTE(review): listing line 2497 is missing here; presumably it holds the
 // condition which guards the following assignment - confirm.
2498 cfg.id_is_unique = false;
2499
2500 // format "model" string
2501 scsi_format_id_string(vendor, &inqBuf[8], 8);
2502 scsi_format_id_string(model, &inqBuf[16], 16);
2503 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2504
2505 // Check for duplicates
2506 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2507 CloseDevice(scsidev, device);
2508 return 1;
2509 }
2510
2511 // check that device is ready for commands. IE stores its stuff on
2512 // the media.
2513 if ((err = scsiTestUnitReady(scsidev))) {
2514 if (SIMPLE_ERR_NOT_READY == err)
2515 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2516 else if (SIMPLE_ERR_NO_MEDIUM == err)
2517 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2518 else if (SIMPLE_ERR_BECOMING_READY == err)
2519 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2520 else
2521 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2522 CloseDevice(scsidev, device);
2523 return 2;
2524 }
2525
2526 // Badly-conforming USB storage devices may fail this check.
2527 // The response to the following IE mode page fetch (current and
2528 // changeable values) is carefully examined. It has been found
2529 // that various USB devices that malform the response will lock up
2530 // if asked for a log page (e.g. temperature) so it is best to
2531 // bail out now.
2532 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2533 state.modese_len = iec.modese_len;
2534 else if (SIMPLE_ERR_BAD_FIELD == err)
2535 ; /* continue since it is reasonable not to support IE mpage */
2536 else { /* any other error (including malformed response) unreasonable */
2537 PrintOut(LOG_INFO,
2538 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2539 device, err);
2540 CloseDevice(scsidev, device);
2541 return 3;
2542 }
2543
2544 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2545 // smart if it is off). This may change to be the same as the ATA side.
2546 if (!scsi_IsExceptionControlEnabled(&iec)) {
2547 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2548 "Try 'smartctl -s on %s' to turn on SMART features\n",
2549 device, device);
2550 CloseDevice(scsidev, device);
2551 return 3;
2552 }
2553
2554 // Flag that certain log pages are supported (information may be
2555 // available from other sources).
 // The second scsiLogSense() call is only made if the first one fails
2556 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2557 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2558 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2559 {
 // Page codes start at offset 4, their number is taken from byte 3
2560 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2561 switch (tBuf[k]) {
2562 case TEMPERATURE_LPAGE:
2563 state.TempPageSupported = 1;
2564 break;
2565 case IE_LPAGE:
2566 state.SmartPageSupported = 1;
2567 break;
 // NOTE(review): the case labels and most assignments of the following
 // entries are not visible in this excerpt.
2569 state.ReadECounterPageSupported = 1;
2570 break;
2573 break;
2576 break;
2579 break;
2580 default:
2581 break;
2582 }
2583 }
2584 }
2585
2586 // Check if scsiCheckIE() is going to work
2587 {
2588 uint8_t asc = 0;
2589 uint8_t ascq = 0;
2590 uint8_t currenttemp = 0;
2591 uint8_t triptemp = 0;
2592
2593 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2594 &asc, &ascq, &currenttemp, &triptemp)) {
2595 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2596 state.SuppressReport = 1;
2597 }
 // Temperature monitoring requires a successful check which returned a
 // nonzero current temperature
2598 if ( (state.SuppressReport || !currenttemp)
2599 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2600 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2601 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2602 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2603 }
2604 }
2605
2606 // capability check: self-test-log
2607 if (cfg.selftest){
2608 int retval = scsiCountFailedSelfTests(scsidev, 0);
2609 if (retval<0) {
2610 // no self-test log, turn off monitoring
2611 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2612 cfg.selftest = false;
2613 state.selflogcount = 0;
2614 state.selfloghour = 0;
2615 }
2616 else {
2617 // register starting values to watch for changes
2618 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2619 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2620 }
2621 }
2622
2623 // disable autosave (set GLTSD bit)
2624 if (cfg.autosave==1){
2625 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2626 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2627 else
2628 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2629 }
2630
2631 // or enable autosave (clear GLTSD bit)
2632 if (cfg.autosave==2){
2633 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2634 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2635 else
2636 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2637 }
2638
2639 // tell user we are registering device
2640 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2641
2642 // Make sure that init_standby_check() ignores SCSI devices
2643 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2644
2645 // close file descriptor
2646 CloseDevice(scsidev, device);
2647
2648 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2649 // Build file name for state file
 // Only model and serial are sanitized here; vendor is used as formatted
 // by scsi_format_id_string()
2650 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2651 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2652 if (!state_path_prefix.empty()) {
2653 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2654 // Read previous state
2655 if (read_dev_state(cfg.state_file.c_str(), state)) {
2656 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2657 // Copy ATA attribute values to temp state
2658 state.update_temp_state();
2659 }
2660 }
2661 if (!attrlog_path_prefix.empty())
2662 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2663 }
2664
2665 finish_device_scan(cfg, state);
2666
2667 return 0;
2668}
2669
// Convert 128 bit LE integer to uint64_t or its max value on overflow.
//
// val: 16 bytes, least significant byte first.
// Returns the value of the lower 8 bytes if the upper 8 bytes are all
// zero, otherwise the maximum value of uint64_t.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Any nonzero byte in the upper half means the value does not fit
  for (int i = 8; i < 16; i++) {
    if (val[i])
      return ~static_cast<uint64_t>(0);
  }

  // Assemble the lower half starting with the most significant byte
  uint64_t result = 0;
  for (int i = 7; i >= 0; i--)
    result = (result << 8) | val[i];
  return result;
}
2683
2684// Get max temperature in Kelvin reported in NVMe SMART/Health log.
2685static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2686{
2687 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2688 for (auto s : smart_log.temp_sensor) {
2689 if (s > k)
2690 k = s; // cppcheck-suppress useStlAlgorithm
2691 }
2692 return k;
2693}
2694
2695// Check the NVMe Error Information log for device related errors.
 //
 // cfg:     device configuration (name, nvme_err_log_max_entries)
 // state:   device state; nvme_err_log_entries holds the last known error
 //          count and is updated to the new count
 // nvmedev: device to read the log from
 // newcnt:  new total error count; if 0, only checks whether the log can
 //          be read
 //
 // Returns false if no log entry could be read, true otherwise.
 // If device related errors are found, the summary is logged with LOG_CRIT
 // and passed to MailWarning(), otherwise it is logged with LOG_INFO.
2696static bool check_nvme_error_log(const dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2697 uint64_t newcnt = 0)
2698{
2699 // Limit transfer size to one page (64 entries) to avoid problems with
2700 // limits of NVMe pass-through layer or too low MDTS values.
2701 unsigned want_entries = 64;
2702 if (want_entries > cfg.nvme_err_log_max_entries)
2703 want_entries = cfg.nvme_err_log_max_entries;
2704 raw_buffer error_log_buf(want_entries * sizeof(nvme_error_log_page));
2705 nvme_error_log_page * error_log =
2706 reinterpret_cast<nvme_error_log_page *>(error_log_buf.data());
2707 unsigned read_entries = nvme_read_error_log(nvmedev, error_log, want_entries, false /*!lpo_sup*/);
2708 if (!read_entries) {
2709 PrintOut(LOG_INFO, "Device: %s, Read %u entries from Error Information Log failed\n",
2710 cfg.name.c_str(), want_entries);
2711 return false;
2712 }
2713
2714 if (!newcnt)
2715 return true; // Support check only
2716
2717 // Scan log, find device related errors
 // oldcnt: last known count, mincnt: smallest count of a new entry seen,
 // err: device related new errors, ign: new entries not counted as errors
2718 uint64_t oldcnt = state.nvme_err_log_entries, mincnt = newcnt;
2719 int err = 0, ign = 0;
2720 for (unsigned i = 0; i < read_entries; i++) {
2721 const nvme_error_log_page & e = error_log[i];
2722 if (!e.error_count)
2723 continue; // unused
2724 if (e.error_count <= oldcnt)
2725 break; // stop on first old entry
2726 if (e.error_count < mincnt)
2727 mincnt = e.error_count; // min known error
2728 if (e.error_count > newcnt)
2729 newcnt = e.error_count; // adjust maximum
 // The status code is checked without the lowest bit of the status field
2730 uint16_t status = e.status_field >> 1;
2731 if (!nvme_status_is_error(status) || nvme_status_to_errno(status) == EINVAL) {
2732 ign++; // Not a device related error
2733 continue;
2734 }
2735
2736 // Log the most recent 8 errors
 // (further errors are still counted in 'err')
2737 if (++err > 8)
2738 continue;
2739 char buf[64];
 // NOTE(review): the last argument of this call (listing line 2742) is not
 // visible in this excerpt.
2740 PrintOut(LOG_INFO, "Device: %s, NVMe error [%u], count %" PRIu64 ", status 0x%04x: %s\n",
2741 cfg.name.c_str(), i, e.error_count, e.status_field,
2743 }
2744
 // "unknown" is the number of counts between oldcnt and the smallest
 // count of a new entry found in the log
2745 std::string msg = strprintf("Device: %s, NVMe error count increased from %" PRIu64 " to %" PRIu64
2746 " (%d new, %d ignored, %" PRIu64 " unknown)",
2747 cfg.name.c_str(), oldcnt, newcnt, err, ign,
2748 (mincnt > oldcnt + 1 ? mincnt - oldcnt - 1 : 0));
2749 // LOG_CRIT only if device related errors are found
2750 if (!err) {
2751 PrintOut(LOG_INFO, "%s\n", msg.c_str());
2752 }
2753 else {
2754 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
2755 MailWarning(cfg, state, 4, "%s", msg.c_str());
2756 }
2757
 // Remember the new count and request that the state is written
2758 state.nvme_err_log_entries = newcnt;
2759 state.must_write = true;
2760 return true;
2761}
2762
2763static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2764 const dev_config_vector * prev_cfgs)
2765{
2766 const char *name = cfg.name.c_str();
2767
2768 // Device must be open
2769
2770 // Get ID Controller
2771 nvme_id_ctrl id_ctrl;
2772 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2773 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2774 CloseDevice(nvmedev, name);
2775 return 2;
2776 }
2777
2778 // Get drive identity
2779 char model[40+1], serial[20+1], firmware[8+1];
2780 format_char_array(model, id_ctrl.mn);
2781 format_char_array(serial, id_ctrl.sn);
2782 format_char_array(firmware, id_ctrl.fr);
2783
2784 // Format device id string for warning emails
2785 char nsstr[32] = "", capstr[32] = "";
2786 unsigned nsid = nvmedev->get_nsid();
2787 if (nsid != 0xffffffff)
2788 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2789 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2790 if (capacity)
2791 format_capacity(capstr, sizeof(capstr), capacity, ".");
2792 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2793 nsstr, (capstr[0] ? ", " : ""), capstr);
2794 cfg.id_is_unique = true; // TODO: Check serial?
2796 cfg.id_is_unique = false;
2797
2798 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2799
2800 // Check for duplicates
2801 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2802 CloseDevice(nvmedev, name);
2803 return 1;
2804 }
2805
2806 // Read SMART/Health log
2807 nvme_smart_log smart_log;
2808 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2809 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2810 CloseDevice(nvmedev, name);
2811 return 2;
2812 }
2813
2814 // Check temperature sensor support
2815 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2816 if (!nvme_get_max_temp_kelvin(smart_log)) {
2817 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2818 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2819 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2820 }
2821 }
2822
2823 // Init total error count
2824 cfg.nvme_err_log_max_entries = id_ctrl.elpe + 1; // 0's based value
2825 if (cfg.errorlog || cfg.xerrorlog) {
2826 if (!check_nvme_error_log(cfg, state, nvmedev)) {
2827 PrintOut(LOG_INFO, "Device: %s, Error Information unavailable, ignoring -l [x]error\n", name);
2828 cfg.errorlog = cfg.xerrorlog = false;
2829 }
2830 else
2832 }
2833
2834 // If no supported tests selected, return
2835 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2836 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2837 CloseDevice(nvmedev, name);
2838 return 3;
2839 }
2840
2841 // Tell user we are registering device
2842 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2843
2844 // Make sure that init_standby_check() ignores NVMe devices
2845 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2846
2847 CloseDevice(nvmedev, name);
2848
2849 if (!state_path_prefix.empty()) {
2850 // Build file name for state file
2851 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2852 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2853 nsstr[0] = 0;
2854 if (nsid != 0xffffffff)
2855 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2856 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2857 // Read previous state
2858 if (read_dev_state(cfg.state_file.c_str(), state))
2859 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2860 }
2861
2862 finish_device_scan(cfg, state);
2863
2864 return 0;
2865}
2866
2867// Open device for next check, return false on error
2868static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2869 const char * type)
2870{
2871 const char * name = cfg.name.c_str();
2872
2873 // If user has asked, test the email warning system
2874 if (cfg.emailtest)
2875 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2876
2877 // User may have requested (with the -n Directive) to leave the disk
2878 // alone if it is in idle or standby mode. In this case check the
2879 // power mode first before opening the device for full access,
2880 // and exit without check if disk is reported in standby.
2881 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2882 // Note that 'is_powered_down()' handles opening the device itself, and
2883 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2884 if (device->is_powered_down())
2885 {
2886 // skip at most powerskipmax checks
2887 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2888 // report first only except if state has changed, avoid waking up system disk
2889 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2890 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2891 state.lastpowermodeskipped = -1;
2892 }
2893 state.powerskipcnt++;
2894 return false;
2895 }
2896 }
2897 }
2898
2899 // if we can't open device, fail gracefully rather than hard --
2900 // perhaps the next time around we'll be able to open it
2901 if (!device->open()) {
2902 // For removable devices, print error message only once and suppress email
2903 if (!cfg.removable) {
2904 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2905 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2906 }
2907 else if (!state.removed) {
2908 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2909 state.removed = true;
2910 }
2911 else if (debugmode)
2912 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2913 return false;
2914 }
2915
2916 if (debugmode)
2917 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2918
2919 if (!cfg.removable)
2920 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2921 else if (state.removed) {
2922 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2923 state.removed = false;
2924 }
2925
2926 return true;
2927}
2928
2929// If the self-test log has got more self-test errors (or more recent
2930// self-test errors) recorded, then notify user.
2931static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2932{
2933 const char * name = cfg.name.c_str();
2934
2935 if (newi<0)
2936 // command failed
2937 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2938 else {
2939 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2940
2941 // old and new error counts
2942 int oldc=state.selflogcount;
2943 int newc=SELFTEST_ERRORCOUNT(newi);
2944
2945 // old and new error timestamps in hours
2946 int oldh=state.selfloghour;
2947 int newh=SELFTEST_ERRORHOURS(newi);
2948
2949 if (oldc<newc) {
2950 // increase in error count
2951 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2952 name, oldc, newc);
2953 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2954 name, oldc, newc);
2955 state.must_write = true;
2956 }
2957 else if (newc > 0 && oldh != newh) {
2958 // more recent error
2959 // a 'more recent' error might actually be a smaller hour number,
2960 // if the hour number has wrapped.
2961 // There's still a bug here. You might just happen to run a new test
2962 // exactly 32768 hours after the previous failure, and have run exactly
2963 // 20 tests between the two, in which case smartd will miss the
2964 // new failure.
2965 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2966 name, newh);
2967 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2968 name, newh);
2969 state.must_write = true;
2970 }
2971
2972 // Print info if error entries have disappeared
2973 // or newer successful successful extended self-test exits
2974 if (oldc > newc) {
2975 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2976 name, oldc, newc);
2977 if (newc == 0)
2978 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2979 }
2980
2981 // Needed since self-test error count may DECREASE. Hour might
2982 // also have changed.
2983 state.selflogcount= newc;
2984 state.selfloghour = newh;
2985 }
2986 return;
2987}
2988
// Self-test type letters, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating NUL is not counted).
static const unsigned num_test_types = sizeof(test_type_chars)/sizeof(test_type_chars[0]) - 1;
2992
2993// returns test type if time to do test of type testtype,
2994// 0 if not time to do test.
2995static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2996{
2997 // check that self-testing has been requested
2998 if (cfg.test_regex.empty())
2999 return 0;
3000
3001 // Exit if drive not capable of any test
3002 if ( state.not_cap_long && state.not_cap_short &&
3003 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
3004 return 0;
3005
3006 // since we are about to call localtime(), be sure glibc is informed
3007 // of any timezone changes we make.
3008 if (!usetime)
3010
3011 // Is it time for next check?
3012 time_t now = (!usetime ? time(nullptr) : usetime);
3013 if (now < state.scheduled_test_next_check) {
3014 if (state.scheduled_test_next_check <= now + 3600)
3015 return 0; // Next check within one hour
3016 // More than one hour, assume system clock time adjusted to the past
3017 state.scheduled_test_next_check = now;
3018 }
3019 else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
3020 // Limit time check interval to 90 days
3021 state.scheduled_test_next_check = now - (3600L*24*90);
3022 }
3023
3024 // Find ':NNN[-LLL]' in regex for possible offsets and limits
3025 const unsigned max_offsets = 1 + num_test_types;
3026 unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
3027 unsigned num_offsets = 1; // offsets/limits[0] == 0 always
3028 for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
3029 const char * q = strchr(p, ':');
3030 if (!q)
3031 break;
3032 p = q + 1;
3033 unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
3034 sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
3035 if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
3036 continue;
3037 offsets[num_offsets] = offset; limits[num_offsets] = limit;
3038 num_offsets++;
3039 p += (n3 > 0 ? n3 : n1);
3040 }
3041
3042 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
3043 char testtype = 0;
3044 time_t testtime = 0; int testhour = 0;
3045 int maxtest = num_test_types-1;
3046
3047 for (time_t t = state.scheduled_test_next_check; ; ) {
3048 // Check offset 0 and then all offsets for ':NNN' found above
3049 for (unsigned i = 0; i < num_offsets; i++) {
3050 unsigned offset = offsets[i], limit = limits[i];
3051 unsigned delay = cfg.test_offset_factor * offset;
3052 if (0 < limit && limit < delay)
3053 delay %= limit + 1;
3054 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
3055
3056 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
3057 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
3058 for (int j = 0; j <= maxtest; j++) {
3059 // Skip if drive not capable of this test
3060 switch (test_type_chars[j]) {
3061 case 'L': if (state.not_cap_long) continue; break;
3062 case 'S': if (state.not_cap_short) continue; break;
3063 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
3064 case 'O': if (scsi || state.not_cap_offline) continue; break;
3065 case 'c': case 'n':
3066 case 'r': if (scsi || state.not_cap_selective) continue; break;
3067 default: continue;
3068 }
3069 // Try match of "T/MM/DD/d/HH[:NNN]"
3070 char pattern[64];
3071 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
3072 test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
3073 if (i > 0) {
3074 const unsigned len = sizeof("S/01/01/1/01") - 1;
3075 snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
3076 if (limit > 0)
3077 snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
3078 }
3079 if (cfg.test_regex.full_match(pattern)) {
3080 // Test found
3081 testtype = pattern[0];
3082 testtime = t; testhour = tms->tm_hour;
3083 // Limit further matches to higher priority self-tests
3084 maxtest = j-1;
3085 break;
3086 }
3087 }
3088 }
3089
3090 // Exit if no tests left or current time reached
3091 if (maxtest < 0)
3092 break;
3093 if (t >= now)
3094 break;
3095 // Check next hour
3096 if ((t += 3600) > now)
3097 t = now;
3098 }
3099
3100 // Do next check not before next hour.
3101 struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
3102 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
3103
3104 if (testtype) {
3105 state.must_write = true;
3106 // Tell user if an old test was found.
3107 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
3108 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
3109 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
3110 cfg.name.c_str(), testtype, datebuf);
3111 }
3112 }
3113
3114 return testtype;
3115}
3116
3117// Print a list of future tests.
3119{
3120 unsigned numdev = configs.size();
3121 if (!numdev)
3122 return;
3123 std::vector<int> testcnts(numdev * num_test_types, 0);
3124
3125 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
3126
3127 // FixGlibcTimeZoneBug(); // done in PrintOut()
3128 time_t now = time(nullptr);
3129 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3130 dateandtimezoneepoch(datenow, now);
3131
3132 long seconds;
3133 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3134 // Check for each device whether a test will be run
3135 time_t testtime = now + seconds;
3136 for (unsigned i = 0; i < numdev; i++) {
3137 const dev_config & cfg = configs.at(i);
3138 dev_state & state = states.at(i);
3139 const char * p;
3140 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3141 if (testtype && (p = strchr(test_type_chars, testtype))) {
3142 unsigned t = (p - test_type_chars);
3143 // Report at most 5 tests of each type
3144 if (++testcnts[i*num_test_types + t] <= 5) {
3145 dateandtimezoneepoch(date, testtime);
3146 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3147 testcnts[i*num_test_types + t], testtype, date);
3148 }
3149 }
3150 }
3151 }
3152
3153 // Report totals
3154 dateandtimezoneepoch(date, now+seconds);
3155 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3156 for (unsigned i = 0; i < numdev; i++) {
3157 const dev_config & cfg = configs.at(i);
3158 bool scsi = devices.at(i)->is_scsi();
3159 for (unsigned t = 0; t < num_test_types; t++) {
3160 int cnt = testcnts[i*num_test_types + t];
3161 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3162 continue;
3163 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3164 cnt, (cnt==1?"":"s"), test_type_chars[t]);
3165 }
3166 }
3167
3168}
3169
3170// Return zero on success, nonzero on failure. Perform offline (background)
3171// short or long (extended) self test on given scsi device.
3172static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3173{
3174 int retval = 0;
3175 const char *testname = nullptr;
3176 const char *name = cfg.name.c_str();
3177 int inProgress;
3178
3179 if (scsiSelfTestInProgress(device, &inProgress)) {
3180 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3181 state.not_cap_short = state.not_cap_long = true;
3182 return 1;
3183 }
3184
3185 if (1 == inProgress) {
3186 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3187 "progress.\n", name);
3188 return 1;
3189 }
3190
3191 switch (testtype) {
3192 case 'S':
3193 testname = "Short Self";
3194 retval = scsiSmartShortSelfTest(device);
3195 break;
3196 case 'L':
3197 testname = "Long Self";
3198 retval = scsiSmartExtendSelfTest(device);
3199 break;
3200 }
3201 // If we can't do the test, exit
3202 if (!testname) {
3203 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3204 testtype);
3205 return 1;
3206 }
3207 if (retval) {
3208 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3209 (SIMPLE_ERR_BAD_FIELD == retval)) {
3210 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3211 testname);
3212 if ('L'==testtype)
3213 state.not_cap_long = true;
3214 else
3215 state.not_cap_short = true;
3216
3217 return 1;
3218 }
3219 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3220 testname, retval);
3221 return 1;
3222 }
3223
3224 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3225
3226 return 0;
3227}
3228
3229// Do an offline immediate or self-test. Return zero on success,
3230// nonzero on failure.
3231static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3232{
3233 const char *name = cfg.name.c_str();
3234
3235 // Read current smart data and check status/capability
3236 struct ata_smart_values data;
3237 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3238 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3239 return 1;
3240 }
3241
3242 // Check for capability to do the test
3243 int dotest = -1, mode = 0;
3244 const char *testname = nullptr;
3245 switch (testtype) {
3246 case 'O':
3247 testname="Offline Immediate ";
3249 dotest=OFFLINE_FULL_SCAN;
3250 else
3251 state.not_cap_offline = true;
3252 break;
3253 case 'C':
3254 testname="Conveyance Self-";
3256 dotest=CONVEYANCE_SELF_TEST;
3257 else
3258 state.not_cap_conveyance = true;
3259 break;
3260 case 'S':
3261 testname="Short Self-";
3262 if (isSupportSelfTest(&data))
3263 dotest=SHORT_SELF_TEST;
3264 else
3265 state.not_cap_short = true;
3266 break;
3267 case 'L':
3268 testname="Long Self-";
3269 if (isSupportSelfTest(&data))
3270 dotest=EXTEND_SELF_TEST;
3271 else
3272 state.not_cap_long = true;
3273 break;
3274
3275 case 'c': case 'n': case 'r':
3276 testname = "Selective Self-";
3278 dotest = SELECTIVE_SELF_TEST;
3279 switch (testtype) {
3280 case 'c': mode = SEL_CONT; break;
3281 case 'n': mode = SEL_NEXT; break;
3282 case 'r': mode = SEL_REDO; break;
3283 }
3284 }
3285 else
3286 state.not_cap_selective = true;
3287 break;
3288 }
3289
3290 // If we can't do the test, exit
3291 if (dotest<0) {
3292 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3293 return 1;
3294 }
3295
3296 // If currently running a self-test, do not interrupt it to start another.
3297 if (15==(data.self_test_exec_status >> 4)) {
3298 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3299 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3300 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3301 } else {
3302 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3303 name, testname, (int)(data.self_test_exec_status & 0x0f));
3304 return 1;
3305 }
3306 }
3307
3308 if (dotest == SELECTIVE_SELF_TEST) {
3309 // Set test span
3310 ata_selective_selftest_args selargs, prev_args;
3311 selargs.num_spans = 1;
3312 selargs.span[0].mode = mode;
3313 prev_args.num_spans = 1;
3314 prev_args.span[0].start = state.selective_test_last_start;
3315 prev_args.span[0].end = state.selective_test_last_end;
3316 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3317 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3318 return 1;
3319 }
3320 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3321 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3322 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3323 start, end, end - start + 1,
3324 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3325 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3326 state.selective_test_last_start = start;
3327 state.selective_test_last_end = end;
3328 }
3329
3330 // execute the test, and return status
3331 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, nullptr);
3332 if (retval) {
3333 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3334 return retval;
3335 }
3336
3337 // Report recent test start to do_disable_standby_check()
3338 // and force log of next test status
3339 if (testtype == 'O')
3340 state.offline_started = true;
3341 else
3342 state.selftest_started = true;
3343
3344 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3345 return 0;
3346}
3347
3348// Check pending sector count attribute values (-C, -U directives).
3349static void check_pending(const dev_config & cfg, dev_state & state,
3350 unsigned char id, bool increase_only,
3351 const ata_smart_values & smartval,
3352 int mailtype, const char * msg)
3353{
3354 // Find attribute index
3355 int i = ata_find_attr_index(id, smartval);
3356 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3357 return;
3358
3359 // No report if no sectors pending.
3360 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3361 if (rawval == 0) {
3362 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3363 return;
3364 }
3365
3366 // If attribute is not reset, report only sector count increases.
3367 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3368 if (!(!increase_only || prev_rawval < rawval))
3369 return;
3370
3371 // Format message.
3372 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3373 if (prev_rawval > 0 && rawval != prev_rawval)
3374 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3375
3376 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3377 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3378 state.must_write = true;
3379}
3380
// Format a temperature value for log messages.
// Returns the literal "??" if x is 0 (value not set), otherwise the
// decimal representation of x written to 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  // unset
  return "??";
}
3389
3390// Check Temperature limits
3391static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3392{
3393 if (!(0 < currtemp && currtemp < 255)) {
3394 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3395 return;
3396 }
3397
3398 // Update Max Temperature
3399 const char * minchg = "", * maxchg = "";
3400 if (currtemp > state.tempmax) {
3401 if (state.tempmax)
3402 maxchg = "!";
3403 state.tempmax = currtemp;
3404 state.must_write = true;
3405 }
3406
3407 char buf[20];
3408 if (!state.temperature) {
3409 // First check
3410 if (!state.tempmin || currtemp < state.tempmin)
3411 // Delay Min Temperature update by ~ 30 minutes.
3412 state.tempmin_delay = time(nullptr) + default_checktime - 60;
3413 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3414 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3415 if (triptemp)
3416 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3417 state.temperature = currtemp;
3418 }
3419 else {
3420 if (state.tempmin_delay) {
3421 // End Min Temperature update delay if ...
3422 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3423 || (state.tempmin_delay <= time(nullptr))) { // or delay time is over.
3424 state.tempmin_delay = 0;
3425 if (!state.tempmin)
3426 state.tempmin = 255;
3427 }
3428 }
3429
3430 // Update Min Temperature
3431 if (!state.tempmin_delay && currtemp < state.tempmin) {
3432 state.tempmin = currtemp;
3433 state.must_write = true;
3434 if (currtemp != state.temperature)
3435 minchg = "!";
3436 }
3437
3438 // Track changes
3439 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3440 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3441 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3442 state.temperature = currtemp;
3443 }
3444 }
3445
3446 // Check limits
3447 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3448 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3449 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3450 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3451 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3452 }
3453 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3454 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3455 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3456 }
3457 else if (cfg.tempcrit) {
3458 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3459 if (currtemp < limit)
3460 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3461 }
3462}
3463
3464// Check normalized and raw attribute values.
3465static void check_attribute(const dev_config & cfg, dev_state & state,
3466 const ata_smart_attribute & attr,
3467 const ata_smart_attribute & prev,
3468 int attridx,
3469 const ata_smart_threshold_entry * thresholds)
3470{
3471 // Check attribute and threshold
3472 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3473 if (attrstate == ATTRSTATE_NON_EXISTING)
3474 return;
3475
3476 // If requested, check for usage attributes that have failed.
3477 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3479 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3480 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3481 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3482 state.must_write = true;
3483 }
3484
3485 // Return if we're not tracking this type of attribute
3486 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3487 if (!( ( prefail && cfg.prefail)
3488 || (!prefail && cfg.usage )))
3489 return;
3490
3491 // Return if '-I ID' was specified
3493 return;
3494
3495 // Issue warning if they don't have the same ID in all structures.
3496 if (attr.id != prev.id) {
3497 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3498 cfg.name.c_str(), attr.id, prev.id);
3499 return;
3500 }
3501
3502 // Compare normalized values if valid.
3503 bool valchanged = false;
3504 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3505 if (attr.current != prev.current)
3506 valchanged = true;
3507 }
3508
3509 // Compare raw values if requested.
3510 bool rawchanged = false;
3511 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3514 rawchanged = true;
3515 }
3516
3517 // Return if no change
3518 if (!(valchanged || rawchanged))
3519 return;
3520
3521 // Format value strings
3522 std::string currstr, prevstr;
3523 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3524 // Print raw values only
3525 currstr = strprintf("%s (Raw)",
3526 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3527 prevstr = strprintf("%s (Raw)",
3528 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3529 }
3530 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3531 // Print normalized and raw values
3532 currstr = strprintf("%d [Raw %s]", attr.current,
3533 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3534 prevstr = strprintf("%d [Raw %s]", prev.current,
3535 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3536 }
3537 else {
3538 // Print normalized values only
3539 currstr = strprintf("%d", attr.current);
3540 prevstr = strprintf("%d", prev.current);
3541 }
3542
3543 // Format message
3544 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3545 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3546 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3547 prevstr.c_str(), currstr.c_str());
3548
3549 // Report this change as critical ?
3550 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3551 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3552 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3553 MailWarning(cfg, state, 2, "%s", msg.c_str());
3554 }
3555 else {
3556 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3557 }
3558 state.must_write = true;
3559}
3560
3561
3562static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3563 bool firstpass, bool allow_selftests)
3564{
3565 if (!open_device(cfg, state, atadev, "ATA"))
3566 return 1;
3567
3568 const char * name = cfg.name.c_str();
3569
3570 // user may have requested (with the -n Directive) to leave the disk
3571 // alone if it is in idle or sleeping mode. In this case check the
3572 // power mode and exit without check if needed
3573 if (cfg.powermode && !state.powermodefail) {
3574 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3575 const char * mode = 0;
3576 if (0 <= powermode && powermode < 0xff) {
3577 // wait for possible spin up and check again
3578 int powermode2;
3579 sleep(5);
3580 powermode2 = ataCheckPowerMode(atadev);
3581 if (powermode2 > powermode)
3582 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3583 powermode = powermode2;
3584 }
3585
3586 switch (powermode){
3587 case -1:
3588 // SLEEP
3589 mode="SLEEP";
3590 if (cfg.powermode>=1)
3591 dontcheck=1;
3592 break;
3593 case 0x00:
3594 // STANDBY
3595 mode="STANDBY";
3596 if (cfg.powermode>=2)
3597 dontcheck=1;
3598 break;
3599 case 0x01:
3600 // STANDBY_Y
3601 mode="STANDBY_Y";
3602 if (cfg.powermode>=2)
3603 dontcheck=1;
3604 break;
3605 case 0x80:
3606 // IDLE
3607 mode="IDLE";
3608 if (cfg.powermode>=3)
3609 dontcheck=1;
3610 break;
3611 case 0x81:
3612 // IDLE_A
3613 mode="IDLE_A";
3614 if (cfg.powermode>=3)
3615 dontcheck=1;
3616 break;
3617 case 0x82:
3618 // IDLE_B
3619 mode="IDLE_B";
3620 if (cfg.powermode>=3)
3621 dontcheck=1;
3622 break;
3623 case 0x83:
3624 // IDLE_C
3625 mode="IDLE_C";
3626 if (cfg.powermode>=3)
3627 dontcheck=1;
3628 break;
3629 case 0xff:
3630 // ACTIVE/IDLE
3631 case 0x40:
3632 // ACTIVE
3633 case 0x41:
3634 // ACTIVE
3635 mode="ACTIVE or IDLE";
3636 break;
3637 default:
3638 // UNKNOWN
3639 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3640 name, powermode);
3641 state.powermodefail = true;
3642 break;
3643 }
3644
3645 // if we are going to skip a check, return now
3646 if (dontcheck){
3647 // skip at most powerskipmax checks
3648 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3649 CloseDevice(atadev, name);
3650 // report first only except if state has changed, avoid waking up system disk
3651 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3652 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3653 state.lastpowermodeskipped = powermode;
3654 }
3655 state.powerskipcnt++;
3656 return 0;
3657 }
3658 else {
3659 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3660 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3661 }
3662 state.powerskipcnt = 0;
3663 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3664 }
3665 else if (state.powerskipcnt) {
3666 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3667 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3668 state.powerskipcnt = 0;
3669 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3670 }
3671 }
3672
3673 // check smart status
3674 if (cfg.smartcheck) {
3675 int status=ataSmartStatus2(atadev);
3676 if (status==-1){
3677 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3678 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3679 state.must_write = true;
3680 }
3681 else if (status==1){
3682 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3683 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3684 state.must_write = true;
3685 }
3686 }
3687
3688 // Check everything that depends upon SMART Data (eg, Attribute values)
3689 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3690 || cfg.curr_pending_id || cfg.offl_pending_id
3691 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3692 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3693
3694 // Read current attribute values.
3695 ata_smart_values curval;
3696 if (ataReadSmartValues(atadev, &curval)){
3697 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3698 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3699 state.must_write = true;
3700 }
3701 else {
3702 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3703
3704 // look for current or offline pending sectors
3705 if (cfg.curr_pending_id)
3706 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3707 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3708 : "Total unreadable (pending) sectors" ));
3709
3710 if (cfg.offl_pending_id)
3711 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3712 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3713 : "Total offline uncorrectable sectors"));
3714
3715 // check temperature limits
3716 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3717 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3718
3719 // look for failed usage attributes, or track usage or prefail attributes
3720 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3721 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3722 check_attribute(cfg, state,
3723 curval.vendor_attributes[i],
3724 state.smartval.vendor_attributes[i],
3725 i, state.smartthres.thres_entries);
3726 }
3727 }
3728
3729 // Log changes of offline data collection status
3730 if (cfg.offlinests) {
3733 || state.offline_started // test was started in previous call
3734 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3736 }
3737
3738 // Log changes of self-test execution status
3739 if (cfg.selfteststs) {
3741 || state.selftest_started // test was started in previous call
3742 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3744 }
3745
3746 // Save the new values for the next time around
3747 state.smartval = curval;
3749 state.attrlog_dirty = true;
3750 }
3751 }
3752 state.offline_started = state.selftest_started = false;
3753
3754 // check if number of selftest errors has increased (note: may also DECREASE)
3755 if (cfg.selftest)
3756 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3757
3758 // check if number of ATA errors has increased
3759 if (cfg.errorlog || cfg.xerrorlog) {
3760
3761 int errcnt1 = -1, errcnt2 = -1;
3762 if (cfg.errorlog)
3763 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3764 if (cfg.xerrorlog)
3765 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3766
3767 // new number of errors is max of both logs
3768 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3769
3770 // did command fail?
3771 if (newc<0)
3772 // lack of PrintOut here is INTENTIONAL
3773 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3774
3775 // has error count increased?
3776 int oldc = state.ataerrorcount;
3777 if (newc>oldc){
3778 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3779 name, oldc, newc);
3780 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3781 name, oldc, newc);
3782 state.must_write = true;
3783 }
3784
3785 if (newc>=0)
3786 state.ataerrorcount=newc;
3787 }
3788
3789 // if the user has asked, and device is capable (or we're not yet
3790 // sure) check whether a self test should be done now.
3791 if (allow_selftests && !cfg.test_regex.empty()) {
3792 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3793 if (testtype)
3794 DoATASelfTest(cfg, state, atadev, testtype);
3795 }
3796
3797 // Don't leave device open -- the OS/user may want to access it
3798 // before the next smartd cycle!
3799 CloseDevice(atadev, name);
3800 return 0;
3801}
3802
// Runs one monitoring pass on a SCSI device: health (IE) check, temperature
// limits, self-test log, scheduled self-tests and error counter pages.
// Returns 1 if open_device() fails, otherwise 0. The device is closed again
// before returning 0.
// NOTE(review): the embedded listing numbers skip 3854-3855, 3860-3861,
// 3866-3867 and 3872-3874, so some statements inside the error counter
// branches below are not visible here -- confirm against the full file.
3803static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3804{
3805 if (!open_device(cfg, state, scsidev, "SCSI"))
3806 return 1;
3807
3808 const char * name = cfg.name.c_str();
3809
 // asc/ascq and the temperatures keep their zero defaults if the IE check
 // is suppressed or fails
3810 uint8_t asc = 0, ascq = 0;
3811 uint8_t currenttemp = 0, triptemp = 0;
3812 if (!state.SuppressReport) {
3813 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3814 &asc, &ascq, &currenttemp, &triptemp)) {
3815 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3816 name);
3817 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
 // after the first failure the IE check is no longer attempted
3818 state.SuppressReport = 1;
3819 }
3820 }
3821 if (asc > 0) {
3822 char b[128];
3823 const char * cp = scsiGetIEString(asc, ascq, b, sizeof(b));
3824
3825 if (cp) {
3826 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3827 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3828 } else if (asc == 4 && ascq == 9) {
3829 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3830 } else if (debugmode)
3831 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3832 name, (int)asc, (int)ascq);
3833 } else if (debugmode)
3834 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3835
3836 // check temperature limits
3837 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3838 CheckTemperature(cfg, state, currenttemp, triptemp);
3839
3840 // check if number of selftest errors has increased (note: may also DECREASE)
3841 if (cfg.selftest)
3842 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3843
 // start a scheduled self-test if allowed and a test schedule is configured
3844 if (allow_selftests && !cfg.test_regex.empty()) {
3845 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3846 if (testtype)
3847 DoSCSISelfTest(cfg, state, scsidev, testtype);
3848 }
 // the error counter pages are only read if an attribute log file is configured
3849 if (!cfg.attrlog_file.empty()){
3850 // saving error counters to state
3851 uint8_t tBuf[252];
3852 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3853 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3856 state.scsi_error_counters[0].found=1;
3857 }
3858 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3859 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3862 state.scsi_error_counters[1].found=1;
3863 }
3864 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3865 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3868 state.scsi_error_counters[2].found=1;
3869 }
3870 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3871 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3875 }
3876 // store temperature if not done by CheckTemperature() above
3877 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3878 state.temperature = currenttemp;
3879 }
3880 CloseDevice(scsidev, name);
3881 state.attrlog_dirty = true;
3882 return 0;
3883}
3884
3885static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3886{
3887 if (!open_device(cfg, state, nvmedev, "NVMe"))
3888 return 1;
3889
3890 const char * name = cfg.name.c_str();
3891
3892 // Read SMART/Health log
3893 nvme_smart_log smart_log;
3894 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3895 CloseDevice(nvmedev, name);
3896 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3897 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3898 state.must_write = true;
3899 return 0;
3900 }
3901
3902 // Check Critical Warning bits
3903 if (cfg.smartcheck && smart_log.critical_warning) {
3904 unsigned char w = smart_log.critical_warning;
3905 std::string msg;
3906 static const char * const wnames[] =
3907 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3908
3909 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3910 if (!(w & (1 << b)))
3911 continue;
3912 if (cnt)
3913 msg += ", ";
3914 if (++cnt > 3) {
3915 msg += "..."; break;
3916 }
3917 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3918 msg += "*Unknown*"; break;
3919 }
3920 msg += wnames[b];
3921 }
3922
3923 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3924 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3925 state.must_write = true;
3926 }
3927
3928 // Check temperature limits
3929 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3930 int k = nvme_get_max_temp_kelvin(smart_log);
3931 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3932 int c = k - 273;
3933 if (c < 1)
3934 c = 1;
3935 else if (c > 0xff)
3936 c = 0xff;
3937 CheckTemperature(cfg, state, c, 0);
3938 }
3939
3940 // Check if number of errors has increased
3941 if (cfg.errorlog || cfg.xerrorlog) {
3942 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3943 if (newcnt > state.nvme_err_log_entries) {
3944 // Warn only if device related errors are found
3945 check_nvme_error_log(cfg, state, nvmedev, newcnt);
3946 }
3947 // else // TODO: Handle decrease of count?
3948 }
3949
3950 CloseDevice(nvmedev, name);
3951 state.attrlog_dirty = true;
3952 return 0;
3953}
3954
3955// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
// NOTE(review): the embedded listing numbers skip 3956 and 3958, so the
// declaration described by the comment above and the signature of the function
// whose body follows are not visible here. The body reads a 'configs'
// collection and reads/writes 'standby_disable_state' -- confirm both against
// the full file.
3957
3959{
3960 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3961 bool sts1 = false, sts2 = false;
3962 for (const auto & cfg : configs) {
3963 if (cfg.offlinests_ns)
3964 sts1 = true;
3965 if (cfg.selfteststs_ns)
3966 sts2 = true;
3967 }
3968
3969 // Check for support of disable auto standby
3970 // Reenable standby if smartd.conf was reread
3971 if (sts1 || sts2 || standby_disable_state == 3) {
3972 if (!smi()->disable_system_auto_standby(false)) {
 // the enable failure is only reported if standby was disabled before (state 3)
3973 if (standby_disable_state == 3)
3974 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3975 if (sts1 || sts2) {
3976 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3977 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
 // clearing both flags makes the final assignment below select state 0
3978 sts1 = sts2 = false;
3979 }
3980 }
3981 }
3982
 // 1 if at least one ',ns' directive is still in effect, else 0
3983 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3984}
3985
// Enables or disables system auto standby depending on whether a device with
// a ',ns' directive has an offline or self-test that was just started or is
// still running, and logs changes of the resulting state.
// NOTE(review): the embedded listing numbers skip 3988, 3998, 4001, 4014, 4021
// and 4027, so the condition guarding the early return, the second half of
// each "running" test and (presumably) the updates of standby_disable_state
// are not visible here -- confirm against the full file.
3986static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3987{
3989 return;
3990
3991 // Check for just started or still running self-tests
3992 bool running = false;
 // the scan stops at the first device found with a running test
3993 for (unsigned i = 0; i < configs.size() && !running; i++) {
3994 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3995
3996 if ( ( cfg.offlinests_ns
3997 && (state.offline_started ||
3999 || ( cfg.selfteststs_ns
4000 && (state.selftest_started ||
4002 running = true;
4003 // state.offline/selftest_started will be reset after next logging of test status
4004 }
4005
4006 // Disable/enable auto standby and log state changes
4007 if (!running) {
 // nothing running: re-enable auto standby unless the state is already 1
4008 if (standby_disable_state != 1) {
4009 if (!smi()->disable_system_auto_standby(false))
4010 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
4011 smi()->get_errmsg());
4012 else
4013 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
4015 }
4016 }
 // a test is running: the disable request is issued on every call, but each
 // outcome is only logged when the state differs from the recorded one
4017 else if (!smi()->disable_system_auto_standby(true)) {
4018 if (standby_disable_state != 2) {
4019 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
4020 smi()->get_errmsg());
4022 }
4023 }
4024 else {
4025 if (standby_disable_state != 3) {
4026 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
4028 }
4029 }
4030}
4031
4032// Checks the SMART status of all ATA, SCSI and NVMe devices
// Devices whose state.skip flag is set are left out of this cycle. After all
// devices were visited, the auto standby state is updated via
// do_disable_standby_check().
// NOTE(review): the embedded listing numbers skip 4055, so the statement that
// belongs to the "systemd unit startup timeout" comment is not visible here --
// confirm against the full file.
4033static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
4034 smart_device_list & devices, bool firstpass, bool allow_selftests)
4035{
 // configs, states and devices are indexed in parallel
4036 for (unsigned i = 0; i < configs.size(); i++) {
4037 const dev_config & cfg = configs.at(i);
4038 dev_state & state = states.at(i);
4039 if (state.skip) {
4040 if (debugmode)
4041 PrintOut(LOG_INFO, "Device: %s, skipped (interval=%d)\n", cfg.name.c_str(),
4042 (cfg.checktime ? cfg.checktime : checktime));
4043 continue;
4044 }
4045
 // dispatch by device type; the return values of the check functions are ignored
4046 smart_device * dev = devices.at(i);
4047 if (dev->is_ata())
4048 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
4049 else if (dev->is_scsi())
4050 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
4051 else if (dev->is_nvme())
4052 NVMeCheckDevice(cfg, state, dev->to_nvme());
4053
4054 // Prevent systemd unit startup timeout when checking many devices on startup
4056 }
4057
4058 do_disable_standby_check(configs, states);
4059}
4060
4061// Install all signal handlers
// NOTE(review): the embedded listing numbers skip 4062, 4065-4066, 4069 and
// 4072-4073, so the function signature and the statements belonging to the
// three comments below are not visible here. Only the Windows-specific SIGUSR2
// registration is shown -- confirm the rest against the full file.
4063{
4064 // normal and abnormal exit
4067
4068 // in debug mode, <CONTROL-C> ==> HUP
4070
4071 // Catch HUP and USR1
4074#ifdef _WIN32
 // SIGUSR2 is only registered on Windows builds
4075 set_signal_if_not_ignored(SIGUSR2, USR2handler);
4076#endif
4077}
4078
4079#ifdef _WIN32
4080// Toggle debug mode implemented for native windows only
4081// (there is no easy way to reopen tty on *nix)
4082static void ToggleDebugMode()
4083{
4084 if (!debugmode) {
4085 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
4086 if (!daemon_enable_console("smartd [Debug]")) {
4087 debugmode = 1;
4088 daemon_signal(SIGINT, HUPhandler);
4089 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
4090 }
4091 else
4092 PrintOut(LOG_INFO,"enable console failed\n");
4093 }
4094 else if (debugmode == 1) {
4095 daemon_disable_console();
4096 debugmode = 0;
4097 daemon_signal(SIGINT, sighandler);
4098 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
4099 }
4100 else
4101 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
4102}
4103#endif
4104
// Compute the next wake-up time.
//
// wakeuptime - previously scheduled wake-up time
// timenow    - current time
// ct         - check interval in seconds
//
// If 'wakeuptime' is still in the future it is returned unchanged.  Otherwise
// the result is the first point of the series 'wakeuptime + k*ct' that lies
// after 'timenow', so the original schedule is kept even if one or more
// intervals were missed.  An interval below 1 is treated as 1 because the
// modulo operation below is undefined for a zero divisor.
time_t calc_next_wakeuptime(time_t wakeuptime, time_t timenow, int ct)
{
  if (timenow < wakeuptime)
    return wakeuptime;
  if (ct < 1)
    ct = 1;
  return timenow + ct - (timenow - wakeuptime) % ct;
}
4111
4112static time_t dosleep(time_t wakeuptime, const dev_config_vector & configs,
4113 dev_state_vector & states, bool & sigwakeup)
4114{
4115 // If past wake-up-time, compute next wake-up-time
4116 time_t timenow = time(nullptr);
4117 unsigned n = configs.size();
4118 int ct;
4119 if (!checktime_min) {
4120 // Same for all devices
4121 wakeuptime = calc_next_wakeuptime(wakeuptime, timenow, checktime);
4122 ct = checktime;
4123 }
4124 else {
4125 // Determine wakeuptime of next device(s)
4126 wakeuptime = 0;
4127 for (unsigned i = 0; i < n; i++) {
4128 const dev_config & cfg = configs.at(i);
4129 dev_state & state = states.at(i);
4130 if (!state.skip)
4131 state.wakeuptime = calc_next_wakeuptime((state.wakeuptime ? state.wakeuptime : timenow),
4132 timenow, (cfg.checktime ? cfg.checktime : checktime));
4133 if (!wakeuptime || state.wakeuptime < wakeuptime)
4134 wakeuptime = state.wakeuptime;
4135 }
4136 ct = checktime_min;
4137 }
4138
4139 notify_wait(wakeuptime, n);
4140
4141 // Sleep until we catch a signal or have completed sleeping
4142 bool no_skip = false;
4143 int addtime = 0;
4144 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
4145 // Restart if system clock has been adjusted to the past
4146 if (wakeuptime > timenow + ct) {
4147 PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
4148 wakeuptime = timenow + ct;
4149 for (auto & state : states)
4150 state.wakeuptime = 0;
4151 no_skip = true;
4152 }
4153
4154 // Exit sleep when time interval has expired or a signal is received
4155 sleep(wakeuptime+addtime-timenow);
4156
4157#ifdef _WIN32
4158 // toggle debug mode?
4159 if (caughtsigUSR2) {
4160 ToggleDebugMode();
4161 caughtsigUSR2 = 0;
4162 }
4163#endif
4164
4165 timenow = time(nullptr);
4166
4167 // Actual sleep time too long?
4168 if (!addtime && timenow > wakeuptime+60) {
4169 if (debugmode)
4170 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4171 (int)(timenow-wakeuptime));
4172 // Wait another 20 seconds to avoid I/O errors during disk spin-up
4173 addtime = timenow-wakeuptime+20;
4174 // Use next wake-up-time if close
4175 int nextcheck = ct - addtime % ct;
4176 if (nextcheck <= 20)
4177 addtime += nextcheck;
4178 }
4179 }
4180
4181 // if we caught a SIGUSR1 then print message and clear signal
4182 if (caughtsigUSR1){
4183 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4184 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4185 caughtsigUSR1=0;
4186 sigwakeup = no_skip = true;
4187 }
4188
4189 // Check which devices must be skipped in this cycle
4190 if (checktime_min) {
4191 for (auto & state : states)
4192 state.skip = (!no_skip && timenow < state.wakeuptime);
4193 }
4194
4195 // return adjusted wakeuptime
4196 return wakeuptime;
4197}
4198
4199// Print out a list of valid arguments for the Directive d
4200static void printoutvaliddirectiveargs(int priority, char d)
4201{
4202 switch (d) {
4203 case 'n':
4204 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4205 break;
4206 case 's':
4207 PrintOut(priority, "valid_regular_expression");
4208 break;
4209 case 'd':
4210 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4211 break;
4212 case 'T':
4213 PrintOut(priority, "normal, permissive");
4214 break;
4215 case 'o':
4216 case 'S':
4217 PrintOut(priority, "on, off");
4218 break;
4219 case 'l':
4220 PrintOut(priority, "error, selftest");
4221 break;
4222 case 'M':
4223 PrintOut(priority, "\"once\", \"always\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4224 break;
4225 case 'v':
4226 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4227 break;
4228 case 'P':
4229 PrintOut(priority, "use, ignore, show, showall");
4230 break;
4231 case 'F':
4232 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4233 break;
4234 case 'e':
4235 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4236 "security-freeze, standby,[N|off], wcache,[on|off]");
4237 break;
4238 case 'c':
4239 PrintOut(priority, "i=N, interval=N");
4240 break;
4241 }
4242}
4243
4244// exits with an error message, or returns integer value of token
4245static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4246 int min, int max, char * suffix = 0)
4247{
4248 // make sure argument is there
4249 if (!arg) {
4250 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4251 cfgfile, lineno, name, token, min, max);
4252 return -1;
4253 }
4254
4255 // get argument value (base 10), check that it's integer, and in-range
4256 char *endptr;
4257 int val = strtol(arg,&endptr,10);
4258
4259 // optional suffix present?
4260 if (suffix) {
4261 if (!strcmp(endptr, suffix))
4262 endptr += strlen(suffix);
4263 else
4264 *suffix = 0;
4265 }
4266
4267 if (!(!*endptr && min <= val && val <= max)) {
4268 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4269 cfgfile, lineno, name, token, arg, min, max);
4270 return -1;
4271 }
4272
4273 // all is well; return value
4274 return val;
4275}
4276
4277
4278// Get 1-3 small integer(s) for '-W' directive
4279static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4280 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4281{
4282 unsigned v1 = 0, v2 = 0, v3 = 0;
4283 int n1 = -1, n2 = -1, n3 = -1, len;
4284 if (!arg) {
4285 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4286 cfgfile, lineno, name, token);
4287 return -1;
4288 }
4289
4290 len = strlen(arg);
4291 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4292 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4293 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4294 cfgfile, lineno, name, token, arg);
4295 return -1;
4296 }
4297 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4298 return 0;
4299}
4300
4301
4302#ifdef _WIN32
4303
// Concatenate strtok() results if quoted with "..."
//
// Continues the strtok() scan which is currently in progress.  A token that
// does not start with '"' is returned unchanged.  Otherwise tokens are joined
// with single spaces until a token ends with '"', and the text between the
// quotes is returned.  The closing quote may be part of the first token, so
// a quoted argument without spaces ("word") is handled too.
//
// Returns nullptr if no token is left, and the string "\"" if the closing
// quote is missing.  A dequoted result is stored in a static buffer which is
// overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(nullptr, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Opening and closing quote are in the same token
  if (!token.empty() && token.back() == '"') {
    token.pop_back();
    return token.c_str();
  }

  for (;;) {
    t = strtok(nullptr, delimiters);
    if (!t || !*t)
      return "\""; // missing closing quote
    token += ' ';
    const size_t len = strlen(t); // > 0, checked above
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4327
4328#endif // _WIN32
4329
4330
4331// This function returns 1 if it has correctly parsed one token (and
4332// any arguments), else zero if no tokens remain. It returns -1 if an
4333// error was encountered.
4334static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4335{
4336 char sym;
4337 const char * name = cfg.name.c_str();
4338 int lineno=cfg.lineno;
4339 const char *delim = " \n\t";
4340 int badarg = 0;
4341 int missingarg = 0;
4342 const char *arg = 0;
4343
4344 // is the rest of the line a comment
4345 if (*token=='#')
4346 return 1;
4347
4348 // is the token not recognized?
4349 if (*token!='-' || strlen(token)!=2) {
4350 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4351 configfile, lineno, name, token);
4352 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4353 return -1;
4354 }
4355
4356 // token we will be parsing:
4357 sym=token[1];
4358
4359 // parse the token and swallow its argument
4360 int val;
4361 char plus[] = "+", excl[] = "!";
4362
4363 switch (sym) {
4364 case 'C':
4365 // monitor current pending sector count (default 197)
4366 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4367 return -1;
4368 cfg.curr_pending_id = (unsigned char)val;
4369 cfg.curr_pending_incr = (*plus == '+');
4370 cfg.curr_pending_set = true;
4371 break;
4372 case 'U':
4373 // monitor offline uncorrectable sectors (default 198)
4374 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4375 return -1;
4376 cfg.offl_pending_id = (unsigned char)val;
4377 cfg.offl_pending_incr = (*plus == '+');
4378 cfg.offl_pending_set = true;
4379 break;
4380 case 'T':
4381 // Set tolerance level for SMART command failures
4382 if (!(arg = strtok(nullptr, delim))) {
4383 missingarg = 1;
4384 } else if (!strcmp(arg, "normal")) {
4385 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4386 // not on failure of an optional S.M.A.R.T. command.
4387 // This is the default so we don't need to actually do anything here.
4388 cfg.permissive = false;
4389 } else if (!strcmp(arg, "permissive")) {
4390 // Permissive mode; ignore errors from Mandatory SMART commands
4391 cfg.permissive = true;
4392 } else {
4393 badarg = 1;
4394 }
4395 break;
4396 case 'd':
4397 // specify the device type
4398 if (!(arg = strtok(nullptr, delim))) {
4399 missingarg = 1;
4400 } else if (!strcmp(arg, "ignore")) {
4401 cfg.ignore = true;
4402 } else if (!strcmp(arg, "removable")) {
4403 cfg.removable = true;
4404 } else if (!strcmp(arg, "auto")) {
4405 cfg.dev_type = "";
4406 scan_types.clear();
4407 } else {
4408 cfg.dev_type = arg;
4409 scan_types.push_back(arg);
4410 }
4411 break;
4412 case 'F':
4413 // fix firmware bug
4414 if (!(arg = strtok(nullptr, delim)))
4415 missingarg = 1;
4416 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4417 badarg = 1;
4418 break;
4419 case 'H':
4420 // check SMART status
4421 cfg.smartcheck = true;
4422 break;
4423 case 'f':
4424 // check for failure of usage attributes
4425 cfg.usagefailed = true;
4426 break;
4427 case 't':
4428 // track changes in all vendor attributes
4429 cfg.prefail = true;
4430 cfg.usage = true;
4431 break;
4432 case 'p':
4433 // track changes in prefail vendor attributes
4434 cfg.prefail = true;
4435 break;
4436 case 'u':
4437 // track changes in usage vendor attributes
4438 cfg.usage = true;
4439 break;
4440 case 'l':
4441 // track changes in SMART logs
4442 if (!(arg = strtok(nullptr, delim))) {
4443 missingarg = 1;
4444 } else if (!strcmp(arg, "selftest")) {
4445 // track changes in self-test log
4446 cfg.selftest = true;
4447 } else if (!strcmp(arg, "error")) {
4448 // track changes in ATA error log
4449 cfg.errorlog = true;
4450 } else if (!strcmp(arg, "xerror")) {
4451 // track changes in Extended Comprehensive SMART error log
4452 cfg.xerrorlog = true;
4453 } else if (!strcmp(arg, "offlinests")) {
4454 // track changes in offline data collection status
4455 cfg.offlinests = true;
4456 } else if (!strcmp(arg, "offlinests,ns")) {
4457 // track changes in offline data collection status, disable auto standby
4458 cfg.offlinests = cfg.offlinests_ns = true;
4459 } else if (!strcmp(arg, "selfteststs")) {
4460 // track changes in self-test execution status
4461 cfg.selfteststs = true;
4462 } else if (!strcmp(arg, "selfteststs,ns")) {
4463 // track changes in self-test execution status, disable auto standby
4464 cfg.selfteststs = cfg.selfteststs_ns = true;
4465 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4466 // set SCT Error Recovery Control
4467 unsigned rt = ~0, wt = ~0; int nc = -1;
4468 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4469 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4470 cfg.sct_erc_set = true;
4471 cfg.sct_erc_readtime = rt;
4472 cfg.sct_erc_writetime = wt;
4473 }
4474 else
4475 badarg = 1;
4476 } else {
4477 badarg = 1;
4478 }
4479 break;
4480 case 'a':
4481 // monitor everything
4482 cfg.smartcheck = true;
4483 cfg.prefail = true;
4484 cfg.usagefailed = true;
4485 cfg.usage = true;
4486 cfg.selftest = true;
4487 cfg.errorlog = true;
4488 cfg.selfteststs = true;
4489 break;
4490 case 'o':
4491 // automatic offline testing enable/disable
4492 if (!(arg = strtok(nullptr, delim))) {
4493 missingarg = 1;
4494 } else if (!strcmp(arg, "on")) {
4495 cfg.autoofflinetest = 2;
4496 } else if (!strcmp(arg, "off")) {
4497 cfg.autoofflinetest = 1;
4498 } else {
4499 badarg = 1;
4500 }
4501 break;
4502 case 'n':
4503 // skip disk check if in idle or standby mode
4504 if (!(arg = strtok(nullptr, delim)))
4505 missingarg = 1;
4506 else {
4507 char *endptr = nullptr;
4508 char *next = strchr(const_cast<char*>(arg), ',');
4509
4510 cfg.powerquiet = false;
4511 cfg.powerskipmax = 0;
4512
4513 if (next)
4514 *next = '\0';
4515 if (!strcmp(arg, "never"))
4516 cfg.powermode = 0;
4517 else if (!strcmp(arg, "sleep"))
4518 cfg.powermode = 1;
4519 else if (!strcmp(arg, "standby"))
4520 cfg.powermode = 2;
4521 else if (!strcmp(arg, "idle"))
4522 cfg.powermode = 3;
4523 else
4524 badarg = 1;
4525
4526 // if optional arguments are present
4527 if (!badarg && next) {
4528 next++;
4529 cfg.powerskipmax = strtol(next, &endptr, 10);
4530 if (endptr == next)
4531 cfg.powerskipmax = 0;
4532 else {
4533 next = endptr + (*endptr != '\0');
4534 if (cfg.powerskipmax <= 0)
4535 badarg = 1;
4536 }
4537 if (*next != '\0') {
4538 if (!strcmp("q", next))
4539 cfg.powerquiet = true;
4540 else {
4541 badarg = 1;
4542 }
4543 }
4544 }
4545 }
4546 break;
4547 case 'S':
4548 // automatic attribute autosave enable/disable
4549 if (!(arg = strtok(nullptr, delim))) {
4550 missingarg = 1;
4551 } else if (!strcmp(arg, "on")) {
4552 cfg.autosave = 2;
4553 } else if (!strcmp(arg, "off")) {
4554 cfg.autosave = 1;
4555 } else {
4556 badarg = 1;
4557 }
4558 break;
4559 case 's':
4560 // warn user, and delete any previously given -s REGEXP Directives
4561 if (!cfg.test_regex.empty()){
4562 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4563 configfile, lineno, name, cfg.test_regex.get_pattern());
4565 }
4566 // check for missing argument
4567 if (!(arg = strtok(nullptr, delim))) {
4568 missingarg = 1;
4569 }
4570 // Compile regex
4571 else {
4572 if (!cfg.test_regex.compile(arg)) {
4573 // not a valid regular expression!
4574 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4575 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4576 return -1;
4577 }
4578 // Do a bit of sanity checking and warn user if we think that
4579 // their regexp is "strange". User probably confused about shell
4580 // glob(3) syntax versus regular expression syntax regexp(7).
4581 // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4582 static const regular_expression syntax_check(
4583 "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4584 ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4585 ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4586 );
4588 if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4589 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4590 "extended regular expression \"%s\"\n",
4591 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4592 }
4593 break;
4594 case 'm':
4595 // send email to address that follows
4596 if (!(arg = strtok(nullptr, delim)))
4597 missingarg = 1;
4598 else {
4599 if (!cfg.emailaddress.empty())
4600 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4601 configfile, lineno, name, cfg.emailaddress.c_str());
4602 cfg.emailaddress = arg;
4603 }
4604 break;
4605 case 'M':
4606 // email warning options
4607 if (!(arg = strtok(nullptr, delim)))
4608 missingarg = 1;
4609 else if (!strcmp(arg, "once"))
4611 else if (!strcmp(arg, "always"))
4613 else if (!strcmp(arg, "daily"))
4615 else if (!strcmp(arg, "diminishing"))
4617 else if (!strcmp(arg, "test"))
4618 cfg.emailtest = true;
4619 else if (!strcmp(arg, "exec")) {
4620 // Get the next argument (the command line)
4621#ifdef _WIN32
4622 // Allow "/path name/with spaces/..." on Windows
4623 arg = strtok_dequote(delim);
4624 if (arg && arg[0] == '"') {
4625 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4626 configfile, lineno, name, token);
4627 return -1;
4628 }
4629#else
4630 arg = strtok(nullptr, delim);
4631#endif
4632 if (!arg) {
4633 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4634 configfile, lineno, name, token);
4635 return -1;
4636 }
4637 // Free the last cmd line given if any, and copy new one
4638 if (!cfg.emailcmdline.empty())
4639 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4640 configfile, lineno, name, cfg.emailcmdline.c_str());
4641 cfg.emailcmdline = arg;
4642 }
4643 else
4644 badarg = 1;
4645 break;
4646 case 'i':
4647 // ignore failure of usage attribute
4648 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4649 return -1;
4651 break;
4652 case 'I':
4653 // ignore attribute for tracking purposes
4654 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4655 return -1;
4657 break;
4658 case 'r':
4659 // print raw value when tracking
4660 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4661 return -1;
4663 if (*excl == '!') // attribute change is critical
4665 break;
4666 case 'R':
4667 // track changes in raw value (forces printing of raw value)
4668 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4669 return -1;
4671 if (*excl == '!') // raw value change is critical
4673 break;
4674 case 'W':
4675 // track Temperature
4676 if (Get3Integers((arg = strtok(nullptr, delim)), name, token, lineno, configfile,
4677 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4678 return -1;
4679 break;
4680 case 'v':
4681 // non-default vendor-specific attribute meaning
4682 if (!(arg = strtok(nullptr, delim))) {
4683 missingarg = 1;
4684 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4685 badarg = 1;
4686 }
4687 break;
4688 case 'P':
4689 // Define use of drive-specific presets.
4690 if (!(arg = strtok(nullptr, delim))) {
4691 missingarg = 1;
4692 } else if (!strcmp(arg, "use")) {
4693 cfg.ignorepresets = false;
4694 } else if (!strcmp(arg, "ignore")) {
4695 cfg.ignorepresets = true;
4696 } else if (!strcmp(arg, "show")) {
4697 cfg.showpresets = true;
4698 } else if (!strcmp(arg, "showall")) {
4700 } else {
4701 badarg = 1;
4702 }
4703 break;
4704
4705 case 'e':
4706 // Various ATA settings
4707 if (!(arg = strtok(nullptr, delim))) {
4708 missingarg = true;
4709 }
4710 else {
4711 char arg2[16+1]; unsigned uval;
4712 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4713 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &uval, &n3) >= 1
4714 && (n1 == len || n2 > 0)) {
4715 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4716 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4717 if (n3 != len)
4718 uval = ~0U;
4719
4720 if (!strcmp(arg2, "aam")) {
4721 if (off)
4722 cfg.set_aam = -1;
4723 else if (uval <= 254)
4724 cfg.set_aam = uval + 1;
4725 else
4726 badarg = true;
4727 }
4728 else if (!strcmp(arg2, "apm")) {
4729 if (off)
4730 cfg.set_apm = -1;
4731 else if (1 <= uval && uval <= 254)
4732 cfg.set_apm = uval + 1;
4733 else
4734 badarg = true;
4735 }
4736 else if (!strcmp(arg2, "lookahead")) {
4737 if (off)
4738 cfg.set_lookahead = -1;
4739 else if (on)
4740 cfg.set_lookahead = 1;
4741 else
4742 badarg = true;
4743 }
4744 else if (!strcmp(arg, "security-freeze")) {
4745 cfg.set_security_freeze = true;
4746 }
4747 else if (!strcmp(arg2, "standby")) {
4748 if (off)
4749 cfg.set_standby = 0 + 1;
4750 else if (uval <= 255)
4751 cfg.set_standby = uval + 1;
4752 else
4753 badarg = true;
4754 }
4755 else if (!strcmp(arg2, "wcache")) {
4756 if (off)
4757 cfg.set_wcache = -1;
4758 else if (on)
4759 cfg.set_wcache = 1;
4760 else
4761 badarg = true;
4762 }
4763 else if (!strcmp(arg2, "dsn")) {
4764 if (off)
4765 cfg.set_dsn = -1;
4766 else if (on)
4767 cfg.set_dsn = 1;
4768 else
4769 badarg = true;
4770 }
4771 else
4772 badarg = true;
4773 }
4774 else
4775 badarg = true;
4776 }
4777 break;
4778
4779 case 'c':
4780 // Override command line options
4781 {
4782 if (!(arg = strtok(nullptr, delim))) {
4783 missingarg = true;
4784 break;
4785 }
4786 int n = 0, nc = -1, len = strlen(arg);
4787 if ( ( sscanf(arg, "i=%d%n", &n, &nc) == 1
4788 || sscanf(arg, "interval=%d%n", &n, &nc) == 1)
4789 && nc == len && n >= 10)
4790 cfg.checktime = n;
4791 else
4792 badarg = true;
4793 }
4794 break;
4795
4796 default:
4797 // Directive not recognized
4798 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4799 configfile, lineno, name, token);
4800 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4801 return -1;
4802 }
4803 if (missingarg) {
4804 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4805 configfile, lineno, name, token);
4806 }
4807 if (badarg) {
4808 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4809 configfile, lineno, name, token, arg);
4810 }
4811 if (missingarg || badarg) {
4812 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4813 printoutvaliddirectiveargs(LOG_CRIT, sym);
4814 PrintOut(LOG_CRIT, "\n");
4815 return -1;
4816 }
4817
4818 return 1;
4819}
4820
4821// Scan directive for configuration file
4822#define SCANDIRECTIVE "DEVICESCAN"
4823
4824// This is the routine that adds things to the conf_entries list.
4825//
4826// Return values are:
4827// 1: parsed a normal line
4828// 0: found DEFAULT setting or comment or blank line
4829// -1: found SCANDIRECTIVE line
4830// -2: found an error
4831//
4832// Note: this routine modifies *line from the caller!
4833static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4834 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4835{
4836 const char *delim = " \n\t";
4837
4838 // get first token: device name. If a comment, skip line
4839 const char * name = strtok(line, delim);
4840 if (!name || *name == '#')
4841 return 0;
4842
4843 // Check device name for DEFAULT or DEVICESCAN
4844 int retval;
4845 if (!strcmp("DEFAULT", name)) {
4846 retval = 0;
4847 // Restart with empty defaults
4848 default_conf = dev_config();
4849 }
4850 else {
4851 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4852 // Init new entry with current defaults
4853 conf_entries.push_back(default_conf);
4854 }
4855 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4856
4857 cfg.name = name; // Later replaced by dev->get_info().info_name
4858 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4859 cfg.lineno = lineno;
4860
4861 // parse tokens one at a time from the file.
4862 while (char * token = strtok(nullptr, delim)) {
4863 int rc = ParseToken(token, cfg, scan_types);
4864 if (rc < 0)
4865 // error found on the line
4866 return -2;
4867
4868 if (rc == 0)
4869 // No tokens left
4870 break;
4871
4872 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4873 }
4874
4875 // Check for multiple -d TYPE directives
4876 if (retval != -1 && scan_types.size() > 1) {
4877 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4878 cfg.name.c_str(), cfg.lineno, configfile);
4879 return -2;
4880 }
4881
4882 // Don't perform checks below for DEFAULT entries
4883 if (retval == 0)
4884 return retval;
4885
4886 // If NO monitoring directives are set, then set all of them.
4887 if (!( cfg.smartcheck || cfg.selftest
4888 || cfg.errorlog || cfg.xerrorlog
4889 || cfg.offlinests || cfg.selfteststs
4890 || cfg.usagefailed || cfg.prefail || cfg.usage
4891 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4892
4893 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4894 cfg.name.c_str(), cfg.lineno, configfile);
4895
4896 cfg.smartcheck = true;
4897 cfg.usagefailed = true;
4898 cfg.prefail = true;
4899 cfg.usage = true;
4900 cfg.selftest = true;
4901 cfg.errorlog = true;
4902 cfg.selfteststs = true;
4903 }
4904
4905 // additional sanity check. Has user set -M options without -m?
4906 if ( cfg.emailaddress.empty()
4907 && (!cfg.emailcmdline.empty() || cfg.emailfreq != emailfreqs::unknown || cfg.emailtest)) {
4908 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4909 cfg.name.c_str(), cfg.lineno, configfile);
4910 return -2;
4911 }
4912
4913 // has the user has set <nomailer>?
4914 if (cfg.emailaddress == "<nomailer>") {
4915 // check that -M exec is also set
4916 if (cfg.emailcmdline.empty()){
4917 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4918 cfg.name.c_str(), cfg.lineno, configfile);
4919 return -2;
4920 }
4921 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4922 cfg.emailaddress.clear();
4923 }
4924
4925 return retval;
4926}
4927
4928// Parses a configuration file. Return values are:
4929// N=>0: found N entries
4930// -1: syntax error in config file
4931// -2: config file does not exist
4932// -3: config file exists but cannot be read
4933//
4934// In the case where the return value is 0, there are three
4935// possibilities:
4936// Empty configuration file ==> conf_entries.empty()
4937// No configuration file ==> conf_entries[0].lineno == 0
4938// SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4939static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4940{
4941 // maximum line length in configuration file
4942 const int MAXLINELEN = 256;
4943 // maximum length of a continued line in configuration file
4944 const int MAXCONTLINE = 1023;
4945
4946 stdio_file f;
4947 // Open config file, if it exists and is not <stdin>
4948 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4949 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4950 // file exists but we can't read it or it should exist due to '-c' option
4951 int ret = (errno!=ENOENT ? -3 : -2);
4952 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4953 strerror(errno),configfile);
4954 return ret;
4955 }
4956 }
4957 else // read from stdin ('-c -' option)
4958 f.open(stdin);
4959
4960 // Start with empty defaults
4961 dev_config default_conf;
4962
4963 // No configuration file found -- use fake one
4964 int entry = 0;
4965 if (!f) {
4966 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4967
4968 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4969 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4970 return 0;
4971 }
4972
4973#ifdef __CYGWIN__
4974 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4975#endif
4976
4977 // configuration file exists
4978 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4979
4980 // parse config file line by line
4981 int lineno = 1, cont = 0, contlineno = 0;
4982 char line[MAXLINELEN+2];
4983 char fullline[MAXCONTLINE+1];
4984
4985 for (;;) {
4986 int len=0,scandevice;
4987 char *lastslash;
4988 char *comment;
4989 char *code;
4990
4991 // make debugging simpler
4992 memset(line,0,sizeof(line));
4993
4994 // get a line
4995 code=fgets(line, MAXLINELEN+2, f);
4996
4997 // are we at the end of the file?
4998