smartmontools SVN Rev 5409
Utility to control and monitor storage systems with "S.M.A.R.T."
smartd.cpp
Go to the documentation of this file.
1/*
2 * Home page of code is: https://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-22 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12#include "config.h"
13#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15// unconditionally included files
16#include <inttypes.h>
17#include <stdio.h>
18#include <sys/types.h>
19#include <sys/stat.h> // umask
20#include <signal.h>
21#include <fcntl.h>
22#include <string.h>
23#include <syslog.h>
24#include <stdarg.h>
25#include <stdlib.h>
26#include <errno.h>
27#include <time.h>
28#include <limits.h>
29#include <getopt.h>
30
31#include <algorithm> // std::replace()
32#include <map>
33#include <stdexcept>
34#include <string>
35#include <vector>
36
37// conditionally included files
38#ifndef _WIN32
39#include <sys/wait.h>
40#endif
41#ifdef HAVE_UNISTD_H
42#include <unistd.h>
43#endif
44
45#ifdef _WIN32
46#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
47#ifdef _MSC_VER
48#pragma warning(disable:4761) // "conversion supplied"
49typedef unsigned short mode_t;
50typedef int pid_t;
51#endif
52#include <io.h> // umask()
53#include <process.h> // getpid()
54#endif // _WIN32
55
56#ifdef __CYGWIN__
57#include <io.h> // setmode()
58#endif // __CYGWIN__
59
60#ifdef HAVE_LIBCAP_NG
61#include <cap-ng.h>
62#endif // LIBCAP_NG
63
64#ifdef HAVE_LIBSYSTEMD
65#include <systemd/sd-daemon.h>
66#endif // HAVE_LIBSYSTEMD
67
68// locally included files
69#include "atacmds.h"
70#include "dev_interface.h"
71#include "knowndrives.h"
72#include "scsicmds.h"
73#include "nvmecmds.h"
74#include "utility.h"
75
76#ifdef HAVE_POSIX_API
77#include "popen_as_ugid.h"
78#endif
79
80#ifdef _WIN32
81// fork()/signal()/initd simulation for native Windows
82#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
83#define strsignal daemon_strsignal
84#define sleep daemon_sleep
85// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
86#define SIGQUIT SIGBREAK
87#define SIGQUIT_KEYNAME "CONTROL-Break"
88#else // _WIN32
89#define SIGQUIT_KEYNAME "CONTROL-\\"
90#endif // _WIN32
91
92const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5403 2022-08-06 16:09:49Z chrfranke $"
93 CONFIG_H_CVSID;
94
// Signal handler function pointer type; declared with C linkage.
extern "C" {
  using signal_handler_type = void (*)(int);
}
98
100{
101#if defined(_WIN32)
102 // signal() emulation
103 daemon_signal(sig, handler);
104
105#elif defined(HAVE_SIGACTION)
106 // SVr4, POSIX.1-2001, POSIX.1-2008
107 struct sigaction sa;
108 sa.sa_handler = SIG_DFL;
109 sigaction(sig, (struct sigaction *)0, &sa);
110 if (sa.sa_handler == SIG_IGN)
111 return;
112
113 memset(&sa, 0, sizeof(sa));
114 sa.sa_handler = handler;
115 sa.sa_flags = SA_RESTART; // BSD signal() semantics
116 sigaction(sig, &sa, (struct sigaction *)0);
117
118#elif defined(HAVE_SIGSET)
119 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
120 if (sigset(sig, handler) == SIG_IGN)
121 sigset(sig, SIG_IGN);
122
123#else
124 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
125 // Important: BSD semantics is required. Traditional signal()
126 // resets the handler to SIG_DFL after the first signal is caught.
127 if (signal(sig, handler) == SIG_IGN)
128 signal(sig, SIG_IGN);
129#endif
130}
131
132using namespace smartmontools;
133
// Exit status values used by smartd

// Startup and configuration problems
#define EXIT_BADCMD    1   // command line could not be parsed
#define EXIT_BADCONF   2   // config file contains a syntax error
#define EXIT_STARTUP   3   // forking the daemon failed
#define EXIT_PID       4   // PID file could not be created
#define EXIT_NOCONF    5   // config file is missing
#define EXIT_READCONF  6   // config file is present but unreadable

// Internal problems
#define EXIT_NOMEM     8   // memory exhausted
#define EXIT_BADCODE   10  // internal error - must NEVER occur

// Device problems
#define EXIT_BADDEV    16  // device cannot be monitored
#define EXIT_NODEV     17  // nothing to monitor

// Termination by signal
#define EXIT_SIGNAL    254 // aborted on signal
149
150
151// command-line: 1=debug mode, 2=print presets
152static unsigned char debugmode = 0;
153
154// command-line: how long to sleep between checks
155static constexpr int default_checktime = 1800;
157static int checktime_min = 0; // Minimum individual check time, 0 if none
158
159// command-line: name of PID file (empty for no pid file)
160static std::string pid_file;
161
162// command-line: path prefix of persistent state file, empty if no persistence.
163static std::string state_path_prefix
164#ifdef SMARTMONTOOLS_SAVESTATES
165 = SMARTMONTOOLS_SAVESTATES
166#endif
167 ;
168
169// command-line: path prefix of attribute log file, empty if no logs.
170static std::string attrlog_path_prefix
171#ifdef SMARTMONTOOLS_ATTRIBUTELOG
172 = SMARTMONTOOLS_ATTRIBUTELOG
173#endif
174 ;
175
176// configuration file name
177static const char * configfile;
178// configuration file "name" if read from stdin
179static const char * const configfile_stdin = "<stdin>";
180// path of alternate configuration file
181static std::string configfile_alt;
182
183// warning script file
184static std::string warning_script;
185
186#ifdef HAVE_POSIX_API
187// run warning script as non-privileged user
188static bool warn_as_user;
189static uid_t warn_uid;
190static gid_t warn_gid;
191static std::string warn_uname, warn_gname;
192#elif defined(_WIN32)
193// run warning script as restricted user
194static bool warn_as_restr_user;
195#endif
196
197// command-line: when should we exit?
198enum quit_t {
203static bool quit_nodev0 = false;
204
205// command-line; this is the default syslog(3) log facility to use.
206static int facility=LOG_DAEMON;
207
208#ifndef _WIN32
209// command-line: fork into background?
210static bool do_fork=true;
211#endif
212
213// TODO: This smartctl only variable is also used in some os_*.cpp
214unsigned char failuretest_permissive = 0;
215
216// set to one if we catch a USR1 (check devices now)
217static volatile int caughtsigUSR1=0;
218
219#ifdef _WIN32
220// set to one if we catch a USR2 (toggle debug mode)
221static volatile int caughtsigUSR2=0;
222#endif
223
224// set to one if we catch a HUP (reload config file). In debug mode,
225// set to two, if we catch INT (also reload config file).
226static volatile int caughtsigHUP=0;
227
228// set to signal value if we catch INT, QUIT, or TERM
229static volatile int caughtsigEXIT=0;
230
231// This function prints either to stdout or to the syslog as needed.
232static void PrintOut(int priority, const char *fmt, ...)
234
235#ifdef HAVE_LIBSYSTEMD
236// systemd notify support
237
238static bool notify_enabled = false;
239static bool notify_ready = false;
240
241static inline void notify_init()
242{
243 if (!getenv("NOTIFY_SOCKET"))
244 return;
245 notify_enabled = true;
246}
247
248static inline bool notify_post_init()
249{
250 if (!notify_enabled)
251 return true;
252 if (do_fork) {
253 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
254 return false;
255 }
256 return true;
257}
258
259static inline void notify_extend_timeout()
260{
261 if (!notify_enabled)
262 return;
263 if (notify_ready)
264 return;
265 const char * notify = "EXTEND_TIMEOUT_USEC=20000000"; // typical drive spinup time is 20s tops
266 if (debugmode) {
267 pout("sd_notify(0, \"%s\")\n", notify);
268 return;
269 }
270 sd_notify(0, notify);
271}
272
273static void notify_msg(const char * msg, bool ready = false)
274{
275 if (!notify_enabled)
276 return;
277 if (debugmode) {
278 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
279 return;
280 }
281 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
282}
283
284static void notify_check(int numdev)
285{
286 if (!notify_enabled)
287 return;
288 char msg[32];
289 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
290 numdev, (numdev != 1 ? "s" : ""));
291 notify_msg(msg);
292}
293
294static void notify_wait(time_t wakeuptime, int numdev)
295{
296 if (!notify_enabled)
297 return;
298 char ts[16] = ""; struct tm tmbuf;
299 strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
300 char msg[64];
301 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
302 numdev, (numdev != 1 ? "s" : ""), ts);
303 notify_msg(msg, !notify_ready); // first call notifies READY=1
304 notify_ready = true;
305}
306
307static void notify_exit(int status)
308{
309 if (!notify_enabled)
310 return;
311 const char * msg;
312 switch (status) {
313 case 0: msg = "Exiting ..."; break;
314 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
315 case EXIT_BADCONF: case EXIT_NOCONF:
316 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
317 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
318 case EXIT_NODEV: msg = "No devices to monitor"; break;
319 default: msg = "Error (see SYSLOG)"; break;
320 }
321 notify_msg(msg);
322}
323
324#else // HAVE_LIBSYSTEMD
325// No systemd notify support
326
327static inline bool notify_post_init()
328{
329#ifdef __linux__
330 if (getenv("NOTIFY_SOCKET")) {
331 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
332 return false;
333 }
334#endif
335 return true;
336}
337
// Empty replacements used when notify support is not compiled in.
static inline void notify_init()
{
}

static inline void notify_extend_timeout()
{
}

static inline void notify_msg(const char *)
{
}

static inline void notify_check(int)
{
}

static inline void notify_wait(time_t, int)
{
}

static inline void notify_exit(int)
{
}
344
345#endif // HAVE_LIBSYSTEMD
346
347// Attribute monitoring flags.
348// See monitor_attr_flags below.
349enum {
356};
357
358// Array of flags for each attribute.
360{
361public:
362 bool is_set(int id, unsigned char flag) const
363 { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }
364
365 void set(int id, unsigned char flags)
366 {
367 if (0 < id && id < (int)sizeof(m_flags))
368 m_flags[id] |= flags;
369 }
370
371private:
372 unsigned char m_flags[256]{};
373};
374
375
376/// Configuration data for a device. Read from smartd.conf.
377/// Supports copy & assignment and is compatible with STL containers.
379{
380 int lineno{}; // Line number of entry in file
381 std::string name; // Device name (with optional extra info)
382 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
383 std::string dev_type; // Device type argument from -d directive, empty if none
384 std::string dev_idinfo; // Device identify info for warning emails
385 std::string state_file; // Path of the persistent state file, empty if none
386 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
387 int checktime{}; // Individual check interval, 0 if none
388 bool ignore{}; // Ignore this entry
389 bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
390 bool smartcheck{}; // Check SMART status
391 bool usagefailed{}; // Check for failed Usage Attributes
392 bool prefail{}; // Track changes in Prefail Attributes
393 bool usage{}; // Track changes in Usage Attributes
394 bool selftest{}; // Monitor number of selftest errors
395 bool errorlog{}; // Monitor number of ATA errors
396 bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
397 bool offlinests{}; // Monitor changes in offline data collection status
398 bool offlinests_ns{}; // Disable auto standby if in progress
399 bool selfteststs{}; // Monitor changes in self-test execution status
400 bool selfteststs_ns{}; // Disable auto standby if in progress
401 bool permissive{}; // Ignore failed SMART commands
402 char autosave{}; // 1=disable, 2=enable Autosave Attributes
403 char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
404 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
405 bool ignorepresets{}; // Ignore database of -v options
406 bool showpresets{}; // Show database entry for this device
407 bool removable{}; // Device may disappear (not be present)
408 char powermode{}; // skip check, if disk in idle or standby mode
409 bool powerquiet{}; // skip powermode 'skipping checks' message
410 int powerskipmax{}; // how many times can be check skipped
411 unsigned char tempdiff{}; // Track Temperature changes >= this limit
412 unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
413 regular_expression test_regex; // Regex for scheduled testing
414 unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
415
416 // Configuration of email warning messages
417 std::string emailcmdline; // script to execute, empty if no messages
418 std::string emailaddress; // email address, or empty
419 unsigned char emailfreq{}; // Emails once (1) daily (2) diminishing (3)
420 bool emailtest{}; // Send test email?
421
422 // ATA ONLY
423 int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
424 int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
425 int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
426 int set_lookahead{}; // disable(-1), enable(1) read look-ahead
427 int set_standby{}; // set(1..255->0..254) standby timer
428 bool set_security_freeze{}; // Freeze ATA security
429 int set_wcache{}; // disable(-1), enable(1) write cache
430 int set_dsn{}; // disable(0x2), enable(0x1) DSN
431
432 bool sct_erc_set{}; // set SCT ERC to:
433 unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
434 unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
435
436 unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
437 unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
438 bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
439 bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
440
441 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
442
444};
445
// Count of distinct mail message types
static constexpr int SMARTD_NMAIL = 13;
// Mail type index used for '-M test' mails (state not persistent)
static constexpr int MAILTYPE_TEST = 0;
450// TODO: Add const or enum for all mail types.
451
// Bookkeeping for one mail message type.  All members start at zero.
struct mailinfo {
  int logged = 0;        // how many emails have been sent so far
  time_t firstsent = 0;  // time(2) value when the first email was sent
  time_t lastsent = 0;   // time(2) value when the most recent email was sent
};
457
458/// Persistent state data for a device.
460{
461 unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
462
463 unsigned char selflogcount{}; // total number of self-test errors
464 unsigned short selfloghour{}; // lifetime hours of last self-test error
465
466 time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
467
468 uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
469 uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
470
471 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
472
473 // ATA ONLY
474 int ataerrorcount{}; // Total number of ATA errors
475
476 // Persistent part of ata_smart_values:
478 unsigned char id{};
479 unsigned char val{};
480 unsigned char worst{}; // Byte needed for 'raw64' attribute only.
481 uint64_t raw{};
482 unsigned char resvd{};
483 };
485
486 // SCSI ONLY
487
490 unsigned char found{};
491 };
493
496 unsigned char found{};
497 };
499
500 // NVMe only
502};
503
504/// Non-persistent state data for a device.
506{
507 bool must_write{}; // true if persistent part should be written
508
509 bool skip{}; // skip during next check cycle
510 time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
511
512 bool not_cap_offline{}; // true == not capable of offline testing
517
518 unsigned char temperature{}; // last recorded Temperature (in Celsius)
519 time_t tempmin_delay{}; // time where Min Temperature tracking will start
520
521 bool removed{}; // true if open() failed for removable device
522
523 bool powermodefail{}; // true if power mode check failed
524 int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
525 int lastpowermodeskipped{}; // the last power mode that was skipped
526
527 bool attrlog_dirty{}; // true if persistent part has new attr values that
528 // need to be written to attrlog
529
530 // SCSI ONLY
531 // TODO: change to bool
532 unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
533 unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
538 unsigned char SuppressReport{}; // minimize nuisance reports
539 unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
540 // know yet) 6 or 10
541 // ATA ONLY
542 uint64_t num_sectors{}; // Number of sectors
543 ata_smart_values smartval{}; // SMART data
545 bool offline_started{}; // true if offline data collection was started
546 bool selftest_started{}; // true if self-test was started
547};
548
549/// Runtime state data for a device.
551: public persistent_dev_state,
552 public temp_dev_state
553{
555 void update_temp_state();
556};
557
558/// Container for configuration info for each device.
559typedef std::vector<dev_config> dev_config_vector;
560
561/// Container for state info for each device.
562typedef std::vector<dev_state> dev_state_vector;
563
564// Copy ATA attributes to persistent state.
566{
567 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
570 pa.id = ta.id;
571 if (ta.id == 0) {
572 pa.val = pa.worst = 0; pa.raw = 0;
573 continue;
574 }
575 pa.val = ta.current;
576 pa.worst = ta.worst;
577 pa.raw = ta.raw[0]
578 | ( ta.raw[1] << 8)
579 | ( ta.raw[2] << 16)
580 | ((uint64_t)ta.raw[3] << 24)
581 | ((uint64_t)ta.raw[4] << 32)
582 | ((uint64_t)ta.raw[5] << 40);
583 pa.resvd = ta.reserv;
584 }
585}
586
587// Copy ATA from persistent to temp state.
589{
590 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
591 const ata_attribute & pa = ata_attributes[i];
593 ta.id = pa.id;
594 if (pa.id == 0) {
595 ta.current = ta.worst = 0;
596 memset(ta.raw, 0, sizeof(ta.raw));
597 continue;
598 }
599 ta.current = pa.val;
600 ta.worst = pa.worst;
601 ta.raw[0] = (unsigned char) pa.raw;
602 ta.raw[1] = (unsigned char)(pa.raw >> 8);
603 ta.raw[2] = (unsigned char)(pa.raw >> 16);
604 ta.raw[3] = (unsigned char)(pa.raw >> 24);
605 ta.raw[4] = (unsigned char)(pa.raw >> 32);
606 ta.raw[5] = (unsigned char)(pa.raw >> 40);
607 ta.reserv = pa.resvd;
608 }
609}
610
611// Parse a line from a state file.
612static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
613{
614 static const regular_expression regex(
615 "^ *"
616 "((temperature-min)" // (1 (2)
617 "|(temperature-max)" // (3)
618 "|(self-test-errors)" // (4)
619 "|(self-test-last-err-hour)" // (5)
620 "|(scheduled-test-next-check)" // (6)
621 "|(selective-test-last-start)" // (7)
622 "|(selective-test-last-end)" // (8)
623 "|(ata-error-count)" // (9)
624 "|(mail\\.([0-9]+)\\." // (10 (11)
625 "((count)" // (12 (13)
626 "|(first-sent-time)" // (14)
627 "|(last-sent-time)" // (15)
628 ")" // 12)
629 ")" // 10)
630 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
631 "((id)" // (18 (19)
632 "|(val)" // (20)
633 "|(worst)" // (21)
634 "|(raw)" // (22)
635 "|(resvd)" // (23)
636 ")" // 18)
637 ")" // 16)
638 "|(nvme-err-log-entries)" // (24)
639 ")" // 1)
640 " *= *([0-9]+)[ \n]*$" // (25)
641 );
642
643 const int nmatch = 1+25;
645 if (!regex.execute(line, nmatch, match))
646 return false;
647 if (match[nmatch-1].rm_so < 0)
648 return false;
649
650 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
651
652 int m = 1;
653 if (match[++m].rm_so >= 0)
654 state.tempmin = (unsigned char)val;
655 else if (match[++m].rm_so >= 0)
656 state.tempmax = (unsigned char)val;
657 else if (match[++m].rm_so >= 0)
658 state.selflogcount = (unsigned char)val;
659 else if (match[++m].rm_so >= 0)
660 state.selfloghour = (unsigned short)val;
661 else if (match[++m].rm_so >= 0)
662 state.scheduled_test_next_check = (time_t)val;
663 else if (match[++m].rm_so >= 0)
664 state.selective_test_last_start = val;
665 else if (match[++m].rm_so >= 0)
666 state.selective_test_last_end = val;
667 else if (match[++m].rm_so >= 0)
668 state.ataerrorcount = (int)val;
669 else if (match[m+=2].rm_so >= 0) {
670 int i = atoi(line+match[m].rm_so);
671 if (!(0 <= i && i < SMARTD_NMAIL))
672 return false;
673 if (i == MAILTYPE_TEST) // Don't suppress test mails
674 return true;
675 if (match[m+=2].rm_so >= 0)
676 state.maillog[i].logged = (int)val;
677 else if (match[++m].rm_so >= 0)
678 state.maillog[i].firstsent = (time_t)val;
679 else if (match[++m].rm_so >= 0)
680 state.maillog[i].lastsent = (time_t)val;
681 else
682 return false;
683 }
684 else if (match[m+=5+1].rm_so >= 0) {
685 int i = atoi(line+match[m].rm_so);
686 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
687 return false;
688 if (match[m+=2].rm_so >= 0)
689 state.ata_attributes[i].id = (unsigned char)val;
690 else if (match[++m].rm_so >= 0)
691 state.ata_attributes[i].val = (unsigned char)val;
692 else if (match[++m].rm_so >= 0)
693 state.ata_attributes[i].worst = (unsigned char)val;
694 else if (match[++m].rm_so >= 0)
695 state.ata_attributes[i].raw = val;
696 else if (match[++m].rm_so >= 0)
697 state.ata_attributes[i].resvd = (unsigned char)val;
698 else
699 return false;
700 }
701 else if (match[m+7].rm_so >= 0)
702 state.nvme_err_log_entries = val;
703 else
704 return false;
705 return true;
706}
707
708// Read a state file.
709static bool read_dev_state(const char * path, persistent_dev_state & state)
710{
711 stdio_file f(path, "r");
712 if (!f) {
713 if (errno != ENOENT)
714 pout("Cannot read state file \"%s\"\n", path);
715 return false;
716 }
717#ifdef __CYGWIN__
718 setmode(fileno(f), O_TEXT); // Allow files with \r\n
719#endif
720
721 persistent_dev_state new_state;
722 int good = 0, bad = 0;
723 char line[256];
724 while (fgets(line, sizeof(line), f)) {
725 const char * s = line + strspn(line, " \t");
726 if (!*s || *s == '#')
727 continue;
728 if (!parse_dev_state_line(line, new_state))
729 bad++;
730 else
731 good++;
732 }
733
734 if (bad) {
735 if (!good) {
736 pout("%s: format error\n", path);
737 return false;
738 }
739 pout("%s: %d invalid line(s) ignored\n", path, bad);
740 }
741
742 // This sets the values missing in the file to 0.
743 state = new_state;
744 return true;
745}
746
// Write "name = val" to the state file.  Nothing is written if val is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (!val)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
752
// Write "name1.id.name2 = val" to the state file.  Nothing is written
// if val is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (!val)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
758
759// Write a state file
760static bool write_dev_state(const char * path, const persistent_dev_state & state)
761{
762 // Rename old "file" to "file~"
763 std::string pathbak = path; pathbak += '~';
764 unlink(pathbak.c_str());
765 rename(path, pathbak.c_str());
766
767 stdio_file f(path, "w");
768 if (!f) {
769 pout("Cannot create state file \"%s\"\n", path);
770 return false;
771 }
772
773 fprintf(f, "# smartd state file\n");
774 write_dev_state_line(f, "temperature-min", state.tempmin);
775 write_dev_state_line(f, "temperature-max", state.tempmax);
776 write_dev_state_line(f, "self-test-errors", state.selflogcount);
777 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
778 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
779 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
780 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
781
782 for (int i = 0; i < SMARTD_NMAIL; i++) {
783 if (i == MAILTYPE_TEST) // Don't suppress test mails
784 continue;
785 const mailinfo & mi = state.maillog[i];
786 if (!mi.logged)
787 continue;
788 write_dev_state_line(f, "mail", i, "count", mi.logged);
789 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
790 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
791 }
792
793 // ATA ONLY
794 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
795
796 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
797 const auto & pa = state.ata_attributes[i];
798 if (!pa.id)
799 continue;
800 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
801 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
802 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
803 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
804 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
805 }
806
807 // NVMe only
808 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
809
810 return true;
811}
812
813// Write to the attrlog file
814static bool write_dev_attrlog(const char * path, const dev_state & state)
815{
816 stdio_file f(path, "a");
817 if (!f) {
818 pout("Cannot create attribute log file \"%s\"\n", path);
819 return false;
820 }
821
822
823 time_t now = time(nullptr);
824 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
825 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
826 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
827 tms->tm_hour, tms->tm_min, tms->tm_sec);
828 // ATA ONLY
829 for (const auto & pa : state.ata_attributes) {
830 if (!pa.id)
831 continue;
832 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
833 }
834 // SCSI ONLY
835 const struct scsiErrorCounter * ecp;
836 const char * pageNames[3] = {"read", "write", "verify"};
837 for (int k = 0; k < 3; ++k) {
838 if ( !state.scsi_error_counters[k].found ) continue;
839 ecp = &state.scsi_error_counters[k].errCounter;
840 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
841 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
842 "\t%s-corr-by-retry;%" PRIu64 ";"
843 "\t%s-total-err-corrected;%" PRIu64 ";"
844 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
845 "\t%s-gb-processed;%.3f;"
846 "\t%s-total-unc-errors;%" PRIu64 ";",
847 pageNames[k], ecp->counter[0],
848 pageNames[k], ecp->counter[1],
849 pageNames[k], ecp->counter[2],
850 pageNames[k], ecp->counter[3],
851 pageNames[k], ecp->counter[4],
852 pageNames[k], (ecp->counter[5] / 1000000000.0),
853 pageNames[k], ecp->counter[6]);
854 }
855 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
856 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
857 }
858 // write SCSI current temperature if it is monitored
859 if (state.temperature)
860 fprintf(f, "\ttemperature;%d;", state.temperature);
861 // end of line
862 fprintf(f, "\n");
863 return true;
864}
865
866// Write all state files. If write_always is false, don't write
867// unless must_write is set.
868static void write_all_dev_states(const dev_config_vector & configs,
869 dev_state_vector & states,
870 bool write_always = true)
871{
872 for (unsigned i = 0; i < states.size(); i++) {
873 const dev_config & cfg = configs.at(i);
874 if (cfg.state_file.empty())
875 continue;
876 dev_state & state = states[i];
877 if (!write_always && !state.must_write)
878 continue;
879 if (!write_dev_state(cfg.state_file.c_str(), state))
880 continue;
881 state.must_write = false;
882 if (write_always || debugmode)
883 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
884 cfg.name.c_str(), cfg.state_file.c_str());
885 }
886}
887
888// Write to all attrlog files
889static void write_all_dev_attrlogs(const dev_config_vector & configs,
890 dev_state_vector & states)
891{
892 for (unsigned i = 0; i < states.size(); i++) {
893 const dev_config & cfg = configs.at(i);
894 if (cfg.attrlog_file.empty())
895 continue;
896 dev_state & state = states[i];
897 if (state.attrlog_dirty) {
898 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
899 state.attrlog_dirty = false;
900 }
901 }
902}
903
904extern "C" { // signal handlers require C-linkage
905
906// Note if we catch a SIGUSR1
907static void USR1handler(int sig)
908{
909 if (SIGUSR1==sig)
911 return;
912}
913
914#ifdef _WIN32
915// Note if we catch a SIGUSR2
916static void USR2handler(int sig)
917{
918 if (SIGUSR2==sig)
919 caughtsigUSR2=1;
920 return;
921}
922#endif
923
924// Note if we catch a HUP (or INT in debug mode)
925static void HUPhandler(int sig)
926{
927 if (sig==SIGHUP)
928 caughtsigHUP=1;
929 else
930 caughtsigHUP=2;
931 return;
932}
933
934// signal handler for TERM, QUIT, and INT (if not in debug mode)
935static void sighandler(int sig)
936{
937 if (!caughtsigEXIT)
938 caughtsigEXIT=sig;
939 return;
940}
941
942} // extern "C"
943
944#ifdef HAVE_LIBCAP_NG
945// capabilities(7) support
946
947static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
948
949static void capabilities_drop_now()
950{
951 if (!capabilities_mode)
952 return;
953 capng_clear(CAPNG_SELECT_BOTH);
954 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
955 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
956 if (warn_as_user && (warn_uid || warn_gid)) {
957 // For popen_as_ugid()
958 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
959 CAP_SETGID, CAP_SETUID, -1);
960 }
961 if (capabilities_mode > 1) {
962 // For exim MTA
963 capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
964 CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
965 }
966 capng_apply(CAPNG_SELECT_BOTH);
967}
968
969static void capabilities_log_error_hint()
970{
971 if (!capabilities_mode)
972 return;
973 PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
974 (capabilities_mode == 1 ? "', try '--capabilities=mail'"
975 : "=mail', please inform " PACKAGE_BUGREPORT));
976}
977
978#else // HAVE_LIBCAP_NG
979// No capabilities(7) support
980
// Empty replacements used when capabilities support is not compiled in.
static inline void capabilities_drop_now()
{
}

static inline void capabilities_log_error_hint()
{
}
983
984#endif // HAVE_LIBCAP_NG
985
986// a replacement for setenv() which is not available on all platforms.
987// Note that the string passed to putenv must not be freed or made
988// invalid, since a pointer to it is kept by putenv(). This means that
989// it must either be a static buffer or allocated off the heap. The
990// string can be freed if the environment variable is redefined via
991// another call to putenv(). There is no portable way to unset a variable
992// with putenv(). So we manage the buffer in a static object.
993// Using setenv() if available is not considered because some
994// implementations may produce memory leaks.
995
997{
998public:
999 env_buffer() = default;
1000 env_buffer(const env_buffer &) = delete;
1001 void operator=(const env_buffer &) = delete;
1002
1003 void set(const char * name, const char * value);
1004private:
1005 char * m_buf = nullptr;
1006};
1007
1008void env_buffer::set(const char * name, const char * value)
1009{
1010 int size = strlen(name) + 1 + strlen(value) + 1;
1011 char * newbuf = new char[size];
1012 snprintf(newbuf, size, "%s=%s", name, value);
1013
1014 if (putenv(newbuf))
1015 throw std::runtime_error("putenv() failed");
1016
1017 // This assumes that the same NAME is passed on each call
1018 delete [] m_buf;
1019 m_buf = newbuf;
1020}
1021
1022#define EBUFLEN 1024
1023
1024static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1026
1027// If either address or executable path is non-null then send and log
1028// a warning email, or execute executable
1029static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1030{
1031 static const char * const whichfail[] = {
1032 "EmailTest", // 0
1033 "Health", // 1
1034 "Usage", // 2
1035 "SelfTest", // 3
1036 "ErrorCount", // 4
1037 "FailedHealthCheck", // 5
1038 "FailedReadSmartData", // 6
1039 "FailedReadSmartErrorLog", // 7
1040 "FailedReadSmartSelfTestLog", // 8
1041 "FailedOpenDevice", // 9
1042 "CurrentPendingSector", // 10
1043 "OfflineUncorrectableSector", // 11
1044 "Temperature" // 12
1045 };
1046
1047 // See if user wants us to send mail
1048 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1049 return;
1050
1051 std::string address = cfg.emailaddress;
1052 const char * executable = cfg.emailcmdline.c_str();
1053
1054 // which type of mail are we sending?
1055 mailinfo * mail=(state.maillog)+which;
1056
1057 // checks for sanity
1058 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
1059 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
1060 return;
1061 }
1062 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
1063 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
1064 which, (int)sizeof(whichfail));
1065 return;
1066 }
1067
1068 // Return if a single warning mail has been sent.
1069 if ((cfg.emailfreq==1) && mail->logged)
1070 return;
1071
1072 // Return if this is an email test and one has already been sent.
1073 if (which == 0 && mail->logged)
1074 return;
1075
1076 // To decide if to send mail, we need to know what time it is.
1077 time_t epoch = time(nullptr);
1078
1079 // Return if less than one day has gone by
1080 const int day = 24*3600;
1081 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1082 return;
1083
1084 // Return if less than 2^(logged-1) days have gone by
1085 if (cfg.emailfreq==3 && mail->logged) {
1086 int days = 0x01 << (mail->logged - 1);
1087 days*=day;
1088 if (epoch<(mail->lastsent+days))
1089 return;
1090 }
1091
1092 // record the time of this mail message, and the first mail message
1093 if (!mail->logged)
1094 mail->firstsent=epoch;
1095 mail->lastsent=epoch;
1096
1097 // print warning string into message
1098 // Note: Message length may reach ~300 characters as device names may be
1099 // very long on certain platforms (macOS ~230 characters).
1100 // Message length must not exceed email line length limit, see RFC 5322:
1101 // "... MUST be no more than 998 characters, ... excluding the CRLF."
1102 char message[512];
1103 va_list ap;
1104 va_start(ap, fmt);
1105 vsnprintf(message, sizeof(message), fmt, ap);
1106 va_end(ap);
1107
1108 // replace commas by spaces to separate recipients
1109 std::replace(address.begin(), address.end(), ',', ' ');
1110
1111 // Export information in environment variables that will be useful
1112 // for user scripts
1113 static env_buffer env[13];
1114 env[0].set("SMARTD_MAILER", executable);
1115 env[1].set("SMARTD_MESSAGE", message);
1116 char dates[DATEANDEPOCHLEN];
1117 snprintf(dates, sizeof(dates), "%d", mail->logged);
1118 env[2].set("SMARTD_PREVCNT", dates);
1119 dateandtimezoneepoch(dates, mail->firstsent);
1120 env[3].set("SMARTD_TFIRST", dates);
1121 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1122 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1123 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1124 env[6].set("SMARTD_ADDRESS", address.c_str());
1125 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1126
1127 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1128 env[8].set("SMARTD_DEVICETYPE",
1129 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1130 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1131
1132 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1133 dates[0] = 0;
1134 if (which) switch (cfg.emailfreq) {
1135 case 2: dates[0] = '1'; dates[1] = 0; break;
1136 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1137 }
1138 env[11].set("SMARTD_NEXTDAYS", dates);
1139 // Avoid false positive recursion detection by smartd_warning.{sh,cmd}
1140 env[12].set("SMARTD_SUBJECT", "");
1141
1142 // now construct a command to send this as EMAIL
1143 if (!*executable)
1144 executable = "<mail>";
1145 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1146 const char * newwarn = (which? "Warning via" : "Test of");
1147
1148 char command[256];
1149#ifdef _WIN32
1150 // Path may contain spaces
1151 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1152#else
1153 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1154#endif
1155
1156 // tell SYSLOG what we are about to do...
1157 PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
1158 (which ? "Sending warning via" : "Executing test of"), executable, newadd,
1159 (
1160#ifdef HAVE_POSIX_API
1161 warn_as_user ?
1162 strprintf(" (uid=%u(%s) gid=%u(%s))",
1163 (unsigned)warn_uid, warn_uname.c_str(),
1164 (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
1165#elif defined(_WIN32)
1166 warn_as_restr_user ? " (restricted user)" :
1167#endif
1168 ""
1169 )
1170 );
1171
1172 // issue the command to send mail or to run the user's executable
1173 errno=0;
1174 FILE * pfp;
1175
1176#ifdef HAVE_POSIX_API
1177 if (warn_as_user) {
1178 pfp = popen_as_ugid(command, "r", warn_uid, warn_gid);
1179 } else
1180#endif
1181 {
1182#ifdef _WIN32
1183 pfp = popen_as_restr_user(command, "r", warn_as_restr_user);
1184#else
1185 pfp = popen(command, "r");
1186#endif
1187 }
1188
1189 if (!pfp)
1190 // failed to popen() mail process
1191 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1192 newwarn, executable, newadd, errno?strerror(errno):"");
1193 else {
1194 // pipe succeeded!
1195 int len;
1196 char buffer[EBUFLEN];
1197
1198 // if unexpected output on stdout/stderr, null terminate, print, and flush
1199 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1200 int count=0;
1201 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1202 buffer[newlen]='\0';
1203 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1204 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1205
1206 // flush pipe if needed
1207 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1208 count++;
1209
1210 // tell user that pipe was flushed, or that something is really wrong
1211 if (count && count<EBUFLEN)
1212 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1213 newwarn, executable, newadd);
1214 else if (count)
1215 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1216 newwarn, executable, newadd);
1217 }
1218
1219 // if something went wrong with mail process, print warning
1220 errno=0;
1221 int status;
1222
1223#ifdef HAVE_POSIX_API
1224 if (warn_as_user) {
1225 status = pclose_as_ugid(pfp);
1226 } else
1227#endif
1228 {
1229 status = pclose(pfp);
1230 }
1231
1232 if (status == -1)
1233 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1234 errno?strerror(errno):"");
1235 else {
1236 // mail process apparently succeeded. Check and report exit status
1237 if (WIFEXITED(status)) {
1238 // exited 'normally' (but perhaps with nonzero status)
1239 int status8 = WEXITSTATUS(status);
1240 if (status8>128)
1241 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1242 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1243 else if (status8) {
1244 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1245 newwarn, executable, newadd, status, status8);
1247 }
1248 else
1249 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1250 }
1251
1252 if (WIFSIGNALED(status))
1253 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1254 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1255
1256 // this branch is probably not possible. If subprocess is
1257 // stopped then pclose() should not return.
1258 if (WIFSTOPPED(status))
1259 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1260 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1261
1262 }
1263 }
1264
1265 // increment mail sent counter
1266 mail->logged++;
1267}
1268
1269static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1271
1272static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1273{
1274 if (!(0 <= which && which < SMARTD_NMAIL))
1275 return;
1276
1277 // Return if no mail sent yet
1278 mailinfo & mi = state.maillog[which];
1279 if (!mi.logged)
1280 return;
1281
1282 // Format & print message
1283 char msg[256];
1284 va_list ap;
1285 va_start(ap, fmt);
1286 vsnprintf(msg, sizeof(msg), fmt, ap);
1287 va_end(ap);
1288
1289 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1290 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1291
1292 // Clear mail counter and timestamps
1293 mi = mailinfo();
1294 state.must_write = true;
1295}
1296
1297#ifndef _WIN32
1298
1299// Output multiple lines via separate syslog(3) calls.
1301static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1302{
1303 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1304 vsnprintf(buf, sizeof(buf), fmt, ap);
1305
1306 for (char * p = buf, * q; p && *p; p = q) {
1307 if ((q = strchr(p, '\n')))
1308 *q++ = 0;
1309 if (*p)
1310 syslog(priority, "%s\n", p);
1311 }
1312}
1313
1314#else // _WIN32
1315// os_win32/syslog_win32.cpp supports multiple lines.
1316#define vsyslog_lines vsyslog
1317#endif // _WIN32
1318
1319// Printing function for watching ataprint commands, or losing them
1320// [From GLIBC Manual: Since the prototype doesn't specify types for
1321// optional arguments, in a call to a variadic function the default
1322// argument promotions are performed on the optional argument
1323// values. This means the objects of type char or short int (whether
1324// signed or not) are promoted to either int or unsigned int, as
1325// appropriate.]
1326void pout(const char *fmt, ...){
1327 va_list ap;
1328
1329 // get the correct time in syslog()
1331 // initialize variable argument list
1332 va_start(ap,fmt);
1333 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1334 if (debugmode && debugmode != 2) {
1335 FILE * f = stdout;
1336#ifdef _WIN32
1337 if (facility == LOG_LOCAL1) // logging to stdout
1338 f = stderr;
1339#endif
1340 vfprintf(f, fmt, ap);
1341 fflush(f);
1342 }
1343 // in debugmode==2 mode we print output from knowndrives.o functions
1344 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1345 openlog("smartd", LOG_PID, facility);
1346 vsyslog_lines(LOG_INFO, fmt, ap);
1347 closelog();
1348 }
1349 va_end(ap);
1350 return;
1351}
1352
1353// This function prints either to stdout or to the syslog as needed.
1354static void PrintOut(int priority, const char *fmt, ...){
1355 va_list ap;
1356
1357 // get the correct time in syslog()
1359 // initialize variable argument list
1360 va_start(ap,fmt);
1361 if (debugmode) {
1362 FILE * f = stdout;
1363#ifdef _WIN32
1364 if (facility == LOG_LOCAL1) // logging to stdout
1365 f = stderr;
1366#endif
1367 vfprintf(f, fmt, ap);
1368 fflush(f);
1369 }
1370 else {
1371 openlog("smartd", LOG_PID, facility);
1372 vsyslog_lines(priority, fmt, ap);
1373 closelog();
1374 }
1375 va_end(ap);
1376 return;
1377}
1378
1379// Used to warn users about invalid checksums. Called from atacmds.cpp.
1380void checksumwarning(const char * string)
1381{
1382 pout("Warning! %s error: invalid SMART checksum.\n", string);
1383}
1384
1385#ifndef _WIN32
1386
1387// Wait for the pid file to show up, this makes sure a calling program knows
1388// that the daemon is really up and running and has a pid to kill it
1389static bool WaitForPidFile()
1390{
1391 int waited, max_wait = 10;
1392 struct stat stat_buf;
1393
1394 if (pid_file.empty() || debugmode)
1395 return true;
1396
1397 for(waited = 0; waited < max_wait; ++waited) {
1398 if (!stat(pid_file.c_str(), &stat_buf)) {
1399 return true;
1400 } else
1401 sleep(1);
1402 }
1403 return false;
1404}
1405
1406#endif // _WIN32
1407
1408// Forks new process if needed, closes ALL file descriptors,
1409// redirects stdin, stdout, and stderr. Not quite daemon().
1410// See https://www.linuxjournal.com/article/2335
1411// for a good description of why we do things this way.
1412static int daemon_init()
1413{
1414#ifndef _WIN32
1415
1416 // flush all buffered streams. Else we might get two copies of open
1417 // streams since both parent and child get copies of the buffers.
1418 fflush(nullptr);
1419
1420 if (do_fork) {
1421 pid_t pid;
1422 if ((pid=fork()) < 0) {
1423 // unable to fork!
1424 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1425 return EXIT_STARTUP;
1426 }
1427 if (pid) {
1428 // we are the parent process, wait for pid file, then exit cleanly
1429 if(!WaitForPidFile()) {
1430 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1431 return EXIT_STARTUP;
1432 }
1433 return 0;
1434 }
1435
1436 // from here on, we are the child process.
1437 setsid();
1438
1439 // Fork one more time to avoid any possibility of having terminals
1440 if ((pid=fork()) < 0) {
1441 // unable to fork!
1442 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1443 return EXIT_STARTUP;
1444 }
1445 if (pid)
1446 // we are the parent process -- exit cleanly
1447 return 0;
1448
1449 // Now we are the child's child...
1450 }
1451
1452 // close any open file descriptors
1453 for (int i = sysconf(_SC_OPEN_MAX); --i >= 0; )
1454 close(i);
1455
1456 // redirect any IO attempts to /dev/null and change to root directory
1457 int fd = open("/dev/null", O_RDWR);
1458 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1459 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1460 return EXIT_STARTUP;
1461 }
1462 umask(0022);
1463
1464 if (do_fork)
1465 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1466
1467#else // _WIN32
1468
1469 // No fork() on native Win32
1470 // Detach this process from console
1471 fflush(nullptr);
1472 if (daemon_detach("smartd")) {
1473 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1474 return EXIT_STARTUP;
1475 }
1476 // stdin/out/err now closed if not redirected
1477
1478#endif // _WIN32
1479
1480 // No error, continue in main_worker()
1481 return -1;
1482}
1483
1484// create a PID file containing the current process id
1485static bool write_pid_file()
1486{
1487 if (!pid_file.empty()) {
1488 pid_t pid = getpid();
1489 mode_t old_umask;
1490#ifndef __CYGWIN__
1491 old_umask = umask(0077); // rwx------
1492#else
1493 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1494 old_umask = umask(0033); // rwxr--r--
1495#endif
1496
1497 stdio_file f(pid_file.c_str(), "w");
1498 umask(old_umask);
1499 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1500 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1501 return false;
1502 }
1503 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1504 }
1505 return true;
1506}
1507
1508// Prints header identifying version of code and home
1509static void PrintHead()
1510{
1511 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1512}
1513
1514// prints help info for configuration file Directives
1515static void Directives()
1516{
1517 PrintOut(LOG_INFO,
1518 "Configuration file (%s) Directives (after device name):\n"
1519 " -d TYPE Set the device type: auto, ignore, removable,\n"
1520 " %s\n"
1521 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1522 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1523 " -S VAL Enable/disable attribute autosave (on/off)\n"
1524 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1525 " -H Monitor SMART Health Status, report if failed\n"
1526 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1527 " -l TYPE Monitor SMART log or self-test status:\n"
1528 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1529 " -l scterc,R,W Set SCT Error Recovery Control\n"
1530 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1531 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1532 " -f Monitor 'Usage' Attributes, report failures\n"
1533 " -m ADD Send email warning to address ADD\n"
1534 " -M TYPE Modify email warning behavior (see man page)\n"
1535 " -p Report changes in 'Prefailure' Attributes\n"
1536 " -u Report changes in 'Usage' Attributes\n"
1537 " -t Equivalent to -p and -u Directives\n"
1538 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1539 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1540 " -i ID Ignore Attribute ID for -f Directive\n"
1541 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1542 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1543 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1544 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1545 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1546 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1547 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1548 " -F TYPE Use firmware bug workaround:\n"
1549 " %s\n"
1550 " -c i=N Set interval between disk checks to N seconds\n"
1551 " # Comment: text after a hash sign is ignored\n"
1552 " \\ Line continuation character\n"
1553 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1554 "Use ID = 0 to turn off -C and/or -U Directives\n"
1555 "Example: /dev/sda -a\n",
1556 configfile,
1557 smi()->get_valid_dev_types_str().c_str(),
1559}
1560
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or nullptr on failure (option not known here). */
static const char *GetValidArgList(char opt)
{
  const char * arglist = nullptr;

  switch (opt) {
    case 'A':
    case 's':
      arglist = "<PATH_PREFIX>, -";
      break;
    case 'B':
      arglist = "[+]<FILE_NAME>";
      break;
    case 'c':
      arglist = "<FILE_NAME>, -";
      break;
    case 'l':
      arglist = "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
      break;
    case 'q':
      arglist = "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests";
      break;
    case 'r':
      arglist = "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
      break;
    case 'p':
    case 'w':
      arglist = "<FILE_NAME>";
      break;
    case 'i':
      arglist = "<INTEGER_SECONDS>";
      break;
#ifdef HAVE_POSIX_API
    case 'u':
      arglist = "<USER>[:<GROUP>], -";
      break;
#elif defined(_WIN32)
    case 'u':
      arglist = "restricted, unchanged";
      break;
#endif
#ifdef HAVE_LIBCAP_NG
    case 'C':
      arglist = "mail, <no_argument>";
      break;
#endif
    default:
      break;
  }

  return arglist;
}
1599
1600/* prints help information for command syntax */
1601static void Usage()
1602{
1603 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1604#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1605 PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
1606#else
1607 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1608#endif
1609 PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
1610#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1611 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
1612#endif
1613 PrintOut(LOG_INFO,"\n");
1614 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1615 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1616 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1617#ifdef SMARTMONTOOLS_DRIVEDBDIR
1618 PrintOut(LOG_INFO,"\n");
1619 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1620#endif
1621 PrintOut(LOG_INFO,"]\n\n");
1622 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1623 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1624 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1625#ifdef HAVE_LIBCAP_NG
1626 PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
1627 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1628 " Warning: Mail notification may not work when used.\n\n");
1629#endif
1630 PrintOut(LOG_INFO," -d, --debug\n");
1631 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1632 PrintOut(LOG_INFO," -D, --showdirectives\n");
1633 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1634 PrintOut(LOG_INFO," -h, --help, --usage\n");
1635 PrintOut(LOG_INFO," Display this help and exit\n\n");
1636 PrintOut(LOG_INFO," -i N, --interval=N\n");
1637 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1638 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1639#ifndef _WIN32
1640 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1641#else
1642 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1643#endif
1644#ifndef _WIN32
1645 PrintOut(LOG_INFO," -n, --no-fork\n");
1646 PrintOut(LOG_INFO," Do not fork into background\n");
1647#ifdef HAVE_LIBSYSTEMD
1648 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1649#endif // HAVE_LIBSYSTEMD
1650 PrintOut(LOG_INFO,"\n");
1651#endif // WIN32
1652 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1653 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1654 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1655 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1656 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1657 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1658#ifdef SMARTMONTOOLS_SAVESTATES
1659 PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
1660#else
1661 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1662#endif
1663 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1664#ifdef SMARTMONTOOLS_SAVESTATES
1665 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1666#endif
1667 PrintOut(LOG_INFO,"\n");
1668 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1669 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1670#ifndef _WIN32
1671 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1672#else
1673 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1674#endif
1675#ifdef HAVE_POSIX_API
1676 PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
1677 PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
1678#elif defined(_WIN32)
1679 PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
1680 PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
1681#endif
1682#ifdef _WIN32
1683 PrintOut(LOG_INFO," --service\n");
1684 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1685 PrintOut(LOG_INFO," smartd install [options]\n");
1686 PrintOut(LOG_INFO," Remove service with:\n");
1687 PrintOut(LOG_INFO," smartd remove\n\n");
1688#endif // _WIN32
1689 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1690 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1691}
1692
1693static int CloseDevice(smart_device * device, const char * name)
1694{
1695 if (!device->close()){
1696 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1697 return 1;
1698 }
1699 // device successfully closed
1700 return 0;
1701}
1702
// Replace invalid characters in cfg.dev_idinfo.
// Every character outside the printable ASCII range ' '..'~' and a '~' in
// the first position is replaced by '?'.
// Returns true if at least one character was replaced.
static bool sanitize_dev_idinfo(std::string & s)
{
  static_assert(' ' == 0x20 && '~' == 0x7e, "ASCII character set assumed");

  bool changed = false;
  for (std::string::size_type i = 0; i < s.size(); i++) {
    const char c = s[i];
    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
    if ((' ' <= c && c <= '~') && !(i == 0 && c == '~'))
      continue;
    s[i] = '?';
    changed = true;
  }
  return changed;
}
1718
// Returns true if C is not allowed in a state file name,
// i.e. if it is anything other than an ASCII digit or letter.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1726
1727// Read error count from Summary or Extended Comprehensive SMART error log
1728// Return -1 on error
1729static int read_ata_error_count(ata_device * device, const char * name,
1730 firmwarebug_defs firmwarebugs, bool extended)
1731{
1732 if (!extended) {
1734 if (ataReadErrorLog(device, &log, firmwarebugs)){
1735 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1736 return -1;
1737 }
1738 return (log.error_log_pointer ? log.ata_error_count : 0);
1739 }
1740 else {
1742 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1743 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1744 return -1;
1745 }
1746 // Some disks use the reserved byte as index, see ataprint.cpp.
1747 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1748 }
1749}
1750
1751// returns <0 if problem. Otherwise, bottom 8 bits are the self test
1752// error count, and top bits are the power-on hours of the last error.
1753static int SelfTestErrorCount(ata_device * device, const char * name,
1754 firmwarebug_defs firmwarebugs)
1755{
1756 struct ata_smart_selftestlog log;
1757
1758 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1759 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1760 return -1;
1761 }
1762
1763 if (!log.mostrecenttest)
1764 // No tests logged
1765 return 0;
1766
1767 // Count failed self-tests
1768 int errcnt = 0, hours = 0;
1769 for (int i = 20; i >= 0; i--) {
1770 int j = (i + log.mostrecenttest) % 21;
1772 if (!nonempty(&entry, sizeof(entry)))
1773 continue;
1774
1775 int status = entry.selfteststatus >> 4;
1776 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1777 // First successful extended self-test, stop count
1778 break;
1779
1780 if (0x3 <= status && status <= 0x8) {
1781 // Self-test showed an error
1782 errcnt++;
1783 // Keep track of time of most recent error
1784 if (!hours)
1785 hours = entry.timestamp;
1786 }
1787 }
1788
1789 return ((hours << 8) | errcnt);
1790}
1791
// Decode the result of SelfTestErrorCount():
// bits 0-7 are the error count, bits 8-23 the power-on hours of the last error.
// The argument is parenthesized so that expressions such as 'a | b' work.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1794
// Check offline data collection status:
// true if the low 7 bits of STATUS equal 0x03 (bit 7 is ignored).
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned char code = static_cast<unsigned char>(status & 0x7f);
  return code == 0x03;
}
1800
// Check self-test execution status:
// true if the high nibble of STATUS is 0xf (the low nibble is ignored).
static inline bool is_self_test_in_progress(unsigned char status)
{
  return (status & 0xf0) == 0xf0;
}
1806
1807// Log offline data collection status
1808static void log_offline_data_coll_status(const char * name, unsigned char status)
1809{
1810 const char * msg;
1811 switch (status & 0x7f) {
1812 case 0x00: msg = "was never started"; break;
1813 case 0x02: msg = "was completed without error"; break;
1814 case 0x03: msg = "is in progress"; break;
1815 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1816 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1817 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1818 default: msg = nullptr;
1819 }
1820
1821 if (msg)
1822 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1823 "Device: %s, offline data collection %s%s\n", name, msg,
1824 ((status & 0x80) ? " (auto:on)" : ""));
1825 else
1826 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1827 name, status);
1828}
1829
1830// Log self-test execution status
1831static void log_self_test_exec_status(const char * name, unsigned char status)
1832{
1833 const char * msg;
1834 switch (status >> 4) {
1835 case 0x0: msg = "completed without error"; break;
1836 case 0x1: msg = "was aborted by the host"; break;
1837 case 0x2: msg = "was interrupted by the host with a reset"; break;
1838 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1839 case 0x4: msg = "completed with error (unknown test element)"; break;
1840 case 0x5: msg = "completed with error (electrical test element)"; break;
1841 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1842 case 0x7: msg = "completed with error (read test element)"; break;
1843 case 0x8: msg = "completed with error (handling damage?)"; break;
1844 default: msg = nullptr;
1845 }
1846
1847 if (msg)
1848 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1849 "Device: %s, previous self-test %s\n", name, msg);
1850 else if ((status >> 4) == 0xf)
1851 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1852 name, status & 0x0f);
1853 else
1854 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1855 name, status);
1856}
1857
1858// Check pending sector count id (-C, -U directives).
1859static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1860 unsigned char id, const char * msg)
1861{
1862 // Check attribute index
1863 int i = ata_find_attr_index(id, state.smartval);
1864 if (i < 0) {
1865 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1866 cfg.name.c_str(), msg, id);
1867 return false;
1868 }
1869
1870 // Check value
1871 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1872 cfg.attribute_defs);
1873 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1874 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1875 cfg.name.c_str(), msg, id, rawval, rawval);
1876 return false;
1877 }
1878
1879 return true;
1880}
1881
1882// Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1883static void finish_device_scan(dev_config & cfg, dev_state & state)
1884{
1885 // Set cfg.emailfreq if user hasn't set it
1886 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1887 // Avoid that emails are suppressed forever due to state persistence
1888 if (cfg.state_file.empty())
1889 cfg.emailfreq = 1; // '-M once'
1890 else
1891 cfg.emailfreq = 2; // '-M daily'
1892 }
1893
1894 // Start self-test regex check now if time was not read from state file
1895 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1896 state.scheduled_test_next_check = time(nullptr);
1897}
1898
// Common function to format result message for ATA setting.
// Appends NAME to MSG (preceded by ", " if MSG is not empty), followed by:
//   ":--"   if !ok,
//   ":off"  if set_option < 0,
//   ":N"    with N = set_option-1 if has_value,
//   ":on"   if set_option > 0,
//   nothing otherwise.
static void format_set_result_msg(std::string & msg, const char * name, bool ok,
                                  int set_option = 0, bool has_value = false)
{
  if (!msg.empty())
    msg += ", ";
  msg += name;

  if (!ok) {
    msg += ":--";
    return;
  }
  if (set_option < 0) {
    msg += ":off";
    return;
  }
  if (has_value) {
    msg += ':';
    msg += std::to_string(set_option - 1);
    return;
  }
  if (set_option > 0)
    msg += ":on";
}
1915
1916// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1917static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1918{
1919 if (!cfg.id_is_unique)
1920 return false;
1921
1922 for (const auto & prev_cfg : prev_cfgs) {
1923 if (!prev_cfg.id_is_unique)
1924 continue;
1925 if (cfg.dev_idinfo != prev_cfg.dev_idinfo)
1926 continue;
1927
1928 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1929 cfg.dev_name.c_str(), prev_cfg.dev_name.c_str());
1930 return true;
1931 }
1932
1933 return false;
1934}
1935
// TODO: Add '-F swapid' directive
// Until then the swapped-id fix passed to ata_read_identity() stays disabled.
constexpr bool fix_swapped_id = false;
1938
1939 // scan to see what ata devices there are, and if they support SMART
// Reads IDENTIFY DEVICE from ATADEV, fills cfg.dev_idinfo, applies the directives
// stored in CFG and clears those monitoring options the device turns out not to
// support. The device is closed with CloseDevice() before every return.
// Return values visible in this function:
//   0 - device is added to the monitor list (state/attrlog file names are set up)
//   1 - identity duplicates an entry of PREV_CFGS
//   2 - identity could not be read (incl. packet devices), or SMART is unsupported
//       or could not be enabled and '-T permissive' was not given
//   3 - no monitoring option is left
// NOTE(review): this listing has embedded line numbers and skips 1984, 2002, 2024,
// 2027, 2144, 2152, 2192, 2309, 2314, 2322 and 2334; the source lines at those
// numbers are not visible here - TODO restore them from upstream.
1940 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1941 const dev_config_vector * prev_cfgs)
1942 {
1943 int supported=0;
1944 struct ata_identify_device drive;
1945 const char *name = cfg.name.c_str();
1946 int retid;
1947
1948 // Device must be open
1949
1950 // Get drive identity structure
1951 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1952 if (retid<0)
1953 // Unable to read Identity structure
1954 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1955 else
1956 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1957 name, packetdevicetype(retid-1));
1958 CloseDevice(atadev, name);
1959 return 2;
1960 }
1961
1962 // Get drive identity, size and rotation rate (HDD/SSD)
1963 char model[40+1], serial[20+1], firmware[8+1];
1964 ata_format_id_string(model, drive.model, sizeof(model)-1);
1965 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1966 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1967
1968 ata_size_info sizes;
1969 ata_get_size_info(&drive, sizes);
1970 state.num_sectors = sizes.sectors;
1971 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1972
// wwn stays an empty string unless ata_get_wwn() returns a non-negative NAA value
1973 char wwn[64]; wwn[0] = 0;
1974 unsigned oui = 0; uint64_t unique_id = 0;
1975 int naa = ata_get_wwn(&drive, oui, unique_id);
1976 if (naa >= 0)
1977 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1978
1979 // Format device id string for warning emails
1980 char cap[32];
1981 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1982 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1983 cfg.id_is_unique = true; // TODO: Check serial?
// NOTE(review): line 1984 is missing from this listing; presumably a condition
// guarding the assignment below - confirm against upstream.
1985 cfg.id_is_unique = false;
1986
1987 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1988
1989 // Check for duplicates
1990 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
1991 CloseDevice(atadev, name);
1992 return 1;
1993 }
1994
1995 // Show if device in database, and use preset vendor attribute
1996 // options unless user has requested otherwise.
1997 if (cfg.ignorepresets)
1998 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1999 else {
2000 // Apply vendor specific presets, print warning if present
2001 std::string dbversion;
// NOTE(review): line 2002 is missing from this listing; presumably it declares
// 'dbentry' and starts the call whose arguments follow - confirm against upstream.
2003 &drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
2004 if (!dbentry)
2005 PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
2006 (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
2007 else {
2008 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
2009 name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
2010 (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
2011 if (*dbentry->warningmsg)
2012 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
2013 }
2014 }
2015
2016 // Check for ATA Security LOCK
2017 unsigned short word128 = drive.words088_255[128-88];
2018 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
2019 if (locked)
2020 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
2021
2022 // Set default '-C 197[+]' if no '-C ID' is specified.
2023 if (!cfg.curr_pending_set)
// NOTE(review): line 2024 (body of the 'if' above) is missing from this listing.
2025 // Set default '-U 198[+]' if no '-U ID' is specified.
2026 if (!cfg.offl_pending_set)
// NOTE(review): line 2027 (body of the 'if' above) is missing from this listing.
2028
2029 // If requested, show which presets would be used for this drive
2030 if (cfg.showpresets) {
// debugmode is raised to 2 for the show_presets() call if it was 0, and restored afterwards
2031 int savedebugmode=debugmode;
2032 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
2033 if (!debugmode)
2034 debugmode=2;
2035 show_presets(&drive);
2036 debugmode=savedebugmode;
2037 }
2038
2039 // see if drive supports SMART
2040 supported=ataSmartSupport(&drive);
2041 if (supported!=1) {
2042 if (supported==0)
2043 // drive does NOT support SMART
2044 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2045 else
2046 // can't tell if drive supports SMART
2047 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2048
2049 // should we proceed anyway?
2050 if (cfg.permissive) {
2051 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2052 }
2053 else {
2054 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2055 CloseDevice(atadev, name);
2056 return 2;
2057 }
2058 }
2059
2060 if (ataEnableSmart(atadev)) {
2061 // Enable SMART command has failed
2062 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2063
// Failure is tolerated if the identify data says SMART is already enabled,
// or if '-T permissive' was given
2064 if (ataIsSmartEnabled(&drive) <= 0) {
2065 if (!cfg.permissive) {
2066 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2067 CloseDevice(atadev, name);
2068 return 2;
2069 }
2070 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2071 }
2072 else {
2073 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2074 }
2075 }
2076
2077 // disable device attribute autosave...
2078 if (cfg.autosave==1) {
2079 if (ataDisableAutoSave(atadev))
2080 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2081 else
2082 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2083 }
2084
2085 // or enable device attribute autosave
2086 if (cfg.autosave==2) {
2087 if (ataEnableAutoSave(atadev))
2088 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2089 else
2090 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2091 }
2092
2093 // capability check: SMART status
2094 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2095 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2096 cfg.smartcheck = false;
2097 }
2098
2099 // capability check: Read smart values and thresholds. Note that
2100 // smart values are ALSO needed even if we ONLY want to know if the
2101 // device is self-test log or error-log capable! After ATA-5, this
2102 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2103 // but sadly not for ATA-5. Sigh.
2104
2105 // do we need to get SMART data?
2106 bool smart_val_ok = false;
2107 if ( cfg.autoofflinetest || cfg.selftest
2108 || cfg.errorlog || cfg.xerrorlog
2109 || cfg.offlinests || cfg.selfteststs
2110 || cfg.usagefailed || cfg.prefail || cfg.usage
2111 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2112 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2113
2114 if (ataReadSmartValues(atadev, &state.smartval)) {
// Without SMART values all attribute based monitoring is switched off
2115 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2116 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2117 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2118 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2119 }
2120 else {
2121 smart_val_ok = true;
2122 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2123 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2124 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2125 cfg.usagefailed = false;
2126 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2127 memset(&state.smartthres, 0, sizeof(state.smartthres));
2128 }
2129 }
2130
2131 // see if the necessary Attribute is there to monitor offline or
2132 // current pending sectors or temperature
2133 if ( cfg.curr_pending_id
2134 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2135 "Current_Pending_Sector"))
2136 cfg.curr_pending_id = 0;
2137
2138 if ( cfg.offl_pending_id
2139 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2140 "Offline_Uncorrectable"))
2141 cfg.offl_pending_id = 0;
2142
2143 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
// NOTE(review): line 2144 is missing from this listing; presumably the rest of
// the condition and the opening brace of this 'if' - confirm against upstream.
2145 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2146 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2147 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2148 }
2149
2150 // Report ignored '-r' or '-R' directives
2151 for (int id = 1; id <= 255; id++) {
// NOTE(review): line 2152 is missing from this listing; presumably an 'if (...) {'
// matching the extra closing brace at 2166 - confirm against upstream.
2153 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2154 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2155 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2156
2157 int idx = ata_find_attr_index(id, state.smartval);
2158 if (idx < 0)
2159 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2160 else {
2161 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2162 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2163 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2164 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2165 }
2166 }
2167 }
2168 }
2169
2170 // enable/disable automatic on-line testing
2171 if (cfg.autoofflinetest) {
2172 // is this an enable or disable request?
2173 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2174 if (!smart_val_ok)
2175 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2176 else {
2177 // if command appears unsupported, issue a warning...
2178 if (!isSupportAutomaticTimer(&state.smartval))
2179 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2180 // ... but then try anyway
2181 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2182 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2183 else
2184 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2185 }
2186 }
2187
2188 // Read log directories if required for capability check
2189 ata_smart_log_directory smart_logdir, gp_logdir;
2190 bool smart_logdir_ok = false, gp_logdir_ok = false;
2191
// NOTE(review): line 2192 is missing from this listing; presumably the start of
// the 'if (' whose condition continues below - confirm against upstream.
2193 && (cfg.errorlog || cfg.selftest)
2194 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2195 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2196 smart_logdir_ok = true;
2197 }
2198
2199 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2200 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2201 gp_logdir_ok = true;
2202 }
2203
2204 // capability check: self-test-log
2205 state.selflogcount = 0; state.selfloghour = 0;
2206 if (cfg.selftest) {
2207 int retval;
// Log is assumed present if '-T permissive' is set, or the SMART log directory
// lists log 0x06, or (no directory read) the SMART values/identify data say so
2208 if (!( cfg.permissive
2209 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2210 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2211 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2212 cfg.selftest = false;
2213 }
2214 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2215 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2216 cfg.selftest = false;
2217 }
2218 else {
2219 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2220 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2221 }
2222 }
2223
2224 // capability check: ATA error log
2225 state.ataerrorcount = 0;
2226 if (cfg.errorlog) {
2227 int errcnt1;
2228 if (!( cfg.permissive
2229 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2230 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2231 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2232 cfg.errorlog = false;
2233 }
2234 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2235 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2236 cfg.errorlog = false;
2237 }
2238 else
2239 state.ataerrorcount = errcnt1;
2240 }
2241
// capability check: extended comprehensive error log (-l xerror)
2242 if (cfg.xerrorlog) {
2243 int errcnt2;
2244 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2245 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2246 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2247 name);
2248 cfg.xerrorlog = false;
2249 }
2250 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2251 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2252 cfg.xerrorlog = false;
2253 }
2254 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2255 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2256 name, state.ataerrorcount, errcnt2);
2257 // Record max error count
2258 if (errcnt2 > state.ataerrorcount)
2259 state.ataerrorcount = errcnt2;
2260 }
2261 else
2262 state.ataerrorcount = errcnt2;
2263 }
2264
2265 // capability check: self-test and offline data collection status
2266 if (cfg.offlinests || cfg.selfteststs) {
2267 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2268 if (cfg.offlinests)
2269 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2270 if (cfg.selfteststs)
2271 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2272 cfg.offlinests = cfg.selfteststs = false;
2273 }
2274 }
2275
2276 // capabilities check -- does it support powermode?
2277 if (cfg.powermode) {
2278 int powermode = ataCheckPowerMode(atadev);
2279
2280 if (-1 == powermode) {
2281 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2282 cfg.powermode=0;
2283 }
// Any value outside the list below is reported as not ATA compliant
2284 else if (powermode!=0x00 && powermode!=0x01
2285 && powermode!=0x40 && powermode!=0x41
2286 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2287 && powermode!=0xff) {
2288 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2289 name, powermode);
2290 cfg.powermode=0;
2291 }
2292 }
2293
2294 // Apply ATA settings
// Each requested setting appends its result to 'msg' via format_set_result_msg()
2295 std::string msg;
2296
2297 if (cfg.set_aam)
2298 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2299 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2300 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2301
2302 if (cfg.set_apm)
2303 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2304 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2305 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2306
2307 if (cfg.set_lookahead)
2308 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
// NOTE(review): line 2309 (feature argument of the call above) is missing from this listing.
2310 cfg.set_lookahead);
2311
2312 if (cfg.set_wcache)
2313 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
// NOTE(review): line 2314 (remaining arguments of the call above) is missing from this listing.
2315
2316 if (cfg.set_dsn)
2317 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2318 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2319
2320 if (cfg.set_security_freeze)
2321 format_set_result_msg(msg, "Security freeze",
// NOTE(review): line 2322 (remaining arguments of the call above) is missing from this listing.
2323
2324 if (cfg.set_standby)
2325 format_set_result_msg(msg, "Standby",
2326 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2327
2328 // Report as one log entry
2329 if (!msg.empty())
2330 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2331
2332 // set SCT Error Recovery Control if requested
2333 if (cfg.sct_erc_set) {
// NOTE(review): line 2334 is missing from this listing; presumably the first 'if'
// of this chain (an SCT ERC capability test) - confirm against upstream.
2335 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2336 name);
2337 else if (locked)
2338 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2339 name);
2340 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
2341 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
2342 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2343 else
2344 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2345 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2346 }
2347
2348 // If no tests available or selected, return
2349 if (!( cfg.smartcheck || cfg.selftest
2350 || cfg.errorlog || cfg.xerrorlog
2351 || cfg.offlinests || cfg.selfteststs
2352 || cfg.usagefailed || cfg.prefail || cfg.usage
2353 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2354 CloseDevice(atadev, name);
2355 return 3;
2356 }
2357
2358 // tell user we are registering device
2359 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2360
2361 // close file descriptor
2362 CloseDevice(atadev, name);
2363
2364 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2365 // Build file name for state file
// model and serial are modified in place here: characters rejected by
// not_allowed_in_filename become '_'
2366 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2367 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2368 if (!state_path_prefix.empty()) {
2369 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2370 // Read previous state
2371 if (read_dev_state(cfg.state_file.c_str(), state)) {
2372 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2373 // Copy ATA attribute values to temp state
2374 state.update_temp_state();
2375 }
2376 }
2377 if (!attrlog_path_prefix.empty())
2378 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2379 }
2380
// Apply mail frequency default and arm the scheduled self-test check
2381 finish_device_scan(cfg, state);
2382
2383 return 0;
2384 }
2385
2386 // on success, return 0. On failure, return >0. Never return <0,
2387 // please.
// Return values visible in this function:
//   0 - device is added to the monitor list
//   1 - identity duplicates an entry of PREV_CFGS
//   2 - INQUIRY failed or was too short, device is not disk like, or
//       TEST UNIT READY failed
//   3 - bad IEC mode page, or IE (SMART) not enabled
// NOTE(review): the 'return 2' paths before the duplicate check do not call
// CloseDevice(), unlike all later returns - confirm that callers close the device.
// NOTE(review): this listing has embedded line numbers and skips 2424, 2437, 2441,
// 2446, 2453, 2476, 2547, 2550-2551, 2553-2554 and 2556-2557; the source lines at
// those numbers are not visible here - TODO restore them from upstream.
2388 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2389 const dev_config_vector * prev_cfgs)
2390 {
2391 int err, req_len, avail_len, version, len;
2392 const char *device = cfg.name.c_str();
2393 struct scsi_iec_mode_page iec;
2394 uint8_t tBuf[64];
2395 uint8_t inqBuf[96];
2396 uint8_t vpdBuf[252];
2397 char lu_id[64], serial[256], vendor[40], model[40];
2398
2399 // Device must be open
2400 memset(inqBuf, 0, 96);
2401 req_len = 36;
2402 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2403 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2404 req_len = 64;
2405 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2406 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2407 "skip device\n", device);
2408 return 2;
2409 }
2410 }
2411 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2412
// Usable response length is the smaller of what the device reports and what was requested
2413 avail_len = inqBuf[4] + 5;
2414 len = (avail_len < req_len) ? avail_len : req_len;
2415 if (len < 36) {
2416 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2417 "skip device\n", device);
2418 return 2;
2419 }
2420
// Peripheral device type is taken from the low 5 bits of INQUIRY byte 0
2421 int pdt = inqBuf[0] & 0x1f;
2422
2423 switch (pdt) {
// NOTE(review): line 2424 is missing from this listing; presumably another 'case'
// label - confirm against upstream.
2425 case SCSI_PT_WO:
2426 case SCSI_PT_CDROM:
2427 case SCSI_PT_OPTICAL:
2428 case SCSI_PT_RBC: /* Reduced Block commands */
2429 case SCSI_PT_HOST_MANAGED: /* Zoned disk */
2430 break;
2431 default:
2432 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2433 "skip\n", device, pdt);
2434 return 2;
2435 }
2436
// NOTE(review): line 2437 is missing from this listing; presumably the 'if (...) {'
// closed at 2440 - confirm against upstream.
2438 delete supported_vpd_pages_p;
2439 supported_vpd_pages_p = nullptr;
2440 }
// NOTE(review): line 2441 is missing from this listing.
2442
2443 lu_id[0] = '\0';
2444 if (version >= 0x3) {
2445 /* SPC to SPC-5, assume SPC-6 is version==8 or higher */
// NOTE(review): line 2446 is missing from this listing; presumably the start of the
// 'if (' call that fills vpdBuf - confirm against upstream.
2447 vpdBuf, sizeof(vpdBuf))) {
2448 len = vpdBuf[3];
2449 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
2450 }
2451 }
2452 serial[0] = '\0';
// NOTE(review): line 2453 is missing from this listing; presumably the start of the
// 'if (' call that fills vpdBuf - confirm against upstream.
2454 vpdBuf, sizeof(vpdBuf))) {
2455 len = vpdBuf[3];
// NOTE(review): len is a device supplied byte (0..255) while vpdBuf holds 252 bytes,
// so 4 + len can index past the end of vpdBuf - confirm bounds.
2456 vpdBuf[4 + len] = '\0';
2457 scsi_format_id_string(serial, &vpdBuf[4], len);
2458 }
2459
2460 char si_str[64];
2461 struct scsi_readcap_resp srr;
2462 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2463
2464 if (capacity)
2465 format_capacity(si_str, sizeof(si_str), capacity, ".");
2466 else
2467 si_str[0] = '\0';
2468
2469 // Format device id string for warning emails
// Vendor, product and revision are printed directly from INQUIRY bytes 8, 16 and 32
// with fixed widths; lu id, serial and capacity are added only if non-empty
2470 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2471 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2472 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2473 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2474 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2475 cfg.id_is_unique = (lu_id[0] || serial[0]);
// NOTE(review): line 2476 is missing from this listing; presumably a condition
// guarding the assignment below - confirm against upstream.
2477 cfg.id_is_unique = false;
2478
2479 // format "model" string
2480 scsi_format_id_string(vendor, &inqBuf[8], 8);
2481 scsi_format_id_string(model, &inqBuf[16], 16);
2482 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2483
2484 // Check for duplicates
2485 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2486 CloseDevice(scsidev, device);
2487 return 1;
2488 }
2489
2490 // check that device is ready for commands. IE stores its stuff on
2491 // the media.
2492 if ((err = scsiTestUnitReady(scsidev))) {
2493 if (SIMPLE_ERR_NOT_READY == err)
2494 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2495 else if (SIMPLE_ERR_NO_MEDIUM == err)
2496 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2497 else if (SIMPLE_ERR_BECOMING_READY == err)
2498 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2499 else
2500 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2501 CloseDevice(scsidev, device);
2502 return 2;
2503 }
2504
2505 // Badly-conforming USB storage devices may fail this check.
2506 // The response to the following IE mode page fetch (current and
2507 // changeable values) is carefully examined. It has been found
2508 // that various USB devices that malform the response will lock up
2509 // if asked for a log page (e.g. temperature) so it is best to
2510 // bail out now.
2511 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2512 state.modese_len = iec.modese_len;
2513 else if (SIMPLE_ERR_BAD_FIELD == err)
2514 ; /* continue since it is reasonable not to support IE mpage */
2515 else { /* any other error (including malformed response) unreasonable */
2516 PrintOut(LOG_INFO,
2517 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2518 device, err);
2519 CloseDevice(scsidev, device);
2520 return 3;
2521 }
2522
2523 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2524 // smart if it is off). This may change to be the same as the ATA side.
2525 if (!scsi_IsExceptionControlEnabled(&iec)) {
2526 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2527 "Try 'smartctl -s on %s' to turn on SMART features\n",
2528 device, device);
2529 CloseDevice(scsidev, device);
2530 return 3;
2531 }
2532
2533 // Flag that certain log pages are supported (information may be
2534 // available from other sources).
2535 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2536 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2537 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2538 {
// NOTE(review): the loop bound uses the device supplied byte tBuf[3] (0..255) while
// tBuf holds 64 bytes - confirm that k cannot run past the buffer.
2539 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2540 switch (tBuf[k]) {
2541 case TEMPERATURE_LPAGE:
2542 state.TempPageSupported = 1;
2543 break;
2544 case IE_LPAGE:
2545 state.SmartPageSupported = 1;
2546 break;
// NOTE(review): line 2547 ('case' label for the assignment below) is missing from this listing.
2548 state.ReadECounterPageSupported = 1;
2549 break;
// NOTE(review): lines 2550-2551 are missing from this listing.
2552 break;
// NOTE(review): lines 2553-2554 are missing from this listing.
2555 break;
// NOTE(review): lines 2556-2557 are missing from this listing.
2558 break;
2559 default:
2560 break;
2561 }
2562 }
2563 }
2564
2565 // Check if scsiCheckIE() is going to work
2566 {
2567 uint8_t asc = 0;
2568 uint8_t ascq = 0;
2569 uint8_t currenttemp = 0;
2570 uint8_t triptemp = 0;
2571
2572 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2573 &asc, &ascq, &currenttemp, &triptemp)) {
2574 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2575 state.SuppressReport = 1;
2576 }
// Temperature monitoring is dropped if the check failed or no temperature was returned
2577 if ( (state.SuppressReport || !currenttemp)
2578 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2579 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2580 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2581 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2582 }
2583 }
2584
2585 // capability check: self-test-log
2586 if (cfg.selftest){
2587 int retval = scsiCountFailedSelfTests(scsidev, 0);
2588 if (retval<0) {
2589 // no self-test log, turn off monitoring
2590 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2591 cfg.selftest = false;
2592 state.selflogcount = 0;
2593 state.selfloghour = 0;
2594 }
2595 else {
2596 // register starting values to watch for changes
2597 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2598 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2599 }
2600 }
2601
2602 // disable autosave (set GLTSD bit)
2603 if (cfg.autosave==1){
2604 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2605 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2606 else
2607 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2608 }
2609
2610 // or enable autosave (clear GLTSD bit)
2611 if (cfg.autosave==2){
2612 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2613 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2614 else
2615 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2616 }
2617
2618 // tell user we are registering device
2619 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2620
2621 // Make sure that init_standby_check() ignores SCSI devices
2622 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2623
2624 // close file descriptor
2625 CloseDevice(scsidev, device);
2626
2627 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2628 // Build file name for state file
// NOTE(review): only model and serial are sanitized here, although vendor is also
// used in the file names below - confirm whether that is intended.
2629 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2630 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2631 if (!state_path_prefix.empty()) {
2632 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2633 // Read previous state
2634 if (read_dev_state(cfg.state_file.c_str(), state)) {
2635 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2636 // Copy ATA attribute values to temp state
2637 state.update_temp_state();
2638 }
2639 }
2640 if (!attrlog_path_prefix.empty())
2641 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2642 }
2643
// Apply mail frequency default and arm the scheduled self-test check
2644 finish_device_scan(cfg, state);
2645
2646 return 0;
2647 }
2648
// Convert a 128 bit little endian integer to uint64_t.
// Returns the maximum uint64_t value if the number does not fit into 64 bits.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Any bit set in the upper half (bytes 8..15) means overflow
  const bool overflow = std::any_of(val + 8, val + 16,
                                    [](unsigned char b) { return b != 0; });
  if (overflow)
    return ~(uint64_t)0;

  // Assemble the lower half, least significant byte first
  uint64_t result = 0;
  for (unsigned i = 0; i < 8; i++)
    result |= (uint64_t)val[i] << (8 * i);
  return result;
}
2662
2663// Get max temperature in Kelvin reported in NVMe SMART/Health log.
2664static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2665{
2666 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2667 for (auto s : smart_log.temp_sensor) {
2668 if (s > k)
2669 k = s; // cppcheck-suppress useStlAlgorithm
2670 }
2671 return k;
2672}
2673
// Check an NVMe device after it was opened: reads Identify Controller and the
// SMART/Health log, fills cfg.dev_idinfo and clears monitoring options the device
// does not support. The device is closed with CloseDevice() before every return.
// Return values visible in this function:
//   0 - device is added to the monitor list
//   1 - identity duplicates an entry of PREV_CFGS
//   2 - Identify Controller or SMART/Health log could not be read
//   3 - no supported monitoring option is selected
// NOTE(review): this listing has embedded line numbers and skips 2706 and 2736;
// the source lines at those numbers are not visible here - TODO restore from upstream.
2674 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2675 const dev_config_vector * prev_cfgs)
2676 {
2677 const char *name = cfg.name.c_str();
2678
2679 // Device must be open
2680
2681 // Get ID Controller
2682 nvme_id_ctrl id_ctrl;
2683 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2684 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2685 CloseDevice(nvmedev, name);
2686 return 2;
2687 }
2688
2689 // Get drive identity
2690 char model[40+1], serial[20+1], firmware[8+1];
2691 format_char_array(model, id_ctrl.mn);
2692 format_char_array(serial, id_ctrl.sn);
2693 format_char_array(firmware, id_ctrl.fr);
2694
2695 // Format device id string for warning emails
// The namespace id is added unless it is 0xffffffff; the capacity is added if nonzero
2696 char nsstr[32] = "", capstr[32] = "";
2697 unsigned nsid = nvmedev->get_nsid();
2698 if (nsid != 0xffffffff)
2699 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2700 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2701 if (capacity)
2702 format_capacity(capstr, sizeof(capstr), capacity, ".");
2703 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2704 nsstr, (capstr[0] ? ", " : ""), capstr);
2705 cfg.id_is_unique = true; // TODO: Check serial?
// NOTE(review): line 2706 is missing from this listing; presumably a condition
// guarding the assignment below - confirm against upstream.
2707 cfg.id_is_unique = false;
2708
2709 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2710
2711 // Check for duplicates
2712 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2713 CloseDevice(nvmedev, name);
2714 return 1;
2715 }
2716
2717 // Read SMART/Health log
2718 nvme_smart_log smart_log;
2719 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2720 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2721 CloseDevice(nvmedev, name);
2722 return 2;
2723 }
2724
2725 // Check temperature sensor support
// A maximum of 0 Kelvin over all sensors is treated as 'no sensor present'
2726 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2727 if (!nvme_get_max_temp_kelvin(smart_log)) {
2728 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2729 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2730 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2731 }
2732 }
2733
2734 // Init total error count
2735 if (cfg.errorlog || cfg.xerrorlog) {
// NOTE(review): line 2736 (body of this 'if') is missing from this listing.
2737 }
2738
2739 // If no supported tests selected, return
2740 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2741 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2742 CloseDevice(nvmedev, name);
2743 return 3;
2744 }
2745
2746 // Tell user we are registering device
2747 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2748
2749 // Make sure that init_standby_check() ignores NVMe devices
2750 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2751
2752 CloseDevice(nvmedev, name);
2753
// Unlike the ATA and SCSI variants, no attrlog file name is set up here
2754 if (!state_path_prefix.empty()) {
2755 // Build file name for state file
2756 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2757 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
// nsstr is reused: it now holds the "-n<NSID>" file name suffix (empty without namespace id)
2758 nsstr[0] = 0;
2759 if (nsid != 0xffffffff)
2760 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2761 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2762 // Read previous state
2763 if (read_dev_state(cfg.state_file.c_str(), state))
2764 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2765 }
2766
// Apply mail frequency default and arm the scheduled self-test check
2767 finish_device_scan(cfg, state);
2768
2769 return 0;
2770 }
2771
2772// Open device for next check, return false on error
2773static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2774 const char * type)
2775{
2776 const char * name = cfg.name.c_str();
2777
2778 // If user has asked, test the email warning system
2779 if (cfg.emailtest)
2780 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2781
2782 // User may have requested (with the -n Directive) to leave the disk
2783 // alone if it is in idle or standby mode. In this case check the
2784 // power mode first before opening the device for full access,
2785 // and exit without check if disk is reported in standby.
2786 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2787 // Note that 'is_powered_down()' handles opening the device itself, and
2788 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2789 if (device->is_powered_down())
2790 {
2791 // skip at most powerskipmax checks
2792 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2793 // report first only except if state has changed, avoid waking up system disk
2794 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2795 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2796 state.lastpowermodeskipped = -1;
2797 }
2798 state.powerskipcnt++;
2799 return false;
2800 }
2801 }
2802 }
2803
2804 // if we can't open device, fail gracefully rather than hard --
2805 // perhaps the next time around we'll be able to open it
2806 if (!device->open()) {
2807 // For removable devices, print error message only once and suppress email
2808 if (!cfg.removable) {
2809 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2810 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2811 }
2812 else if (!state.removed) {
2813 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2814 state.removed = true;
2815 }
2816 else if (debugmode)
2817 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2818 return false;
2819 }
2820
2821 if (debugmode)
2822 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2823
2824 if (!cfg.removable)
2825 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2826 else if (state.removed) {
2827 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2828 state.removed = false;
2829 }
2830
2831 return true;
2832}
2833
2834// If the self-test log has got more self-test errors (or more recent
2835// self-test errors) recorded, then notify user.
2836static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2837{
2838 const char * name = cfg.name.c_str();
2839
2840 if (newi<0)
2841 // command failed
2842 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2843 else {
2844 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2845
2846 // old and new error counts
2847 int oldc=state.selflogcount;
2848 int newc=SELFTEST_ERRORCOUNT(newi);
2849
2850 // old and new error timestamps in hours
2851 int oldh=state.selfloghour;
2852 int newh=SELFTEST_ERRORHOURS(newi);
2853
2854 if (oldc<newc) {
2855 // increase in error count
2856 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2857 name, oldc, newc);
2858 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2859 name, oldc, newc);
2860 state.must_write = true;
2861 }
2862 else if (newc > 0 && oldh != newh) {
2863 // more recent error
2864 // a 'more recent' error might actually be a smaller hour number,
2865 // if the hour number has wrapped.
2866 // There's still a bug here. You might just happen to run a new test
2867 // exactly 32768 hours after the previous failure, and have run exactly
2868 // 20 tests between the two, in which case smartd will miss the
2869 // new failure.
2870 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2871 name, newh);
2872 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2873 name, newh);
2874 state.must_write = true;
2875 }
2876
2877 // Print info if error entries have disappeared
2878 // or newer successful successful extended self-test exits
2879 if (oldc > newc) {
2880 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2881 name, oldc, newc);
2882 if (newc == 0)
2883 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2884 }
2885
2886 // Needed since self-test error count may DECREASE. Hour might
2887 // also have changed.
2888 state.selflogcount= newc;
2889 state.selfloghour = newh;
2890 }
2891 return;
2892}
2893
// Test types, ordered by priority (highest priority first).
// Each character is the test type letter used in the '-s' schedule regex.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (terminating '\0' of the string is not counted).
static const unsigned num_test_types = sizeof(test_type_chars)-1;
2897
2898// returns test type if time to do test of type testtype,
2899// 0 if not time to do test.
2900static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2901{
2902 // check that self-testing has been requested
2903 if (cfg.test_regex.empty())
2904 return 0;
2905
2906 // Exit if drive not capable of any test
2907 if ( state.not_cap_long && state.not_cap_short &&
2908 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2909 return 0;
2910
2911 // since we are about to call localtime(), be sure glibc is informed
2912 // of any timezone changes we make.
2913 if (!usetime)
2915
2916 // Is it time for next check?
2917 time_t now = (!usetime ? time(nullptr) : usetime);
2918 if (now < state.scheduled_test_next_check) {
2919 if (state.scheduled_test_next_check <= now + 3600)
2920 return 0; // Next check within one hour
2921 // More than one hour, assume system clock time adjusted to the past
2922 state.scheduled_test_next_check = now;
2923 }
2924 else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
2925 // Limit time check interval to 90 days
2926 state.scheduled_test_next_check = now - (3600L*24*90);
2927 }
2928
2929 // Find ':NNN[-LLL]' in regex for possible offsets and limits
2930 const unsigned max_offsets = 1 + num_test_types;
2931 unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
2932 unsigned num_offsets = 1; // offsets/limits[0] == 0 always
2933 for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
2934 const char * q = strchr(p, ':');
2935 if (!q)
2936 break;
2937 p = q + 1;
2938 unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
2939 sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
2940 if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
2941 continue;
2942 offsets[num_offsets] = offset; limits[num_offsets] = limit;
2943 num_offsets++;
2944 p += (n3 > 0 ? n3 : n1);
2945 }
2946
2947 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2948 char testtype = 0;
2949 time_t testtime = 0; int testhour = 0;
2950 int maxtest = num_test_types-1;
2951
2952 for (time_t t = state.scheduled_test_next_check; ; ) {
2953 // Check offset 0 and then all offsets for ':NNN' found above
2954 for (unsigned i = 0; i < num_offsets; i++) {
2955 unsigned offset = offsets[i], limit = limits[i];
2956 unsigned delay = cfg.test_offset_factor * offset;
2957 if (0 < limit && limit < delay)
2958 delay %= limit + 1;
2959 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
2960
2961 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2962 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2963 for (int j = 0; j <= maxtest; j++) {
2964 // Skip if drive not capable of this test
2965 switch (test_type_chars[j]) {
2966 case 'L': if (state.not_cap_long) continue; break;
2967 case 'S': if (state.not_cap_short) continue; break;
2968 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2969 case 'O': if (scsi || state.not_cap_offline) continue; break;
2970 case 'c': case 'n':
2971 case 'r': if (scsi || state.not_cap_selective) continue; break;
2972 default: continue;
2973 }
2974 // Try match of "T/MM/DD/d/HH[:NNN]"
2975 char pattern[64];
2976 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2977 test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2978 if (i > 0) {
2979 const unsigned len = sizeof("S/01/01/1/01") - 1;
2980 snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
2981 if (limit > 0)
2982 snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
2983 }
2984 if (cfg.test_regex.full_match(pattern)) {
2985 // Test found
2986 testtype = pattern[0];
2987 testtime = t; testhour = tms->tm_hour;
2988 // Limit further matches to higher priority self-tests
2989 maxtest = j-1;
2990 break;
2991 }
2992 }
2993 }
2994
2995 // Exit if no tests left or current time reached
2996 if (maxtest < 0)
2997 break;
2998 if (t >= now)
2999 break;
3000 // Check next hour
3001 if ((t += 3600) > now)
3002 t = now;
3003 }
3004
3005 // Do next check not before next hour.
3006 struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
3007 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
3008
3009 if (testtype) {
3010 state.must_write = true;
3011 // Tell user if an old test was found.
3012 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
3013 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
3014 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
3015 cfg.name.c_str(), testtype, datebuf);
3016 }
3017 }
3018
3019 return testtype;
3020}
3021
3022// Print a list of future tests.
3024{
3025 unsigned numdev = configs.size();
3026 if (!numdev)
3027 return;
3028 std::vector<int> testcnts(numdev * num_test_types, 0);
3029
3030 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
3031
3032 // FixGlibcTimeZoneBug(); // done in PrintOut()
3033 time_t now = time(nullptr);
3034 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3035 dateandtimezoneepoch(datenow, now);
3036
3037 long seconds;
3038 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3039 // Check for each device whether a test will be run
3040 time_t testtime = now + seconds;
3041 for (unsigned i = 0; i < numdev; i++) {
3042 const dev_config & cfg = configs.at(i);
3043 dev_state & state = states.at(i);
3044 const char * p;
3045 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3046 if (testtype && (p = strchr(test_type_chars, testtype))) {
3047 unsigned t = (p - test_type_chars);
3048 // Report at most 5 tests of each type
3049 if (++testcnts[i*num_test_types + t] <= 5) {
3050 dateandtimezoneepoch(date, testtime);
3051 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3052 testcnts[i*num_test_types + t], testtype, date);
3053 }
3054 }
3055 }
3056 }
3057
3058 // Report totals
3059 dateandtimezoneepoch(date, now+seconds);
3060 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3061 for (unsigned i = 0; i < numdev; i++) {
3062 const dev_config & cfg = configs.at(i);
3063 bool scsi = devices.at(i)->is_scsi();
3064 for (unsigned t = 0; t < num_test_types; t++) {
3065 int cnt = testcnts[i*num_test_types + t];
3066 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3067 continue;
3068 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3069 cnt, (cnt==1?"":"s"), test_type_chars[t]);
3070 }
3071 }
3072
3073}
3074
3075// Return zero on success, nonzero on failure. Perform offline (background)
3076// short or long (extended) self test on given scsi device.
3077static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3078{
3079 int retval = 0;
3080 const char *testname = nullptr;
3081 const char *name = cfg.name.c_str();
3082 int inProgress;
3083
3084 if (scsiSelfTestInProgress(device, &inProgress)) {
3085 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3086 state.not_cap_short = state.not_cap_long = true;
3087 return 1;
3088 }
3089
3090 if (1 == inProgress) {
3091 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3092 "progress.\n", name);
3093 return 1;
3094 }
3095
3096 switch (testtype) {
3097 case 'S':
3098 testname = "Short Self";
3099 retval = scsiSmartShortSelfTest(device);
3100 break;
3101 case 'L':
3102 testname = "Long Self";
3103 retval = scsiSmartExtendSelfTest(device);
3104 break;
3105 }
3106 // If we can't do the test, exit
3107 if (!testname) {
3108 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3109 testtype);
3110 return 1;
3111 }
3112 if (retval) {
3113 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3114 (SIMPLE_ERR_BAD_FIELD == retval)) {
3115 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3116 testname);
3117 if ('L'==testtype)
3118 state.not_cap_long = true;
3119 else
3120 state.not_cap_short = true;
3121
3122 return 1;
3123 }
3124 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3125 testname, retval);
3126 return 1;
3127 }
3128
3129 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3130
3131 return 0;
3132}
3133
3134// Do an offline immediate or self-test. Return zero on success,
3135// nonzero on failure.
3136static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3137{
3138 const char *name = cfg.name.c_str();
3139
3140 // Read current smart data and check status/capability
3141 struct ata_smart_values data;
3142 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3143 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3144 return 1;
3145 }
3146
3147 // Check for capability to do the test
3148 int dotest = -1, mode = 0;
3149 const char *testname = nullptr;
3150 switch (testtype) {
3151 case 'O':
3152 testname="Offline Immediate ";
3154 dotest=OFFLINE_FULL_SCAN;
3155 else
3156 state.not_cap_offline = true;
3157 break;
3158 case 'C':
3159 testname="Conveyance Self-";
3161 dotest=CONVEYANCE_SELF_TEST;
3162 else
3163 state.not_cap_conveyance = true;
3164 break;
3165 case 'S':
3166 testname="Short Self-";
3167 if (isSupportSelfTest(&data))
3168 dotest=SHORT_SELF_TEST;
3169 else
3170 state.not_cap_short = true;
3171 break;
3172 case 'L':
3173 testname="Long Self-";
3174 if (isSupportSelfTest(&data))
3175 dotest=EXTEND_SELF_TEST;
3176 else
3177 state.not_cap_long = true;
3178 break;
3179
3180 case 'c': case 'n': case 'r':
3181 testname = "Selective Self-";
3183 dotest = SELECTIVE_SELF_TEST;
3184 switch (testtype) {
3185 case 'c': mode = SEL_CONT; break;
3186 case 'n': mode = SEL_NEXT; break;
3187 case 'r': mode = SEL_REDO; break;
3188 }
3189 }
3190 else
3191 state.not_cap_selective = true;
3192 break;
3193 }
3194
3195 // If we can't do the test, exit
3196 if (dotest<0) {
3197 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3198 return 1;
3199 }
3200
3201 // If currently running a self-test, do not interrupt it to start another.
3202 if (15==(data.self_test_exec_status >> 4)) {
3203 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3204 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3205 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3206 } else {
3207 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3208 name, testname, (int)(data.self_test_exec_status & 0x0f));
3209 return 1;
3210 }
3211 }
3212
3213 if (dotest == SELECTIVE_SELF_TEST) {
3214 // Set test span
3215 ata_selective_selftest_args selargs, prev_args;
3216 selargs.num_spans = 1;
3217 selargs.span[0].mode = mode;
3218 prev_args.num_spans = 1;
3219 prev_args.span[0].start = state.selective_test_last_start;
3220 prev_args.span[0].end = state.selective_test_last_end;
3221 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3222 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3223 return 1;
3224 }
3225 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3226 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3227 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3228 start, end, end - start + 1,
3229 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3230 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3231 state.selective_test_last_start = start;
3232 state.selective_test_last_end = end;
3233 }
3234
3235 // execute the test, and return status
3236 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, nullptr);
3237 if (retval) {
3238 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3239 return retval;
3240 }
3241
3242 // Report recent test start to do_disable_standby_check()
3243 // and force log of next test status
3244 if (testtype == 'O')
3245 state.offline_started = true;
3246 else
3247 state.selftest_started = true;
3248
3249 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3250 return 0;
3251}
3252
3253// Check pending sector count attribute values (-C, -U directives).
3254static void check_pending(const dev_config & cfg, dev_state & state,
3255 unsigned char id, bool increase_only,
3256 const ata_smart_values & smartval,
3257 int mailtype, const char * msg)
3258{
3259 // Find attribute index
3260 int i = ata_find_attr_index(id, smartval);
3261 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3262 return;
3263
3264 // No report if no sectors pending.
3265 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3266 if (rawval == 0) {
3267 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3268 return;
3269 }
3270
3271 // If attribute is not reset, report only sector count increases.
3272 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3273 if (!(!increase_only || prev_rawval < rawval))
3274 return;
3275
3276 // Format message.
3277 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3278 if (prev_rawval > 0 && rawval != prev_rawval)
3279 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3280
3281 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3282 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3283 state.must_write = true;
3284}
3285
// Format Temperature value.
//
// Parameters:
//   x   - temperature value, 0 means "unset"
//   buf - caller provided buffer which receives the decimal string
//
// Returns the string literal "??" if x is 0 (buf is left untouched),
// otherwise returns buf.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (!x) // unset
    return "??";
  snprintf(buf, sizeof(buf), "%u", x);
  return buf;
}
3294
3295// Check Temperature limits
3296static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3297{
3298 if (!(0 < currtemp && currtemp < 255)) {
3299 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3300 return;
3301 }
3302
3303 // Update Max Temperature
3304 const char * minchg = "", * maxchg = "";
3305 if (currtemp > state.tempmax) {
3306 if (state.tempmax)
3307 maxchg = "!";
3308 state.tempmax = currtemp;
3309 state.must_write = true;
3310 }
3311
3312 char buf[20];
3313 if (!state.temperature) {
3314 // First check
3315 if (!state.tempmin || currtemp < state.tempmin)
3316 // Delay Min Temperature update by ~ 30 minutes.
3317 state.tempmin_delay = time(nullptr) + default_checktime - 60;
3318 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3319 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3320 if (triptemp)
3321 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3322 state.temperature = currtemp;
3323 }
3324 else {
3325 if (state.tempmin_delay) {
3326 // End Min Temperature update delay if ...
3327 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3328 || (state.tempmin_delay <= time(nullptr))) { // or delay time is over.
3329 state.tempmin_delay = 0;
3330 if (!state.tempmin)
3331 state.tempmin = 255;
3332 }
3333 }
3334
3335 // Update Min Temperature
3336 if (!state.tempmin_delay && currtemp < state.tempmin) {
3337 state.tempmin = currtemp;
3338 state.must_write = true;
3339 if (currtemp != state.temperature)
3340 minchg = "!";
3341 }
3342
3343 // Track changes
3344 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3345 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3346 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3347 state.temperature = currtemp;
3348 }
3349 }
3350
3351 // Check limits
3352 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3353 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3354 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3355 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3356 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3357 }
3358 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3359 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3360 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3361 }
3362 else if (cfg.tempcrit) {
3363 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3364 if (currtemp < limit)
3365 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3366 }
3367}
3368
3369// Check normalized and raw attribute values.
3370static void check_attribute(const dev_config & cfg, dev_state & state,
3371 const ata_smart_attribute & attr,
3372 const ata_smart_attribute & prev,
3373 int attridx,
3374 const ata_smart_threshold_entry * thresholds)
3375{
3376 // Check attribute and threshold
3377 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3378 if (attrstate == ATTRSTATE_NON_EXISTING)
3379 return;
3380
3381 // If requested, check for usage attributes that have failed.
3382 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3384 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3385 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3386 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3387 state.must_write = true;
3388 }
3389
3390 // Return if we're not tracking this type of attribute
3391 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3392 if (!( ( prefail && cfg.prefail)
3393 || (!prefail && cfg.usage )))
3394 return;
3395
3396 // Return if '-I ID' was specified
3398 return;
3399
3400 // Issue warning if they don't have the same ID in all structures.
3401 if (attr.id != prev.id) {
3402 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3403 cfg.name.c_str(), attr.id, prev.id);
3404 return;
3405 }
3406
3407 // Compare normalized values if valid.
3408 bool valchanged = false;
3409 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3410 if (attr.current != prev.current)
3411 valchanged = true;
3412 }
3413
3414 // Compare raw values if requested.
3415 bool rawchanged = false;
3416 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3419 rawchanged = true;
3420 }
3421
3422 // Return if no change
3423 if (!(valchanged || rawchanged))
3424 return;
3425
3426 // Format value strings
3427 std::string currstr, prevstr;
3428 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3429 // Print raw values only
3430 currstr = strprintf("%s (Raw)",
3431 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3432 prevstr = strprintf("%s (Raw)",
3433 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3434 }
3435 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3436 // Print normalized and raw values
3437 currstr = strprintf("%d [Raw %s]", attr.current,
3438 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3439 prevstr = strprintf("%d [Raw %s]", prev.current,
3440 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3441 }
3442 else {
3443 // Print normalized values only
3444 currstr = strprintf("%d", attr.current);
3445 prevstr = strprintf("%d", prev.current);
3446 }
3447
3448 // Format message
3449 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3450 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3451 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3452 prevstr.c_str(), currstr.c_str());
3453
3454 // Report this change as critical ?
3455 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3456 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3457 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3458 MailWarning(cfg, state, 2, "%s", msg.c_str());
3459 }
3460 else {
3461 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3462 }
3463 state.must_write = true;
3464}
3465
3466
3467static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3468 bool firstpass, bool allow_selftests)
3469{
3470 if (!open_device(cfg, state, atadev, "ATA"))
3471 return 1;
3472
3473 const char * name = cfg.name.c_str();
3474
3475 // user may have requested (with the -n Directive) to leave the disk
3476 // alone if it is in idle or sleeping mode. In this case check the
3477 // power mode and exit without check if needed
3478 if (cfg.powermode && !state.powermodefail) {
3479 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3480 const char * mode = 0;
3481 if (0 <= powermode && powermode < 0xff) {
3482 // wait for possible spin up and check again
3483 int powermode2;
3484 sleep(5);
3485 powermode2 = ataCheckPowerMode(atadev);
3486 if (powermode2 > powermode)
3487 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3488 powermode = powermode2;
3489 }
3490
3491 switch (powermode){
3492 case -1:
3493 // SLEEP
3494 mode="SLEEP";
3495 if (cfg.powermode>=1)
3496 dontcheck=1;
3497 break;
3498 case 0x00:
3499 // STANDBY
3500 mode="STANDBY";
3501 if (cfg.powermode>=2)
3502 dontcheck=1;
3503 break;
3504 case 0x01:
3505 // STANDBY_Y
3506 mode="STANDBY_Y";
3507 if (cfg.powermode>=2)
3508 dontcheck=1;
3509 break;
3510 case 0x80:
3511 // IDLE
3512 mode="IDLE";
3513 if (cfg.powermode>=3)
3514 dontcheck=1;
3515 break;
3516 case 0x81:
3517 // IDLE_A
3518 mode="IDLE_A";
3519 if (cfg.powermode>=3)
3520 dontcheck=1;
3521 break;
3522 case 0x82:
3523 // IDLE_B
3524 mode="IDLE_B";
3525 if (cfg.powermode>=3)
3526 dontcheck=1;
3527 break;
3528 case 0x83:
3529 // IDLE_C
3530 mode="IDLE_C";
3531 if (cfg.powermode>=3)
3532 dontcheck=1;
3533 break;
3534 case 0xff:
3535 // ACTIVE/IDLE
3536 case 0x40:
3537 // ACTIVE
3538 case 0x41:
3539 // ACTIVE
3540 mode="ACTIVE or IDLE";
3541 break;
3542 default:
3543 // UNKNOWN
3544 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3545 name, powermode);
3546 state.powermodefail = true;
3547 break;
3548 }
3549
3550 // if we are going to skip a check, return now
3551 if (dontcheck){
3552 // skip at most powerskipmax checks
3553 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3554 CloseDevice(atadev, name);
3555 // report first only except if state has changed, avoid waking up system disk
3556 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3557 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3558 state.lastpowermodeskipped = powermode;
3559 }
3560 state.powerskipcnt++;
3561 return 0;
3562 }
3563 else {
3564 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3565 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3566 }
3567 state.powerskipcnt = 0;
3568 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3569 }
3570 else if (state.powerskipcnt) {
3571 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3572 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3573 state.powerskipcnt = 0;
3574 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3575 }
3576 }
3577
3578 // check smart status
3579 if (cfg.smartcheck) {
3580 int status=ataSmartStatus2(atadev);
3581 if (status==-1){
3582 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3583 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3584 state.must_write = true;
3585 }
3586 else if (status==1){
3587 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3588 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3589 state.must_write = true;
3590 }
3591 }
3592
3593 // Check everything that depends upon SMART Data (eg, Attribute values)
3594 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3595 || cfg.curr_pending_id || cfg.offl_pending_id
3596 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3597 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3598
3599 // Read current attribute values.
3600 ata_smart_values curval;
3601 if (ataReadSmartValues(atadev, &curval)){
3602 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3603 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3604 state.must_write = true;
3605 }
3606 else {
3607 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3608
3609 // look for current or offline pending sectors
3610 if (cfg.curr_pending_id)
3611 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3612 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3613 : "Total unreadable (pending) sectors" ));
3614
3615 if (cfg.offl_pending_id)
3616 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3617 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3618 : "Total offline uncorrectable sectors"));
3619
3620 // check temperature limits
3621 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3622 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3623
3624 // look for failed usage attributes, or track usage or prefail attributes
3625 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3626 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3627 check_attribute(cfg, state,
3628 curval.vendor_attributes[i],
3629 state.smartval.vendor_attributes[i],
3630 i, state.smartthres.thres_entries);
3631 }
3632 }
3633
3634 // Log changes of offline data collection status
3635 if (cfg.offlinests) {
3638 || state.offline_started // test was started in previous call
3639 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3641 }
3642
3643 // Log changes of self-test execution status
3644 if (cfg.selfteststs) {
3646 || state.selftest_started // test was started in previous call
3647 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3649 }
3650
3651 // Save the new values for the next time around
3652 state.smartval = curval;
3653 }
3654 }
3655 state.offline_started = state.selftest_started = false;
3656
3657 // check if number of selftest errors has increased (note: may also DECREASE)
3658 if (cfg.selftest)
3659 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3660
3661 // check if number of ATA errors has increased
3662 if (cfg.errorlog || cfg.xerrorlog) {
3663
3664 int errcnt1 = -1, errcnt2 = -1;
3665 if (cfg.errorlog)
3666 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3667 if (cfg.xerrorlog)
3668 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3669
3670 // new number of errors is max of both logs
3671 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3672
3673 // did command fail?
3674 if (newc<0)
3675 // lack of PrintOut here is INTENTIONAL
3676 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3677
3678 // has error count increased?
3679 int oldc = state.ataerrorcount;
3680 if (newc>oldc){
3681 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3682 name, oldc, newc);
3683 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3684 name, oldc, newc);
3685 state.must_write = true;
3686 }
3687
3688 if (newc>=0)
3689 state.ataerrorcount=newc;
3690 }
3691
3692 // if the user has asked, and device is capable (or we're not yet
3693 // sure) check whether a self test should be done now.
3694 if (allow_selftests && !cfg.test_regex.empty()) {
3695 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3696 if (testtype)
3697 DoATASelfTest(cfg, state, atadev, testtype);
3698 }
3699
3700 // Don't leave device open -- the OS/user may want to access it
3701 // before the next smartd cycle!
3702 CloseDevice(atadev, name);
3703
3704 // Copy ATA attribute values to persistent state
3706
3707 state.attrlog_dirty = true;
3708 return 0;
3709}
3710
// Performs one monitoring cycle for a SCSI device.
// Returns 1 if open_device() fails; otherwise closes the device, marks the
// attribute log as dirty and returns 0.
// 'allow_selftests' gates the scheduled self-test section below.
// NOTE(review): this text is an extract of a source listing; where the
// embedded listing numbers skip (e.g. 3761 -> 3763) a source line is absent
// from the extract - confirm against the complete file.
3711static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3712{
3713 if (!open_device(cfg, state, scsidev, "SCSI"))
3714 return 1;
3715
3716 const char * name = cfg.name.c_str();
3717
// Query the informational exception (IE) data unless reporting was
// suppressed by an earlier failure; asc/ascq and temperatures stay 0 otherwise.
3718 uint8_t asc = 0, ascq = 0;
3719 uint8_t currenttemp = 0, triptemp = 0;
3720 if (!state.SuppressReport) {
3721 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3722 &asc, &ascq, &currenttemp, &triptemp)) {
3723 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3724 name);
3725 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
// Do not try again (and do not warn again) in later cycles
3726 state.SuppressReport = 1;
3727 }
3728 }
// A nonzero asc is reported as a SMART failure if scsiGetIEString() returns
// a string for it; asc/ascq 4/9 is logged as a running self-test.
3729 if (asc > 0) {
3730 char b[128];
3731 const char * cp = scsiGetIEString(asc, ascq, b, sizeof(b));
3732
3733 if (cp) {
3734 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3735 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3736 } else if (asc == 4 && ascq == 9) {
3737 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3738 } else if (debugmode)
3739 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3740 name, (int)asc, (int)ascq);
3741 } else if (debugmode)
3742 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3743
3744 // check temperature limits
3745 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3746 CheckTemperature(cfg, state, currenttemp, triptemp);
3747
3748 // check if number of selftest errors has increased (note: may also DECREASE)
3749 if (cfg.selftest)
3750 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3751
// Start a scheduled self-test if the caller allows it and a '-s' regex is set
3752 if (allow_selftests && !cfg.test_regex.empty()) {
3753 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3754 if (testtype)
3755 DoSCSISelfTest(cfg, state, scsidev, testtype);
3756 }
// The error counter log pages are only read when an attribute log file is configured
3757 if (!cfg.attrlog_file.empty()){
3758 // saving error counters to state
3759 uint8_t tBuf[252];
3760 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3761 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing line 3762 is absent here; presumably it decodes tBuf
// into state.scsi_error_counters[0] - confirm against the complete file.
3763 state.scsi_error_counters[0].found=1;
3764 }
3765 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3766 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing line 3767 is absent here (see note above).
3768 state.scsi_error_counters[1].found=1;
3769 }
3770 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3771 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing line 3772 is absent here (see note above).
3773 state.scsi_error_counters[2].found=1;
3774 }
3775 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3776 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing lines 3777-3778 are absent; the body of this branch
// is not visible in the extract.
3779 }
3780 // store temperature if not done by CheckTemperature() above
3781 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3782 state.temperature = currenttemp;
3783 }
3784 CloseDevice(scsidev, name);
3785 state.attrlog_dirty = true;
3786 return 0;
3787}
3788
3789static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3790{
3791 if (!open_device(cfg, state, nvmedev, "NVMe"))
3792 return 1;
3793
3794 const char * name = cfg.name.c_str();
3795
3796 // Read SMART/Health log
3797 nvme_smart_log smart_log;
3798 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3799 CloseDevice(nvmedev, name);
3800 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3801 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3802 state.must_write = true;
3803 return 0;
3804 }
3805
3806 // Check Critical Warning bits
3807 if (cfg.smartcheck && smart_log.critical_warning) {
3808 unsigned char w = smart_log.critical_warning;
3809 std::string msg;
3810 static const char * const wnames[] =
3811 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3812
3813 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3814 if (!(w & (1 << b)))
3815 continue;
3816 if (cnt)
3817 msg += ", ";
3818 if (++cnt > 3) {
3819 msg += "..."; break;
3820 }
3821 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3822 msg += "*Unknown*"; break;
3823 }
3824 msg += wnames[b];
3825 }
3826
3827 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3828 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3829 state.must_write = true;
3830 }
3831
3832 // Check temperature limits
3833 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3834 int k = nvme_get_max_temp_kelvin(smart_log);
3835 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3836 int c = k - 273;
3837 if (c < 1)
3838 c = 1;
3839 else if (c > 0xff)
3840 c = 0xff;
3841 CheckTemperature(cfg, state, c, 0);
3842 }
3843
3844 // Check if number of errors has increased
3845 if (cfg.errorlog || cfg.xerrorlog) {
3846 uint64_t oldcnt = state.nvme_err_log_entries;
3847 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3848 if (newcnt > oldcnt) {
3849 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3850 name, oldcnt, newcnt);
3851 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3852 name, oldcnt, newcnt);
3853 state.must_write = true;
3854 }
3855 state.nvme_err_log_entries = newcnt;
3856 }
3857
3858 CloseDevice(nvmedev, name);
3859 state.attrlog_dirty = true;
3860 return 0;
3861}
3862
// NOTE(review): listing lines 3864 and 3866 are absent from this extract;
// presumably they hold the declaration of 'standby_disable_state' (whose
// values the next comment describes) and the header of the function whose
// body follows - confirm against the complete file.
3863// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3865
3867{
3868 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3869 bool sts1 = false, sts2 = false;
3870 for (const auto & cfg : configs) {
3871 if (cfg.offlinests_ns)
3872 sts1 = true;
3873 if (cfg.selfteststs_ns)
3874 sts2 = true;
3875 }
3876
3877 // Check for support of disable auto standby
3878 // Reenable standby if smartd.conf was reread
// The call below requests "enable" (false); if it fails, the ',ns' options
// are dropped for this configuration and a message is logged.
3879 if (sts1 || sts2 || standby_disable_state == 3) {
3880 if (!smi()->disable_system_auto_standby(false)) {
3881 if (standby_disable_state == 3)
3882 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3883 if (sts1 || sts2) {
3884 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3885 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3886 sts1 = sts2 = false;
3887 }
3888 }
3889 }
3890
// State becomes 1 (not disabled) if any ',ns' directive remains, else 0 (not used)
3891 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3892}
3893
// Disables system auto standby while a monitored offline/self-test is
// running and re-enables it afterwards, logging state changes.
// NOTE(review): listing lines 3896, 3906, 3909, 3922, 3929 and 3935 are absent
// from this extract. The condition guarding the early 'return', the second
// half of both "still running" tests and (presumably) the assignments to
// standby_disable_state are therefore not visible - confirm against the
// complete file before changing this function.
3894static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3895{
3897 return;
3898
3899 // Check for just started or still running self-tests
3900 bool running = false;
3901 for (unsigned i = 0; i < configs.size() && !running; i++) {
3902 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3903
3904 if ( ( cfg.offlinests_ns
3905 && (state.offline_started ||
3907 || ( cfg.selfteststs_ns
3908 && (state.selftest_started ||
3910 running = true;
3911 // state.offline/selftest_started will be reset after next logging of test status
3912 }
3913
3914 // Disable/enable auto standby and log state changes
// No test running: re-enable auto standby unless the state is already 1
3915 if (!running) {
3916 if (standby_disable_state != 1) {
3917 if (!smi()->disable_system_auto_standby(false))
3918 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3919 smi()->get_errmsg());
3920 else
3921 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3923 }
3924 }
// Test running but the disable request failed: log once per state change
3925 else if (!smi()->disable_system_auto_standby(true)) {
3926 if (standby_disable_state != 2) {
3927 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3928 smi()->get_errmsg());
3930 }
3931 }
// Test running and auto standby is now disabled: log once per state change
3932 else {
3933 if (standby_disable_state != 3) {
3934 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3936 }
3937 }
3938}
3939
// Runs one check cycle over all configured devices. The three vectors are
// indexed in parallel: configs[i], states[i] and devices[i] belong together.
// Devices whose state.skip flag is set are not checked in this cycle.
// Besides ATA and SCSI, NVMe devices are dispatched as well (see below).
// The return values of the per-device check functions are ignored.
3940// Checks the SMART status of all ATA and SCSI devices
3941static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3942 smart_device_list & devices, bool firstpass, bool allow_selftests)
3943{
3944 for (unsigned i = 0; i < configs.size(); i++) {
3945 const dev_config & cfg = configs.at(i);
3946 dev_state & state = states.at(i);
3947 if (state.skip) {
3948 if (debugmode)
3949 PrintOut(LOG_INFO, "Device: %s, skipped (interval=%d)\n", cfg.name.c_str(),
3950 (cfg.checktime ? cfg.checktime : checktime));
3951 continue;
3952 }
3953
// Dispatch on the device type; other device types are silently ignored
3954 smart_device * dev = devices.at(i);
3955 if (dev->is_ata())
3956 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3957 else if (dev->is_scsi())
3958 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3959 else if (dev->is_nvme())
3960 NVMeCheckDevice(cfg, state, dev->to_nvme());
3961
3962 // Prevent systemd unit startup timeout when checking many devices on startup
// NOTE(review): listing line 3963 (the statement this comment refers to) is
// absent from this extract - confirm against the complete file.
3964 }
3965
3966 do_disable_standby_check(configs, states);
3967}
3968
// NOTE(review): this function is only partially present in the extract. The
// header (listing line 3970) and the registration statements that followed
// each comment (listing lines 3973-3974, 3977, 3980-3981) are absent; only
// the Windows-specific SIGUSR2 registration is visible. Confirm against the
// complete file.
3969// Install all signal handlers
3971{
3972 // normal and abnormal exit
3975
3976 // in debug mode, <CONTROL-C> ==> HUP
3978
3979 // Catch HUP and USR1
3982#ifdef _WIN32
// SIGUSR2 is only handled on Windows
3983 set_signal_if_not_ignored(SIGUSR2, USR2handler);
3984#endif
3985}
3986
3987#ifdef _WIN32
3988// Toggle debug mode implemented for native windows only
3989// (there is no easy way to reopen tty on *nix)
3990static void ToggleDebugMode()
3991{
3992 if (!debugmode) {
3993 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3994 if (!daemon_enable_console("smartd [Debug]")) {
3995 debugmode = 1;
3996 daemon_signal(SIGINT, HUPhandler);
3997 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3998 }
3999 else
4000 PrintOut(LOG_INFO,"enable console failed\n");
4001 }
4002 else if (debugmode == 1) {
4003 daemon_disable_console();
4004 debugmode = 0;
4005 daemon_signal(SIGINT, sighandler);
4006 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
4007 }
4008 else
4009 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
4010}
4011#endif
4012
// Returns the next wake-up time for a check interval of 'ct' seconds.
// A 'wakeuptime' that is still in the future is returned unchanged.
// Otherwise the result is the first point on the grid wakeuptime + k*ct
// that lies strictly after 'timenow'. 'ct' must be nonzero.
time_t calc_next_wakeuptime(time_t wakeuptime, time_t timenow, int ct)
{
  if (wakeuptime > timenow)
    return wakeuptime;

  // Seconds since the last scheduled wake-up, folded into one interval
  const time_t elapsed = timenow - wakeuptime;
  return timenow + ct - elapsed % ct;
}
4019
4020static time_t dosleep(time_t wakeuptime, const dev_config_vector & configs,
4021 dev_state_vector & states, bool & sigwakeup)
4022{
4023 // If past wake-up-time, compute next wake-up-time
4024 time_t timenow = time(nullptr);
4025 unsigned n = configs.size();
4026 int ct;
4027 if (!checktime_min) {
4028 // Same for all devices
4029 wakeuptime = calc_next_wakeuptime(wakeuptime, timenow, checktime);
4030 ct = checktime;
4031 }
4032 else {
4033 // Determine wakeuptime of next device(s)
4034 wakeuptime = 0;
4035 for (unsigned i = 0; i < n; i++) {
4036 const dev_config & cfg = configs.at(i);
4037 dev_state & state = states.at(i);
4038 if (!state.skip)
4039 state.wakeuptime = calc_next_wakeuptime((state.wakeuptime ? state.wakeuptime : timenow),
4040 timenow, (cfg.checktime ? cfg.checktime : checktime));
4041 if (!wakeuptime || state.wakeuptime < wakeuptime)
4042 wakeuptime = state.wakeuptime;
4043 }
4044 ct = checktime_min;
4045 }
4046
4047 notify_wait(wakeuptime, n);
4048
4049 // Sleep until we catch a signal or have completed sleeping
4050 bool no_skip = false;
4051 int addtime = 0;
4052 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
4053 // Restart if system clock has been adjusted to the past
4054 if (wakeuptime > timenow + ct) {
4055 PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
4056 wakeuptime = timenow + ct;
4057 for (auto & state : states)
4058 state.wakeuptime = 0;
4059 no_skip = true;
4060 }
4061
4062 // Exit sleep when time interval has expired or a signal is received
4063 sleep(wakeuptime+addtime-timenow);
4064
4065#ifdef _WIN32
4066 // toggle debug mode?
4067 if (caughtsigUSR2) {
4068 ToggleDebugMode();
4069 caughtsigUSR2 = 0;
4070 }
4071#endif
4072
4073 timenow = time(nullptr);
4074
4075 // Actual sleep time too long?
4076 if (!addtime && timenow > wakeuptime+60) {
4077 if (debugmode)
4078 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4079 (int)(timenow-wakeuptime));
4080 // Wait another 20 seconds to avoid I/O errors during disk spin-up
4081 addtime = timenow-wakeuptime+20;
4082 // Use next wake-up-time if close
4083 int nextcheck = ct - addtime % ct;
4084 if (nextcheck <= 20)
4085 addtime += nextcheck;
4086 }
4087 }
4088
4089 // if we caught a SIGUSR1 then print message and clear signal
4090 if (caughtsigUSR1){
4091 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4092 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4093 caughtsigUSR1=0;
4094 sigwakeup = no_skip = true;
4095 }
4096
4097 // Check which devices must be skipped in this cycle
4098 if (checktime_min) {
4099 for (auto & state : states)
4100 state.skip = (!no_skip && timenow < state.wakeuptime);
4101 }
4102
4103 // return adjusted wakeuptime
4104 return wakeuptime;
4105}
4106
4107// Print out a list of valid arguments for the Directive d
4108static void printoutvaliddirectiveargs(int priority, char d)
4109{
4110 switch (d) {
4111 case 'n':
4112 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4113 break;
4114 case 's':
4115 PrintOut(priority, "valid_regular_expression");
4116 break;
4117 case 'd':
4118 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4119 break;
4120 case 'T':
4121 PrintOut(priority, "normal, permissive");
4122 break;
4123 case 'o':
4124 case 'S':
4125 PrintOut(priority, "on, off");
4126 break;
4127 case 'l':
4128 PrintOut(priority, "error, selftest");
4129 break;
4130 case 'M':
4131 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4132 break;
4133 case 'v':
4134 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4135 break;
4136 case 'P':
4137 PrintOut(priority, "use, ignore, show, showall");
4138 break;
4139 case 'F':
4140 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4141 break;
4142 case 'e':
4143 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4144 "security-freeze, standby,[N|off], wcache,[on|off]");
4145 break;
4146 case 'c':
4147 PrintOut(priority, "i=N, interval=N");
4148 break;
4149 }
4150}
4151
4152// exits with an error message, or returns integer value of token
4153static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4154 int min, int max, char * suffix = 0)
4155{
4156 // make sure argument is there
4157 if (!arg) {
4158 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4159 cfgfile, lineno, name, token, min, max);
4160 return -1;
4161 }
4162
4163 // get argument value (base 10), check that it's integer, and in-range
4164 char *endptr;
4165 int val = strtol(arg,&endptr,10);
4166
4167 // optional suffix present?
4168 if (suffix) {
4169 if (!strcmp(endptr, suffix))
4170 endptr += strlen(suffix);
4171 else
4172 *suffix = 0;
4173 }
4174
4175 if (!(!*endptr && min <= val && val <= max)) {
4176 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4177 cfgfile, lineno, name, token, arg, min, max);
4178 return -1;
4179 }
4180
4181 // all is well; return value
4182 return val;
4183}
4184
4185
4186// Get 1-3 small integer(s) for '-W' directive
4187static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4188 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4189{
4190 unsigned v1 = 0, v2 = 0, v3 = 0;
4191 int n1 = -1, n2 = -1, n3 = -1, len;
4192 if (!arg) {
4193 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4194 cfgfile, lineno, name, token);
4195 return -1;
4196 }
4197
4198 len = strlen(arg);
4199 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4200 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4201 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4202 cfgfile, lineno, name, token, arg);
4203 return -1;
4204 }
4205 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4206 return 0;
4207}
4208
4209
4210#ifdef _WIN32
4211
// Returns the next strtok() token of the string currently being tokenized.
// If the token starts with '"', it and the following tokens up to the one
// ending with '"' are joined (separated by single spaces) and returned
// without the quotes. A quoted argument contained in one token, for
// example "tool.exe", is returned without consuming further tokens.
// Returns null if no token remains and the string "\"" if the closing
// quote is missing. A dequoted result points to a static buffer which is
// overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(nullptr, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;

  // Opening and closing quote in the same token: nothing to concatenate.
  // (A bare "" is not treated as closed and takes the path below.)
  size_t len = strlen(t);
  if (len > 2 && t[len-1] == '"') {
    token.assign(t+1, len-2);
    return token.c_str();
  }

  token = t+1;
  for (;;) {
    t = strtok(nullptr, delimiters);
    if (!t || !*t)
      return "\""; // end of line reached without closing quote
    token += ' ';
    len = strlen(t);
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4235
4236#endif // _WIN32
4237
4238
// Parses one Directive ('-X' plus its argument, if any) of a configuration
// line. 'token' is the Directive itself; its argument is fetched with
// strtok(nullptr, ...), i.e. from the line the caller is currently
// tokenizing. Results are stored in 'cfg'; '-d TYPE' also updates 'scan_types'.
// NOTE(review): this text is an extract of a source listing. Where the
// embedded listing numbers skip, a source line is absent from the extract;
// such places are marked below. Confirm against the complete file before
// changing code near these marks.
4239// This function returns 1 if it has correctly parsed one token (and
4240// any arguments), else zero if no tokens remain. It returns -1 if an
4241// error was encountered.
4242static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4243{
4244 char sym;
4245 const char * name = cfg.name.c_str();
4246 int lineno=cfg.lineno;
4247 const char *delim = " \n\t";
// badarg/missingarg are evaluated after the switch to print a common error
// message; 'arg' holds the argument text for that message
4248 int badarg = 0;
4249 int missingarg = 0;
4250 const char *arg = 0;
4251
4252 // is the rest of the line a comment
4253 if (*token=='#')
4254 return 1;
4255
4256 // is the token not recognized?
4257 if (*token!='-' || strlen(token)!=2) {
4258 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4259 configfile, lineno, name, token);
4260 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4261 return -1;
4262 }
4263
4264 // token we will be parsing:
4265 sym=token[1];
4266
4267 // parse the token and swallow its argument
4268 int val;
// Writable suffix buffers for GetInteger(): it clears the buffer if the
// suffix is not present in the argument
4269 char plus[] = "+", excl[] = "!";
4270
4271 switch (sym) {
4272 case 'C':
4273 // monitor current pending sector count (default 197)
4274 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4275 return -1;
4276 cfg.curr_pending_id = (unsigned char)val;
4277 cfg.curr_pending_incr = (*plus == '+');
4278 cfg.curr_pending_set = true;
4279 break;
4280 case 'U':
4281 // monitor offline uncorrectable sectors (default 198)
4282 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4283 return -1;
4284 cfg.offl_pending_id = (unsigned char)val;
4285 cfg.offl_pending_incr = (*plus == '+');
4286 cfg.offl_pending_set = true;
4287 break;
4288 case 'T':
4289 // Set tolerance level for SMART command failures
4290 if (!(arg = strtok(nullptr, delim))) {
4291 missingarg = 1;
4292 } else if (!strcmp(arg, "normal")) {
4293 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4294 // not on failure of an optional S.M.A.R.T. command.
4295 // This is the default so we don't need to actually do anything here.
4296 cfg.permissive = false;
4297 } else if (!strcmp(arg, "permissive")) {
4298 // Permissive mode; ignore errors from Mandatory SMART commands
4299 cfg.permissive = true;
4300 } else {
4301 badarg = 1;
4302 }
4303 break;
4304 case 'd':
4305 // specify the device type
// Any argument other than "ignore", "removable" and "auto" is stored as
// device type without validation here
4306 if (!(arg = strtok(nullptr, delim))) {
4307 missingarg = 1;
4308 } else if (!strcmp(arg, "ignore")) {
4309 cfg.ignore = true;
4310 } else if (!strcmp(arg, "removable")) {
4311 cfg.removable = true;
4312 } else if (!strcmp(arg, "auto")) {
4313 cfg.dev_type = "";
4314 scan_types.clear();
4315 } else {
4316 cfg.dev_type = arg;
4317 scan_types.push_back(arg);
4318 }
4319 break;
4320 case 'F':
4321 // fix firmware bug
4322 if (!(arg = strtok(nullptr, delim)))
4323 missingarg = 1;
4324 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4325 badarg = 1;
4326 break;
4327 case 'H':
4328 // check SMART status
4329 cfg.smartcheck = true;
4330 break;
4331 case 'f':
4332 // check for failure of usage attributes
4333 cfg.usagefailed = true;
4334 break;
4335 case 't':
4336 // track changes in all vendor attributes
4337 cfg.prefail = true;
4338 cfg.usage = true;
4339 break;
4340 case 'p':
4341 // track changes in prefail vendor attributes
4342 cfg.prefail = true;
4343 break;
4344 case 'u':
4345 // track changes in usage vendor attributes
4346 cfg.usage = true;
4347 break;
4348 case 'l':
4349 // track changes in SMART logs
4350 if (!(arg = strtok(nullptr, delim))) {
4351 missingarg = 1;
4352 } else if (!strcmp(arg, "selftest")) {
4353 // track changes in self-test log
4354 cfg.selftest = true;
4355 } else if (!strcmp(arg, "error")) {
4356 // track changes in ATA error log
4357 cfg.errorlog = true;
4358 } else if (!strcmp(arg, "xerror")) {
4359 // track changes in Extended Comprehensive SMART error log
4360 cfg.xerrorlog = true;
4361 } else if (!strcmp(arg, "offlinests")) {
4362 // track changes in offline data collection status
4363 cfg.offlinests = true;
4364 } else if (!strcmp(arg, "offlinests,ns")) {
4365 // track changes in offline data collection status, disable auto standby
4366 cfg.offlinests = cfg.offlinests_ns = true;
4367 } else if (!strcmp(arg, "selfteststs")) {
4368 // track changes in self-test execution status
4369 cfg.selfteststs = true;
4370 } else if (!strcmp(arg, "selfteststs,ns")) {
4371 // track changes in self-test execution status, disable auto standby
4372 cfg.selfteststs = cfg.selfteststs_ns = true;
4373 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4374 // set SCT Error Recovery Control
// rt/wt start out of range and nc at -1, so an incomplete match of the
// format is rejected by the check below
4375 unsigned rt = ~0, wt = ~0; int nc = -1;
4376 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4377 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4378 cfg.sct_erc_set = true;
4379 cfg.sct_erc_readtime = rt;
4380 cfg.sct_erc_writetime = wt;
4381 }
4382 else
4383 badarg = 1;
4384 } else {
4385 badarg = 1;
4386 }
4387 break;
4388 case 'a':
4389 // monitor everything
4390 cfg.smartcheck = true;
4391 cfg.prefail = true;
4392 cfg.usagefailed = true;
4393 cfg.usage = true;
4394 cfg.selftest = true;
4395 cfg.errorlog = true;
4396 cfg.selfteststs = true;
4397 break;
4398 case 'o':
4399 // automatic offline testing enable/disable
4400 if (!(arg = strtok(nullptr, delim))) {
4401 missingarg = 1;
4402 } else if (!strcmp(arg, "on")) {
4403 cfg.autoofflinetest = 2;
4404 } else if (!strcmp(arg, "off")) {
4405 cfg.autoofflinetest = 1;
4406 } else {
4407 badarg = 1;
4408 }
4409 break;
4410 case 'n':
4411 // skip disk check if in idle or standby mode
// Argument format: MODE[,N][,q]. The first ',' is overwritten with '\0' so
// that 'arg' holds only MODE; 'next' then walks over the optional parts.
4412 if (!(arg = strtok(nullptr, delim)))
4413 missingarg = 1;
4414 else {
4415 char *endptr = nullptr;
4416 char *next = strchr(const_cast<char*>(arg), ',');
4417
4418 cfg.powerquiet = false;
4419 cfg.powerskipmax = 0;
4420
4421 if (next)
4422 *next = '\0';
4423 if (!strcmp(arg, "never"))
4424 cfg.powermode = 0;
4425 else if (!strcmp(arg, "sleep"))
4426 cfg.powermode = 1;
4427 else if (!strcmp(arg, "standby"))
4428 cfg.powermode = 2;
4429 else if (!strcmp(arg, "idle"))
4430 cfg.powermode = 3;
4431 else
4432 badarg = 1;
4433
4434 // if optional arguments are present
4435 if (!badarg && next) {
4436 next++;
4437 cfg.powerskipmax = strtol(next, &endptr, 10);
// No digits consumed: there is no N, 'next' still points at the text after ','
4438 if (endptr == next)
4439 cfg.powerskipmax = 0;
4440 else {
// Step over the character following the number (if any)
4441 next = endptr + (*endptr != '\0');
4442 if (cfg.powerskipmax <= 0)
4443 badarg = 1;
4444 }
4445 if (*next != '\0') {
4446 if (!strcmp("q", next))
4447 cfg.powerquiet = true;
4448 else {
4449 badarg = 1;
4450 }
4451 }
4452 }
4453 }
4454 break;
4455 case 'S':
4456 // automatic attribute autosave enable/disable
4457 if (!(arg = strtok(nullptr, delim))) {
4458 missingarg = 1;
4459 } else if (!strcmp(arg, "on")) {
4460 cfg.autosave = 2;
4461 } else if (!strcmp(arg, "off")) {
4462 cfg.autosave = 1;
4463 } else {
4464 badarg = 1;
4465 }
4466 break;
4467 case 's':
4468 // warn user, and delete any previously given -s REGEXP Directives
4469 if (!cfg.test_regex.empty()){
4470 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4471 configfile, lineno, name, cfg.test_regex.get_pattern());
// NOTE(review): listing line 4472 is absent here; presumably it discards the
// previous regex as announced by the comment above - confirm.
4473 }
4474 // check for missing argument
4475 if (!(arg = strtok(nullptr, delim))) {
4476 missingarg = 1;
4477 }
4478 // Compile regex
4479 else {
4480 if (!cfg.test_regex.compile(arg)) {
4481 // not a valid regular expression!
4482 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4483 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4484 return -1;
4485 }
4486 // Do a bit of sanity checking and warn user if we think that
4487 // their regexp is "strange". User probably confused about shell
4488 // glob(3) syntax versus regular expression syntax regexp(7).
4489 // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4490 static const regular_expression syntax_check(
4491 "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4492 ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4493 ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4494 );
// NOTE(review): listing line 4495 is absent here; the declaration of 'range'
// used below is not visible in this extract.
4496 if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4497 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4498 "extended regular expression \"%s\"\n",
4499 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4500 }
4501 break;
4502 case 'm':
4503 // send email to address that follows
4504 if (!(arg = strtok(nullptr, delim)))
4505 missingarg = 1;
4506 else {
4507 if (!cfg.emailaddress.empty())
4508 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4509 configfile, lineno, name, cfg.emailaddress.c_str());
4510 cfg.emailaddress = arg;
4511 }
4512 break;
4513 case 'M':
4514 // email warning options
4515 if (!(arg = strtok(nullptr, delim)))
4516 missingarg = 1;
4517 else if (!strcmp(arg, "once"))
4518 cfg.emailfreq = 1;
4519 else if (!strcmp(arg, "daily"))
4520 cfg.emailfreq = 2;
4521 else if (!strcmp(arg, "diminishing"))
4522 cfg.emailfreq = 3;
4523 else if (!strcmp(arg, "test"))
4524 cfg.emailtest = 1;
4525 else if (!strcmp(arg, "exec")) {
4526 // Get the next argument (the command line)
4527#ifdef _WIN32
4528 // Allow "/path name/with spaces/..." on Windows
// strtok_dequote() returns a string starting with '"' if the closing quote is missing
4529 arg = strtok_dequote(delim);
4530 if (arg && arg[0] == '"') {
4531 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4532 configfile, lineno, name, token);
4533 return -1;
4534 }
4535#else
4536 arg = strtok(nullptr, delim);
4537#endif
4538 if (!arg) {
4539 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4540 configfile, lineno, name, token);
4541 return -1;
4542 }
4543 // Free the last cmd line given if any, and copy new one
4544 if (!cfg.emailcmdline.empty())
4545 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4546 configfile, lineno, name, cfg.emailcmdline.c_str());
4547 cfg.emailcmdline = arg;
4548 }
4549 else
4550 badarg = 1;
4551 break;
4552 case 'i':
4553 // ignore failure of usage attribute
4554 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4555 return -1;
// NOTE(review): listing line 4556 is absent here; the statement which
// records 'val' is not visible in this extract.
4557 break;
4558 case 'I':
4559 // ignore attribute for tracking purposes
4560 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4561 return -1;
// NOTE(review): listing line 4562 is absent here (see note at case 'i').
4563 break;
4564 case 'r':
4565 // print raw value when tracking
4566 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4567 return -1;
// NOTE(review): listing lines 4568 and 4570 are absent; as extracted, the
// 'if' below would control the 'break' - the real controlled statement is
// not visible here.
4569 if (*excl == '!') // attribute change is critical
4571 break;
4572 case 'R':
4573 // track changes in raw value (forces printing of raw value)
4574 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4575 return -1;
// NOTE(review): listing lines 4576 and 4578 are absent (see note at case 'r').
4577 if (*excl == '!') // raw value change is critical
4579 break;
4580 case 'W':
4581 // track Temperature
4582 if (Get3Integers((arg = strtok(nullptr, delim)), name, token, lineno, configfile,
4583 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4584 return -1;
4585 break;
4586 case 'v':
4587 // non-default vendor-specific attribute meaning
4588 if (!(arg = strtok(nullptr, delim))) {
4589 missingarg = 1;
4590 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4591 badarg = 1;
4592 }
4593 break;
4594 case 'P':
4595 // Define use of drive-specific presets.
4596 if (!(arg = strtok(nullptr, delim))) {
4597 missingarg = 1;
4598 } else if (!strcmp(arg, "use")) {
4599 cfg.ignorepresets = false;
4600 } else if (!strcmp(arg, "ignore")) {
4601 cfg.ignorepresets = true;
4602 } else if (!strcmp(arg, "show")) {
4603 cfg.showpresets = true;
4604 } else if (!strcmp(arg, "showall")) {
// NOTE(review): listing line 4605 is absent; the action for "showall" is not
// visible in this extract.
4606 } else {
4607 badarg = 1;
4608 }
4609 break;
4610
4611 case 'e':
4612 // Various ATA settings
4613 if (!(arg = strtok(nullptr, delim))) {
4614 missingarg = true;
4615 }
4616 else {
// arg2 receives the setting name (text before ',' or '='), n1 its length,
// n2 the offset of the value and n3 the offset after a numeric value
4617 char arg2[16+1]; unsigned uval;
4618 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4619 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &uval, &n3) >= 1
4620 && (n1 == len || n2 > 0)) {
4621 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4622 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
// No number or trailing text after it: make uval fail all range checks below
4623 if (n3 != len)
4624 uval = ~0U;
4625
4626 if (!strcmp(arg2, "aam")) {
4627 if (off)
4628 cfg.set_aam = -1;
4629 else if (uval <= 254)
4630 cfg.set_aam = uval + 1;
4631 else
4632 badarg = true;
4633 }
4634 else if (!strcmp(arg2, "apm")) {
4635 if (off)
4636 cfg.set_apm = -1;
4637 else if (1 <= uval && uval <= 254)
4638 cfg.set_apm = uval + 1;
4639 else
4640 badarg = true;
4641 }
4642 else if (!strcmp(arg2, "lookahead")) {
4643 if (off)
4644 cfg.set_lookahead = -1;
4645 else if (on)
4646 cfg.set_lookahead = 1;
4647 else
4648 badarg = true;
4649 }
// This comparison uses the complete argument 'arg', not 'arg2', so
// "security-freeze" followed by ",..." or "=..." is rejected
4650 else if (!strcmp(arg, "security-freeze")) {
4651 cfg.set_security_freeze = true;
4652 }
4653 else if (!strcmp(arg2, "standby")) {
4654 if (off)
4655 cfg.set_standby = 0 + 1;
4656 else if (uval <= 255)
4657 cfg.set_standby = uval + 1;
4658 else
4659 badarg = true;
4660 }
4661 else if (!strcmp(arg2, "wcache")) {
4662 if (off)
4663 cfg.set_wcache = -1;
4664 else if (on)
4665 cfg.set_wcache = 1;
4666 else
4667 badarg = true;
4668 }
4669 else if (!strcmp(arg2, "dsn")) {
4670 if (off)
4671 cfg.set_dsn = -1;
4672 else if (on)
4673 cfg.set_dsn = 1;
4674 else
4675 badarg = true;
4676 }
4677 else
4678 badarg = true;
4679 }
4680 else
4681 badarg = true;
4682 }
4683 break;
4684
4685 case 'c':
4686 // Override command line options
4687 {
4688 if (!(arg = strtok(nullptr, delim))) {
4689 missingarg = true;
4690 break;
4691 }
// Accepts "i=N" or "interval=N" with N >= 10 and no trailing text
4692 int n = 0, nc = -1, len = strlen(arg);
4693 if ( ( sscanf(arg, "i=%d%n", &n, &nc) == 1
4694 || sscanf(arg, "interval=%d%n", &n, &nc) == 1)
4695 && nc == len && n >= 10)
4696 cfg.checktime = n;
4697 else
4698 badarg = true;
4699 }
4700 break;
4701
4702 default:
4703 // Directive not recognized
4704 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4705 configfile, lineno, name, token);
4706 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4707 return -1;
4708 }
// Common error reporting for Directives which only set missingarg/badarg above
4709 if (missingarg) {
4710 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4711 configfile, lineno, name, token);
4712 }
4713 if (badarg) {
4714 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4715 configfile, lineno, name, token, arg);
4716 }
4717 if (missingarg || badarg) {
4718 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4719 printoutvaliddirectiveargs(LOG_CRIT, sym);
4720 PrintOut(LOG_CRIT, "\n");
4721 return -1;
4722 }
4723
4724 return 1;
4725}
4726
4727// Scan directive for configuration file
4728#define SCANDIRECTIVE "DEVICESCAN"
4729
4730// This is the routine that adds things to the conf_entries list.
4731//
4732// Return values are:
4733// 1: parsed a normal line
4734// 0: found DEFAULT setting or comment or blank line
4735// -1: found SCANDIRECTIVE line
4736// -2: found an error
4737//
4738// Note: this routine modifies *line from the caller!
4739static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4740 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4741{
4742 const char *delim = " \n\t";
4743
4744 // get first token: device name. If a comment, skip line
4745 const char * name = strtok(line, delim);
4746 if (!name || *name == '#')
4747 return 0;
4748
4749 // Check device name for DEFAULT or DEVICESCAN
4750 int retval;
4751 if (!strcmp("DEFAULT", name)) {
4752 retval = 0;
4753 // Restart with empty defaults
4754 default_conf = dev_config();
4755 }
4756 else {
4757 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4758 // Init new entry with current defaults
4759 conf_entries.push_back(default_conf);
4760 }
4761 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4762
4763 cfg.name = name; // Later replaced by dev->get_info().info_name
4764 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4765 cfg.lineno = lineno;
4766
4767 // parse tokens one at a time from the file.
4768 while (char * token = strtok(nullptr, delim)) {
4769 int rc = ParseToken(token, cfg, scan_types);
4770 if (rc < 0)
4771 // error found on the line
4772 return -2;
4773
4774 if (rc == 0)
4775 // No tokens left
4776 break;
4777
4778 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4779 }
4780
4781 // Check for multiple -d TYPE directives
4782 if (retval != -1 && scan_types.size() > 1) {
4783 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4784 cfg.name.c_str(), cfg.lineno, configfile);
4785 return -2;
4786 }
4787
4788 // Don't perform checks below for DEFAULT entries
4789 if (retval == 0)
4790 return retval;
4791
4792 // If NO monitoring directives are set, then set all of them.
4793 if (!( cfg.smartcheck || cfg.selftest
4794 || cfg.errorlog || cfg.xerrorlog
4795 || cfg.offlinests || cfg.selfteststs
4796 || cfg.usagefailed || cfg.prefail || cfg.usage
4797 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4798
4799 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4800 cfg.name.c_str(), cfg.lineno, configfile);
4801
4802 cfg.smartcheck = true;
4803 cfg.usagefailed = true;
4804 cfg.prefail = true;
4805 cfg.usage = true;
4806 cfg.selftest = true;
4807 cfg.errorlog = true;
4808 cfg.selfteststs = true;
4809 }
4810
4811 // additional sanity check. Has user set -M options without -m?
4812 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4813 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4814 cfg.name.c_str(), cfg.lineno, configfile);
4815 return -2;
4816 }
4817
4818 // has the user has set <nomailer>?
4819 if (cfg.emailaddress == "<nomailer>") {
4820 // check that -M exec is also set
4821 if (cfg.emailcmdline.empty()){
4822 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4823 cfg.name.c_str(), cfg.lineno, configfile);
4824 return -2;
4825 }
4826 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4827 cfg.emailaddress.clear();
4828 }
4829
4830 return retval;
4831}
4832
4833// Parses a configuration file. Return values are:
4834// N=>0: found N entries
4835// -1: syntax error in config file
4836// -2: config file does not exist
4837// -3: config file exists but cannot be read
4838//
4839// In the case where the return value is 0, there are three
4840// possibilities:
4841// Empty configuration file ==> conf_entries.empty()
4842// No configuration file ==> conf_entries[0].lineno == 0
4843// SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4844static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4845{
4846 // maximum line length in configuration file
4847 const int MAXLINELEN = 256;
4848 // maximum length of a continued line in configuration file
4849 const int MAXCONTLINE = 1023;
4850
4851 stdio_file f;
4852 // Open config file, if it exists and is not <stdin>
4853 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4854 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4855 // file exists but we can't read it or it should exist due to '-c' option
4856 int ret = (errno!=ENOENT ? -3 : -2);
4857 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4858 strerror(errno),configfile);
4859 return ret;
4860 }
4861 }
4862 else // read from stdin ('-c -' option)
4863 f.open(stdin);
4864
4865 // Start with empty defaults
4866 dev_config default_conf;
4867
4868 // No configuration file found -- use fake one
4869 int entry = 0;
4870 if (!f) {
4871 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4872
4873 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4874 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4875 return 0;
4876 }
4877
4878#ifdef __CYGWIN__
4879 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4880#endif
4881
4882 // configuration file exists
4883 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4884
4885 // parse config file line by line
4886 int lineno = 1, cont = 0, contlineno = 0;
4887 char line[MAXLINELEN+2];
4888 char fullline[MAXCONTLINE+1];
4889
4890 for (;;) {
4891 int len=0,scandevice;
4892 char *lastslash;
4893 char *comment;
4894 char *code;
4895
4896 // make debugging simpler
4897 memset(line,0,sizeof(line));
4898
4899 // get a line
4900 code=fgets(line, MAXLINELEN+2, f);
4901
4902 // are we at the end of the file?
4903 if (!code){
4904 if (cont) {
4905 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4906 // See if we found a SCANDIRECTIVE directive
4907 if (scandevice==-1)
4908 return 0;
4909 // did we find a syntax error
4910 if (scandevice==-2)
4911 return -1;
4912 // the final line is part of a continuation line
4913 entry+=scandevice;
4914 }
4915 break;
4916 }
4917
4918 // input file line number
4919 contlineno++;
4920
4921 // See if line is too long
4922 len=strlen(line);
4923 if (len>MAXLINELEN){
4924 const char *warn;
4925 if (line[len-1]=='\n')
4926 warn="(including newline!) ";
4927 else
4928 warn="";
4929 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4930 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4931 return -1;
4932 }
4933
4934 // Ignore anything after comment symbol
4935 if ((comment=strchr(line,'#'))){
4936 *comment='\0';
4937 len=strlen(line);
4938 }
4939
4940 // is the total line (made of all continuation lines) too long?
4941 if (cont+len>MAXCONTLINE){
4942 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4943 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4944 return -1;
4945 }
4946
4947 // copy string so far into fullline, and increment length
4948 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4949 cont+=len;
4950
4951 // is this a continuation line. If so, replace \ by space and look at next line
4952 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4953 *(fullline+(cont-len)+(lastslash-line))=' ';
4954 continue;
4955 }
4956
4957 // Not a continuation line. Parse it
4958 scan_types.clear();
4959 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4960
4961 // did we find a scandevice directive?
4962 if (scandevice==-1)
4963 return 0;
4964 // did we find a syntax error
4965 if (scandevice==-2)
4966 return -1;
4967
4968 entry+=scandevice;
4969 lineno++;
4970 cont=0;
4971 }
4972
4973 // note -- may be zero if syntax of file OK, but no valid entries!
4974 return entry;
4975}
4976
4977/* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4978 <LIST> is the list of valid arguments for option opt. */
4979static void PrintValidArgs(char opt)
4980{
4981 const char *s;
4982
4983 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4984 if (!(s = GetValidArgList(opt)))
4985 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4986 else
4987 PrintOut(LOG_CRIT, "%s", (char *)s);
4988 PrintOut(LOG_CRIT, " <=======\n");
4989}
4990
4991#ifndef _WIN32
4992// Report error and return false if specified path is not absolute.
4993static bool check_abs_path(char option, const std::string & path)
4994{
4995 if (path.empty() || path[0] == '/')
4996 return true;
4997
4998 debugmode = 1;
4999 PrintHead();
5000 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
5001 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
5002 return false;
5003}
5004#endif // !_WIN32
5005
5006// Parses input line, prints usage message and
5007// version/license/copyright messages
5008static int parse_options(int argc, char **argv)
5009{
5010 // Init default path names
5011#ifndef _WIN32
5012 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
5013 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
5014#else
5015 std::string exedir = get_exe_dir();
5016 static std::string configfile_str = exedir + "/smartd.conf";
5017 configfile = configfile_str.c_str();
5018 warning_script = exedir + "/smartd_warning.cmd";
5019#endif
5020
5021 // Please update GetValidArgList() if you edit shortopts
5022 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
5023#if defined(HAVE_POSIX_API) || defined(_WIN32)
5024 "u:"
5025#endif