smartmontools SVN Rev 5470
Utility to control and monitor storage systems with "S.M.A.R.T."
smartd.cpp
Go to the documentation of this file.
1/*
2 * Home page of code is: https://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-22 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12#include "config.h"
13#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15// unconditionally included files
16#include <inttypes.h>
17#include <stdio.h>
18#include <sys/types.h>
19#include <sys/stat.h> // umask
20#include <signal.h>
21#include <fcntl.h>
22#include <string.h>
23#include <syslog.h>
24#include <stdarg.h>
25#include <stdlib.h>
26#include <errno.h>
27#include <time.h>
28#include <limits.h>
29#include <getopt.h>
30
31#include <algorithm> // std::replace()
32#include <map>
33#include <stdexcept>
34#include <string>
35#include <vector>
36
37// conditionally included files
38#ifndef _WIN32
39#include <sys/wait.h>
40#endif
41#ifdef HAVE_UNISTD_H
42#include <unistd.h>
43#endif
44
45#ifdef _WIN32
46#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
47#ifdef _MSC_VER
48#pragma warning(disable:4761) // "conversion supplied"
49typedef unsigned short mode_t;
50typedef int pid_t;
51#endif
52#include <io.h> // umask()
53#include <process.h> // getpid()
54#endif // _WIN32
55
56#ifdef __CYGWIN__
57#include <io.h> // setmode()
58#endif // __CYGWIN__
59
60#ifdef HAVE_LIBCAP_NG
61#include <cap-ng.h>
62#endif // LIBCAP_NG
63
64#ifdef HAVE_LIBSYSTEMD
65#include <systemd/sd-daemon.h>
66#endif // HAVE_LIBSYSTEMD
67
68// locally included files
69#include "atacmds.h"
70#include "dev_interface.h"
71#include "knowndrives.h"
72#include "scsicmds.h"
73#include "nvmecmds.h"
74#include "utility.h"
75
76#ifdef HAVE_POSIX_API
77#include "popen_as_ugid.h"
78#endif
79
80#ifdef _WIN32
81// fork()/signal()/initd simulation for native Windows
82#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
83#define strsignal daemon_strsignal
84#define sleep daemon_sleep
85// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
86#define SIGQUIT SIGBREAK
87#define SIGQUIT_KEYNAME "CONTROL-Break"
88#else // _WIN32
89#define SIGQUIT_KEYNAME "CONTROL-\\"
90#endif // _WIN32
91
92const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5428 2022-12-31 15:55:43Z chrfranke $"
93 CONFIG_H_CVSID;
94
95extern "C" {
96 typedef void (*signal_handler_type)(int);
97}
98
100{
101#if defined(_WIN32)
102 // signal() emulation
103 daemon_signal(sig, handler);
104
105#elif defined(HAVE_SIGACTION)
106 // SVr4, POSIX.1-2001, POSIX.1-2008
107 struct sigaction sa;
108 sa.sa_handler = SIG_DFL;
109 sigaction(sig, (struct sigaction *)0, &sa);
110 if (sa.sa_handler == SIG_IGN)
111 return;
112
113 memset(&sa, 0, sizeof(sa));
114 sa.sa_handler = handler;
115 sa.sa_flags = SA_RESTART; // BSD signal() semantics
116 sigaction(sig, &sa, (struct sigaction *)0);
117
118#elif defined(HAVE_SIGSET)
119 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
120 if (sigset(sig, handler) == SIG_IGN)
121 sigset(sig, SIG_IGN);
122
123#else
124 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
125 // Important: BSD semantics is required. Traditional signal()
126 // resets the handler to SIG_DFL after the first signal is caught.
127 if (signal(sig, handler) == SIG_IGN)
128 signal(sig, SIG_IGN);
129#endif
130}
131
132using namespace smartmontools;
133
134static const int scsiLogRespLen = 252;
135
136// smartd exit codes
137#define EXIT_BADCMD 1 // command line did not parse
138#define EXIT_BADCONF 2 // syntax error in config file
139#define EXIT_STARTUP 3 // problem forking daemon
140#define EXIT_PID 4 // problem creating pid file
141#define EXIT_NOCONF 5 // config file does not exist
142#define EXIT_READCONF 6 // config file exists but cannot be read
143
144#define EXIT_NOMEM 8 // out of memory
145#define EXIT_BADCODE 10 // internal error - should NEVER happen
146
147#define EXIT_BADDEV 16 // we can't monitor this device
148#define EXIT_NODEV 17 // no devices to monitor
149
150#define EXIT_SIGNAL 254 // abort on signal
151
152
153// command-line: 1=debug mode, 2=print presets
154static unsigned char debugmode = 0;
155
156// command-line: how long to sleep between checks
157static constexpr int default_checktime = 1800;
159static int checktime_min = 0; // Minimum individual check time, 0 if none
160
161// command-line: name of PID file (empty for no pid file)
162static std::string pid_file;
163
164// command-line: path prefix of persistent state file, empty if no persistence.
165static std::string state_path_prefix
166#ifdef SMARTMONTOOLS_SAVESTATES
167 = SMARTMONTOOLS_SAVESTATES
168#endif
169 ;
170
171// command-line: path prefix of attribute log file, empty if no logs.
172static std::string attrlog_path_prefix
173#ifdef SMARTMONTOOLS_ATTRIBUTELOG
174 = SMARTMONTOOLS_ATTRIBUTELOG
175#endif
176 ;
177
178// configuration file name
179static const char * configfile;
180// configuration file "name" if read from stdin
181static const char * const configfile_stdin = "<stdin>";
182// path of alternate configuration file
183static std::string configfile_alt;
184
185// warning script file
186static std::string warning_script;
187
188#ifdef HAVE_POSIX_API
189// run warning script as non-privileged user
190static bool warn_as_user;
191static uid_t warn_uid;
192static gid_t warn_gid;
193static std::string warn_uname, warn_gname;
194#elif defined(_WIN32)
195// run warning script as restricted user
196static bool warn_as_restr_user;
197#endif
198
199// command-line: when should we exit?
200enum quit_t {
205static bool quit_nodev0 = false;
206
207// command-line; this is the default syslog(3) log facility to use.
208static int facility=LOG_DAEMON;
209
210#ifndef _WIN32
211// command-line: fork into background?
212static bool do_fork=true;
213#endif
214
215// TODO: This smartctl only variable is also used in some os_*.cpp
216unsigned char failuretest_permissive = 0;
217
218// set to one if we catch a USR1 (check devices now)
219static volatile int caughtsigUSR1=0;
220
221#ifdef _WIN32
222// set to one if we catch a USR2 (toggle debug mode)
223static volatile int caughtsigUSR2=0;
224#endif
225
226// set to one if we catch a HUP (reload config file). In debug mode,
227// set to two, if we catch INT (also reload config file).
228static volatile int caughtsigHUP=0;
229
230// set to signal value if we catch INT, QUIT, or TERM
231static volatile int caughtsigEXIT=0;
232
233// This function prints either to stdout or to the syslog as needed.
234static void PrintOut(int priority, const char *fmt, ...)
236
237#ifdef HAVE_LIBSYSTEMD
238// systemd notify support
239
240static bool notify_enabled = false;
241static bool notify_ready = false;
242
243static inline void notify_init()
244{
245 if (!getenv("NOTIFY_SOCKET"))
246 return;
247 notify_enabled = true;
248}
249
250static inline bool notify_post_init()
251{
252 if (!notify_enabled)
253 return true;
254 if (do_fork) {
255 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
256 return false;
257 }
258 return true;
259}
260
261static inline void notify_extend_timeout()
262{
263 if (!notify_enabled)
264 return;
265 if (notify_ready)
266 return;
267 const char * notify = "EXTEND_TIMEOUT_USEC=20000000"; // typical drive spinup time is 20s tops
268 if (debugmode) {
269 pout("sd_notify(0, \"%s\")\n", notify);
270 return;
271 }
272 sd_notify(0, notify);
273}
274
275static void notify_msg(const char * msg, bool ready = false)
276{
277 if (!notify_enabled)
278 return;
279 if (debugmode) {
280 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
281 return;
282 }
283 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
284}
285
286static void notify_check(int numdev)
287{
288 if (!notify_enabled)
289 return;
290 char msg[32];
291 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
292 numdev, (numdev != 1 ? "s" : ""));
293 notify_msg(msg);
294}
295
296static void notify_wait(time_t wakeuptime, int numdev)
297{
298 if (!notify_enabled)
299 return;
300 char ts[16] = ""; struct tm tmbuf;
301 strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
302 char msg[64];
303 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
304 numdev, (numdev != 1 ? "s" : ""), ts);
305 notify_msg(msg, !notify_ready); // first call notifies READY=1
306 notify_ready = true;
307}
308
309static void notify_exit(int status)
310{
311 if (!notify_enabled)
312 return;
313 const char * msg;
314 switch (status) {
315 case 0: msg = "Exiting ..."; break;
316 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
317 case EXIT_BADCONF: case EXIT_NOCONF:
318 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
319 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
320 case EXIT_NODEV: msg = "No devices to monitor"; break;
321 default: msg = "Error (see SYSLOG)"; break;
322 }
323 notify_msg(msg);
324}
325
326#else // HAVE_LIBSYSTEMD
327// No systemd notify support
328
329static inline bool notify_post_init()
330{
331#ifdef __linux__
332 if (getenv("NOTIFY_SOCKET")) {
333 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
334 return false;
335 }
336#endif
337 return true;
338}
339
// No-op replacements, the arguments are ignored
static inline void notify_init()
{ }

static inline void notify_extend_timeout()
{ }

static inline void notify_msg(const char * /*msg*/)
{ }

static inline void notify_check(int /*numdev*/)
{ }

static inline void notify_wait(time_t /*wakeuptime*/, int /*numdev*/)
{ }

static inline void notify_exit(int /*status*/)
{ }
346
347#endif // HAVE_LIBSYSTEMD
348
349// Email frequencies
350enum class emailfreqs : unsigned char {
352};
353
354// Attribute monitoring flags.
355// See monitor_attr_flags below.
356enum {
363};
364
365// Array of flags for each attribute.
367{
368public:
369 bool is_set(int id, unsigned char flag) const
370 { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }
371
372 void set(int id, unsigned char flags)
373 {
374 if (0 < id && id < (int)sizeof(m_flags))
375 m_flags[id] |= flags;
376 }
377
378private:
379 unsigned char m_flags[256]{};
380};
381
382
383/// Configuration data for a device. Read from smartd.conf.
384/// Supports copy & assignment and is compatible with STL containers.
386{
387 int lineno{}; // Line number of entry in file
388 std::string name; // Device name (with optional extra info)
389 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
390 std::string dev_type; // Device type argument from -d directive, empty if none
391 std::string dev_idinfo; // Device identify info for warning emails
392 std::string state_file; // Path of the persistent state file, empty if none
393 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
394 int checktime{}; // Individual check interval, 0 if none
395 bool ignore{}; // Ignore this entry
396 bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
397 bool smartcheck{}; // Check SMART status
398 bool usagefailed{}; // Check for failed Usage Attributes
399 bool prefail{}; // Track changes in Prefail Attributes
400 bool usage{}; // Track changes in Usage Attributes
401 bool selftest{}; // Monitor number of selftest errors
402 bool errorlog{}; // Monitor number of ATA errors
403 bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
404 bool offlinests{}; // Monitor changes in offline data collection status
405 bool offlinests_ns{}; // Disable auto standby if in progress
406 bool selfteststs{}; // Monitor changes in self-test execution status
407 bool selfteststs_ns{}; // Disable auto standby if in progress
408 bool permissive{}; // Ignore failed SMART commands
409 char autosave{}; // 1=disable, 2=enable Autosave Attributes
410 char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
411 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
412 bool ignorepresets{}; // Ignore database of -v options
413 bool showpresets{}; // Show database entry for this device
414 bool removable{}; // Device may disappear (not be present)
415 char powermode{}; // skip check, if disk in idle or standby mode
416 bool powerquiet{}; // skip powermode 'skipping checks' message
417 int powerskipmax{}; // how many times can be check skipped
418 unsigned char tempdiff{}; // Track Temperature changes >= this limit
419 unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
420 regular_expression test_regex; // Regex for scheduled testing
421 unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
422
423 // Configuration of email warning messages
424 std::string emailcmdline; // script to execute, empty if no messages
425 std::string emailaddress; // email address, or empty
426 emailfreqs emailfreq{}; // Send emails once, daily, diminishing
427 bool emailtest{}; // Send test email?
428
429 // ATA ONLY
430 int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
431 int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
432 int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
433 int set_lookahead{}; // disable(-1), enable(1) read look-ahead
434 int set_standby{}; // set(1..255->0..254) standby timer
435 bool set_security_freeze{}; // Freeze ATA security
436 int set_wcache{}; // disable(-1), enable(1) write cache
437 int set_dsn{}; // disable(0x2), enable(0x1) DSN
438
439 bool sct_erc_set{}; // set SCT ERC to:
440 unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
441 unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
442
443 unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
444 unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
445 bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
446 bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
447
448 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
449
451};
452
453// Number of allowed mail message types
454static const int SMARTD_NMAIL = 13;
455// Type for '-M test' mails (state not persistent)
456static const int MAILTYPE_TEST = 0;
457// TODO: Add const or enum for all mail types.
458
/// Bookkeeping of the emails sent for one mail message type.
/// All members start as zero.
struct mailinfo {
  /// number of times an email has been sent
  int logged = 0;
  /// time first email was sent, as defined by time(2)
  time_t firstsent = 0;
  /// time last email was sent, as defined by time(2)
  time_t lastsent = 0;
};
464
465/// Persistent state data for a device.
467{
468 unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
469
470 unsigned char selflogcount{}; // total number of self-test errors
471 unsigned short selfloghour{}; // lifetime hours of last self-test error
472
473 time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
474
475 uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
476 uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
477
478 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
479
480 // ATA ONLY
481 int ataerrorcount{}; // Total number of ATA errors
482
483 // Persistent part of ata_smart_values:
485 unsigned char id{};
486 unsigned char val{};
487 unsigned char worst{}; // Byte needed for 'raw64' attribute only.
488 uint64_t raw{};
489 unsigned char resvd{};
490 };
492
493 // SCSI ONLY
494
497 unsigned char found{};
498 };
500
503 unsigned char found{};
504 };
506
507 // NVMe only
509};
510
511/// Non-persistent state data for a device.
513{
514 bool must_write{}; // true if persistent part should be written
515
516 bool skip{}; // skip during next check cycle
517 time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
518
519 bool not_cap_offline{}; // true == not capable of offline testing
524
525 unsigned char temperature{}; // last recorded Temperature (in Celsius)
526 time_t tempmin_delay{}; // time where Min Temperature tracking will start
527
528 bool removed{}; // true if open() failed for removable device
529
530 bool powermodefail{}; // true if power mode check failed
531 int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
532 int lastpowermodeskipped{}; // the last power mode that was skipped
533
534 bool attrlog_dirty{}; // true if persistent part has new attr values that
535 // need to be written to attrlog
536
537 // SCSI ONLY
538 // TODO: change to bool
539 unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
540 unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
545 unsigned char SuppressReport{}; // minimize nuisance reports
546 unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
547 // know yet) 6 or 10
548 // ATA ONLY
549 uint64_t num_sectors{}; // Number of sectors
550 ata_smart_values smartval{}; // SMART data
552 bool offline_started{}; // true if offline data collection was started
553 bool selftest_started{}; // true if self-test was started
554};
555
556/// Runtime state data for a device.
558: public persistent_dev_state,
559 public temp_dev_state
560{
562 void update_temp_state();
563};
564
565/// Container for configuration info for each device.
566typedef std::vector<dev_config> dev_config_vector;
567
568/// Container for state info for each device.
569typedef std::vector<dev_state> dev_state_vector;
570
571// Copy ATA attributes to persistent state.
573{
574 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
577 pa.id = ta.id;
578 if (ta.id == 0) {
579 pa.val = pa.worst = 0; pa.raw = 0;
580 continue;
581 }
582 pa.val = ta.current;
583 pa.worst = ta.worst;
584 pa.raw = ta.raw[0]
585 | ( ta.raw[1] << 8)
586 | ( ta.raw[2] << 16)
587 | ((uint64_t)ta.raw[3] << 24)
588 | ((uint64_t)ta.raw[4] << 32)
589 | ((uint64_t)ta.raw[5] << 40);
590 pa.resvd = ta.reserv;
591 }
592}
593
594// Copy ATA from persistent to temp state.
596{
597 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
598 const ata_attribute & pa = ata_attributes[i];
600 ta.id = pa.id;
601 if (pa.id == 0) {
602 ta.current = ta.worst = 0;
603 memset(ta.raw, 0, sizeof(ta.raw));
604 continue;
605 }
606 ta.current = pa.val;
607 ta.worst = pa.worst;
608 ta.raw[0] = (unsigned char) pa.raw;
609 ta.raw[1] = (unsigned char)(pa.raw >> 8);
610 ta.raw[2] = (unsigned char)(pa.raw >> 16);
611 ta.raw[3] = (unsigned char)(pa.raw >> 24);
612 ta.raw[4] = (unsigned char)(pa.raw >> 32);
613 ta.raw[5] = (unsigned char)(pa.raw >> 40);
614 ta.reserv = pa.resvd;
615 }
616}
617
618// Parse a line from a state file.
619static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
620{
621 static const regular_expression regex(
622 "^ *"
623 "((temperature-min)" // (1 (2)
624 "|(temperature-max)" // (3)
625 "|(self-test-errors)" // (4)
626 "|(self-test-last-err-hour)" // (5)
627 "|(scheduled-test-next-check)" // (6)
628 "|(selective-test-last-start)" // (7)
629 "|(selective-test-last-end)" // (8)
630 "|(ata-error-count)" // (9)
631 "|(mail\\.([0-9]+)\\." // (10 (11)
632 "((count)" // (12 (13)
633 "|(first-sent-time)" // (14)
634 "|(last-sent-time)" // (15)
635 ")" // 12)
636 ")" // 10)
637 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
638 "((id)" // (18 (19)
639 "|(val)" // (20)
640 "|(worst)" // (21)
641 "|(raw)" // (22)
642 "|(resvd)" // (23)
643 ")" // 18)
644 ")" // 16)
645 "|(nvme-err-log-entries)" // (24)
646 ")" // 1)
647 " *= *([0-9]+)[ \n]*$" // (25)
648 );
649
650 const int nmatch = 1+25;
652 if (!regex.execute(line, nmatch, match))
653 return false;
654 if (match[nmatch-1].rm_so < 0)
655 return false;
656
657 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
658
659 int m = 1;
660 if (match[++m].rm_so >= 0)
661 state.tempmin = (unsigned char)val;
662 else if (match[++m].rm_so >= 0)
663 state.tempmax = (unsigned char)val;
664 else if (match[++m].rm_so >= 0)
665 state.selflogcount = (unsigned char)val;
666 else if (match[++m].rm_so >= 0)
667 state.selfloghour = (unsigned short)val;
668 else if (match[++m].rm_so >= 0)
669 state.scheduled_test_next_check = (time_t)val;
670 else if (match[++m].rm_so >= 0)
671 state.selective_test_last_start = val;
672 else if (match[++m].rm_so >= 0)
673 state.selective_test_last_end = val;
674 else if (match[++m].rm_so >= 0)
675 state.ataerrorcount = (int)val;
676 else if (match[m+=2].rm_so >= 0) {
677 int i = atoi(line+match[m].rm_so);
678 if (!(0 <= i && i < SMARTD_NMAIL))
679 return false;
680 if (i == MAILTYPE_TEST) // Don't suppress test mails
681 return true;
682 if (match[m+=2].rm_so >= 0)
683 state.maillog[i].logged = (int)val;
684 else if (match[++m].rm_so >= 0)
685 state.maillog[i].firstsent = (time_t)val;
686 else if (match[++m].rm_so >= 0)
687 state.maillog[i].lastsent = (time_t)val;
688 else
689 return false;
690 }
691 else if (match[m+=5+1].rm_so >= 0) {
692 int i = atoi(line+match[m].rm_so);
693 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
694 return false;
695 if (match[m+=2].rm_so >= 0)
696 state.ata_attributes[i].id = (unsigned char)val;
697 else if (match[++m].rm_so >= 0)
698 state.ata_attributes[i].val = (unsigned char)val;
699 else if (match[++m].rm_so >= 0)
700 state.ata_attributes[i].worst = (unsigned char)val;
701 else if (match[++m].rm_so >= 0)
702 state.ata_attributes[i].raw = val;
703 else if (match[++m].rm_so >= 0)
704 state.ata_attributes[i].resvd = (unsigned char)val;
705 else
706 return false;
707 }
708 else if (match[m+7].rm_so >= 0)
709 state.nvme_err_log_entries = val;
710 else
711 return false;
712 return true;
713}
714
715// Read a state file.
716static bool read_dev_state(const char * path, persistent_dev_state & state)
717{
718 stdio_file f(path, "r");
719 if (!f) {
720 if (errno != ENOENT)
721 pout("Cannot read state file \"%s\"\n", path);
722 return false;
723 }
724#ifdef __CYGWIN__
725 setmode(fileno(f), O_TEXT); // Allow files with \r\n
726#endif
727
728 persistent_dev_state new_state;
729 int good = 0, bad = 0;
730 char line[256];
731 while (fgets(line, sizeof(line), f)) {
732 const char * s = line + strspn(line, " \t");
733 if (!*s || *s == '#')
734 continue;
735 if (!parse_dev_state_line(line, new_state))
736 bad++;
737 else
738 good++;
739 }
740
741 if (bad) {
742 if (!good) {
743 pout("%s: format error\n", path);
744 return false;
745 }
746 pout("%s: %d invalid line(s) ignored\n", path, bad);
747 }
748
749 // This sets the values missing in the file to 0.
750 state = new_state;
751 return true;
752}
753
// Write a "NAME = VAL" line to F.  Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
759
// Write a "NAME1.ID.NAME2 = VAL" line to F.  Nothing is written if VAL is zero.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
765
766// Write a state file
767static bool write_dev_state(const char * path, const persistent_dev_state & state)
768{
769 // Rename old "file" to "file~"
770 std::string pathbak = path; pathbak += '~';
771 unlink(pathbak.c_str());
772 rename(path, pathbak.c_str());
773
774 stdio_file f(path, "w");
775 if (!f) {
776 pout("Cannot create state file \"%s\"\n", path);
777 return false;
778 }
779
780 fprintf(f, "# smartd state file\n");
781 write_dev_state_line(f, "temperature-min", state.tempmin);
782 write_dev_state_line(f, "temperature-max", state.tempmax);
783 write_dev_state_line(f, "self-test-errors", state.selflogcount);
784 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
785 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
786 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
787 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
788
789 for (int i = 0; i < SMARTD_NMAIL; i++) {
790 if (i == MAILTYPE_TEST) // Don't suppress test mails
791 continue;
792 const mailinfo & mi = state.maillog[i];
793 if (!mi.logged)
794 continue;
795 write_dev_state_line(f, "mail", i, "count", mi.logged);
796 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
797 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
798 }
799
800 // ATA ONLY
801 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
802
803 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
804 const auto & pa = state.ata_attributes[i];
805 if (!pa.id)
806 continue;
807 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
808 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
809 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
810 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
811 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
812 }
813
814 // NVMe only
815 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
816
817 return true;
818}
819
820// Write to the attrlog file
821static bool write_dev_attrlog(const char * path, const dev_state & state)
822{
823 stdio_file f(path, "a");
824 if (!f) {
825 pout("Cannot create attribute log file \"%s\"\n", path);
826 return false;
827 }
828
829
830 time_t now = time(nullptr);
831 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
832 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
833 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
834 tms->tm_hour, tms->tm_min, tms->tm_sec);
835 // ATA ONLY
836 for (const auto & pa : state.ata_attributes) {
837 if (!pa.id)
838 continue;
839 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
840 }
841 // SCSI ONLY
842 const struct scsiErrorCounter * ecp;
843 const char * pageNames[3] = {"read", "write", "verify"};
844 for (int k = 0; k < 3; ++k) {
845 if ( !state.scsi_error_counters[k].found ) continue;
846 ecp = &state.scsi_error_counters[k].errCounter;
847 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
848 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
849 "\t%s-corr-by-retry;%" PRIu64 ";"
850 "\t%s-total-err-corrected;%" PRIu64 ";"
851 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
852 "\t%s-gb-processed;%.3f;"
853 "\t%s-total-unc-errors;%" PRIu64 ";",
854 pageNames[k], ecp->counter[0],
855 pageNames[k], ecp->counter[1],
856 pageNames[k], ecp->counter[2],
857 pageNames[k], ecp->counter[3],
858 pageNames[k], ecp->counter[4],
859 pageNames[k], (ecp->counter[5] / 1000000000.0),
860 pageNames[k], ecp->counter[6]);
861 }
862 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
863 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
864 }
865 // write SCSI current temperature if it is monitored
866 if (state.temperature)
867 fprintf(f, "\ttemperature;%d;", state.temperature);
868 // end of line
869 fprintf(f, "\n");
870 return true;
871}
872
873// Write all state files. If write_always is false, don't write
874// unless must_write is set.
875static void write_all_dev_states(const dev_config_vector & configs,
876 dev_state_vector & states,
877 bool write_always = true)
878{
879 for (unsigned i = 0; i < states.size(); i++) {
880 const dev_config & cfg = configs.at(i);
881 if (cfg.state_file.empty())
882 continue;
883 dev_state & state = states[i];
884 if (!write_always && !state.must_write)
885 continue;
886 if (!write_dev_state(cfg.state_file.c_str(), state))
887 continue;
888 state.must_write = false;
889 if (write_always || debugmode)
890 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
891 cfg.name.c_str(), cfg.state_file.c_str());
892 }
893}
894
895// Write to all attrlog files
896static void write_all_dev_attrlogs(const dev_config_vector & configs,
897 dev_state_vector & states)
898{
899 for (unsigned i = 0; i < states.size(); i++) {
900 const dev_config & cfg = configs.at(i);
901 if (cfg.attrlog_file.empty())
902 continue;
903 dev_state & state = states[i];
904 if (state.attrlog_dirty) {
905 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
906 state.attrlog_dirty = false;
907 }
908 }
909}
910
911extern "C" { // signal handlers require C-linkage
912
913// Note if we catch a SIGUSR1
914static void USR1handler(int sig)
915{
916 if (SIGUSR1==sig)
918 return;
919}
920
921#ifdef _WIN32
922// Note if we catch a SIGUSR2
923static void USR2handler(int sig)
924{
925 if (SIGUSR2==sig)
926 caughtsigUSR2=1;
927 return;
928}
929#endif
930
931// Note if we catch a HUP (or INT in debug mode)
932static void HUPhandler(int sig)
933{
934 if (sig==SIGHUP)
935 caughtsigHUP=1;
936 else
937 caughtsigHUP=2;
938 return;
939}
940
941// signal handler for TERM, QUIT, and INT (if not in debug mode)
942static void sighandler(int sig)
943{
944 if (!caughtsigEXIT)
945 caughtsigEXIT=sig;
946 return;
947}
948
949} // extern "C"
950
951#ifdef HAVE_LIBCAP_NG
952// capabilities(7) support
953
954static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
955
956static void capabilities_drop_now()
957{
958 if (!capabilities_mode)
959 return;
960 capng_clear(CAPNG_SELECT_BOTH);
961 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
962 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
963 if (warn_as_user && (warn_uid || warn_gid)) {
964 // For popen_as_ugid()
965 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
966 CAP_SETGID, CAP_SETUID, -1);
967 }
968 if (capabilities_mode > 1) {
969 // For exim MTA
970 capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
971 CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
972 }
973 capng_apply(CAPNG_SELECT_BOTH);
974}
975
976static void capabilities_log_error_hint()
977{
978 if (!capabilities_mode)
979 return;
980 PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
981 (capabilities_mode == 1 ? "', try '--capabilities=mail'"
982 : "=mail', please inform " PACKAGE_BUGREPORT));
983}
984
985#else // HAVE_LIBCAP_NG
986// No capabilities(7) support
987
// No-op replacements
static inline void capabilities_drop_now()
{ }

static inline void capabilities_log_error_hint()
{ }
990
991#endif // HAVE_LIBCAP_NG
992
993// a replacement for setenv() which is not available on all platforms.
994// Note that the string passed to putenv must not be freed or made
995// invalid, since a pointer to it is kept by putenv(). This means that
996// it must either be a static buffer or allocated off the heap. The
997// string can be freed if the environment variable is redefined via
998// another call to putenv(). There is no portable way to unset a variable
999// with putenv(). So we manage the buffer in a static object.
1000// Using setenv() if available is not considered because some
1001// implementations may produce memory leaks.
1002
1004{
1005public:
1006 env_buffer() = default;
1007 env_buffer(const env_buffer &) = delete;
1008 void operator=(const env_buffer &) = delete;
1009
1010 void set(const char * name, const char * value);
1011private:
1012 char * m_buf = nullptr;
1013};
1014
1015void env_buffer::set(const char * name, const char * value)
1016{
1017 int size = strlen(name) + 1 + strlen(value) + 1;
1018 char * newbuf = new char[size];
1019 snprintf(newbuf, size, "%s=%s", name, value);
1020
1021 if (putenv(newbuf))
1022 throw std::runtime_error("putenv() failed");
1023
1024 // This assumes that the same NAME is passed on each call
1025 delete [] m_buf;
1026 m_buf = newbuf;
1027}
1028
1029#define EBUFLEN 1024
1030
1031static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1033
1034// If either address or executable path is non-null then send and log
1035// a warning email, or execute executable
1036static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1037{
1038 // See if user wants us to send mail
1039 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1040 return;
1041
1042 // Which type of mail are we sending?
1043 static const char * const whichfail[] = {
1044 "EmailTest", // 0
1045 "Health", // 1
1046 "Usage", // 2
1047 "SelfTest", // 3
1048 "ErrorCount", // 4
1049 "FailedHealthCheck", // 5
1050 "FailedReadSmartData", // 6
1051 "FailedReadSmartErrorLog", // 7
1052 "FailedReadSmartSelfTestLog", // 8
1053 "FailedOpenDevice", // 9
1054 "CurrentPendingSector", // 10
1055 "OfflineUncorrectableSector", // 11
1056 "Temperature" // 12
1057 };
1058 STATIC_ASSERT(sizeof(whichfail) == SMARTD_NMAIL * sizeof(whichfail[0]));
1059
1060 if (!(0 <= which && which < SMARTD_NMAIL)) {
1061 PrintOut(LOG_CRIT, "Internal error in MailWarning(): which=%d\n", which);
1062 return;
1063 }
1064 mailinfo * mail = state.maillog + which;
1065
1066 // Calc current and next interval for warning reminder emails
1067 int days, nextdays;
1068 if (which == 0)
1069 days = nextdays = -1; // EmailTest
1070 else switch (cfg.emailfreq) {
1071 case emailfreqs::once:
1072 days = nextdays = -1; break;
1073 case emailfreqs::always:
1074 days = nextdays = 0; break;
1075 case emailfreqs::daily:
1076 days = nextdays = 1; break;
1078 // 0, 1, 2, 3, 4, 5, 6, 7, ... => 1, 2, 4, 8, 16, 32, 32, 32, ...
1079 nextdays = 1 << ((unsigned)mail->logged <= 5 ? mail->logged : 5);
1080 // 0, 1, 2, 3, 4, 5, 6, 7, ... => 0, 1, 2, 4, 8, 16, 32, 32, ... (0 not used below)
1081 days = ((unsigned)mail->logged <= 5 ? nextdays >> 1 : nextdays);
1082 break;
1083 default:
1084 PrintOut(LOG_CRIT, "Internal error in MailWarning(): cfg.emailfreq=%d\n", (int)cfg.emailfreq);
1085 return;
1086 }
1087
1088 time_t now = time(nullptr);
1089 if (mail->logged) {
1090 // Return if no warning reminder email needs to be sent (now)
1091 if (days < 0)
1092 return; // '-M once' or EmailTest
1093 if (days > 0 && now < mail->lastsent + days * 24 * 3600)
1094 return; // '-M daily/diminishing' and too early
1095 }
1096 else {
1097 // Record the time of this first email message
1098 mail->firstsent = now;
1099 }
1100
1101 // Record the time of this email message
1102 mail->lastsent = now;
1103
1104 // print warning string into message
1105 // Note: Message length may reach ~300 characters as device names may be
1106 // very long on certain platforms (macOS ~230 characters).
1107 // Message length must not exceed email line length limit, see RFC 5322:
1108 // "... MUST be no more than 998 characters, ... excluding the CRLF."
1109 char message[512];
1110 va_list ap;
1111 va_start(ap, fmt);
1112 vsnprintf(message, sizeof(message), fmt, ap);
1113 va_end(ap);
1114
1115 // replace commas by spaces to separate recipients
1116 std::string address = cfg.emailaddress;
1117 std::replace(address.begin(), address.end(), ',', ' ');
1118
1119 // Export information in environment variables that will be useful
1120 // for user scripts
1121 const char * executable = cfg.emailcmdline.c_str();
1122 static env_buffer env[13];
1123 env[0].set("SMARTD_MAILER", executable);
1124 env[1].set("SMARTD_MESSAGE", message);
1125 char dates[DATEANDEPOCHLEN];
1126 snprintf(dates, sizeof(dates), "%d", mail->logged);
1127 env[2].set("SMARTD_PREVCNT", dates);
1128 dateandtimezoneepoch(dates, mail->firstsent);
1129 env[3].set("SMARTD_TFIRST", dates);
1130 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1131 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1132 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1133 env[6].set("SMARTD_ADDRESS", address.c_str());
1134 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1135
1136 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1137 env[8].set("SMARTD_DEVICETYPE",
1138 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1139 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1140
1141 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1142 dates[0] = 0;
1143 if (nextdays >= 0)
1144 snprintf(dates, sizeof(dates), "%d", nextdays);
1145 env[11].set("SMARTD_NEXTDAYS", dates);
1146 // Avoid false positive recursion detection by smartd_warning.{sh,cmd}
1147 env[12].set("SMARTD_SUBJECT", "");
1148
1149 // now construct a command to send this as EMAIL
1150 if (!*executable)
1151 executable = "<mail>";
1152 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1153 const char * newwarn = (which? "Warning via" : "Test of");
1154
1155 char command[256];
1156#ifdef _WIN32
1157 // Path may contain spaces
1158 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1159#else
1160 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1161#endif
1162
1163 // tell SYSLOG what we are about to do...
1164 PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
1165 (which ? "Sending warning via" : "Executing test of"), executable, newadd,
1166 (
1167#ifdef HAVE_POSIX_API
1168 warn_as_user ?
1169 strprintf(" (uid=%u(%s) gid=%u(%s))",
1170 (unsigned)warn_uid, warn_uname.c_str(),
1171 (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
1172#elif defined(_WIN32)
1173 warn_as_restr_user ? " (restricted user)" :
1174#endif
1175 ""
1176 )
1177 );
1178
1179 // issue the command to send mail or to run the user's executable
1180 errno=0;
1181 FILE * pfp;
1182
1183#ifdef HAVE_POSIX_API
1184 if (warn_as_user) {
1185 pfp = popen_as_ugid(command, "r", warn_uid, warn_gid);
1186 } else
1187#endif
1188 {
1189#ifdef _WIN32
1190 pfp = popen_as_restr_user(command, "r", warn_as_restr_user);
1191#else
1192 pfp = popen(command, "r");
1193#endif
1194 }
1195
1196 if (!pfp)
1197 // failed to popen() mail process
1198 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1199 newwarn, executable, newadd, errno?strerror(errno):"");
1200 else {
1201 // pipe succeeded!
1202 int len;
1203 char buffer[EBUFLEN];
1204
1205 // if unexpected output on stdout/stderr, null terminate, print, and flush
1206 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1207 int count=0;
1208 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1209 buffer[newlen]='\0';
1210 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1211 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1212
1213 // flush pipe if needed
1214 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1215 count++;
1216
1217 // tell user that pipe was flushed, or that something is really wrong
1218 if (count && count<EBUFLEN)
1219 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1220 newwarn, executable, newadd);
1221 else if (count)
1222 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1223 newwarn, executable, newadd);
1224 }
1225
1226 // if something went wrong with mail process, print warning
1227 errno=0;
1228 int status;
1229
1230#ifdef HAVE_POSIX_API
1231 if (warn_as_user) {
1232 status = pclose_as_ugid(pfp);
1233 } else
1234#endif
1235 {
1236 status = pclose(pfp);
1237 }
1238
1239 if (status == -1)
1240 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1241 errno?strerror(errno):"");
1242 else {
1243 // mail process apparently succeeded. Check and report exit status
1244 if (WIFEXITED(status)) {
1245 // exited 'normally' (but perhaps with nonzero status)
1246 int status8 = WEXITSTATUS(status);
1247 if (status8>128)
1248 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1249 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1250 else if (status8) {
1251 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1252 newwarn, executable, newadd, status, status8);
1254 }
1255 else
1256 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1257 }
1258
1259 if (WIFSIGNALED(status))
1260 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1261 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1262
1263 // this branch is probably not possible. If subprocess is
1264 // stopped then pclose() should not return.
1265 if (WIFSTOPPED(status))
1266 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1267 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1268
1269 }
1270 }
1271
1272 // increment mail sent counter
1273 mail->logged++;
1274}
1275
1276static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1278
1279static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1280{
1281 if (!(0 <= which && which < SMARTD_NMAIL))
1282 return;
1283
1284 // Return if no mail sent yet
1285 mailinfo & mi = state.maillog[which];
1286 if (!mi.logged)
1287 return;
1288
1289 // Format & print message
1290 char msg[256];
1291 va_list ap;
1292 va_start(ap, fmt);
1293 vsnprintf(msg, sizeof(msg), fmt, ap);
1294 va_end(ap);
1295
1296 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1297 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1298
1299 // Clear mail counter and timestamps
1300 mi = mailinfo();
1301 state.must_write = true;
1302}
1303
1304#ifndef _WIN32
1305
1306// Output multiple lines via separate syslog(3) calls.
1308static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1309{
1310 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1311 vsnprintf(buf, sizeof(buf), fmt, ap);
1312
1313 for (char * p = buf, * q; p && *p; p = q) {
1314 if ((q = strchr(p, '\n')))
1315 *q++ = 0;
1316 if (*p)
1317 syslog(priority, "%s\n", p);
1318 }
1319}
1320
1321#else // _WIN32
1322// os_win32/syslog_win32.cpp supports multiple lines.
1323#define vsyslog_lines vsyslog
1324#endif // _WIN32
1325
1326// Printing function for watching ataprint commands, or losing them
1327// [From GLIBC Manual: Since the prototype doesn't specify types for
1328// optional arguments, in a call to a variadic function the default
1329// argument promotions are performed on the optional argument
1330// values. This means the objects of type char or short int (whether
1331// signed or not) are promoted to either int or unsigned int, as
1332// appropriate.]
1333void pout(const char *fmt, ...){
1334 va_list ap;
1335
1336 // get the correct time in syslog()
1338 // initialize variable argument list
1339 va_start(ap,fmt);
1340 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1341 if (debugmode && debugmode != 2) {
1342 FILE * f = stdout;
1343#ifdef _WIN32
1344 if (facility == LOG_LOCAL1) // logging to stdout
1345 f = stderr;
1346#endif
1347 vfprintf(f, fmt, ap);
1348 fflush(f);
1349 }
1350 // in debugmode==2 mode we print output from knowndrives.o functions
1351 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1352 openlog("smartd", LOG_PID, facility);
1353 vsyslog_lines(LOG_INFO, fmt, ap);
1354 closelog();
1355 }
1356 va_end(ap);
1357 return;
1358}
1359
1360// This function prints either to stdout or to the syslog as needed.
1361static void PrintOut(int priority, const char *fmt, ...){
1362 va_list ap;
1363
1364 // get the correct time in syslog()
1366 // initialize variable argument list
1367 va_start(ap,fmt);
1368 if (debugmode) {
1369 FILE * f = stdout;
1370#ifdef _WIN32
1371 if (facility == LOG_LOCAL1) // logging to stdout
1372 f = stderr;
1373#endif
1374 vfprintf(f, fmt, ap);
1375 fflush(f);
1376 }
1377 else {
1378 openlog("smartd", LOG_PID, facility);
1379 vsyslog_lines(priority, fmt, ap);
1380 closelog();
1381 }
1382 va_end(ap);
1383 return;
1384}
1385
1386// Used to warn users about invalid checksums. Called from atacmds.cpp.
1387void checksumwarning(const char * string)
1388{
1389 pout("Warning! %s error: invalid SMART checksum.\n", string);
1390}
1391
1392#ifndef _WIN32
1393
1394// Wait for the pid file to show up, this makes sure a calling program knows
1395// that the daemon is really up and running and has a pid to kill it
1396static bool WaitForPidFile()
1397{
1398 int waited, max_wait = 10;
1399 struct stat stat_buf;
1400
1401 if (pid_file.empty() || debugmode)
1402 return true;
1403
1404 for(waited = 0; waited < max_wait; ++waited) {
1405 if (!stat(pid_file.c_str(), &stat_buf)) {
1406 return true;
1407 } else
1408 sleep(1);
1409 }
1410 return false;
1411}
1412
1413#endif // _WIN32
1414
// Forks new process if needed, closes ALL file descriptors,
// redirects stdin, stdout, and stderr.  Not quite daemon().
// See https://www.linuxjournal.com/article/2335
// for a good description of why we do things this way.
//
// Return value:
//   EXIT_STARTUP  an error occurred (already logged)
//   0             this is one of the parent processes, it should exit cleanly
//   -1            no error, the caller continues as the daemon process
static int daemon_init()
{
#ifndef _WIN32

  // flush all buffered streams.  Else we might get two copies of open
  // streams since both parent and child get copies of the buffers.
  fflush(nullptr);

  if (do_fork) {
    pid_t pid;
    if ((pid=fork()) < 0) {
      // unable to fork!
      PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
      return EXIT_STARTUP;
    }
    if (pid) {
      // we are the parent process, wait for pid file, then exit cleanly
      if(!WaitForPidFile()) {
        PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
        return EXIT_STARTUP;
      }
      return 0;
    }

    // from here on, we are the child process.
    setsid();

    // Fork one more time to avoid any possibility of having terminals
    if ((pid=fork()) < 0) {
      // unable to fork!
      PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
      return EXIT_STARTUP;
    }
    if (pid)
      // we are the parent process -- exit cleanly
      return 0;

    // Now we are the child's child...
  }

  // close any open file descriptors
  for (int i = sysconf(_SC_OPEN_MAX); --i >= 0; )
    close(i);

  // redirect any IO attempts to /dev/null and change to root directory
  // All descriptors were closed above, so open() is expected to return 0
  // and the two dup() calls 1 and 2. The '&&' chain stops at the first
  // step which fails.
  int fd = open("/dev/null", O_RDWR);
  if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
    PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
    return EXIT_STARTUP;
  }
  umask(0022);

  if (do_fork)
    PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());

#else // _WIN32

  // No fork() on native Win32
  // Detach this process from console
  fflush(nullptr);
  if (daemon_detach("smartd")) {
    PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
    return EXIT_STARTUP;
  }
  // stdin/out/err now closed if not redirected

#endif // _WIN32

  // No error, continue in main_worker()
  return -1;
}
1490
1491// create a PID file containing the current process id
1492static bool write_pid_file()
1493{
1494 if (!pid_file.empty()) {
1495 pid_t pid = getpid();
1496 mode_t old_umask;
1497#ifndef __CYGWIN__
1498 old_umask = umask(0077); // rwx------
1499#else
1500 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1501 old_umask = umask(0033); // rwxr--r--
1502#endif
1503
1504 stdio_file f(pid_file.c_str(), "w");
1505 umask(old_umask);
1506 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1507 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1508 return false;
1509 }
1510 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1511 }
1512 return true;
1513}
1514
1515// Prints header identifying version of code and home
1516static void PrintHead()
1517{
1518 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1519}
1520
1521// prints help info for configuration file Directives
1522static void Directives()
1523{
1524 PrintOut(LOG_INFO,
1525 "Configuration file (%s) Directives (after device name):\n"
1526 " -d TYPE Set the device type: auto, ignore, removable,\n"
1527 " %s\n"
1528 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1529 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1530 " -S VAL Enable/disable attribute autosave (on/off)\n"
1531 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1532 " -H Monitor SMART Health Status, report if failed\n"
1533 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1534 " -l TYPE Monitor SMART log or self-test status:\n"
1535 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1536 " -l scterc,R,W Set SCT Error Recovery Control\n"
1537 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1538 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1539 " -f Monitor 'Usage' Attributes, report failures\n"
1540 " -m ADD Send email warning to address ADD\n"
1541 " -M TYPE Modify email warning behavior (see man page)\n"
1542 " -p Report changes in 'Prefailure' Attributes\n"
1543 " -u Report changes in 'Usage' Attributes\n"
1544 " -t Equivalent to -p and -u Directives\n"
1545 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1546 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1547 " -i ID Ignore Attribute ID for -f Directive\n"
1548 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1549 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1550 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1551 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1552 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1553 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1554 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1555 " -F TYPE Use firmware bug workaround:\n"
1556 " %s\n"
1557 " -c i=N Set interval between disk checks to N seconds\n"
1558 " # Comment: text after a hash sign is ignored\n"
1559 " \\ Line continuation character\n"
1560 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1561 "Use ID = 0 to turn off -C and/or -U Directives\n"
1562 "Example: /dev/sda -a\n",
1563 configfile,
1564 smi()->get_valid_dev_types_str().c_str(),
1566}
1567
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or nullptr on failure.
   The entries for 'u' and 'C' depend on the build configuration. */
static const char *GetValidArgList(char opt)
{
  // Stays nullptr for options without an argument list
  const char * list = nullptr;

  switch (opt) {
    case 'A':
    case 's':
      list = "<PATH_PREFIX>, -";
      break;
    case 'B':
      list = "[+]<FILE_NAME>";
      break;
    case 'c':
      list = "<FILE_NAME>, -";
      break;
    case 'l':
      list = "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
      break;
    case 'q':
      list = "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests";
      break;
    case 'r':
      list = "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
      break;
    case 'p':
    case 'w':
      list = "<FILE_NAME>";
      break;
    case 'i':
      list = "<INTEGER_SECONDS>";
      break;
#ifdef HAVE_POSIX_API
    case 'u':
      list = "<USER>[:<GROUP>], -";
      break;
#elif defined(_WIN32)
    case 'u':
      list = "restricted, unchanged";
      break;
#endif
#ifdef HAVE_LIBCAP_NG
    case 'C':
      list = "mail, <no_argument>";
      break;
#endif
    default:
      break;
  }

  return list;
}
1606
1607/* prints help information for command syntax */
1608static void Usage()
1609{
1610 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1611#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1612 PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
1613#else
1614 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1615#endif
1616 PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
1617#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1618 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
1619#endif
1620 PrintOut(LOG_INFO,"\n");
1621 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1622 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1623 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1624#ifdef SMARTMONTOOLS_DRIVEDBDIR
1625 PrintOut(LOG_INFO,"\n");
1626 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1627#endif
1628 PrintOut(LOG_INFO,"]\n\n");
1629 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1630 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1631 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1632#ifdef HAVE_LIBCAP_NG
1633 PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
1634 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1635 " Warning: Mail notification may not work when used.\n\n");
1636#endif
1637 PrintOut(LOG_INFO," -d, --debug\n");
1638 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1639 PrintOut(LOG_INFO," -D, --showdirectives\n");
1640 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1641 PrintOut(LOG_INFO," -h, --help, --usage\n");
1642 PrintOut(LOG_INFO," Display this help and exit\n\n");
1643 PrintOut(LOG_INFO," -i N, --interval=N\n");
1644 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1645 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1646#ifndef _WIN32
1647 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1648#else
1649 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1650#endif
1651#ifndef _WIN32
1652 PrintOut(LOG_INFO," -n, --no-fork\n");
1653 PrintOut(LOG_INFO," Do not fork into background\n");
1654#ifdef HAVE_LIBSYSTEMD
1655 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1656#endif // HAVE_LIBSYSTEMD
1657 PrintOut(LOG_INFO,"\n");
1658#endif // WIN32
1659 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1660 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1661 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1662 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1663 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1664 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1665#ifdef SMARTMONTOOLS_SAVESTATES
1666 PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
1667#else
1668 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1669#endif
1670 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1671#ifdef SMARTMONTOOLS_SAVESTATES
1672 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1673#endif
1674 PrintOut(LOG_INFO,"\n");
1675 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1676 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1677#ifndef _WIN32
1678 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1679#else
1680 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1681#endif
1682#ifdef HAVE_POSIX_API
1683 PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
1684 PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
1685#elif defined(_WIN32)
1686 PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
1687 PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
1688#endif
1689#ifdef _WIN32
1690 PrintOut(LOG_INFO," --service\n");
1691 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1692 PrintOut(LOG_INFO," smartd install [options]\n");
1693 PrintOut(LOG_INFO," Remove service with:\n");
1694 PrintOut(LOG_INFO," smartd remove\n\n");
1695#endif // _WIN32
1696 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1697 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1698}
1699
1700static int CloseDevice(smart_device * device, const char * name)
1701{
1702 if (!device->close()){
1703 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1704 return 1;
1705 }
1706 // device successfully closed
1707 return 0;
1708}
1709
// Replace invalid characters in cfg.dev_idinfo.
// Each character outside ' '..'~' and a '~' at the start of the string
// is replaced by '?'. Returns true if the string was modified.
static bool sanitize_dev_idinfo(std::string & s)
{
  static_assert(' ' == 0x20 && '~' == 0x07e, "ASCII character set assumed");

  bool modified = false;
  for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
    const char ch = s[pos];
    const bool printable = (' ' <= ch && ch <= '~');
    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
    const bool leading_tilde = (pos == 0 && ch == '~');
    if (!printable || leading_tilde) {
      s[pos] = '?';
      modified = true;
    }
  }
  return modified;
}
1725
// Return true if a char is not allowed in a state file name.
// Only the digits and the letters 'A'-'Z' and 'a'-'z' are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1733
1734// Read error count from Summary or Extended Comprehensive SMART error log
1735// Return -1 on error
1736static int read_ata_error_count(ata_device * device, const char * name,
1737 firmwarebug_defs firmwarebugs, bool extended)
1738{
1739 if (!extended) {
1741 if (ataReadErrorLog(device, &log, firmwarebugs)){
1742 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1743 return -1;
1744 }
1745 return (log.error_log_pointer ? log.ata_error_count : 0);
1746 }
1747 else {
1749 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1750 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1751 return -1;
1752 }
1753 // Some disks use the reserved byte as index, see ataprint.cpp.
1754 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1755 }
1756}
1757
1758// returns <0 if problem. Otherwise, bottom 8 bits are the self test
1759// error count, and top bits are the power-on hours of the last error.
1760static int SelfTestErrorCount(ata_device * device, const char * name,
1761 firmwarebug_defs firmwarebugs)
1762{
1763 struct ata_smart_selftestlog log;
1764
1765 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1766 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1767 return -1;
1768 }
1769
1770 if (!log.mostrecenttest)
1771 // No tests logged
1772 return 0;
1773
1774 // Count failed self-tests
1775 int errcnt = 0, hours = 0;
1776 for (int i = 20; i >= 0; i--) {
1777 int j = (i + log.mostrecenttest) % 21;
1779 if (!nonempty(&entry, sizeof(entry)))
1780 continue;
1781
1782 int status = entry.selfteststatus >> 4;
1783 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1784 // First successful extended self-test, stop count
1785 break;
1786
1787 if (0x3 <= status && status <= 0x8) {
1788 // Self-test showed an error
1789 errcnt++;
1790 // Keep track of time of most recent error
1791 if (!hours)
1792 hours = entry.timestamp;
1793 }
1794 }
1795
1796 return ((hours << 8) | errcnt);
1797}
1798
// Decode the packed result of SelfTestErrorCount(): the bottom 8 bits are
// the error count, the 16 bits above are the power-on hours.
// The argument is parenthesized so that expressions like 'a | b' are
// evaluated before the mask or shift is applied.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1801
// Check offline data collection status.
// Returns true if the low 7 bits of STATUS are 0x03, bit 7 is ignored.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7f;
  return (code == 0x03);
}
1807
// Check self-test execution status.
// Returns true if the upper 4 bits of STATUS are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1813
1814// Log offline data collection status
1815static void log_offline_data_coll_status(const char * name, unsigned char status)
1816{
1817 const char * msg;
1818 switch (status & 0x7f) {
1819 case 0x00: msg = "was never started"; break;
1820 case 0x02: msg = "was completed without error"; break;
1821 case 0x03: msg = "is in progress"; break;
1822 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1823 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1824 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1825 default: msg = nullptr;
1826 }
1827
1828 if (msg)
1829 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1830 "Device: %s, offline data collection %s%s\n", name, msg,
1831 ((status & 0x80) ? " (auto:on)" : ""));
1832 else
1833 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1834 name, status);
1835}
1836
1837// Log self-test execution status
1838static void log_self_test_exec_status(const char * name, unsigned char status)
1839{
1840 const char * msg;
1841 switch (status >> 4) {
1842 case 0x0: msg = "completed without error"; break;
1843 case 0x1: msg = "was aborted by the host"; break;
1844 case 0x2: msg = "was interrupted by the host with a reset"; break;
1845 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1846 case 0x4: msg = "completed with error (unknown test element)"; break;
1847 case 0x5: msg = "completed with error (electrical test element)"; break;
1848 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1849 case 0x7: msg = "completed with error (read test element)"; break;
1850 case 0x8: msg = "completed with error (handling damage?)"; break;
1851 default: msg = nullptr;
1852 }
1853
1854 if (msg)
1855 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1856 "Device: %s, previous self-test %s\n", name, msg);
1857 else if ((status >> 4) == 0xf)
1858 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1859 name, status & 0x0f);
1860 else
1861 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1862 name, status);
1863}
1864
1865// Check pending sector count id (-C, -U directives).
1866static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1867 unsigned char id, const char * msg)
1868{
1869 // Check attribute index
1870 int i = ata_find_attr_index(id, state.smartval);
1871 if (i < 0) {
1872 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1873 cfg.name.c_str(), msg, id);
1874 return false;
1875 }
1876
1877 // Check value
1878 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1879 cfg.attribute_defs);
1880 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1881 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1882 cfg.name.c_str(), msg, id, rawval, rawval);
1883 return false;
1884 }
1885
1886 return true;
1887}
1888
1889// Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1890static void finish_device_scan(dev_config & cfg, dev_state & state)
1891{
1892 // Set cfg.emailfreq if user hasn't set it
1893 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && cfg.emailfreq == emailfreqs::unknown) {
1894 // Avoid that emails are suppressed forever due to state persistence
1895 if (cfg.state_file.empty())
1897 else
1899 }
1900
1901 // Start self-test regex check now if time was not read from state file
1902 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1903 state.scheduled_test_next_check = time(nullptr);
1904}
1905
// Common function to format result message for ATA setting.
// Appends NAME to MSG (after ", " if MSG is not empty) followed by:
//   ":--"   if OK is false
//   ":off"  if SET_OPTION is negative
//   ":N"    with N = SET_OPTION-1 if HAS_VALUE is true
//   ":on"   if SET_OPTION is positive
//   nothing otherwise
static void format_set_result_msg(std::string & msg, const char * name, bool ok,
                                  int set_option = 0, bool has_value = false)
{
  if (!msg.empty())
    msg += ", ";
  msg += name;

  if (!ok) {
    msg += ":--";
    return;
  }
  if (set_option < 0) {
    msg += ":off";
    return;
  }
  if (has_value) {
    msg += ':';
    msg += std::to_string(set_option-1);
    return;
  }
  if (set_option > 0)
    msg += ":on";
}
1922
1923// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1924static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1925{
1926 if (!cfg.id_is_unique)
1927 return false;
1928
1929 for (const auto & prev_cfg : prev_cfgs) {
1930 if (!prev_cfg.id_is_unique)
1931 continue;
1932 if (cfg.dev_idinfo != prev_cfg.dev_idinfo)
1933 continue;
1934
1935 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1936 cfg.dev_name.c_str(), prev_cfg.dev_name.c_str());
1937 return true;
1938 }
1939
1940 return false;
1941}
1942
// Passed to ata_read_identity() by ATADeviceScan(); currently always off.
// TODO: Add '-F swapid' directive
constexpr bool fix_swapped_id = false;
1945
1946// scan to see what ata devices there are, and if they support SMART
1947static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1948 const dev_config_vector * prev_cfgs)
1949{
1950 int supported=0;
1951 struct ata_identify_device drive;
1952 const char *name = cfg.name.c_str();
1953 int retid;
1954
1955 // Device must be open
1956
1957 // Get drive identity structure
1958 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1959 if (retid<0)
1960 // Unable to read Identity structure
1961 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1962 else
1963 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1964 name, packetdevicetype(retid-1));
1965 CloseDevice(atadev, name);
1966 return 2;
1967 }
1968
1969 // Get drive identity, size and rotation rate (HDD/SSD)
1970 char model[40+1], serial[20+1], firmware[8+1];
1971 ata_format_id_string(model, drive.model, sizeof(model)-1);
1972 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1973 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1974
1975 ata_size_info sizes;
1976 ata_get_size_info(&drive, sizes);
1977 state.num_sectors = sizes.sectors;
1978 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1979
1980 char wwn[64]; wwn[0] = 0;
1981 unsigned oui = 0; uint64_t unique_id = 0;
1982 int naa = ata_get_wwn(&drive, oui, unique_id);
1983 if (naa >= 0)
1984 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1985
1986 // Format device id string for warning emails
1987 char cap[32];
1988 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1989 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1990 cfg.id_is_unique = true; // TODO: Check serial?
1992 cfg.id_is_unique = false;
1993
1994 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1995
1996 // Check for duplicates
1997 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
1998 CloseDevice(atadev, name);
1999 return 1;
2000 }
2001
2002 // Show if device in database, and use preset vendor attribute
2003 // options unless user has requested otherwise.
2004 if (cfg.ignorepresets)
2005 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
2006 else {
2007 // Apply vendor specific presets, print warning if present
2008 std::string dbversion;
2010 &drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
2011 if (!dbentry)
2012 PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
2013 (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
2014 else {
2015 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
2016 name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
2017 (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
2018 if (*dbentry->warningmsg)
2019 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
2020 }
2021 }
2022
2023 // Check for ATA Security LOCK
2024 unsigned short word128 = drive.words088_255[128-88];
2025 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
2026 if (locked)
2027 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
2028
2029 // Set default '-C 197[+]' if no '-C ID' is specified.
2030 if (!cfg.curr_pending_set)
2032 // Set default '-U 198[+]' if no '-U ID' is specified.
2033 if (!cfg.offl_pending_set)
2035
2036 // If requested, show which presets would be used for this drive
2037 if (cfg.showpresets) {
2038 int savedebugmode=debugmode;
2039 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
2040 if (!debugmode)
2041 debugmode=2;
2042 show_presets(&drive);
2043 debugmode=savedebugmode;
2044 }
2045
2046 // see if drive supports SMART
2047 supported=ataSmartSupport(&drive);
2048 if (supported!=1) {
2049 if (supported==0)
2050 // drive does NOT support SMART
2051 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2052 else
2053 // can't tell if drive supports SMART
2054 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2055
2056 // should we proceed anyway?
2057 if (cfg.permissive) {
2058 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2059 }
2060 else {
2061 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2062 CloseDevice(atadev, name);
2063 return 2;
2064 }
2065 }
2066
2067 if (ataEnableSmart(atadev)) {
2068 // Enable SMART command has failed
2069 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2070
2071 if (ataIsSmartEnabled(&drive) <= 0) {
2072 if (!cfg.permissive) {
2073 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2074 CloseDevice(atadev, name);
2075 return 2;
2076 }
2077 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2078 }
2079 else {
2080 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2081 }
2082 }
2083
2084 // disable device attribute autosave...
2085 if (cfg.autosave==1) {
2086 if (ataDisableAutoSave(atadev))
2087 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2088 else
2089 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2090 }
2091
2092 // or enable device attribute autosave
2093 if (cfg.autosave==2) {
2094 if (ataEnableAutoSave(atadev))
2095 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2096 else
2097 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2098 }
2099
2100 // capability check: SMART status
2101 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2102 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2103 cfg.smartcheck = false;
2104 }
2105
2106 // capability check: Read smart values and thresholds. Note that
2107 // smart values are ALSO needed even if we ONLY want to know if the
2108 // device is self-test log or error-log capable! After ATA-5, this
2109 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2110 // but sadly not for ATA-5. Sigh.
2111
2112 // do we need to get SMART data?
2113 bool smart_val_ok = false;
2114 if ( cfg.autoofflinetest || cfg.selftest
2115 || cfg.errorlog || cfg.xerrorlog
2116 || cfg.offlinests || cfg.selfteststs
2117 || cfg.usagefailed || cfg.prefail || cfg.usage
2118 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2119 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2120
2121 if (ataReadSmartValues(atadev, &state.smartval)) {
2122 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2123 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2124 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2125 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2126 }
2127 else {
2128 smart_val_ok = true;
2129 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2130 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2131 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2132 cfg.usagefailed = false;
2133 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2134 memset(&state.smartthres, 0, sizeof(state.smartthres));
2135 }
2136 }
2137
2138 // see if the necessary Attribute is there to monitor offline or
2139 // current pending sectors or temperature
2140 if ( cfg.curr_pending_id
2141 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2142 "Current_Pending_Sector"))
2143 cfg.curr_pending_id = 0;
2144
2145 if ( cfg.offl_pending_id
2146 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2147 "Offline_Uncorrectable"))
2148 cfg.offl_pending_id = 0;
2149
2150 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2152 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2153 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2154 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2155 }
2156
2157 // Report ignored '-r' or '-R' directives
2158 for (int id = 1; id <= 255; id++) {
2160 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2161 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2162 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2163
2164 int idx = ata_find_attr_index(id, state.smartval);
2165 if (idx < 0)
2166 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2167 else {
2168 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2169 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2170 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2171 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2172 }
2173 }
2174 }
2175 }
2176
2177 // enable/disable automatic on-line testing
2178 if (cfg.autoofflinetest) {
2179 // is this an enable or disable request?
2180 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2181 if (!smart_val_ok)
2182 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2183 else {
2184 // if command appears unsupported, issue a warning...
2185 if (!isSupportAutomaticTimer(&state.smartval))
2186 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2187 // ... but then try anyway
2188 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2189 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2190 else
2191 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2192 }
2193 }
2194
2195 // Read log directories if required for capability check
2196 ata_smart_log_directory smart_logdir, gp_logdir;
2197 bool smart_logdir_ok = false, gp_logdir_ok = false;
2198
2200 && (cfg.errorlog || cfg.selftest)
2201 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2202 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2203 smart_logdir_ok = true;
2204 }
2205
2206 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2207 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2208 gp_logdir_ok = true;
2209 }
2210
2211 // capability check: self-test-log
2212 state.selflogcount = 0; state.selfloghour = 0;
2213 if (cfg.selftest) {
2214 int retval;
2215 if (!( cfg.permissive
2216 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2217 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2218 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2219 cfg.selftest = false;
2220 }
2221 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2222 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2223 cfg.selftest = false;
2224 }
2225 else {
2226 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2227 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2228 }
2229 }
2230
2231 // capability check: ATA error log
2232 state.ataerrorcount = 0;
2233 if (cfg.errorlog) {
2234 int errcnt1;
2235 if (!( cfg.permissive
2236 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2237 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2238 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2239 cfg.errorlog = false;
2240 }
2241 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2242 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2243 cfg.errorlog = false;
2244 }
2245 else
2246 state.ataerrorcount = errcnt1;
2247 }
2248
2249 if (cfg.xerrorlog) {
2250 int errcnt2;
2251 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2252 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2253 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2254 name);
2255 cfg.xerrorlog = false;
2256 }
2257 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2258 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2259 cfg.xerrorlog = false;
2260 }
2261 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2262 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2263 name, state.ataerrorcount, errcnt2);
2264 // Record max error count
2265 if (errcnt2 > state.ataerrorcount)
2266 state.ataerrorcount = errcnt2;
2267 }
2268 else
2269 state.ataerrorcount = errcnt2;
2270 }
2271
2272 // capability check: self-test and offline data collection status
2273 if (cfg.offlinests || cfg.selfteststs) {
2274 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2275 if (cfg.offlinests)
2276 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2277 if (cfg.selfteststs)
2278 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2279 cfg.offlinests = cfg.selfteststs = false;
2280 }
2281 }
2282
2283 // capabilities check -- does it support powermode?
2284 if (cfg.powermode) {
2285 int powermode = ataCheckPowerMode(atadev);
2286
2287 if (-1 == powermode) {
2288 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2289 cfg.powermode=0;
2290 }
2291 else if (powermode!=0x00 && powermode!=0x01
2292 && powermode!=0x40 && powermode!=0x41
2293 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2294 && powermode!=0xff) {
2295 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2296 name, powermode);
2297 cfg.powermode=0;
2298 }
2299 }
2300
2301 // Apply ATA settings
2302 std::string msg;
2303
2304 if (cfg.set_aam)
2305 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2306 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2307 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2308
2309 if (cfg.set_apm)
2310 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2311 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2312 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2313
2314 if (cfg.set_lookahead)
2315 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2317 cfg.set_lookahead);
2318
2319 if (cfg.set_wcache)
2320 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2322
2323 if (cfg.set_dsn)
2324 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2325 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2326
2327 if (cfg.set_security_freeze)
2328 format_set_result_msg(msg, "Security freeze",
2330
2331 if (cfg.set_standby)
2332 format_set_result_msg(msg, "Standby",
2333 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2334
2335 // Report as one log entry
2336 if (!msg.empty())
2337 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2338
2339 // set SCT Error Recovery Control if requested
2340 if (cfg.sct_erc_set) {
2342 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2343 name);
2344 else if (locked)
2345 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2346 name);
2347 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
2348 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
2349 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2350 else
2351 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2352 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2353 }
2354
2355 // If no tests available or selected, return
2356 if (!( cfg.smartcheck || cfg.selftest
2357 || cfg.errorlog || cfg.xerrorlog
2358 || cfg.offlinests || cfg.selfteststs
2359 || cfg.usagefailed || cfg.prefail || cfg.usage
2360 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2361 CloseDevice(atadev, name);
2362 return 3;
2363 }
2364
2365 // tell user we are registering device
2366 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2367
2368 // close file descriptor
2369 CloseDevice(atadev, name);
2370
2371 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2372 // Build file name for state file
2373 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2374 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2375 if (!state_path_prefix.empty()) {
2376 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2377 // Read previous state
2378 if (read_dev_state(cfg.state_file.c_str(), state)) {
2379 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2380 // Copy ATA attribute values to temp state
2381 state.update_temp_state();
2382 }
2383 }
2384 if (!attrlog_path_prefix.empty())
2385 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2386 }
2387
2388 finish_device_scan(cfg, state);
2389
2390 return 0;
2391}
2392
2393// on success, return 0. On failure, return >0. Never return <0,
2394// please.
2395static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2396 const dev_config_vector * prev_cfgs)
2397{
2398 int err, req_len, avail_len, version, len;
2399 const char *device = cfg.name.c_str();
2400 struct scsi_iec_mode_page iec;
2401 uint8_t tBuf[64];
2402 uint8_t inqBuf[96];
2403 uint8_t vpdBuf[252];
2404 char lu_id[64], serial[256], vendor[40], model[40];
2405
2406 // Device must be open
2407 memset(inqBuf, 0, 96);
2408 req_len = 36;
2409 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2410 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2411 req_len = 64;
2412 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2413 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2414 "skip device\n", device);
2415 return 2;
2416 }
2417 }
2418 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2419
2420 avail_len = inqBuf[4] + 5;
2421 len = (avail_len < req_len) ? avail_len : req_len;
2422 if (len < 36) {
2423 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2424 "skip device\n", device);
2425 return 2;
2426 }
2427
2428 int pdt = inqBuf[0] & 0x1f;
2429
2430 switch (pdt) {
2432 case SCSI_PT_WO:
2433 case SCSI_PT_CDROM:
2434 case SCSI_PT_OPTICAL:
2435 case SCSI_PT_RBC: /* Reduced Block commands */
2436 case SCSI_PT_HOST_MANAGED: /* Zoned disk */
2437 break;
2438 default:
2439 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2440 "skip\n", device, pdt);
2441 return 2;
2442 }
2443
2445 delete supported_vpd_pages_p;
2446 supported_vpd_pages_p = nullptr;
2447 }
2449
2450 lu_id[0] = '\0';
2451 if (version >= 0x3) {
2452 /* SPC to SPC-5, assume SPC-6 is version==8 or higher */
2454 vpdBuf, sizeof(vpdBuf))) {
2455 len = vpdBuf[3];
2456 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
2457 }
2458 }
2459 serial[0] = '\0';
2461 vpdBuf, sizeof(vpdBuf))) {
2462 len = vpdBuf[3];
2463 vpdBuf[4 + len] = '\0';
2464 scsi_format_id_string(serial, &vpdBuf[4], len);
2465 }
2466
2467 char si_str[64];
2468 struct scsi_readcap_resp srr;
2469 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2470
2471 if (capacity)
2472 format_capacity(si_str, sizeof(si_str), capacity, ".");
2473 else
2474 si_str[0] = '\0';
2475
2476 // Format device id string for warning emails
2477 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2478 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2479 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2480 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2481 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2482 cfg.id_is_unique = (lu_id[0] || serial[0]);
2484 cfg.id_is_unique = false;
2485
2486 // format "model" string
2487 scsi_format_id_string(vendor, &inqBuf[8], 8);
2488 scsi_format_id_string(model, &inqBuf[16], 16);
2489 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2490
2491 // Check for duplicates
2492 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2493 CloseDevice(scsidev, device);
2494 return 1;
2495 }
2496
2497 // check that device is ready for commands. IE stores its stuff on
2498 // the media.
2499 if ((err = scsiTestUnitReady(scsidev))) {
2500 if (SIMPLE_ERR_NOT_READY == err)
2501 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2502 else if (SIMPLE_ERR_NO_MEDIUM == err)
2503 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2504 else if (SIMPLE_ERR_BECOMING_READY == err)
2505 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2506 else
2507 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2508 CloseDevice(scsidev, device);
2509 return 2;
2510 }
2511
2512 // Badly-conforming USB storage devices may fail this check.
2513 // The response to the following IE mode page fetch (current and
2514 // changeable values) is carefully examined. It has been found
2515 // that various USB devices that malform the response will lock up
2516 // if asked for a log page (e.g. temperature) so it is best to
2517 // bail out now.
2518 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2519 state.modese_len = iec.modese_len;
2520 else if (SIMPLE_ERR_BAD_FIELD == err)
2521 ; /* continue since it is reasonable not to support IE mpage */
2522 else { /* any other error (including malformed response) unreasonable */
2523 PrintOut(LOG_INFO,
2524 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2525 device, err);
2526 CloseDevice(scsidev, device);
2527 return 3;
2528 }
2529
2530 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2531 // smart if it is off). This may change to be the same as the ATA side.
2532 if (!scsi_IsExceptionControlEnabled(&iec)) {
2533 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2534 "Try 'smartctl -s on %s' to turn on SMART features\n",
2535 device, device);
2536 CloseDevice(scsidev, device);
2537 return 3;
2538 }
2539
2540 // Flag that certain log pages are supported (information may be
2541 // available from other sources).
2542 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2543 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2544 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2545 {
2546 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2547 switch (tBuf[k]) {
2548 case TEMPERATURE_LPAGE:
2549 state.TempPageSupported = 1;
2550 break;
2551 case IE_LPAGE:
2552 state.SmartPageSupported = 1;
2553 break;
2555 state.ReadECounterPageSupported = 1;
2556 break;
2559 break;
2562 break;
2565 break;
2566 default:
2567 break;
2568 }
2569 }
2570 }
2571
2572 // Check if scsiCheckIE() is going to work
2573 {
2574 uint8_t asc = 0;
2575 uint8_t ascq = 0;
2576 uint8_t currenttemp = 0;
2577 uint8_t triptemp = 0;
2578
2579 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2580 &asc, &ascq, &currenttemp, &triptemp)) {
2581 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2582 state.SuppressReport = 1;
2583 }
2584 if ( (state.SuppressReport || !currenttemp)
2585 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2586 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2587 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2588 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2589 }
2590 }
2591
2592 // capability check: self-test-log
2593 if (cfg.selftest){
2594 int retval = scsiCountFailedSelfTests(scsidev, 0);
2595 if (retval<0) {
2596 // no self-test log, turn off monitoring
2597 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2598 cfg.selftest = false;
2599 state.selflogcount = 0;
2600 state.selfloghour = 0;
2601 }
2602 else {
2603 // register starting values to watch for changes
2604 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2605 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2606 }
2607 }
2608
2609 // disable autosave (set GLTSD bit)
2610 if (cfg.autosave==1){
2611 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2612 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2613 else
2614 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2615 }
2616
2617 // or enable autosave (clear GLTSD bit)
2618 if (cfg.autosave==2){
2619 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2620 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2621 else
2622 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2623 }
2624
2625 // tell user we are registering device
2626 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2627
2628 // Make sure that init_standby_check() ignores SCSI devices
2629 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2630
2631 // close file descriptor
2632 CloseDevice(scsidev, device);
2633
2634 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2635 // Build file name for state file
2636 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2637 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2638 if (!state_path_prefix.empty()) {
2639 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2640 // Read previous state
2641 if (read_dev_state(cfg.state_file.c_str(), state)) {
2642 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2643 // Copy ATA attribute values to temp state
2644 state.update_temp_state();
2645 }
2646 }
2647 if (!attrlog_path_prefix.empty())
2648 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2649 }
2650
2651 finish_device_scan(cfg, state);
2652
2653 return 0;
2654}
2655
// Convert 128 bit LE integer to uint64_t or its max value on overflow.
// VAL[0] is the least significant byte; any nonzero byte among VAL[8..15]
// means the value does not fit and yields the all-ones result.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  uint64_t result = 0;
  // Walk from the most significant byte down to the least significant one
  for (int pos = 16; pos-- > 0; ) {
    if (pos >= 8) {
      // Upper half must be zero, otherwise saturate
      if (val[pos])
        return ~(uint64_t)0;
    }
    else
      result = (result << 8) | val[pos];
  }
  return result;
}
2669
2670// Get max temperature in Kelvin reported in NVMe SMART/Health log.
2671static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2672{
2673 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2674 for (auto s : smart_log.temp_sensor) {
2675 if (s > k)
2676 k = s; // cppcheck-suppress useStlAlgorithm
2677 }
2678 return k;
2679}
2680
2681static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2682 const dev_config_vector * prev_cfgs)
2683{
2684 const char *name = cfg.name.c_str();
2685
2686 // Device must be open
2687
2688 // Get ID Controller
2689 nvme_id_ctrl id_ctrl;
2690 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2691 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2692 CloseDevice(nvmedev, name);
2693 return 2;
2694 }
2695
2696 // Get drive identity
2697 char model[40+1], serial[20+1], firmware[8+1];
2698 format_char_array(model, id_ctrl.mn);
2699 format_char_array(serial, id_ctrl.sn);
2700 format_char_array(firmware, id_ctrl.fr);
2701
2702 // Format device id string for warning emails
2703 char nsstr[32] = "", capstr[32] = "";
2704 unsigned nsid = nvmedev->get_nsid();
2705 if (nsid != 0xffffffff)
2706 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2707 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2708 if (capacity)
2709 format_capacity(capstr, sizeof(capstr), capacity, ".");
2710 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2711 nsstr, (capstr[0] ? ", " : ""), capstr);
2712 cfg.id_is_unique = true; // TODO: Check serial?
2714 cfg.id_is_unique = false;
2715
2716 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2717
2718 // Check for duplicates
2719 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2720 CloseDevice(nvmedev, name);
2721 return 1;
2722 }
2723
2724 // Read SMART/Health log
2725 nvme_smart_log smart_log;
2726 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2727 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2728 CloseDevice(nvmedev, name);
2729 return 2;
2730 }
2731
2732 // Check temperature sensor support
2733 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2734 if (!nvme_get_max_temp_kelvin(smart_log)) {
2735 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2736 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2737 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2738 }
2739 }
2740
2741 // Init total error count
2742 if (cfg.errorlog || cfg.xerrorlog) {
2744 }
2745
2746 // If no supported tests selected, return
2747 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2748 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2749 CloseDevice(nvmedev, name);
2750 return 3;
2751 }
2752
2753 // Tell user we are registering device
2754 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2755
2756 // Make sure that init_standby_check() ignores NVMe devices
2757 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2758
2759 CloseDevice(nvmedev, name);
2760
2761 if (!state_path_prefix.empty()) {
2762 // Build file name for state file
2763 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2764 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2765 nsstr[0] = 0;
2766 if (nsid != 0xffffffff)
2767 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2768 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2769 // Read previous state
2770 if (read_dev_state(cfg.state_file.c_str(), state))
2771 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2772 }
2773
2774 finish_device_scan(cfg, state);
2775
2776 return 0;
2777}
2778
2779// Open device for next check, return false on error
2780static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2781 const char * type)
2782{
2783 const char * name = cfg.name.c_str();
2784
2785 // If user has asked, test the email warning system
2786 if (cfg.emailtest)
2787 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2788
2789 // User may have requested (with the -n Directive) to leave the disk
2790 // alone if it is in idle or standby mode. In this case check the
2791 // power mode first before opening the device for full access,
2792 // and exit without check if disk is reported in standby.
2793 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2794 // Note that 'is_powered_down()' handles opening the device itself, and
2795 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2796 if (device->is_powered_down())
2797 {
2798 // skip at most powerskipmax checks
2799 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2800 // report first only except if state has changed, avoid waking up system disk
2801 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2802 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2803 state.lastpowermodeskipped = -1;
2804 }
2805 state.powerskipcnt++;
2806 return false;
2807 }
2808 }
2809 }
2810
2811 // if we can't open device, fail gracefully rather than hard --
2812 // perhaps the next time around we'll be able to open it
2813 if (!device->open()) {
2814 // For removable devices, print error message only once and suppress email
2815 if (!cfg.removable) {
2816 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2817 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2818 }
2819 else if (!state.removed) {
2820 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2821 state.removed = true;
2822 }
2823 else if (debugmode)
2824 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2825 return false;
2826 }
2827
2828 if (debugmode)
2829 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2830
2831 if (!cfg.removable)
2832 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2833 else if (state.removed) {
2834 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2835 state.removed = false;
2836 }
2837
2838 return true;
2839}
2840
2841// If the self-test log has got more self-test errors (or more recent
2842// self-test errors) recorded, then notify user.
2843static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2844{
2845 const char * name = cfg.name.c_str();
2846
2847 if (newi<0)
2848 // command failed
2849 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2850 else {
2851 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2852
2853 // old and new error counts
2854 int oldc=state.selflogcount;
2855 int newc=SELFTEST_ERRORCOUNT(newi);
2856
2857 // old and new error timestamps in hours
2858 int oldh=state.selfloghour;
2859 int newh=SELFTEST_ERRORHOURS(newi);
2860
2861 if (oldc<newc) {
2862 // increase in error count
2863 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2864 name, oldc, newc);
2865 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2866 name, oldc, newc);
2867 state.must_write = true;
2868 }
2869 else if (newc > 0 && oldh != newh) {
2870 // more recent error
2871 // a 'more recent' error might actually be a smaller hour number,
2872 // if the hour number has wrapped.
2873 // There's still a bug here. You might just happen to run a new test
2874 // exactly 32768 hours after the previous failure, and have run exactly
2875 // 20 tests between the two, in which case smartd will miss the
2876 // new failure.
2877 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2878 name, newh);
2879 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2880 name, newh);
2881 state.must_write = true;
2882 }
2883
2884 // Print info if error entries have disappeared
2885 // or newer successful successful extended self-test exits
2886 if (oldc > newc) {
2887 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2888 name, oldc, newc);
2889 if (newc == 0)
2890 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2891 }
2892
2893 // Needed since self-test error count may DECREASE. Hour might
2894 // also have changed.
2895 state.selflogcount= newc;
2896 state.selfloghour = newh;
2897 }
2898 return;
2899}
2900
// Characters identifying the self-test types, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (array length without the terminating NUL).
static const unsigned num_test_types = sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
2904
2905// returns test type if time to do test of type testtype,
2906// 0 if not time to do test.
2907static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2908{
2909 // check that self-testing has been requested
2910 if (cfg.test_regex.empty())
2911 return 0;
2912
2913 // Exit if drive not capable of any test
2914 if ( state.not_cap_long && state.not_cap_short &&
2915 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2916 return 0;
2917
2918 // since we are about to call localtime(), be sure glibc is informed
2919 // of any timezone changes we make.
2920 if (!usetime)
2922
2923 // Is it time for next check?
2924 time_t now = (!usetime ? time(nullptr) : usetime);
2925 if (now < state.scheduled_test_next_check) {
2926 if (state.scheduled_test_next_check <= now + 3600)
2927 return 0; // Next check within one hour
2928 // More than one hour, assume system clock time adjusted to the past
2929 state.scheduled_test_next_check = now;
2930 }
2931 else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
2932 // Limit time check interval to 90 days
2933 state.scheduled_test_next_check = now - (3600L*24*90);
2934 }
2935
2936 // Find ':NNN[-LLL]' in regex for possible offsets and limits
2937 const unsigned max_offsets = 1 + num_test_types;
2938 unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
2939 unsigned num_offsets = 1; // offsets/limits[0] == 0 always
2940 for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
2941 const char * q = strchr(p, ':');
2942 if (!q)
2943 break;
2944 p = q + 1;
2945 unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
2946 sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
2947 if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
2948 continue;
2949 offsets[num_offsets] = offset; limits[num_offsets] = limit;
2950 num_offsets++;
2951 p += (n3 > 0 ? n3 : n1);
2952 }
2953
2954 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2955 char testtype = 0;
2956 time_t testtime = 0; int testhour = 0;
2957 int maxtest = num_test_types-1;
2958
2959 for (time_t t = state.scheduled_test_next_check; ; ) {
2960 // Check offset 0 and then all offsets for ':NNN' found above
2961 for (unsigned i = 0; i < num_offsets; i++) {
2962 unsigned offset = offsets[i], limit = limits[i];
2963 unsigned delay = cfg.test_offset_factor * offset;
2964 if (0 < limit && limit < delay)
2965 delay %= limit + 1;
2966 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
2967
2968 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2969 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2970 for (int j = 0; j <= maxtest; j++) {
2971 // Skip if drive not capable of this test
2972 switch (test_type_chars[j]) {
2973 case 'L': if (state.not_cap_long) continue; break;
2974 case 'S': if (state.not_cap_short) continue; break;
2975 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2976 case 'O': if (scsi || state.not_cap_offline) continue; break;
2977 case 'c': case 'n':
2978 case 'r': if (scsi || state.not_cap_selective) continue; break;
2979 default: continue;
2980 }
2981 // Try match of "T/MM/DD/d/HH[:NNN]"
2982 char pattern[64];
2983 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2984 test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2985 if (i > 0) {
2986 const unsigned len = sizeof("S/01/01/1/01") - 1;
2987 snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
2988 if (limit > 0)
2989 snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
2990 }
2991 if (cfg.test_regex.full_match(pattern)) {
2992 // Test found
2993 testtype = pattern[0];
2994 testtime = t; testhour = tms->tm_hour;
2995 // Limit further matches to higher priority self-tests
2996 maxtest = j-1;
2997 break;
2998 }
2999 }
3000 }
3001
3002 // Exit if no tests left or current time reached
3003 if (maxtest < 0)
3004 break;
3005 if (t >= now)
3006 break;
3007 // Check next hour
3008 if ((t += 3600) > now)
3009 t = now;
3010 }
3011
3012 // Do next check not before next hour.
3013 struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
3014 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
3015
3016 if (testtype) {
3017 state.must_write = true;
3018 // Tell user if an old test was found.
3019 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
3020 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
3021 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
3022 cfg.name.c_str(), testtype, datebuf);
3023 }
3024 }
3025
3026 return testtype;
3027}
3028
3029// Print a list of future tests.
3031{
3032 unsigned numdev = configs.size();
3033 if (!numdev)
3034 return;
3035 std::vector<int> testcnts(numdev * num_test_types, 0);
3036
3037 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
3038
3039 // FixGlibcTimeZoneBug(); // done in PrintOut()
3040 time_t now = time(nullptr);
3041 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3042 dateandtimezoneepoch(datenow, now);
3043
3044 long seconds;
3045 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3046 // Check for each device whether a test will be run
3047 time_t testtime = now + seconds;
3048 for (unsigned i = 0; i < numdev; i++) {
3049 const dev_config & cfg = configs.at(i);
3050 dev_state & state = states.at(i);
3051 const char * p;
3052 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3053 if (testtype && (p = strchr(test_type_chars, testtype))) {
3054 unsigned t = (p - test_type_chars);
3055 // Report at most 5 tests of each type
3056 if (++testcnts[i*num_test_types + t] <= 5) {
3057 dateandtimezoneepoch(date, testtime);
3058 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3059 testcnts[i*num_test_types + t], testtype, date);
3060 }
3061 }
3062 }
3063 }
3064
3065 // Report totals
3066 dateandtimezoneepoch(date, now+seconds);
3067 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3068 for (unsigned i = 0; i < numdev; i++) {
3069 const dev_config & cfg = configs.at(i);
3070 bool scsi = devices.at(i)->is_scsi();
3071 for (unsigned t = 0; t < num_test_types; t++) {
3072 int cnt = testcnts[i*num_test_types + t];
3073 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3074 continue;
3075 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3076 cnt, (cnt==1?"":"s"), test_type_chars[t]);
3077 }
3078 }
3079
3080}
3081
3082// Return zero on success, nonzero on failure. Perform offline (background)
3083// short or long (extended) self test on given scsi device.
3084static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3085{
3086 int retval = 0;
3087 const char *testname = nullptr;
3088 const char *name = cfg.name.c_str();
3089 int inProgress;
3090
3091 if (scsiSelfTestInProgress(device, &inProgress)) {
3092 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3093 state.not_cap_short = state.not_cap_long = true;
3094 return 1;
3095 }
3096
3097 if (1 == inProgress) {
3098 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3099 "progress.\n", name);
3100 return 1;
3101 }
3102
3103 switch (testtype) {
3104 case 'S':
3105 testname = "Short Self";
3106 retval = scsiSmartShortSelfTest(device);
3107 break;
3108 case 'L':
3109 testname = "Long Self";
3110 retval = scsiSmartExtendSelfTest(device);
3111 break;
3112 }
3113 // If we can't do the test, exit
3114 if (!testname) {
3115 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3116 testtype);
3117 return 1;
3118 }
3119 if (retval) {
3120 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3121 (SIMPLE_ERR_BAD_FIELD == retval)) {
3122 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3123 testname);
3124 if ('L'==testtype)
3125 state.not_cap_long = true;
3126 else
3127 state.not_cap_short = true;
3128
3129 return 1;
3130 }
3131 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3132 testname, retval);
3133 return 1;
3134 }
3135
3136 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3137
3138 return 0;
3139}
3140
3141// Do an offline immediate or self-test. Return zero on success,
3142// nonzero on failure.
3143static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3144{
3145 const char *name = cfg.name.c_str();
3146
3147 // Read current smart data and check status/capability
3148 struct ata_smart_values data;
3149 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3150 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3151 return 1;
3152 }
3153
3154 // Check for capability to do the test
3155 int dotest = -1, mode = 0;
3156 const char *testname = nullptr;
3157 switch (testtype) {
3158 case 'O':
3159 testname="Offline Immediate ";
3161 dotest=OFFLINE_FULL_SCAN;
3162 else
3163 state.not_cap_offline = true;
3164 break;
3165 case 'C':
3166 testname="Conveyance Self-";
3168 dotest=CONVEYANCE_SELF_TEST;
3169 else
3170 state.not_cap_conveyance = true;
3171 break;
3172 case 'S':
3173 testname="Short Self-";
3174 if (isSupportSelfTest(&data))
3175 dotest=SHORT_SELF_TEST;
3176 else
3177 state.not_cap_short = true;
3178 break;
3179 case 'L':
3180 testname="Long Self-";
3181 if (isSupportSelfTest(&data))
3182 dotest=EXTEND_SELF_TEST;
3183 else
3184 state.not_cap_long = true;
3185 break;
3186
3187 case 'c': case 'n': case 'r':
3188 testname = "Selective Self-";
3190 dotest = SELECTIVE_SELF_TEST;
3191 switch (testtype) {
3192 case 'c': mode = SEL_CONT; break;
3193 case 'n': mode = SEL_NEXT; break;
3194 case 'r': mode = SEL_REDO; break;
3195 }
3196 }
3197 else
3198 state.not_cap_selective = true;
3199 break;
3200 }
3201
3202 // If we can't do the test, exit
3203 if (dotest<0) {
3204 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3205 return 1;
3206 }
3207
3208 // If currently running a self-test, do not interrupt it to start another.
3209 if (15==(data.self_test_exec_status >> 4)) {
3210 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3211 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3212 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3213 } else {
3214 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3215 name, testname, (int)(data.self_test_exec_status & 0x0f));
3216 return 1;
3217 }
3218 }
3219
3220 if (dotest == SELECTIVE_SELF_TEST) {
3221 // Set test span
3222 ata_selective_selftest_args selargs, prev_args;
3223 selargs.num_spans = 1;
3224 selargs.span[0].mode = mode;
3225 prev_args.num_spans = 1;
3226 prev_args.span[0].start = state.selective_test_last_start;
3227 prev_args.span[0].end = state.selective_test_last_end;
3228 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3229 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3230 return 1;
3231 }
3232 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3233 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3234 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3235 start, end, end - start + 1,
3236 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3237 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3238 state.selective_test_last_start = start;
3239 state.selective_test_last_end = end;
3240 }
3241
3242 // execute the test, and return status
3243 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, nullptr);
3244 if (retval) {
3245 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3246 return retval;
3247 }
3248
3249 // Report recent test start to do_disable_standby_check()
3250 // and force log of next test status
3251 if (testtype == 'O')
3252 state.offline_started = true;
3253 else
3254 state.selftest_started = true;
3255
3256 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3257 return 0;
3258}
3259
3260// Check pending sector count attribute values (-C, -U directives).
3261static void check_pending(const dev_config & cfg, dev_state & state,
3262 unsigned char id, bool increase_only,
3263 const ata_smart_values & smartval,
3264 int mailtype, const char * msg)
3265{
3266 // Find attribute index
3267 int i = ata_find_attr_index(id, smartval);
3268 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3269 return;
3270
3271 // No report if no sectors pending.
3272 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3273 if (rawval == 0) {
3274 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3275 return;
3276 }
3277
3278 // If attribute is not reset, report only sector count increases.
3279 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3280 if (!(!increase_only || prev_rawval < rawval))
3281 return;
3282
3283 // Format message.
3284 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3285 if (prev_rawval > 0 && rawval != prev_rawval)
3286 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3287
3288 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3289 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3290 state.must_write = true;
3291}
3292
// Format a temperature value for log messages.
// Returns the literal "??" if x is 0 (value unset), otherwise prints the
// decimal value into 'buf' and returns 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??";
}
3301
3302// Check Temperature limits
3303static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3304{
3305 if (!(0 < currtemp && currtemp < 255)) {
3306 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3307 return;
3308 }
3309
3310 // Update Max Temperature
3311 const char * minchg = "", * maxchg = "";
3312 if (currtemp > state.tempmax) {
3313 if (state.tempmax)
3314 maxchg = "!";
3315 state.tempmax = currtemp;
3316 state.must_write = true;
3317 }
3318
3319 char buf[20];
3320 if (!state.temperature) {
3321 // First check
3322 if (!state.tempmin || currtemp < state.tempmin)
3323 // Delay Min Temperature update by ~ 30 minutes.
3324 state.tempmin_delay = time(nullptr) + default_checktime - 60;
3325 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3326 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3327 if (triptemp)
3328 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3329 state.temperature = currtemp;
3330 }
3331 else {
3332 if (state.tempmin_delay) {
3333 // End Min Temperature update delay if ...
3334 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3335 || (state.tempmin_delay <= time(nullptr))) { // or delay time is over.
3336 state.tempmin_delay = 0;
3337 if (!state.tempmin)
3338 state.tempmin = 255;
3339 }
3340 }
3341
3342 // Update Min Temperature
3343 if (!state.tempmin_delay && currtemp < state.tempmin) {
3344 state.tempmin = currtemp;
3345 state.must_write = true;
3346 if (currtemp != state.temperature)
3347 minchg = "!";
3348 }
3349
3350 // Track changes
3351 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3352 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3353 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3354 state.temperature = currtemp;
3355 }
3356 }
3357
3358 // Check limits
3359 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3360 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3361 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3362 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3363 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3364 }
3365 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3366 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3367 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3368 }
3369 else if (cfg.tempcrit) {
3370 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3371 if (currtemp < limit)
3372 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3373 }
3374}
3375
3376// Check normalized and raw attribute values.
3377static void check_attribute(const dev_config & cfg, dev_state & state,
3378 const ata_smart_attribute & attr,
3379 const ata_smart_attribute & prev,
3380 int attridx,
3381 const ata_smart_threshold_entry * thresholds)
3382{
3383 // Check attribute and threshold
3384 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3385 if (attrstate == ATTRSTATE_NON_EXISTING)
3386 return;
3387
3388 // If requested, check for usage attributes that have failed.
3389 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3391 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3392 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3393 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3394 state.must_write = true;
3395 }
3396
3397 // Return if we're not tracking this type of attribute
3398 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3399 if (!( ( prefail && cfg.prefail)
3400 || (!prefail && cfg.usage )))
3401 return;
3402
3403 // Return if '-I ID' was specified
3405 return;
3406
3407 // Issue warning if they don't have the same ID in all structures.
3408 if (attr.id != prev.id) {
3409 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3410 cfg.name.c_str(), attr.id, prev.id);
3411 return;
3412 }
3413
3414 // Compare normalized values if valid.
3415 bool valchanged = false;
3416 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3417 if (attr.current != prev.current)
3418 valchanged = true;
3419 }
3420
3421 // Compare raw values if requested.
3422 bool rawchanged = false;
3423 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3426 rawchanged = true;
3427 }
3428
3429 // Return if no change
3430 if (!(valchanged || rawchanged))
3431 return;
3432
3433 // Format value strings
3434 std::string currstr, prevstr;
3435 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3436 // Print raw values only
3437 currstr = strprintf("%s (Raw)",
3438 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3439 prevstr = strprintf("%s (Raw)",
3440 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3441 }
3442 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3443 // Print normalized and raw values
3444 currstr = strprintf("%d [Raw %s]", attr.current,
3445 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3446 prevstr = strprintf("%d [Raw %s]", prev.current,
3447 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3448 }
3449 else {
3450 // Print normalized values only
3451 currstr = strprintf("%d", attr.current);
3452 prevstr = strprintf("%d", prev.current);
3453 }
3454
3455 // Format message
3456 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3457 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3458 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3459 prevstr.c_str(), currstr.c_str());
3460
3461 // Report this change as critical ?
3462 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3463 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3464 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3465 MailWarning(cfg, state, 2, "%s", msg.c_str());
3466 }
3467 else {
3468 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3469 }
3470 state.must_write = true;
3471}
3472
3473
3474static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3475 bool firstpass, bool allow_selftests)
3476{
3477 if (!open_device(cfg, state, atadev, "ATA"))
3478 return 1;
3479
3480 const char * name = cfg.name.c_str();
3481
3482 // user may have requested (with the -n Directive) to leave the disk
3483 // alone if it is in idle or sleeping mode. In this case check the
3484 // power mode and exit without check if needed
3485 if (cfg.powermode && !state.powermodefail) {
3486 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3487 const char * mode = 0;
3488 if (0 <= powermode && powermode < 0xff) {
3489 // wait for possible spin up and check again
3490 int powermode2;
3491 sleep(5);
3492 powermode2 = ataCheckPowerMode(atadev);
3493 if (powermode2 > powermode)
3494 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3495 powermode = powermode2;
3496 }
3497
3498 switch (powermode){
3499 case -1:
3500 // SLEEP
3501 mode="SLEEP";
3502 if (cfg.powermode>=1)
3503 dontcheck=1;
3504 break;
3505 case 0x00:
3506 // STANDBY
3507 mode="STANDBY";
3508 if (cfg.powermode>=2)
3509 dontcheck=1;
3510 break;
3511 case 0x01:
3512 // STANDBY_Y
3513 mode="STANDBY_Y";
3514 if (cfg.powermode>=2)
3515 dontcheck=1;
3516 break;
3517 case 0x80:
3518 // IDLE
3519 mode="IDLE";
3520 if (cfg.powermode>=3)
3521 dontcheck=1;
3522 break;
3523 case 0x81:
3524 // IDLE_A
3525 mode="IDLE_A";
3526 if (cfg.powermode>=3)
3527 dontcheck=1;
3528 break;
3529 case 0x82:
3530 // IDLE_B
3531 mode="IDLE_B";
3532 if (cfg.powermode>=3)
3533 dontcheck=1;
3534 break;
3535 case 0x83:
3536 // IDLE_C
3537 mode="IDLE_C";
3538 if (cfg.powermode>=3)
3539 dontcheck=1;
3540 break;
3541 case 0xff:
3542 // ACTIVE/IDLE
3543 case 0x40:
3544 // ACTIVE
3545 case 0x41:
3546 // ACTIVE
3547 mode="ACTIVE or IDLE";
3548 break;
3549 default:
3550 // UNKNOWN
3551 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3552 name, powermode);
3553 state.powermodefail = true;
3554 break;
3555 }
3556
3557 // if we are going to skip a check, return now
3558 if (dontcheck){
3559 // skip at most powerskipmax checks
3560 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3561 CloseDevice(atadev, name);
3562 // report first only except if state has changed, avoid waking up system disk
3563 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3564 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3565 state.lastpowermodeskipped = powermode;
3566 }
3567 state.powerskipcnt++;
3568 return 0;
3569 }
3570 else {
3571 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3572 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3573 }
3574 state.powerskipcnt = 0;
3575 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3576 }
3577 else if (state.powerskipcnt) {
3578 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3579 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3580 state.powerskipcnt = 0;
3581 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3582 }
3583 }
3584
3585 // check smart status
3586 if (cfg.smartcheck) {
3587 int status=ataSmartStatus2(atadev);
3588 if (status==-1){
3589 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3590 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3591 state.must_write = true;
3592 }
3593 else if (status==1){
3594 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3595 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3596 state.must_write = true;
3597 }
3598 }
3599
3600 // Check everything that depends upon SMART Data (eg, Attribute values)
3601 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3602 || cfg.curr_pending_id || cfg.offl_pending_id
3603 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3604 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3605
3606 // Read current attribute values.
3607 ata_smart_values curval;
3608 if (ataReadSmartValues(atadev, &curval)){
3609 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3610 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3611 state.must_write = true;
3612 }
3613 else {
3614 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3615
3616 // look for current or offline pending sectors
3617 if (cfg.curr_pending_id)
3618 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3619 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3620 : "Total unreadable (pending) sectors" ));
3621
3622 if (cfg.offl_pending_id)
3623 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3624 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3625 : "Total offline uncorrectable sectors"));
3626
3627 // check temperature limits
3628 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3629 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3630
3631 // look for failed usage attributes, or track usage or prefail attributes
3632 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3633 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3634 check_attribute(cfg, state,
3635 curval.vendor_attributes[i],
3636 state.smartval.vendor_attributes[i],
3637 i, state.smartthres.thres_entries);
3638 }
3639 }
3640
3641 // Log changes of offline data collection status
3642 if (cfg.offlinests) {
3645 || state.offline_started // test was started in previous call
3646 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3648 }
3649
3650 // Log changes of self-test execution status
3651 if (cfg.selfteststs) {
3653 || state.selftest_started // test was started in previous call
3654 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3656 }
3657
3658 // Save the new values for the next time around
3659 state.smartval = curval;
3660 }
3661 }
3662 state.offline_started = state.selftest_started = false;
3663
3664 // check if number of selftest errors has increased (note: may also DECREASE)
3665 if (cfg.selftest)
3666 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3667
3668 // check if number of ATA errors has increased
3669 if (cfg.errorlog || cfg.xerrorlog) {
3670
3671 int errcnt1 = -1, errcnt2 = -1;
3672 if (cfg.errorlog)
3673 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3674 if (cfg.xerrorlog)
3675 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3676
3677 // new number of errors is max of both logs
3678 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3679
3680 // did command fail?
3681 if (newc<0)
3682 // lack of PrintOut here is INTENTIONAL
3683 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3684
3685 // has error count increased?
3686 int oldc = state.ataerrorcount;
3687 if (newc>oldc){
3688 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3689 name, oldc, newc);
3690 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3691 name, oldc, newc);
3692 state.must_write = true;
3693 }
3694
3695 if (newc>=0)
3696 state.ataerrorcount=newc;
3697 }
3698
3699 // if the user has asked, and device is capable (or we're not yet
3700 // sure) check whether a self test should be done now.
3701 if (allow_selftests && !cfg.test_regex.empty()) {
3702 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3703 if (testtype)
3704 DoATASelfTest(cfg, state, atadev, testtype);
3705 }
3706
3707 // Don't leave device open -- the OS/user may want to access it
3708 // before the next smartd cycle!
3709 CloseDevice(atadev, name);
3710
3711 // Copy ATA attribute values to persistent state
3713
3714 state.attrlog_dirty = true;
3715 return 0;
3716}
3717
// Runs one check cycle for a SCSI device.
// Opens the device via open_device(), queries the health status through
// scsiCheckIE(), applies the configured temperature and self-test log checks,
// optionally starts a scheduled self-test and, if an attribute log file is
// configured, reads the error counter log pages.
// Returns 1 if open_device() fails, 0 otherwise.
3718static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3719{
3720 if (!open_device(cfg, state, scsidev, "SCSI"))
3721 return 1;
3722
3723 const char * name = cfg.name.c_str();
3724
3725 uint8_t asc = 0, ascq = 0;
3726 uint8_t currenttemp = 0, triptemp = 0;
 // Query health status unless a previous failure set SuppressReport
3727 if (!state.SuppressReport) {
3728 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3729 &asc, &ascq, &currenttemp, &triptemp)) {
3730 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3731 name);
3732 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
 // Do not query (and do not report this failure) again in later cycles
3733 state.SuppressReport = 1;
3734 }
3735 }
 // asc stays 0 if scsiCheckIE() was skipped or did not report an additional sense code
3736 if (asc > 0) {
3737 char b[128];
3738 const char * cp = scsiGetIEString(asc, ascq, b, sizeof(b));
3739
3740 if (cp) {
3741 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3742 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3743 } else if (asc == 4 && ascq == 9) {
3744 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3745 } else if (debugmode)
3746 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3747 name, (int)asc, (int)ascq);
3748 } else if (debugmode)
3749 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3750
3751 // check temperature limits
3752 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3753 CheckTemperature(cfg, state, currenttemp, triptemp);
3754
3755 // check if number of selftest errors has increased (note: may also DECREASE)
3756 if (cfg.selftest)
3757 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3758
 // Start a scheduled self-test if allowed and a test schedule is configured
3759 if (allow_selftests && !cfg.test_regex.empty()) {
3760 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3761 if (testtype)
3762 DoSCSISelfTest(cfg, state, scsidev, testtype);
3763 }
3764 if (!cfg.attrlog_file.empty()){
3765 // saving error counters to state
 // NOTE(review): no statement that copies data from tBuf into
 // state.scsi_error_counters[] is visible in this listing before 'found' is
 // set, and the non-medium error branch is empty; presumably lines are
 // missing here -- confirm against the original file.
3766 uint8_t tBuf[252];
3767 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3768 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3771 state.scsi_error_counters[0].found=1;
3772 }
3773 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3774 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3777 state.scsi_error_counters[1].found=1;
3778 }
3779 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3780 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3783 state.scsi_error_counters[2].found=1;
3784 }
3785 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3786 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3790 }
3791 // store temperature if not done by CheckTemperature() above
3792 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3793 state.temperature = currenttemp;
3794 }
 // Close the device again and flag the attribute log as needing an update
3795 CloseDevice(scsidev, name);
3796 state.attrlog_dirty = true;
3797 return 0;
3798}
3799
3800static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3801{
3802 if (!open_device(cfg, state, nvmedev, "NVMe"))
3803 return 1;
3804
3805 const char * name = cfg.name.c_str();
3806
3807 // Read SMART/Health log
3808 nvme_smart_log smart_log;
3809 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3810 CloseDevice(nvmedev, name);
3811 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3812 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3813 state.must_write = true;
3814 return 0;
3815 }
3816
3817 // Check Critical Warning bits
3818 if (cfg.smartcheck && smart_log.critical_warning) {
3819 unsigned char w = smart_log.critical_warning;
3820 std::string msg;
3821 static const char * const wnames[] =
3822 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3823
3824 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3825 if (!(w & (1 << b)))
3826 continue;
3827 if (cnt)
3828 msg += ", ";
3829 if (++cnt > 3) {
3830 msg += "..."; break;
3831 }
3832 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3833 msg += "*Unknown*"; break;
3834 }
3835 msg += wnames[b];
3836 }
3837
3838 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3839 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3840 state.must_write = true;
3841 }
3842
3843 // Check temperature limits
3844 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3845 int k = nvme_get_max_temp_kelvin(smart_log);
3846 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3847 int c = k - 273;
3848 if (c < 1)
3849 c = 1;
3850 else if (c > 0xff)
3851 c = 0xff;
3852 CheckTemperature(cfg, state, c, 0);
3853 }
3854
3855 // Check if number of errors has increased
3856 if (cfg.errorlog || cfg.xerrorlog) {
3857 uint64_t oldcnt = state.nvme_err_log_entries;
3858 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3859 if (newcnt > oldcnt) {
3860 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3861 name, oldcnt, newcnt);
3862 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3863 name, oldcnt, newcnt);
3864 state.must_write = true;
3865 }
3866 state.nvme_err_log_entries = newcnt;
3867 }
3868
3869 CloseDevice(nvmedev, name);
3870 state.attrlog_dirty = true;
3871 return 0;
3872}
3873
3874// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
// NOTE(review): the declaration described by the comment above (presumably
// of standby_disable_state, which is used below) and the signature of the
// following function are not present in this listing -- confirm against the
// original file.
// The function body below scans 'configs' for the ',ns' variants of the
// '-l offlinests' and '-l selfteststs' directives, probes whether the OS
// interface supports changing auto standby, and sets standby_disable_state
// to 1 if at least one such directive remains in effect, else to 0.
3876
3878{
3879 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3880 bool sts1 = false, sts2 = false;
3881 for (const auto & cfg : configs) {
3882 if (cfg.offlinests_ns)
3883 sts1 = true;
3884 if (cfg.selfteststs_ns)
3885 sts2 = true;
3886 }
3887
3888 // Check for support of disable auto standby
3889 // Reenable standby if smartd.conf was reread
3890 if (sts1 || sts2 || standby_disable_state == 3) {
 // The call with 'false' requests that auto standby is (re)enabled
3891 if (!smi()->disable_system_auto_standby(false)) {
3892 if (standby_disable_state == 3)
3893 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3894 if (sts1 || sts2) {
3895 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3896 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
 // Treat the ',ns' directives as not given
3897 sts1 = sts2 = false;
3898 }
3899 }
3900 }
3901
3902 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3903}
3904
// Disables system auto standby while a device configured with
// '-l offlinests,ns' or '-l selfteststs,ns' has a test running, and
// re-enables it otherwise. State changes are logged via PrintOut().
// NOTE(review): several lines of this function are not present in this
// listing (the condition guarding the first 'return', the second half of
// each 'running' condition and, presumably, the assignments to
// standby_disable_state in the branches below) -- confirm against the
// original file.
3905static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3906{
3908 return;
3909
3910 // Check for just started or still running self-tests
3911 bool running = false;
 // The loop stops at the first device found with a running test
3912 for (unsigned i = 0; i < configs.size() && !running; i++) {
3913 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3914
3915 if ( ( cfg.offlinests_ns
3916 && (state.offline_started ||
3918 || ( cfg.selfteststs_ns
3919 && (state.selftest_started ||
3921 running = true;
3922 // state.offline/selftest_started will be reset after next logging of test status
3923 }
3924
3925 // Disable/enable auto standby and log state changes
3926 if (!running) {
 // No test running: re-enable auto standby unless state is already 1 (not disabled)
3927 if (standby_disable_state != 1) {
3928 if (!smi()->disable_system_auto_standby(false))
3929 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3930 smi()->get_errmsg());
3931 else
3932 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3934 }
3935 }
 // Test running: the disable request itself is the condition of this branch
3936 else if (!smi()->disable_system_auto_standby(true)) {
 // Log the rejection only if state is not already 2 (disable rejected by OS)
3937 if (standby_disable_state != 2) {
3938 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3939 smi()->get_errmsg());
3941 }
3942 }
3943 else {
 // Log success only if state is not already 3 (disabled)
3944 if (standby_disable_state != 3) {
3945 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3947 }
3948 }
3949}
3950
3951// Checks the SMART status of all ATA, SCSI and NVMe devices
// configs, states and devices are parallel containers indexed by device.
// Devices whose state.skip flag is set are not checked in this cycle.
// firstpass is passed on to ATACheckDevice() only; allow_selftests is passed
// on to the ATA and SCSI check functions.
3952static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3953 smart_device_list & devices, bool firstpass, bool allow_selftests)
3954{
3955 for (unsigned i = 0; i < configs.size(); i++) {
3956 const dev_config & cfg = configs.at(i);
3957 dev_state & state = states.at(i);
3958 if (state.skip) {
3959 if (debugmode)
 // Print the device specific interval if set, else the global one
3960 PrintOut(LOG_INFO, "Device: %s, skipped (interval=%d)\n", cfg.name.c_str(),
3961 (cfg.checktime ? cfg.checktime : checktime));
3962 continue;
3963 }
3964
 // Dispatch by device type; the return values of the check functions are ignored
3965 smart_device * dev = devices.at(i);
3966 if (dev->is_ata())
3967 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3968 else if (dev->is_scsi())
3969 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3970 else if (dev->is_nvme())
3971 NVMeCheckDevice(cfg, state, dev->to_nvme());
3972
3973 // Prevent systemd unit startup timeout when checking many devices on startup
 // NOTE(review): the statement belonging to the comment above is not
 // present in this listing -- confirm against the original file.
3975 }
3976
 // Update the auto standby state after all devices were checked
3977 do_disable_standby_check(configs, states);
3978}
3979
3980// Install all signal handlers
// NOTE(review): the function signature and the handler registration calls
// for the first three sections below are not present in this listing; only
// the registration of USR2handler for SIGUSR2, compiled for _WIN32 only, is
// visible -- confirm against the original file.
3982{
3983 // normal and abnormal exit
3986
3987 // in debug mode, <CONTROL-C> ==> HUP
3989
3990 // Catch HUP and USR1
3993#ifdef _WIN32
3994 set_signal_if_not_ignored(SIGUSR2, USR2handler);
3995#endif
3996}
3997
3998#ifdef _WIN32
3999// Toggle debug mode implemented for native windows only
4000// (there is no easy way to reopen tty on *nix)
4001static void ToggleDebugMode()
4002{
4003 if (!debugmode) {
4004 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
4005 if (!daemon_enable_console("smartd [Debug]")) {
4006 debugmode = 1;
4007 daemon_signal(SIGINT, HUPhandler);
4008 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
4009 }
4010 else
4011 PrintOut(LOG_INFO,"enable console failed\n");
4012 }
4013 else if (debugmode == 1) {
4014 daemon_disable_console();
4015 debugmode = 0;
4016 daemon_signal(SIGINT, sighandler);
4017 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
4018 }
4019 else
4020 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
4021}
4022#endif
4023
// Returns the next wake-up time on the grid wakeuptime + k*ct.
// If wakeuptime is still in the future it is returned unchanged, otherwise
// the first grid point strictly after timenow is returned.
// ct is the check interval in seconds and must be nonzero.
time_t calc_next_wakeuptime(time_t wakeuptime, time_t timenow, int ct)
{
  if (wakeuptime > timenow)
    return wakeuptime;

  // Seconds elapsed within the current interval
  const time_t into_interval = (timenow - wakeuptime) % ct;
  return timenow + ct - into_interval;
}
4030
// Sleeps until the next scheduled device check or until one of the signal
// flags caughtsigUSR1, caughtsigHUP or caughtsigEXIT is set.
// wakeuptime: previously scheduled wake-up time (used only if checktime_min
//   is zero, i.e. all devices share the same interval).
// configs, states: parallel per-device containers; if checktime_min is
//   nonzero, each state's wakeuptime and skip flag are updated here.
// sigwakeup: set to true if the sleep ended due to USR1; never reset here.
// Returns the (possibly adjusted) wake-up time that was slept for.
4031static time_t dosleep(time_t wakeuptime, const dev_config_vector & configs,
4032 dev_state_vector & states, bool & sigwakeup)
4033{
4034 // If past wake-up-time, compute next wake-up-time
4035 time_t timenow = time(nullptr);
4036 unsigned n = configs.size();
 // ct: interval used below for the clock adjustment and oversleep handling
4037 int ct;
4038 if (!checktime_min) {
4039 // Same for all devices
4040 wakeuptime = calc_next_wakeuptime(wakeuptime, timenow, checktime);
4041 ct = checktime;
4042 }
4043 else {
4044 // Determine wakeuptime of next device(s)
4045 wakeuptime = 0;
4046 for (unsigned i = 0; i < n; i++) {
4047 const dev_config & cfg = configs.at(i);
4048 dev_state & state = states.at(i);
 // Only devices checked in the last cycle get a new wake-up time;
 // a wake-up time of 0 means 'not yet scheduled'
4049 if (!state.skip)
4050 state.wakeuptime = calc_next_wakeuptime((state.wakeuptime ? state.wakeuptime : timenow),
4051 timenow, (cfg.checktime ? cfg.checktime : checktime));
 // Track the earliest wake-up time of all devices
4052 if (!wakeuptime || state.wakeuptime < wakeuptime)
4053 wakeuptime = state.wakeuptime;
4054 }
4055 ct = checktime_min;
4056 }
4057
 // NOTE(review): presumably reports the upcoming wait (wake-up time and
 // device count) to the service manager -- confirm against notify_wait()
4058 notify_wait(wakeuptime, n);
4059
4060 // Sleep until we catch a signal or have completed sleeping
 // no_skip: if set, no device is skipped in the next cycle
4061 bool no_skip = false;
 // addtime: extra delay in seconds added after an oversleep was detected
4062 int addtime = 0;
4063 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
4064 // Restart if system clock has been adjusted to the past
4065 if (wakeuptime > timenow + ct) {
4066 PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
4067 wakeuptime = timenow + ct;
 // Per-device schedules are recomputed from scratch in the next call
4068 for (auto & state : states)
4069 state.wakeuptime = 0;
4070 no_skip = true;
4071 }
4072
4073 // Exit sleep when time interval has expired or a signal is received
4074 sleep(wakeuptime+addtime-timenow);
4075
4076#ifdef _WIN32
4077 // toggle debug mode?
4078 if (caughtsigUSR2) {
4079 ToggleDebugMode();
4080 caughtsigUSR2 = 0;
4081 }
4082#endif
4083
4084 timenow = time(nullptr);
4085
4086 // Actual sleep time too long?
 // Handled at most once per call, because addtime is nonzero afterwards
4087 if (!addtime && timenow > wakeuptime+60) {
4088 if (debugmode)
4089 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4090 (int)(timenow-wakeuptime));
4091 // Wait another 20 seconds to avoid I/O errors during disk spin-up
4092 addtime = timenow-wakeuptime+20;
4093 // Use next wake-up-time if close
4094 int nextcheck = ct - addtime % ct;
4095 if (nextcheck <= 20)
4096 addtime += nextcheck;
4097 }
4098 }
4099
4100 // if we caught a SIGUSR1 then print message and clear signal
4101 if (caughtsigUSR1){
4102 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4103 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4104 caughtsigUSR1=0;
4105 sigwakeup = no_skip = true;
4106 }
4107
4108 // Check which devices must be skipped in this cycle
4109 if (checktime_min) {
 // A device is skipped if its own wake-up time has not been reached yet
4110 for (auto & state : states)
4111 state.skip = (!no_skip && timenow < state.wakeuptime);
4112 }
4113
4114 // return adjusted wakeuptime
4115 return wakeuptime;
4116}
4117
4118// Print out a list of valid arguments for the Directive d
4119static void printoutvaliddirectiveargs(int priority, char d)
4120{
4121 switch (d) {
4122 case 'n':
4123 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4124 break;
4125 case 's':
4126 PrintOut(priority, "valid_regular_expression");
4127 break;
4128 case 'd':
4129 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4130 break;
4131 case 'T':
4132 PrintOut(priority, "normal, permissive");
4133 break;
4134 case 'o':
4135 case 'S':
4136 PrintOut(priority, "on, off");
4137 break;
4138 case 'l':
4139 PrintOut(priority, "error, selftest");
4140 break;
4141 case 'M':
4142 PrintOut(priority, "\"once\", \"always\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4143 break;
4144 case 'v':
4145 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4146 break;
4147 case 'P':
4148 PrintOut(priority, "use, ignore, show, showall");
4149 break;
4150 case 'F':
4151 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4152 break;
4153 case 'e':
4154 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4155 "security-freeze, standby,[N|off], wcache,[on|off]");
4156 break;
4157 case 'c':
4158 PrintOut(priority, "i=N, interval=N");
4159 break;
4160 }
4161}
4162
4163// exits with an error message, or returns integer value of token
4164static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4165 int min, int max, char * suffix = 0)
4166{
4167 // make sure argument is there
4168 if (!arg) {
4169 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4170 cfgfile, lineno, name, token, min, max);
4171 return -1;
4172 }
4173
4174 // get argument value (base 10), check that it's integer, and in-range
4175 char *endptr;
4176 int val = strtol(arg,&endptr,10);
4177
4178 // optional suffix present?
4179 if (suffix) {
4180 if (!strcmp(endptr, suffix))
4181 endptr += strlen(suffix);
4182 else
4183 *suffix = 0;
4184 }
4185
4186 if (!(!*endptr && min <= val && val <= max)) {
4187 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4188 cfgfile, lineno, name, token, arg, min, max);
4189 return -1;
4190 }
4191
4192 // all is well; return value
4193 return val;
4194}
4195
4196
4197// Get 1-3 small integer(s) for '-W' directive
4198static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4199 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4200{
4201 unsigned v1 = 0, v2 = 0, v3 = 0;
4202 int n1 = -1, n2 = -1, n3 = -1, len;
4203 if (!arg) {
4204 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4205 cfgfile, lineno, name, token);
4206 return -1;
4207 }
4208
4209 len = strlen(arg);
4210 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4211 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4212 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4213 cfgfile, lineno, name, token, arg);
4214 return -1;
4215 }
4216 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4217 return 0;
4218}
4219
4220
4221#ifdef _WIN32
4222
// Returns the next strtok() token of the string currently being tokenized.
// If the token starts with '"', all tokens up to and including the one that
// ends with '"' are concatenated, separated by single spaces, and returned
// without the enclosing quotes. The closing quote may be part of the first
// token ("word") or of a later one ("two words").
// Returns nullptr if no token remains and the string "\"" if the closing
// quote is missing. A dequoted result points to a static buffer that is
// overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(nullptr, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Quoted string without embedded delimiters: the closing quote is part
  // of this first token. A lone '"' leaves 'token' empty here and is
  // handled as an opening quote by the loop below.
  if (!token.empty() && token.back() == '"') {
    token.pop_back();
    return token.c_str();
  }

  for (;;) {
    t = strtok(nullptr, delimiters);
    if (!t || !*t)
      return "\""; // closing quote missing
    token += ' ';
    const size_t len = strlen(t);
    if (t[len-1] == '"') {
      token += std::string(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4246
4247#endif // _WIN32
4248
4249
4250// This function returns 1 if it has correctly parsed one token (and
4251// any arguments), else zero if no tokens remain. It returns -1 if an
4252// error was encountered.
//
// token:      Directive to parse ('-' followed by exactly one letter) or
//             the start of a comment ('#').
// cfg:        device configuration that receives the parsed settings;
//             cfg.name and cfg.lineno are used for messages.
// scan_types: device types collected from '-d TYPE'; cleared by '-d auto'.
//
// Arguments of a Directive are fetched with strtok(nullptr, ...), so the
// caller must have started tokenizing the configuration line with strtok().
// NOTE(review): no 'return 0' is visible in this function although the
// comment above mentions it -- confirm.
// NOTE(review): several statements of this function are not present in this
// listing (e.g. in the '-s', '-M', '-i', '-I', '-r', '-R' and '-P showall'
// branches) -- confirm against the original file.
4253static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4254{
4255 char sym;
4256 const char * name = cfg.name.c_str();
4257 int lineno=cfg.lineno;
4258 const char *delim = " \n\t";
 // badarg/missingarg are evaluated after the switch to print a common
 // error message; arg holds the last argument fetched for that message
4259 int badarg = 0;
4260 int missingarg = 0;
4261 const char *arg = 0;
4262
4263 // is the rest of the line a comment
4264 if (*token=='#')
4265 return 1;
4266
4267 // is the token not recognized?
4268 if (*token!='-' || strlen(token)!=2) {
4269 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4270 configfile, lineno, name, token);
4271 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4272 return -1;
4273 }
4274
4275 // token we will be parsing:
4276 sym=token[1];
4277
4278 // parse the token and swallow its argument
4279 int val;
 // Writable suffix buffers for GetInteger(): the first character is
 // cleared by GetInteger() if the argument does not end with the suffix
4280 char plus[] = "+", excl[] = "!";
4281
4282 switch (sym) {
4283 case 'C':
4284 // monitor current pending sector count (default 197)
4285 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4286 return -1;
4287 cfg.curr_pending_id = (unsigned char)val;
4288 cfg.curr_pending_incr = (*plus == '+');
4289 cfg.curr_pending_set = true;
4290 break;
4291 case 'U':
4292 // monitor offline uncorrectable sectors (default 198)
4293 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4294 return -1;
4295 cfg.offl_pending_id = (unsigned char)val;
4296 cfg.offl_pending_incr = (*plus == '+');
4297 cfg.offl_pending_set = true;
4298 break;
4299 case 'T':
4300 // Set tolerance level for SMART command failures
4301 if (!(arg = strtok(nullptr, delim))) {
4302 missingarg = 1;
4303 } else if (!strcmp(arg, "normal")) {
4304 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4305 // not on failure of an optional S.M.A.R.T. command.
4306 // This is the default so we don't need to actually do anything here.
4307 cfg.permissive = false;
4308 } else if (!strcmp(arg, "permissive")) {
4309 // Permissive mode; ignore errors from Mandatory SMART commands
4310 cfg.permissive = true;
4311 } else {
4312 badarg = 1;
4313 }
4314 break;
4315 case 'd':
4316 // specify the device type
 // Any argument other than "ignore", "removable" and "auto" is taken
 // as device type without validation here
4317 if (!(arg = strtok(nullptr, delim))) {
4318 missingarg = 1;
4319 } else if (!strcmp(arg, "ignore")) {
4320 cfg.ignore = true;
4321 } else if (!strcmp(arg, "removable")) {
4322 cfg.removable = true;
4323 } else if (!strcmp(arg, "auto")) {
4324 cfg.dev_type = "";
4325 scan_types.clear();
4326 } else {
4327 cfg.dev_type = arg;
4328 scan_types.push_back(arg);
4329 }
4330 break;
4331 case 'F':
4332 // fix firmware bug
4333 if (!(arg = strtok(nullptr, delim)))
4334 missingarg = 1;
4335 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4336 badarg = 1;
4337 break;
4338 case 'H':
4339 // check SMART status
4340 cfg.smartcheck = true;
4341 break;
4342 case 'f':
4343 // check for failure of usage attributes
4344 cfg.usagefailed = true;
4345 break;
4346 case 't':
4347 // track changes in all vendor attributes
4348 cfg.prefail = true;
4349 cfg.usage = true;
4350 break;
4351 case 'p':
4352 // track changes in prefail vendor attributes
4353 cfg.prefail = true;
4354 break;
4355 case 'u':
4356 // track changes in usage vendor attributes
4357 cfg.usage = true;
4358 break;
4359 case 'l':
4360 // track changes in SMART logs
4361 if (!(arg = strtok(nullptr, delim))) {
4362 missingarg = 1;
4363 } else if (!strcmp(arg, "selftest")) {
4364 // track changes in self-test log
4365 cfg.selftest = true;
4366 } else if (!strcmp(arg, "error")) {
4367 // track changes in ATA error log
4368 cfg.errorlog = true;
4369 } else if (!strcmp(arg, "xerror")) {
4370 // track changes in Extended Comprehensive SMART error log
4371 cfg.xerrorlog = true;
4372 } else if (!strcmp(arg, "offlinests")) {
4373 // track changes in offline data collection status
4374 cfg.offlinests = true;
4375 } else if (!strcmp(arg, "offlinests,ns")) {
4376 // track changes in offline data collection status, disable auto standby
4377 cfg.offlinests = cfg.offlinests_ns = true;
4378 } else if (!strcmp(arg, "selfteststs")) {
4379 // track changes in self-test execution status
4380 cfg.selfteststs = true;
4381 } else if (!strcmp(arg, "selfteststs,ns")) {
4382 // track changes in self-test execution status, disable auto standby
4383 cfg.selfteststs = cfg.selfteststs_ns = true;
4384 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4385 // set SCT Error Recovery Control
 // nc stays -1 unless both numbers were parsed; it must then equal the
 // argument length so that trailing characters are rejected
4386 unsigned rt = ~0, wt = ~0; int nc = -1;
4387 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4388 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4389 cfg.sct_erc_set = true;
4390 cfg.sct_erc_readtime = rt;
4391 cfg.sct_erc_writetime = wt;
4392 }
4393 else
4394 badarg = 1;
4395 } else {
4396 badarg = 1;
4397 }
4398 break;
4399 case 'a':
4400 // monitor everything
4401 cfg.smartcheck = true;
4402 cfg.prefail = true;
4403 cfg.usagefailed = true;
4404 cfg.usage = true;
4405 cfg.selftest = true;
4406 cfg.errorlog = true;
4407 cfg.selfteststs = true;
4408 break;
4409 case 'o':
4410 // automatic offline testing enable/disable
 // 2 = "on", 1 = "off"
4411 if (!(arg = strtok(nullptr, delim))) {
4412 missingarg = 1;
4413 } else if (!strcmp(arg, "on")) {
4414 cfg.autoofflinetest = 2;
4415 } else if (!strcmp(arg, "off")) {
4416 cfg.autoofflinetest = 1;
4417 } else {
4418 badarg = 1;
4419 }
4420 break;
4421 case 'n':
4422 // skip disk check if in idle or standby mode
 // Argument syntax: MODE[,N][,q]
4423 if (!(arg = strtok(nullptr, delim)))
4424 missingarg = 1;
4425 else {
4426 char *endptr = nullptr;
 // The argument is split in place at the first ',' (see below); arg
 // points into the caller's writable line buffer
4427 char *next = strchr(const_cast<char*>(arg), ',');
4428
4429 cfg.powerquiet = false;
4430 cfg.powerskipmax = 0;
4431
4432 if (next)
4433 *next = '\0';
4434 if (!strcmp(arg, "never"))
4435 cfg.powermode = 0;
4436 else if (!strcmp(arg, "sleep"))
4437 cfg.powermode = 1;
4438 else if (!strcmp(arg, "standby"))
4439 cfg.powermode = 2;
4440 else if (!strcmp(arg, "idle"))
4441 cfg.powermode = 3;
4442 else
4443 badarg = 1;
4444
4445 // if optional arguments are present
4446 if (!badarg && next) {
4447 next++;
4448 cfg.powerskipmax = strtol(next, &endptr, 10);
 // No number present: the rest must be the 'q' option
4449 if (endptr == next)
4450 cfg.powerskipmax = 0;
4451 else {
 // Advance behind the number and one following character, if any
4452 next = endptr + (*endptr != '\0');
4453 if (cfg.powerskipmax <= 0)
4454 badarg = 1;
4455 }
4456 if (*next != '\0') {
4457 if (!strcmp("q", next))
4458 cfg.powerquiet = true;
4459 else {
4460 badarg = 1;
4461 }
4462 }
4463 }
4464 }
4465 break;
4466 case 'S':
4467 // automatic attribute autosave enable/disable
 // 2 = "on", 1 = "off"
4468 if (!(arg = strtok(nullptr, delim))) {
4469 missingarg = 1;
4470 } else if (!strcmp(arg, "on")) {
4471 cfg.autosave = 2;
4472 } else if (!strcmp(arg, "off")) {
4473 cfg.autosave = 1;
4474 } else {
4475 badarg = 1;
4476 }
4477 break;
4478 case 's':
4479 // warn user, and delete any previously given -s REGEXP Directives
4480 if (!cfg.test_regex.empty()){
4481 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4482 configfile, lineno, name, cfg.test_regex.get_pattern());
4484 }
4485 // check for missing argument
4486 if (!(arg = strtok(nullptr, delim))) {
4487 missingarg = 1;
4488 }
4489 // Compile regex
4490 else {
4491 if (!cfg.test_regex.compile(arg)) {
4492 // not a valid regular expression!
4493 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4494 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4495 return -1;
4496 }
4497 // Do a bit of sanity checking and warn user if we think that
4498 // their regexp is "strange". User probably confused about shell
4499 // glob(3) syntax versus regular expression syntax regexp(7).
4500 // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4501 static const regular_expression syntax_check(
4502 "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4503 ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4504 ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4505 );
 // NOTE(review): the declaration of 'range' used below is not present in
 // this listing -- confirm against the original file.
4507 if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4508 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4509 "extended regular expression \"%s\"\n",
4510 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4511 }
4512 break;
4513 case 'm':
4514 // send email to address that follows
4515 if (!(arg = strtok(nullptr, delim)))
4516 missingarg = 1;
4517 else {
4518 if (!cfg.emailaddress.empty())
4519 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4520 configfile, lineno, name, cfg.emailaddress.c_str());
4521 cfg.emailaddress = arg;
4522 }
4523 break;
4524 case 'M':
4525 // email warning options
 // NOTE(review): the statements of the "once", "always", "daily" and
 // "diminishing" branches are not present in this listing -- confirm
 // against the original file.
4526 if (!(arg = strtok(nullptr, delim)))
4527 missingarg = 1;
4528 else if (!strcmp(arg, "once"))
4530 else if (!strcmp(arg, "always"))
4532 else if (!strcmp(arg, "daily"))
4534 else if (!strcmp(arg, "diminishing"))
4536 else if (!strcmp(arg, "test"))
4537 cfg.emailtest = true;
4538 else if (!strcmp(arg, "exec")) {
4539 // Get the next argument (the command line)
4540#ifdef _WIN32
4541 // Allow "/path name/with spaces/..." on Windows
 // strtok_dequote() returns a string starting with '"' if the closing
 // quote is missing
4542 arg = strtok_dequote(delim);
4543 if (arg && arg[0] == '"') {
4544 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4545 configfile, lineno, name, token);
4546 return -1;
4547 }
4548#else
4549 arg = strtok(nullptr, delim);
4550#endif
4551 if (!arg) {
4552 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4553 configfile, lineno, name, token);
4554 return -1;
4555 }
4556 // Free the last cmd line given if any, and copy new one
4557 if (!cfg.emailcmdline.empty())
4558 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4559 configfile, lineno, name, cfg.emailcmdline.c_str());
4560 cfg.emailcmdline = arg;
4561 }
4562 else
4563 badarg = 1;
4564 break;
4565 case 'i':
4566 // ignore failure of usage attribute
 // NOTE(review): the statement that stores 'val' is not present in this
 // listing (same for '-I', '-r' and '-R' below) -- confirm against the
 // original file.
4567 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4568 return -1;
4570 break;
4571 case 'I':
4572 // ignore attribute for tracking purposes
4573 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4574 return -1;
4576 break;
4577 case 'r':
4578 // print raw value when tracking
4579 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4580 return -1;
4582 if (*excl == '!') // attribute change is critical
4584 break;
4585 case 'R':
4586 // track changes in raw value (forces printing of raw value)
4587 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4588 return -1;
4590 if (*excl == '!') // raw value change is critical
4592 break;
4593 case 'W':
4594 // track Temperature
4595 if (Get3Integers((arg = strtok(nullptr, delim)), name, token, lineno, configfile,
4596 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4597 return -1;
4598 break;
4599 case 'v':
4600 // non-default vendor-specific attribute meaning
4601 if (!(arg = strtok(nullptr, delim))) {
4602 missingarg = 1;
4603 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4604 badarg = 1;
4605 }
4606 break;
4607 case 'P':
4608 // Define use of drive-specific presets.
4609 if (!(arg = strtok(nullptr, delim))) {
4610 missingarg = 1;
4611 } else if (!strcmp(arg, "use")) {
4612 cfg.ignorepresets = false;
4613 } else if (!strcmp(arg, "ignore")) {
4614 cfg.ignorepresets = true;
4615 } else if (!strcmp(arg, "show")) {
4616 cfg.showpresets = true;
4617 } else if (!strcmp(arg, "showall")) {
4619 } else {
4620 badarg = 1;
4621 }
4622 break;
4623
4624 case 'e':
4625 // Various ATA settings
4626 if (!(arg = strtok(nullptr, delim))) {
4627 missingarg = true;
4628 }
4629 else {
 // Argument syntax: NAME or NAME,VALUE (also NAME=VALUE).
 // n1: offset behind NAME, n2: offset of VALUE (-1 if no separator),
 // n3: offset behind a numeric VALUE (-1 if VALUE is not a number)
4630 char arg2[16+1]; unsigned uval;
4631 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4632 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &uval, &n3) >= 1
4633 && (n1 == len || n2 > 0)) {
4634 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4635 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
 // Not a complete number: use a value that fails all range checks below
4636 if (n3 != len)
4637 uval = ~0U;
4638
 // Numeric settings are stored as value + 1, 'off' as -1 (standby: 0 + 1)
4639 if (!strcmp(arg2, "aam")) {
4640 if (off)
4641 cfg.set_aam = -1;
4642 else if (uval <= 254)
4643 cfg.set_aam = uval + 1;
4644 else
4645 badarg = true;
4646 }
4647 else if (!strcmp(arg2, "apm")) {
4648 if (off)
4649 cfg.set_apm = -1;
4650 else if (1 <= uval && uval <= 254)
4651 cfg.set_apm = uval + 1;
4652 else
4653 badarg = true;
4654 }
4655 else if (!strcmp(arg2, "lookahead")) {
4656 if (off)
4657 cfg.set_lookahead = -1;
4658 else if (on)
4659 cfg.set_lookahead = 1;
4660 else
4661 badarg = true;
4662 }
 // Compared against the full argument (arg, not arg2), so no value is accepted
4663 else if (!strcmp(arg, "security-freeze")) {
4664 cfg.set_security_freeze = true;
4665 }
4666 else if (!strcmp(arg2, "standby")) {
4667 if (off)
4668 cfg.set_standby = 0 + 1;
4669 else if (uval <= 255)
4670 cfg.set_standby = uval + 1;
4671 else
4672 badarg = true;
4673 }
4674 else if (!strcmp(arg2, "wcache")) {
4675 if (off)
4676 cfg.set_wcache = -1;
4677 else if (on)
4678 cfg.set_wcache = 1;
4679 else
4680 badarg = true;
4681 }
4682 else if (!strcmp(arg2, "dsn")) {
4683 if (off)
4684 cfg.set_dsn = -1;
4685 else if (on)
4686 cfg.set_dsn = 1;
4687 else
4688 badarg = true;
4689 }
4690 else
4691 badarg = true;
4692 }
4693 else
4694 badarg = true;
4695 }
4696 break;
4697
4698 case 'c':
4699 // Override command line options
4700 {
4701 if (!(arg = strtok(nullptr, delim))) {
4702 missingarg = true;
4703 break;
4704 }
 // Accepts "i=N" or "interval=N" with N >= 10 and no trailing characters
4705 int n = 0, nc = -1, len = strlen(arg);
4706 if ( ( sscanf(arg, "i=%d%n", &n, &nc) == 1
4707 || sscanf(arg, "interval=%d%n", &n, &nc) == 1)
4708 && nc == len && n >= 10)
4709 cfg.checktime = n;
4710 else
4711 badarg = true;
4712 }
4713 break;
4714
4715 default:
4716 // Directive not recognized
4717 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4718 configfile, lineno, name, token);
4719 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4720 return -1;
4721 }
 // Common error reporting for the Directives which set missingarg or badarg
4722 if (missingarg) {
4723 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4724 configfile, lineno, name, token);
4725 }
4726 if (badarg) {
4727 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4728 configfile, lineno, name, token, arg);
4729 }
4730 if (missingarg || badarg) {
4731 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4732 printoutvaliddirectiveargs(LOG_CRIT, sym);
4733 PrintOut(LOG_CRIT, "\n");
4734 return -1;
4735 }
4736
4737 return 1;
4738}
4739
// Device name keyword that marks a device scan entry in the configuration file.
// Kept as a string literal macro so it can be concatenated with other literals.
#define SCANDIRECTIVE "DEVICESCAN"
4742
4743// This is the routine that adds things to the conf_entries list.
4744//
4745// Return values are:
4746// 1: parsed a normal line
4747// 0: found DEFAULT setting or comment or blank line
4748// -1: found SCANDIRECTIVE line
4749// -2: found an error
4750//
4751// Note: this routine modifies *line from the caller!
4752static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4753 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4754{
4755 const char *delim = " \n\t";
4756
4757 // get first token: device name. If a comment, skip line
4758 const char * name = strtok(line, delim);
4759 if (!name || *name == '#')
4760 return 0;
4761
4762 // Check device name for DEFAULT or DEVICESCAN
4763 int retval;
4764 if (!strcmp("DEFAULT", name)) {
4765 retval = 0;
4766 // Restart with empty defaults
4767 default_conf = dev_config();
4768 }
4769 else {
4770 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4771 // Init new entry with current defaults
4772 conf_entries.push_back(default_conf);
4773 }
4774 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4775
4776 cfg.name = name; // Later replaced by dev->get_info().info_name
4777 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4778 cfg.lineno = lineno;
4779
4780 // parse tokens one at a time from the file.
4781 while (char * token = strtok(nullptr, delim)) {
4782 int rc = ParseToken(token, cfg, scan_types);
4783 if (rc < 0)
4784 // error found on the line
4785 return -2;
4786
4787 if (rc == 0)
4788 // No tokens left
4789 break;
4790
4791 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4792 }
4793
4794 // Check for multiple -d TYPE directives
4795 if (retval != -1 && scan_types.size() > 1) {
4796 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4797 cfg.name.c_str(), cfg.lineno, configfile);
4798 return -2;
4799 }
4800
4801 // Don't perform checks below for DEFAULT entries
4802 if (retval == 0)
4803 return retval;
4804
4805 // If NO monitoring directives are set, then set all of them.
4806 if (!( cfg.smartcheck || cfg.selftest
4807 || cfg.errorlog || cfg.xerrorlog
4808 || cfg.offlinests || cfg.selfteststs
4809 || cfg.usagefailed || cfg.prefail || cfg.usage
4810 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4811
4812 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4813 cfg.name.c_str(), cfg.lineno, configfile);
4814
4815 cfg.smartcheck = true;
4816 cfg.usagefailed = true;
4817 cfg.prefail = true;
4818 cfg.usage = true;
4819 cfg.selftest = true;
4820 cfg.errorlog = true;
4821 cfg.selfteststs = true;
4822 }
4823
4824 // additional sanity check. Has user set -M options without -m?
4825 if ( cfg.emailaddress.empty()
4826 && (!cfg.emailcmdline.empty() || cfg.emailfreq != emailfreqs::unknown || cfg.emailtest)) {
4827 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4828 cfg.name.c_str(), cfg.lineno, configfile);
4829 return -2;
4830 }
4831
4832 // has the user has set <nomailer>?
4833 if (cfg.emailaddress == "<nomailer>") {
4834 // check that -M exec is also set
4835 if (cfg.emailcmdline.empty()){
4836 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4837 cfg.name.c_str(), cfg.lineno, configfile);
4838 return -2;
4839 }
4840 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4841 cfg.emailaddress.clear();
4842 }
4843
4844 return retval;
4845}
4846
4847// Parses a configuration file. Return values are:
4848// N=>0: found N entries
4849// -1: syntax error in config file
4850// -2: config file does not exist
4851// -3: config file exists but cannot be read
4852//
4853// In the case where the return value is 0, there are three
4854// possibilities:
4855// Empty configuration file ==> conf_entries.empty()
4856// No configuration file ==> conf_entries[0].lineno == 0
4857// SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4858static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4859{
4860 // maximum line length in configuration file
4861 const int MAXLINELEN = 256;
4862 // maximum length of a continued line in configuration file
4863 const int MAXCONTLINE = 1023;
4864
4865 stdio_file f;
4866 // Open config file, if it exists and is not <stdin>
4867 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4868 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4869 // file exists but we can't read it or it should exist due to '-c' option
4870 int ret = (errno!=ENOENT ? -3 : -2);
4871 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4872 strerror(errno),configfile);
4873 return ret;
4874 }
4875 }
4876 else // read from stdin ('-c -' option)
4877 f.open(stdin);
4878
4879 // Start with empty defaults
4880 dev_config default_conf;
4881
4882 // No configuration file found -- use fake one
4883 int entry = 0;
4884 if (!f) {
4885 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4886
4887 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4888 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4889 return 0;
4890 }
4891
4892#ifdef __CYGWIN__
4893 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4894#endif
4895
4896 // configuration file exists
4897 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4898
4899 // parse config file line by line
4900 int lineno = 1, cont = 0, contlineno = 0;
4901 char line[MAXLINELEN+2];
4902 char fullline[MAXCONTLINE+1];
4903
4904 for (;;) {
4905 int len=0,scandevice;
4906 char *lastslash;
4907 char *comment;
4908 char *code;
4909
4910 // make debugging simpler
4911 memset(line,0,sizeof(line));
4912
4913 // get a line
4914 code=fgets(line, MAXLINELEN+2, f);
4915
4916 // are we at the end of the file?
4917 if (!code){
4918 if (cont) {
4919 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4920 // See if we found a SCANDIRECTIVE directive
4921 if (scandevice==-1)
4922 return 0;
4923 // did we find a syntax error
4924 if (scandevice==-2)
4925 return -1;
4926 // the final line is part of a continuation line
4927 entry+=scandevice;
4928 }
4929 break;
4930 }
4931
4932 // input file line number
4933 contlineno++;
4934
4935 // See if line is too long
4936 len=strlen(line);
4937 if (len>MAXLINELEN){
4938 const char *warn;
4939 if (line[len-1]=='\n')
4940 warn="(including newline!) ";
4941 else
4942 warn="";
4943 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4944 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4945 return -1;
4946 }
4947
4948 // Ignore anything after comment symbol
4949 if ((comment=strchr(line,'#'))){
4950 *comment='\0';
4951 len=strlen(line);
4952 }
4953
4954 // is the total line (made of all continuation lines) too long?
4955 if (cont+len>MAXCONTLINE){
4956 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4957 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4958 return -1;
4959 }
4960
4961 // copy string so far into fullline, and increment length
4962 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4963 cont+=len;
4964
4965 // is this a continuation line. If so, replace \ by space and look at next line
4966 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4967 *(fullline+(cont-len)+(lastslash-line))=' ';
4968 continue;
4969 }
4970
4971 // Not a continuation line. Parse it
4972 scan_types.clear();
4973 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4974
4975 // did we find a scandevice directive?
4976 if (scandevice==-1)
4977 return 0;
4978 // did we find a syntax error
4979 if (scandevice==-2)
4980 return -1;
4981
4982 entry+=scandevice;
4983 lineno++;
4984 cont=0;
4985 }
4986
4987 // note -- may be zero if syntax of file OK, but no valid entries!
4988 return entry;
4989}
4990
4991/* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4992 <LIST> is the list of valid arguments for option opt. */
4993static void PrintValidArgs(char opt)
4994{
4995 const char *s;
4996
4997 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4998 if (!(s = GetValidArgList(opt)))
4999 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
5000 else
5001 PrintOut(LOG_CRIT, "%s", (char *)s);
5002 PrintOut(LOG_CRIT, " <=======\n");
5003}
5004
5005#ifndef _WIN32
5006// Report error and return false if specified path is not absolute.
5007static bool check_abs_path(char option, const std::string & path)
5008{
5009 if (path.empty() || path[0] == '/')
5010 return true;
5011
5012 debugmode = 1;
5013 PrintHead();
5014 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
5015 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
5016 return false;
5017}