smartmontools SVN Rev 5382
Utility to control and monitor storage systems with "S.M.A.R.T."
smartd.cpp
Go to the documentation of this file.
1/*
2 * Home page of code is: https://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-22 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12#include "config.h"
13#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15// unconditionally included files
16#include <inttypes.h>
17#include <stdio.h>
18#include <sys/types.h>
19#include <sys/stat.h> // umask
20#include <signal.h>
21#include <fcntl.h>
22#include <string.h>
23#include <syslog.h>
24#include <stdarg.h>
25#include <stdlib.h>
26#include <errno.h>
27#include <time.h>
28#include <limits.h>
29#include <getopt.h>
30
31#include <algorithm> // std::replace()
32#include <map>
33#include <stdexcept>
34#include <string>
35#include <vector>
36
37// conditionally included files
38#ifndef _WIN32
39#include <sys/wait.h>
40#endif
41#ifdef HAVE_UNISTD_H
42#include <unistd.h>
43#endif
44
45#ifdef _WIN32
46#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
47#ifdef _MSC_VER
48#pragma warning(disable:4761) // "conversion supplied"
49typedef unsigned short mode_t;
50typedef int pid_t;
51#endif
52#include <io.h> // umask()
53#include <process.h> // getpid()
54#endif // _WIN32
55
56#ifdef __CYGWIN__
57#include <io.h> // setmode()
58#endif // __CYGWIN__
59
60#ifdef HAVE_LIBCAP_NG
61#include <cap-ng.h>
62#endif // LIBCAP_NG
63
64#ifdef HAVE_LIBSYSTEMD
65#include <systemd/sd-daemon.h>
66#endif // HAVE_LIBSYSTEMD
67
68// locally included files
69#include "atacmds.h"
70#include "dev_interface.h"
71#include "knowndrives.h"
72#include "scsicmds.h"
73#include "nvmecmds.h"
74#include "utility.h"
75
76#ifdef HAVE_POSIX_API
77#include "popen_as_ugid.h"
78#endif
79
80#ifdef _WIN32
81// fork()/signal()/initd simulation for native Windows
82#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
83#define strsignal daemon_strsignal
84#define sleep daemon_sleep
85// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
86#define SIGQUIT SIGBREAK
87#define SIGQUIT_KEYNAME "CONTROL-Break"
88#else // _WIN32
89#define SIGQUIT_KEYNAME "CONTROL-\\"
90#endif // _WIN32
91
92const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5324 2022-02-22 18:00:47Z chrfranke $"
93 CONFIG_H_CVSID;
94
95extern "C" {
96 typedef void (*signal_handler_type)(int);
97}
98
100{
101#if defined(_WIN32)
102 // signal() emulation
103 daemon_signal(sig, handler);
104
105#elif defined(HAVE_SIGACTION)
106 // SVr4, POSIX.1-2001, POSIX.1-2008
107 struct sigaction sa;
108 sa.sa_handler = SIG_DFL;
109 sigaction(sig, (struct sigaction *)0, &sa);
110 if (sa.sa_handler == SIG_IGN)
111 return;
112
113 memset(&sa, 0, sizeof(sa));
114 sa.sa_handler = handler;
115 sa.sa_flags = SA_RESTART; // BSD signal() semantics
116 sigaction(sig, &sa, (struct sigaction *)0);
117
118#elif defined(HAVE_SIGSET)
119 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
120 if (sigset(sig, handler) == SIG_IGN)
121 sigset(sig, SIG_IGN);
122
123#else
124 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
125 // Important: BSD semantics is required. Traditional signal()
126 // resets the handler to SIG_DFL after the first signal is caught.
127 if (signal(sig, handler) == SIG_IGN)
128 signal(sig, SIG_IGN);
129#endif
130}
131
132using namespace smartmontools;
133
134// smartd exit codes
135#define EXIT_BADCMD 1 // command line did not parse
136#define EXIT_BADCONF 2 // syntax error in config file
137#define EXIT_STARTUP 3 // problem forking daemon
138#define EXIT_PID 4 // problem creating pid file
139#define EXIT_NOCONF 5 // config file does not exist
140#define EXIT_READCONF 6 // config file exists but cannot be read
141
142#define EXIT_NOMEM 8 // out of memory
143#define EXIT_BADCODE 10 // internal error - should NEVER happen
144
145#define EXIT_BADDEV 16 // we can't monitor this device
146#define EXIT_NODEV 17 // no devices to monitor
147
148#define EXIT_SIGNAL 254 // abort on signal
149
150
151// command-line: 1=debug mode, 2=print presets
152static unsigned char debugmode = 0;
153
154// command-line: how long to sleep between checks
155static constexpr int default_checktime = 1800;
157static int checktime_min = 0; // Minimum individual check time, 0 if none
158
159// command-line: name of PID file (empty for no pid file)
160static std::string pid_file;
161
162// command-line: path prefix of persistent state file, empty if no persistence.
163static std::string state_path_prefix
164#ifdef SMARTMONTOOLS_SAVESTATES
165 = SMARTMONTOOLS_SAVESTATES
166#endif
167 ;
168
169// command-line: path prefix of attribute log file, empty if no logs.
170static std::string attrlog_path_prefix
171#ifdef SMARTMONTOOLS_ATTRIBUTELOG
172 = SMARTMONTOOLS_ATTRIBUTELOG
173#endif
174 ;
175
176// configuration file name
177static const char * configfile;
178// configuration file "name" if read from stdin
179static const char * const configfile_stdin = "<stdin>";
180// path of alternate configuration file
181static std::string configfile_alt;
182
183// warning script file
184static std::string warning_script;
185
186#ifdef HAVE_POSIX_API
187// run warning script as non-privileged user
188static bool warn_as_user;
189static uid_t warn_uid;
190static gid_t warn_gid;
191static std::string warn_uname, warn_gname;
192#elif defined(_WIN32)
193// run warning script as restricted user
194static bool warn_as_restr_user;
195#endif
196
197// command-line: when should we exit?
198enum quit_t {
203static bool quit_nodev0 = false;
204
205// command-line; this is the default syslog(3) log facility to use.
206static int facility=LOG_DAEMON;
207
208#ifndef _WIN32
209// command-line: fork into background?
210static bool do_fork=true;
211#endif
212
213// TODO: This smartctl only variable is also used in some os_*.cpp
214unsigned char failuretest_permissive = 0;
215
216// set to one if we catch a USR1 (check devices now)
217static volatile int caughtsigUSR1=0;
218
219#ifdef _WIN32
220// set to one if we catch a USR2 (toggle debug mode)
221static volatile int caughtsigUSR2=0;
222#endif
223
224// set to one if we catch a HUP (reload config file). In debug mode,
225// set to two, if we catch INT (also reload config file).
226static volatile int caughtsigHUP=0;
227
228// set to signal value if we catch INT, QUIT, or TERM
229static volatile int caughtsigEXIT=0;
230
231// This function prints either to stdout or to the syslog as needed.
232static void PrintOut(int priority, const char *fmt, ...)
234
235#ifdef HAVE_LIBSYSTEMD
236// systemd notify support
237
238static bool notify_enabled = false;
239
240static inline void notify_init()
241{
242 if (!getenv("NOTIFY_SOCKET"))
243 return;
244 notify_enabled = true;
245}
246
247static inline bool notify_post_init()
248{
249 if (!notify_enabled)
250 return true;
251 if (do_fork) {
252 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
253 return false;
254 }
255 return true;
256}
257
258static void notify_msg(const char * msg, bool ready = false)
259{
260 if (!notify_enabled)
261 return;
262 if (debugmode) {
263 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
264 return;
265 }
266 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
267}
268
269static void notify_check(int numdev)
270{
271 if (!notify_enabled)
272 return;
273 char msg[32];
274 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
275 numdev, (numdev != 1 ? "s" : ""));
276 notify_msg(msg);
277}
278
279static void notify_wait(time_t wakeuptime, int numdev)
280{
281 if (!notify_enabled)
282 return;
283 char ts[16] = ""; struct tm tmbuf;
284 strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
285 char msg[64];
286 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
287 numdev, (numdev != 1 ? "s" : ""), ts);
288 static bool ready = true; // first call notifies READY=1
289 notify_msg(msg, ready);
290 ready = false;
291}
292
293static void notify_exit(int status)
294{
295 if (!notify_enabled)
296 return;
297 const char * msg;
298 switch (status) {
299 case 0: msg = "Exiting ..."; break;
300 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
301 case EXIT_BADCONF: case EXIT_NOCONF:
302 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
303 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
304 case EXIT_NODEV: msg = "No devices to monitor"; break;
305 default: msg = "Error (see SYSLOG)"; break;
306 }
307 notify_msg(msg);
308}
309
310#else // HAVE_LIBSYSTEMD
311// No systemd notify support
312
313static inline bool notify_post_init()
314{
315#ifdef __linux__
316 if (getenv("NOTIFY_SOCKET")) {
317 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
318 return false;
319 }
320#endif
321 return true;
322}
323
// No-op replacements for the notify functions, used when smartd is
// built without libsystemd.  All arguments are ignored.

static inline void notify_init()
{
}

static inline void notify_msg(const char *)
{
}

static inline void notify_check(int)
{
}

static inline void notify_wait(time_t, int)
{
}

static inline void notify_exit(int)
{
}
329
330#endif // HAVE_LIBSYSTEMD
331
332// Attribute monitoring flags.
333// See monitor_attr_flags below.
334enum {
341};
342
343// Array of flags for each attribute.
345{
346public:
347 bool is_set(int id, unsigned char flag) const
348 { return (0 < id && id < (int)sizeof(m_flags) && (m_flags[id] & flag)); }
349
350 void set(int id, unsigned char flags)
351 {
352 if (0 < id && id < (int)sizeof(m_flags))
353 m_flags[id] |= flags;
354 }
355
356private:
357 unsigned char m_flags[256]{};
358};
359
360
361/// Configuration data for a device. Read from smartd.conf.
362/// Supports copy & assignment and is compatible with STL containers.
364{
365 int lineno{}; // Line number of entry in file
366 std::string name; // Device name (with optional extra info)
367 std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
368 std::string dev_type; // Device type argument from -d directive, empty if none
369 std::string dev_idinfo; // Device identify info for warning emails
370 std::string state_file; // Path of the persistent state file, empty if none
371 std::string attrlog_file; // Path of the persistent attrlog file, empty if none
372 int checktime{}; // Individual check interval, 0 if none
373 bool ignore{}; // Ignore this entry
374 bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
375 bool smartcheck{}; // Check SMART status
376 bool usagefailed{}; // Check for failed Usage Attributes
377 bool prefail{}; // Track changes in Prefail Attributes
378 bool usage{}; // Track changes in Usage Attributes
379 bool selftest{}; // Monitor number of selftest errors
380 bool errorlog{}; // Monitor number of ATA errors
381 bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
382 bool offlinests{}; // Monitor changes in offline data collection status
383 bool offlinests_ns{}; // Disable auto standby if in progress
384 bool selfteststs{}; // Monitor changes in self-test execution status
385 bool selfteststs_ns{}; // Disable auto standby if in progress
386 bool permissive{}; // Ignore failed SMART commands
387 char autosave{}; // 1=disable, 2=enable Autosave Attributes
388 char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
389 firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
390 bool ignorepresets{}; // Ignore database of -v options
391 bool showpresets{}; // Show database entry for this device
392 bool removable{}; // Device may disappear (not be present)
393 char powermode{}; // skip check, if disk in idle or standby mode
394 bool powerquiet{}; // skip powermode 'skipping checks' message
395 int powerskipmax{}; // how many times can be check skipped
396 unsigned char tempdiff{}; // Track Temperature changes >= this limit
397 unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
398 regular_expression test_regex; // Regex for scheduled testing
399 unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
400
401 // Configuration of email warning messages
402 std::string emailcmdline; // script to execute, empty if no messages
403 std::string emailaddress; // email address, or empty
404 unsigned char emailfreq{}; // Emails once (1) daily (2) diminishing (3)
405 bool emailtest{}; // Send test email?
406
407 // ATA ONLY
408 int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
409 int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
410 int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
411 int set_lookahead{}; // disable(-1), enable(1) read look-ahead
412 int set_standby{}; // set(1..255->0..254) standby timer
413 bool set_security_freeze{}; // Freeze ATA security
414 int set_wcache{}; // disable(-1), enable(1) write cache
415 int set_dsn{}; // disable(0x2), enable(0x1) DSN
416
417 bool sct_erc_set{}; // set SCT ERC to:
418 unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
419 unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
420
421 unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
422 unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
423 bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
424 bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
425
426 attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
427
429};
430
431// Number of allowed mail message types
432static const int SMARTD_NMAIL = 13;
433// Type for '-M test' mails (state not persistent)
434static const int MAILTYPE_TEST = 0;
435// TODO: Add const or enum for all mail types.
436
// Bookkeeping for one type of warning mail.
// All members start at zero.
struct mailinfo {
  // number of times an email has been sent
  int logged = 0;
  // time first email was sent, as defined by time(2)
  time_t firstsent = 0;
  // time last email was sent, as defined by time(2)
  time_t lastsent = 0;
};
442
443/// Persistent state data for a device.
445{
446 unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
447
448 unsigned char selflogcount{}; // total number of self-test errors
449 unsigned short selfloghour{}; // lifetime hours of last self-test error
450
451 time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
452
453 uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
454 uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
455
456 mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
457
458 // ATA ONLY
459 int ataerrorcount{}; // Total number of ATA errors
460
461 // Persistent part of ata_smart_values:
463 unsigned char id{};
464 unsigned char val{};
465 unsigned char worst{}; // Byte needed for 'raw64' attribute only.
466 uint64_t raw{};
467 unsigned char resvd{};
468 };
470
471 // SCSI ONLY
472
475 unsigned char found{};
476 };
478
481 unsigned char found{};
482 };
484
485 // NVMe only
487};
488
489/// Non-persistent state data for a device.
491{
492 bool must_write{}; // true if persistent part should be written
493
494 bool skip{}; // skip during next check cycle
495 time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
496
497 bool not_cap_offline{}; // true == not capable of offline testing
502
503 unsigned char temperature{}; // last recorded Temperature (in Celsius)
504 time_t tempmin_delay{}; // time where Min Temperature tracking will start
505
506 bool removed{}; // true if open() failed for removable device
507
508 bool powermodefail{}; // true if power mode check failed
509 int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
510 int lastpowermodeskipped{}; // the last power mode that was skipped
511
512 bool attrlog_dirty{}; // true if persistent part has new attr values that
513 // need to be written to attrlog
514
515 // SCSI ONLY
516 // TODO: change to bool
517 unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
518 unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
523 unsigned char SuppressReport{}; // minimize nuisance reports
524 unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
525 // know yet) 6 or 10
526 // ATA ONLY
527 uint64_t num_sectors{}; // Number of sectors
528 ata_smart_values smartval{}; // SMART data
530 bool offline_started{}; // true if offline data collection was started
531 bool selftest_started{}; // true if self-test was started
532};
533
534/// Runtime state data for a device.
536: public persistent_dev_state,
537 public temp_dev_state
538{
540 void update_temp_state();
541};
542
543/// Container for configuration info for each device.
544typedef std::vector<dev_config> dev_config_vector;
545
546/// Container for state info for each device.
547typedef std::vector<dev_state> dev_state_vector;
548
549// Copy ATA attributes to persistent state.
551{
552 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
555 pa.id = ta.id;
556 if (ta.id == 0) {
557 pa.val = pa.worst = 0; pa.raw = 0;
558 continue;
559 }
560 pa.val = ta.current;
561 pa.worst = ta.worst;
562 pa.raw = ta.raw[0]
563 | ( ta.raw[1] << 8)
564 | ( ta.raw[2] << 16)
565 | ((uint64_t)ta.raw[3] << 24)
566 | ((uint64_t)ta.raw[4] << 32)
567 | ((uint64_t)ta.raw[5] << 40);
568 pa.resvd = ta.reserv;
569 }
570}
571
572// Copy ATA from persistent to temp state.
574{
575 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
576 const ata_attribute & pa = ata_attributes[i];
578 ta.id = pa.id;
579 if (pa.id == 0) {
580 ta.current = ta.worst = 0;
581 memset(ta.raw, 0, sizeof(ta.raw));
582 continue;
583 }
584 ta.current = pa.val;
585 ta.worst = pa.worst;
586 ta.raw[0] = (unsigned char) pa.raw;
587 ta.raw[1] = (unsigned char)(pa.raw >> 8);
588 ta.raw[2] = (unsigned char)(pa.raw >> 16);
589 ta.raw[3] = (unsigned char)(pa.raw >> 24);
590 ta.raw[4] = (unsigned char)(pa.raw >> 32);
591 ta.raw[5] = (unsigned char)(pa.raw >> 40);
592 ta.reserv = pa.resvd;
593 }
594}
595
596// Parse a line from a state file.
597static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
598{
599 static const regular_expression regex(
600 "^ *"
601 "((temperature-min)" // (1 (2)
602 "|(temperature-max)" // (3)
603 "|(self-test-errors)" // (4)
604 "|(self-test-last-err-hour)" // (5)
605 "|(scheduled-test-next-check)" // (6)
606 "|(selective-test-last-start)" // (7)
607 "|(selective-test-last-end)" // (8)
608 "|(ata-error-count)" // (9)
609 "|(mail\\.([0-9]+)\\." // (10 (11)
610 "((count)" // (12 (13)
611 "|(first-sent-time)" // (14)
612 "|(last-sent-time)" // (15)
613 ")" // 12)
614 ")" // 10)
615 "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
616 "((id)" // (18 (19)
617 "|(val)" // (20)
618 "|(worst)" // (21)
619 "|(raw)" // (22)
620 "|(resvd)" // (23)
621 ")" // 18)
622 ")" // 16)
623 "|(nvme-err-log-entries)" // (24)
624 ")" // 1)
625 " *= *([0-9]+)[ \n]*$" // (25)
626 );
627
628 const int nmatch = 1+25;
630 if (!regex.execute(line, nmatch, match))
631 return false;
632 if (match[nmatch-1].rm_so < 0)
633 return false;
634
635 uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
636
637 int m = 1;
638 if (match[++m].rm_so >= 0)
639 state.tempmin = (unsigned char)val;
640 else if (match[++m].rm_so >= 0)
641 state.tempmax = (unsigned char)val;
642 else if (match[++m].rm_so >= 0)
643 state.selflogcount = (unsigned char)val;
644 else if (match[++m].rm_so >= 0)
645 state.selfloghour = (unsigned short)val;
646 else if (match[++m].rm_so >= 0)
647 state.scheduled_test_next_check = (time_t)val;
648 else if (match[++m].rm_so >= 0)
649 state.selective_test_last_start = val;
650 else if (match[++m].rm_so >= 0)
651 state.selective_test_last_end = val;
652 else if (match[++m].rm_so >= 0)
653 state.ataerrorcount = (int)val;
654 else if (match[m+=2].rm_so >= 0) {
655 int i = atoi(line+match[m].rm_so);
656 if (!(0 <= i && i < SMARTD_NMAIL))
657 return false;
658 if (i == MAILTYPE_TEST) // Don't suppress test mails
659 return true;
660 if (match[m+=2].rm_so >= 0)
661 state.maillog[i].logged = (int)val;
662 else if (match[++m].rm_so >= 0)
663 state.maillog[i].firstsent = (time_t)val;
664 else if (match[++m].rm_so >= 0)
665 state.maillog[i].lastsent = (time_t)val;
666 else
667 return false;
668 }
669 else if (match[m+=5+1].rm_so >= 0) {
670 int i = atoi(line+match[m].rm_so);
671 if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
672 return false;
673 if (match[m+=2].rm_so >= 0)
674 state.ata_attributes[i].id = (unsigned char)val;
675 else if (match[++m].rm_so >= 0)
676 state.ata_attributes[i].val = (unsigned char)val;
677 else if (match[++m].rm_so >= 0)
678 state.ata_attributes[i].worst = (unsigned char)val;
679 else if (match[++m].rm_so >= 0)
680 state.ata_attributes[i].raw = val;
681 else if (match[++m].rm_so >= 0)
682 state.ata_attributes[i].resvd = (unsigned char)val;
683 else
684 return false;
685 }
686 else if (match[m+7].rm_so >= 0)
687 state.nvme_err_log_entries = val;
688 else
689 return false;
690 return true;
691}
692
693// Read a state file.
694static bool read_dev_state(const char * path, persistent_dev_state & state)
695{
696 stdio_file f(path, "r");
697 if (!f) {
698 if (errno != ENOENT)
699 pout("Cannot read state file \"%s\"\n", path);
700 return false;
701 }
702#ifdef __CYGWIN__
703 setmode(fileno(f), O_TEXT); // Allow files with \r\n
704#endif
705
706 persistent_dev_state new_state;
707 int good = 0, bad = 0;
708 char line[256];
709 while (fgets(line, sizeof(line), f)) {
710 const char * s = line + strspn(line, " \t");
711 if (!*s || *s == '#')
712 continue;
713 if (!parse_dev_state_line(line, new_state))
714 bad++;
715 else
716 good++;
717 }
718
719 if (bad) {
720 if (!good) {
721 pout("%s: format error\n", path);
722 return false;
723 }
724 pout("%s: %d invalid line(s) ignored\n", path, bad);
725 }
726
727 // This sets the values missing in the file to 0.
728 state = new_state;
729 return true;
730}
731
// Write a "name = value" line to a state file.
// Nothing is written if the value is zero.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
737
// Write a "name1.id.name2 = value" line to a state file.
// Nothing is written if the value is zero.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
743
744// Write a state file
745static bool write_dev_state(const char * path, const persistent_dev_state & state)
746{
747 // Rename old "file" to "file~"
748 std::string pathbak = path; pathbak += '~';
749 unlink(pathbak.c_str());
750 rename(path, pathbak.c_str());
751
752 stdio_file f(path, "w");
753 if (!f) {
754 pout("Cannot create state file \"%s\"\n", path);
755 return false;
756 }
757
758 fprintf(f, "# smartd state file\n");
759 write_dev_state_line(f, "temperature-min", state.tempmin);
760 write_dev_state_line(f, "temperature-max", state.tempmax);
761 write_dev_state_line(f, "self-test-errors", state.selflogcount);
762 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
763 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
764 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
765 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
766
767 for (int i = 0; i < SMARTD_NMAIL; i++) {
768 if (i == MAILTYPE_TEST) // Don't suppress test mails
769 continue;
770 const mailinfo & mi = state.maillog[i];
771 if (!mi.logged)
772 continue;
773 write_dev_state_line(f, "mail", i, "count", mi.logged);
774 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
775 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
776 }
777
778 // ATA ONLY
779 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
780
781 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
782 const auto & pa = state.ata_attributes[i];
783 if (!pa.id)
784 continue;
785 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
786 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
787 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
788 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
789 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
790 }
791
792 // NVMe only
793 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
794
795 return true;
796}
797
798// Write to the attrlog file
799static bool write_dev_attrlog(const char * path, const dev_state & state)
800{
801 stdio_file f(path, "a");
802 if (!f) {
803 pout("Cannot create attribute log file \"%s\"\n", path);
804 return false;
805 }
806
807
808 time_t now = time(nullptr);
809 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
810 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
811 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
812 tms->tm_hour, tms->tm_min, tms->tm_sec);
813 // ATA ONLY
814 for (const auto & pa : state.ata_attributes) {
815 if (!pa.id)
816 continue;
817 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
818 }
819 // SCSI ONLY
820 const struct scsiErrorCounter * ecp;
821 const char * pageNames[3] = {"read", "write", "verify"};
822 for (int k = 0; k < 3; ++k) {
823 if ( !state.scsi_error_counters[k].found ) continue;
824 ecp = &state.scsi_error_counters[k].errCounter;
825 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
826 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
827 "\t%s-corr-by-retry;%" PRIu64 ";"
828 "\t%s-total-err-corrected;%" PRIu64 ";"
829 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
830 "\t%s-gb-processed;%.3f;"
831 "\t%s-total-unc-errors;%" PRIu64 ";",
832 pageNames[k], ecp->counter[0],
833 pageNames[k], ecp->counter[1],
834 pageNames[k], ecp->counter[2],
835 pageNames[k], ecp->counter[3],
836 pageNames[k], ecp->counter[4],
837 pageNames[k], (ecp->counter[5] / 1000000000.0),
838 pageNames[k], ecp->counter[6]);
839 }
840 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
841 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
842 }
843 // write SCSI current temperature if it is monitored
844 if (state.temperature)
845 fprintf(f, "\ttemperature;%d;", state.temperature);
846 // end of line
847 fprintf(f, "\n");
848 return true;
849}
850
851// Write all state files. If write_always is false, don't write
852// unless must_write is set.
853static void write_all_dev_states(const dev_config_vector & configs,
854 dev_state_vector & states,
855 bool write_always = true)
856{
857 for (unsigned i = 0; i < states.size(); i++) {
858 const dev_config & cfg = configs.at(i);
859 if (cfg.state_file.empty())
860 continue;
861 dev_state & state = states[i];
862 if (!write_always && !state.must_write)
863 continue;
864 if (!write_dev_state(cfg.state_file.c_str(), state))
865 continue;
866 state.must_write = false;
867 if (write_always || debugmode)
868 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
869 cfg.name.c_str(), cfg.state_file.c_str());
870 }
871}
872
873// Write to all attrlog files
874static void write_all_dev_attrlogs(const dev_config_vector & configs,
875 dev_state_vector & states)
876{
877 for (unsigned i = 0; i < states.size(); i++) {
878 const dev_config & cfg = configs.at(i);
879 if (cfg.attrlog_file.empty())
880 continue;
881 dev_state & state = states[i];
882 if (state.attrlog_dirty) {
883 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
884 state.attrlog_dirty = false;
885 }
886 }
887}
888
889extern "C" { // signal handlers require C-linkage
890
891// Note if we catch a SIGUSR1
892static void USR1handler(int sig)
893{
894 if (SIGUSR1==sig)
896 return;
897}
898
899#ifdef _WIN32
900// Note if we catch a SIGUSR2
901static void USR2handler(int sig)
902{
903 if (SIGUSR2==sig)
904 caughtsigUSR2=1;
905 return;
906}
907#endif
908
909// Note if we catch a HUP (or INT in debug mode)
910static void HUPhandler(int sig)
911{
912 if (sig==SIGHUP)
913 caughtsigHUP=1;
914 else
915 caughtsigHUP=2;
916 return;
917}
918
919// signal handler for TERM, QUIT, and INT (if not in debug mode)
920static void sighandler(int sig)
921{
922 if (!caughtsigEXIT)
923 caughtsigEXIT=sig;
924 return;
925}
926
927} // extern "C"
928
929#ifdef HAVE_LIBCAP_NG
930// capabilities(7) support
931
932static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
933
934static void capabilities_drop_now()
935{
936 if (!capabilities_mode)
937 return;
938 capng_clear(CAPNG_SELECT_BOTH);
939 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
940 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
941 if (warn_as_user && (warn_uid || warn_gid)) {
942 // For popen_as_ugid()
943 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
944 CAP_SETGID, CAP_SETUID, -1);
945 }
946 if (capabilities_mode > 1) {
947 // For exim MTA
948 capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
949 CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
950 }
951 capng_apply(CAPNG_SELECT_BOTH);
952}
953
954static void capabilities_log_error_hint()
955{
956 if (!capabilities_mode)
957 return;
958 PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
959 (capabilities_mode == 1 ? "', try '--capabilities=mail'"
960 : "=mail', please inform " PACKAGE_BUGREPORT));
961}
962
963#else // HAVE_LIBCAP_NG
964// No capabilities(7) support
965
// No-op replacements used when smartd is built without libcap-ng.

static inline void capabilities_drop_now()
{
}

static inline void capabilities_log_error_hint()
{
}
968
969#endif // HAVE_LIBCAP_NG
970
971// a replacement for setenv() which is not available on all platforms.
972// Note that the string passed to putenv must not be freed or made
973// invalid, since a pointer to it is kept by putenv(). This means that
974// it must either be a static buffer or allocated off the heap. The
975// string can be freed if the environment variable is redefined via
976// another call to putenv(). There is no portable way to unset a variable
977// with putenv(). So we manage the buffer in a static object.
978// Using setenv() if available is not considered because some
979// implementations may produce memory leaks.
980
982{
983public:
984 env_buffer() = default;
985 env_buffer(const env_buffer &) = delete;
986 void operator=(const env_buffer &) = delete;
987
988 void set(const char * name, const char * value);
989private:
990 char * m_buf = nullptr;
991};
992
993void env_buffer::set(const char * name, const char * value)
994{
995 int size = strlen(name) + 1 + strlen(value) + 1;
996 char * newbuf = new char[size];
997 snprintf(newbuf, size, "%s=%s", name, value);
998
999 if (putenv(newbuf))
1000 throw std::runtime_error("putenv() failed");
1001
1002 // This assumes that the same NAME is passed on each call
1003 delete [] m_buf;
1004 m_buf = newbuf;
1005}
1006
1007#define EBUFLEN 1024
1008
1009static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1011
1012// If either address or executable path is non-null then send and log
1013// a warning email, or execute executable
1014static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1015{
1016 static const char * const whichfail[] = {
1017 "EmailTest", // 0
1018 "Health", // 1
1019 "Usage", // 2
1020 "SelfTest", // 3
1021 "ErrorCount", // 4
1022 "FailedHealthCheck", // 5
1023 "FailedReadSmartData", // 6
1024 "FailedReadSmartErrorLog", // 7
1025 "FailedReadSmartSelfTestLog", // 8
1026 "FailedOpenDevice", // 9
1027 "CurrentPendingSector", // 10
1028 "OfflineUncorrectableSector", // 11
1029 "Temperature" // 12
1030 };
1031
1032 // See if user wants us to send mail
1033 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1034 return;
1035
1036 std::string address = cfg.emailaddress;
1037 const char * executable = cfg.emailcmdline.c_str();
1038
1039 // which type of mail are we sending?
1040 mailinfo * mail=(state.maillog)+which;
1041
1042 // checks for sanity
1043 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
1044 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
1045 return;
1046 }
1047 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
1048 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
1049 which, (int)sizeof(whichfail));
1050 return;
1051 }
1052
1053 // Return if a single warning mail has been sent.
1054 if ((cfg.emailfreq==1) && mail->logged)
1055 return;
1056
1057 // Return if this is an email test and one has already been sent.
1058 if (which == 0 && mail->logged)
1059 return;
1060
1061 // To decide if to send mail, we need to know what time it is.
1062 time_t epoch = time(nullptr);
1063
1064 // Return if less than one day has gone by
1065 const int day = 24*3600;
1066 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1067 return;
1068
1069 // Return if less than 2^(logged-1) days have gone by
1070 if (cfg.emailfreq==3 && mail->logged) {
1071 int days = 0x01 << (mail->logged - 1);
1072 days*=day;
1073 if (epoch<(mail->lastsent+days))
1074 return;
1075 }
1076
1077 // record the time of this mail message, and the first mail message
1078 if (!mail->logged)
1079 mail->firstsent=epoch;
1080 mail->lastsent=epoch;
1081
1082 // print warning string into message
1083 // Note: Message length may reach ~300 characters as device names may be
1084 // very long on certain platforms (macOS ~230 characters).
1085 // Message length must not exceed email line length limit, see RFC 5322:
1086 // "... MUST be no more than 998 characters, ... excluding the CRLF."
1087 char message[512];
1088 va_list ap;
1089 va_start(ap, fmt);
1090 vsnprintf(message, sizeof(message), fmt, ap);
1091 va_end(ap);
1092
1093 // replace commas by spaces to separate recipients
1094 std::replace(address.begin(), address.end(), ',', ' ');
1095
1096 // Export information in environment variables that will be useful
1097 // for user scripts
1098 static env_buffer env[12];
1099 env[0].set("SMARTD_MAILER", executable);
1100 env[1].set("SMARTD_MESSAGE", message);
1101 char dates[DATEANDEPOCHLEN];
1102 snprintf(dates, sizeof(dates), "%d", mail->logged);
1103 env[2].set("SMARTD_PREVCNT", dates);
1104 dateandtimezoneepoch(dates, mail->firstsent);
1105 env[3].set("SMARTD_TFIRST", dates);
1106 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1107 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1108 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1109 env[6].set("SMARTD_ADDRESS", address.c_str());
1110 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1111
1112 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1113 env[8].set("SMARTD_DEVICETYPE",
1114 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1115 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1116
1117 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1118 dates[0] = 0;
1119 if (which) switch (cfg.emailfreq) {
1120 case 2: dates[0] = '1'; dates[1] = 0; break;
1121 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1122 }
1123 env[11].set("SMARTD_NEXTDAYS", dates);
1124
1125 // now construct a command to send this as EMAIL
1126 if (!*executable)
1127 executable = "<mail>";
1128 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1129 const char * newwarn = (which? "Warning via" : "Test of");
1130
1131 char command[256];
1132#ifdef _WIN32
1133 // Path may contain spaces
1134 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1135#else
1136 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1137#endif
1138
1139 // tell SYSLOG what we are about to do...
1140 PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
1141 (which ? "Sending warning via" : "Executing test of"), executable, newadd,
1142 (
1143#ifdef HAVE_POSIX_API
1144 warn_as_user ?
1145 strprintf(" (uid=%u(%s) gid=%u(%s))",
1146 (unsigned)warn_uid, warn_uname.c_str(),
1147 (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
1148#elif defined(_WIN32)
1149 warn_as_restr_user ? " (restricted user)" :
1150#endif
1151 ""
1152 )
1153 );
1154
1155 // issue the command to send mail or to run the user's executable
1156 errno=0;
1157 FILE * pfp;
1158
1159#ifdef HAVE_POSIX_API
1160 if (warn_as_user) {
1161 pfp = popen_as_ugid(command, "r", warn_uid, warn_gid);
1162 } else
1163#endif
1164 {
1165#ifdef _WIN32
1166 pfp = popen_as_restr_user(command, "r", warn_as_restr_user);
1167#else
1168 pfp = popen(command, "r");
1169#endif
1170 }
1171
1172 if (!pfp)
1173 // failed to popen() mail process
1174 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1175 newwarn, executable, newadd, errno?strerror(errno):"");
1176 else {
1177 // pipe succeeded!
1178 int len;
1179 char buffer[EBUFLEN];
1180
1181 // if unexpected output on stdout/stderr, null terminate, print, and flush
1182 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1183 int count=0;
1184 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1185 buffer[newlen]='\0';
1186 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1187 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1188
1189 // flush pipe if needed
1190 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1191 count++;
1192
1193 // tell user that pipe was flushed, or that something is really wrong
1194 if (count && count<EBUFLEN)
1195 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1196 newwarn, executable, newadd);
1197 else if (count)
1198 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1199 newwarn, executable, newadd);
1200 }
1201
1202 // if something went wrong with mail process, print warning
1203 errno=0;
1204 int status;
1205
1206#ifdef HAVE_POSIX_API
1207 if (warn_as_user) {
1208 status = pclose_as_ugid(pfp);
1209 } else
1210#endif
1211 {
1212 status = pclose(pfp);
1213 }
1214
1215 if (status == -1)
1216 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1217 errno?strerror(errno):"");
1218 else {
1219 // mail process apparently succeeded. Check and report exit status
1220 if (WIFEXITED(status)) {
1221 // exited 'normally' (but perhaps with nonzero status)
1222 int status8 = WEXITSTATUS(status);
1223 if (status8>128)
1224 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1225 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1226 else if (status8) {
1227 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1228 newwarn, executable, newadd, status, status8);
1230 }
1231 else
1232 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1233 }
1234
1235 if (WIFSIGNALED(status))
1236 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1237 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1238
1239 // this branch is probably not possible. If subprocess is
1240 // stopped then pclose() should not return.
1241 if (WIFSTOPPED(status))
1242 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1243 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1244
1245 }
1246 }
1247
1248 // increment mail sent counter
1249 mail->logged++;
1250}
1251
1252static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1254
1255static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1256{
1257 if (!(0 <= which && which < SMARTD_NMAIL))
1258 return;
1259
1260 // Return if no mail sent yet
1261 mailinfo & mi = state.maillog[which];
1262 if (!mi.logged)
1263 return;
1264
1265 // Format & print message
1266 char msg[256];
1267 va_list ap;
1268 va_start(ap, fmt);
1269 vsnprintf(msg, sizeof(msg), fmt, ap);
1270 va_end(ap);
1271
1272 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1273 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1274
1275 // Clear mail counter and timestamps
1276 mi = mailinfo();
1277 state.must_write = true;
1278}
1279
1280#ifndef _WIN32
1281
1282// Output multiple lines via separate syslog(3) calls.
1284static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1285{
1286 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1287 vsnprintf(buf, sizeof(buf), fmt, ap);
1288
1289 for (char * p = buf, * q; p && *p; p = q) {
1290 if ((q = strchr(p, '\n')))
1291 *q++ = 0;
1292 if (*p)
1293 syslog(priority, "%s\n", p);
1294 }
1295}
1296
1297#else // _WIN32
1298// os_win32/syslog_win32.cpp supports multiple lines.
1299#define vsyslog_lines vsyslog
1300#endif // _WIN32
1301
1302// Printing function for watching ataprint commands, or losing them
1303// [From GLIBC Manual: Since the prototype doesn't specify types for
1304// optional arguments, in a call to a variadic function the default
1305// argument promotions are performed on the optional argument
1306// values. This means the objects of type char or short int (whether
1307// signed or not) are promoted to either int or unsigned int, as
1308// appropriate.]
1309void pout(const char *fmt, ...){
1310 va_list ap;
1311
1312 // get the correct time in syslog()
1314 // initialize variable argument list
1315 va_start(ap,fmt);
1316 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1317 if (debugmode && debugmode != 2) {
1318 FILE * f = stdout;
1319#ifdef _WIN32
1320 if (facility == LOG_LOCAL1) // logging to stdout
1321 f = stderr;
1322#endif
1323 vfprintf(f, fmt, ap);
1324 fflush(f);
1325 }
1326 // in debugmode==2 mode we print output from knowndrives.o functions
1327 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1328 openlog("smartd", LOG_PID, facility);
1329 vsyslog_lines(LOG_INFO, fmt, ap);
1330 closelog();
1331 }
1332 va_end(ap);
1333 return;
1334}
1335
1336// This function prints either to stdout or to the syslog as needed.
1337static void PrintOut(int priority, const char *fmt, ...){
1338 va_list ap;
1339
1340 // get the correct time in syslog()
1342 // initialize variable argument list
1343 va_start(ap,fmt);
1344 if (debugmode) {
1345 FILE * f = stdout;
1346#ifdef _WIN32
1347 if (facility == LOG_LOCAL1) // logging to stdout
1348 f = stderr;
1349#endif
1350 vfprintf(f, fmt, ap);
1351 fflush(f);
1352 }
1353 else {
1354 openlog("smartd", LOG_PID, facility);
1355 vsyslog_lines(priority, fmt, ap);
1356 closelog();
1357 }
1358 va_end(ap);
1359 return;
1360}
1361
1362// Used to warn users about invalid checksums. Called from atacmds.cpp.
1363void checksumwarning(const char * string)
1364{
1365 pout("Warning! %s error: invalid SMART checksum.\n", string);
1366}
1367
1368#ifndef _WIN32
1369
1370// Wait for the pid file to show up, this makes sure a calling program knows
1371// that the daemon is really up and running and has a pid to kill it
1372static bool WaitForPidFile()
1373{
1374 int waited, max_wait = 10;
1375 struct stat stat_buf;
1376
1377 if (pid_file.empty() || debugmode)
1378 return true;
1379
1380 for(waited = 0; waited < max_wait; ++waited) {
1381 if (!stat(pid_file.c_str(), &stat_buf)) {
1382 return true;
1383 } else
1384 sleep(1);
1385 }
1386 return false;
1387}
1388
1389#endif // _WIN32
1390
1391// Forks new process if needed, closes ALL file descriptors,
1392// redirects stdin, stdout, and stderr. Not quite daemon().
1393// See https://www.linuxjournal.com/article/2335
1394// for a good description of why we do things this way.
1395static int daemon_init()
1396{
1397#ifndef _WIN32
1398
1399 // flush all buffered streams. Else we might get two copies of open
1400 // streams since both parent and child get copies of the buffers.
1401 fflush(nullptr);
1402
1403 if (do_fork) {
1404 pid_t pid;
1405 if ((pid=fork()) < 0) {
1406 // unable to fork!
1407 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1408 return EXIT_STARTUP;
1409 }
1410 if (pid) {
1411 // we are the parent process, wait for pid file, then exit cleanly
1412 if(!WaitForPidFile()) {
1413 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1414 return EXIT_STARTUP;
1415 }
1416 return 0;
1417 }
1418
1419 // from here on, we are the child process.
1420 setsid();
1421
1422 // Fork one more time to avoid any possibility of having terminals
1423 if ((pid=fork()) < 0) {
1424 // unable to fork!
1425 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1426 return EXIT_STARTUP;
1427 }
1428 if (pid)
1429 // we are the parent process -- exit cleanly
1430 return 0;
1431
1432 // Now we are the child's child...
1433 }
1434
1435 // close any open file descriptors
1436 for (int i = getdtablesize(); --i >= 0; )
1437 close(i);
1438
1439 // redirect any IO attempts to /dev/null and change to root directory
1440 int fd = open("/dev/null", O_RDWR);
1441 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1442 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1443 return EXIT_STARTUP;
1444 }
1445 umask(0022);
1446
1447 if (do_fork)
1448 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1449
1450#else // _WIN32
1451
1452 // No fork() on native Win32
1453 // Detach this process from console
1454 fflush(nullptr);
1455 if (daemon_detach("smartd")) {
1456 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1457 return EXIT_STARTUP;
1458 }
1459 // stdin/out/err now closed if not redirected
1460
1461#endif // _WIN32
1462
1463 // No error, continue in main_worker()
1464 return -1;
1465}
1466
1467// create a PID file containing the current process id
1468static bool write_pid_file()
1469{
1470 if (!pid_file.empty()) {
1471 pid_t pid = getpid();
1472 mode_t old_umask;
1473#ifndef __CYGWIN__
1474 old_umask = umask(0077); // rwx------
1475#else
1476 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1477 old_umask = umask(0033); // rwxr--r--
1478#endif
1479
1480 stdio_file f(pid_file.c_str(), "w");
1481 umask(old_umask);
1482 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1483 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1484 return false;
1485 }
1486 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1487 }
1488 return true;
1489}
1490
1491// Prints header identifying version of code and home
1492static void PrintHead()
1493{
1494 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1495}
1496
1497// prints help info for configuration file Directives
1498static void Directives()
1499{
1500 PrintOut(LOG_INFO,
1501 "Configuration file (%s) Directives (after device name):\n"
1502 " -d TYPE Set the device type: auto, ignore, removable,\n"
1503 " %s\n"
1504 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1505 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1506 " -S VAL Enable/disable attribute autosave (on/off)\n"
1507 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1508 " -H Monitor SMART Health Status, report if failed\n"
1509 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1510 " -l TYPE Monitor SMART log or self-test status:\n"
1511 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1512 " -l scterc,R,W Set SCT Error Recovery Control\n"
1513 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1514 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1515 " -f Monitor 'Usage' Attributes, report failures\n"
1516 " -m ADD Send email warning to address ADD\n"
1517 " -M TYPE Modify email warning behavior (see man page)\n"
1518 " -p Report changes in 'Prefailure' Attributes\n"
1519 " -u Report changes in 'Usage' Attributes\n"
1520 " -t Equivalent to -p and -u Directives\n"
1521 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1522 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1523 " -i ID Ignore Attribute ID for -f Directive\n"
1524 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1525 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1526 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1527 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1528 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1529 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1530 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1531 " -F TYPE Use firmware bug workaround:\n"
1532 " %s\n"
1533 " -c i=N Set interval between disk checks to N seconds\n"
1534 " # Comment: text after a hash sign is ignored\n"
1535 " \\ Line continuation character\n"
1536 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1537 "Use ID = 0 to turn off -C and/or -U Directives\n"
1538 "Example: /dev/sda -a\n",
1539 configfile,
1540 smi()->get_valid_dev_types_str().c_str(),
1542}
1543
/* Returns a pointer to a static string which lists the valid arguments
   of command line option OPT, or nullptr if OPT is not known here.
   The entries for 'u' and 'C' exist only if the related feature
   (HAVE_POSIX_API or _WIN32, HAVE_LIBCAP_NG) is compiled in. */
static const char *GetValidArgList(char opt)
{
  if (opt == 'A' || opt == 's')
    return "<PATH_PREFIX>, -";
  if (opt == 'B')
    return "[+]<FILE_NAME>";
  if (opt == 'c')
    return "<FILE_NAME>, -";
  if (opt == 'l')
    return "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
  if (opt == 'q')
    return "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests";
  if (opt == 'r')
    return "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
  if (opt == 'p' || opt == 'w')
    return "<FILE_NAME>";
  if (opt == 'i')
    return "<INTEGER_SECONDS>";
#ifdef HAVE_POSIX_API
  if (opt == 'u')
    return "<USER>[:<GROUP>], -";
#elif defined(_WIN32)
  if (opt == 'u')
    return "restricted, unchanged";
#endif
#ifdef HAVE_LIBCAP_NG
  if (opt == 'C')
    return "mail, <no_argument>";
#endif
  return nullptr;
}
1582
1583/* prints help information for command syntax */
1584static void Usage()
1585{
1586 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1587#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1588 PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
1589#else
1590 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1591#endif
1592 PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
1593#ifdef SMARTMONTOOLS_ATTRIBUTELOG
1594 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
1595#endif
1596 PrintOut(LOG_INFO,"\n");
1597 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1598 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1599 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1600#ifdef SMARTMONTOOLS_DRIVEDBDIR
1601 PrintOut(LOG_INFO,"\n");
1602 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1603#endif
1604 PrintOut(LOG_INFO,"]\n\n");
1605 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1606 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1607 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1608#ifdef HAVE_LIBCAP_NG
1609 PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
1610 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1611 " Warning: Mail notification may not work when used.\n\n");
1612#endif
1613 PrintOut(LOG_INFO," -d, --debug\n");
1614 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1615 PrintOut(LOG_INFO," -D, --showdirectives\n");
1616 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1617 PrintOut(LOG_INFO," -h, --help, --usage\n");
1618 PrintOut(LOG_INFO," Display this help and exit\n\n");
1619 PrintOut(LOG_INFO," -i N, --interval=N\n");
1620 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1621 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1622#ifndef _WIN32
1623 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1624#else
1625 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1626#endif
1627#ifndef _WIN32
1628 PrintOut(LOG_INFO," -n, --no-fork\n");
1629 PrintOut(LOG_INFO," Do not fork into background\n");
1630#ifdef HAVE_LIBSYSTEMD
1631 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1632#endif // HAVE_LIBSYSTEMD
1633 PrintOut(LOG_INFO,"\n");
1634#endif // WIN32
1635 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1636 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1637 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1638 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1639 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1640 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1641#ifdef SMARTMONTOOLS_SAVESTATES
1642 PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
1643#else
1644 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1645#endif
1646 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1647#ifdef SMARTMONTOOLS_SAVESTATES
1648 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1649#endif
1650 PrintOut(LOG_INFO,"\n");
1651 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1652 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1653#ifndef _WIN32
1654 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1655#else
1656 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1657#endif
1658#ifdef HAVE_POSIX_API
1659 PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
1660 PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
1661#elif defined(_WIN32)
1662 PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
1663 PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
1664#endif
1665#ifdef _WIN32
1666 PrintOut(LOG_INFO," --service\n");
1667 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1668 PrintOut(LOG_INFO," smartd install [options]\n");
1669 PrintOut(LOG_INFO," Remove service with:\n");
1670 PrintOut(LOG_INFO," smartd remove\n\n");
1671#endif // _WIN32
1672 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1673 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1674}
1675
1676static int CloseDevice(smart_device * device, const char * name)
1677{
1678 if (!device->close()){
1679 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1680 return 1;
1681 }
1682 // device successfully closed
1683 return 0;
1684}
1685
// Replace invalid characters in cfg.dev_idinfo by '?'.
// Valid are the printable ASCII characters ' ' to '~', except a '~' at the
// first position.
// Returns true if at least one character was replaced.
static bool sanitize_dev_idinfo(std::string & s)
{
  static_assert(' ' == 0x20 && '~' == 0x07e, "Assume ASCII");

  bool changed = false;
  for (size_t pos = 0; pos < s.size(); ++pos) {
    const char ch = s[pos];
    const bool printable = (' ' <= ch && ch <= '~');
    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
    const bool leading_tilde = (pos == 0 && ch == '~');
    if (!printable || leading_tilde) {
      s[pos] = '?';
      changed = true;
    }
  }
  return changed;
}
1701
// Return true if a char is not allowed in a state file name.
// Allowed are the ASCII digits and the ASCII letters only.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1709
1710// Read error count from Summary or Extended Comprehensive SMART error log
1711// Return -1 on error
1712static int read_ata_error_count(ata_device * device, const char * name,
1713 firmwarebug_defs firmwarebugs, bool extended)
1714{
1715 if (!extended) {
1717 if (ataReadErrorLog(device, &log, firmwarebugs)){
1718 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1719 return -1;
1720 }
1721 return (log.error_log_pointer ? log.ata_error_count : 0);
1722 }
1723 else {
1725 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1726 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1727 return -1;
1728 }
1729 // Some disks use the reserved byte as index, see ataprint.cpp.
1730 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1731 }
1732}
1733
1734// returns <0 if problem. Otherwise, bottom 8 bits are the self test
1735// error count, and top bits are the power-on hours of the last error.
1736static int SelfTestErrorCount(ata_device * device, const char * name,
1737 firmwarebug_defs firmwarebugs)
1738{
1739 struct ata_smart_selftestlog log;
1740
1741 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1742 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1743 return -1;
1744 }
1745
1746 if (!log.mostrecenttest)
1747 // No tests logged
1748 return 0;
1749
1750 // Count failed self-tests
1751 int errcnt = 0, hours = 0;
1752 for (int i = 20; i >= 0; i--) {
1753 int j = (i + log.mostrecenttest) % 21;
1755 if (!nonempty(&entry, sizeof(entry)))
1756 continue;
1757
1758 int status = entry.selfteststatus >> 4;
1759 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1760 // First successful extended self-test, stop count
1761 break;
1762
1763 if (0x3 <= status && status <= 0x8) {
1764 // Self-test showed an error
1765 errcnt++;
1766 // Keep track of time of most recent error
1767 if (!hours)
1768 hours = entry.timestamp;
1769 }
1770 }
1771
1772 return ((hours << 8) | errcnt);
1773}
1774
// Split the result of SelfTestErrorCount(): the bottom 8 bits are the error
// count, the 16 bits above are the power-on hours of the last error.
// The argument is parenthesized so that expressions with operators of
// lower precedence than '&' or '>>' are evaluated as a whole.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1777
// Check offline data collection status.
// Returns true if the lower 7 bits of STATUS are 0x03, bit 7 is ignored.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const unsigned code = status & 0x7fu;
  return code == 0x03u;
}
1783
// Check self-test execution status.
// Returns true if the upper 4 bits of STATUS are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  const unsigned high_nibble = status & 0xf0u;
  return high_nibble == 0xf0u;
}
1789
1790// Log offline data collection status
1791static void log_offline_data_coll_status(const char * name, unsigned char status)
1792{
1793 const char * msg;
1794 switch (status & 0x7f) {
1795 case 0x00: msg = "was never started"; break;
1796 case 0x02: msg = "was completed without error"; break;
1797 case 0x03: msg = "is in progress"; break;
1798 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1799 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1800 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1801 default: msg = nullptr;
1802 }
1803
1804 if (msg)
1805 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1806 "Device: %s, offline data collection %s%s\n", name, msg,
1807 ((status & 0x80) ? " (auto:on)" : ""));
1808 else
1809 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1810 name, status);
1811}
1812
1813// Log self-test execution status
1814static void log_self_test_exec_status(const char * name, unsigned char status)
1815{
1816 const char * msg;
1817 switch (status >> 4) {
1818 case 0x0: msg = "completed without error"; break;
1819 case 0x1: msg = "was aborted by the host"; break;
1820 case 0x2: msg = "was interrupted by the host with a reset"; break;
1821 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1822 case 0x4: msg = "completed with error (unknown test element)"; break;
1823 case 0x5: msg = "completed with error (electrical test element)"; break;
1824 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1825 case 0x7: msg = "completed with error (read test element)"; break;
1826 case 0x8: msg = "completed with error (handling damage?)"; break;
1827 default: msg = nullptr;
1828 }
1829
1830 if (msg)
1831 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1832 "Device: %s, previous self-test %s\n", name, msg);
1833 else if ((status >> 4) == 0xf)
1834 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1835 name, status & 0x0f);
1836 else
1837 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1838 name, status);
1839}
1840
1841// Check pending sector count id (-C, -U directives).
1842static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1843 unsigned char id, const char * msg)
1844{
1845 // Check attribute index
1846 int i = ata_find_attr_index(id, state.smartval);
1847 if (i < 0) {
1848 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1849 cfg.name.c_str(), msg, id);
1850 return false;
1851 }
1852
1853 // Check value
1854 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1855 cfg.attribute_defs);
1856 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1857 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1858 cfg.name.c_str(), msg, id, rawval, rawval);
1859 return false;
1860 }
1861
1862 return true;
1863}
1864
1865// Called by ATA/SCSI/NVMeDeviceScan() after successful device check
1866static void finish_device_scan(dev_config & cfg, dev_state & state)
1867{
1868 // Set cfg.emailfreq if user hasn't set it
1869 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1870 // Avoid that emails are suppressed forever due to state persistence
1871 if (cfg.state_file.empty())
1872 cfg.emailfreq = 1; // '-M once'
1873 else
1874 cfg.emailfreq = 2; // '-M daily'
1875 }
1876
1877 // Start self-test regex check now if time was not read from state file
1878 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1879 state.scheduled_test_next_check = time(nullptr);
1880}
1881
// Common function to format result message for ATA setting.
// Appends "NAME<result>" to MSG, separated by ", " if MSG is not empty.
// <result> is:
//   ":--"   if OK is false
//   ":off"  if SET_OPTION is negative
//   ":N"    if HAS_VALUE is set, with N = SET_OPTION-1
//   ":on"   if SET_OPTION is positive
//   empty   otherwise
static void format_set_result_msg(std::string & msg, const char * name, bool ok,
                                  int set_option = 0, bool has_value = false)
{
  if (!msg.empty())
    msg += ", ";
  msg += name;

  if (!ok) {
    msg += ":--";
    return;
  }
  if (set_option < 0) {
    msg += ":off";
    return;
  }
  if (has_value) {
    msg += ':';
    msg += std::to_string(set_option-1);
    return;
  }
  if (set_option > 0)
    msg += ":on";
}
1898
1899// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
1900static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1901{
1902 if (!cfg.id_is_unique)
1903 return false;
1904
1905 for (const auto & prev_cfg : prev_cfgs) {
1906 if (!prev_cfg.id_is_unique)
1907 continue;
1908 if (cfg.dev_idinfo != prev_cfg.dev_idinfo)
1909 continue;
1910
1911 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1912 cfg.dev_name.c_str(), prev_cfg.dev_name.c_str());
1913 return true;
1914 }
1915
1916 return false;
1917}
1918
// Passed to ata_read_identity() by ATADeviceScan(), currently always off.
// TODO: Add '-F swapid' directive
static const bool fix_swapped_id = false;
1921
1922// scan to see what ata devices there are, and if they support SMART
// Probe an ATA device and set up CFG and STATE for monitoring.
//
// NOTE(review): this text is an extract of a numbered source listing. The
// embedded listing numbers skip 1967, 1985, 2007, 2010, 2127, 2135, 2175,
// 2292, 2297, 2305 and 2317, so the statements on those lines are not visible
// here. Each gap is marked below; restore them from the original file before
// building.
//
// Visible steps: read the IDENTIFY data, build cfg.dev_idinfo, reject
// duplicates if PREV_CFGS is given, look up database presets, check and
// enable SMART, apply the autosave / automatic offline test / ATA settings,
// and clear every monitoring option in CFG whose capability check fails.
//
// Return values visible below:
//   0 - device accepted, finish_device_scan() was called
//   1 - same identity as a device in *PREV_CFGS
//   2 - IDENTIFY failed, packet device, or SMART not supported / not enabled
//       and '-T permissive' not given
//   3 - no monitoring option is left enabled
// CloseDevice() is called on every return path.
1923static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1924 const dev_config_vector * prev_cfgs)
1925{
1926 int supported=0;
1927 struct ata_identify_device drive;
1928 const char *name = cfg.name.c_str();
1929 int retid;
1930
1931 // Device must be open
1932
1933 // Get drive identity structure
1934 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1935 if (retid<0)
1936 // Unable to read Identity structure
1937 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1938 else
1939 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1940 name, packetdevicetype(retid-1));
1941 CloseDevice(atadev, name);
1942 return 2;
1943 }
1944
1945 // Get drive identity, size and rotation rate (HDD/SSD)
1946 char model[40+1], serial[20+1], firmware[8+1];
1947 ata_format_id_string(model, drive.model, sizeof(model)-1);
1948 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1949 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1950
1951 ata_size_info sizes;
1952 ata_get_size_info(&drive, sizes);
1953 state.num_sectors = sizes.sectors;
1954 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1955
// wwn stays empty unless ata_get_wwn() returns a non-negative value
1956 char wwn[64]; wwn[0] = 0;
1957 unsigned oui = 0; uint64_t unique_id = 0;
1958 int naa = ata_get_wwn(&drive, oui, unique_id);
1959 if (naa >= 0)
1960 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1961
1962 // Format device id string for warning emails
1963 char cap[32];
1964 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1965 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1966 cfg.id_is_unique = true; // TODO: Check serial?
// NOTE(review): listing line 1967 is absent from this extract.
1968 cfg.id_is_unique = false;
1969
1970 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1971
1972 // Check for duplicates
1973 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
1974 CloseDevice(atadev, name);
1975 return 1;
1976 }
1977
1978 // Show if device in database, and use preset vendor attribute
1979 // options unless user has requested otherwise.
1980 if (cfg.ignorepresets)
1981 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1982 else {
1983 // Apply vendor specific presets, print warning if present
1984 std::string dbversion;
// NOTE(review): listing line 1985 (start of the statement that defines 'dbentry') is absent from this extract.
1986 &drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
1987 if (!dbentry)
1988 PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
1989 (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
1990 else {
1991 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
1992 name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
1993 (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1994 if (*dbentry->warningmsg)
1995 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1996 }
1997 }
1998
1999 // Check for ATA Security LOCK
2000 unsigned short word128 = drive.words088_255[128-88];
2001 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
2002 if (locked)
2003 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
2004
2005 // Set default '-C 197[+]' if no '-C ID' is specified.
2006 if (!cfg.curr_pending_set)
// NOTE(review): listing line 2007 is absent from this extract.
2008 // Set default '-U 198[+]' if no '-U ID' is specified.
2009 if (!cfg.offl_pending_set)
// NOTE(review): listing line 2010 is absent from this extract.
2011
2012 // If requested, show which presets would be used for this drive
2013 if (cfg.showpresets) {
// debugmode is raised to 2 only for the duration of show_presets() and restored afterwards
2014 int savedebugmode=debugmode;
2015 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
2016 if (!debugmode)
2017 debugmode=2;
2018 show_presets(&drive);
2019 debugmode=savedebugmode;
2020 }
2021
2022 // see if drive supports SMART
2023 supported=ataSmartSupport(&drive);
2024 if (supported!=1) {
2025 if (supported==0)
2026 // drive does NOT support SMART
2027 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2028 else
2029 // can't tell if drive supports SMART
2030 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2031
2032 // should we proceed anyway?
2033 if (cfg.permissive) {
2034 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2035 }
2036 else {
2037 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2038 CloseDevice(atadev, name);
2039 return 2;
2040 }
2041 }
2042
2043 if (ataEnableSmart(atadev)) {
2044 // Enable SMART command has failed
2045 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2046
// The failed enable is tolerated if IDENTIFY reports SMART as already enabled or '-T permissive' is given
2047 if (ataIsSmartEnabled(&drive) <= 0) {
2048 if (!cfg.permissive) {
2049 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2050 CloseDevice(atadev, name);
2051 return 2;
2052 }
2053 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2054 }
2055 else {
2056 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2057 }
2058 }
2059
2060 // disable device attribute autosave...
2061 if (cfg.autosave==1) {
2062 if (ataDisableAutoSave(atadev))
2063 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2064 else
2065 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2066 }
2067
2068 // or enable device attribute autosave
2069 if (cfg.autosave==2) {
2070 if (ataEnableAutoSave(atadev))
2071 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2072 else
2073 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2074 }
2075
2076 // capability check: SMART status
2077 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2078 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2079 cfg.smartcheck = false;
2080 }
2081
2082 // capability check: Read smart values and thresholds. Note that
2083 // smart values are ALSO needed even if we ONLY want to know if the
2084 // device is self-test log or error-log capable! After ATA-5, this
2085 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2086 // but sadly not for ATA-5. Sigh.
2087
2088 // do we need to get SMART data?
2089 bool smart_val_ok = false;
2090 if ( cfg.autoofflinetest || cfg.selftest
2091 || cfg.errorlog || cfg.xerrorlog
2092 || cfg.offlinests || cfg.selfteststs
2093 || cfg.usagefailed || cfg.prefail || cfg.usage
2094 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2095 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2096
2097 if (ataReadSmartValues(atadev, &state.smartval)) {
// Without SMART values, all attribute based checks are switched off
2098 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2099 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2100 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2101 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2102 }
2103 else {
2104 smart_val_ok = true;
2105 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2106 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2107 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2108 cfg.usagefailed = false;
2109 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2110 memset(&state.smartthres, 0, sizeof(state.smartthres));
2111 }
2112 }
2113
2114 // see if the necessary Attribute is there to monitor offline or
2115 // current pending sectors or temperature
2116 if ( cfg.curr_pending_id
2117 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2118 "Current_Pending_Sector"))
2119 cfg.curr_pending_id = 0;
2120
2121 if ( cfg.offl_pending_id
2122 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2123 "Offline_Uncorrectable"))
2124 cfg.offl_pending_id = 0;
2125
2126 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
// NOTE(review): listing line 2127 (rest of this condition) is absent from this extract.
2128 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2129 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2130 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2131 }
2132
2133 // Report ignored '-r' or '-R' directives
2134 for (int id = 1; id <= 255; id++) {
// NOTE(review): listing line 2135 is absent from this extract.
2136 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2137 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2138 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2139
2140 int idx = ata_find_attr_index(id, state.smartval);
2141 if (idx < 0)
2142 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2143 else {
2144 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2145 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2146 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2147 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2148 }
2149 }
2150 }
2151 }
2152
2153 // enable/disable automatic on-line testing
2154 if (cfg.autoofflinetest) {
2155 // is this an enable or disable request?
2156 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2157 if (!smart_val_ok)
2158 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2159 else {
2160 // if command appears unsupported, issue a warning...
2161 if (!isSupportAutomaticTimer(&state.smartval))
2162 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2163 // ... but then try anyway
2164 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2165 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2166 else
2167 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2168 }
2169 }
2170
2171 // Read log directories if required for capability check
2172 ata_smart_log_directory smart_logdir, gp_logdir;
2173 bool smart_logdir_ok = false, gp_logdir_ok = false;
2174
// NOTE(review): listing line 2175 (start of this 'if' condition) is absent from this extract.
2176 && (cfg.errorlog || cfg.selftest)
2177 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2178 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2179 smart_logdir_ok = true;
2180 }
2181
2182 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2183 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2184 gp_logdir_ok = true;
2185 }
2186
2187 // capability check: self-test-log
2188 state.selflogcount = 0; state.selfloghour = 0;
2189 if (cfg.selftest) {
2190 int retval;
// Accepted if permissive, or if the log directory lists the log, or - without
// a log directory - if the SMART values / IDENTIFY data report the capability
2191 if (!( cfg.permissive
2192 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2193 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2194 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2195 cfg.selftest = false;
2196 }
2197 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2198 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2199 cfg.selftest = false;
2200 }
2201 else {
2202 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2203 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2204 }
2205 }
2206
2207 // capability check: ATA error log
2208 state.ataerrorcount = 0;
2209 if (cfg.errorlog) {
2210 int errcnt1;
2211 if (!( cfg.permissive
2212 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2213 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2214 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2215 cfg.errorlog = false;
2216 }
2217 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2218 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2219 cfg.errorlog = false;
2220 }
2221 else
2222 state.ataerrorcount = errcnt1;
2223 }
2224
// capability check: Extended Comprehensive error log ('-l xerror')
2225 if (cfg.xerrorlog) {
2226 int errcnt2;
2227 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2228 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2229 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2230 name);
2231 cfg.xerrorlog = false;
2232 }
2233 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2234 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2235 cfg.xerrorlog = false;
2236 }
2237 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2238 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2239 name, state.ataerrorcount, errcnt2);
2240 // Record max error count
2241 if (errcnt2 > state.ataerrorcount)
2242 state.ataerrorcount = errcnt2;
2243 }
2244 else
2245 state.ataerrorcount = errcnt2;
2246 }
2247
2248 // capability check: self-test and offline data collection status
2249 if (cfg.offlinests || cfg.selfteststs) {
2250 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2251 if (cfg.offlinests)
2252 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2253 if (cfg.selfteststs)
2254 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2255 cfg.offlinests = cfg.selfteststs = false;
2256 }
2257 }
2258
2259 // capabilities check -- does it support powermode?
2260 if (cfg.powermode) {
2261 int powermode = ataCheckPowerMode(atadev);
2262
2263 if (-1 == powermode) {
2264 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2265 cfg.powermode=0;
2266 }
// Any value outside the list below is treated as not ATA compliant
2267 else if (powermode!=0x00 && powermode!=0x01
2268 && powermode!=0x40 && powermode!=0x41
2269 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2270 && powermode!=0xff) {
2271 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2272 name, powermode);
2273 cfg.powermode=0;
2274 }
2275 }
2276
2277 // Apply ATA settings
// For AAM and APM below: value > 0 enables with level (value-1), otherwise the disable command is sent
2278 std::string msg;
2279
2280 if (cfg.set_aam)
2281 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2282 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2283 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2284
2285 if (cfg.set_apm)
2286 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2287 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2288 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2289
2290 if (cfg.set_lookahead)
2291 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
// NOTE(review): listing line 2292 (feature argument of this call) is absent from this extract.
2293 cfg.set_lookahead);
2294
2295 if (cfg.set_wcache)
2296 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
// NOTE(review): listing line 2297 (rest of this call) is absent from this extract.
2298
2299 if (cfg.set_dsn)
2300 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2301 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2302
2303 if (cfg.set_security_freeze)
2304 format_set_result_msg(msg, "Security freeze",
// NOTE(review): listing line 2305 (rest of this call) is absent from this extract.
2306
2307 if (cfg.set_standby)
2308 format_set_result_msg(msg, "Standby",
2309 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2310
2311 // Report as one log entry
2312 if (!msg.empty())
2313 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2314
2315 // set SCT Error Recovery Control if requested
2316 if (cfg.sct_erc_set) {
// NOTE(review): listing line 2317 (first 'if' of this chain) is absent from this extract.
2318 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2319 name);
2320 else if (locked)
2321 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2322 name);
2323 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
2324 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
2325 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2326 else
2327 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2328 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2329 }
2330
2331 // If no tests available or selected, return
2332 if (!( cfg.smartcheck || cfg.selftest
2333 || cfg.errorlog || cfg.xerrorlog
2334 || cfg.offlinests || cfg.selfteststs
2335 || cfg.usagefailed || cfg.prefail || cfg.usage
2336 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2337 CloseDevice(atadev, name);
2338 return 3;
2339 }
2340
2341 // tell user we are registering device
2342 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2343
2344 // close file descriptor
2345 CloseDevice(atadev, name);
2346
2347 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2348 // Build file name for state file
// model and serial are modified in place: characters rejected by not_allowed_in_filename become '_'
2349 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2350 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2351 if (!state_path_prefix.empty()) {
2352 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2353 // Read previous state
2354 if (read_dev_state(cfg.state_file.c_str(), state)) {
2355 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2356 // Copy ATA attribute values to temp state
2357 state.update_temp_state();
2358 }
2359 }
2360 if (!attrlog_path_prefix.empty())
2361 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2362 }
2363
2364 finish_device_scan(cfg, state);
2365
2366 return 0;
2367}
2368
2369// on success, return 0. On failure, return >0. Never return <0,
2370// please.
// Probe a SCSI device and set up CFG and STATE for monitoring.
//
// NOTE(review): this text is an extract of a numbered source listing. The
// embedded listing numbers skip 2413, 2417, 2422, 2429, 2452, 2523,
// 2526-2527, 2529-2530 and 2532-2533, so the statements on those lines are
// not visible here. Each gap is marked below; restore them from the original
// file before building.
//
// Return values visible below:
//   0 - device accepted, finish_device_scan() was called
//   1 - same identity as a device in *PREV_CFGS
//   2 - INQUIRY failed or too short, not a disk like device, or
//       TEST UNIT READY failed
//   3 - bad IEC mode page or IE (SMART) not enabled
// NOTE(review): the three returns before the duplicate check do not call
// CloseDevice(), all later ones do - confirm the caller closes the device
// in those cases.
2371static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2372 const dev_config_vector * prev_cfgs)
2373{
2374 int err, req_len, avail_len, version, len;
2375 const char *device = cfg.name.c_str();
2376 struct scsi_iec_mode_page iec;
2377 uint8_t tBuf[64];
2378 uint8_t inqBuf[96];
2379 uint8_t vpdBuf[252];
2380 char lu_id[64], serial[256], vendor[40], model[40];
2381
2382 // Device must be open
2383 memset(inqBuf, 0, 96);
2384 req_len = 36;
2385 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2386 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2387 req_len = 64;
2388 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2389 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2390 "skip device\n", device);
2391 return 2;
2392 }
2393 }
2394 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2395
// Usable response length: the smaller of what the device reports (byte 4 plus 5) and what was requested
2396 avail_len = inqBuf[4] + 5;
2397 len = (avail_len < req_len) ? avail_len : req_len;
2398 if (len < 36) {
2399 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2400 "skip device\n", device);
2401 return 2;
2402 }
2403
// Peripheral device type is taken from the low 5 bits of byte 0
2404 int pdt = inqBuf[0] & 0x1f;
2405
2406 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2407 (0xe == pdt))) {
2408 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2409 "skip\n", device, pdt);
2410 return 2;
2411 }
2412
// NOTE(review): listing line 2413 (opening of the block closed at 2416) is absent from this extract.
2414 delete supported_vpd_pages_p;
2415 supported_vpd_pages_p = nullptr;
2416 }
// NOTE(review): listing line 2417 is absent from this extract.
2418
2419 lu_id[0] = '\0';
2420 if ((version >= 0x3) && (version < 0x8)) {
2421 /* SPC to SPC-5 */
// NOTE(review): listing line 2422 (start of this 'if' condition) is absent from this extract.
2423 vpdBuf, sizeof(vpdBuf))) {
// NOTE(review): len is taken from vpdBuf[3] (0..255) while vpdBuf holds 252 bytes - confirm the decoder cannot read past the buffer.
2424 len = vpdBuf[3];
2425 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
2426 }
2427 }
2428 serial[0] = '\0';
// NOTE(review): listing line 2429 (start of this 'if' condition) is absent from this extract.
2430 vpdBuf, sizeof(vpdBuf))) {
// NOTE(review): with len = vpdBuf[3] up to 255, index 4+len can reach 259 but vpdBuf has 252 bytes;
// the write below may land outside the buffer unless the length is bounded elsewhere - confirm.
2431 len = vpdBuf[3];
2432 vpdBuf[4 + len] = '\0';
2433 scsi_format_id_string(serial, &vpdBuf[4], len);
2434 }
2435
2436 char si_str[64];
2437 struct scsi_readcap_resp srr;
2438 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2439
2440 if (capacity)
2441 format_capacity(si_str, sizeof(si_str), capacity, ".");
2442 else
2443 si_str[0] = '\0';
2444
2445 // Format device id string for warning emails
// Vendor, product and revision are printed directly from INQUIRY bytes 8, 16 and 32 with length limits 8, 16 and 4
2446 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2447 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2448 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2449 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2450 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2451 cfg.id_is_unique = (lu_id[0] || serial[0]);
// NOTE(review): listing line 2452 is absent from this extract.
2453 cfg.id_is_unique = false;
2454
2455 // format "model" string
2456 scsi_format_id_string(vendor, &inqBuf[8], 8);
2457 scsi_format_id_string(model, &inqBuf[16], 16);
2458 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2459
2460 // Check for duplicates
2461 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2462 CloseDevice(scsidev, device);
2463 return 1;
2464 }
2465
2466 // check that device is ready for commands. IE stores its stuff on
2467 // the media.
2468 if ((err = scsiTestUnitReady(scsidev))) {
2469 if (SIMPLE_ERR_NOT_READY == err)
2470 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2471 else if (SIMPLE_ERR_NO_MEDIUM == err)
2472 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2473 else if (SIMPLE_ERR_BECOMING_READY == err)
2474 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2475 else
2476 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2477 CloseDevice(scsidev, device);
2478 return 2;
2479 }
2480
2481 // Badly-conforming USB storage devices may fail this check.
2482 // The response to the following IE mode page fetch (current and
2483 // changeable values) is carefully examined. It has been found
2484 // that various USB devices that malform the response will lock up
2485 // if asked for a log page (e.g. temperature) so it is best to
2486 // bail out now.
2487 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2488 state.modese_len = iec.modese_len;
2489 else if (SIMPLE_ERR_BAD_FIELD == err)
2490 ; /* continue since it is reasonable not to support IE mpage */
2491 else { /* any other error (including malformed response) unreasonable */
2492 PrintOut(LOG_INFO,
2493 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2494 device, err);
2495 CloseDevice(scsidev, device);
2496 return 3;
2497 }
2498
2499 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2500 // smart if it is off). This may change to be the same as the ATA side.
2501 if (!scsi_IsExceptionControlEnabled(&iec)) {
2502 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2503 "Try 'smartctl -s on %s' to turn on SMART features\n",
2504 device, device);
2505 CloseDevice(scsidev, device);
2506 return 3;
2507 }
2508
2509 // Flag that certain log pages are supported (information may be
2510 // available from other sources).
2511 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2512 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2513 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2514 {
// NOTE(review): the loop bound comes from tBuf[3] while tBuf holds 64 bytes - confirm it cannot exceed the buffer.
2515 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2516 switch (tBuf[k]) {
2517 case TEMPERATURE_LPAGE:
2518 state.TempPageSupported = 1;
2519 break;
2520 case IE_LPAGE:
2521 state.SmartPageSupported = 1;
2522 break;
// NOTE(review): listing line 2523 (case label for the next statement) is absent from this extract.
2524 state.ReadECounterPageSupported = 1;
2525 break;
// NOTE(review): listing lines 2526-2527 are absent from this extract.
2528 break;
// NOTE(review): listing lines 2529-2530 are absent from this extract.
2531 break;
// NOTE(review): listing lines 2532-2533 are absent from this extract.
2534 break;
2535 default:
2536 break;
2537 }
2538 }
2539 }
2540
2541 // Check if scsiCheckIE() is going to work
2542 {
2543 uint8_t asc = 0;
2544 uint8_t ascq = 0;
2545 uint8_t currenttemp = 0;
2546 uint8_t triptemp = 0;
2547
2548 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2549 &asc, &ascq, &currenttemp, &triptemp)) {
2550 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2551 state.SuppressReport = 1;
2552 }
// Temperature monitoring is dropped if the IE check failed or no current temperature was returned
2553 if ( (state.SuppressReport || !currenttemp)
2554 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2555 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2556 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2557 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2558 }
2559 }
2560
2561 // capability check: self-test-log
2562 if (cfg.selftest){
2563 int retval = scsiCountFailedSelfTests(scsidev, 0);
2564 if (retval<0) {
2565 // no self-test log, turn off monitoring
2566 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2567 cfg.selftest = false;
2568 state.selflogcount = 0;
2569 state.selfloghour = 0;
2570 }
2571 else {
2572 // register starting values to watch for changes
2573 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2574 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2575 }
2576 }
2577
2578 // disable autosave (set GLTSD bit)
2579 if (cfg.autosave==1){
2580 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2581 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2582 else
2583 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2584 }
2585
2586 // or enable autosave (clear GLTSD bit)
2587 if (cfg.autosave==2){
2588 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2589 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2590 else
2591 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2592 }
2593
2594 // tell user we are registering device
2595 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2596
2597 // Make sure that init_standby_check() ignores SCSI devices
2598 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2599
2600 // close file descriptor
2601 CloseDevice(scsidev, device);
2602
2603 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2604 // Build file name for state file
// NOTE(review): model and serial are sanitized below, vendor is used in the file names unchanged - confirm this is intended.
2605 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2606 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2607 if (!state_path_prefix.empty()) {
2608 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2609 // Read previous state
2610 if (read_dev_state(cfg.state_file.c_str(), state)) {
2611 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2612 // Copy ATA attribute values to temp state
2613 state.update_temp_state();
2614 }
2615 }
2616 if (!attrlog_path_prefix.empty())
2617 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2618 }
2619
2620 finish_device_scan(cfg, state);
2621
2622 return 0;
2623}
2624
// Convert 128 bit LE integer to uint64_t or its max value on overflow.
//
// VAL holds the number with the least significant byte first.
// If any of the upper eight bytes (val[8]..val[15]) is nonzero, the value
// does not fit into 64 bits and the maximum uint64_t value is returned.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  for (int i = 8; i < 16; i++) {
    if (val[i])
      return ~static_cast<uint64_t>(0);
  }

  // Assemble the lower 64 bits, starting with the most significant byte
  uint64_t lo = 0;
  for (int i = 7; i >= 0; i--)
    lo = (lo << 8) | val[i];
  return lo;
}
2638
2639// Get max temperature in Kelvin reported in NVMe SMART/Health log.
2640static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2641{
2642 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2643 for (auto s : smart_log.temp_sensor) {
2644 if (s > k)
2645 k = s; // cppcheck-suppress useStlAlgorithm
2646 }
2647 return k;
2648}
2649
// Probe an NVMe device and set up CFG and STATE for monitoring.
//
// NOTE(review): this text is an extract of a numbered source listing. The
// embedded listing numbers skip 2682 and 2712, so the statements on those
// lines are not visible here. Both gaps are marked below; restore them from
// the original file before building.
//
// Return values visible below:
//   0 - device accepted, finish_device_scan() was called
//   1 - same identity as a device in *PREV_CFGS
//   2 - Identify Controller or SMART/Health log read failed
//   3 - no supported monitoring option is enabled
// CloseDevice() is called on every return path.
2650static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2651 const dev_config_vector * prev_cfgs)
2652{
2653 const char *name = cfg.name.c_str();
2654
2655 // Device must be open
2656
2657 // Get ID Controller
2658 nvme_id_ctrl id_ctrl;
2659 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2660 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2661 CloseDevice(nvmedev, name);
2662 return 2;
2663 }
2664
2665 // Get drive identity
2666 char model[40+1], serial[20+1], firmware[8+1];
2667 format_char_array(model, id_ctrl.mn);
2668 format_char_array(serial, id_ctrl.sn);
2669 format_char_array(firmware, id_ctrl.fr);
2670
2671 // Format device id string for warning emails
// The namespace id is included unless it is 0xffffffff, the capacity unless it is zero
2672 char nsstr[32] = "", capstr[32] = "";
2673 unsigned nsid = nvmedev->get_nsid();
2674 if (nsid != 0xffffffff)
2675 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2676 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2677 if (capacity)
2678 format_capacity(capstr, sizeof(capstr), capacity, ".");
2679 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2680 nsstr, (capstr[0] ? ", " : ""), capstr);
2681 cfg.id_is_unique = true; // TODO: Check serial?
// NOTE(review): listing line 2682 is absent from this extract.
2683 cfg.id_is_unique = false;
2684
2685 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2686
2687 // Check for duplicates
2688 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2689 CloseDevice(nvmedev, name);
2690 return 1;
2691 }
2692
2693 // Read SMART/Health log
2694 nvme_smart_log smart_log;
2695 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2696 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2697 CloseDevice(nvmedev, name);
2698 return 2;
2699 }
2700
2701 // Check temperature sensor support
// A maximum of zero over all reported temperatures is treated as "no sensors"
2702 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2703 if (!nvme_get_max_temp_kelvin(smart_log)) {
2704 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2705 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2706 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2707 }
2708 }
2709
2710 // Init total error count
2711 if (cfg.errorlog || cfg.xerrorlog) {
// NOTE(review): listing line 2712 (body of this 'if') is absent from this extract.
2713 }
2714
2715 // If no supported tests selected, return
2716 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2717 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2718 CloseDevice(nvmedev, name);
2719 return 3;
2720 }
2721
2722 // Tell user we are registering device
2723 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2724
2725 // Make sure that init_standby_check() ignores NVMe devices
2726 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2727
2728 CloseDevice(nvmedev, name);
2729
// Only a state file name is built here; cfg.attrlog_file is not set in this function
2730 if (!state_path_prefix.empty()) {
2731 // Build file name for state file
2732 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2733 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
// nsstr is reused: now "-n<nsid>" for the file name instead of ", NSID:<nsid>"
2734 nsstr[0] = 0;
2735 if (nsid != 0xffffffff)
2736 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2737 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2738 // Read previous state
2739 if (read_dev_state(cfg.state_file.c_str(), state))
2740 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2741 }
2742
2743 finish_device_scan(cfg, state);
2744
2745 return 0;
2746}
2747
2748// Open device for next check, return false on error
2749static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2750 const char * type)
2751{
2752 const char * name = cfg.name.c_str();
2753
2754 // If user has asked, test the email warning system
2755 if (cfg.emailtest)
2756 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2757
2758 // User may have requested (with the -n Directive) to leave the disk
2759 // alone if it is in idle or standby mode. In this case check the
2760 // power mode first before opening the device for full access,
2761 // and exit without check if disk is reported in standby.
2762 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2763 // Note that 'is_powered_down()' handles opening the device itself, and
2764 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2765 if (device->is_powered_down())
2766 {
2767 // skip at most powerskipmax checks
2768 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2769 // report first only except if state has changed, avoid waking up system disk
2770 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2771 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2772 state.lastpowermodeskipped = -1;
2773 }
2774 state.powerskipcnt++;
2775 return false;
2776 }
2777 }
2778 }
2779
2780 // if we can't open device, fail gracefully rather than hard --
2781 // perhaps the next time around we'll be able to open it
2782 if (!device->open()) {
2783 // For removable devices, print error message only once and suppress email
2784 if (!cfg.removable) {
2785 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2786 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2787 }
2788 else if (!state.removed) {
2789 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2790 state.removed = true;
2791 }
2792 else if (debugmode)
2793 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2794 return false;
2795 }
2796
2797 if (debugmode)
2798 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2799
2800 if (!cfg.removable)
2801 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2802 else if (state.removed) {
2803 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2804 state.removed = false;
2805 }
2806
2807 return true;
2808}
2809
2810// If the self-test log has got more self-test errors (or more recent
2811// self-test errors) recorded, then notify user.
2812static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2813{
2814 const char * name = cfg.name.c_str();
2815
2816 if (newi<0)
2817 // command failed
2818 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2819 else {
2820 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2821
2822 // old and new error counts
2823 int oldc=state.selflogcount;
2824 int newc=SELFTEST_ERRORCOUNT(newi);
2825
2826 // old and new error timestamps in hours
2827 int oldh=state.selfloghour;
2828 int newh=SELFTEST_ERRORHOURS(newi);
2829
2830 if (oldc<newc) {
2831 // increase in error count
2832 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2833 name, oldc, newc);
2834 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2835 name, oldc, newc);
2836 state.must_write = true;
2837 }
2838 else if (newc > 0 && oldh != newh) {
2839 // more recent error
2840 // a 'more recent' error might actually be a smaller hour number,
2841 // if the hour number has wrapped.
2842 // There's still a bug here. You might just happen to run a new test
2843 // exactly 32768 hours after the previous failure, and have run exactly
2844 // 20 tests between the two, in which case smartd will miss the
2845 // new failure.
2846 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2847 name, newh);
2848 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2849 name, newh);
2850 state.must_write = true;
2851 }
2852
2853 // Print info if error entries have disappeared
2854 // or newer successful successful extended self-test exits
2855 if (oldc > newc) {
2856 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2857 name, oldc, newc);
2858 if (newc == 0)
2859 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2860 }
2861
2862 // Needed since self-test error count may DECREASE. Hour might
2863 // also have changed.
2864 state.selflogcount= newc;
2865 state.selfloghour = newh;
2866 }
2867 return;
2868}
2869
// Self-test type characters, ordered by priority (highest priority first).
static const char test_type_chars[] = "LncrSCO";
// Number of test types, the terminating NUL of the string is not counted.
static const unsigned num_test_types = (sizeof(test_type_chars) / sizeof(test_type_chars[0])) - 1;
2873
2874// returns test type if time to do test of type testtype,
2875// 0 if not time to do test.
2876static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2877{
2878 // check that self-testing has been requested
2879 if (cfg.test_regex.empty())
2880 return 0;
2881
2882 // Exit if drive not capable of any test
2883 if ( state.not_cap_long && state.not_cap_short &&
2884 (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2885 return 0;
2886
2887 // since we are about to call localtime(), be sure glibc is informed
2888 // of any timezone changes we make.
2889 if (!usetime)
2891
2892 // Is it time for next check?
2893 time_t now = (!usetime ? time(nullptr) : usetime);
2894 if (now < state.scheduled_test_next_check) {
2895 if (state.scheduled_test_next_check <= now + 3600)
2896 return 0; // Next check within one hour
2897 // More than one hour, assume system clock time adjusted to the past
2898 state.scheduled_test_next_check = now;
2899 }
2900 else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
2901 // Limit time check interval to 90 days
2902 state.scheduled_test_next_check = now - (3600L*24*90);
2903 }
2904
2905 // Find ':NNN[-LLL]' in regex for possible offsets and limits
2906 const unsigned max_offsets = 1 + num_test_types;
2907 unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
2908 unsigned num_offsets = 1; // offsets/limits[0] == 0 always
2909 for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
2910 const char * q = strchr(p, ':');
2911 if (!q)
2912 break;
2913 p = q + 1;
2914 unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
2915 sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
2916 if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
2917 continue;
2918 offsets[num_offsets] = offset; limits[num_offsets] = limit;
2919 num_offsets++;
2920 p += (n3 > 0 ? n3 : n1);
2921 }
2922
2923 // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2924 char testtype = 0;
2925 time_t testtime = 0; int testhour = 0;
2926 int maxtest = num_test_types-1;
2927
2928 for (time_t t = state.scheduled_test_next_check; ; ) {
2929 // Check offset 0 and then all offsets for ':NNN' found above
2930 for (unsigned i = 0; i < num_offsets; i++) {
2931 unsigned offset = offsets[i], limit = limits[i];
2932 unsigned delay = cfg.test_offset_factor * offset;
2933 if (0 < limit && limit < delay)
2934 delay %= limit + 1;
2935 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
2936
2937 // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
2938 int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2939 for (int j = 0; j <= maxtest; j++) {
2940 // Skip if drive not capable of this test
2941 switch (test_type_chars[j]) {
2942 case 'L': if (state.not_cap_long) continue; break;
2943 case 'S': if (state.not_cap_short) continue; break;
2944 case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2945 case 'O': if (scsi || state.not_cap_offline) continue; break;
2946 case 'c': case 'n':
2947 case 'r': if (scsi || state.not_cap_selective) continue; break;
2948 default: continue;
2949 }
2950 // Try match of "T/MM/DD/d/HH[:NNN]"
2951 char pattern[64];
2952 snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2953 test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2954 if (i > 0) {
2955 const unsigned len = sizeof("S/01/01/1/01") - 1;
2956 snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
2957 if (limit > 0)
2958 snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
2959 }
2960 if (cfg.test_regex.full_match(pattern)) {
2961 // Test found
2962 testtype = pattern[0];
2963 testtime = t; testhour = tms->tm_hour;
2964 // Limit further matches to higher priority self-tests
2965 maxtest = j-1;
2966 break;
2967 }
2968 }
2969 }
2970
2971 // Exit if no tests left or current time reached
2972 if (maxtest < 0)
2973 break;
2974 if (t >= now)
2975 break;
2976 // Check next hour
2977 if ((t += 3600) > now)
2978 t = now;
2979 }
2980
2981 // Do next check not before next hour.
2982 struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
2983 state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2984
2985 if (testtype) {
2986 state.must_write = true;
2987 // Tell user if an old test was found.
2988 if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2989 char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2990 PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2991 cfg.name.c_str(), testtype, datebuf);
2992 }
2993 }
2994
2995 return testtype;
2996}
2997
2998// Print a list of future tests.
3000{
3001 unsigned numdev = configs.size();
3002 if (!numdev)
3003 return;
3004 std::vector<int> testcnts(numdev * num_test_types, 0);
3005
3006 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
3007
3008 // FixGlibcTimeZoneBug(); // done in PrintOut()
3009 time_t now = time(nullptr);
3010 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3011 dateandtimezoneepoch(datenow, now);
3012
3013 long seconds;
3014 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3015 // Check for each device whether a test will be run
3016 time_t testtime = now + seconds;
3017 for (unsigned i = 0; i < numdev; i++) {
3018 const dev_config & cfg = configs.at(i);
3019 dev_state & state = states.at(i);
3020 const char * p;
3021 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3022 if (testtype && (p = strchr(test_type_chars, testtype))) {
3023 unsigned t = (p - test_type_chars);
3024 // Report at most 5 tests of each type
3025 if (++testcnts[i*num_test_types + t] <= 5) {
3026 dateandtimezoneepoch(date, testtime);
3027 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3028 testcnts[i*num_test_types + t], testtype, date);
3029 }
3030 }
3031 }
3032 }
3033
3034 // Report totals
3035 dateandtimezoneepoch(date, now+seconds);
3036 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3037 for (unsigned i = 0; i < numdev; i++) {
3038 const dev_config & cfg = configs.at(i);
3039 bool scsi = devices.at(i)->is_scsi();
3040 for (unsigned t = 0; t < num_test_types; t++) {
3041 int cnt = testcnts[i*num_test_types + t];
3042 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3043 continue;
3044 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3045 cnt, (cnt==1?"":"s"), test_type_chars[t]);
3046 }
3047 }
3048
3049}
3050
3051// Return zero on success, nonzero on failure. Perform offline (background)
3052// short or long (extended) self test on given scsi device.
3053static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3054{
3055 int retval = 0;
3056 const char *testname = nullptr;
3057 const char *name = cfg.name.c_str();
3058 int inProgress;
3059
3060 if (scsiSelfTestInProgress(device, &inProgress)) {
3061 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3062 state.not_cap_short = state.not_cap_long = true;
3063 return 1;
3064 }
3065
3066 if (1 == inProgress) {
3067 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3068 "progress.\n", name);
3069 return 1;
3070 }
3071
3072 switch (testtype) {
3073 case 'S':
3074 testname = "Short Self";
3075 retval = scsiSmartShortSelfTest(device);
3076 break;
3077 case 'L':
3078 testname = "Long Self";
3079 retval = scsiSmartExtendSelfTest(device);
3080 break;
3081 }
3082 // If we can't do the test, exit
3083 if (!testname) {
3084 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3085 testtype);
3086 return 1;
3087 }
3088 if (retval) {
3089 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3090 (SIMPLE_ERR_BAD_FIELD == retval)) {
3091 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3092 testname);
3093 if ('L'==testtype)
3094 state.not_cap_long = true;
3095 else
3096 state.not_cap_short = true;
3097
3098 return 1;
3099 }
3100 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3101 testname, retval);
3102 return 1;
3103 }
3104
3105 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3106
3107 return 0;
3108}
3109
3110// Do an offline immediate or self-test. Return zero on success,
3111// nonzero on failure.
3112static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3113{
3114 const char *name = cfg.name.c_str();
3115
3116 // Read current smart data and check status/capability
3117 struct ata_smart_values data;
3118 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3119 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3120 return 1;
3121 }
3122
3123 // Check for capability to do the test
3124 int dotest = -1, mode = 0;
3125 const char *testname = nullptr;
3126 switch (testtype) {
3127 case 'O':
3128 testname="Offline Immediate ";
3130 dotest=OFFLINE_FULL_SCAN;
3131 else
3132 state.not_cap_offline = true;
3133 break;
3134 case 'C':
3135 testname="Conveyance Self-";
3137 dotest=CONVEYANCE_SELF_TEST;
3138 else
3139 state.not_cap_conveyance = true;
3140 break;
3141 case 'S':
3142 testname="Short Self-";
3143 if (isSupportSelfTest(&data))
3144 dotest=SHORT_SELF_TEST;
3145 else
3146 state.not_cap_short = true;
3147 break;
3148 case 'L':
3149 testname="Long Self-";
3150 if (isSupportSelfTest(&data))
3151 dotest=EXTEND_SELF_TEST;
3152 else
3153 state.not_cap_long = true;
3154 break;
3155
3156 case 'c': case 'n': case 'r':
3157 testname = "Selective Self-";
3159 dotest = SELECTIVE_SELF_TEST;
3160 switch (testtype) {
3161 case 'c': mode = SEL_CONT; break;
3162 case 'n': mode = SEL_NEXT; break;
3163 case 'r': mode = SEL_REDO; break;
3164 }
3165 }
3166 else
3167 state.not_cap_selective = true;
3168 break;
3169 }
3170
3171 // If we can't do the test, exit
3172 if (dotest<0) {
3173 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3174 return 1;
3175 }
3176
3177 // If currently running a self-test, do not interrupt it to start another.
3178 if (15==(data.self_test_exec_status >> 4)) {
3179 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3180 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3181 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3182 } else {
3183 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3184 name, testname, (int)(data.self_test_exec_status & 0x0f));
3185 return 1;
3186 }
3187 }
3188
3189 if (dotest == SELECTIVE_SELF_TEST) {
3190 // Set test span
3191 ata_selective_selftest_args selargs, prev_args;
3192 selargs.num_spans = 1;
3193 selargs.span[0].mode = mode;
3194 prev_args.num_spans = 1;
3195 prev_args.span[0].start = state.selective_test_last_start;
3196 prev_args.span[0].end = state.selective_test_last_end;
3197 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3198 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3199 return 1;
3200 }
3201 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3202 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3203 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3204 start, end, end - start + 1,
3205 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3206 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3207 state.selective_test_last_start = start;
3208 state.selective_test_last_end = end;
3209 }
3210
3211 // execute the test, and return status
3212 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, nullptr);
3213 if (retval) {
3214 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3215 return retval;
3216 }
3217
3218 // Report recent test start to do_disable_standby_check()
3219 // and force log of next test status
3220 if (testtype == 'O')
3221 state.offline_started = true;
3222 else
3223 state.selftest_started = true;
3224
3225 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3226 return 0;
3227}
3228
3229// Check pending sector count attribute values (-C, -U directives).
3230static void check_pending(const dev_config & cfg, dev_state & state,
3231 unsigned char id, bool increase_only,
3232 const ata_smart_values & smartval,
3233 int mailtype, const char * msg)
3234{
3235 // Find attribute index
3236 int i = ata_find_attr_index(id, smartval);
3237 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3238 return;
3239
3240 // No report if no sectors pending.
3241 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3242 if (rawval == 0) {
3243 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3244 return;
3245 }
3246
3247 // If attribute is not reset, report only sector count increases.
3248 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3249 if (!(!increase_only || prev_rawval < rawval))
3250 return;
3251
3252 // Format message.
3253 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3254 if (prev_rawval > 0 && rawval != prev_rawval)
3255 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3256
3257 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3258 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3259 state.must_write = true;
3260}
3261
// Format a temperature value for log messages.
// A value of 0 means 'unset' and is shown as "??", any other value is
// printed as a decimal number into 'buf'.
// Returns either the string literal or 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  // unset
  return "??";
}
3270
3271// Check Temperature limits
3272static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3273{
3274 if (!(0 < currtemp && currtemp < 255)) {
3275 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3276 return;
3277 }
3278
3279 // Update Max Temperature
3280 const char * minchg = "", * maxchg = "";
3281 if (currtemp > state.tempmax) {
3282 if (state.tempmax)
3283 maxchg = "!";
3284 state.tempmax = currtemp;
3285 state.must_write = true;
3286 }
3287
3288 char buf[20];
3289 if (!state.temperature) {
3290 // First check
3291 if (!state.tempmin || currtemp < state.tempmin)
3292 // Delay Min Temperature update by ~ 30 minutes.
3293 state.tempmin_delay = time(nullptr) + default_checktime - 60;
3294 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3295 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3296 if (triptemp)
3297 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3298 state.temperature = currtemp;
3299 }
3300 else {
3301 if (state.tempmin_delay) {
3302 // End Min Temperature update delay if ...
3303 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3304 || (state.tempmin_delay <= time(nullptr))) { // or delay time is over.
3305 state.tempmin_delay = 0;
3306 if (!state.tempmin)
3307 state.tempmin = 255;
3308 }
3309 }
3310
3311 // Update Min Temperature
3312 if (!state.tempmin_delay && currtemp < state.tempmin) {
3313 state.tempmin = currtemp;
3314 state.must_write = true;
3315 if (currtemp != state.temperature)
3316 minchg = "!";
3317 }
3318
3319 // Track changes
3320 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3321 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3322 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3323 state.temperature = currtemp;
3324 }
3325 }
3326
3327 // Check limits
3328 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3329 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3330 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3331 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3332 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3333 }
3334 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3335 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3336 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3337 }
3338 else if (cfg.tempcrit) {
3339 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3340 if (currtemp < limit)
3341 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3342 }
3343}
3344
3345// Check normalized and raw attribute values.
3346static void check_attribute(const dev_config & cfg, dev_state & state,
3347 const ata_smart_attribute & attr,
3348 const ata_smart_attribute & prev,
3349 int attridx,
3350 const ata_smart_threshold_entry * thresholds)
3351{
3352 // Check attribute and threshold
3353 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3354 if (attrstate == ATTRSTATE_NON_EXISTING)
3355 return;
3356
3357 // If requested, check for usage attributes that have failed.
3358 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3360 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3361 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3362 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3363 state.must_write = true;
3364 }
3365
3366 // Return if we're not tracking this type of attribute
3367 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3368 if (!( ( prefail && cfg.prefail)
3369 || (!prefail && cfg.usage )))
3370 return;
3371
3372 // Return if '-I ID' was specified
3374 return;
3375
3376 // Issue warning if they don't have the same ID in all structures.
3377 if (attr.id != prev.id) {
3378 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3379 cfg.name.c_str(), attr.id, prev.id);
3380 return;
3381 }
3382
3383 // Compare normalized values if valid.
3384 bool valchanged = false;
3385 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3386 if (attr.current != prev.current)
3387 valchanged = true;
3388 }
3389
3390 // Compare raw values if requested.
3391 bool rawchanged = false;
3392 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3395 rawchanged = true;
3396 }
3397
3398 // Return if no change
3399 if (!(valchanged || rawchanged))
3400 return;
3401
3402 // Format value strings
3403 std::string currstr, prevstr;
3404 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3405 // Print raw values only
3406 currstr = strprintf("%s (Raw)",
3407 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3408 prevstr = strprintf("%s (Raw)",
3409 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3410 }
3411 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3412 // Print normalized and raw values
3413 currstr = strprintf("%d [Raw %s]", attr.current,
3414 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3415 prevstr = strprintf("%d [Raw %s]", prev.current,
3416 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3417 }
3418 else {
3419 // Print normalized values only
3420 currstr = strprintf("%d", attr.current);
3421 prevstr = strprintf("%d", prev.current);
3422 }
3423
3424 // Format message
3425 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3426 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3427 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3428 prevstr.c_str(), currstr.c_str());
3429
3430 // Report this change as critical ?
3431 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3432 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3433 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3434 MailWarning(cfg, state, 2, "%s", msg.c_str());
3435 }
3436 else {
3437 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3438 }
3439 state.must_write = true;
3440}
3441
3442
3443static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3444 bool firstpass, bool allow_selftests)
3445{
3446 if (!open_device(cfg, state, atadev, "ATA"))
3447 return 1;
3448
3449 const char * name = cfg.name.c_str();
3450
3451 // user may have requested (with the -n Directive) to leave the disk
3452 // alone if it is in idle or sleeping mode. In this case check the
3453 // power mode and exit without check if needed
3454 if (cfg.powermode && !state.powermodefail) {
3455 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3456 const char * mode = 0;
3457 if (0 <= powermode && powermode < 0xff) {
3458 // wait for possible spin up and check again
3459 int powermode2;
3460 sleep(5);
3461 powermode2 = ataCheckPowerMode(atadev);
3462 if (powermode2 > powermode)
3463 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3464 powermode = powermode2;
3465 }
3466
3467 switch (powermode){
3468 case -1:
3469 // SLEEP
3470 mode="SLEEP";
3471 if (cfg.powermode>=1)
3472 dontcheck=1;
3473 break;
3474 case 0x00:
3475 // STANDBY
3476 mode="STANDBY";
3477 if (cfg.powermode>=2)
3478 dontcheck=1;
3479 break;
3480 case 0x01:
3481 // STANDBY_Y
3482 mode="STANDBY_Y";
3483 if (cfg.powermode>=2)
3484 dontcheck=1;
3485 break;
3486 case 0x80:
3487 // IDLE
3488 mode="IDLE";
3489 if (cfg.powermode>=3)
3490 dontcheck=1;
3491 break;
3492 case 0x81:
3493 // IDLE_A
3494 mode="IDLE_A";
3495 if (cfg.powermode>=3)
3496 dontcheck=1;
3497 break;
3498 case 0x82:
3499 // IDLE_B
3500 mode="IDLE_B";
3501 if (cfg.powermode>=3)
3502 dontcheck=1;
3503 break;
3504 case 0x83:
3505 // IDLE_C
3506 mode="IDLE_C";
3507 if (cfg.powermode>=3)
3508 dontcheck=1;
3509 break;
3510 case 0xff:
3511 // ACTIVE/IDLE
3512 case 0x40:
3513 // ACTIVE
3514 case 0x41:
3515 // ACTIVE
3516 mode="ACTIVE or IDLE";
3517 break;
3518 default:
3519 // UNKNOWN
3520 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3521 name, powermode);
3522 state.powermodefail = true;
3523 break;
3524 }
3525
3526 // if we are going to skip a check, return now
3527 if (dontcheck){
3528 // skip at most powerskipmax checks
3529 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3530 CloseDevice(atadev, name);
3531 // report first only except if state has changed, avoid waking up system disk
3532 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3533 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3534 state.lastpowermodeskipped = powermode;
3535 }
3536 state.powerskipcnt++;
3537 return 0;
3538 }
3539 else {
3540 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3541 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3542 }
3543 state.powerskipcnt = 0;
3544 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3545 }
3546 else if (state.powerskipcnt) {
3547 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3548 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3549 state.powerskipcnt = 0;
3550 state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
3551 }
3552 }
3553
3554 // check smart status
3555 if (cfg.smartcheck) {
3556 int status=ataSmartStatus2(atadev);
3557 if (status==-1){
3558 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3559 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3560 state.must_write = true;
3561 }
3562 else if (status==1){
3563 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3564 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3565 state.must_write = true;
3566 }
3567 }
3568
3569 // Check everything that depends upon SMART Data (eg, Attribute values)
3570 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3571 || cfg.curr_pending_id || cfg.offl_pending_id
3572 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3573 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3574
3575 // Read current attribute values.
3576 ata_smart_values curval;
3577 if (ataReadSmartValues(atadev, &curval)){
3578 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3579 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3580 state.must_write = true;
3581 }
3582 else {
3583 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3584
3585 // look for current or offline pending sectors
3586 if (cfg.curr_pending_id)
3587 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3588 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3589 : "Total unreadable (pending) sectors" ));
3590
3591 if (cfg.offl_pending_id)
3592 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3593 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3594 : "Total offline uncorrectable sectors"));
3595
3596 // check temperature limits
3597 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3598 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3599
3600 // look for failed usage attributes, or track usage or prefail attributes
3601 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3602 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3603 check_attribute(cfg, state,
3604 curval.vendor_attributes[i],
3605 state.smartval.vendor_attributes[i],
3606 i, state.smartthres.thres_entries);
3607 }
3608 }
3609
3610 // Log changes of offline data collection status
3611 if (cfg.offlinests) {
3614 || state.offline_started // test was started in previous call
3615 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3617 }
3618
3619 // Log changes of self-test execution status
3620 if (cfg.selfteststs) {
3622 || state.selftest_started // test was started in previous call
3623 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3625 }
3626
3627 // Save the new values for the next time around
3628 state.smartval = curval;
3629 }
3630 }
3631 state.offline_started = state.selftest_started = false;
3632
3633 // check if number of selftest errors has increased (note: may also DECREASE)
3634 if (cfg.selftest)
3635 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3636
3637 // check if number of ATA errors has increased
3638 if (cfg.errorlog || cfg.xerrorlog) {
3639
3640 int errcnt1 = -1, errcnt2 = -1;
3641 if (cfg.errorlog)
3642 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3643 if (cfg.xerrorlog)
3644 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3645
3646 // new number of errors is max of both logs
3647 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3648
3649 // did command fail?
3650 if (newc<0)
3651 // lack of PrintOut here is INTENTIONAL
3652 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3653
3654 // has error count increased?
3655 int oldc = state.ataerrorcount;
3656 if (newc>oldc){
3657 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3658 name, oldc, newc);
3659 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3660 name, oldc, newc);
3661 state.must_write = true;
3662 }
3663
3664 if (newc>=0)
3665 state.ataerrorcount=newc;
3666 }
3667
3668 // if the user has asked, and device is capable (or we're not yet
3669 // sure) check whether a self test should be done now.
3670 if (allow_selftests && !cfg.test_regex.empty()) {
3671 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3672 if (testtype)
3673 DoATASelfTest(cfg, state, atadev, testtype);
3674 }
3675
3676 // Don't leave device open -- the OS/user may want to access it
3677 // before the next smartd cycle!
3678 CloseDevice(atadev, name);
3679
3680 // Copy ATA attribute values to persistent state
3682
3683 state.attrlog_dirty = true;
3684 return 0;
3685}
3686
// Run one check cycle on a SCSI device.
// Opens the device, queries the SMART/IE status with scsiCheckIE(), checks the
// temperature limits and the failed self-test count if configured, starts a
// scheduled self-test if one is due, optionally reads the error counter log
// pages for the attribute log and closes the device again.
// Returns 1 if the device could not be opened, 0 otherwise.
// NOTE(review): the embedded listing numbers skip 3736, 3741, 3746 and
// 3751-3752 below, so some statements are apparently missing from this
// listing - compare with the upstream file before editing.
3687static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3688{
3689 if (!open_device(cfg, state, scsidev, "SCSI"))
3690 return 1;
3691
3692 const char * name = cfg.name.c_str();
3693
3694 uint8_t asc = 0, ascq = 0;
3695 uint8_t currenttemp = 0, triptemp = 0;
// Skip the query once a previous failure has set SuppressReport
// (this function only ever sets the flag, it never clears it)
3696 if (!state.SuppressReport) {
3697 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3698 &asc, &ascq, &currenttemp, &triptemp)) {
3699 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3700 name);
3701 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3702 state.SuppressReport = 1;
3703 }
3704 }
// asc stays 0 if the query was skipped or did not report anything
3705 if (asc > 0) {
3706 const char * cp = scsiGetIEString(asc, ascq);
3707 if (cp) {
3708 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3709 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3710 } else if (asc == 4 && ascq == 9) {
3711 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3712 } else if (debugmode)
3713 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3714 name, (int)asc, (int)ascq);
3715 } else if (debugmode)
3716 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3717
3718 // check temperature limits
3719 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3720 CheckTemperature(cfg, state, currenttemp, triptemp);
3721
3722 // check if number of selftest errors has increased (note: may also DECREASE)
3723 if (cfg.selftest)
3724 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3725
// start a scheduled self-test if allowed and a '-s' regex is configured
3726 if (allow_selftests && !cfg.test_regex.empty()) {
3727 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3728 if (testtype)
3729 DoSCSISelfTest(cfg, state, scsidev, testtype);
3730 }
// The log pages below are only read if an attribute log file is configured
3731 if (!cfg.attrlog_file.empty()){
3732 // saving error counters to state
3733 uint8_t tBuf[252];
3734 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3735 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing line 3736 is absent here (presumably the decoding of
// tBuf into state.scsi_error_counters[0] - TODO confirm)
3737 state.scsi_error_counters[0].found=1;
3738 }
3739 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3740 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing line 3741 is absent here
3742 state.scsi_error_counters[1].found=1;
3743 }
3744 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3745 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing line 3746 is absent here
3747 state.scsi_error_counters[2].found=1;
3748 }
3749 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3750 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
// NOTE(review): listing lines 3751-3752 are absent, the visible body is empty
3753 }
3754 // store temperature if not done by CheckTemperature() above
3755 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3756 state.temperature = currenttemp;
3757 }
3758 CloseDevice(scsidev, name);
// request an update of the attribute log
3759 state.attrlog_dirty = true;
3760 return 0;
3761}
3762
3763static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3764{
3765 if (!open_device(cfg, state, nvmedev, "NVMe"))
3766 return 1;
3767
3768 const char * name = cfg.name.c_str();
3769
3770 // Read SMART/Health log
3771 nvme_smart_log smart_log;
3772 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3773 CloseDevice(nvmedev, name);
3774 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3775 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3776 state.must_write = true;
3777 return 0;
3778 }
3779
3780 // Check Critical Warning bits
3781 if (cfg.smartcheck && smart_log.critical_warning) {
3782 unsigned char w = smart_log.critical_warning;
3783 std::string msg;
3784 static const char * const wnames[] =
3785 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3786
3787 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3788 if (!(w & (1 << b)))
3789 continue;
3790 if (cnt)
3791 msg += ", ";
3792 if (++cnt > 3) {
3793 msg += "..."; break;
3794 }
3795 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3796 msg += "*Unknown*"; break;
3797 }
3798 msg += wnames[b];
3799 }
3800
3801 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3802 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3803 state.must_write = true;
3804 }
3805
3806 // Check temperature limits
3807 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3808 int k = nvme_get_max_temp_kelvin(smart_log);
3809 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3810 int c = k - 273;
3811 if (c < 1)
3812 c = 1;
3813 else if (c > 0xff)
3814 c = 0xff;
3815 CheckTemperature(cfg, state, c, 0);
3816 }
3817
3818 // Check if number of errors has increased
3819 if (cfg.errorlog || cfg.xerrorlog) {
3820 uint64_t oldcnt = state.nvme_err_log_entries;
3821 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3822 if (newcnt > oldcnt) {
3823 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3824 name, oldcnt, newcnt);
3825 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3826 name, oldcnt, newcnt);
3827 state.must_write = true;
3828 }
3829 state.nvme_err_log_entries = newcnt;
3830 }
3831
3832 CloseDevice(nvmedev, name);
3833 state.attrlog_dirty = true;
3834 return 0;
3835}
3836
3837// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3839
3841{
3842 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3843 bool sts1 = false, sts2 = false;
3844 for (const auto & cfg : configs) {
3845 if (cfg.offlinests_ns)
3846 sts1 = true;
3847 if (cfg.selfteststs_ns)
3848 sts2 = true;
3849 }
3850
3851 // Check for support of disable auto standby
3852 // Reenable standby if smartd.conf was reread
3853 if (sts1 || sts2 || standby_disable_state == 3) {
3854 if (!smi()->disable_system_auto_standby(false)) {
3855 if (standby_disable_state == 3)
3856 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3857 if (sts1 || sts2) {
3858 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3859 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3860 sts1 = sts2 = false;
3861 }
3862 }
3863 }
3864
3865 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3866}
3867
// Disable the system auto standby through smi()->disable_system_auto_standby()
// while a test is flagged as running for a device configured with ',ns', and
// enable it again otherwise. Log messages are only printed if
// standby_disable_state differs from the state reached.
// NOTE(review): the embedded listing numbers skip 3870, 3880, 3883, 3896, 3903
// and 3909 below, so the guard of the first 'return', parts of the 'running'
// condition and presumably the updates of standby_disable_state are missing
// from this listing - compare with the upstream file before editing.
3868static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3869{
3871 return;
3872
3873 // Check for just started or still running self-tests
3874 bool running = false;
// the loop ends at the first device with a running test
3875 for (unsigned i = 0; i < configs.size() && !running; i++) {
3876 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3877
3878 if ( ( cfg.offlinests_ns
3879 && (state.offline_started ||
3881 || ( cfg.selfteststs_ns
3882 && (state.selftest_started ||
3884 running = true;
3885 // state.offline/selftest_started will be reset after next logging of test status
3886 }
3887
3888 // Disable/enable auto standby and log state changes
3889 if (!running) {
3890 if (standby_disable_state != 1) {
3891 if (!smi()->disable_system_auto_standby(false))
3892 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3893 smi()->get_errmsg());
3894 else
3895 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3897 }
3898 }
// a test is running: the disable request is repeated on each call
3899 else if (!smi()->disable_system_auto_standby(true)) {
3900 if (standby_disable_state != 2) {
3901 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3902 smi()->get_errmsg());
3904 }
3905 }
3906 else {
3907 if (standby_disable_state != 3) {
3908 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3910 }
3911 }
3912}
3913
3914// Checks the SMART status of all ATA and SCSI devices
3915static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3916 smart_device_list & devices, bool firstpass, bool allow_selftests)
3917{
3918 for (unsigned i = 0; i < configs.size(); i++) {
3919 const dev_config & cfg = configs.at(i);
3920 dev_state & state = states.at(i);
3921 if (state.skip) {
3922 if (debugmode)
3923 PrintOut(LOG_INFO, "Device: %s, skipped (interval=%d)\n", cfg.name.c_str(),
3924 (cfg.checktime ? cfg.checktime : checktime));
3925 continue;
3926 }
3927
3928 smart_device * dev = devices.at(i);
3929 if (dev->is_ata())
3930 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3931 else if (dev->is_scsi())
3932 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3933 else if (dev->is_nvme())
3934 NVMeCheckDevice(cfg, state, dev->to_nvme());
3935 }
3936
3937 do_disable_standby_check(configs, states);
3938}
3939
3940// Install all signal handlers
3942{
3943 // normal and abnormal exit
3946
3947 // in debug mode, <CONTROL-C> ==> HUP
3949
3950 // Catch HUP and USR1
3953#ifdef _WIN32
3954 set_signal_if_not_ignored(SIGUSR2, USR2handler);
3955#endif
3956}
3957
#ifdef _WIN32
// Switch debug mode on or off when signal USR2 was received.
// Implemented for native windows only
// (there is no easy way to reopen tty on *nix)
static void ToggleDebugMode()
{
  switch (debugmode) {
    case 0:
      // Off -> on: open a console and let SIGINT act like HUP
      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
      if (daemon_enable_console("smartd [Debug]")) {
        PrintOut(LOG_INFO,"enable console failed\n");
        break;
      }
      debugmode = 1;
      daemon_signal(SIGINT, HUPhandler);
      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
      break;

    case 1:
      // On -> off: close the console and restore the normal SIGINT handler
      daemon_disable_console();
      debugmode = 0;
      daemon_signal(SIGINT, sighandler);
      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
      break;

    default:
      // Any other debug level is left alone
      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
      break;
  }
}
#endif
3983
// Return the next wake-up time for check interval ct (seconds).
// A wake-up time still in the future is returned unchanged, otherwise the
// result is the next time after timenow which differs from wakeuptime by a
// multiple of ct.
time_t calc_next_wakeuptime(time_t wakeuptime, time_t timenow, int ct)
{
  if (wakeuptime > timenow)
    return wakeuptime;

  // Seconds already passed within the current interval
  const time_t into_interval = (timenow - wakeuptime) % ct;
  return timenow + ct - into_interval;
}
3990
3991static time_t dosleep(time_t wakeuptime, const dev_config_vector & configs,
3992 dev_state_vector & states, bool & sigwakeup)
3993{
3994 // If past wake-up-time, compute next wake-up-time
3995 time_t timenow = time(nullptr);
3996 unsigned n = configs.size();
3997 int ct;
3998 if (!checktime_min) {
3999 // Same for all devices
4000 wakeuptime = calc_next_wakeuptime(wakeuptime, timenow, checktime);
4001 ct = checktime;
4002 }
4003 else {
4004 // Determine wakeuptime of next device(s)
4005 wakeuptime = 0;
4006 for (unsigned i = 0; i < n; i++) {
4007 const dev_config & cfg = configs.at(i);
4008 dev_state & state = states.at(i);
4009 if (!state.skip)
4010 state.wakeuptime = calc_next_wakeuptime((state.wakeuptime ? state.wakeuptime : timenow),
4011 timenow, (cfg.checktime ? cfg.checktime : checktime));
4012 if (!wakeuptime || state.wakeuptime < wakeuptime)
4013 wakeuptime = state.wakeuptime;
4014 }
4015 ct = checktime_min;
4016 }
4017
4018 notify_wait(wakeuptime, n);
4019
4020 // Sleep until we catch a signal or have completed sleeping
4021 bool no_skip = false;
4022 int addtime = 0;
4023 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
4024 // Restart if system clock has been adjusted to the past
4025 if (wakeuptime > timenow + ct) {
4026 PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
4027 wakeuptime = timenow + ct;
4028 for (auto & state : states)
4029 state.wakeuptime = 0;
4030 no_skip = true;
4031 }
4032
4033 // Exit sleep when time interval has expired or a signal is received
4034 sleep(wakeuptime+addtime-timenow);
4035
4036#ifdef _WIN32
4037 // toggle debug mode?
4038 if (caughtsigUSR2) {
4039 ToggleDebugMode();
4040 caughtsigUSR2 = 0;
4041 }
4042#endif
4043
4044 timenow = time(nullptr);
4045
4046 // Actual sleep time too long?
4047 if (!addtime && timenow > wakeuptime+60) {
4048 if (debugmode)
4049 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4050 (int)(timenow-wakeuptime));
4051 // Wait another 20 seconds to avoid I/O errors during disk spin-up
4052 addtime = timenow-wakeuptime+20;
4053 // Use next wake-up-time if close
4054 int nextcheck = ct - addtime % ct;
4055 if (nextcheck <= 20)
4056 addtime += nextcheck;
4057 }
4058 }
4059
4060 // if we caught a SIGUSR1 then print message and clear signal
4061 if (caughtsigUSR1){
4062 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4063 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4064 caughtsigUSR1=0;
4065 sigwakeup = no_skip = true;
4066 }
4067
4068 // Check which devices must be skipped in this cycle
4069 if (checktime_min) {
4070 for (auto & state : states)
4071 state.skip = (!no_skip && timenow < state.wakeuptime);
4072 }
4073
4074 // return adjusted wakeuptime
4075 return wakeuptime;
4076}
4077
4078// Print out a list of valid arguments for the Directive d
4079static void printoutvaliddirectiveargs(int priority, char d)
4080{
4081 switch (d) {
4082 case 'n':
4083 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4084 break;
4085 case 's':
4086 PrintOut(priority, "valid_regular_expression");
4087 break;
4088 case 'd':
4089 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4090 break;
4091 case 'T':
4092 PrintOut(priority, "normal, permissive");
4093 break;
4094 case 'o':
4095 case 'S':
4096 PrintOut(priority, "on, off");
4097 break;
4098 case 'l':
4099 PrintOut(priority, "error, selftest");
4100 break;
4101 case 'M':
4102 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4103 break;
4104 case 'v':
4105 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4106 break;
4107 case 'P':
4108 PrintOut(priority, "use, ignore, show, showall");
4109 break;
4110 case 'F':
4111 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4112 break;
4113 case 'e':
4114 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4115 "security-freeze, standby,[N|off], wcache,[on|off]");
4116 break;
4117 case 'c':
4118 PrintOut(priority, "i=N, interval=N");
4119 break;
4120 }
4121}
4122
4123// exits with an error message, or returns integer value of token
4124static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4125 int min, int max, char * suffix = 0)
4126{
4127 // make sure argument is there
4128 if (!arg) {
4129 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4130 cfgfile, lineno, name, token, min, max);
4131 return -1;
4132 }
4133
4134 // get argument value (base 10), check that it's integer, and in-range
4135 char *endptr;
4136 int val = strtol(arg,&endptr,10);
4137
4138 // optional suffix present?
4139 if (suffix) {
4140 if (!strcmp(endptr, suffix))
4141 endptr += strlen(suffix);
4142 else
4143 *suffix = 0;
4144 }
4145
4146 if (!(!*endptr && min <= val && val <= max)) {
4147 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4148 cfgfile, lineno, name, token, arg, min, max);
4149 return -1;
4150 }
4151
4152 // all is well; return value
4153 return val;
4154}
4155
4156
4157// Get 1-3 small integer(s) for '-W' directive
4158static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4159 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4160{
4161 unsigned v1 = 0, v2 = 0, v3 = 0;
4162 int n1 = -1, n2 = -1, n3 = -1, len;
4163 if (!arg) {
4164 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4165 cfgfile, lineno, name, token);
4166 return -1;
4167 }
4168
4169 len = strlen(arg);
4170 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4171 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4172 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4173 cfgfile, lineno, name, token, arg);
4174 return -1;
4175 }
4176 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4177 return 0;
4178}
4179
4180
4181#ifdef _WIN32
4182
// Get the next strtok() token, concatenate strtok() results if quoted with "...".
// Returns:
// - nullptr if no token is left,
// - the token itself if it does not start with '"',
// - the dequoted string if the closing quote was found; the tokens between
//   the quotes are joined with single spaces and the result is kept in a
//   static buffer which is overwritten by the next call,
// - the string "\"" if the closing quote is missing.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(nullptr, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Quoted string without any delimiter inside: the closing quote is
  // already part of the first token
  if (!token.empty() && token.back() == '"') {
    token.pop_back();
    return token.c_str();
  }

  // Append tokens until one ends with the closing quote
  for (;;) {
    t = strtok(nullptr, delimiters);
    if (!t || !*t)
      return "\"";
    token += ' ';
    const size_t len = strlen(t);
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4206
4207#endif // _WIN32
4208
4209
4210// This function returns 1 if it has correctly parsed one token (and
4211// any arguments), else zero if no tokens remain. It returns -1 if an
4212// error was encountered.
// Parameters:
//   token      - Directive token from the config line, expected form "-X"
//   cfg        - device configuration which is updated according to the Directive
//   scan_types - receives the TYPE of each '-d TYPE' Directive, cleared by '-d auto'
// The argument of a Directive is fetched with strtok(nullptr, ...), so this
// function continues the strtok() scan started by its caller.
// NOTE(review): no path in the visible code returns 0; a '#' token returns 1.
// NOTE(review): the embedded listing numbers skip 4443, 4466, 4527, 4533, 4539,
// 4541, 4547, 4549 and 4576 below, so some statements are apparently missing
// from this listing - compare with the upstream file before editing.
4213static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4214{
4215 char sym;
4216 const char * name = cfg.name.c_str();
4217 int lineno=cfg.lineno;
4218 const char *delim = " \n\t";
4219 int badarg = 0;
4220 int missingarg = 0;
4221 const char *arg = 0;
4222
4223 // is the rest of the line a comment
4224 if (*token=='#')
4225 return 1;
4226
4227 // is the token not recognized?
4228 if (*token!='-' || strlen(token)!=2) {
4229 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4230 configfile, lineno, name, token);
4231 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4232 return -1;
4233 }
4234
4235 // token we will be parsing:
4236 sym=token[1];
4237
4238 // parse the token and swallow its argument
4239 int val;
// suffix buffers for GetInteger(): it clears the buffer if the suffix is absent
4240 char plus[] = "+", excl[] = "!";
4241
4242 switch (sym) {
4243 case 'C':
4244 // monitor current pending sector count (default 197)
4245 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4246 return -1;
4247 cfg.curr_pending_id = (unsigned char)val;
4248 cfg.curr_pending_incr = (*plus == '+');
4249 cfg.curr_pending_set = true;
4250 break;
4251 case 'U':
4252 // monitor offline uncorrectable sectors (default 198)
4253 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
4254 return -1;
4255 cfg.offl_pending_id = (unsigned char)val;
4256 cfg.offl_pending_incr = (*plus == '+');
4257 cfg.offl_pending_set = true;
4258 break;
4259 case 'T':
4260 // Set tolerance level for SMART command failures
4261 if (!(arg = strtok(nullptr, delim))) {
4262 missingarg = 1;
4263 } else if (!strcmp(arg, "normal")) {
4264 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4265 // not on failure of an optional S.M.A.R.T. command.
4266 // This is the default so we don't need to actually do anything here.
4267 cfg.permissive = false;
4268 } else if (!strcmp(arg, "permissive")) {
4269 // Permissive mode; ignore errors from Mandatory SMART commands
4270 cfg.permissive = true;
4271 } else {
4272 badarg = 1;
4273 }
4274 break;
4275 case 'd':
4276 // specify the device type
4277 if (!(arg = strtok(nullptr, delim))) {
4278 missingarg = 1;
4279 } else if (!strcmp(arg, "ignore")) {
4280 cfg.ignore = true;
4281 } else if (!strcmp(arg, "removable")) {
4282 cfg.removable = true;
4283 } else if (!strcmp(arg, "auto")) {
4284 cfg.dev_type = "";
4285 scan_types.clear();
4286 } else {
// any other argument is taken as device type without validation here
4287 cfg.dev_type = arg;
4288 scan_types.push_back(arg);
4289 }
4290 break;
4291 case 'F':
4292 // fix firmware bug
4293 if (!(arg = strtok(nullptr, delim)))
4294 missingarg = 1;
4295 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4296 badarg = 1;
4297 break;
4298 case 'H':
4299 // check SMART status
4300 cfg.smartcheck = true;
4301 break;
4302 case 'f':
4303 // check for failure of usage attributes
4304 cfg.usagefailed = true;
4305 break;
4306 case 't':
4307 // track changes in all vendor attributes
4308 cfg.prefail = true;
4309 cfg.usage = true;
4310 break;
4311 case 'p':
4312 // track changes in prefail vendor attributes
4313 cfg.prefail = true;
4314 break;
4315 case 'u':
4316 // track changes in usage vendor attributes
4317 cfg.usage = true;
4318 break;
4319 case 'l':
4320 // track changes in SMART logs
4321 if (!(arg = strtok(nullptr, delim))) {
4322 missingarg = 1;
4323 } else if (!strcmp(arg, "selftest")) {
4324 // track changes in self-test log
4325 cfg.selftest = true;
4326 } else if (!strcmp(arg, "error")) {
4327 // track changes in ATA error log
4328 cfg.errorlog = true;
4329 } else if (!strcmp(arg, "xerror")) {
4330 // track changes in Extended Comprehensive SMART error log
4331 cfg.xerrorlog = true;
4332 } else if (!strcmp(arg, "offlinests")) {
4333 // track changes in offline data collection status
4334 cfg.offlinests = true;
4335 } else if (!strcmp(arg, "offlinests,ns")) {
4336 // track changes in offline data collection status, disable auto standby
4337 cfg.offlinests = cfg.offlinests_ns = true;
4338 } else if (!strcmp(arg, "selfteststs")) {
4339 // track changes in self-test execution status
4340 cfg.selfteststs = true;
4341 } else if (!strcmp(arg, "selfteststs,ns")) {
4342 // track changes in self-test execution status, disable auto standby
4343 cfg.selfteststs = cfg.selfteststs_ns = true;
4344 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4345 // set SCT Error Recovery Control
// nc stays -1 unless both numbers were parsed, so an incomplete argument
// fails the length check below
4346 unsigned rt = ~0, wt = ~0; int nc = -1;
4347 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4348 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4349 cfg.sct_erc_set = true;
4350 cfg.sct_erc_readtime = rt;
4351 cfg.sct_erc_writetime = wt;
4352 }
4353 else
4354 badarg = 1;
4355 } else {
4356 badarg = 1;
4357 }
4358 break;
4359 case 'a':
4360 // monitor everything
4361 cfg.smartcheck = true;
4362 cfg.prefail = true;
4363 cfg.usagefailed = true;
4364 cfg.usage = true;
4365 cfg.selftest = true;
4366 cfg.errorlog = true;
4367 cfg.selfteststs = true;
4368 break;
4369 case 'o':
4370 // automatic offline testing enable/disable
4371 if (!(arg = strtok(nullptr, delim))) {
4372 missingarg = 1;
4373 } else if (!strcmp(arg, "on")) {
4374 cfg.autoofflinetest = 2;
4375 } else if (!strcmp(arg, "off")) {
4376 cfg.autoofflinetest = 1;
4377 } else {
4378 badarg = 1;
4379 }
4380 break;
4381 case 'n':
4382 // skip disk check if in idle or standby mode
4383 if (!(arg = strtok(nullptr, delim)))
4384 missingarg = 1;
4385 else {
4386 char *endptr = nullptr;
// arg points into the writable line buffer; it is split at the first ','
// so that the mode name can be compared on its own
4387 char *next = strchr(const_cast<char*>(arg), ',');
4388
4389 cfg.powerquiet = false;
4390 cfg.powerskipmax = 0;
4391
4392 if (next)
4393 *next = '\0';
4394 if (!strcmp(arg, "never"))
4395 cfg.powermode = 0;
4396 else if (!strcmp(arg, "sleep"))
4397 cfg.powermode = 1;
4398 else if (!strcmp(arg, "standby"))
4399 cfg.powermode = 2;
4400 else if (!strcmp(arg, "idle"))
4401 cfg.powermode = 3;
4402 else
4403 badarg = 1;
4404
4405 // if optional arguments are present
4406 if (!badarg && next) {
4407 next++;
4408 cfg.powerskipmax = strtol(next, &endptr, 10);
4409 if (endptr == next)
4410 cfg.powerskipmax = 0;
4411 else {
// step over the single character following the number, if any
4412 next = endptr + (*endptr != '\0');
4413 if (cfg.powerskipmax <= 0)
4414 badarg = 1;
4415 }
// anything left must be exactly "q"
4416 if (*next != '\0') {
4417 if (!strcmp("q", next))
4418 cfg.powerquiet = true;
4419 else {
4420 badarg = 1;
4421 }
4422 }
4423 }
4424 }
4425 break;
4426 case 'S':
4427 // automatic attribute autosave enable/disable
4428 if (!(arg = strtok(nullptr, delim))) {
4429 missingarg = 1;
4430 } else if (!strcmp(arg, "on")) {
4431 cfg.autosave = 2;
4432 } else if (!strcmp(arg, "off")) {
4433 cfg.autosave = 1;
4434 } else {
4435 badarg = 1;
4436 }
4437 break;
4438 case 's':
4439 // warn user, and delete any previously given -s REGEXP Directives
4440 if (!cfg.test_regex.empty()){
4441 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4442 configfile, lineno, name, cfg.test_regex.get_pattern());
// NOTE(review): listing line 4443 is absent here
4444 }
4445 // check for missing argument
4446 if (!(arg = strtok(nullptr, delim))) {
4447 missingarg = 1;
4448 }
4449 // Compile regex
4450 else {
4451 if (!cfg.test_regex.compile(arg)) {
4452 // not a valid regular expression!
4453 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4454 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4455 return -1;
4456 }
4457 // Do a bit of sanity checking and warn user if we think that
4458 // their regexp is "strange". User probably confused about shell
4459 // glob(3) syntax versus regular expression syntax regexp(7).
4460 // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4461 static const regular_expression syntax_check(
4462 "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4463 ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4464 ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4465 );
// NOTE(review): listing line 4466 is absent here; 'range' used below is not
// declared in the visible code
4467 if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4468 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4469 "extended regular expression \"%s\"\n",
4470 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4471 }
4472 break;
4473 case 'm':
4474 // send email to address that follows
4475 if (!(arg = strtok(nullptr, delim)))
4476 missingarg = 1;
4477 else {
4478 if (!cfg.emailaddress.empty())
4479 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4480 configfile, lineno, name, cfg.emailaddress.c_str());
4481 cfg.emailaddress = arg;
4482 }
4483 break;
4484 case 'M':
4485 // email warning options
4486 if (!(arg = strtok(nullptr, delim)))
4487 missingarg = 1;
4488 else if (!strcmp(arg, "once"))
4489 cfg.emailfreq = 1;
4490 else if (!strcmp(arg, "daily"))
4491 cfg.emailfreq = 2;
4492 else if (!strcmp(arg, "diminishing"))
4493 cfg.emailfreq = 3;
4494 else if (!strcmp(arg, "test"))
4495 cfg.emailtest = 1;
4496 else if (!strcmp(arg, "exec")) {
4497 // Get the next argument (the command line)
4498#ifdef _WIN32
4499 // Allow "/path name/with spaces/..." on Windows
4500 arg = strtok_dequote(delim);
// a result still starting with '"' is the error marker of strtok_dequote()
4501 if (arg && arg[0] == '"') {
4502 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4503 configfile, lineno, name, token);
4504 return -1;
4505 }
4506#else
4507 arg = strtok(nullptr, delim);
4508#endif
4509 if (!arg) {
4510 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4511 configfile, lineno, name, token);
4512 return -1;
4513 }
4514 // Free the last cmd line given if any, and copy new one
4515 if (!cfg.emailcmdline.empty())
4516 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4517 configfile, lineno, name, cfg.emailcmdline.c_str());
4518 cfg.emailcmdline = arg;
4519 }
4520 else
4521 badarg = 1;
4522 break;
4523 case 'i':
4524 // ignore failure of usage attribute
4525 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4526 return -1;
// NOTE(review): listing line 4527 is absent here, the visible code does not use val
4528 break;
4529 case 'I':
4530 // ignore attribute for tracking purposes
4531 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
4532 return -1;
// NOTE(review): listing line 4533 is absent here, the visible code does not use val
4534 break;
4535 case 'r':
4536 // print raw value when tracking
4537 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4538 return -1;
// NOTE(review): listing lines 4539 and 4541 are absent, the statement guarded
// by the 'if' below is not visible
4540 if (*excl == '!') // attribute change is critical
4542 break;
4543 case 'R':
4544 // track changes in raw value (forces printing of raw value)
4545 if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
4546 return -1;
// NOTE(review): listing lines 4547 and 4549 are absent, the statement guarded
// by the 'if' below is not visible
4548 if (*excl == '!') // raw value change is critical
4550 break;
4551 case 'W':
4552 // track Temperature
4553 if (Get3Integers((arg = strtok(nullptr, delim)), name, token, lineno, configfile,
4554 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4555 return -1;
4556 break;
4557 case 'v':
4558 // non-default vendor-specific attribute meaning
4559 if (!(arg = strtok(nullptr, delim))) {
4560 missingarg = 1;
4561 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4562 badarg = 1;
4563 }
4564 break;
4565 case 'P':
4566 // Define use of drive-specific presets.
4567 if (!(arg = strtok(nullptr, delim))) {
4568 missingarg = 1;
4569 } else if (!strcmp(arg, "use")) {
4570 cfg.ignorepresets = false;
4571 } else if (!strcmp(arg, "ignore")) {
4572 cfg.ignorepresets = true;
4573 } else if (!strcmp(arg, "show")) {
4574 cfg.showpresets = true;
4575 } else if (!strcmp(arg, "showall")) {
// NOTE(review): listing line 4576 is absent, the action for "showall" is not visible
4577 } else {
4578 badarg = 1;
4579 }
4580 break;
4581
4582 case 'e':
4583 // Various ATA settings
4584 if (!(arg = strtok(nullptr, delim))) {
4585 missingarg = true;
4586 }
4587 else {
// Split "NAME[,VALUE]" (also "NAME=VALUE"): arg2 receives up to 16 chars of
// NAME, n1 is the position after NAME, n2 the position after the separator(s)
// and n3 the position after a numeric VALUE; positions not reached remain -1
4588 char arg2[16+1]; unsigned uval;
4589 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4590 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &uval, &n3) >= 1
4591 && (n1 == len || n2 > 0)) {
4592 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4593 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
// no number up to the end of the argument: use a value which fails
// all range checks below
4594 if (n3 != len)
4595 uval = ~0U;
4596
4597 if (!strcmp(arg2, "aam")) {
4598 if (off)
4599 cfg.set_aam = -1;
4600 else if (uval <= 254)
4601 cfg.set_aam = uval + 1;
4602 else
4603 badarg = true;
4604 }
4605 else if (!strcmp(arg2, "apm")) {
4606 if (off)
4607 cfg.set_apm = -1;
4608 else if (1 <= uval && uval <= 254)
4609 cfg.set_apm = uval + 1;
4610 else
4611 badarg = true;
4612 }
4613 else if (!strcmp(arg2, "lookahead")) {
4614 if (off)
4615 cfg.set_lookahead = -1;
4616 else if (on)
4617 cfg.set_lookahead = 1;
4618 else
4619 badarg = true;
4620 }
// compares the whole argument, so no ",VALUE" part is accepted here
4621 else if (!strcmp(arg, "security-freeze")) {
4622 cfg.set_security_freeze = true;
4623 }
4624 else if (!strcmp(arg2, "standby")) {
4625 if (off)
4626 cfg.set_standby = 0 + 1;
4627 else if (uval <= 255)
4628 cfg.set_standby = uval + 1;
4629 else
4630 badarg = true;
4631 }
4632 else if (!strcmp(arg2, "wcache")) {
4633 if (off)
4634 cfg.set_wcache = -1;
4635 else if (on)
4636 cfg.set_wcache = 1;
4637 else
4638 badarg = true;
4639 }
4640 else if (!strcmp(arg2, "dsn")) {
4641 if (off)
4642 cfg.set_dsn = -1;
4643 else if (on)
4644 cfg.set_dsn = 1;
4645 else
4646 badarg = true;
4647 }
4648 else
4649 badarg = true;
4650 }
4651 else
4652 badarg = true;
4653 }
4654 break;
4655
4656 case 'c':
4657 // Override command line options
4658 {
4659 if (!(arg = strtok(nullptr, delim))) {
4660 missingarg = true;
4661 break;
4662 }
// accepts "i=N" or "interval=N" with N >= 10 and nothing after the number
4663 int n = 0, nc = -1, len = strlen(arg);
4664 if ( ( sscanf(arg, "i=%d%n", &n, &nc) == 1
4665 || sscanf(arg, "interval=%d%n", &n, &nc) == 1)
4666 && nc == len && n >= 10)
4667 cfg.checktime = n;
4668 else
4669 badarg = true;
4670 }
4671 break;
4672
4673 default:
4674 // Directive not recognized
4675 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4676 configfile, lineno, name, token);
4677 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4678 return -1;
4679 }
// common error reporting for the cases which only set missingarg or badarg
4680 if (missingarg) {
4681 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4682 configfile, lineno, name, token);
4683 }
4684 if (badarg) {
4685 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4686 configfile, lineno, name, token, arg);
4687 }
4688 if (missingarg || badarg) {
4689 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4690 printoutvaliddirectiveargs(LOG_CRIT, sym);
4691 PrintOut(LOG_CRIT, "\n");
4692 return -1;
4693 }
4694
4695 return 1;
4696}
4697
// Scan directive for configuration file.
// This is the pseudo device name which is compared against the first token
// of a configuration line.  It is kept as a macro (not a constant) because
// it is concatenated with adjacent string literals at compile time.
#define SCANDIRECTIVE "DEVICESCAN"
4700
4701// This is the routine that adds things to the conf_entries list.
4702//
4703// Return values are:
4704// 1: parsed a normal line
4705// 0: found DEFAULT setting or comment or blank line
4706// -1: found SCANDIRECTIVE line
4707// -2: found an error
4708//
4709// Note: this routine modifies *line from the caller!
4710static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4711 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4712{
4713 const char *delim = " \n\t";
4714
4715 // get first token: device name. If a comment, skip line
4716 const char * name = strtok(line, delim);
4717 if (!name || *name == '#')
4718 return 0;
4719
4720 // Check device name for DEFAULT or DEVICESCAN
4721 int retval;
4722 if (!strcmp("DEFAULT", name)) {
4723 retval = 0;
4724 // Restart with empty defaults
4725 default_conf = dev_config();
4726 }
4727 else {
4728 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4729 // Init new entry with current defaults
4730 conf_entries.push_back(default_conf);
4731 }
4732 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4733
4734 cfg.name = name; // Later replaced by dev->get_info().info_name
4735 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4736 cfg.lineno = lineno;
4737
4738 // parse tokens one at a time from the file.
4739 while (char * token = strtok(nullptr, delim)) {
4740 int rc = ParseToken(token, cfg, scan_types);
4741 if (rc < 0)
4742 // error found on the line
4743 return -2;
4744
4745 if (rc == 0)
4746 // No tokens left
4747 break;
4748
4749 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4750 }
4751
4752 // Check for multiple -d TYPE directives
4753 if (retval != -1 && scan_types.size() > 1) {
4754 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4755 cfg.name.c_str(), cfg.lineno, configfile);
4756 return -2;
4757 }
4758
4759 // Don't perform checks below for DEFAULT entries
4760 if (retval == 0)
4761 return retval;
4762
4763 // If NO monitoring directives are set, then set all of them.
4764 if (!( cfg.smartcheck || cfg.selftest
4765 || cfg.errorlog || cfg.xerrorlog
4766 || cfg.offlinests || cfg.selfteststs
4767 || cfg.usagefailed || cfg.prefail || cfg.usage
4768 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4769
4770 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4771 cfg.name.c_str(), cfg.lineno, configfile);
4772
4773 cfg.smartcheck = true;
4774 cfg.usagefailed = true;
4775 cfg.prefail = true;
4776 cfg.usage = true;
4777 cfg.selftest = true;
4778 cfg.errorlog = true;
4779 cfg.selfteststs = true;
4780 }
4781
4782 // additional sanity check. Has user set -M options without -m?
4783 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4784 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4785 cfg.name.c_str(), cfg.lineno, configfile);
4786 return -2;
4787 }
4788
4789 // has the user has set <nomailer>?
4790 if (cfg.emailaddress == "<nomailer>") {
4791 // check that -M exec is also set
4792 if (cfg.emailcmdline.empty()){
4793 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4794 cfg.name.c_str(), cfg.lineno, configfile);
4795 return -2;
4796 }
4797 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4798 cfg.emailaddress.clear();
4799 }
4800
4801 return retval;
4802}
4803
4804// Parses a configuration file. Return values are:
4805// N=>0: found N entries
4806// -1: syntax error in config file
4807// -2: config file does not exist
4808// -3: config file exists but cannot be read
4809//
4810// In the case where the return value is 0, there are three
4811// possibilities:
4812// Empty configuration file ==> conf_entries.empty()
4813// No configuration file ==> conf_entries[0].lineno == 0
4814// SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
4815static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4816{
4817 // maximum line length in configuration file
4818 const int MAXLINELEN = 256;
4819 // maximum length of a continued line in configuration file
4820 const int MAXCONTLINE = 1023;
4821
4822 stdio_file f;
4823 // Open config file, if it exists and is not <stdin>
4824 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4825 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4826 // file exists but we can't read it or it should exist due to '-c' option
4827 int ret = (errno!=ENOENT ? -3 : -2);
4828 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4829 strerror(errno),configfile);
4830 return ret;
4831 }
4832 }
4833 else // read from stdin ('-c -' option)
4834 f.open(stdin);
4835
4836 // Start with empty defaults
4837 dev_config default_conf;
4838
4839 // No configuration file found -- use fake one
4840 int entry = 0;
4841 if (!f) {
4842 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4843
4844 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4845 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4846 return 0;
4847 }
4848
4849#ifdef __CYGWIN__
4850 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4851#endif
4852
4853 // configuration file exists
4854 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4855
4856 // parse config file line by line
4857 int lineno = 1, cont = 0, contlineno = 0;
4858 char line[MAXLINELEN+2];
4859 char fullline[MAXCONTLINE+1];
4860
4861 for (;;) {
4862 int len=0,scandevice;
4863 char *lastslash;
4864 char *comment;
4865 char *code;
4866
4867 // make debugging simpler
4868 memset(line,0,sizeof(line));
4869
4870 // get a line
4871 code=fgets(line, MAXLINELEN+2, f);
4872
4873 // are we at the end of the file?
4874 if (!code){
4875 if (cont) {
4876 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4877 // See if we found a SCANDIRECTIVE directive
4878 if (scandevice==-1)
4879 return 0;
4880 // did we find a syntax error
4881 if (scandevice==-2)
4882 return -1;
4883 // the final line is part of a continuation line
4884 entry+=scandevice;
4885 }
4886 break;
4887 }
4888
4889 // input file line number
4890 contlineno++;
4891
4892 // See if line is too long
4893 len=strlen(line);
4894 if (len>MAXLINELEN){
4895 const char *warn;
4896 if (line[len-1]=='\n')
4897 warn="(including newline!) ";
4898 else
4899 warn="";
4900 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4901 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4902 return -1;
4903 }
4904
4905 // Ignore anything after comment symbol
4906 if ((comment=strchr(line,'#'))){
4907 *comment='\0';
4908 len=strlen(line);
4909 }
4910
4911 // is the total line (made of all continuation lines) too long?
4912 if (cont+len>MAXCONTLINE){
4913 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4914 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4915 return -1;
4916 }
4917
4918 // copy string so far into fullline, and increment length
4919 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4920 cont+=len;
4921
4922 // is this a continuation line. If so, replace \ by space and look at next line
4923 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4924 *(fullline+(cont-len)+(lastslash-line))=' ';
4925 continue;
4926 }
4927
4928 // Not a continuation line. Parse it
4929 scan_types.clear();
4930 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4931
4932 // did we find a scandevice directive?
4933 if (scandevice==-1)
4934 return 0;
4935 // did we find a syntax error
4936 if (scandevice==-2)
4937 return -1;
4938
4939 entry+=scandevice;
4940 lineno++;
4941 cont=0;
4942 }
4943
4944 // note -- may be zero if syntax of file OK, but no valid entries!
4945 return entry;
4946}
4947
4948/* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4949 <LIST> is the list of valid arguments for option opt. */
4950static void PrintValidArgs(char opt)
4951{
4952 const char *s;
4953
4954 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4955 if (!(s = GetValidArgList(opt)))
4956 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4957 else
4958 PrintOut(LOG_CRIT, "%s", (char *)s);
4959 PrintOut(LOG_CRIT, " <=======\n");
4960}
4961
4962#ifndef _WIN32
4963// Report error and return false if specified path is not absolute.
4964static bool check_abs_path(char option, const std::string & path)
4965{
4966 if (path.empty() || path[0] == '/')
4967 return true;
4968
4969 debugmode = 1;
4970 PrintHead();
4971 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4972 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4973 return false;
4974}
4975#endif // !_WIN32
4976
4977// Parses input line, prints usage message and
4978// version/license/copyright messages
4979static int parse_options(int argc, char **argv)
4980{
4981 // Init default path names
4982#ifndef _WIN32
4983 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4984 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4985#else
4986 std::string exedir = get_exe_dir();
4987 static std::string configfile_str = exedir + "/smartd.conf";
4988 configfile = configfile_str.c_str();
4989 warning_script = exedir + "/smartd_warning.cmd";
4990#endif
4991
4992 // Please update GetValidArgList() if you edit shortopts
4993 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4994#if defined(HAVE_POSIX_API) || defined(_WIN32)
4995 "u:"
4996#endif
4997#ifdef HAVE_LIBCAP_NG
4998 "C"
4999#endif
5000 ;
5001 // Please update GetValidArgList() if you edit longopts
5002 struct option longopts[] = {
5003 { "configfile", required_argument, 0, 'c' },
5004 { "logfacility", required_argument, 0, 'l' },
5005 { "quit", required_argument, 0, 'q' },
5006 { "debug", no_argument, 0, 'd' },
5007 { "showdirectives", no_argument, 0, 'D' },
5008 { "interval", required_argument, 0, 'i' },
5009#ifndef _WIN32
5010 { "no-fork", no_argument, 0, 'n' },
5011#else
5012 { "service", no_argument, 0, 'n' },
5013#endif
5014 { "pidfile", required_argument, 0, 'p' },
5015 { "report", required_argument, 0, 'r' },
5016 { "savestates", required_argument, 0, 's' },
5017 { "attributelog", required_argument, 0, 'A' },
5018 { "drivedb", required_argument, 0, 'B' },
5019 { "warnexec", required_argument, 0, 'w' },
5020 { "version", no_argument, 0, 'V' },
5021 { "license", no_argument, 0, 'V' },
5022 { "copyright", no_argument, 0, 'V' },
5023 { "help", no_argument, 0, 'h' },
5024 { "usage", no_argument, 0, 'h' },
5025#if defined(HAVE_POSIX_API) || defined(_WIN32)