aboutsummaryrefslogtreecommitdiff
path: root/cddl/usr.sbin/zfsd/case_file.h
blob: b4dc2dee5d968d11cd3ccd9544692a4a5a1704be (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
/*-
 * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 *
 * $FreeBSD$
 */

/**
 * \file case_file.h
 *
 * CaseFile objects aggregate vdev faults that may require ZFSD action
 * in order to maintain the health of a ZFS pool.
 *
 * Header requirements:
 *
 *    #include <list>
 *
 *    #include "callout.h"
 *    #include "zfsd_event.h"
 */
#ifndef _CASE_FILE_H_
#define	_CASE_FILE_H_

/*=========================== Forward Declarations ===========================*/
class CaseFile;
class Vdev;

/*============================= Class Definitions ============================*/
/*------------------------------- CaseFileList -------------------------------*/
/**
 * An STL list of CaseFile pointers.  This is the container type used
 * for collections of cases, e.g. CaseFile::s_activeCases.
 */
typedef std::list<CaseFile *> CaseFileList;

/*--------------------------------- CaseFile ---------------------------------*/
/**
 * A CaseFile object is instantiated anytime a vdev for an active pool
 * experiences an I/O error, is faulted by ZFS, or is determined to be
 * missing/removed.
 *
 * A vdev may have at most one CaseFile.
 *
 * CaseFiles are retired when a vdev leaves an active pool configuration
 * or an action is taken to resolve the issues recorded in the CaseFile.
 *
 * Logging a case against a vdev does not imply that an immediate action
 * to resolve a fault is required or even desired.  For example, a CaseFile
 * must accumulate a number of I/O errors in order to flag a device as
 * degraded.
 *
 * Vdev I/O errors are not recorded in ZFS label information.  For this
 * reason, CaseFile%%s with accumulated I/O error events are serialized
 * to the file system so that they survive across boots.  Currently all
 * other fault types can be reconstructed from ZFS label information, so
 * CaseFile%%s for missing, faulted, or degraded members are just recreated
 * at ZFSD startup instead of being deserialized from the file system.
 */
class CaseFile
{
public:
	/**
	 * \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple.
	 *
	 * \param poolGUID  Pool GUID for the vdev of the CaseFile to find.
	 *                  If InvalidGuid, then only match the vdev GUID
	 *                  instead of both pool and vdev GUIDs.
	 * \param vdevGUID  Vdev GUID for the vdev of the CaseFile to find.
	 *
	 * \return  If found, a pointer to a valid CaseFile object.
	 *          Otherwise NULL.
	 */
	static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID);

	/**
	 * \brief Find a CaseFile object by a vdev's current/last known
	 *        physical path.
	 *
	 * \param physPath  Physical path of the vdev of the CaseFile to find.
	 *
	 * \return  If found, a pointer to a valid CaseFile object.
	 *          Otherwise NULL.
	 */
	static CaseFile *Find(const string &physPath);

	/**
	 * \brief Re-evaluate all open cases whose pool GUID matches the
	 *        argument.
	 *
	 * \param poolGUID  Only re-evaluate cases for this pool.
	 * \param event     Try to consume this event with the CaseFile.
	 */
	static void ReEvaluateByGuid(DevdCtl::Guid poolGUID,
				     const ZfsEvent &event);

	/**
	 * \brief Create or return an existing active CaseFile for the
	 *        specified vdev.
	 *
	 * \param vdev  The vdev object for which to find/create a CaseFile.
	 *
	 * \return  A reference to a valid CaseFile object.
	 */
	static CaseFile &Create(Vdev &vdev);

	/**
	 * \brief Deserialize all serialized CaseFile objects found in
	 *        the file system.
	 */
	static void      DeSerialize();

	/**
	 * \brief Report whether there are no CaseFiles.
	 *
	 * \return  True if there are no CaseFiles.
	 */
	static bool	Empty();

	/**
	 * \brief Emit syslog data on all active CaseFile%%s in the system.
	 */
	static void      LogAll();

	/**
	 * \brief Destroy the in-core cache of CaseFile data.
	 *
	 * This routine does not disturb the on disk, serialized, CaseFile
	 * data.
	 */
	static void      PurgeAll();

	/*
	 * Read-only accessors.  Each one is defined inline at the bottom of
	 * this header and simply returns the corresponding m_* data member.
	 */
	/** \brief Pool GUID of this case's vdev (m_poolGUID). */
	DevdCtl::Guid PoolGUID()       const;
	/** \brief GUID of this case's vdev (m_vdevGUID). */
	DevdCtl::Guid VdevGUID()       const;
	/** \brief Vdev state recorded in this CaseFile (m_vdevState). */
	vdev_state    VdevState()      const;
	/** \brief String stored in m_poolGUIDString. */
	const string &PoolGUIDString() const;
	/** \brief String stored in m_vdevGUIDString. */
	const string &VdevGUIDString() const;
	/** \brief Physical path stored in m_vdevPhysPath. */
	const string &PhysicalPath()   const;

	/**
	 * \brief Attempt to resolve this CaseFile using the disk
	 *        resource at the given device/physical path/vdev object
	 *        tuple.
	 *
	 * \param devPath   The devfs path for the disk resource.
	 * \param physPath  The physical path information reported by
	 *                  the disk resource.
	 * \param vdev      If the disk contains ZFS label information,
	 *                  a pointer to the disk label's vdev object
	 *                  data.  Otherwise NULL.
	 *
	 * \return  True if this event was consumed by this CaseFile.
	 */
	bool ReEvaluate(const string &devPath, const string &physPath,
			Vdev *vdev);

	/**
	 * \brief Update this CaseFile in light of the provided ZfsEvent.
	 *
	 * Must be virtual so it can be overridden in the unit tests.
	 *
	 * \param event  The ZfsEvent to evaluate.
	 *
	 * \return  True if this event was consumed by this CaseFile.
	 */
	virtual bool ReEvaluate(const ZfsEvent &event);

	/**
	 * \brief Register an itimer callout for the given event, if necessary.
	 *
	 * \param event  The event for which a callout may be registered.
	 */
	virtual void RegisterCallout(const DevdCtl::Event &event);

	/**
	 * \brief Close a case if it is no longer relevant.
	 *
	 * This method deals with cases tracking soft errors.  Soft errors
	 * will be discarded should a remove event occur within a short period
	 * of the soft errors being reported.  We also discard the events
	 * if the vdev is marked degraded or failed.
	 *
	 * \return  True if the case is closed.  False otherwise.
	 */
	bool CloseIfSolved();

	/**
	 * \brief Emit data about this CaseFile via syslog(3).
	 */
	void Log();

	/**
	 * \brief Whether we should degrade this vdev.
	 *
	 * \return  True if the vdev should be degraded.
	 */
	bool ShouldDegrade() const;

	/**
	 * \brief Whether we should fault this vdev.
	 *
	 * \return  True if the vdev should be faulted.
	 */
	bool ShouldFault() const;

protected:
	enum {
		/**
		 * The number of soft errors on a vdev required
		 * to transition a vdev from healthy to degraded
		 * status.
		 */
		ZFS_DEGRADE_IO_COUNT = 50
	};

	/**
	 * \brief Static function declared through the CalloutFunc_t type.
	 *
	 * This overloads the non-static OnGracePeriodEnded() member declared
	 * further down.  NOTE(review): presumably this is the entry point
	 * handed to the Callout machinery, which then forwards to the
	 * per-object member -- confirm against case_file.cc and callout.h.
	 */
	static CalloutFunc_t OnGracePeriodEnded;

	/**
	 * \brief scandir(3) filter function used to find files containing
	 *        serialized CaseFile data.
	 *
	 * \param dirEntry  Directory entry for the file to filter.
	 *
	 * \return  Non-zero for a file to include in the selection,
	 *          otherwise 0.
	 */
	static int  DeSerializeSelector(const struct dirent *dirEntry);

	/**
	 * \brief Given the name of a file containing serialized events from a
	 *        CaseFile object, create/update an in-core CaseFile object
	 *        representing the serialized data.
	 *
	 * \param fileName  The name of a file containing serialized events
	 *                  from a CaseFile object.
	 */
	static void DeSerializeFile(const char *fileName);

	/**
	 * Constructor.
	 * Protected: code outside this class and its subclasses obtains
	 * CaseFile objects through Create() or Find().
	 *
	 * \param vdev  The vdev this CaseFile is opened against.
	 */
	CaseFile(const Vdev &vdev);

	/**
	 * Destructor.
	 * Must be virtual so it can be subclassed in the unit tests.
	 */
	virtual ~CaseFile();

	/**
	 * \brief Reload state for the vdev associated with this CaseFile.
	 *
	 * \return  True if the refresh was successful.  False if the system
	 *          has no record of the pool or vdev for this CaseFile.
	 */
	virtual bool RefreshVdevState();

	/**
	 * \brief Free all events in the m_events list.
	 */
	void PurgeEvents();

	/**
	 * \brief Free all events in the m_tentativeEvents list.
	 */
	void PurgeTentativeEvents();

	/**
	 * \brief Commit to file system storage.
	 */
	void Serialize();

	/**
	 * \brief Retrieve event data from a serialization stream.
	 *
	 * \param caseStream  The serialization stream to parse.
	 */
	void DeSerialize(std::ifstream &caseStream);

	/**
	 * \brief Serializes the supplied event list and writes it to fd.
	 *
	 * \param events  The event list to serialize.  NOTE(review): taken
	 *                by value, so the list object is copied on every
	 *                call; a const reference would avoid the copy but
	 *                requires changing the definition as well.
	 * \param fd      Destination the serialized events are written to
	 *                (presumably an open file descriptor -- confirm).
	 * \param prefix  If not NULL, this prefix will be prepended to
	 *                every event in the file.
	 */
	void SerializeEvList(const DevdCtl::EventList events, int fd,
			     const char* prefix=NULL) const;

	/**
	 * \brief Unconditionally close a CaseFile.
	 */
	virtual void Close();

	/**
	 * \brief Callout callback invoked when the remove timer grace
	 *        period expires.
	 *
	 * If no remove events are received prior to the grace period
	 * firing, then any tentative events are promoted and counted
	 * against the health of the vdev.
	 */
	void OnGracePeriodEnded();

	/**
	 * \brief Attempt to activate a spare on this case's pool.
	 *
	 * Call this whenever a pool becomes degraded.  It will look for any
	 * spare devices and activate one to replace the casefile's vdev.  It
	 * will _not_ close the casefile; that should only happen when the
	 * missing drive is replaced or the user promotes the spare.
	 *
	 * \return True if a spare was activated
	 */
	bool ActivateSpare();

	/**
	 * \brief Replace a pool's vdev with another.
	 *
	 * \param vdev_type   The type of the new vdev.  Usually either
	 *                    VDEV_TYPE_DISK or VDEV_TYPE_FILE
	 * \param path        The file system path to the new vdev
	 * \param isspare     Whether the new vdev is a spare
	 *
	 * \return            true iff the replacement was successful
	 */
	bool Replace(const char* vdev_type, const char* path, bool isspare);

	/**
	 * \brief Which vdev, if any, is replacing ours.
	 *
	 * \param zhp		Pool handle state from the caller context
	 *
	 * \return		the vdev that is currently replacing ours,
	 *			or NonexistentVdev if there isn't one.
	 */
	Vdev BeingReplacedBy(zpool_handle_t *zhp);

	/**
	 * \brief All CaseFiles being tracked by ZFSD.
	 */
	static CaseFileList  s_activeCases;

	/**
	 * \brief The file system path to serialized CaseFile data.
	 */
	static const string  s_caseFilePath;

	/**
	 * \brief The time ZFSD waits before promoting a tentative event
	 *        into a permanent event.
	 */
	static const timeval s_removeGracePeriod;

	/**
	 * \brief A list of soft error events counted against the health of
	 *        a vdev.
	 */
	DevdCtl::EventList m_events;

	/**
	 * \brief A list of soft error events waiting for a grace period
	 *        expiration before being counted against the health of
	 *        a vdev.
	 */
	DevdCtl::EventList m_tentativeEvents;

	/*
	 * Identity and state of the vdev this case tracks.  These back the
	 * public accessors of the corresponding names (m_vdevPhysPath backs
	 * PhysicalPath()).
	 */
	DevdCtl::Guid	   m_poolGUID;
	DevdCtl::Guid	   m_vdevGUID;
	vdev_state	   m_vdevState;
	string		   m_poolGUIDString;
	string		   m_vdevGUIDString;
	string		   m_vdevPhysPath;

	/**
	 * \brief Callout associated with the grace period applied to
	 *        tentative events.
	 *
	 * NOTE(review): presumably armed with s_removeGracePeriod and
	 * serviced by OnGracePeriodEnded() -- confirm in case_file.cc.
	 */
	Callout		  m_tentativeTimer;

private:
	/**
	 * \brief Look up vdev data for this case within the given pool.
	 *
	 * NOTE(review): presumably returns the nvlist describing this
	 * case's vdev in the pool configuration, or NULL when it cannot be
	 * found -- confirm against the implementation.
	 *
	 * \param zhp  Pool handle supplied by the caller.
	 */
	nvlist_t	*CaseVdev(zpool_handle_t *zhp)	const;
};

/** Accessor: hands back the pool GUID stored in m_poolGUID. */
inline DevdCtl::Guid CaseFile::PoolGUID() const
{
	return m_poolGUID;
}

/** Accessor: hands back the vdev GUID stored in m_vdevGUID. */
inline DevdCtl::Guid CaseFile::VdevGUID() const
{
	return m_vdevGUID;
}

/** Accessor: hands back the vdev state stored in m_vdevState. */
inline vdev_state CaseFile::VdevState() const
{
	return m_vdevState;
}

/**
 * Accessor: reference to the string held in m_poolGUIDString.  The
 * reference refers to a data member, so it is only usable while this
 * CaseFile object exists.
 */
inline const string &CaseFile::PoolGUIDString() const
{
	return m_poolGUIDString;
}

/**
 * Accessor: reference to the string held in m_vdevGUIDString.  The
 * reference refers to a data member, so it is only usable while this
 * CaseFile object exists.
 */
inline const string &CaseFile::VdevGUIDString() const
{
	return m_vdevGUIDString;
}

/**
 * Accessor: reference to the physical path held in m_vdevPhysPath.  The
 * reference refers to a data member, so it is only usable while this
 * CaseFile object exists.
 */
inline const string &CaseFile::PhysicalPath() const
{
	return m_vdevPhysPath;
}

#endif /* _CASE_FILE_H_ */