1 | /* $NetBSD: rf_parityscan.c,v 1.34 2011/05/01 01:09:05 mrg Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Author: Mark Holland |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | |
29 | /***************************************************************************** |
30 | * |
31 | * rf_parityscan.c -- misc utilities related to parity verification |
32 | * |
33 | ****************************************************************************/ |
34 | |
35 | #include <sys/cdefs.h> |
36 | __KERNEL_RCSID(0, "$NetBSD: rf_parityscan.c,v 1.34 2011/05/01 01:09:05 mrg Exp $" ); |
37 | |
38 | #include <dev/raidframe/raidframevar.h> |
39 | |
40 | #include "rf_raid.h" |
41 | #include "rf_dag.h" |
42 | #include "rf_dagfuncs.h" |
43 | #include "rf_dagutils.h" |
44 | #include "rf_mcpair.h" |
45 | #include "rf_general.h" |
46 | #include "rf_engine.h" |
47 | #include "rf_parityscan.h" |
48 | #include "rf_map.h" |
49 | #include "rf_paritymap.h" |
50 | |
51 | /***************************************************************************** |
52 | * |
53 | * walk through the entire array and write new parity. This works by |
54 | * creating two DAGs, one to read a stripe of data and one to write |
55 | * new parity. The first is executed, the data is xored together, and |
56 | * then the second is executed. To avoid constantly building and |
57 | * tearing down the DAGs, we create them a priori and fill them in |
58 | * with the mapping information as we go along. |
59 | * |
60 | * there should never be more than one thread running this. |
61 | * |
62 | ****************************************************************************/ |
63 | |
64 | int |
65 | rf_RewriteParity(RF_Raid_t *raidPtr) |
66 | { |
67 | if (raidPtr->parity_map != NULL) |
68 | return rf_paritymap_rewrite(raidPtr->parity_map); |
69 | else |
70 | return rf_RewriteParityRange(raidPtr, 0, raidPtr->totalSectors); |
71 | } |
72 | |
73 | int |
74 | rf_RewriteParityRange(RF_Raid_t *raidPtr, RF_SectorNum_t sec_begin, |
75 | RF_SectorNum_t sec_len) |
76 | { |
77 | /* |
78 | * Note: It is the caller's responsibility to ensure that |
79 | * sec_begin and sec_len are stripe-aligned. |
80 | */ |
81 | RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; |
82 | RF_AccessStripeMapHeader_t *asm_h; |
83 | int ret_val; |
84 | int rc; |
85 | RF_SectorNum_t i; |
86 | |
87 | if (raidPtr->Layout.map->faultsTolerated == 0) { |
88 | /* There isn't any parity. Call it "okay." */ |
89 | return (RF_PARITY_OKAY); |
90 | } |
91 | if (raidPtr->status != rf_rs_optimal) { |
92 | /* |
93 | * We're in degraded mode. Don't try to verify parity now! |
94 | * XXX: this should be a "we don't want to", not a |
95 | * "we can't" error. |
96 | */ |
97 | return (RF_PARITY_COULD_NOT_VERIFY); |
98 | } |
99 | |
100 | ret_val = 0; |
101 | |
102 | rc = RF_PARITY_OKAY; |
103 | |
104 | for (i = sec_begin; i < sec_begin + sec_len && |
105 | rc <= RF_PARITY_CORRECTED; |
106 | i += layoutPtr->dataSectorsPerStripe) { |
107 | if (raidPtr->waitShutdown) { |
108 | /* Someone is pulling the plug on this set... |
109 | abort the re-write */ |
110 | return (1); |
111 | } |
112 | asm_h = rf_MapAccess(raidPtr, i, |
113 | layoutPtr->dataSectorsPerStripe, |
114 | NULL, RF_DONT_REMAP); |
115 | raidPtr->parity_rewrite_stripes_done = |
116 | i / layoutPtr->dataSectorsPerStripe ; |
117 | rc = rf_VerifyParity(raidPtr, asm_h->stripeMap, 1, 0); |
118 | |
119 | switch (rc) { |
120 | case RF_PARITY_OKAY: |
121 | case RF_PARITY_CORRECTED: |
122 | break; |
123 | case RF_PARITY_BAD: |
124 | printf("Parity bad during correction\n" ); |
125 | ret_val = 1; |
126 | break; |
127 | case RF_PARITY_COULD_NOT_CORRECT: |
128 | printf("Could not correct bad parity\n" ); |
129 | ret_val = 1; |
130 | break; |
131 | case RF_PARITY_COULD_NOT_VERIFY: |
132 | printf("Could not verify parity\n" ); |
133 | ret_val = 1; |
134 | break; |
135 | default: |
136 | printf("Bad rc=%d from VerifyParity in RewriteParity\n" , rc); |
137 | ret_val = 1; |
138 | } |
139 | rf_FreeAccessStripeMap(asm_h); |
140 | } |
141 | return (ret_val); |
142 | } |
143 | /***************************************************************************** |
144 | * |
145 | * verify that the parity in a particular stripe is correct. we |
146 | * validate only the range of parity defined by parityPDA, since this |
147 | * is all we have locked. The way we do this is to create an asm that |
148 | * maps the whole stripe and then range-restrict it to the parity |
149 | * region defined by the parityPDA. |
150 | * |
151 | ****************************************************************************/ |
152 | int |
153 | rf_VerifyParity(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *aasm, |
154 | int correct_it, RF_RaidAccessFlags_t flags) |
155 | { |
156 | RF_PhysDiskAddr_t *parityPDA; |
157 | RF_AccessStripeMap_t *doasm; |
158 | const RF_LayoutSW_t *lp; |
159 | int lrc, rc; |
160 | |
161 | lp = raidPtr->Layout.map; |
162 | if (lp->faultsTolerated == 0) { |
163 | /* |
164 | * There isn't any parity. Call it "okay." |
165 | */ |
166 | return (RF_PARITY_OKAY); |
167 | } |
168 | rc = RF_PARITY_OKAY; |
169 | if (lp->VerifyParity) { |
170 | for (doasm = aasm; doasm; doasm = doasm->next) { |
171 | for (parityPDA = doasm->parityInfo; parityPDA; |
172 | parityPDA = parityPDA->next) { |
173 | lrc = lp->VerifyParity(raidPtr, |
174 | doasm->raidAddress, |
175 | parityPDA, |
176 | correct_it, flags); |
177 | if (lrc > rc) { |
178 | /* see rf_parityscan.h for why this |
179 | * works */ |
180 | rc = lrc; |
181 | } |
182 | } |
183 | } |
184 | } else { |
185 | rc = RF_PARITY_COULD_NOT_VERIFY; |
186 | } |
187 | return (rc); |
188 | } |
189 | |
190 | int |
191 | rf_VerifyParityBasic(RF_Raid_t *raidPtr, RF_RaidAddr_t raidAddr, |
192 | RF_PhysDiskAddr_t *parityPDA, int correct_it, |
193 | RF_RaidAccessFlags_t flags) |
194 | { |
195 | RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout); |
196 | RF_RaidAddr_t startAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, |
197 | raidAddr); |
198 | RF_SectorCount_t numsector = parityPDA->numSector; |
199 | int numbytes = rf_RaidAddressToByte(raidPtr, numsector); |
200 | int bytesPerStripe = numbytes * layoutPtr->numDataCol; |
201 | RF_DagHeader_t *rd_dag_h, *wr_dag_h; /* read, write dag */ |
202 | RF_DagNode_t *blockNode, *wrBlock; |
203 | RF_AccessStripeMapHeader_t *asm_h; |
204 | RF_AccessStripeMap_t *asmap; |
205 | RF_AllocListElem_t *alloclist; |
206 | RF_PhysDiskAddr_t *pda; |
207 | char *pbuf, *bf, *end_p, *p; |
208 | int i, retcode; |
209 | RF_ReconUnitNum_t which_ru; |
210 | RF_StripeNum_t psID = rf_RaidAddressToParityStripeID(layoutPtr, |
211 | raidAddr, |
212 | &which_ru); |
213 | int stripeWidth = layoutPtr->numDataCol + layoutPtr->numParityCol; |
214 | #if RF_ACC_TRACE > 0 |
215 | RF_AccTraceEntry_t tracerec; |
216 | #endif |
217 | RF_MCPair_t *mcpair; |
218 | |
219 | retcode = RF_PARITY_OKAY; |
220 | |
221 | mcpair = rf_AllocMCPair(); |
222 | rf_MakeAllocList(alloclist); |
223 | RF_MallocAndAdd(bf, numbytes * (layoutPtr->numDataCol + layoutPtr->numParityCol), (char *), alloclist); |
224 | RF_MallocAndAdd(pbuf, numbytes, (char *), alloclist); |
225 | end_p = bf + bytesPerStripe; |
226 | |
227 | rd_dag_h = rf_MakeSimpleDAG(raidPtr, stripeWidth, numbytes, bf, rf_DiskReadFunc, rf_DiskReadUndoFunc, |
228 | "Rod" , alloclist, flags, RF_IO_NORMAL_PRIORITY); |
229 | blockNode = rd_dag_h->succedents[0]; |
230 | |
231 | /* map the stripe and fill in the PDAs in the dag */ |
232 | asm_h = rf_MapAccess(raidPtr, startAddr, layoutPtr->dataSectorsPerStripe, bf, RF_DONT_REMAP); |
233 | asmap = asm_h->stripeMap; |
234 | |
235 | for (pda = asmap->physInfo, i = 0; i < layoutPtr->numDataCol; i++, pda = pda->next) { |
236 | RF_ASSERT(pda); |
237 | rf_RangeRestrictPDA(raidPtr, parityPDA, pda, 0, 1); |
238 | RF_ASSERT(pda->numSector != 0); |
239 | if (rf_TryToRedirectPDA(raidPtr, pda, 0)) |
240 | goto out; /* no way to verify parity if disk is |
241 | * dead. return w/ good status */ |
242 | blockNode->succedents[i]->params[0].p = pda; |
243 | blockNode->succedents[i]->params[2].v = psID; |
244 | blockNode->succedents[i]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); |
245 | } |
246 | |
247 | RF_ASSERT(!asmap->parityInfo->next); |
248 | rf_RangeRestrictPDA(raidPtr, parityPDA, asmap->parityInfo, 0, 1); |
249 | RF_ASSERT(asmap->parityInfo->numSector != 0); |
250 | if (rf_TryToRedirectPDA(raidPtr, asmap->parityInfo, 1)) |
251 | goto out; |
252 | blockNode->succedents[layoutPtr->numDataCol]->params[0].p = asmap->parityInfo; |
253 | |
254 | /* fire off the DAG */ |
255 | #if RF_ACC_TRACE > 0 |
256 | memset((char *) &tracerec, 0, sizeof(tracerec)); |
257 | rd_dag_h->tracerec = &tracerec; |
258 | #endif |
259 | #if 0 |
260 | if (rf_verifyParityDebug) { |
261 | printf("Parity verify read dag:\n" ); |
262 | rf_PrintDAGList(rd_dag_h); |
263 | } |
264 | #endif |
265 | RF_LOCK_MCPAIR(mcpair); |
266 | mcpair->flag = 0; |
267 | RF_UNLOCK_MCPAIR(mcpair); |
268 | |
269 | rf_DispatchDAG(rd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
270 | (void *) mcpair); |
271 | |
272 | RF_LOCK_MCPAIR(mcpair); |
273 | while (!mcpair->flag) |
274 | RF_WAIT_MCPAIR(mcpair); |
275 | RF_UNLOCK_MCPAIR(mcpair); |
276 | if (rd_dag_h->status != rf_enable) { |
277 | RF_ERRORMSG("Unable to verify parity: can't read the stripe\n" ); |
278 | retcode = RF_PARITY_COULD_NOT_VERIFY; |
279 | goto out; |
280 | } |
281 | for (p = bf; p < end_p; p += numbytes) { |
282 | rf_bxor(p, pbuf, numbytes); |
283 | } |
284 | for (i = 0; i < numbytes; i++) { |
285 | if (pbuf[i] != bf[bytesPerStripe + i]) { |
286 | if (!correct_it) |
287 | RF_ERRORMSG3("Parity verify error: byte %d of parity is 0x%x should be 0x%x\n" , |
288 | i, (u_char) bf[bytesPerStripe + i], (u_char) pbuf[i]); |
289 | retcode = RF_PARITY_BAD; |
290 | break; |
291 | } |
292 | } |
293 | |
294 | if (retcode && correct_it) { |
295 | wr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, numbytes, pbuf, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, |
296 | "Wnp" , alloclist, flags, RF_IO_NORMAL_PRIORITY); |
297 | wrBlock = wr_dag_h->succedents[0]; |
298 | wrBlock->succedents[0]->params[0].p = asmap->parityInfo; |
299 | wrBlock->succedents[0]->params[2].v = psID; |
300 | wrBlock->succedents[0]->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru); |
301 | #if RF_ACC_TRACE > 0 |
302 | memset((char *) &tracerec, 0, sizeof(tracerec)); |
303 | wr_dag_h->tracerec = &tracerec; |
304 | #endif |
305 | #if 0 |
306 | if (rf_verifyParityDebug) { |
307 | printf("Parity verify write dag:\n" ); |
308 | rf_PrintDAGList(wr_dag_h); |
309 | } |
310 | #endif |
311 | RF_LOCK_MCPAIR(mcpair); |
312 | mcpair->flag = 0; |
313 | RF_UNLOCK_MCPAIR(mcpair); |
314 | |
315 | rf_DispatchDAG(wr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
316 | (void *) mcpair); |
317 | |
318 | RF_LOCK_MCPAIR(mcpair); |
319 | while (!mcpair->flag) |
320 | RF_WAIT_MCPAIR(mcpair); |
321 | RF_UNLOCK_MCPAIR(mcpair); |
322 | if (wr_dag_h->status != rf_enable) { |
323 | RF_ERRORMSG("Unable to correct parity in VerifyParity: can't write the stripe\n" ); |
324 | retcode = RF_PARITY_COULD_NOT_CORRECT; |
325 | } |
326 | rf_FreeDAG(wr_dag_h); |
327 | if (retcode == RF_PARITY_BAD) |
328 | retcode = RF_PARITY_CORRECTED; |
329 | } |
330 | out: |
331 | rf_FreeAccessStripeMap(asm_h); |
332 | rf_FreeAllocList(alloclist); |
333 | rf_FreeDAG(rd_dag_h); |
334 | rf_FreeMCPair(mcpair); |
335 | return (retcode); |
336 | } |
337 | |
338 | int |
339 | rf_TryToRedirectPDA(RF_Raid_t *raidPtr, RF_PhysDiskAddr_t *pda, |
340 | int parity) |
341 | { |
342 | if (raidPtr->Disks[pda->col].status == rf_ds_reconstructing) { |
343 | if (rf_CheckRUReconstructed(raidPtr->reconControl->reconMap, pda->startSector)) { |
344 | #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 |
345 | if (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE) { |
346 | #if RF_DEBUG_VERIFYPARITY |
347 | RF_RowCol_t oc = pda->col; |
348 | RF_SectorNum_t os = pda->startSector; |
349 | #endif |
350 | if (parity) { |
351 | (raidPtr->Layout.map->MapParity) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); |
352 | #if RF_DEBUG_VERIFYPARITY |
353 | if (rf_verifyParityDebug) |
354 | printf("VerifyParity: Redir P c %d sect %ld -> c %d sect %ld\n" , |
355 | oc, (long) os, pda->col, (long) pda->startSector); |
356 | #endif |
357 | } else { |
358 | (raidPtr->Layout.map->MapSector) (raidPtr, pda->raidAddress, &pda->col, &pda->startSector, RF_REMAP); |
359 | #if RF_DEBUG_VERIFYPARITY |
360 | if (rf_verifyParityDebug) |
361 | printf("VerifyParity: Redir D c %d sect %ld -> c %d sect %ld\n" , |
362 | oc, (long) os, pda->col, (long) pda->startSector); |
363 | #endif |
364 | } |
365 | } else { |
366 | #endif |
367 | RF_RowCol_t spCol = raidPtr->Disks[pda->col].spareCol; |
368 | pda->col = spCol; |
369 | #if RF_INCLUDE_PARITY_DECLUSTERING_DS > 0 |
370 | } |
371 | #endif |
372 | } |
373 | } |
374 | if (RF_DEAD_DISK(raidPtr->Disks[pda->col].status)) |
375 | return (1); |
376 | return (0); |
377 | } |
378 | /***************************************************************************** |
379 | * |
380 | * currently a stub. |
381 | * |
382 | * takes as input an ASM describing a write operation and containing |
383 | * one failure, and verifies that the parity was correctly updated to |
384 | * reflect the write. |
385 | * |
386 | * if it's a data unit that's failed, we read the other data units in |
387 | * the stripe and the parity unit, XOR them together, and verify that |
388 | * we get the data intended for the failed disk. Since it's easy, we |
389 | * also validate that the right data got written to the surviving data |
390 | * disks. |
391 | * |
392 | * If it's the parity that failed, there's really no validation we can |
393 | * do except the above verification that the right data got written to |
394 | * all disks. This is because the new data intended for the failed |
395 | * disk is supplied in the ASM, but this is of course not the case for |
396 | * the new parity. |
397 | * |
398 | ****************************************************************************/ |
#if 0
/*
 * Disabled stub: performs no verification and unconditionally
 * returns 0.  Neither argument is examined.
 */
int
rf_VerifyDegrModeWrite(RF_Raid_t *raidPtr, RF_AccessStripeMapHeader_t *asmh)
{
	return 0;
}
#endif
406 | /* creates a simple DAG with a header, a block-recon node at level 1, |
407 | * nNodes nodes at level 2, an unblock-recon node at level 3, and a |
408 | * terminator node at level 4. The stripe address field in the block |
409 | * and unblock nodes are not touched, nor are the pda fields in the |
410 | * second-level nodes, so they must be filled in later. |
411 | * |
412 | * commit point is established at unblock node - this means that any |
413 | * failure during dag execution causes the dag to fail |
414 | * |
415 | * name - node names at the second level |
416 | */ |
417 | RF_DagHeader_t * |
418 | rf_MakeSimpleDAG(RF_Raid_t *raidPtr, int nNodes, int bytesPerSU, char *databuf, |
419 | int (*doFunc) (RF_DagNode_t * node), |
420 | int (*undoFunc) (RF_DagNode_t * node), |
421 | const char *name, RF_AllocListElem_t *alloclist, |
422 | RF_RaidAccessFlags_t flags, int priority) |
423 | { |
424 | RF_DagHeader_t *dag_h; |
425 | RF_DagNode_t *nodes, *termNode, *blockNode, *unblockNode, *tmpNode; |
426 | int i; |
427 | |
428 | /* grab a DAG header... */ |
429 | |
430 | dag_h = rf_AllocDAGHeader(); |
431 | dag_h->raidPtr = (void *) raidPtr; |
432 | dag_h->allocList = NULL;/* we won't use this alloc list */ |
433 | dag_h->status = rf_enable; |
434 | dag_h->numSuccedents = 1; |
435 | dag_h->creator = "SimpleDAG" ; |
436 | |
437 | /* this dag can not commit until the unblock node is reached errors |
438 | * prior to the commit point imply the dag has failed */ |
439 | dag_h->numCommitNodes = 1; |
440 | dag_h->numCommits = 0; |
441 | |
442 | /* create the nodes, the block & unblock nodes, and the terminator |
443 | * node */ |
444 | |
445 | for (i = 0; i < nNodes; i++) { |
446 | tmpNode = rf_AllocDAGNode(); |
447 | tmpNode->list_next = dag_h->nodes; |
448 | dag_h->nodes = tmpNode; |
449 | } |
450 | nodes = dag_h->nodes; |
451 | |
452 | blockNode = rf_AllocDAGNode(); |
453 | blockNode->list_next = dag_h->nodes; |
454 | dag_h->nodes = blockNode; |
455 | |
456 | unblockNode = rf_AllocDAGNode(); |
457 | unblockNode->list_next = dag_h->nodes; |
458 | dag_h->nodes = unblockNode; |
459 | |
460 | termNode = rf_AllocDAGNode(); |
461 | termNode->list_next = dag_h->nodes; |
462 | dag_h->nodes = termNode; |
463 | |
464 | dag_h->succedents[0] = blockNode; |
465 | rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil" , alloclist); |
466 | rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil" , alloclist); |
467 | unblockNode->succedents[0] = termNode; |
468 | tmpNode = nodes; |
469 | for (i = 0; i < nNodes; i++) { |
470 | blockNode->succedents[i] = unblockNode->antecedents[i] = tmpNode; |
471 | unblockNode->antType[i] = rf_control; |
472 | rf_InitNode(tmpNode, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, name, alloclist); |
473 | tmpNode->succedents[0] = unblockNode; |
474 | tmpNode->antecedents[0] = blockNode; |
475 | tmpNode->antType[0] = rf_control; |
476 | tmpNode->params[1].p = (databuf + (i * bytesPerSU)); |
477 | tmpNode = tmpNode->list_next; |
478 | } |
479 | rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm" , alloclist); |
480 | termNode->antecedents[0] = unblockNode; |
481 | termNode->antType[0] = rf_control; |
482 | return (dag_h); |
483 | } |
484 | |