1 | /* $NetBSD: rf_evenodd_dagfuncs.c,v 1.22 2014/03/23 09:30:59 christos Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Author: ChangMing Wu |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | |
29 | /* |
30 | * Code for RAID-EVENODD architecture. |
31 | */ |
32 | |
33 | #include <sys/cdefs.h> |
34 | __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.22 2014/03/23 09:30:59 christos Exp $" ); |
35 | |
36 | #include "rf_archs.h" |
37 | |
38 | #ifdef _KERNEL_OPT |
39 | #include "opt_raid_diagnostic.h" |
40 | #endif |
41 | |
42 | #if RF_INCLUDE_EVENODD > 0 |
43 | |
44 | #include <dev/raidframe/raidframevar.h> |
45 | |
46 | #include "rf_raid.h" |
47 | #include "rf_dag.h" |
48 | #include "rf_dagffrd.h" |
49 | #include "rf_dagffwr.h" |
50 | #include "rf_dagdegrd.h" |
51 | #include "rf_dagdegwr.h" |
52 | #include "rf_dagutils.h" |
53 | #include "rf_dagfuncs.h" |
54 | #include "rf_etimer.h" |
55 | #include "rf_general.h" |
56 | #include "rf_parityscan.h" |
57 | #include "rf_evenodd.h" |
58 | #include "rf_evenodd_dagfuncs.h" |
59 | |
60 | /* These redundant functions are for small write */ |
61 | RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P" , rf_SimpleXorFunc, "Simple Old-New P" }; |
62 | RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E" , rf_SimpleONEFunc, "Regular Old-New E" }; |
63 | /* These redundant functions are for degraded read */ |
64 | RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr" , rf_RecoveryXorFunc, "Recovery Xr" }; |
65 | RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func" , rf_RecoveryEFunc, "Recovery E Func" }; |
/**********************************************************************************************
 * The following encoding node function is used in EO_000_CreateLargeWriteDAG
 **********************************************************************************************/
69 | int |
70 | rf_RegularPEFunc(RF_DagNode_t *node) |
71 | { |
72 | rf_RegularESubroutine(node, node->results[1]); |
73 | rf_RegularXorFunc(node);/* does the wakeup here! */ |
74 | #if 1 |
75 | return (0); /* XXX This was missing... GO */ |
76 | #endif |
77 | } |
78 | |
79 | |
/************************************************************************************************
 * For EO_001_CreateSmallWriteDAG, there are (i) RegularONEFunc() and (ii) SimpleONEFunc() to
 * be used. The former is used when the write accesses at least as many sectors as a full stripe
 * unit. The latter is used when the write accesses two stripe units but with total sectors
 * less than sectors per SU. In this case, the accesses of parity and 'E' appear as disconnected
 * areas in their stripe units, and the parity write and 'E' write are both divided into two
 * distinct writes (four in total). The simple old-new write and the regular old-new write
 * happen as in RAID-5.
 ************************************************************************************************/
88 | |
89 | /* Algorithm: |
90 | 1. Store the difference of old data and new data in the Rod buffer. |
91 | 2. then encode this buffer into the buffer which already have old 'E' information inside it, |
92 | the result can be shown to be the new 'E' information. |
93 | 3. xor the Wnd buffer into the difference buffer to recover the original old data. |
94 | Here we have another alternative: to allocate a temporary buffer for storing the difference of |
95 | old data and new data, then encode temp buf into old 'E' buf to form new 'E', but this approach |
96 | take the same speed as the previous, and need more memory. |
97 | */ |
98 | int |
99 | rf_RegularONEFunc(RF_DagNode_t *node) |
100 | { |
101 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
102 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
103 | int EpdaIndex = (node->numParams - 1) / 2 - 1; /* the parameter of node |
104 | * where you can find |
105 | * e-pda */ |
106 | int i, k; |
107 | int suoffset, length; |
108 | RF_RowCol_t scol; |
109 | char *srcbuf, *destbuf; |
110 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
111 | RF_Etimer_t timer; |
112 | RF_PhysDiskAddr_t *pda; |
113 | #ifdef RAID_DIAGNOSTIC |
114 | RF_PhysDiskAddr_t *EPDA = |
115 | (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p; |
116 | int ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector); |
117 | |
118 | RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q); |
119 | RF_ASSERT(ESUOffset == 0); |
120 | #endif /* RAID_DIAGNOSTIC */ |
121 | |
122 | RF_ETIMER_START(timer); |
123 | |
124 | /* Xor the Wnd buffer into Rod buffer, the difference of old data and |
125 | * new data is stored in Rod buffer */ |
126 | for (k = 0; k < EpdaIndex; k += 2) { |
127 | length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); |
128 | rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length); |
129 | } |
130 | /* Start to encoding the buffer storing the difference of old data and |
131 | * new data into 'E' buffer */ |
132 | for (i = 0; i < EpdaIndex; i += 2) |
133 | if (node->params[i + 1].p != node->results[0]) { /* results[0] is buf ptr |
134 | * of E */ |
135 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
136 | srcbuf = (char *) node->params[i + 1].p; |
137 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
138 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
139 | destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset); |
140 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
141 | } |
142 | /* Recover the original old data to be used by parity encoding |
143 | * function in XorNode */ |
144 | for (k = 0; k < EpdaIndex; k += 2) { |
145 | length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector); |
146 | rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length); |
147 | } |
148 | RF_ETIMER_STOP(timer); |
149 | RF_ETIMER_EVAL(timer); |
150 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
151 | rf_GenericWakeupFunc(node, 0); |
152 | #if 1 |
153 | return (0); /* XXX this was missing.. GO */ |
154 | #endif |
155 | } |
156 | |
157 | int |
158 | rf_SimpleONEFunc(RF_DagNode_t *node) |
159 | { |
160 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
161 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
162 | RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p; |
163 | int retcode = 0; |
164 | char *srcbuf, *destbuf; |
165 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
166 | int length; |
167 | RF_RowCol_t scol; |
168 | RF_Etimer_t timer; |
169 | |
170 | RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q); |
171 | if (node->dagHdr->status == rf_enable) { |
172 | RF_ETIMER_START(timer); |
173 | length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector); /* this is a pda of |
174 | * writeDataNodes */ |
175 | /* bxor to buffer of readDataNodes */ |
176 | retcode = rf_bxor(node->params[5].p, node->params[1].p, length); |
177 | /* find out the corresponding colume in encoding matrix for |
178 | * write colume to be encoded into redundant disk 'E' */ |
179 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
180 | srcbuf = node->params[1].p; |
181 | destbuf = node->params[3].p; |
182 | /* Start encoding process */ |
183 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
184 | rf_bxor(node->params[5].p, node->params[1].p, length); |
185 | RF_ETIMER_STOP(timer); |
186 | RF_ETIMER_EVAL(timer); |
187 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
188 | |
189 | } |
190 | return (rf_GenericWakeupFunc(node, retcode)); /* call wake func |
191 | * explicitly since no |
192 | * I/O in this node */ |
193 | } |
194 | |
195 | |
196 | /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write ********/ |
197 | void |
198 | rf_RegularESubroutine(RF_DagNode_t *node, char *ebuf) |
199 | { |
200 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
201 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
202 | RF_PhysDiskAddr_t *pda; |
203 | int i, suoffset; |
204 | RF_RowCol_t scol; |
205 | char *srcbuf, *destbuf; |
206 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
207 | RF_Etimer_t timer; |
208 | |
209 | RF_ETIMER_START(timer); |
210 | for (i = 0; i < node->numParams - 2; i += 2) { |
211 | RF_ASSERT(node->params[i + 1].p != ebuf); |
212 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
213 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
214 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
215 | srcbuf = (char *) node->params[i + 1].p; |
216 | destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset); |
217 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
218 | } |
219 | RF_ETIMER_STOP(timer); |
220 | RF_ETIMER_EVAL(timer); |
221 | tracerec->xor_us += RF_ETIMER_VAL_US(timer); |
222 | } |
223 | |
224 | |
225 | /******************************************************************************************* |
226 | * Used in EO_001_CreateLargeWriteDAG |
227 | ******************************************************************************************/ |
228 | int |
229 | rf_RegularEFunc(RF_DagNode_t *node) |
230 | { |
231 | rf_RegularESubroutine(node, node->results[0]); |
232 | rf_GenericWakeupFunc(node, 0); |
233 | #if 1 |
234 | return (0); /* XXX this was missing?.. GO */ |
235 | #endif |
236 | } |
/*******************************************************************************************
 * This degraded function allows only two cases:
 *  1. when the write accesses the full failed stripe unit, then the access can span more
 *     than one stripe unit.
 *  2. when the write accesses only part of the failed SU, we assume accesses of more than
 *     one stripe unit are not allowed, so that the write can be dealt with like a
 *     large write.
 * The following function is based on these assumptions. So except in the second case,
 * it looks the same as a large write encoding function. But this is not exactly the
 * normal way of doing a degraded write, since RAIDframe has to break accesses
 * other than the above two cases into smaller accesses. We may have to change
 * rf_DegrESubroutine in the future.
 *******************************************************************************************/
250 | void |
251 | rf_DegrESubroutine(RF_DagNode_t *node, char *ebuf) |
252 | { |
253 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
254 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
255 | RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; |
256 | RF_PhysDiskAddr_t *pda; |
257 | int i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); |
258 | RF_RowCol_t scol; |
259 | char *srcbuf, *destbuf; |
260 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
261 | RF_Etimer_t timer; |
262 | |
263 | RF_ETIMER_START(timer); |
264 | for (i = 0; i < node->numParams - 2; i += 2) { |
265 | RF_ASSERT(node->params[i + 1].p != ebuf); |
266 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
267 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
268 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
269 | srcbuf = (char *) node->params[i + 1].p; |
270 | destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); |
271 | rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector); |
272 | } |
273 | |
274 | RF_ETIMER_STOP(timer); |
275 | RF_ETIMER_EVAL(timer); |
276 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
277 | } |
278 | |
279 | |
280 | /************************************************************************************** |
281 | * This function is used in case where one data disk failed and both redundant disks |
282 | * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk |
283 | * failed in the stripe but not accessed at this time, then we should, instead, use |
284 | * the rf_EOWriteDoubleRecoveryFunc(). |
285 | **************************************************************************************/ |
286 | int |
287 | rf_Degraded_100_EOFunc(RF_DagNode_t *node) |
288 | { |
289 | rf_DegrESubroutine(node, node->results[1]); |
290 | rf_RecoveryXorFunc(node); /* does the wakeup here! */ |
291 | #if 1 |
292 | return (0); /* XXX this was missing... SHould these be |
293 | * void functions??? GO */ |
294 | #endif |
295 | } |
296 | /************************************************************************************** |
297 | * This function is to encode one sector in one of the data disks to the E disk. |
298 | * However, in evenodd this function can also be used as decoding function to recover |
299 | * data from dead disk in the case of parity failure and a single data failure. |
300 | **************************************************************************************/ |
301 | void |
302 | rf_e_EncOneSect( |
303 | RF_RowCol_t srcLogicCol, |
304 | char *srcSecbuf, |
305 | RF_RowCol_t destLogicCol, |
306 | char *destSecbuf, |
307 | int bytesPerSector) |
308 | { |
309 | int S_index; /* index of the EU in the src col which need |
310 | * be Xored into all EUs in a dest sector */ |
311 | int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; |
312 | RF_RowCol_t j, indexInDest, /* row index of an encoding unit in |
313 | * the destination colume of encoding |
314 | * matrix */ |
315 | indexInSrc; /* row index of an encoding unit in the source |
316 | * colume used for recovery */ |
317 | int bytesPerEU = bytesPerSector / numRowInEncMatix; |
318 | |
319 | #if RF_EO_MATRIX_DIM > 17 |
320 | int shortsPerEU = bytesPerEU / sizeof(short); |
321 | short *destShortBuf, *srcShortBuf1, *srcShortBuf2; |
322 | short temp1; |
323 | #elif RF_EO_MATRIX_DIM == 17 |
324 | int longsPerEU = bytesPerEU / sizeof(long); |
325 | long *destLongBuf, *srcLongBuf1, *srcLongBuf2; |
326 | long temp1; |
327 | #endif |
328 | |
329 | #if RF_EO_MATRIX_DIM > 17 |
330 | RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1); |
331 | RF_ASSERT(bytesPerEU % sizeof(short) == 0); |
332 | #elif RF_EO_MATRIX_DIM == 17 |
333 | RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4); |
334 | RF_ASSERT(bytesPerEU % sizeof(long) == 0); |
335 | #endif |
336 | |
337 | S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); |
338 | #if RF_EO_MATRIX_DIM > 17 |
339 | srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU); |
340 | #elif RF_EO_MATRIX_DIM == 17 |
341 | srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU); |
342 | #endif |
343 | |
344 | for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) { |
345 | indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM); |
346 | |
347 | #if RF_EO_MATRIX_DIM > 17 |
348 | destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU); |
349 | srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU); |
350 | for (j = 0; j < shortsPerEU; j++) { |
351 | temp1 = destShortBuf[j] ^ srcShortBuf1[j]; |
352 | /* note: S_index won't be at the end row for any src |
353 | * col! */ |
354 | if (indexInSrc != RF_EO_MATRIX_DIM - 1) |
355 | destShortBuf[j] = (srcShortBuf2[j]) ^ temp1; |
356 | /* if indexInSrc is at the end row, ie. |
357 | * RF_EO_MATRIX_DIM -1, then all elements are zero! */ |
358 | else |
359 | destShortBuf[j] = temp1; |
360 | } |
361 | |
362 | #elif RF_EO_MATRIX_DIM == 17 |
363 | destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU); |
364 | srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU); |
365 | for (j = 0; j < longsPerEU; j++) { |
366 | temp1 = destLongBuf[j] ^ srcLongBuf1[j]; |
367 | if (indexInSrc != RF_EO_MATRIX_DIM - 1) |
368 | destLongBuf[j] = (srcLongBuf2[j]) ^ temp1; |
369 | else |
370 | destLongBuf[j] = temp1; |
371 | } |
372 | #endif |
373 | } |
374 | } |
375 | |
376 | void |
377 | rf_e_encToBuf( |
378 | RF_Raid_t * raidPtr, |
379 | RF_RowCol_t srcLogicCol, |
380 | char *srcbuf, |
381 | RF_RowCol_t destLogicCol, |
382 | char *destbuf, |
383 | int numSector) |
384 | { |
385 | int i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
386 | |
387 | for (i = 0; i < numSector; i++) { |
388 | rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector); |
389 | srcbuf += bytesPerSector; |
390 | destbuf += bytesPerSector; |
391 | } |
392 | } |
/**************************************************************************************
 * When parity dies and one data disk dies, we use the second redundant information, 'E',
 * to recover the data on the dead disk. This function is used in the recovery node
 * for EO_110_CreateReadDAG
 **************************************************************************************/
398 | int |
399 | rf_RecoveryEFunc(RF_DagNode_t *node) |
400 | { |
401 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p; |
402 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout; |
403 | RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p; |
404 | RF_RowCol_t scol, /* source logical column */ |
405 | fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress); /* logical column of |
406 | * failed SU */ |
407 | int i; |
408 | RF_PhysDiskAddr_t *pda; |
409 | int suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector); |
410 | char *srcbuf, *destbuf; |
411 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
412 | RF_Etimer_t timer; |
413 | |
414 | memset((char *) node->results[0], 0, |
415 | rf_RaidAddressToByte(raidPtr, failedPDA->numSector)); |
416 | if (node->dagHdr->status == rf_enable) { |
417 | RF_ETIMER_START(timer); |
418 | for (i = 0; i < node->numParams - 2; i += 2) |
419 | if (node->params[i + 1].p != node->results[0]) { |
420 | pda = (RF_PhysDiskAddr_t *) node->params[i].p; |
421 | if (i == node->numParams - 4) |
422 | scol = RF_EO_MATRIX_DIM - 2; /* the colume of |
423 | * redundant E */ |
424 | else |
425 | scol = rf_EUCol(layoutPtr, pda->raidAddress); |
426 | srcbuf = (char *) node->params[i + 1].p; |
427 | suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
428 | destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset); |
429 | rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector); |
430 | } |
431 | RF_ETIMER_STOP(timer); |
432 | RF_ETIMER_EVAL(timer); |
433 | tracerec->xor_us += RF_ETIMER_VAL_US(timer); |
434 | } |
435 | return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */ |
436 | } |
/**************************************************************************************
 * This function is used in the case where one data disk and the parity have failed.
 * (in EO_110_CreateWriteDAG )
 **************************************************************************************/
441 | int |
442 | rf_EO_DegradedWriteEFunc(RF_DagNode_t * node) |
443 | { |
444 | rf_DegrESubroutine(node, node->results[0]); |
445 | rf_GenericWakeupFunc(node, 0); |
446 | #if 1 |
447 | return (0); /* XXX Yet another one!! GO */ |
448 | #endif |
449 | } |
450 | |
451 | |
452 | |
453 | /************************************************************************************** |
454 | * THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES |
455 | **************************************************************************************/ |
456 | |
457 | void |
458 | rf_doubleEOdecode( |
459 | RF_Raid_t * raidPtr, |
460 | char **rrdbuf, |
461 | char **dest, |
462 | RF_RowCol_t * fcol, |
463 | char *pbuf, |
464 | char *ebuf) |
465 | { |
466 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); |
467 | int i, j, k, f1, f2, row; |
468 | int rrdrow, erow, count = 0; |
469 | int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
470 | int numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1; |
471 | #if 0 |
472 | int pcol = (RF_EO_MATRIX_DIM) - 1; |
473 | #endif |
474 | int ecol = (RF_EO_MATRIX_DIM) - 2; |
475 | int bytesPerEU = bytesPerSector / numRowInEncMatix; |
476 | int numDataCol = layoutPtr->numDataCol; |
477 | #if RF_EO_MATRIX_DIM > 17 |
478 | int shortsPerEU = bytesPerEU / sizeof(short); |
479 | short *rrdbuf_current, *pbuf_current, *ebuf_current; |
480 | short *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; |
481 | short *temp; |
482 | short *P; |
483 | |
484 | RF_ASSERT(bytesPerEU % sizeof(short) == 0); |
485 | RF_Malloc(P, bytesPerEU, (short *)); |
486 | RF_Malloc(temp, bytesPerEU, (short *)); |
487 | #elif RF_EO_MATRIX_DIM == 17 |
488 | int longsPerEU = bytesPerEU / sizeof(long); |
489 | long *rrdbuf_current, *pbuf_current, *ebuf_current; |
490 | long *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current; |
491 | long *temp; |
492 | long *P; |
493 | |
494 | RF_ASSERT(bytesPerEU % sizeof(long) == 0); |
495 | RF_Malloc(P, bytesPerEU, (long *)); |
496 | RF_Malloc(temp, bytesPerEU, (long *)); |
497 | #endif |
498 | RF_ASSERT(*((long *) dest[0]) == 0); |
499 | RF_ASSERT(*((long *) dest[1]) == 0); |
500 | memset((char *) P, 0, bytesPerEU); |
501 | memset((char *) temp, 0, bytesPerEU); |
502 | RF_ASSERT(*P == 0); |
503 | /* calculate the 'P' parameter, which, not parity, is the Xor of all |
504 | * elements in the last two column, ie. 'E' and 'parity' colume, see |
505 | * the Ref. paper by Blaum, et al 1993 */ |
506 | for (i = 0; i < numRowInEncMatix; i++) |
507 | for (k = 0; k < longsPerEU; k++) { |
508 | #if RF_EO_MATRIX_DIM > 17 |
509 | ebuf_current = ((short *) ebuf) + i * shortsPerEU + k; |
510 | pbuf_current = ((short *) pbuf) + i * shortsPerEU + k; |
511 | #elif RF_EO_MATRIX_DIM == 17 |
512 | ebuf_current = ((long *) ebuf) + i * longsPerEU + k; |
513 | pbuf_current = ((long *) pbuf) + i * longsPerEU + k; |
514 | #endif |
515 | P[k] ^= *ebuf_current; |
516 | P[k] ^= *pbuf_current; |
517 | } |
518 | RF_ASSERT(fcol[0] != fcol[1]); |
519 | if (fcol[0] < fcol[1]) { |
520 | #if RF_EO_MATRIX_DIM > 17 |
521 | dest_smaller = (short *) (dest[0]); |
522 | dest_larger = (short *) (dest[1]); |
523 | #elif RF_EO_MATRIX_DIM == 17 |
524 | dest_smaller = (long *) (dest[0]); |
525 | dest_larger = (long *) (dest[1]); |
526 | #endif |
527 | f1 = fcol[0]; |
528 | f2 = fcol[1]; |
529 | } else { |
530 | #if RF_EO_MATRIX_DIM > 17 |
531 | dest_smaller = (short *) (dest[1]); |
532 | dest_larger = (short *) (dest[0]); |
533 | #elif RF_EO_MATRIX_DIM == 17 |
534 | dest_smaller = (long *) (dest[1]); |
535 | dest_larger = (long *) (dest[0]); |
536 | #endif |
537 | f1 = fcol[1]; |
538 | f2 = fcol[0]; |
539 | } |
540 | row = (RF_EO_MATRIX_DIM) - 1; |
541 | while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) { |
542 | #if RF_EO_MATRIX_DIM > 17 |
543 | dest_larger_current = dest_larger + row * shortsPerEU; |
544 | dest_smaller_current = dest_smaller + row * shortsPerEU; |
545 | #elif RF_EO_MATRIX_DIM == 17 |
546 | dest_larger_current = dest_larger + row * longsPerEU; |
547 | dest_smaller_current = dest_smaller + row * longsPerEU; |
548 | #endif |
549 | /** Do the diagonal recovery. Initially, temp[k] = (failed 1), |
550 | which is the failed data in the colume which has smaller col index. **/ |
551 | /* step 1: ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3)) */ |
552 | for (j = 0; j < numDataCol; j++) { |
553 | if (j == f1 || j == f2) |
554 | continue; |
555 | rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM); |
556 | if (rrdrow != (RF_EO_MATRIX_DIM) - 1) { |
557 | #if RF_EO_MATRIX_DIM > 17 |
558 | rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU; |
559 | for (k = 0; k < shortsPerEU; k++) |
560 | temp[k] ^= *(rrdbuf_current + k); |
561 | #elif RF_EO_MATRIX_DIM == 17 |
562 | rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU; |
563 | for (k = 0; k < longsPerEU; k++) |
564 | temp[k] ^= *(rrdbuf_current + k); |
565 | #endif |
566 | } |
567 | } |
568 | /* step 2: ^E(erow,m-2), If erow is at the buttom row, don't |
569 | * Xor into it E(erow,m-2) = (principle diagonal) ^ (failed |
570 | * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal |
571 | * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle |
572 | * diagonal) ^ (failed 2) */ |
573 | |
574 | erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM)); |
575 | if (erow != (RF_EO_MATRIX_DIM) - 1) { |
576 | #if RF_EO_MATRIX_DIM > 17 |
577 | ebuf_current = (short *) ebuf + shortsPerEU * erow; |
578 | for (k = 0; k < shortsPerEU; k++) |
579 | temp[k] ^= *(ebuf_current + k); |
580 | #elif RF_EO_MATRIX_DIM == 17 |
581 | ebuf_current = (long *) ebuf + longsPerEU * erow; |
582 | for (k = 0; k < longsPerEU; k++) |
583 | temp[k] ^= *(ebuf_current + k); |
584 | #endif |
585 | } |
586 | /* step 3: ^P to obtain the failed data (failed 2). P can be |
587 | * proved to be actually (principle diagonal) After this |
588 | * step, temp[k] = (failed 2), the failed data to be recovered */ |
589 | #if RF_EO_MATRIX_DIM > 17 |
590 | for (k = 0; k < shortsPerEU; k++) |
591 | temp[k] ^= P[k]; |
592 | /* Put the data to the destination buffer */ |
593 | for (k = 0; k < shortsPerEU; k++) |
594 | dest_larger_current[k] = temp[k]; |
595 | #elif RF_EO_MATRIX_DIM == 17 |
596 | for (k = 0; k < longsPerEU; k++) |
597 | temp[k] ^= P[k]; |
598 | /* Put the data to the destination buffer */ |
599 | for (k = 0; k < longsPerEU; k++) |
600 | dest_larger_current[k] = temp[k]; |
601 | #endif |
602 | |
603 | /** THE FOLLOWING DO THE HORIZONTAL XOR **/ |
604 | /* step 1: ^(SUM of A(row,0..m-3)), ie. all nonfailed data |
605 | * columes */ |
606 | for (j = 0; j < numDataCol; j++) { |
607 | if (j == f1 || j == f2) |
608 | continue; |
609 | #if RF_EO_MATRIX_DIM > 17 |
610 | rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU; |
611 | for (k = 0; k < shortsPerEU; k++) |
612 | temp[k] ^= *(rrdbuf_current + k); |
613 | #elif RF_EO_MATRIX_DIM == 17 |
614 | rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU; |
615 | for (k = 0; k < longsPerEU; k++) |
616 | temp[k] ^= *(rrdbuf_current + k); |
617 | #endif |
618 | } |
619 | /* step 2: ^A(row,m-1) */ |
620 | /* step 3: Put the data to the destination buffer */ |
621 | #if RF_EO_MATRIX_DIM > 17 |
622 | pbuf_current = (short *) pbuf + shortsPerEU * row; |
623 | for (k = 0; k < shortsPerEU; k++) |
624 | temp[k] ^= *(pbuf_current + k); |
625 | for (k = 0; k < shortsPerEU; k++) |
626 | dest_smaller_current[k] = temp[k]; |
627 | #elif RF_EO_MATRIX_DIM == 17 |
628 | pbuf_current = (long *) pbuf + longsPerEU * row; |
629 | for (k = 0; k < longsPerEU; k++) |
630 | temp[k] ^= *(pbuf_current + k); |
631 | for (k = 0; k < longsPerEU; k++) |
632 | dest_smaller_current[k] = temp[k]; |
633 | #endif |
634 | count++; |
635 | } |
636 | /* Check if all Encoding Unit in the data buffer have been decoded, |
637 | * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number, |
638 | * this algorithm will covered all buffer */ |
639 | RF_ASSERT(count == numRowInEncMatix); |
640 | RF_Free((char *) P, bytesPerEU); |
641 | RF_Free((char *) temp, bytesPerEU); |
642 | } |
643 | |
644 | |
/***************************************************************************************
 * 	This function is called by the double degraded read
 * 	EO_200_CreateReadDAG
 *
 ***************************************************************************************/
650 | int |
651 | rf_EvenOddDoubleRecoveryFunc(RF_DagNode_t *node) |
652 | { |
653 | int ndataParam = 0; |
654 | int np = node->numParams; |
655 | RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; |
656 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; |
657 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); |
658 | int i, prm, sector, nresults = node->numResults; |
659 | RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; |
660 | unsigned sosAddr; |
661 | int mallc_one = 0, mallc_two = 0; /* flags to indicate if |
662 | * memory is allocated */ |
663 | int bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
664 | RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1, |
665 | npda; |
666 | RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol; |
667 | char **buf, *ebuf, *pbuf, *dest[2]; |
668 | long *suoff = NULL, *suend = NULL, *prmToCol = NULL, |
669 | psuoff = 0, esuoff = 0; |
670 | RF_SectorNum_t startSector, endSector; |
671 | RF_Etimer_t timer; |
672 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
673 | |
674 | RF_ETIMER_START(timer); |
675 | |
676 | /* Find out the number of parameters which are pdas for data |
677 | * information */ |
678 | for (i = 0; i <= np; i++) |
679 | if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) { |
680 | ndataParam = i; |
681 | break; |
682 | } |
683 | RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); |
684 | if (ndataParam != 0) { |
685 | RF_Malloc(suoff, ndataParam * sizeof(long), (long *)); |
686 | RF_Malloc(suend, ndataParam * sizeof(long), (long *)); |
687 | RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *)); |
688 | } |
689 | if (asmap->failedPDAs[1] && |
690 | (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) { |
691 | RF_ASSERT(0); /* currently, no support for this situation */ |
692 | ppda = node->params[np - 6].p; |
693 | ppda2 = node->params[np - 5].p; |
694 | RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY); |
695 | epda = node->params[np - 4].p; |
696 | epda2 = node->params[np - 3].p; |
697 | RF_ASSERT(epda2->type == RF_PDA_TYPE_Q); |
698 | } else { |
699 | ppda = node->params[np - 4].p; |
700 | epda = node->params[np - 3].p; |
701 | psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector); |
702 | esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector); |
703 | RF_ASSERT(psuoff == esuoff); |
704 | } |
705 | /* |
706 | the followings have three goals: |
707 | 1. determine the startSector to begin decoding and endSector to end decoding. |
708 | 2. determine the colume numbers of the two failed disks. |
709 | 3. determine the offset and end offset of the access within each failed stripe unit. |
710 | */ |
711 | if (nresults == 1) { |
712 | /* find the startSector to begin decoding */ |
713 | pda = node->results[0]; |
714 | memset(pda->bufPtr, 0, bytesPerSector * pda->numSector); |
715 | fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
716 | fsuend[0] = fsuoff[0] + pda->numSector; |
717 | fsuoff[1] = 0; |
718 | fsuend[1] = 0; |
719 | startSector = fsuoff[0]; |
720 | endSector = fsuend[0]; |
721 | |
722 | /* find out the column of failed disk being accessed */ |
723 | fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress); |
724 | |
725 | /* find out the other failed colume not accessed */ |
726 | sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); |
727 | for (i = 0; i < numDataCol; i++) { |
728 | npda.raidAddress = sosAddr + (i * secPerSU); |
729 | (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0); |
730 | /* skip over dead disks */ |
731 | if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status)) |
732 | if (i != fcol[0]) |
733 | break; |
734 | } |
735 | RF_ASSERT(i < numDataCol); |
736 | fcol[1] = i; |
737 | } else { |
738 | RF_ASSERT(nresults == 2); |
739 | pda0 = node->results[0]; |
740 | memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector); |
741 | pda1 = node->results[1]; |
742 | memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector); |
743 | /* determine the failed colume numbers of the two failed |
744 | * disks. */ |
745 | fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress); |
746 | fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress); |
747 | /* determine the offset and end offset of the access within |
748 | * each failed stripe unit. */ |
749 | fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector); |
750 | fsuend[0] = fsuoff[0] + pda0->numSector; |
751 | fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector); |
752 | fsuend[1] = fsuoff[1] + pda1->numSector; |
753 | /* determine the startSector to begin decoding */ |
754 | startSector = RF_MIN(pda0->startSector, pda1->startSector); |
755 | /* determine the endSector to end decoding */ |
756 | endSector = RF_MAX(fsuend[0], fsuend[1]); |
757 | } |
758 | /* |
759 | assign the beginning sector and the end sector for each parameter |
760 | find out the corresponding colume # for each parameter |
761 | */ |
762 | for (prm = 0; prm < ndataParam; prm++) { |
763 | pda = node->params[prm].p; |
764 | suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector); |
765 | suend[prm] = suoff[prm] + pda->numSector; |
766 | prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress); |
767 | } |
768 | /* 'sector' is the sector for the current decoding algorithm. For each |
769 | * sector in the failed SU, find out the corresponding parameters that |
770 | * cover the current sector and that are needed for decoding of this |
771 | * sector in failed SU. 2. Find out if sector is in the shadow of any |
772 | * accessed failed SU. If not, malloc a temporary space of a sector in |
773 | * size. */ |
774 | for (sector = startSector; sector < endSector; sector++) { |
775 | if (nresults == 2) |
776 | if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1])) |
777 | continue; |
778 | for (prm = 0; prm < ndataParam; prm++) |
779 | if (suoff[prm] <= sector && sector < suend[prm]) |
780 | buf[(prmToCol[prm])] = (char *)((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr + |
781 | rf_RaidAddressToByte(raidPtr, sector - suoff[prm]); |
782 | /* find out if sector is in the shadow of any accessed failed |
783 | * SU. If yes, assign dest[0], dest[1] to point at suitable |
784 | * position of the buffer corresponding to failed SUs. if no, |
785 | * malloc a temporary space of a sector in size for |
786 | * destination of decoding. */ |
787 | RF_ASSERT(nresults == 1 || nresults == 2); |
788 | if (nresults == 1) { |
789 | dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); |
790 | /* Always malloc temp buffer to dest[1] */ |
791 | RF_Malloc(dest[1], bytesPerSector, (char *)); |
792 | memset(dest[1], 0, bytesPerSector); |
793 | mallc_two = 1; |
794 | } else { |
795 | if (fsuoff[0] <= sector && sector < fsuend[0]) |
796 | dest[0] = (char *)((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]); |
797 | else { |
798 | RF_Malloc(dest[0], bytesPerSector, (char *)); |
799 | memset(dest[0], 0, bytesPerSector); |
800 | mallc_one = 1; |
801 | } |
802 | if (fsuoff[1] <= sector && sector < fsuend[1]) |
803 | dest[1] = (char *)((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]); |
804 | else { |
805 | RF_Malloc(dest[1], bytesPerSector, (char *)); |
806 | memset(dest[1], 0, bytesPerSector); |
807 | mallc_two = 1; |
808 | } |
809 | RF_ASSERT(mallc_one == 0 || mallc_two == 0); |
810 | } |
811 | pbuf = (char *)ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff); |
812 | ebuf = (char *)epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff); |
813 | /* |
814 | * After finish finding all needed sectors, call doubleEOdecode function for decoding |
815 | * one sector to destination. |
816 | */ |
817 | rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); |
818 | /* free all allocated memory, and mark flag to indicate no |
819 | * memory is being allocated */ |
820 | if (mallc_one == 1) |
821 | RF_Free(dest[0], bytesPerSector); |
822 | if (mallc_two == 1) |
823 | RF_Free(dest[1], bytesPerSector); |
824 | mallc_one = mallc_two = 0; |
825 | } |
826 | RF_Free(buf, numDataCol * sizeof(char *)); |
827 | if (ndataParam != 0) { |
828 | RF_Free(suoff, ndataParam * sizeof(long)); |
829 | RF_Free(suend, ndataParam * sizeof(long)); |
830 | RF_Free(prmToCol, ndataParam * sizeof(long)); |
831 | } |
832 | RF_ETIMER_STOP(timer); |
833 | RF_ETIMER_EVAL(timer); |
834 | if (tracerec) { |
835 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
836 | } |
837 | rf_GenericWakeupFunc(node, 0); |
838 | #if 1 |
839 | return (0); /* XXX is this even close!!?!?!!? GO */ |
840 | #endif |
841 | } |
842 | |
843 | |
/*
 * Currently, this function only supports an access to one of the two failed
 * stripe units (SUs).  In addition, asmap->numStripeUnitsAccessed is limited
 * to one; RAIDframe breaks a large access into multiple accesses of a single
 * stripe unit each.
 */
848 | |
849 | int |
850 | rf_EOWriteDoubleRecoveryFunc(RF_DagNode_t *node) |
851 | { |
852 | int np = node->numParams; |
853 | RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p; |
854 | RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p; |
855 | RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout); |
856 | RF_SectorNum_t sector; |
857 | RF_RowCol_t col, scol; |
858 | int prm, i, j; |
859 | RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit; |
860 | unsigned sosAddr; |
861 | unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1); |
862 | RF_int64 numbytes; |
863 | RF_SectorNum_t startSector, endSector; |
864 | RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda; |
865 | RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol; |
866 | char **buf; /* buf[0], buf[1], buf[2], ...etc. point to |
867 | * buffer storing data read from col0, col1, |
868 | * col2 */ |
869 | char *ebuf, *pbuf, *dest[2], *olddata[2]; |
870 | RF_Etimer_t timer; |
871 | RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec; |
872 | |
873 | RF_ASSERT(asmap->numDataFailed == 1); /* currently only support this |
874 | * case, the other failed SU |
875 | * is not being accessed */ |
876 | RF_ETIMER_START(timer); |
877 | RF_Malloc(buf, numDataCol * sizeof(char *), (char **)); |
878 | |
879 | ppda = node->results[0];/* Instead of being buffers, node->results[0] |
880 | * and [1] are Ppda and Epda */ |
881 | epda = node->results[1]; |
882 | fpda = asmap->failedPDAs[0]; |
883 | |
884 | /* First, recovery the failed old SU using EvenOdd double decoding */ |
885 | /* determine the startSector and endSector for decoding */ |
886 | startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector); |
887 | endSector = startSector + fpda->numSector; |
888 | /* Assign buf[col] pointers to point to each non-failed colume and |
889 | * initialize the pbuf and ebuf to point at the beginning of each |
890 | * source buffers and destination buffers */ |
891 | for (prm = 0; prm < numDataCol - 2; prm++) { |
892 | pda = (RF_PhysDiskAddr_t *) node->params[prm].p; |
893 | col = rf_EUCol(layoutPtr, pda->raidAddress); |
894 | buf[col] = pda->bufPtr; |
895 | } |
896 | /* pbuf and ebuf: they will change values as double recovery decoding |
897 | * goes on */ |
898 | pbuf = ppda->bufPtr; |
899 | ebuf = epda->bufPtr; |
900 | /* find out the logical colume numbers in the encoding matrix of the |
901 | * two failed columes */ |
902 | fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress); |
903 | |
904 | /* find out the other failed colume not accessed this time */ |
905 | sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress); |
906 | for (i = 0; i < numDataCol; i++) { |
907 | npda.raidAddress = sosAddr + (i * secPerSU); |
908 | (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0); |
909 | /* skip over dead disks */ |
910 | if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status)) |
911 | if (i != fcol[0]) |
912 | break; |
913 | } |
914 | RF_ASSERT(i < numDataCol); |
915 | fcol[1] = i; |
916 | /* assign temporary space to put recovered failed SU */ |
917 | numbytes = fpda->numSector * bytesPerSector; |
918 | RF_Malloc(olddata[0], numbytes, (char *)); |
919 | RF_Malloc(olddata[1], numbytes, (char *)); |
920 | dest[0] = olddata[0]; |
921 | dest[1] = olddata[1]; |
922 | memset(olddata[0], 0, numbytes); |
923 | memset(olddata[1], 0, numbytes); |
924 | /* Begin the recovery decoding, initially buf[j], ebuf, pbuf, dest[j] |
925 | * have already pointed at the beginning of each source buffers and |
926 | * destination buffers */ |
927 | for (sector = startSector, i = 0; sector < endSector; sector++, i++) { |
928 | rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf); |
929 | for (j = 0; j < numDataCol; j++) |
930 | if ((j != fcol[0]) && (j != fcol[1])) |
931 | buf[j] += bytesPerSector; |
932 | dest[0] += bytesPerSector; |
933 | dest[1] += bytesPerSector; |
934 | ebuf += bytesPerSector; |
935 | pbuf += bytesPerSector; |
936 | } |
937 | /* after recovery, the buffer pointed by olddata[0] is the old failed |
938 | * data. With new writing data and this old data, use small write to |
939 | * calculate the new redundant informations */ |
940 | /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of |
941 | * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol |
942 | * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[ |
943 | * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol |
944 | * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of |
945 | * wudNodes; For current implementation, we assume the simplest case: |
946 | * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1 |
947 | * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new |
948 | * data to be writen to the failed disk. We first bxor the new data |
949 | * into the old recovered data, then do the same things as small |
950 | * write. */ |
951 | |
952 | rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes); |
953 | /* do new 'E' calculation */ |
954 | /* find out the corresponding colume in encoding matrix for write |
955 | * colume to be encoded into redundant disk 'E' */ |
956 | scol = rf_EUCol(layoutPtr, fpda->raidAddress); |
957 | /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest |
958 | * buffer pointer */ |
959 | rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector); |
960 | |
961 | /* do new 'P' calculation */ |
962 | rf_bxor(olddata[0], ppda->bufPtr, numbytes); |
963 | /* Free the allocated buffer */ |
964 | RF_Free(olddata[0], numbytes); |
965 | RF_Free(olddata[1], numbytes); |
966 | RF_Free(buf, numDataCol * sizeof(char *)); |
967 | |
968 | RF_ETIMER_STOP(timer); |
969 | RF_ETIMER_EVAL(timer); |
970 | if (tracerec) { |
971 | tracerec->q_us += RF_ETIMER_VAL_US(timer); |
972 | } |
973 | rf_GenericWakeupFunc(node, 0); |
974 | return (0); |
975 | } |
976 | #endif /* RF_INCLUDE_EVENODD > 0 */ |
977 | |