1 | /* $NetBSD: rf_paritylogDiskMgr.c,v 1.28 2011/05/11 06:20:33 mrg Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Author: William V. Courtright II |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | /* Code for flushing and reintegration operations related to parity logging. |
29 | * |
30 | */ |
31 | |
32 | #include <sys/cdefs.h> |
33 | __KERNEL_RCSID(0, "$NetBSD: rf_paritylogDiskMgr.c,v 1.28 2011/05/11 06:20:33 mrg Exp $" ); |
34 | |
35 | #include "rf_archs.h" |
36 | |
37 | #if RF_INCLUDE_PARITYLOGGING > 0 |
38 | |
39 | #include <dev/raidframe/raidframevar.h> |
40 | |
41 | #include "rf_threadstuff.h" |
42 | #include "rf_mcpair.h" |
43 | #include "rf_raid.h" |
44 | #include "rf_dag.h" |
45 | #include "rf_dagfuncs.h" |
46 | #include "rf_desc.h" |
47 | #include "rf_layout.h" |
48 | #include "rf_diskqueue.h" |
49 | #include "rf_paritylog.h" |
50 | #include "rf_general.h" |
51 | #include "rf_etimer.h" |
52 | #include "rf_paritylogging.h" |
53 | #include "rf_engine.h" |
54 | #include "rf_dagutils.h" |
55 | #include "rf_map.h" |
56 | #include "rf_parityscan.h" |
57 | |
58 | #include "rf_paritylogDiskMgr.h" |
59 | |
60 | static void *AcquireReintBuffer(RF_RegionBufferQueue_t *); |
61 | |
62 | static void * |
63 | AcquireReintBuffer(RF_RegionBufferQueue_t *pool) |
64 | { |
65 | void *bufPtr = NULL; |
66 | |
67 | /* Return a region buffer from the free list (pool). If the free list |
68 | * is empty, WAIT. BLOCKING */ |
69 | |
70 | rf_lock_mutex2(pool->mutex); |
71 | if (pool->availableBuffers > 0) { |
72 | bufPtr = pool->buffers[pool->availBuffersIndex]; |
73 | pool->availableBuffers--; |
74 | pool->availBuffersIndex++; |
75 | if (pool->availBuffersIndex == pool->totalBuffers) |
76 | pool->availBuffersIndex = 0; |
77 | rf_unlock_mutex2(pool->mutex); |
78 | } else { |
79 | RF_PANIC(); /* should never happen in correct config, |
80 | * single reint */ |
81 | rf_wait_cond2(pool->cond, pool->mutex); |
82 | } |
83 | return (bufPtr); |
84 | } |
85 | |
86 | static void |
87 | ReleaseReintBuffer( |
88 | RF_RegionBufferQueue_t * pool, |
89 | void *bufPtr) |
90 | { |
91 | /* Insert a region buffer (bufPtr) into the free list (pool). |
92 | * NON-BLOCKING */ |
93 | |
94 | rf_lock_mutex2(pool->mutex); |
95 | pool->availableBuffers++; |
96 | pool->buffers[pool->emptyBuffersIndex] = bufPtr; |
97 | pool->emptyBuffersIndex++; |
98 | if (pool->emptyBuffersIndex == pool->totalBuffers) |
99 | pool->emptyBuffersIndex = 0; |
100 | RF_ASSERT(pool->availableBuffers <= pool->totalBuffers); |
101 | /* |
102 | * XXXmrg this signal goes with the above "shouldn't happen" wait? |
103 | */ |
104 | rf_signal_cond2(pool->cond); |
105 | rf_unlock_mutex2(pool->mutex); |
106 | } |
107 | |
108 | |
109 | |
110 | static void |
111 | ReadRegionLog( |
112 | RF_RegionId_t regionID, |
113 | RF_MCPair_t * rrd_mcpair, |
114 | void *regionBuffer, |
115 | RF_Raid_t * raidPtr, |
116 | RF_DagHeader_t ** rrd_dag_h, |
117 | RF_AllocListElem_t ** rrd_alloclist, |
118 | RF_PhysDiskAddr_t ** rrd_pda) |
119 | { |
120 | /* Initiate the read a region log from disk. Once initiated, return |
121 | * to the calling routine. |
122 | * |
123 | * NON-BLOCKING */ |
124 | |
125 | RF_AccTraceEntry_t *tracerec; |
126 | RF_DagNode_t *rrd_rdNode; |
127 | |
128 | /* create DAG to read region log from disk */ |
129 | rf_MakeAllocList(*rrd_alloclist); |
130 | *rrd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, regionBuffer, |
131 | rf_DiskReadFunc, rf_DiskReadUndoFunc, |
132 | "Rrl" , *rrd_alloclist, |
133 | RF_DAG_FLAGS_NONE, |
134 | RF_IO_NORMAL_PRIORITY); |
135 | |
136 | /* create and initialize PDA for the core log */ |
137 | /* RF_Malloc(*rrd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
138 | * *)); */ |
139 | *rrd_pda = rf_AllocPDAList(1); |
140 | rf_MapLogParityLogging(raidPtr, regionID, 0, |
141 | &((*rrd_pda)->col), &((*rrd_pda)->startSector)); |
142 | (*rrd_pda)->numSector = raidPtr->regionInfo[regionID].capacity; |
143 | |
144 | if ((*rrd_pda)->next) { |
145 | (*rrd_pda)->next = NULL; |
146 | printf("set rrd_pda->next to NULL\n" ); |
147 | } |
148 | /* initialize DAG parameters */ |
149 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
150 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
151 | (*rrd_dag_h)->tracerec = tracerec; |
152 | rrd_rdNode = (*rrd_dag_h)->succedents[0]->succedents[0]; |
153 | rrd_rdNode->params[0].p = *rrd_pda; |
154 | /* rrd_rdNode->params[1] = regionBuffer; */ |
155 | rrd_rdNode->params[2].v = 0; |
156 | rrd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
157 | |
158 | /* launch region log read dag */ |
159 | rf_DispatchDAG(*rrd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
160 | (void *) rrd_mcpair); |
161 | } |
162 | |
163 | |
164 | |
165 | static void |
166 | WriteCoreLog( |
167 | RF_ParityLog_t * log, |
168 | RF_MCPair_t * fwr_mcpair, |
169 | RF_Raid_t * raidPtr, |
170 | RF_DagHeader_t ** fwr_dag_h, |
171 | RF_AllocListElem_t ** fwr_alloclist, |
172 | RF_PhysDiskAddr_t ** fwr_pda) |
173 | { |
174 | RF_RegionId_t regionID = log->regionID; |
175 | RF_AccTraceEntry_t *tracerec; |
176 | RF_SectorNum_t regionOffset; |
177 | RF_DagNode_t *fwr_wrNode; |
178 | |
179 | /* Initiate the write of a core log to a region log disk. Once |
180 | * initiated, return to the calling routine. |
181 | * |
182 | * NON-BLOCKING */ |
183 | |
184 | /* create DAG to write a core log to a region log disk */ |
185 | rf_MakeAllocList(*fwr_alloclist); |
186 | *fwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, log->bufPtr, |
187 | rf_DiskWriteFunc, rf_DiskWriteUndoFunc, |
188 | "Wcl" , *fwr_alloclist, RF_DAG_FLAGS_NONE, RF_IO_NORMAL_PRIORITY); |
189 | |
190 | /* create and initialize PDA for the region log */ |
191 | /* RF_Malloc(*fwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
192 | * *)); */ |
193 | *fwr_pda = rf_AllocPDAList(1); |
194 | regionOffset = log->diskOffset; |
195 | rf_MapLogParityLogging(raidPtr, regionID, regionOffset, |
196 | &((*fwr_pda)->col), |
197 | &((*fwr_pda)->startSector)); |
198 | (*fwr_pda)->numSector = raidPtr->numSectorsPerLog; |
199 | |
200 | /* initialize DAG parameters */ |
201 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
202 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
203 | (*fwr_dag_h)->tracerec = tracerec; |
204 | fwr_wrNode = (*fwr_dag_h)->succedents[0]->succedents[0]; |
205 | fwr_wrNode->params[0].p = *fwr_pda; |
206 | /* fwr_wrNode->params[1] = log->bufPtr; */ |
207 | fwr_wrNode->params[2].v = 0; |
208 | fwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
209 | |
210 | /* launch the dag to write the core log to disk */ |
211 | rf_DispatchDAG(*fwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
212 | (void *) fwr_mcpair); |
213 | } |
214 | |
215 | |
216 | static void |
217 | ReadRegionParity( |
218 | RF_RegionId_t regionID, |
219 | RF_MCPair_t * prd_mcpair, |
220 | void *parityBuffer, |
221 | RF_Raid_t * raidPtr, |
222 | RF_DagHeader_t ** prd_dag_h, |
223 | RF_AllocListElem_t ** prd_alloclist, |
224 | RF_PhysDiskAddr_t ** prd_pda) |
225 | { |
226 | /* Initiate the read region parity from disk. Once initiated, return |
227 | * to the calling routine. |
228 | * |
229 | * NON-BLOCKING */ |
230 | |
231 | RF_AccTraceEntry_t *tracerec; |
232 | RF_DagNode_t *prd_rdNode; |
233 | |
234 | /* create DAG to read region parity from disk */ |
235 | rf_MakeAllocList(*prd_alloclist); |
236 | *prd_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, NULL, rf_DiskReadFunc, |
237 | rf_DiskReadUndoFunc, "Rrp" , |
238 | *prd_alloclist, RF_DAG_FLAGS_NONE, |
239 | RF_IO_NORMAL_PRIORITY); |
240 | |
241 | /* create and initialize PDA for region parity */ |
242 | /* RF_Malloc(*prd_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
243 | * *)); */ |
244 | *prd_pda = rf_AllocPDAList(1); |
245 | rf_MapRegionParity(raidPtr, regionID, |
246 | &((*prd_pda)->col), &((*prd_pda)->startSector), |
247 | &((*prd_pda)->numSector)); |
248 | if (rf_parityLogDebug) |
249 | printf("[reading %d sectors of parity from region %d]\n" , |
250 | (int) (*prd_pda)->numSector, regionID); |
251 | if ((*prd_pda)->next) { |
252 | (*prd_pda)->next = NULL; |
253 | printf("set prd_pda->next to NULL\n" ); |
254 | } |
255 | /* initialize DAG parameters */ |
256 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
257 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
258 | (*prd_dag_h)->tracerec = tracerec; |
259 | prd_rdNode = (*prd_dag_h)->succedents[0]->succedents[0]; |
260 | prd_rdNode->params[0].p = *prd_pda; |
261 | prd_rdNode->params[1].p = parityBuffer; |
262 | prd_rdNode->params[2].v = 0; |
263 | prd_rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
264 | #if RF_DEBUG_VALIDATE_DAG |
265 | if (rf_validateDAGDebug) |
266 | rf_ValidateDAG(*prd_dag_h); |
267 | #endif |
268 | /* launch region parity read dag */ |
269 | rf_DispatchDAG(*prd_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
270 | (void *) prd_mcpair); |
271 | } |
272 | |
273 | static void |
274 | WriteRegionParity( |
275 | RF_RegionId_t regionID, |
276 | RF_MCPair_t * pwr_mcpair, |
277 | void *parityBuffer, |
278 | RF_Raid_t * raidPtr, |
279 | RF_DagHeader_t ** pwr_dag_h, |
280 | RF_AllocListElem_t ** pwr_alloclist, |
281 | RF_PhysDiskAddr_t ** pwr_pda) |
282 | { |
283 | /* Initiate the write of region parity to disk. Once initiated, return |
284 | * to the calling routine. |
285 | * |
286 | * NON-BLOCKING */ |
287 | |
288 | RF_AccTraceEntry_t *tracerec; |
289 | RF_DagNode_t *pwr_wrNode; |
290 | |
291 | /* create DAG to write region log from disk */ |
292 | rf_MakeAllocList(*pwr_alloclist); |
293 | *pwr_dag_h = rf_MakeSimpleDAG(raidPtr, 1, 0, parityBuffer, |
294 | rf_DiskWriteFunc, rf_DiskWriteUndoFunc, |
295 | "Wrp" , *pwr_alloclist, |
296 | RF_DAG_FLAGS_NONE, |
297 | RF_IO_NORMAL_PRIORITY); |
298 | |
299 | /* create and initialize PDA for region parity */ |
300 | /* RF_Malloc(*pwr_pda, sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t |
301 | * *)); */ |
302 | *pwr_pda = rf_AllocPDAList(1); |
303 | rf_MapRegionParity(raidPtr, regionID, |
304 | &((*pwr_pda)->col), &((*pwr_pda)->startSector), |
305 | &((*pwr_pda)->numSector)); |
306 | |
307 | /* initialize DAG parameters */ |
308 | RF_Malloc(tracerec,sizeof(RF_AccTraceEntry_t), (RF_AccTraceEntry_t *)); |
309 | memset((char *) tracerec, 0, sizeof(RF_AccTraceEntry_t)); |
310 | (*pwr_dag_h)->tracerec = tracerec; |
311 | pwr_wrNode = (*pwr_dag_h)->succedents[0]->succedents[0]; |
312 | pwr_wrNode->params[0].p = *pwr_pda; |
313 | /* pwr_wrNode->params[1] = parityBuffer; */ |
314 | pwr_wrNode->params[2].v = 0; |
315 | pwr_wrNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0); |
316 | |
317 | /* launch the dag to write region parity to disk */ |
318 | rf_DispatchDAG(*pwr_dag_h, (void (*) (void *)) rf_MCPairWakeupFunc, |
319 | (void *) pwr_mcpair); |
320 | } |
321 | |
322 | static void |
323 | FlushLogsToDisk( |
324 | RF_Raid_t * raidPtr, |
325 | RF_ParityLog_t * logList) |
326 | { |
327 | /* Flush a linked list of core logs to the log disk. Logs contain the |
328 | * disk location where they should be written. Logs were written in |
329 | * FIFO order and that order must be preserved. |
330 | * |
331 | * Recommended optimizations: 1) allow multiple flushes to occur |
332 | * simultaneously 2) coalesce contiguous flush operations |
333 | * |
334 | * BLOCKING */ |
335 | |
336 | RF_ParityLog_t *log; |
337 | RF_RegionId_t regionID; |
338 | RF_MCPair_t *fwr_mcpair; |
339 | RF_DagHeader_t *fwr_dag_h; |
340 | RF_AllocListElem_t *fwr_alloclist; |
341 | RF_PhysDiskAddr_t *fwr_pda; |
342 | |
343 | fwr_mcpair = rf_AllocMCPair(); |
344 | RF_LOCK_MCPAIR(fwr_mcpair); |
345 | |
346 | RF_ASSERT(logList); |
347 | log = logList; |
348 | while (log) { |
349 | regionID = log->regionID; |
350 | |
351 | /* create and launch a DAG to write the core log */ |
352 | if (rf_parityLogDebug) |
353 | printf("[initiating write of core log for region %d]\n" , regionID); |
354 | fwr_mcpair->flag = RF_FALSE; |
355 | WriteCoreLog(log, fwr_mcpair, raidPtr, &fwr_dag_h, |
356 | &fwr_alloclist, &fwr_pda); |
357 | |
358 | /* wait for the DAG to complete */ |
359 | while (!fwr_mcpair->flag) |
360 | RF_WAIT_MCPAIR(fwr_mcpair); |
361 | if (fwr_dag_h->status != rf_enable) { |
362 | RF_ERRORMSG1("Unable to write core log to disk (region %d)\n" , regionID); |
363 | RF_ASSERT(0); |
364 | } |
365 | /* RF_Free(fwr_pda, sizeof(RF_PhysDiskAddr_t)); */ |
366 | rf_FreePhysDiskAddr(fwr_pda); |
367 | rf_FreeDAG(fwr_dag_h); |
368 | rf_FreeAllocList(fwr_alloclist); |
369 | |
370 | log = log->next; |
371 | } |
372 | RF_UNLOCK_MCPAIR(fwr_mcpair); |
373 | rf_FreeMCPair(fwr_mcpair); |
374 | rf_ReleaseParityLogs(raidPtr, logList); |
375 | } |
376 | |
377 | static void |
378 | ReintegrateRegion( |
379 | RF_Raid_t * raidPtr, |
380 | RF_RegionId_t regionID, |
381 | RF_ParityLog_t * coreLog) |
382 | { |
383 | RF_MCPair_t *rrd_mcpair = NULL, *prd_mcpair, *pwr_mcpair; |
384 | RF_DagHeader_t *rrd_dag_h = NULL, *prd_dag_h, *pwr_dag_h; |
385 | RF_AllocListElem_t *rrd_alloclist = NULL, *prd_alloclist, *pwr_alloclist; |
386 | RF_PhysDiskAddr_t *rrd_pda = NULL, *prd_pda, *pwr_pda; |
387 | void *parityBuffer, *regionBuffer = NULL; |
388 | |
389 | /* Reintegrate a region (regionID). |
390 | * |
391 | * 1. acquire region and parity buffers |
392 | * 2. read log from disk |
393 | * 3. read parity from disk |
394 | * 4. apply log to parity |
395 | * 5. apply core log to parity |
396 | * 6. write new parity to disk |
397 | * |
398 | * BLOCKING */ |
399 | |
400 | if (rf_parityLogDebug) |
401 | printf("[reintegrating region %d]\n" , regionID); |
402 | |
403 | /* initiate read of region parity */ |
404 | if (rf_parityLogDebug) |
405 | printf("[initiating read of parity for region %d]\n" ,regionID); |
406 | parityBuffer = AcquireReintBuffer(&raidPtr->parityBufferPool); |
407 | prd_mcpair = rf_AllocMCPair(); |
408 | RF_LOCK_MCPAIR(prd_mcpair); |
409 | prd_mcpair->flag = RF_FALSE; |
410 | ReadRegionParity(regionID, prd_mcpair, parityBuffer, raidPtr, |
411 | &prd_dag_h, &prd_alloclist, &prd_pda); |
412 | |
413 | /* if region log nonempty, initiate read */ |
414 | if (raidPtr->regionInfo[regionID].diskCount > 0) { |
415 | if (rf_parityLogDebug) |
416 | printf("[initiating read of disk log for region %d]\n" , |
417 | regionID); |
418 | regionBuffer = AcquireReintBuffer(&raidPtr->regionBufferPool); |
419 | rrd_mcpair = rf_AllocMCPair(); |
420 | RF_LOCK_MCPAIR(rrd_mcpair); |
421 | rrd_mcpair->flag = RF_FALSE; |
422 | ReadRegionLog(regionID, rrd_mcpair, regionBuffer, raidPtr, |
423 | &rrd_dag_h, &rrd_alloclist, &rrd_pda); |
424 | } |
425 | /* wait on read of region parity to complete */ |
426 | while (!prd_mcpair->flag) { |
427 | RF_WAIT_MCPAIR(prd_mcpair); |
428 | } |
429 | RF_UNLOCK_MCPAIR(prd_mcpair); |
430 | if (prd_dag_h->status != rf_enable) { |
431 | RF_ERRORMSG("Unable to read parity from disk\n" ); |
432 | /* add code to fail the parity disk */ |
433 | RF_ASSERT(0); |
434 | } |
435 | /* apply core log to parity */ |
436 | /* if (coreLog) ApplyLogsToParity(coreLog, parityBuffer); */ |
437 | |
438 | if (raidPtr->regionInfo[regionID].diskCount > 0) { |
439 | /* wait on read of region log to complete */ |
440 | while (!rrd_mcpair->flag) |
441 | RF_WAIT_MCPAIR(rrd_mcpair); |
442 | RF_UNLOCK_MCPAIR(rrd_mcpair); |
443 | if (rrd_dag_h->status != rf_enable) { |
444 | RF_ERRORMSG("Unable to read region log from disk\n" ); |
445 | /* add code to fail the log disk */ |
446 | RF_ASSERT(0); |
447 | } |
448 | /* apply region log to parity */ |
449 | /* ApplyRegionToParity(regionID, regionBuffer, parityBuffer); */ |
450 | /* release resources associated with region log */ |
451 | /* RF_Free(rrd_pda, sizeof(RF_PhysDiskAddr_t)); */ |
452 | rf_FreePhysDiskAddr(rrd_pda); |
453 | rf_FreeDAG(rrd_dag_h); |
454 | rf_FreeAllocList(rrd_alloclist); |
455 | rf_FreeMCPair(rrd_mcpair); |
456 | ReleaseReintBuffer(&raidPtr->regionBufferPool, regionBuffer); |
457 | } |
458 | /* write reintegrated parity to disk */ |
459 | if (rf_parityLogDebug) |
460 | printf("[initiating write of parity for region %d]\n" , |
461 | regionID); |
462 | pwr_mcpair = rf_AllocMCPair(); |
463 | RF_LOCK_MCPAIR(pwr_mcpair); |
464 | pwr_mcpair->flag = RF_FALSE; |
465 | WriteRegionParity(regionID, pwr_mcpair, parityBuffer, raidPtr, |
466 | &pwr_dag_h, &pwr_alloclist, &pwr_pda); |
467 | while (!pwr_mcpair->flag) |
468 | RF_WAIT_MCPAIR(pwr_mcpair); |
469 | RF_UNLOCK_MCPAIR(pwr_mcpair); |
470 | if (pwr_dag_h->status != rf_enable) { |
471 | RF_ERRORMSG("Unable to write parity to disk\n" ); |
472 | /* add code to fail the parity disk */ |
473 | RF_ASSERT(0); |
474 | } |
475 | /* release resources associated with read of old parity */ |
476 | /* RF_Free(prd_pda, sizeof(RF_PhysDiskAddr_t)); */ |
477 | rf_FreePhysDiskAddr(prd_pda); |
478 | rf_FreeDAG(prd_dag_h); |
479 | rf_FreeAllocList(prd_alloclist); |
480 | rf_FreeMCPair(prd_mcpair); |
481 | |
482 | /* release resources associated with write of new parity */ |
483 | ReleaseReintBuffer(&raidPtr->parityBufferPool, parityBuffer); |
484 | /* RF_Free(pwr_pda, sizeof(RF_PhysDiskAddr_t)); */ |
485 | rf_FreePhysDiskAddr(pwr_pda); |
486 | rf_FreeDAG(pwr_dag_h); |
487 | rf_FreeAllocList(pwr_alloclist); |
488 | rf_FreeMCPair(pwr_mcpair); |
489 | |
490 | if (rf_parityLogDebug) |
491 | printf("[finished reintegrating region %d]\n" , regionID); |
492 | } |
493 | |
494 | |
495 | |
496 | static void |
497 | ReintegrateLogs( |
498 | RF_Raid_t * raidPtr, |
499 | RF_ParityLog_t * logList) |
500 | { |
501 | RF_ParityLog_t *log, *freeLogList = NULL; |
502 | RF_ParityLogData_t *logData, *logDataList; |
503 | RF_RegionId_t regionID; |
504 | |
505 | RF_ASSERT(logList); |
506 | while (logList) { |
507 | log = logList; |
508 | logList = logList->next; |
509 | log->next = NULL; |
510 | regionID = log->regionID; |
511 | ReintegrateRegion(raidPtr, regionID, log); |
512 | log->numRecords = 0; |
513 | |
514 | /* remove all items which are blocked on reintegration of this |
515 | * region */ |
516 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
517 | logData = rf_SearchAndDequeueParityLogData(raidPtr, regionID, |
518 | &raidPtr->parityLogDiskQueue.reintBlockHead, |
519 | &raidPtr->parityLogDiskQueue.reintBlockTail, |
520 | RF_TRUE); |
521 | logDataList = logData; |
522 | while (logData) { |
523 | logData->next = rf_SearchAndDequeueParityLogData( |
524 | raidPtr, regionID, |
525 | &raidPtr->parityLogDiskQueue.reintBlockHead, |
526 | &raidPtr->parityLogDiskQueue.reintBlockTail, |
527 | RF_TRUE); |
528 | logData = logData->next; |
529 | } |
530 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
531 | |
532 | /* process blocked log data and clear reintInProgress flag for |
533 | * this region */ |
534 | if (logDataList) |
535 | rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_TRUE); |
536 | else { |
537 | /* Enable flushing for this region. Holding both |
538 | * locks provides a synchronization barrier with |
539 | * DumpParityLogToDisk */ |
540 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
541 | rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
542 | /* XXXmrg: don't need this? */ |
543 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
544 | raidPtr->regionInfo[regionID].diskCount = 0; |
545 | raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; |
546 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
547 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now |
548 | * enabled */ |
549 | /* XXXmrg: don't need this? */ |
550 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
551 | } |
552 | /* if log wasn't used, attach it to the list of logs to be |
553 | * returned */ |
554 | if (log) { |
555 | log->next = freeLogList; |
556 | freeLogList = log; |
557 | } |
558 | } |
559 | if (freeLogList) |
560 | rf_ReleaseParityLogs(raidPtr, freeLogList); |
561 | } |
562 | |
563 | int |
564 | rf_ShutdownLogging(RF_Raid_t * raidPtr) |
565 | { |
566 | /* shutdown parity logging 1) disable parity logging in all regions 2) |
567 | * reintegrate all regions */ |
568 | |
569 | RF_SectorCount_t diskCount; |
570 | RF_RegionId_t regionID; |
571 | RF_ParityLog_t *log; |
572 | |
573 | if (rf_parityLogDebug) |
574 | printf("[shutting down parity logging]\n" ); |
575 | /* Since parity log maps are volatile, we must reintegrate all |
576 | * regions. */ |
577 | if (rf_forceParityLogReint) { |
578 | for (regionID = 0; regionID < rf_numParityRegions; regionID++) { |
579 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
580 | raidPtr->regionInfo[regionID].loggingEnabled = |
581 | RF_FALSE; |
582 | log = raidPtr->regionInfo[regionID].coreLog; |
583 | raidPtr->regionInfo[regionID].coreLog = NULL; |
584 | diskCount = raidPtr->regionInfo[regionID].diskCount; |
585 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
586 | if (diskCount > 0 || log != NULL) |
587 | ReintegrateRegion(raidPtr, regionID, log); |
588 | if (log != NULL) |
589 | rf_ReleaseParityLogs(raidPtr, log); |
590 | } |
591 | } |
592 | if (rf_parityLogDebug) { |
593 | printf("[parity logging disabled]\n" ); |
594 | printf("[should be done!]\n" ); |
595 | } |
596 | return (0); |
597 | } |
598 | |
599 | int |
600 | rf_ParityLoggingDiskManager(RF_Raid_t * raidPtr) |
601 | { |
602 | RF_ParityLog_t *reintQueue, *flushQueue; |
603 | int workNeeded, done = RF_FALSE; |
604 | int s; |
605 | |
606 | /* Main program for parity logging disk thread. This routine waits |
607 | * for work to appear in either the flush or reintegration queues and |
608 | * is responsible for flushing core logs to the log disk as well as |
609 | * reintegrating parity regions. |
610 | * |
611 | * BLOCKING */ |
612 | |
613 | s = splbio(); |
614 | |
615 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
616 | |
617 | /* |
618 | * Inform our creator that we're running. Don't bother doing the |
619 | * mutex lock/unlock dance- we locked above, and we'll unlock |
620 | * below with nothing to do, yet. |
621 | */ |
622 | raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_RUNNING; |
623 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
624 | |
625 | /* empty the work queues */ |
626 | flushQueue = raidPtr->parityLogDiskQueue.flushQueue; |
627 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
628 | reintQueue = raidPtr->parityLogDiskQueue.reintQueue; |
629 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
630 | workNeeded = (flushQueue || reintQueue); |
631 | |
632 | while (!done) { |
633 | while (workNeeded) { |
634 | /* First, flush all logs in the flush queue, freeing |
635 | * buffers Second, reintegrate all regions which are |
636 | * reported as full. Third, append queued log data |
637 | * until blocked. |
638 | * |
639 | * Note: Incoming appends (ParityLogAppend) can block on |
640 | * either 1. empty buffer pool 2. region under |
641 | * reintegration To preserve a global FIFO ordering of |
642 | * appends, buffers are not released to the world |
643 | * until those appends blocked on buffers are removed |
644 | * from the append queue. Similarly, regions which |
645 | * are reintegrated are not opened for general use |
646 | * until the append queue has been emptied. */ |
647 | |
648 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
649 | |
650 | /* empty flushQueue, using free'd log buffers to |
651 | * process bufTail */ |
652 | if (flushQueue) |
653 | FlushLogsToDisk(raidPtr, flushQueue); |
654 | |
655 | /* empty reintQueue, flushing from reintTail as we go */ |
656 | if (reintQueue) |
657 | ReintegrateLogs(raidPtr, reintQueue); |
658 | |
659 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
660 | flushQueue = raidPtr->parityLogDiskQueue.flushQueue; |
661 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
662 | reintQueue = raidPtr->parityLogDiskQueue.reintQueue; |
663 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
664 | workNeeded = (flushQueue || reintQueue); |
665 | } |
666 | /* no work is needed at this point */ |
667 | if (raidPtr->parityLogDiskQueue.threadState & RF_PLOG_TERMINATE) { |
668 | /* shutdown parity logging 1. disable parity logging |
669 | * in all regions 2. reintegrate all regions */ |
670 | done = RF_TRUE; /* thread disabled, no work needed */ |
671 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
672 | rf_ShutdownLogging(raidPtr); |
673 | } |
674 | if (!done) { |
675 | /* thread enabled, no work needed, so sleep */ |
676 | if (rf_parityLogDebug) |
677 | printf("[parity logging disk manager sleeping]\n" ); |
678 | rf_wait_cond2(raidPtr->parityLogDiskQueue.cond, |
679 | raidPtr->parityLogDiskQueue.mutex); |
680 | if (rf_parityLogDebug) |
681 | printf("[parity logging disk manager just woke up]\n" ); |
682 | flushQueue = raidPtr->parityLogDiskQueue.flushQueue; |
683 | raidPtr->parityLogDiskQueue.flushQueue = NULL; |
684 | reintQueue = raidPtr->parityLogDiskQueue.reintQueue; |
685 | raidPtr->parityLogDiskQueue.reintQueue = NULL; |
686 | workNeeded = (flushQueue || reintQueue); |
687 | } |
688 | } |
689 | /* |
690 | * Announce that we're done. |
691 | */ |
692 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
693 | raidPtr->parityLogDiskQueue.threadState |= RF_PLOG_SHUTDOWN; |
694 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
695 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
696 | |
697 | splx(s); |
698 | |
699 | /* |
700 | * In the NetBSD kernel, the thread must exit; returning would |
701 | * cause the proc trampoline to attempt to return to userspace. |
702 | */ |
703 | kthread_exit(0); /* does not return */ |
704 | } |
705 | #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |
706 | |