1 | /* $NetBSD: rf_paritylog.c,v 1.18 2011/05/11 06:03:06 mrg Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Author: William V. Courtright II |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | |
/* Code for manipulating in-core parity logs */
32 | |
33 | #include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.18 2011/05/11 06:03:06 mrg Exp $");
35 | |
36 | #include "rf_archs.h" |
37 | |
38 | #if RF_INCLUDE_PARITYLOGGING > 0 |
39 | |
40 | /* |
41 | * Append-only log for recording parity "update" and "overwrite" records |
42 | */ |
43 | |
44 | #include <dev/raidframe/raidframevar.h> |
45 | |
46 | #include "rf_threadstuff.h" |
47 | #include "rf_mcpair.h" |
48 | #include "rf_raid.h" |
49 | #include "rf_dag.h" |
50 | #include "rf_dagfuncs.h" |
51 | #include "rf_desc.h" |
52 | #include "rf_layout.h" |
53 | #include "rf_diskqueue.h" |
54 | #include "rf_etimer.h" |
55 | #include "rf_paritylog.h" |
56 | #include "rf_general.h" |
57 | #include "rf_map.h" |
58 | #include "rf_paritylogging.h" |
59 | #include "rf_paritylogDiskMgr.h" |
60 | |
61 | static RF_CommonLogData_t * |
62 | AllocParityLogCommonData(RF_Raid_t * raidPtr) |
63 | { |
64 | RF_CommonLogData_t *common = NULL; |
65 | |
	/* Return a struct for holding common parity log information from the
	 * free list (parityLogDiskQueue.freeCommonList). If the free list
	 * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
69 | |
70 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
71 | if (raidPtr->parityLogDiskQueue.freeCommonList) { |
72 | common = raidPtr->parityLogDiskQueue.freeCommonList; |
73 | raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next; |
74 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
75 | } else { |
76 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
77 | RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *)); |
78 | /* destroy is in rf_paritylogging.c */ |
79 | rf_init_mutex2(common->mutex, IPL_VM); |
80 | } |
81 | common->next = NULL; |
82 | return (common); |
83 | } |
84 | |
85 | static void |
86 | FreeParityLogCommonData(RF_CommonLogData_t * common) |
87 | { |
88 | RF_Raid_t *raidPtr; |
89 | |
90 | /* Insert a single struct for holding parity log information (data) |
91 | * into the free list (rf_parityLogDiskQueue.freeCommonList). |
92 | * NON-BLOCKING */ |
93 | |
94 | raidPtr = common->raidPtr; |
95 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
96 | common->next = raidPtr->parityLogDiskQueue.freeCommonList; |
97 | raidPtr->parityLogDiskQueue.freeCommonList = common; |
98 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
99 | } |
100 | |
101 | static RF_ParityLogData_t * |
102 | AllocParityLogData(RF_Raid_t * raidPtr) |
103 | { |
104 | RF_ParityLogData_t *data = NULL; |
105 | |
	/* Return a struct for holding parity log information from the free
	 * list (parityLogDiskQueue.freeDataList). If the free list is empty,
	 * call RF_Malloc to create a new structure. NON-BLOCKING */
109 | |
110 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
111 | if (raidPtr->parityLogDiskQueue.freeDataList) { |
112 | data = raidPtr->parityLogDiskQueue.freeDataList; |
113 | raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next; |
114 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
115 | } else { |
116 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
117 | RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *)); |
118 | } |
119 | data->next = NULL; |
120 | data->prev = NULL; |
121 | return (data); |
122 | } |
123 | |
124 | |
125 | static void |
126 | FreeParityLogData(RF_ParityLogData_t * data) |
127 | { |
128 | RF_ParityLogData_t *nextItem; |
129 | RF_Raid_t *raidPtr; |
130 | |
	/* Insert a linked list of structs for holding parity log information
	 * (data) into the free list (parityLogDiskQueue.freeDataList).
	 * NON-BLOCKING */
134 | |
135 | raidPtr = data->common->raidPtr; |
136 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
137 | while (data) { |
138 | nextItem = data->next; |
139 | data->next = raidPtr->parityLogDiskQueue.freeDataList; |
140 | raidPtr->parityLogDiskQueue.freeDataList = data; |
141 | data = nextItem; |
142 | } |
143 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
144 | } |
145 | |
146 | |
147 | static void |
148 | EnqueueParityLogData( |
149 | RF_ParityLogData_t * data, |
150 | RF_ParityLogData_t ** head, |
151 | RF_ParityLogData_t ** tail) |
152 | { |
153 | RF_Raid_t *raidPtr; |
154 | |
155 | /* Insert an in-core parity log (*data) into the head of a disk queue |
156 | * (*head, *tail). NON-BLOCKING */ |
157 | |
158 | raidPtr = data->common->raidPtr; |
159 | if (rf_parityLogDebug) |
160 | printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n" , data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); |
161 | RF_ASSERT(data->prev == NULL); |
162 | RF_ASSERT(data->next == NULL); |
163 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
164 | if (*head) { |
165 | /* insert into head of queue */ |
166 | RF_ASSERT((*head)->prev == NULL); |
167 | RF_ASSERT((*tail)->next == NULL); |
168 | data->next = *head; |
169 | (*head)->prev = data; |
170 | *head = data; |
171 | } else { |
172 | /* insert into empty list */ |
173 | RF_ASSERT(*head == NULL); |
174 | RF_ASSERT(*tail == NULL); |
175 | *head = data; |
176 | *tail = data; |
177 | } |
178 | RF_ASSERT((*head)->prev == NULL); |
179 | RF_ASSERT((*tail)->next == NULL); |
180 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
181 | } |
182 | |
183 | static RF_ParityLogData_t * |
184 | DequeueParityLogData( |
185 | RF_Raid_t * raidPtr, |
186 | RF_ParityLogData_t ** head, |
187 | RF_ParityLogData_t ** tail, |
188 | int ignoreLocks) |
189 | { |
190 | RF_ParityLogData_t *data; |
191 | |
192 | /* Remove and return an in-core parity log from the tail of a disk |
193 | * queue (*head, *tail). NON-BLOCKING */ |
194 | |
195 | /* remove from tail, preserving FIFO order */ |
196 | if (!ignoreLocks) |
197 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
198 | data = *tail; |
199 | if (data) { |
200 | if (*head == *tail) { |
201 | /* removing last item from queue */ |
202 | *head = NULL; |
203 | *tail = NULL; |
204 | } else { |
205 | *tail = (*tail)->prev; |
206 | (*tail)->next = NULL; |
207 | RF_ASSERT((*head)->prev == NULL); |
208 | RF_ASSERT((*tail)->next == NULL); |
209 | } |
210 | data->next = NULL; |
211 | data->prev = NULL; |
212 | if (rf_parityLogDebug) |
213 | printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n" , data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); |
214 | } |
215 | if (*head) { |
216 | RF_ASSERT((*head)->prev == NULL); |
217 | RF_ASSERT((*tail)->next == NULL); |
218 | } |
219 | if (!ignoreLocks) |
220 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
221 | return (data); |
222 | } |
223 | |
224 | |
225 | static void |
226 | RequeueParityLogData( |
227 | RF_ParityLogData_t * data, |
228 | RF_ParityLogData_t ** head, |
229 | RF_ParityLogData_t ** tail) |
230 | { |
231 | RF_Raid_t *raidPtr; |
232 | |
233 | /* Insert an in-core parity log (*data) into the tail of a disk queue |
234 | * (*head, *tail). NON-BLOCKING */ |
235 | |
236 | raidPtr = data->common->raidPtr; |
237 | RF_ASSERT(data); |
238 | if (rf_parityLogDebug) |
239 | printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n" , data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector); |
240 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
241 | if (*tail) { |
242 | /* append to tail of list */ |
243 | data->prev = *tail; |
244 | data->next = NULL; |
245 | (*tail)->next = data; |
246 | *tail = data; |
247 | } else { |
248 | /* inserting into an empty list */ |
249 | *head = data; |
250 | *tail = data; |
251 | (*head)->prev = NULL; |
252 | (*tail)->next = NULL; |
253 | } |
254 | RF_ASSERT((*head)->prev == NULL); |
255 | RF_ASSERT((*tail)->next == NULL); |
256 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
257 | } |
258 | |
259 | RF_ParityLogData_t * |
260 | rf_CreateParityLogData( |
261 | RF_ParityRecordType_t operation, |
262 | RF_PhysDiskAddr_t * pda, |
263 | void *bufPtr, |
264 | RF_Raid_t * raidPtr, |
265 | int (*wakeFunc) (RF_DagNode_t * node, int status), |
266 | void *wakeArg, |
267 | RF_AccTraceEntry_t * tracerec, |
268 | RF_Etimer_t startTime) |
269 | { |
270 | RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL; |
271 | RF_CommonLogData_t *common; |
272 | RF_PhysDiskAddr_t *diskAddress; |
273 | int boundary, offset = 0; |
274 | |
	/* Return an initialized struct of info to be logged.  Build one item
	 * per physical disk address per region touched; an address that
	 * crosses a region boundary is split into one item per region.
	 *
	 * NON-BLOCKING */
279 | |
280 | diskAddress = pda; |
281 | common = AllocParityLogCommonData(raidPtr); |
282 | RF_ASSERT(common); |
283 | |
284 | common->operation = operation; |
285 | common->bufPtr = bufPtr; |
286 | common->raidPtr = raidPtr; |
287 | common->wakeFunc = wakeFunc; |
288 | common->wakeArg = wakeArg; |
289 | common->tracerec = tracerec; |
290 | common->startTime = startTime; |
291 | common->cnt = 0; |
292 | |
293 | if (rf_parityLogDebug) |
294 | printf("[entering CreateParityLogData]\n" ); |
295 | while (diskAddress) { |
296 | common->cnt++; |
297 | data = AllocParityLogData(raidPtr); |
298 | RF_ASSERT(data); |
299 | data->common = common; |
300 | data->next = NULL; |
301 | data->prev = NULL; |
302 | data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector); |
303 | if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) { |
304 | /* disk address does not cross a region boundary */ |
305 | data->diskAddress = *diskAddress; |
306 | data->bufOffset = offset; |
307 | offset = offset + diskAddress->numSector; |
308 | EnqueueParityLogData(data, &resultHead, &resultTail); |
309 | /* adjust disk address */ |
310 | diskAddress = diskAddress->next; |
311 | } else { |
312 | /* disk address crosses a region boundary */ |
313 | /* find address where region is crossed */ |
314 | boundary = 0; |
315 | while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary)) |
316 | boundary++; |
317 | |
318 | /* enter data before the boundary */ |
319 | data->diskAddress = *diskAddress; |
320 | data->diskAddress.numSector = boundary; |
321 | data->bufOffset = offset; |
322 | offset += boundary; |
323 | EnqueueParityLogData(data, &resultHead, &resultTail); |
324 | /* adjust disk address */ |
325 | diskAddress->startSector += boundary; |
326 | diskAddress->numSector -= boundary; |
327 | } |
328 | } |
329 | if (rf_parityLogDebug) |
330 | printf("[leaving CreateParityLogData]\n" ); |
331 | return (resultHead); |
332 | } |
333 | |
334 | |
335 | RF_ParityLogData_t * |
336 | rf_SearchAndDequeueParityLogData( |
337 | RF_Raid_t * raidPtr, |
338 | int regionID, |
339 | RF_ParityLogData_t ** head, |
340 | RF_ParityLogData_t ** tail, |
341 | int ignoreLocks) |
342 | { |
343 | RF_ParityLogData_t *w; |
344 | |
345 | /* Remove and return an in-core parity log from a specified region |
346 | * (regionID). If a matching log is not found, return NULL. |
347 | * |
348 | * NON-BLOCKING. */ |
349 | |
350 | /* walk backward through a list, looking for an entry with a matching |
351 | * region ID */ |
352 | if (!ignoreLocks) |
353 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
354 | w = (*tail); |
355 | while (w) { |
356 | if (w->regionID == regionID) { |
357 | /* remove an element from the list */ |
358 | if (w == *tail) { |
359 | if (*head == *tail) { |
360 | /* removing only element in the list */ |
361 | *head = NULL; |
362 | *tail = NULL; |
363 | } else { |
364 | /* removing last item in the list */ |
365 | *tail = (*tail)->prev; |
366 | (*tail)->next = NULL; |
367 | RF_ASSERT((*head)->prev == NULL); |
368 | RF_ASSERT((*tail)->next == NULL); |
369 | } |
370 | } else { |
371 | if (w == *head) { |
372 | /* removing first item in the list */ |
373 | *head = (*head)->next; |
374 | (*head)->prev = NULL; |
375 | RF_ASSERT((*head)->prev == NULL); |
376 | RF_ASSERT((*tail)->next == NULL); |
377 | } else { |
378 | /* removing an item from the middle of |
379 | * the list */ |
380 | w->prev->next = w->next; |
381 | w->next->prev = w->prev; |
382 | RF_ASSERT((*head)->prev == NULL); |
383 | RF_ASSERT((*tail)->next == NULL); |
384 | } |
385 | } |
386 | w->prev = NULL; |
387 | w->next = NULL; |
388 | if (rf_parityLogDebug) |
389 | printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n" , w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector); |
390 | return (w); |
391 | } else |
392 | w = w->prev; |
393 | } |
394 | if (!ignoreLocks) |
395 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
396 | return (NULL); |
397 | } |
398 | |
399 | static RF_ParityLogData_t * |
400 | DequeueMatchingLogData( |
401 | RF_Raid_t * raidPtr, |
402 | RF_ParityLogData_t ** head, |
403 | RF_ParityLogData_t ** tail) |
404 | { |
405 | RF_ParityLogData_t *logDataList, *logData; |
406 | int regionID; |
407 | |
408 | /* Remove and return an in-core parity log from the tail of a disk |
409 | * queue (*head, *tail). Then remove all matching (identical |
410 | * regionIDs) logData and return as a linked list. |
411 | * |
412 | * NON-BLOCKING */ |
413 | |
414 | logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE); |
415 | if (logDataList) { |
416 | regionID = logDataList->regionID; |
417 | logData = logDataList; |
418 | logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); |
419 | while (logData->next) { |
420 | logData = logData->next; |
421 | logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE); |
422 | } |
423 | } |
424 | return (logDataList); |
425 | } |
426 | |
427 | |
428 | static RF_ParityLog_t * |
429 | AcquireParityLog( |
430 | RF_ParityLogData_t * logData, |
431 | int finish) |
432 | { |
433 | RF_ParityLog_t *log = NULL; |
434 | RF_Raid_t *raidPtr; |
435 | |
436 | /* Grab a log buffer from the pool and return it. If no buffers are |
437 | * available, return NULL. NON-BLOCKING */ |
438 | raidPtr = logData->common->raidPtr; |
439 | rf_lock_mutex2(raidPtr->parityLogPool.mutex); |
440 | if (raidPtr->parityLogPool.parityLogs) { |
441 | log = raidPtr->parityLogPool.parityLogs; |
442 | raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next; |
443 | log->regionID = logData->regionID; |
444 | log->numRecords = 0; |
445 | log->next = NULL; |
446 | raidPtr->logsInUse++; |
447 | RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); |
448 | } else { |
		/* No logs are available, so place ourselves on the queue of
		 * work waiting on log buffers.  This is done while
		 * parityLogPool.mutex is held, to ensure synchronization with
		 * rf_ReleaseParityLogs(). */
453 | if (rf_parityLogDebug) |
454 | printf("[blocked on log, region %d, finish %d]\n" , logData->regionID, finish); |
455 | if (finish) |
456 | RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); |
457 | else |
458 | EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); |
459 | } |
460 | rf_unlock_mutex2(raidPtr->parityLogPool.mutex); |
461 | return (log); |
462 | } |
463 | |
464 | void |
465 | rf_ReleaseParityLogs( |
466 | RF_Raid_t * raidPtr, |
467 | RF_ParityLog_t * firstLog) |
468 | { |
469 | RF_ParityLogData_t *logDataList; |
470 | RF_ParityLog_t *log, *lastLog; |
471 | int cnt; |
472 | |
	/* Insert a linked list of parity logs (firstLog) into the free list
	 * (parityLogPool.parityLogs).
	 *
	 * NON-BLOCKING. */
477 | |
478 | RF_ASSERT(firstLog); |
479 | |
480 | /* Before returning logs to global free list, service all requests |
481 | * which are blocked on logs. Holding mutexes for parityLogPool and |
482 | * parityLogDiskQueue forces synchronization with AcquireParityLog(). */ |
483 | rf_lock_mutex2(raidPtr->parityLogPool.mutex); |
484 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
485 | logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); |
486 | log = firstLog; |
487 | if (firstLog) |
488 | firstLog = firstLog->next; |
489 | log->numRecords = 0; |
490 | log->next = NULL; |
491 | while (logDataList && log) { |
492 | rf_unlock_mutex2(raidPtr->parityLogPool.mutex); |
493 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
494 | rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE); |
495 | if (rf_parityLogDebug) |
496 | printf("[finishing up buf-blocked log data, region %d]\n" , logDataList->regionID); |
497 | if (log == NULL) { |
498 | log = firstLog; |
499 | if (firstLog) { |
500 | firstLog = firstLog->next; |
501 | log->numRecords = 0; |
502 | log->next = NULL; |
503 | } |
504 | } |
505 | rf_lock_mutex2(raidPtr->parityLogPool.mutex); |
506 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
507 | if (log) |
508 | logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail); |
509 | } |
510 | /* return remaining logs to pool */ |
511 | if (log) { |
512 | log->next = firstLog; |
513 | firstLog = log; |
514 | } |
515 | if (firstLog) { |
516 | lastLog = firstLog; |
517 | raidPtr->logsInUse--; |
518 | RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); |
519 | while (lastLog->next) { |
520 | lastLog = lastLog->next; |
521 | raidPtr->logsInUse--; |
522 | RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs); |
523 | } |
524 | lastLog->next = raidPtr->parityLogPool.parityLogs; |
525 | raidPtr->parityLogPool.parityLogs = firstLog; |
526 | cnt = 0; |
527 | log = raidPtr->parityLogPool.parityLogs; |
528 | while (log) { |
529 | cnt++; |
530 | log = log->next; |
531 | } |
532 | RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs); |
533 | } |
534 | rf_unlock_mutex2(raidPtr->parityLogPool.mutex); |
535 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
536 | } |
537 | |
538 | static void |
539 | ReintLog( |
540 | RF_Raid_t * raidPtr, |
541 | int regionID, |
542 | RF_ParityLog_t * log) |
543 | { |
544 | RF_ASSERT(log); |
545 | |
546 | /* Insert an in-core parity log (log) into the disk queue of |
547 | * reintegration work. Set the flag (reintInProgress) for the |
548 | * specified region (regionID) to indicate that reintegration is in |
549 | * progress for this region. NON-BLOCKING */ |
550 | |
551 | rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
552 | raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE; /* cleared when reint |
553 | * complete */ |
554 | |
555 | if (rf_parityLogDebug) |
556 | printf("[requesting reintegration of region %d]\n" , log->regionID); |
557 | /* move record to reintegration queue */ |
558 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
559 | log->next = raidPtr->parityLogDiskQueue.reintQueue; |
560 | raidPtr->parityLogDiskQueue.reintQueue = log; |
561 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
562 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
563 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
564 | } |
565 | |
566 | static void |
567 | FlushLog( |
568 | RF_Raid_t * raidPtr, |
569 | RF_ParityLog_t * log) |
570 | { |
571 | /* insert a core log (log) into a list of logs |
572 | * (parityLogDiskQueue.flushQueue) waiting to be written to disk. |
573 | * NON-BLOCKING */ |
574 | |
575 | RF_ASSERT(log); |
576 | RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); |
577 | RF_ASSERT(log->next == NULL); |
578 | /* move log to flush queue */ |
579 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
580 | log->next = raidPtr->parityLogDiskQueue.flushQueue; |
581 | raidPtr->parityLogDiskQueue.flushQueue = log; |
582 | rf_signal_cond2(raidPtr->parityLogDiskQueue.cond); |
583 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
584 | } |
585 | |
586 | static int |
587 | DumpParityLogToDisk( |
588 | int finish, |
589 | RF_ParityLogData_t * logData) |
590 | { |
591 | int i, diskCount, regionID = logData->regionID; |
592 | RF_ParityLog_t *log; |
593 | RF_Raid_t *raidPtr; |
594 | |
595 | raidPtr = logData->common->raidPtr; |
596 | |
597 | /* Move a core log to disk. If the log disk is full, initiate |
598 | * reintegration. |
599 | * |
600 | * Return (0) if we can enqueue the dump immediately, otherwise return |
601 | * (1) to indicate we are blocked on reintegration and control of the |
602 | * thread should be relinquished. |
603 | * |
604 | * Caller must hold regionInfo[regionID].mutex |
605 | * |
606 | * NON-BLOCKING */ |
607 | |
608 | RF_ASSERT(rf_owned_mutex2(raidPtr->regionInfo[regionID].mutex)); |
609 | |
610 | if (rf_parityLogDebug) |
611 | printf("[dumping parity log to disk, region %d]\n" , regionID); |
612 | log = raidPtr->regionInfo[regionID].coreLog; |
613 | RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog); |
614 | RF_ASSERT(log->next == NULL); |
615 | |
616 | /* if reintegration is in progress, must queue work */ |
617 | rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
618 | if (raidPtr->regionInfo[regionID].reintInProgress) { |
		/* Cannot proceed since this region is currently being
		 * reintegrated.  We cannot block, so queue the remaining work
		 * and return. */
		if (rf_parityLogDebug)
			printf("[region %d waiting on reintegration]\n", regionID);
624 | /* XXX not sure about the use of finish - shouldn't this |
625 | * always be "Enqueue"? */ |
626 | if (finish) |
627 | RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); |
628 | else |
629 | EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail); |
630 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
		return (1);	/* relinquish control of this thread */
632 | } |
633 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
634 | raidPtr->regionInfo[regionID].coreLog = NULL; |
	if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
		/* IMPORTANT!! this bound assumes the region's disk log holds
		 * an integral number of core logs */
	{
639 | /* update disk map for this region */ |
640 | diskCount = raidPtr->regionInfo[regionID].diskCount; |
641 | for (i = 0; i < raidPtr->numSectorsPerLog; i++) { |
642 | raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation; |
643 | raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr; |
644 | } |
645 | log->diskOffset = diskCount; |
646 | raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog; |
647 | FlushLog(raidPtr, log); |
648 | } else { |
649 | /* no room for log on disk, send it to disk manager and |
650 | * request reintegration */ |
651 | RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity); |
652 | ReintLog(raidPtr, regionID, log); |
653 | } |
654 | if (rf_parityLogDebug) |
655 | printf("[finished dumping parity log to disk, region %d]\n" , regionID); |
656 | return (0); |
657 | } |
658 | |
659 | int |
660 | rf_ParityLogAppend( |
661 | RF_ParityLogData_t * logData, |
662 | int finish, |
663 | RF_ParityLog_t ** incomingLog, |
664 | int clearReintFlag) |
665 | { |
666 | int regionID, logItem, itemDone; |
667 | RF_ParityLogData_t *item; |
668 | int punt, done = RF_FALSE; |
669 | RF_ParityLog_t *log; |
670 | RF_Raid_t *raidPtr; |
671 | RF_Etimer_t timer; |
672 | int (*wakeFunc) (RF_DagNode_t * node, int status); |
673 | void *wakeArg; |
674 | |
	/* Add parity to the appropriate log, one sector at a time.  This
	 * routine is called by the DAG functions ParityLogUpdateFunc
	 * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
678 | * |
679 | * Parity to be logged is contained in a linked-list (logData). When |
680 | * this routine returns, every sector in the list will be in one of |
681 | * three places: 1) entered into the parity log 2) queued, waiting on |
682 | * reintegration 3) queued, waiting on a core log |
683 | * |
684 | * Blocked work is passed to the ParityLoggingDiskManager for completion. |
685 | * Later, as conditions which required the block are removed, the work |
686 | * reenters this routine with the "finish" parameter set to "RF_TRUE." |
687 | * |
688 | * NON-BLOCKING */ |
689 | |
690 | raidPtr = logData->common->raidPtr; |
691 | /* lock the region for the first item in logData */ |
692 | RF_ASSERT(logData != NULL); |
693 | regionID = logData->regionID; |
694 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
695 | RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); |
696 | |
697 | if (clearReintFlag) { |
698 | /* Enable flushing for this region. Holding both locks |
699 | * provides a synchronization barrier with DumpParityLogToDisk */ |
700 | rf_lock_mutex2(raidPtr->regionInfo[regionID].reintMutex); |
701 | /* XXXmrg need this? */ |
702 | rf_lock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
703 | RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE); |
704 | raidPtr->regionInfo[regionID].diskCount = 0; |
705 | raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE; |
706 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].reintMutex); /* flushing is now |
707 | * enabled */ |
708 | /* XXXmrg need this? */ |
709 | rf_unlock_mutex2(raidPtr->parityLogDiskQueue.mutex); |
710 | } |
711 | /* process each item in logData */ |
712 | while (logData) { |
713 | /* remove an item from logData */ |
714 | item = logData; |
715 | logData = logData->next; |
716 | item->next = NULL; |
717 | item->prev = NULL; |
718 | |
719 | if (rf_parityLogDebug) |
720 | printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n" , item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector); |
721 | |
722 | /* see if we moved to a new region */ |
723 | if (regionID != item->regionID) { |
724 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
725 | regionID = item->regionID; |
726 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
727 | RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled); |
728 | } |
729 | punt = RF_FALSE;/* Set to RF_TRUE if work is blocked. This |
730 | * can happen in one of two ways: 1) no core |
731 | * log (AcquireParityLog) 2) waiting on |
732 | * reintegration (DumpParityLogToDisk) If punt |
733 | * is RF_TRUE, the dataItem was queued, so |
734 | * skip to next item. */ |
735 | |
736 | /* process item, one sector at a time, until all sectors |
737 | * processed or we punt */ |
738 | if (item->diskAddress.numSector > 0) |
739 | done = RF_FALSE; |
740 | else |
741 | RF_ASSERT(0); |
742 | while (!punt && !done) { |
743 | /* verify that a core log exists for this region */ |
744 | if (!raidPtr->regionInfo[regionID].coreLog) { |
745 | /* Attempt to acquire a parity log. If |
746 | * acquisition fails, queue remaining work in |
747 | * data item and move to nextItem. */ |
748 | if (incomingLog) |
749 | if (*incomingLog) { |
750 | RF_ASSERT((*incomingLog)->next == NULL); |
751 | raidPtr->regionInfo[regionID].coreLog = *incomingLog; |
752 | raidPtr->regionInfo[regionID].coreLog->regionID = regionID; |
753 | *incomingLog = NULL; |
754 | } else |
755 | raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); |
756 | else |
757 | raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); |
758 | /* Note: AcquireParityLog either returns a log |
759 | * or enqueues currentItem */ |
760 | } |
761 | if (!raidPtr->regionInfo[regionID].coreLog) |
762 | punt = RF_TRUE; /* failed to find a core log */ |
763 | else { |
764 | RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); |
765 | /* verify that the log has room for new |
766 | * entries */ |
767 | /* if log is full, dump it to disk and grab a |
768 | * new log */ |
769 | if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) { |
770 | /* log is full, dump it to disk */ |
771 | if (DumpParityLogToDisk(finish, item)) |
772 | punt = RF_TRUE; /* dump unsuccessful, |
773 | * blocked on |
774 | * reintegration */ |
775 | else { |
776 | /* dump was successful */ |
777 | if (incomingLog) |
778 | if (*incomingLog) { |
779 | RF_ASSERT((*incomingLog)->next == NULL); |
780 | raidPtr->regionInfo[regionID].coreLog = *incomingLog; |
781 | raidPtr->regionInfo[regionID].coreLog->regionID = regionID; |
782 | *incomingLog = NULL; |
783 | } else |
784 | raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); |
785 | else |
786 | raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish); |
787 | /* if a core log is not |
788 | * available, must queue work |
789 | * and return */ |
790 | if (!raidPtr->regionInfo[regionID].coreLog) |
791 | punt = RF_TRUE; /* blocked on log |
792 | * availability */ |
793 | } |
794 | } |
795 | } |
796 | /* if we didn't punt on this item, attempt to add a |
797 | * sector to the core log */ |
798 | if (!punt) { |
799 | RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL); |
800 | /* at this point, we have a core log with |
801 | * enough room for a sector */ |
802 | /* copy a sector into the log */ |
803 | log = raidPtr->regionInfo[regionID].coreLog; |
804 | RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog); |
805 | logItem = log->numRecords++; |
806 | log->records[logItem].parityAddr = item->diskAddress; |
807 | RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr); |
808 | RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity); |
809 | log->records[logItem].parityAddr.numSector = 1; |
810 | log->records[logItem].operation = item->common->operation; |
				memcpy((char *) log->bufPtr +
				    (logItem * (1 << item->common->raidPtr->logBytesPerSector)),
				    (char *) item->common->bufPtr +
				    (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector)),
				    (1 << item->common->raidPtr->logBytesPerSector));
812 | item->diskAddress.numSector--; |
813 | item->diskAddress.startSector++; |
814 | if (item->diskAddress.numSector == 0) |
815 | done = RF_TRUE; |
816 | } |
817 | } |
818 | |
819 | if (!punt) { |
820 | /* Processed this item completely, decrement count of |
821 | * items to be processed. */ |
822 | RF_ASSERT(item->diskAddress.numSector == 0); |
823 | rf_lock_mutex2(item->common->mutex); |
824 | item->common->cnt--; |
825 | if (item->common->cnt == 0) |
826 | itemDone = RF_TRUE; |
827 | else |
828 | itemDone = RF_FALSE; |
829 | rf_unlock_mutex2(item->common->mutex); |
830 | if (itemDone) { |
831 | /* Finished processing all log data for this |
832 | * IO Return structs to free list and invoke |
833 | * wakeup function. */ |
834 | timer = item->common->startTime; /* grab initial value of |
835 | * timer */ |
836 | RF_ETIMER_STOP(timer); |
837 | RF_ETIMER_EVAL(timer); |
838 | item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer); |
839 | if (rf_parityLogDebug) |
840 | printf("[waking process for region %d]\n" , item->regionID); |
841 | wakeFunc = item->common->wakeFunc; |
842 | wakeArg = item->common->wakeArg; |
843 | FreeParityLogCommonData(item->common); |
844 | FreeParityLogData(item); |
845 | (wakeFunc) (wakeArg, 0); |
846 | } else |
847 | FreeParityLogData(item); |
848 | } |
849 | } |
850 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
851 | if (rf_parityLogDebug) |
852 | printf("[exiting ParityLogAppend]\n" ); |
853 | return (0); |
854 | } |
855 | |
856 | |
857 | void |
858 | rf_EnableParityLogging(RF_Raid_t * raidPtr) |
859 | { |
860 | int regionID; |
861 | |
862 | for (regionID = 0; regionID < rf_numParityRegions; regionID++) { |
863 | rf_lock_mutex2(raidPtr->regionInfo[regionID].mutex); |
864 | raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE; |
865 | rf_unlock_mutex2(raidPtr->regionInfo[regionID].mutex); |
866 | } |
867 | if (rf_parityLogDebug) |
868 | printf("[parity logging enabled]\n" ); |
869 | } |
870 | #endif /* RF_INCLUDE_PARITYLOGGING > 0 */ |
871 | |