1 | /* $NetBSD: rf_reconbuffer.c,v 1.25 2011/05/02 07:29:18 mrg Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Author: Mark Holland |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | |
29 | /*************************************************** |
30 | * |
31 | * rf_reconbuffer.c -- reconstruction buffer manager |
32 | * |
33 | ***************************************************/ |
34 | |
35 | #include <sys/cdefs.h> |
36 | __KERNEL_RCSID(0, "$NetBSD: rf_reconbuffer.c,v 1.25 2011/05/02 07:29:18 mrg Exp $" ); |
37 | |
38 | #include "rf_raid.h" |
39 | #include "rf_reconbuffer.h" |
40 | #include "rf_acctrace.h" |
41 | #include "rf_etimer.h" |
42 | #include "rf_general.h" |
43 | #include "rf_revent.h" |
44 | #include "rf_reconutil.h" |
45 | #include "rf_nwayxor.h" |
46 | |
/*
 * Debug tracing helpers.  DprintfN takes a format string plus N arguments.
 *
 * With DEBUG defined they print only when rf_reconbufferDebug is non-zero;
 * otherwise they expand to an empty statement and do not evaluate their
 * arguments.
 *
 * Every variant is wrapped in do { } while (0) so that an invocation
 * followed by ';' is exactly one statement.  This keeps
 *	if (x) Dprintf1(...); else ...
 * well-formed and prevents a following 'else' from binding to the macro's
 * internal 'if'.
 */
#ifdef DEBUG

#define Dprintf1(s,a)         do { if (rf_reconbufferDebug) printf(s,a); } while (0)
#define Dprintf2(s,a,b)       do { if (rf_reconbufferDebug) printf(s,a,b); } while (0)
#define Dprintf3(s,a,b,c)     do { if (rf_reconbufferDebug) printf(s,a,b,c); } while (0)
#define Dprintf4(s,a,b,c,d)   do { if (rf_reconbufferDebug) printf(s,a,b,c,d); } while (0)
#define Dprintf5(s,a,b,c,d,e) do { if (rf_reconbufferDebug) printf(s,a,b,c,d,e); } while (0)

#else /* DEBUG */

#define Dprintf1(s,a)         do { } while (0)
#define Dprintf2(s,a,b)       do { } while (0)
#define Dprintf3(s,a,b,c)     do { } while (0)
#define Dprintf4(s,a,b,c,d)   do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)

#endif
64 | |
65 | /***************************************************************************** |
66 | * |
67 | * Submit a reconstruction buffer to the manager for XOR. We can only |
68 | * submit a buffer if (1) we can xor into an existing buffer, which |
69 | * means we don't have to acquire a new one, (2) we can acquire a |
70 | * floating recon buffer, or (3) the caller has indicated that we are |
71 | * allowed to keep the submitted buffer. |
72 | * |
73 | * Returns non-zero if and only if we were not able to submit. |
74 | * In this case, we append the current disk ID to the wait list on the |
75 | * indicated RU, so that it will be re-enabled when we acquire a buffer |
76 | * for this RU. |
77 | * |
78 | ****************************************************************************/ |
79 | |
/*
 * nWayXorFuncs[i] is a pointer to a function that will xor "i"
 * bufs into the accumulating sum.
 *
 * The table is indexed directly by the number of source buffers
 * (rf_MultiWayReconXor() indexes it with pssPtr->xorBufCount).  Slot 0
 * is NULL because there is no 0-way XOR, so valid indices are 1..9.
 */
static const RF_VoidFuncPtr nWayXorFuncs[] = {
	NULL,				/* [0]: no 0-way XOR */
	(RF_VoidFuncPtr) rf_nWayXor1,
	(RF_VoidFuncPtr) rf_nWayXor2,
	(RF_VoidFuncPtr) rf_nWayXor3,
	(RF_VoidFuncPtr) rf_nWayXor4,
	(RF_VoidFuncPtr) rf_nWayXor5,
	(RF_VoidFuncPtr) rf_nWayXor6,
	(RF_VoidFuncPtr) rf_nWayXor7,
	(RF_VoidFuncPtr) rf_nWayXor8,
	(RF_VoidFuncPtr) rf_nWayXor9	/* [9]: largest supported fan-in */
};
96 | |
97 | /* |
98 | * rbuf - the recon buffer to submit |
99 | * keep_it - whether we can keep this buffer or we have to return it |
100 | * use_committed - whether to use a committed or an available recon buffer |
101 | */ |
102 | int |
103 | rf_SubmitReconBuffer(RF_ReconBuffer_t *rbuf, int keep_it, int use_committed) |
104 | { |
105 | const RF_LayoutSW_t *lp; |
106 | int rc; |
107 | |
108 | lp = rbuf->raidPtr->Layout.map; |
109 | rc = lp->SubmitReconBuffer(rbuf, keep_it, use_committed); |
110 | return (rc); |
111 | } |
112 | |
113 | /* |
114 | * rbuf - the recon buffer to submit |
115 | * keep_it - whether we can keep this buffer or we have to return it |
116 | * use_committed - whether to use a committed or an available recon buffer |
117 | */ |
118 | int |
119 | rf_SubmitReconBufferBasic(RF_ReconBuffer_t *rbuf, int keep_it, |
120 | int use_committed) |
121 | { |
122 | RF_Raid_t *raidPtr = rbuf->raidPtr; |
123 | RF_RaidLayout_t *layoutPtr = &raidPtr->Layout; |
124 | RF_ReconCtrl_t *reconCtrlPtr = raidPtr->reconControl; |
125 | RF_ReconParityStripeStatus_t *pssPtr; |
126 | RF_ReconBuffer_t *targetRbuf, *t = NULL; /* temporary rbuf |
127 | * pointers */ |
128 | void *ta; /* temporary data buffer pointer */ |
129 | RF_CallbackDesc_t *cb, *p; |
130 | int retcode = 0; |
131 | |
132 | RF_Etimer_t timer; |
133 | |
134 | /* makes no sense to have a submission from the failed disk */ |
135 | RF_ASSERT(rbuf); |
136 | RF_ASSERT(rbuf->col != reconCtrlPtr->fcol); |
137 | |
138 | Dprintf4("RECON: submission by col %d for psid %ld ru %d (failed offset %ld)\n" , |
139 | rbuf->col, (long) rbuf->parityStripeID, rbuf->which_ru, (long) rbuf->failedDiskSectorOffset); |
140 | |
141 | RF_LOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); |
142 | |
143 | rf_lock_mutex2(reconCtrlPtr->rb_mutex); |
144 | while(reconCtrlPtr->rb_lock) { |
145 | rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex); |
146 | } |
147 | reconCtrlPtr->rb_lock = 1; |
148 | rf_unlock_mutex2(reconCtrlPtr->rb_mutex); |
149 | |
150 | pssPtr = rf_LookupRUStatus(raidPtr, reconCtrlPtr->pssTable, rbuf->parityStripeID, rbuf->which_ru, RF_PSS_NONE, NULL); |
151 | RF_ASSERT(pssPtr); /* if it didn't exist, we wouldn't have gotten |
152 | * an rbuf for it */ |
153 | |
154 | /* check to see if enough buffers have accumulated to do an XOR. If |
155 | * so, there's no need to acquire a floating rbuf. Before we can do |
156 | * any XORing, we must have acquired a destination buffer. If we |
157 | * have, then we can go ahead and do the XOR if (1) including this |
158 | * buffer, enough bufs have accumulated, or (2) this is the last |
159 | * submission for this stripe. Otherwise, we have to go acquire a |
160 | * floating rbuf. */ |
161 | |
162 | targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; |
163 | if ((targetRbuf != NULL) && |
164 | ((pssPtr->xorBufCount == rf_numBufsToAccumulate - 1) || (targetRbuf->count + pssPtr->xorBufCount + 1 == layoutPtr->numDataCol))) { |
165 | pssPtr->rbufsForXor[pssPtr->xorBufCount++] = rbuf; /* install this buffer */ |
166 | Dprintf2("RECON: col %d invoking a %d-way XOR\n" , rbuf->col, pssPtr->xorBufCount); |
167 | RF_ETIMER_START(timer); |
168 | rf_MultiWayReconXor(raidPtr, pssPtr); |
169 | RF_ETIMER_STOP(timer); |
170 | RF_ETIMER_EVAL(timer); |
171 | raidPtr->accumXorTimeUs += RF_ETIMER_VAL_US(timer); |
172 | if (!keep_it) { |
173 | #if RF_ACC_TRACE > 0 |
174 | raidPtr->recon_tracerecs[rbuf->col].xor_us = RF_ETIMER_VAL_US(timer); |
175 | RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
176 | RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
177 | raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += |
178 | RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
179 | RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
180 | |
181 | rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); |
182 | #endif |
183 | } |
184 | rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); |
185 | |
186 | /* if use_committed is on, we _must_ consume a buffer off the |
187 | * committed list. */ |
188 | if (use_committed) { |
189 | t = reconCtrlPtr->committedRbufs; |
190 | RF_ASSERT(t); |
191 | reconCtrlPtr->committedRbufs = t->next; |
192 | rf_ReleaseFloatingReconBuffer(raidPtr, t); |
193 | } |
194 | if (keep_it) { |
195 | RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); |
196 | rf_lock_mutex2(reconCtrlPtr->rb_mutex); |
197 | reconCtrlPtr->rb_lock = 0; |
198 | rf_broadcast_cond2(reconCtrlPtr->rb_cv); |
199 | rf_unlock_mutex2(reconCtrlPtr->rb_mutex); |
200 | rf_FreeReconBuffer(rbuf); |
201 | return (retcode); |
202 | } |
203 | goto out; |
204 | } |
205 | /* set the value of "t", which we'll use as the rbuf from here on */ |
206 | if (keep_it) { |
207 | t = rbuf; |
208 | } else { |
209 | if (use_committed) { /* if a buffer has been committed to |
210 | * us, use it */ |
211 | t = reconCtrlPtr->committedRbufs; |
212 | RF_ASSERT(t); |
213 | reconCtrlPtr->committedRbufs = t->next; |
214 | t->next = NULL; |
215 | } else |
216 | if (reconCtrlPtr->floatingRbufs) { |
217 | t = reconCtrlPtr->floatingRbufs; |
218 | reconCtrlPtr->floatingRbufs = t->next; |
219 | t->next = NULL; |
220 | } |
221 | } |
222 | |
223 | /* If we weren't able to acquire a buffer, append to the end of the |
224 | * buf list in the recon ctrl struct. */ |
225 | if (!t) { |
226 | RF_ASSERT(!keep_it && !use_committed); |
227 | Dprintf1("RECON: col %d failed to acquire floating rbuf\n" , rbuf->col); |
228 | |
229 | raidPtr->procsInBufWait++; |
230 | if ((raidPtr->procsInBufWait == raidPtr->numCol - 1) && (raidPtr->numFullReconBuffers == 0)) { |
231 | printf("Buffer wait deadlock detected. Exiting.\n" ); |
232 | rf_PrintPSStatusTable(raidPtr); |
233 | RF_PANIC(); |
234 | } |
235 | pssPtr->flags |= RF_PSS_BUFFERWAIT; |
236 | cb = rf_AllocCallbackDesc(); /* append to buf wait list in |
237 | * recon ctrl structure */ |
238 | cb->col = rbuf->col; |
239 | cb->callbackArg.v = rbuf->parityStripeID; |
240 | cb->next = NULL; |
241 | if (!reconCtrlPtr->bufferWaitList) |
242 | reconCtrlPtr->bufferWaitList = cb; |
243 | else { /* might want to maintain head/tail pointers |
244 | * here rather than search for end of list */ |
245 | for (p = reconCtrlPtr->bufferWaitList; p->next; p = p->next); |
246 | p->next = cb; |
247 | } |
248 | retcode = 1; |
249 | goto out; |
250 | } |
251 | Dprintf1("RECON: col %d acquired rbuf\n" , rbuf->col); |
252 | #if RF_ACC_TRACE > 0 |
253 | RF_ETIMER_STOP(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
254 | RF_ETIMER_EVAL(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
255 | raidPtr->recon_tracerecs[rbuf->col].specific.recon.recon_return_to_submit_us += |
256 | RF_ETIMER_VAL_US(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
257 | RF_ETIMER_START(raidPtr->recon_tracerecs[rbuf->col].recon_timer); |
258 | |
259 | rf_LogTraceRec(raidPtr, &raidPtr->recon_tracerecs[rbuf->col]); |
260 | #endif |
261 | |
262 | /* initialize the buffer */ |
263 | if (t != rbuf) { |
264 | t->col = reconCtrlPtr->fcol; |
265 | t->parityStripeID = rbuf->parityStripeID; |
266 | t->which_ru = rbuf->which_ru; |
267 | t->failedDiskSectorOffset = rbuf->failedDiskSectorOffset; |
268 | t->spCol = rbuf->spCol; |
269 | t->spOffset = rbuf->spOffset; |
270 | |
271 | ta = t->buffer; |
272 | t->buffer = rbuf->buffer; |
273 | rbuf->buffer = ta; /* swap buffers */ |
274 | } |
275 | /* the first installation always gets installed as the destination |
276 | * buffer. subsequent installations get stacked up to allow for |
277 | * multi-way XOR */ |
278 | if (!pssPtr->rbuf) { |
279 | pssPtr->rbuf = t; |
280 | t->count = 1; |
281 | } else |
282 | pssPtr->rbufsForXor[pssPtr->xorBufCount++] = t; /* install this buffer */ |
283 | |
284 | rf_CheckForFullRbuf(raidPtr, reconCtrlPtr, pssPtr, layoutPtr->numDataCol); /* the buffer is full if |
285 | * G=2 */ |
286 | |
287 | out: |
288 | RF_UNLOCK_PSS_MUTEX(raidPtr, rbuf->parityStripeID); |
289 | rf_lock_mutex2(reconCtrlPtr->rb_mutex); |
290 | reconCtrlPtr->rb_lock = 0; |
291 | rf_broadcast_cond2(reconCtrlPtr->rb_cv); |
292 | rf_unlock_mutex2(reconCtrlPtr->rb_mutex); |
293 | return (retcode); |
294 | } |
295 | /* pssPtr - the pss descriptor for this parity stripe */ |
296 | int |
297 | rf_MultiWayReconXor(RF_Raid_t *raidPtr, RF_ReconParityStripeStatus_t *pssPtr) |
298 | { |
299 | int i, numBufs = pssPtr->xorBufCount; |
300 | int numBytes = rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit * raidPtr->Layout.SUsPerRU); |
301 | RF_ReconBuffer_t **rbufs = (RF_ReconBuffer_t **) pssPtr->rbufsForXor; |
302 | RF_ReconBuffer_t *targetRbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; |
303 | |
304 | RF_ASSERT(pssPtr->rbuf != NULL); |
305 | RF_ASSERT(numBufs > 0 && numBufs < RF_PS_MAX_BUFS); |
306 | #ifdef _KERNEL |
307 | #ifndef __NetBSD__ |
308 | thread_block(); /* yield the processor before doing a big XOR */ |
309 | #endif |
310 | #endif /* _KERNEL */ |
311 | /* |
312 | * XXX |
313 | * |
314 | * What if more than 9 bufs? |
315 | */ |
316 | nWayXorFuncs[numBufs] (pssPtr->rbufsForXor, targetRbuf, numBytes / sizeof(long)); |
317 | |
318 | /* release all the reconstruction buffers except the last one, which |
319 | * belongs to the disk whose submission caused this XOR to take place */ |
320 | for (i = 0; i < numBufs - 1; i++) { |
321 | if (rbufs[i]->type == RF_RBUF_TYPE_FLOATING) |
322 | rf_ReleaseFloatingReconBuffer(raidPtr, rbufs[i]); |
323 | else |
324 | if (rbufs[i]->type == RF_RBUF_TYPE_FORCED) |
325 | rf_FreeReconBuffer(rbufs[i]); |
326 | else |
327 | RF_ASSERT(0); |
328 | } |
329 | targetRbuf->count += pssPtr->xorBufCount; |
330 | pssPtr->xorBufCount = 0; |
331 | return (0); |
332 | } |
333 | /* removes one full buffer from one of the full-buffer lists and returns it. |
334 | * |
335 | * ASSUMES THE RB_MUTEX IS UNLOCKED AT ENTRY. |
336 | */ |
337 | RF_ReconBuffer_t * |
338 | rf_GetFullReconBuffer(RF_ReconCtrl_t *reconCtrlPtr) |
339 | { |
340 | RF_ReconBuffer_t *p; |
341 | |
342 | rf_lock_mutex2(reconCtrlPtr->rb_mutex); |
343 | while(reconCtrlPtr->rb_lock) { |
344 | rf_wait_cond2(reconCtrlPtr->rb_cv, reconCtrlPtr->rb_mutex); |
345 | } |
346 | reconCtrlPtr->rb_lock = 1; |
347 | rf_unlock_mutex2(reconCtrlPtr->rb_mutex); |
348 | |
349 | if ((p = reconCtrlPtr->fullBufferList) != NULL) { |
350 | reconCtrlPtr->fullBufferList = p->next; |
351 | p->next = NULL; |
352 | } |
353 | rf_lock_mutex2(reconCtrlPtr->rb_mutex); |
354 | reconCtrlPtr->rb_lock = 0; |
355 | rf_broadcast_cond2(reconCtrlPtr->rb_cv); |
356 | rf_unlock_mutex2(reconCtrlPtr->rb_mutex); |
357 | return (p); |
358 | } |
359 | |
360 | |
361 | /* if the reconstruction buffer is full, move it to the full list, |
362 | * which is maintained sorted by failed disk sector offset |
363 | * |
364 | * ASSUMES THE RB_MUTEX IS LOCKED AT ENTRY. */ |
365 | int |
366 | rf_CheckForFullRbuf(RF_Raid_t *raidPtr, RF_ReconCtrl_t *reconCtrl, |
367 | RF_ReconParityStripeStatus_t *pssPtr, int numDataCol) |
368 | { |
369 | RF_ReconBuffer_t *p, *pt, *rbuf = (RF_ReconBuffer_t *) pssPtr->rbuf; |
370 | |
371 | if (rbuf->count == numDataCol) { |
372 | raidPtr->numFullReconBuffers++; |
373 | Dprintf2("RECON: rbuf for psid %ld ru %d has filled\n" , |
374 | (long) rbuf->parityStripeID, rbuf->which_ru); |
375 | if (!reconCtrl->fullBufferList || (rbuf->failedDiskSectorOffset < reconCtrl->fullBufferList->failedDiskSectorOffset)) { |
376 | Dprintf2("RECON: rbuf for psid %ld ru %d is head of list\n" , |
377 | (long) rbuf->parityStripeID, rbuf->which_ru); |
378 | rbuf->next = reconCtrl->fullBufferList; |
379 | reconCtrl->fullBufferList = rbuf; |
380 | } else { |
381 | for (pt = reconCtrl->fullBufferList, p = pt->next; p && p->failedDiskSectorOffset < rbuf->failedDiskSectorOffset; pt = p, p = p->next); |
382 | rbuf->next = p; |
383 | pt->next = rbuf; |
384 | Dprintf2("RECON: rbuf for psid %ld ru %d is in list\n" , |
385 | (long) rbuf->parityStripeID, rbuf->which_ru); |
386 | } |
387 | rbuf->pssPtr = pssPtr; |
388 | pssPtr->rbuf = NULL; |
389 | rf_CauseReconEvent(raidPtr, rbuf->col, NULL, RF_REVENT_BUFREADY); |
390 | } |
391 | return (0); |
392 | } |
393 | |
394 | |
395 | /* release a floating recon buffer for someone else to use. |
396 | * assumes the rb_mutex is LOCKED at entry |
397 | */ |
398 | void |
399 | rf_ReleaseFloatingReconBuffer(RF_Raid_t *raidPtr, RF_ReconBuffer_t *rbuf) |
400 | { |
401 | RF_ReconCtrl_t *rcPtr = raidPtr->reconControl; |
402 | RF_CallbackDesc_t *cb; |
403 | |
404 | Dprintf2("RECON: releasing rbuf for psid %ld ru %d\n" , |
405 | (long) rbuf->parityStripeID, rbuf->which_ru); |
406 | |
407 | /* if anyone is waiting on buffers, wake one of them up. They will |
408 | * subsequently wake up anyone else waiting on their RU */ |
409 | if (rcPtr->bufferWaitList) { |
410 | rbuf->next = rcPtr->committedRbufs; |
411 | rcPtr->committedRbufs = rbuf; |
412 | cb = rcPtr->bufferWaitList; |
413 | rcPtr->bufferWaitList = cb->next; |
414 | rf_CauseReconEvent(raidPtr, cb->col, (void *) 1, RF_REVENT_BUFCLEAR); /* arg==1 => we've |
415 | * committed a buffer */ |
416 | rf_FreeCallbackDesc(cb); |
417 | raidPtr->procsInBufWait--; |
418 | } else { |
419 | rbuf->next = rcPtr->floatingRbufs; |
420 | rcPtr->floatingRbufs = rbuf; |
421 | } |
422 | } |
423 | |