1 | /* $NetBSD: rf_stripelocks.c,v 1.32 2011/05/05 08:21:29 mrg Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Authors: Mark Holland, Jim Zelenka |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | |
29 | /* |
30 | * stripelocks.c -- code to lock stripes for read and write access |
31 | * |
32 | * The code distinguishes between read locks and write locks. There can be |
33 | * as many readers to given stripe as desired. When a write request comes |
34 | * in, no further readers are allowed to enter, and all subsequent requests |
35 | * are queued in FIFO order. When the number of readers goes to zero, the |
36 | * writer is given the lock. When a writer releases the lock, the list of |
37 | * queued requests is scanned, and all readers up to the next writer are |
38 | * given the lock. |
39 | * |
40 | * The lock table size must be a power of two, because HASH_STRIPEID masks the |
41 | * stripe ID with (size - 1); it is the only function that requires this. |
42 | * |
43 | * The code now supports "range locks". When you ask to lock a stripe, you |
44 | * specify a range of addresses in that stripe that you want to lock. When |
45 | * you acquire the lock, you've locked only this range of addresses, and |
46 | * other threads can concurrently read/write any non-overlapping portions |
47 | * of the stripe. The "addresses" that you lock are abstract in that you |
48 | * can pass in anything you like. The expectation is that you'll pass in |
49 | * the range of physical disk offsets of the parity bits you're planning |
50 | * to update. The idea behind this, of course, is to allow sub-stripe |
51 | * locking. The implementation is perhaps not the best imaginable; in the |
52 | * worst case a lock release is O(n^2) in the total number of outstanding |
53 | * requests to a given stripe. Note that if you're striping with a |
54 | * stripe unit size equal to an entire disk (i.e. not striping), there will |
55 | * be only one stripe and you may spend some significant number of cycles |
56 | * searching through stripe lock descriptors. |
57 | */ |
58 | |
59 | #include <sys/cdefs.h> |
60 | __KERNEL_RCSID(0, "$NetBSD: rf_stripelocks.c,v 1.32 2011/05/05 08:21:29 mrg Exp $" ); |
61 | |
62 | #include <dev/raidframe/raidframevar.h> |
63 | |
64 | #include "rf_raid.h" |
65 | #include "rf_stripelocks.h" |
66 | #include "rf_alloclist.h" |
67 | #include "rf_debugprint.h" |
68 | #include "rf_general.h" |
69 | #include "rf_driver.h" |
70 | #include "rf_shutdown.h" |
71 | |
#ifdef DEBUG

/*
 * Debug-print wrappers.  Each DprintfN hands rf_debug_printf() the format
 * string followed by exactly eight pointer-sized slots: the N caller
 * arguments, each converted through unsigned long to void *, and NULL in
 * the remaining slots.
 *
 * RF_DPRINTF_ARG parenthesizes its operand so that the cast applies to the
 * whole argument expression, not only to its first operand.
 */
#define RF_DPRINTF_ARG(x) ((void *)((unsigned long)(x)))

#define Dprintf1(s,a) rf_debug_printf(s,RF_DPRINTF_ARG(a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),RF_DPRINTF_ARG(c),NULL,NULL,NULL,NULL,NULL)
#define Dprintf4(s,a,b,c,d) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),RF_DPRINTF_ARG(c),RF_DPRINTF_ARG(d),NULL,NULL,NULL,NULL)
#define Dprintf5(s,a,b,c,d,e) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),RF_DPRINTF_ARG(c),RF_DPRINTF_ARG(d),RF_DPRINTF_ARG(e),NULL,NULL,NULL)
#define Dprintf6(s,a,b,c,d,e,f) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),RF_DPRINTF_ARG(c),RF_DPRINTF_ARG(d),RF_DPRINTF_ARG(e),RF_DPRINTF_ARG(f),NULL,NULL)
#define Dprintf7(s,a,b,c,d,e,f,g) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),RF_DPRINTF_ARG(c),RF_DPRINTF_ARG(d),RF_DPRINTF_ARG(e),RF_DPRINTF_ARG(f),RF_DPRINTF_ARG(g),NULL)
#define Dprintf8(s,a,b,c,d,e,f,g,h) rf_debug_printf(s,RF_DPRINTF_ARG(a),RF_DPRINTF_ARG(b),RF_DPRINTF_ARG(c),RF_DPRINTF_ARG(d),RF_DPRINTF_ARG(e),RF_DPRINTF_ARG(f),RF_DPRINTF_ARG(g),RF_DPRINTF_ARG(h))

#else /* DEBUG */

/*
 * Without DEBUG the wrappers compile to a single empty statement.  The
 * do/while(0) form (instead of a bare "{}") keeps "Dprintf1(...);" usable
 * as the body of an un-braced if/else.  The arguments are deliberately
 * never evaluated: callers pass identifiers (e.g. tid) that are only
 * declared when DEBUG is defined.
 */
#define Dprintf1(s,a) do { } while (0)
#define Dprintf2(s,a,b) do { } while (0)
#define Dprintf3(s,a,b,c) do { } while (0)
#define Dprintf4(s,a,b,c,d) do { } while (0)
#define Dprintf5(s,a,b,c,d,e) do { } while (0)
#define Dprintf6(s,a,b,c,d,e,f) do { } while (0)
#define Dprintf7(s,a,b,c,d,e,f,g) do { } while (0)
#define Dprintf8(s,a,b,c,d,e,f,g,h) do { } while (0)

#endif /* DEBUG */
95 | |
/*
 * FLUSH expands to nothing, so the "FLUSH;" statements that follow the
 * debug prints in this file are empty statements.
 */
#define FLUSH

/*
 * Map a stripe ID to a lock table slot by masking it with
 * (rf_lockTableSize - 1).  The mask only selects a valid slot when the
 * table size is a power of two.
 */
#define HASH_STRIPEID(_sid_) ((_sid_) & (rf_lockTableSize - 1))
99 | |
100 | static void AddToWaitersQueue(RF_StripeLockDesc_t * lockDesc, |
101 | RF_LockReqDesc_t * lockReqDesc); |
102 | static RF_StripeLockDesc_t *AllocStripeLockDesc(RF_StripeNum_t stripeID); |
103 | static void FreeStripeLockDesc(RF_StripeLockDesc_t * p); |
104 | static RF_LockTableEntry_t *rf_MakeLockTable(void); |
105 | #if RF_DEBUG_STRIPELOCK |
106 | static void PrintLockedStripes(RF_LockTableEntry_t * lockTable); |
107 | #endif |
108 | |
/*
 * Determines if two ranges overlap; the stop values are inclusive.  Always
 * yields false if either start value is negative.
 *
 * NOTE: the arguments are evaluated more than once, so they must not have
 * side effects.
 */
#define SINGLE_RANGE_OVERLAP(_strt1, _stop1, _strt2, _stop2) \
	( ((_strt1) >= 0) && ((_strt2) >= 0) && \
	  (RF_MAX((_strt1), (_strt2)) <= RF_MIN((_stop1), (_stop2))) )

/*
 * Determines if any of the ranges specified in the two lock descriptors
 * overlap each other.  Each descriptor carries two ranges (start/stop and
 * start2/stop2); all four pairings are tested.
 */
#define RANGE_OVERLAP(_cand, _pred) \
	( SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop, \
	                       (_pred)->start,  (_pred)->stop ) || \
	  SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, \
	                       (_pred)->start,  (_pred)->stop ) || \
	  SINGLE_RANGE_OVERLAP((_cand)->start,  (_cand)->stop, \
	                       (_pred)->start2, (_pred)->stop2) || \
	  SINGLE_RANGE_OVERLAP((_cand)->start2, (_cand)->stop2, \
	                       (_pred)->start2, (_pred)->stop2) )

/*
 * Determines if a candidate lock request conflicts with a predecessor
 * lock request.  Note that the arguments are not interchangeable.
 *
 * The rules are:
 *
 *   a candidate read conflicts with a predecessor write if any
 *   ranges overlap
 *
 *   a candidate write conflicts with a predecessor read if any
 *   ranges overlap
 *
 *   a candidate write conflicts with a predecessor write if any
 *   ranges overlap
 *
 * A read never conflicts with another read.
 *
 * The whole expansion is parenthesized so that the macro behaves as a
 * single operand, e.g. under "!" or next to "&&" / "||".
 */
#define STRIPELOCK_CONFLICT(_cand, _pred) \
	( RANGE_OVERLAP((_cand), (_pred)) && \
	  ( (((_cand)->type == RF_IO_TYPE_READ) && \
	     ((_pred)->type == RF_IO_TYPE_WRITE)) || \
	    (((_cand)->type == RF_IO_TYPE_WRITE) && \
	     ((_pred)->type == RF_IO_TYPE_READ)) || \
	    (((_cand)->type == RF_IO_TYPE_WRITE) && \
	     ((_pred)->type == RF_IO_TYPE_WRITE)) \
	  ) \
	)
152 | |
153 | #define RF_MAX_FREE_STRIPELOCK 128 |
154 | #define RF_MIN_FREE_STRIPELOCK 32 |
155 | |
156 | static void rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable); |
157 | static void rf_ShutdownStripeLockFreeList(void *); |
158 | static void rf_RaidShutdownStripeLocks(void *); |
159 | |
160 | static void |
161 | rf_ShutdownStripeLockFreeList(void *ignored) |
162 | { |
163 | pool_destroy(&rf_pools.stripelock); |
164 | } |
165 | |
166 | int |
167 | rf_ConfigureStripeLockFreeList(RF_ShutdownList_t **listp) |
168 | { |
169 | unsigned mask; |
170 | |
171 | rf_pool_init(&rf_pools.stripelock, sizeof(RF_StripeLockDesc_t), |
172 | "rf_stripelock_pl" , RF_MIN_FREE_STRIPELOCK, RF_MAX_FREE_STRIPELOCK); |
173 | rf_ShutdownCreate(listp, rf_ShutdownStripeLockFreeList, NULL); |
174 | |
175 | for (mask = 0x1; mask; mask <<= 1) |
176 | if (rf_lockTableSize == mask) |
177 | break; |
178 | if (!mask) { |
179 | printf("[WARNING: lock table size must be a power of two. Setting to %d.]\n" , RF_DEFAULT_LOCK_TABLE_SIZE); |
180 | rf_lockTableSize = RF_DEFAULT_LOCK_TABLE_SIZE; |
181 | } |
182 | return (0); |
183 | } |
184 | |
185 | static void |
186 | rf_DestroyLockTable(RF_LockTableEntry_t *lockTable) |
187 | { |
188 | int i; |
189 | |
190 | for (i = 0; i < rf_lockTableSize; i++) { |
191 | rf_destroy_mutex2(lockTable[i].mutex); |
192 | } |
193 | RF_Free(lockTable, rf_lockTableSize * sizeof(RF_LockTableEntry_t)); |
194 | } |
195 | |
196 | static RF_LockTableEntry_t * |
197 | rf_MakeLockTable(void) |
198 | { |
199 | RF_LockTableEntry_t *lockTable; |
200 | int i; |
201 | |
202 | RF_Malloc(lockTable, |
203 | ((int) rf_lockTableSize) * sizeof(RF_LockTableEntry_t), |
204 | (RF_LockTableEntry_t *)); |
205 | if (lockTable == NULL) |
206 | return (NULL); |
207 | for (i = 0; i < rf_lockTableSize; i++) { |
208 | rf_init_mutex2(lockTable[i].mutex, IPL_VM); |
209 | } |
210 | return (lockTable); |
211 | } |
212 | |
213 | static void |
214 | rf_ShutdownStripeLocks(RF_LockTableEntry_t * lockTable) |
215 | { |
216 | |
217 | #if RF_DEBUG_STRIPELOCK |
218 | if (rf_stripeLockDebug) { |
219 | PrintLockedStripes(lockTable); |
220 | } |
221 | #endif |
222 | rf_DestroyLockTable(lockTable); |
223 | } |
224 | |
225 | static void |
226 | rf_RaidShutdownStripeLocks(void *arg) |
227 | { |
228 | RF_Raid_t *raidPtr = (RF_Raid_t *) arg; |
229 | rf_ShutdownStripeLocks(raidPtr->lockTable); |
230 | } |
231 | |
232 | int |
233 | rf_ConfigureStripeLocks(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, |
234 | RF_Config_t *cfgPtr) |
235 | { |
236 | |
237 | raidPtr->lockTable = rf_MakeLockTable(); |
238 | if (raidPtr->lockTable == NULL) |
239 | return (ENOMEM); |
240 | rf_ShutdownCreate(listp, rf_RaidShutdownStripeLocks, raidPtr); |
241 | |
242 | return (0); |
243 | } |
244 | /* returns 0 if you've got the lock, and non-zero if you have to wait. |
245 | * if and only if you have to wait, we'll cause cbFunc to get invoked |
246 | * with cbArg when you are granted the lock. We store a tag in |
247 | * *releaseTag that you need to give back to us when you release the |
248 | * lock. */ |
249 | int |
250 | rf_AcquireStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID, |
251 | RF_LockReqDesc_t *lockReqDesc) |
252 | { |
253 | RF_StripeLockDesc_t *lockDesc; |
254 | RF_StripeLockDesc_t *newlockDesc; |
255 | RF_LockReqDesc_t *p; |
256 | #if defined(DEBUG) && (RF_DEBUG_STRIPELOCK > 0) |
257 | int tid = 0; |
258 | #endif |
259 | int hashval = HASH_STRIPEID(stripeID); |
260 | int retcode = 0; |
261 | |
262 | RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); |
263 | |
264 | #if RF_DEBUG_STRIPELOCK |
265 | if (rf_stripeLockDebug) { |
266 | if (stripeID == -1) { |
267 | Dprintf1("[%d] Lock acquisition supressed (stripeID == -1)\n" , tid); |
268 | } else { |
269 | Dprintf8("[%d] Trying to acquire stripe lock table 0x%lx SID %ld type %c range %ld-%ld, range2 %ld-%ld hashval %d\n" , |
270 | tid, (unsigned long) lockTable, stripeID, lockReqDesc->type, lockReqDesc->start, |
271 | lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); |
272 | Dprintf3("[%d] lock %ld hashval %d\n" , tid, stripeID, hashval); |
273 | FLUSH; |
274 | } |
275 | } |
276 | #endif |
277 | if (stripeID == -1) |
278 | return (0); |
279 | lockReqDesc->next = NULL; /* just to be sure */ |
280 | newlockDesc = AllocStripeLockDesc(stripeID); |
281 | |
282 | rf_lock_mutex2(lockTable[hashval].mutex); |
283 | for (lockDesc = lockTable[hashval].descList; lockDesc; |
284 | lockDesc = lockDesc->next) { |
285 | if (lockDesc->stripeID == stripeID) |
286 | break; |
287 | } |
288 | |
289 | if (!lockDesc) { |
290 | /* no entry in table => no one reading or writing */ |
291 | lockDesc = newlockDesc; |
292 | lockDesc->next = lockTable[hashval].descList; |
293 | lockTable[hashval].descList = lockDesc; |
294 | if (lockReqDesc->type == RF_IO_TYPE_WRITE) |
295 | lockDesc->nWriters++; |
296 | lockDesc->granted = lockReqDesc; |
297 | #if RF_DEBUG_STRIPELOCK |
298 | if (rf_stripeLockDebug) { |
299 | Dprintf7("[%d] no one waiting: lock %ld %c %ld-%ld %ld-%ld granted\n" , |
300 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); |
301 | FLUSH; |
302 | } |
303 | #endif |
304 | } else { |
305 | /* we won't be needing newlockDesc after all.. pity.. */ |
306 | FreeStripeLockDesc(newlockDesc); |
307 | |
308 | if (lockReqDesc->type == RF_IO_TYPE_WRITE) |
309 | lockDesc->nWriters++; |
310 | |
311 | if (lockDesc->nWriters == 0) { |
312 | /* no need to search any lists if there are no |
313 | * writers anywhere */ |
314 | lockReqDesc->next = lockDesc->granted; |
315 | lockDesc->granted = lockReqDesc; |
316 | #if RF_DEBUG_STRIPELOCK |
317 | if (rf_stripeLockDebug) { |
318 | Dprintf7("[%d] no writers: lock %ld %c %ld-%ld %ld-%ld granted\n" , |
319 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2); |
320 | FLUSH; |
321 | } |
322 | #endif |
323 | } else { |
324 | |
325 | /* search the granted & waiting lists for a |
326 | * conflict. stop searching as soon as we |
327 | * find one */ |
328 | retcode = 0; |
329 | for (p = lockDesc->granted; p; p = p->next) |
330 | if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { |
331 | retcode = 1; |
332 | break; |
333 | } |
334 | if (!retcode) |
335 | for (p = lockDesc->waitersH; p; p = p->next) |
336 | if (STRIPELOCK_CONFLICT(lockReqDesc, p)) { |
337 | retcode = 2; |
338 | break; |
339 | } |
340 | if (!retcode) { |
341 | /* no conflicts found => grant lock */ |
342 | lockReqDesc->next = lockDesc->granted; |
343 | lockDesc->granted = lockReqDesc; |
344 | #if RF_DEBUG_STRIPELOCK |
345 | if (rf_stripeLockDebug) { |
346 | Dprintf7("[%d] no conflicts: lock %ld %c %ld-%ld %ld-%ld granted\n" , |
347 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, |
348 | lockReqDesc->start2, lockReqDesc->stop2); |
349 | FLUSH; |
350 | } |
351 | #endif |
352 | } else { |
353 | #if RF_DEBUG_STRIPELOCK |
354 | if (rf_stripeLockDebug) { |
355 | Dprintf6("[%d] conflict: lock %ld %c %ld-%ld hashval=%d not granted\n" , |
356 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, |
357 | hashval); |
358 | Dprintf3("[%d] lock %ld retcode=%d\n" , tid, stripeID, retcode); |
359 | FLUSH; |
360 | } |
361 | #endif |
362 | AddToWaitersQueue(lockDesc, lockReqDesc); |
363 | /* conflict => the current access must wait */ |
364 | } |
365 | } |
366 | } |
367 | |
368 | rf_unlock_mutex2(lockTable[hashval].mutex); |
369 | return (retcode); |
370 | } |
371 | |
372 | void |
373 | rf_ReleaseStripeLock(RF_LockTableEntry_t *lockTable, RF_StripeNum_t stripeID, |
374 | RF_LockReqDesc_t *lockReqDesc) |
375 | { |
376 | RF_StripeLockDesc_t *lockDesc, *ld_t; |
377 | RF_LockReqDesc_t *lr, *lr_t, *callbacklist, *t; |
378 | #if defined(DEBUG) && (RF_DEBUG_STRIPELOCK > 0) |
379 | int tid = 0; |
380 | #endif |
381 | int hashval = HASH_STRIPEID(stripeID); |
382 | int release_it, consider_it; |
383 | RF_LockReqDesc_t *candidate, *candidate_t, *predecessor; |
384 | |
385 | RF_ASSERT(RF_IO_IS_R_OR_W(lockReqDesc->type)); |
386 | |
387 | #if RF_DEBUG_STRIPELOCK |
388 | if (rf_stripeLockDebug) { |
389 | if (stripeID == -1) { |
390 | Dprintf1("[%d] Lock release supressed (stripeID == -1)\n" , tid); |
391 | } else { |
392 | Dprintf8("[%d] Releasing stripe lock on stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
393 | tid, stripeID, lockReqDesc->type, lockReqDesc->start, lockReqDesc->stop, lockReqDesc->start2, lockReqDesc->stop2, lockTable); |
394 | FLUSH; |
395 | } |
396 | } |
397 | #endif |
398 | if (stripeID == -1) |
399 | return; |
400 | |
401 | rf_lock_mutex2(lockTable[hashval].mutex); |
402 | |
403 | /* find the stripe lock descriptor */ |
404 | for (ld_t = NULL, lockDesc = lockTable[hashval].descList; |
405 | lockDesc; ld_t = lockDesc, lockDesc = lockDesc->next) { |
406 | if (lockDesc->stripeID == stripeID) |
407 | break; |
408 | } |
409 | RF_ASSERT(lockDesc); /* major error to release a lock that doesn't |
410 | * exist */ |
411 | |
412 | /* find the stripe lock request descriptor & delete it from the list */ |
413 | for (lr_t = NULL, lr = lockDesc->granted; lr; lr_t = lr, lr = lr->next) |
414 | if (lr == lockReqDesc) |
415 | break; |
416 | |
417 | RF_ASSERT(lr && (lr == lockReqDesc)); /* major error to release a |
418 | * lock that hasn't been |
419 | * granted */ |
420 | if (lr_t) |
421 | lr_t->next = lr->next; |
422 | else { |
423 | RF_ASSERT(lr == lockDesc->granted); |
424 | lockDesc->granted = lr->next; |
425 | } |
426 | lr->next = NULL; |
427 | |
428 | if (lockReqDesc->type == RF_IO_TYPE_WRITE) |
429 | lockDesc->nWriters--; |
430 | |
431 | /* search through the waiters list to see if anyone needs to |
432 | * be woken up. for each such descriptor in the wait list, we |
433 | * check it against everything granted and against everything |
434 | * _in front_ of it in the waiters queue. If it conflicts |
435 | * with none of these, we release it. |
436 | * |
437 | * DON'T TOUCH THE TEMPLINK POINTER OF ANYTHING IN THE GRANTED |
438 | * LIST HERE. |
439 | * |
440 | * This will roach the case where the callback tries to |
441 | * acquire a new lock in the same stripe. There are some |
442 | * asserts to try and detect this. |
443 | * |
444 | * We apply 2 performance optimizations: (1) if releasing this |
445 | * lock results in no more writers to this stripe, we just |
446 | * release everybody waiting, since we place no restrictions |
447 | * on the number of concurrent reads. (2) we consider as |
448 | * candidates for wakeup only those waiters that have a range |
449 | * overlap with either the descriptor being woken up or with |
450 | * something in the callbacklist (i.e. something we've just |
451 | * now woken up). This allows us to avoid the long evaluation |
452 | * for some descriptors. */ |
453 | |
454 | callbacklist = NULL; |
455 | if (lockDesc->nWriters == 0) { /* performance tweak (1) */ |
456 | while (lockDesc->waitersH) { |
457 | /* delete from waiters list */ |
458 | lr = lockDesc->waitersH; |
459 | lockDesc->waitersH = lr->next; |
460 | |
461 | RF_ASSERT(lr->type == RF_IO_TYPE_READ); |
462 | |
463 | /* add to granted list */ |
464 | lr->next = lockDesc->granted; |
465 | lockDesc->granted = lr; |
466 | |
467 | RF_ASSERT(!lr->templink); |
468 | /* put on callback list so that we'll invoke |
469 | callback below */ |
470 | lr->templink = callbacklist; |
471 | callbacklist = lr; |
472 | #if RF_DEBUG_STRIPELOCK |
473 | if (rf_stripeLockDebug) { |
474 | Dprintf8("[%d] No writers: granting lock stripe ID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
475 | tid, stripeID, lr->type, lr->start, lr->stop, lr->start2, lr->stop2, (unsigned long) lockTable); |
476 | FLUSH; |
477 | } |
478 | #endif |
479 | } |
480 | lockDesc->waitersT = NULL; |
481 | /* we've purged the whole waiters list */ |
482 | |
483 | } else |
484 | for (candidate_t = NULL, candidate = lockDesc->waitersH; |
485 | candidate;) { |
486 | |
487 | /* performance tweak (2) */ |
488 | consider_it = 0; |
489 | if (RANGE_OVERLAP(lockReqDesc, candidate)) |
490 | consider_it = 1; |
491 | else |
492 | for (t = callbacklist; t; t = t->templink) |
493 | if (RANGE_OVERLAP(t, candidate)) { |
494 | consider_it = 1; |
495 | break; |
496 | } |
497 | if (!consider_it) { |
498 | #if RF_DEBUG_STRIPELOCK |
499 | if (rf_stripeLockDebug) { |
500 | Dprintf8("[%d] No overlap: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
501 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
502 | (unsigned long) lockTable); |
503 | FLUSH; |
504 | } |
505 | #endif |
506 | candidate_t = candidate; |
507 | candidate = candidate->next; |
508 | continue; |
509 | } |
510 | /* we have a candidate for release. check to |
511 | * make sure it is not blocked by any granted |
512 | * locks */ |
513 | release_it = 1; |
514 | for (predecessor = lockDesc->granted; predecessor; |
515 | predecessor = predecessor->next) { |
516 | if (STRIPELOCK_CONFLICT(candidate, |
517 | predecessor)) { |
518 | #if RF_DEBUG_STRIPELOCK |
519 | if (rf_stripeLockDebug) { |
520 | Dprintf8("[%d] Conflicts with granted lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
521 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
522 | (unsigned long) lockTable); |
523 | FLUSH; |
524 | } |
525 | #endif |
526 | release_it = 0; |
527 | break; |
528 | } |
529 | } |
530 | |
531 | /* now check to see if the candidate is |
532 | * blocked by any waiters that occur before it |
533 | * it the wait queue */ |
534 | if (release_it) |
535 | for (predecessor = lockDesc->waitersH; |
536 | predecessor != candidate; |
537 | predecessor = predecessor->next) { |
538 | if (STRIPELOCK_CONFLICT(candidate, |
539 | predecessor)) { |
540 | #if RF_DEBUG_STRIPELOCK |
541 | if (rf_stripeLockDebug) { |
542 | Dprintf8("[%d] Conflicts with waiting lock: rejecting candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
543 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
544 | (unsigned long) lockTable); |
545 | FLUSH; |
546 | } |
547 | #endif |
548 | release_it = 0; |
549 | break; |
550 | } |
551 | } |
552 | |
553 | /* release it if indicated */ |
554 | if (release_it) { |
555 | #if RF_DEBUG_STRIPELOCK |
556 | if (rf_stripeLockDebug) { |
557 | Dprintf8("[%d] Granting lock to candidate stripeID %ld, type %c range %ld-%ld %ld-%ld table 0x%lx\n" , |
558 | tid, stripeID, candidate->type, candidate->start, candidate->stop, candidate->start2, candidate->stop2, |
559 | (unsigned long) lockTable); |
560 | FLUSH; |
561 | } |
562 | #endif |
563 | if (candidate_t) { |
564 | candidate_t->next = candidate->next; |
565 | if (lockDesc->waitersT == candidate) |
566 | lockDesc->waitersT = candidate_t; /* cannot be waitersH since candidate_t is not NULL */ |
567 | } else { |
568 | RF_ASSERT(candidate == lockDesc->waitersH); |
569 | lockDesc->waitersH = lockDesc->waitersH->next; |
570 | if (!lockDesc->waitersH) |
571 | lockDesc->waitersT = NULL; |
572 | } |
573 | /* move it to the granted list */ |
574 | candidate->next = lockDesc->granted; |
575 | lockDesc->granted = candidate; |
576 | |
577 | RF_ASSERT(!candidate->templink); |
578 | /* put it on the list of things to be |
579 | called after we release the mutex */ |
580 | candidate->templink = callbacklist; |
581 | |
582 | callbacklist = candidate; |
583 | |
584 | if (!candidate_t) |
585 | candidate = lockDesc->waitersH; |
586 | else |
587 | candidate = candidate_t->next; |
588 | /* continue with the rest of the list */ |
589 | } else { |
590 | candidate_t = candidate; |
591 | /* continue with the rest of the list */ |
592 | candidate = candidate->next; |
593 | } |
594 | } |
595 | |
596 | /* delete the descriptor if no one is waiting or active */ |
597 | if (!lockDesc->granted && !lockDesc->waitersH) { |
598 | RF_ASSERT(lockDesc->nWriters == 0); |
599 | #if RF_DEBUG_STRIPELOCK |
600 | if (rf_stripeLockDebug) { |
601 | Dprintf3("[%d] Last lock released (table 0x%lx): deleting desc for stripeID %ld\n" , tid, (unsigned long) lockTable, stripeID); |
602 | FLUSH; |
603 | } |
604 | #endif |
605 | if (ld_t) |
606 | ld_t->next = lockDesc->next; |
607 | else { |
608 | RF_ASSERT(lockDesc == lockTable[hashval].descList); |
609 | lockTable[hashval].descList = lockDesc->next; |
610 | } |
611 | FreeStripeLockDesc(lockDesc); |
612 | lockDesc = NULL;/* only for the ASSERT below */ |
613 | } |
614 | rf_unlock_mutex2(lockTable[hashval].mutex); |
615 | |
616 | /* now that we've unlocked the mutex, invoke the callback on |
617 | * all the descriptors in the list */ |
618 | |
619 | /* if we deleted the descriptor, we should have no callbacks |
620 | * to do */ |
621 | RF_ASSERT(!((callbacklist) && (!lockDesc))); |
622 | for (candidate = callbacklist; candidate;) { |
623 | t = candidate; |
624 | candidate = candidate->templink; |
625 | t->templink = NULL; |
626 | (t->cbFunc) (t->cbArg); |
627 | } |
628 | } |
629 | /* must have the indicated lock table mutex upon entry */ |
630 | static void |
631 | AddToWaitersQueue(RF_StripeLockDesc_t *lockDesc, RF_LockReqDesc_t *lockReqDesc) |
632 | { |
633 | if (!lockDesc->waitersH) { |
634 | lockDesc->waitersH = lockDesc->waitersT = lockReqDesc; |
635 | } else { |
636 | lockDesc->waitersT->next = lockReqDesc; |
637 | lockDesc->waitersT = lockReqDesc; |
638 | } |
639 | } |
640 | |
641 | static RF_StripeLockDesc_t * |
642 | AllocStripeLockDesc(RF_StripeNum_t stripeID) |
643 | { |
644 | RF_StripeLockDesc_t *p; |
645 | |
646 | p = pool_get(&rf_pools.stripelock, PR_WAITOK); |
647 | if (p) { |
648 | p->stripeID = stripeID; |
649 | p->granted = NULL; |
650 | p->waitersH = NULL; |
651 | p->waitersT = NULL; |
652 | p->nWriters = 0; |
653 | p->next = NULL; |
654 | } |
655 | return (p); |
656 | } |
657 | |
658 | static void |
659 | FreeStripeLockDesc(RF_StripeLockDesc_t *p) |
660 | { |
661 | pool_put(&rf_pools.stripelock, p); |
662 | } |
663 | |
#if RF_DEBUG_STRIPELOCK
/*
 * Debug aid: print every stripe descriptor found in lockTable together
 * with its granted and waiting requests.  Each request is shown as
 * type(start-stop[,start2-stop2]); the second range is omitted when
 * start2 is -1.  A line break is emitted after every request whose
 * non-zero index is a multiple of four.
 *
 * The output is serialized with rf_printf_mutex; the per-slot table
 * mutexes are not taken here.
 */
static void
PrintLockedStripes(RF_LockTableEntry_t *lockTable)
{
	int slot, count, anyFound = 0, endedLine;
	RF_StripeLockDesc_t *desc;
	RF_LockReqDesc_t *req;

	rf_lock_mutex2(rf_printf_mutex);
	printf("Locked stripes:\n");
	for (slot = 0; slot < rf_lockTableSize; slot++) {
		if (lockTable[slot].descList == NULL)
			continue;
		anyFound = 1;

		for (desc = lockTable[slot].descList; desc != NULL;
		    desc = desc->next) {
			printf("Stripe ID 0x%lx (%d) nWriters %d\n",
			    (long) desc->stripeID, (int) desc->stripeID,
			    desc->nWriters);

			/* granted requests */
			if (desc->granted)
				printf("Granted:\n");
			else
				printf("Granted: (none)\n");
			endedLine = 1;
			count = 0;
			for (req = desc->granted; req != NULL;
			    req = req->next, count++) {
				printf(" %c(%ld-%ld", req->type, (long) req->start, (long) req->stop);
				if (req->start2 == -1)
					printf(") ");
				else
					printf(",%ld-%ld) ", (long) req->start2,
					    (long) req->stop2);
				endedLine = (count != 0 && (count % 4) == 0);
				if (endedLine)
					printf("\n");
			}
			if (!endedLine)
				printf("\n");

			/* waiting requests */
			if (desc->waitersH)
				printf("Waiting:\n");
			else
				printf("Waiting: (none)\n");
			endedLine = 1;
			count = 0;
			for (req = desc->waitersH; req != NULL;
			    req = req->next, count++) {
				printf("%c(%ld-%ld", req->type, (long) req->start, (long) req->stop);
				if (req->start2 == -1)
					printf(") ");
				else
					printf(",%ld-%ld) ", (long) req->start2, (long) req->stop2);
				endedLine = (count != 0 && (count % 4) == 0);
				if (endedLine)
					printf("\n ");
			}
			if (!endedLine)
				printf("\n");
		}
	}
	if (anyFound)
		printf("\n");
	else
		printf("(none)\n");
	rf_unlock_mutex2(rf_printf_mutex);
}
#endif
731 | |