1 | /* $NetBSD: rf_nwayxor.c,v 1.11 2006/11/16 01:33:23 christos Exp $ */ |
2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. |
5 | * |
6 | * Author: Mark Holland, Daniel Stodolsky |
7 | * |
8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. |
13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * |
18 | * Carnegie Mellon requests users of this software to return to |
19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science |
22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 |
24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. |
27 | */ |
28 | |
29 | /************************************************************ |
30 | * |
31 | * nwayxor.c -- code to do N-way xors for reconstruction |
32 | * |
33 | * nWayXorN xors N input buffers into the destination buffer. |
34 | * adapted from danner's longword_bxor code. |
35 | * |
36 | ************************************************************/ |
37 | |
38 | #include <sys/cdefs.h> |
39 | __KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.11 2006/11/16 01:33:23 christos Exp $" ); |
40 | |
41 | #include "rf_nwayxor.h" |
42 | #include "rf_shutdown.h" |
43 | |
/*
 * Invocation counters for the n-way xor routines in this file:
 * rf_nWayXorN increments callcount[N].  Slot 0 is cleared and printed
 * along with the rest but is not incremented by any routine in this file.
 */
static int callcount[10];
/* Forward declaration of the shutdown hook that prints the counters. */
static void rf_ShutdownNWayXor(void *);
46 | |
47 | static void |
48 | rf_ShutdownNWayXor(void *ignored) |
49 | { |
50 | int i; |
51 | |
52 | if (rf_showXorCallCounts == 0) |
53 | return; |
54 | printf("Call counts for n-way xor routines: " ); |
55 | for (i = 0; i < 10; i++) |
56 | printf("%d " , callcount[i]); |
57 | printf("\n" ); |
58 | } |
59 | |
60 | int |
61 | rf_ConfigureNWayXor(RF_ShutdownList_t **listp) |
62 | { |
63 | int i; |
64 | |
65 | for (i = 0; i < 10; i++) |
66 | callcount[i] = 0; |
67 | rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); |
68 | return (0); |
69 | } |
70 | |
71 | void |
72 | rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
73 | { |
74 | unsigned long *src = (unsigned long *) src_rbs[0]->buffer; |
75 | unsigned long *dest = (unsigned long *) dest_rb->buffer; |
76 | unsigned long *end = src + len; |
77 | unsigned long d0, d1, d2, d3, s0, s1, s2, s3; |
78 | |
79 | callcount[1]++; |
80 | while (len >= 4) { |
81 | d0 = dest[0]; |
82 | d1 = dest[1]; |
83 | d2 = dest[2]; |
84 | d3 = dest[3]; |
85 | s0 = src[0]; |
86 | s1 = src[1]; |
87 | s2 = src[2]; |
88 | s3 = src[3]; |
89 | dest[0] = d0 ^ s0; |
90 | dest[1] = d1 ^ s1; |
91 | dest[2] = d2 ^ s2; |
92 | dest[3] = d3 ^ s3; |
93 | src += 4; |
94 | dest += 4; |
95 | len -= 4; |
96 | } |
97 | while (src < end) { |
98 | *dest++ ^= *src++; |
99 | } |
100 | } |
101 | |
102 | void |
103 | rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
104 | { |
105 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
106 | unsigned long *a = dst; |
107 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
108 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
109 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
110 | |
111 | callcount[2]++; |
112 | /* align dest to cache line */ |
113 | while ((((unsigned long) dst) & 0x1f)) { |
114 | *dst++ = *a++ ^ *b++ ^ *c++; |
115 | len--; |
116 | } |
117 | while (len > 4) { |
118 | a0 = a[0]; |
119 | len -= 4; |
120 | |
121 | a1 = a[1]; |
122 | a2 = a[2]; |
123 | |
124 | a3 = a[3]; |
125 | a += 4; |
126 | |
127 | b0 = b[0]; |
128 | b1 = b[1]; |
129 | |
130 | b2 = b[2]; |
131 | b3 = b[3]; |
132 | /* start dual issue */ |
133 | a0 ^= b0; |
134 | b0 = c[0]; |
135 | |
136 | b += 4; |
137 | a1 ^= b1; |
138 | |
139 | a2 ^= b2; |
140 | a3 ^= b3; |
141 | |
142 | b1 = c[1]; |
143 | a0 ^= b0; |
144 | |
145 | b2 = c[2]; |
146 | a1 ^= b1; |
147 | |
148 | b3 = c[3]; |
149 | a2 ^= b2; |
150 | |
151 | dst[0] = a0; |
152 | a3 ^= b3; |
153 | dst[1] = a1; |
154 | c += 4; |
155 | dst[2] = a2; |
156 | dst[3] = a3; |
157 | dst += 4; |
158 | } |
159 | while (len) { |
160 | *dst++ = *a++ ^ *b++ ^ *c++; |
161 | len--; |
162 | } |
163 | } |
/*
 * LOAD_FIRST -- begin one four-word step of an n-way xor.
 *
 * Loads four words from _dst into a0..a3 and four words from _b into
 * b0..b3, then subtracts 4 from len.  Note that the first arg is not
 * incremented but the 2nd arg is (by four words).
 *
 * Relies on a0..a3, b0..b3 and len being declared in the enclosing scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define LOAD_FIRST(_dst,_b) do { \
	a0 = (_dst)[0]; len -= 4; \
	a1 = (_dst)[1]; \
	a2 = (_dst)[2]; \
	a3 = (_dst)[3]; \
	b0 = (_b)[0]; \
	b1 = (_b)[1]; \
	b2 = (_b)[2]; \
	b3 = (_b)[3]; (_b) += 4; \
} while (0)
174 | |
/*
 * XOR_AND_LOAD_NEXT -- fold the pending source words into the accumulators
 * and fetch the next source.
 *
 * XORs b0..b3 into a0..a3, then reloads b0..b3 from the four words at _n.
 * Note: the arg is incremented (by four words).
 *
 * Relies on a0..a3 and b0..b3 being declared in the enclosing scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define XOR_AND_LOAD_NEXT(_n) do { \
	a0 ^= b0; b0 = (_n)[0]; \
	a1 ^= b1; b1 = (_n)[1]; \
	a2 ^= b2; b2 = (_n)[2]; \
	a3 ^= b3; b3 = (_n)[3]; \
	(_n) += 4; \
} while (0)
182 | |
/*
 * XOR_AND_STORE -- finish one four-word step of an n-way xor.
 *
 * XORs b0..b3 into a0..a3 and writes the four results to _dst.
 * The arg is incremented (by four words).
 *
 * Relies on a0..a3 and b0..b3 being declared in the enclosing scope.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define XOR_AND_STORE(_dst) do { \
	a0 ^= b0; (_dst)[0] = a0; \
	a1 ^= b1; (_dst)[1] = a1; \
	a2 ^= b2; (_dst)[2] = a2; \
	a3 ^= b3; (_dst)[3] = a3; \
	(_dst) += 4; \
} while (0)
190 | |
191 | |
192 | void |
193 | rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
194 | { |
195 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
196 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
197 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
198 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
199 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
200 | |
201 | callcount[3]++; |
202 | /* align dest to cache line */ |
203 | while ((((unsigned long) dst) & 0x1f)) { |
204 | *dst++ ^= *b++ ^ *c++ ^ *d++; |
205 | len--; |
206 | } |
207 | while (len > 4) { |
208 | LOAD_FIRST(dst, b); |
209 | XOR_AND_LOAD_NEXT(c); |
210 | XOR_AND_LOAD_NEXT(d); |
211 | XOR_AND_STORE(dst); |
212 | } |
213 | while (len) { |
214 | *dst++ ^= *b++ ^ *c++ ^ *d++; |
215 | len--; |
216 | } |
217 | } |
218 | |
219 | void |
220 | rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
221 | { |
222 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
223 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
224 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
225 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
226 | unsigned long *e = (unsigned long *) src_rbs[3]->buffer; |
227 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
228 | |
229 | callcount[4]++; |
230 | /* align dest to cache line */ |
231 | while ((((unsigned long) dst) & 0x1f)) { |
232 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; |
233 | len--; |
234 | } |
235 | while (len > 4) { |
236 | LOAD_FIRST(dst, b); |
237 | XOR_AND_LOAD_NEXT(c); |
238 | XOR_AND_LOAD_NEXT(d); |
239 | XOR_AND_LOAD_NEXT(e); |
240 | XOR_AND_STORE(dst); |
241 | } |
242 | while (len) { |
243 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; |
244 | len--; |
245 | } |
246 | } |
247 | |
248 | void |
249 | rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
250 | { |
251 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
252 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
253 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
254 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
255 | unsigned long *e = (unsigned long *) src_rbs[3]->buffer; |
256 | unsigned long *f = (unsigned long *) src_rbs[4]->buffer; |
257 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
258 | |
259 | callcount[5]++; |
260 | /* align dest to cache line */ |
261 | while ((((unsigned long) dst) & 0x1f)) { |
262 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; |
263 | len--; |
264 | } |
265 | while (len > 4) { |
266 | LOAD_FIRST(dst, b); |
267 | XOR_AND_LOAD_NEXT(c); |
268 | XOR_AND_LOAD_NEXT(d); |
269 | XOR_AND_LOAD_NEXT(e); |
270 | XOR_AND_LOAD_NEXT(f); |
271 | XOR_AND_STORE(dst); |
272 | } |
273 | while (len) { |
274 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; |
275 | len--; |
276 | } |
277 | } |
278 | |
279 | void |
280 | rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
281 | { |
282 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
283 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
284 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
285 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
286 | unsigned long *e = (unsigned long *) src_rbs[3]->buffer; |
287 | unsigned long *f = (unsigned long *) src_rbs[4]->buffer; |
288 | unsigned long *g = (unsigned long *) src_rbs[5]->buffer; |
289 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
290 | |
291 | callcount[6]++; |
292 | /* align dest to cache line */ |
293 | while ((((unsigned long) dst) & 0x1f)) { |
294 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; |
295 | len--; |
296 | } |
297 | while (len > 4) { |
298 | LOAD_FIRST(dst, b); |
299 | XOR_AND_LOAD_NEXT(c); |
300 | XOR_AND_LOAD_NEXT(d); |
301 | XOR_AND_LOAD_NEXT(e); |
302 | XOR_AND_LOAD_NEXT(f); |
303 | XOR_AND_LOAD_NEXT(g); |
304 | XOR_AND_STORE(dst); |
305 | } |
306 | while (len) { |
307 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; |
308 | len--; |
309 | } |
310 | } |
311 | |
312 | void |
313 | rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
314 | { |
315 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
316 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
317 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
318 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
319 | unsigned long *e = (unsigned long *) src_rbs[3]->buffer; |
320 | unsigned long *f = (unsigned long *) src_rbs[4]->buffer; |
321 | unsigned long *g = (unsigned long *) src_rbs[5]->buffer; |
322 | unsigned long *h = (unsigned long *) src_rbs[6]->buffer; |
323 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
324 | |
325 | callcount[7]++; |
326 | /* align dest to cache line */ |
327 | while ((((unsigned long) dst) & 0x1f)) { |
328 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; |
329 | len--; |
330 | } |
331 | while (len > 4) { |
332 | LOAD_FIRST(dst, b); |
333 | XOR_AND_LOAD_NEXT(c); |
334 | XOR_AND_LOAD_NEXT(d); |
335 | XOR_AND_LOAD_NEXT(e); |
336 | XOR_AND_LOAD_NEXT(f); |
337 | XOR_AND_LOAD_NEXT(g); |
338 | XOR_AND_LOAD_NEXT(h); |
339 | XOR_AND_STORE(dst); |
340 | } |
341 | while (len) { |
342 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; |
343 | len--; |
344 | } |
345 | } |
346 | |
347 | void |
348 | rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
349 | { |
350 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
351 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
352 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
353 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
354 | unsigned long *e = (unsigned long *) src_rbs[3]->buffer; |
355 | unsigned long *f = (unsigned long *) src_rbs[4]->buffer; |
356 | unsigned long *g = (unsigned long *) src_rbs[5]->buffer; |
357 | unsigned long *h = (unsigned long *) src_rbs[6]->buffer; |
358 | unsigned long *i = (unsigned long *) src_rbs[7]->buffer; |
359 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
360 | |
361 | callcount[8]++; |
362 | /* align dest to cache line */ |
363 | while ((((unsigned long) dst) & 0x1f)) { |
364 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; |
365 | len--; |
366 | } |
367 | while (len > 4) { |
368 | LOAD_FIRST(dst, b); |
369 | XOR_AND_LOAD_NEXT(c); |
370 | XOR_AND_LOAD_NEXT(d); |
371 | XOR_AND_LOAD_NEXT(e); |
372 | XOR_AND_LOAD_NEXT(f); |
373 | XOR_AND_LOAD_NEXT(g); |
374 | XOR_AND_LOAD_NEXT(h); |
375 | XOR_AND_LOAD_NEXT(i); |
376 | XOR_AND_STORE(dst); |
377 | } |
378 | while (len) { |
379 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; |
380 | len--; |
381 | } |
382 | } |
383 | |
384 | |
385 | void |
386 | rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) |
387 | { |
388 | unsigned long *dst = (unsigned long *) dest_rb->buffer; |
389 | unsigned long *b = (unsigned long *) src_rbs[0]->buffer; |
390 | unsigned long *c = (unsigned long *) src_rbs[1]->buffer; |
391 | unsigned long *d = (unsigned long *) src_rbs[2]->buffer; |
392 | unsigned long *e = (unsigned long *) src_rbs[3]->buffer; |
393 | unsigned long *f = (unsigned long *) src_rbs[4]->buffer; |
394 | unsigned long *g = (unsigned long *) src_rbs[5]->buffer; |
395 | unsigned long *h = (unsigned long *) src_rbs[6]->buffer; |
396 | unsigned long *i = (unsigned long *) src_rbs[7]->buffer; |
397 | unsigned long *j = (unsigned long *) src_rbs[8]->buffer; |
398 | unsigned long a0, a1, a2, a3, b0, b1, b2, b3; |
399 | |
400 | callcount[9]++; |
401 | /* align dest to cache line */ |
402 | while ((((unsigned long) dst) & 0x1f)) { |
403 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; |
404 | len--; |
405 | } |
406 | while (len > 4) { |
407 | LOAD_FIRST(dst, b); |
408 | XOR_AND_LOAD_NEXT(c); |
409 | XOR_AND_LOAD_NEXT(d); |
410 | XOR_AND_LOAD_NEXT(e); |
411 | XOR_AND_LOAD_NEXT(f); |
412 | XOR_AND_LOAD_NEXT(g); |
413 | XOR_AND_LOAD_NEXT(h); |
414 | XOR_AND_LOAD_NEXT(i); |
415 | XOR_AND_LOAD_NEXT(j); |
416 | XOR_AND_STORE(dst); |
417 | } |
418 | while (len) { |
419 | *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; |
420 | len--; |
421 | } |
422 | } |
423 | |