1 | /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== |
---|---|
2 | * |
3 | * The LLVM Compiler Infrastructure |
4 | * |
5 | * This file is dual licensed under the MIT and the University of Illinois Open |
6 | * Source Licenses. See LICENSE.TXT for details. |
7 | * |
8 | * ===----------------------------------------------------------------------=== |
9 | * |
10 | * This file implements __udivmoddi4 for the compiler_rt library. |
11 | * |
12 | * ===----------------------------------------------------------------------=== |
13 | */ |
14 | |
15 | #include "int_lib.h" |
16 | |
17 | /* Effects: if rem != 0, *rem = a % b |
18 | * Returns: a / b |
19 | */ |
20 | |
21 | /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ |
22 | |
23 | COMPILER_RT_ABI du_int |
24 | __udivmoddi4(du_int a, du_int b, du_int* rem) |
25 | { |
26 | const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; |
27 | const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; |
28 | udwords n; |
29 | n.all = a; |
30 | udwords d; |
31 | d.all = b; |
32 | udwords q; |
33 | udwords r; |
34 | unsigned sr; |
35 | /* special cases, X is unknown, K != 0 */ |
36 | if (n.s.high == 0) |
37 | { |
38 | if (d.s.high == 0) |
39 | { |
40 | /* 0 X |
41 | * --- |
42 | * 0 X |
43 | */ |
44 | if (rem) |
45 | *rem = n.s.low % d.s.low; |
46 | return n.s.low / d.s.low; |
47 | } |
48 | /* 0 X |
49 | * --- |
50 | * K X |
51 | */ |
52 | if (rem) |
53 | *rem = n.s.low; |
54 | return 0; |
55 | } |
56 | /* n.s.high != 0 */ |
57 | if (d.s.low == 0) |
58 | { |
59 | if (d.s.high == 0) |
60 | { |
61 | /* K X |
62 | * --- |
63 | * 0 0 |
64 | */ |
65 | if (rem) |
66 | *rem = n.s.high % d.s.low; |
67 | return n.s.high / d.s.low; |
68 | } |
69 | /* d.s.high != 0 */ |
70 | if (n.s.low == 0) |
71 | { |
72 | /* K 0 |
73 | * --- |
74 | * K 0 |
75 | */ |
76 | if (rem) |
77 | { |
78 | r.s.high = n.s.high % d.s.high; |
79 | r.s.low = 0; |
80 | *rem = r.all; |
81 | } |
82 | return n.s.high / d.s.high; |
83 | } |
84 | /* K K |
85 | * --- |
86 | * K 0 |
87 | */ |
88 | if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ |
89 | { |
90 | if (rem) |
91 | { |
92 | r.s.low = n.s.low; |
93 | r.s.high = n.s.high & (d.s.high - 1); |
94 | *rem = r.all; |
95 | } |
96 | return n.s.high >> __builtin_ctz(d.s.high); |
97 | } |
98 | /* K K |
99 | * --- |
100 | * K 0 |
101 | */ |
102 | sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); |
103 | /* 0 <= sr <= n_uword_bits - 2 or sr large */ |
104 | if (sr > n_uword_bits - 2) |
105 | { |
106 | if (rem) |
107 | *rem = n.all; |
108 | return 0; |
109 | } |
110 | ++sr; |
111 | /* 1 <= sr <= n_uword_bits - 1 */ |
112 | /* q.all = n.all << (n_udword_bits - sr); */ |
113 | q.s.low = 0; |
114 | q.s.high = n.s.low << (n_uword_bits - sr); |
115 | /* r.all = n.all >> sr; */ |
116 | r.s.high = n.s.high >> sr; |
117 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
118 | } |
119 | else /* d.s.low != 0 */ |
120 | { |
121 | if (d.s.high == 0) |
122 | { |
123 | /* K X |
124 | * --- |
125 | * 0 K |
126 | */ |
127 | if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ |
128 | { |
129 | if (rem) |
130 | *rem = n.s.low & (d.s.low - 1); |
131 | if (d.s.low == 1) |
132 | return n.all; |
133 | sr = __builtin_ctz(d.s.low); |
134 | q.s.high = n.s.high >> sr; |
135 | q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
136 | return q.all; |
137 | } |
138 | /* K X |
139 | * --- |
140 | * 0 K |
141 | */ |
142 | sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); |
143 | /* 2 <= sr <= n_udword_bits - 1 |
144 | * q.all = n.all << (n_udword_bits - sr); |
145 | * r.all = n.all >> sr; |
146 | */ |
147 | if (sr == n_uword_bits) |
148 | { |
149 | q.s.low = 0; |
150 | q.s.high = n.s.low; |
151 | r.s.high = 0; |
152 | r.s.low = n.s.high; |
153 | } |
154 | else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 |
155 | { |
156 | q.s.low = 0; |
157 | q.s.high = n.s.low << (n_uword_bits - sr); |
158 | r.s.high = n.s.high >> sr; |
159 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
160 | } |
161 | else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 |
162 | { |
163 | q.s.low = n.s.low << (n_udword_bits - sr); |
164 | q.s.high = (n.s.high << (n_udword_bits - sr)) | |
165 | (n.s.low >> (sr - n_uword_bits)); |
166 | r.s.high = 0; |
167 | r.s.low = n.s.high >> (sr - n_uword_bits); |
168 | } |
169 | } |
170 | else |
171 | { |
172 | /* K X |
173 | * --- |
174 | * K K |
175 | */ |
176 | sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); |
177 | /* 0 <= sr <= n_uword_bits - 1 or sr large */ |
178 | if (sr > n_uword_bits - 1) |
179 | { |
180 | if (rem) |
181 | *rem = n.all; |
182 | return 0; |
183 | } |
184 | ++sr; |
185 | /* 1 <= sr <= n_uword_bits */ |
186 | /* q.all = n.all << (n_udword_bits - sr); */ |
187 | q.s.low = 0; |
188 | if (sr == n_uword_bits) |
189 | { |
190 | q.s.high = n.s.low; |
191 | r.s.high = 0; |
192 | r.s.low = n.s.high; |
193 | } |
194 | else |
195 | { |
196 | q.s.high = n.s.low << (n_uword_bits - sr); |
197 | r.s.high = n.s.high >> sr; |
198 | r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); |
199 | } |
200 | } |
201 | } |
202 | /* Not a special case |
203 | * q and r are initialized with: |
204 | * q.all = n.all << (n_udword_bits - sr); |
205 | * r.all = n.all >> sr; |
206 | * 1 <= sr <= n_udword_bits - 1 |
207 | */ |
208 | su_int carry = 0; |
209 | for (; sr > 0; --sr) |
210 | { |
211 | /* r:q = ((r:q) << 1) | carry */ |
212 | r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); |
213 | r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); |
214 | q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); |
215 | q.s.low = (q.s.low << 1) | carry; |
216 | /* carry = 0; |
217 | * if (r.all >= d.all) |
218 | * { |
219 | * r.all -= d.all; |
220 | * carry = 1; |
221 | * } |
222 | */ |
223 | const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); |
224 | carry = s & 1; |
225 | r.all -= d.all & s; |
226 | } |
227 | q.all = (q.all << 1) | carry; |
228 | if (rem) |
229 | *rem = r.all; |
230 | return q.all; |
231 | } |
232 |