raw
ffa_ch5_egypt.kv        1 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 2 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. --
ffa_ch5_egypt.kv 4 -- --
ffa_ch5_egypt.kv 5 -- (C) 2017 Stanislav Datskovskiy ( www.loper-os.org ) --
ffa_ch5_egypt.kv 6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html --
ffa_ch5_egypt.kv 7 -- --
ffa_ch5_egypt.kv 8 -- You do not have, nor can you ever acquire the right to use, copy or --
ffa_ch5_egypt.kv 9 -- distribute this software ; Should you use this software for any purpose, --
ffa_ch5_egypt.kv 10 -- or copy and distribute it to anyone or in any manner, you are breaking --
ffa_ch5_egypt.kv 11 -- the laws of whatever soi-disant jurisdiction, and you promise to --
ffa_ch5_egypt.kv 12 -- continue doing so for the indefinite future. In any case, please --
ffa_ch5_egypt.kv 13 -- always : read and understand any software ; verify any PGP signatures --
ffa_ch5_egypt.kv 14 -- that you use - for any purpose. --
ffa_ch5_egypt.kv 15 -- --
ffa_ch5_egypt.kv 16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . --
ffa_ch5_egypt.kv 17 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 18 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 19
ffa_ch5_egypt.kv 20 with Words; use Words;
ffa_ch9_exodus.kv 21 with Word_Ops; use Word_Ops;
ffa_ch9_exodus.kv 22 with W_Mul; use W_Mul;
ffa_ch10_karatsub... 23 with FZ_Arith; use FZ_Arith;
ffa_ch5_egypt.kv 24
ffa_ch5_egypt.kv 25
ffa_ch5_egypt.kv 26 package body FZ_Mul is
ffa_ch10_karatsub... 27
ch11_unrolled_asm... 28 -- Comba's multiplier fastpath. (CAUTION: UNBUFFERED)
ch11_unrolled_asm... 29 procedure FZ_Mul_Comba_Fast(X : in FZ;
ch11_unrolled_asm... 30 Y : in FZ;
ch11_unrolled_asm... 31 XY : out FZ)
ch11_unrolled_asm... 32 is
ch11_unrolled_asm... 33 procedure Asm_Comba(X : in FZ;
ch11_unrolled_asm... 34 Y : in FZ;
ch11_unrolled_asm... 35 XY : out FZ;
ch11_unrolled_asm... 36 L : in Word_Index);
ch11_unrolled_asm... 37 pragma Import (C, Asm_Comba, "x86_64_comba_unrolled");
ch11_unrolled_asm... 38 begin
ch11_unrolled_asm... 39 pragma Assert(X'Length = Karatsuba_Thresh and
ch11_unrolled_asm... 40 Y'Length = Karatsuba_Thresh and
ch11_unrolled_asm... 41 XY'Length = 2*Karatsuba_Thresh);
ch11_unrolled_asm... 42 Asm_Comba(X, Y, XY, X'Length);
ch11_unrolled_asm... 43 end FZ_Mul_Comba_Fast;
ch11_unrolled_asm... 44
ch11_unrolled_asm... 45
ffa_ch10_karatsub... 46 -- Comba's multiplier. (CAUTION: UNBUFFERED)
ffa_ch9_exodus.kv 47 procedure FZ_Mul_Comba(X : in FZ;
ffa_ch9_exodus.kv 48 Y : in FZ;
ffa_ch10_karatsub... 49 XY : out FZ) is
ffa_ch5_egypt.kv 50
ffa_ch9_exodus.kv 51 -- Words in each multiplicand
ffa_ch9_exodus.kv 52 L : constant Word_Index := X'Length;
ffa_ch7_turbo_egy... 53
ffa_ch9_exodus.kv 54 -- Length of Product, i.e. double the length of either multiplicand
ffa_ch9_exodus.kv 55 LP : constant Word_Index := 2 * L;
ffa_ch5_egypt.kv 56
ffa_ch9_exodus.kv 57 -- 3-word Accumulator
ffa_ch9_exodus.kv 58 A2, A1, A0 : Word := 0;
ffa_ch9_exodus.kv 59
ffa_ch9_exodus.kv 60 -- Type for referring to a column of XY
ffa_ch10_karatsub... 61 subtype ColXY is Word_Index range 0 .. LP - 1;
ffa_ch9_exodus.kv 62
ffa_ch9_exodus.kv 63 -- Compute the Nth (indexed from zero) column of the Product
ffa_ch9_exodus.kv 64 procedure Col(N : in ColXY; U : in ColXY; V : in ColXY) is
ffa_ch9_exodus.kv 65
ffa_ch9_exodus.kv 66 -- The outputs of a Word multiplication
ffa_ch9_exodus.kv 67 Lo, Hi : Word;
ffa_ch9_exodus.kv 68
ffa_ch9_exodus.kv 69 -- Carry for the Accumulator addition
ffa_ch9_exodus.kv 70 C : WBool;
ffa_ch9_exodus.kv 71
ffa_ch9_exodus.kv 72 -- Sum for Accumulator addition
ffa_ch9_exodus.kv 73 Sum : Word;
ffa_ch9_exodus.kv 74
ffa_ch9_exodus.kv 75 begin
ffa_ch9_exodus.kv 76
ffa_ch9_exodus.kv 77 -- For lower half of XY, will go from 0 to N
ffa_ch9_exodus.kv 78 -- For upper half of XY, will go from N - L + 1 to L - 1
ffa_ch9_exodus.kv 79 for j in U .. V loop
ffa_ch9_exodus.kv 80
ffa_ch9_exodus.kv 81 -- Hi:Lo := j-th Word of X * (N - j)-th Word of Y
ffa_ch9_exodus.kv 82 Mul_Word(X(X'First + j),
ffa_ch9_exodus.kv 83 Y(Y'First - j + N),
ffa_ch9_exodus.kv 84 Lo, Hi);
ffa_ch9_exodus.kv 85
ffa_ch9_exodus.kv 86 -- Now add Hi:Lo into the Accumulator:
ffa_ch9_exodus.kv 87
ffa_ch9_exodus.kv 88 -- A0 += Lo; C := Carry
ffa_ch9_exodus.kv 89 Sum := A0 + Lo;
ffa_ch9_exodus.kv 90 C := W_Carry(A0, Lo, Sum);
ffa_ch9_exodus.kv 91 A0 := Sum;
ffa_ch9_exodus.kv 92
ffa_ch9_exodus.kv 93 -- A1 += Hi + C; C := Carry
ffa_ch9_exodus.kv 94 Sum := A1 + Hi + C;
ffa_ch9_exodus.kv 95 C := W_Carry(A1, Hi, Sum);
ffa_ch9_exodus.kv 96 A1 := Sum;
ffa_ch9_exodus.kv 97
ffa_ch9_exodus.kv 98 -- A2 += A2 + C
ffa_ch9_exodus.kv 99 A2 := A2 + C;
ffa_ch9_exodus.kv 100
ffa_ch9_exodus.kv 101 end loop;
ffa_ch9_exodus.kv 102
ffa_ch9_exodus.kv 103 -- We now have the Nth (indexed from zero) word of XY
ffa_ch10_karatsub... 104 XY(XY'First + N) := A0;
ffa_ch9_exodus.kv 105
ffa_ch9_exodus.kv 106 -- Right-Shift the Accumulator by one Word width:
ffa_ch9_exodus.kv 107 A0 := A1;
ffa_ch9_exodus.kv 108 A1 := A2;
ffa_ch9_exodus.kv 109 A2 := 0;
ffa_ch9_exodus.kv 110
ffa_ch9_exodus.kv 111 end Col;
ffa_ch9_exodus.kv 112 pragma Inline_Always(Col);
ffa_ch5_egypt.kv 113
ffa_ch5_egypt.kv 114 begin
ffa_ch5_egypt.kv 115
ffa_ch9_exodus.kv 116 -- Compute the lower half of the Product:
ffa_ch9_exodus.kv 117 for i in 0 .. L - 1 loop
ffa_ch9_exodus.kv 118
ffa_ch9_exodus.kv 119 Col(i, 0, i);
ffa_ch9_exodus.kv 120
ffa_ch9_exodus.kv 121 end loop;
ffa_ch9_exodus.kv 122
ffa_ch9_exodus.kv 123 -- Compute the upper half (sans last Word) of the Product:
ffa_ch9_exodus.kv 124 for i in L .. LP - 2 loop
ffa_ch9_exodus.kv 125
ffa_ch9_exodus.kv 126 Col(i, i - L + 1, L - 1);
ffa_ch5_egypt.kv 127
ffa_ch5_egypt.kv 128 end loop;
ffa_ch5_egypt.kv 129
ffa_ch9_exodus.kv 130 -- The very last Word of the Product:
ffa_ch9_exodus.kv 131 XY(XY'Last) := A0;
ffa_ch5_egypt.kv 132
ffa_ch9_exodus.kv 133 end FZ_Mul_Comba;
ffa_ch9_exodus.kv 134
ffa_ch10_karatsub... 135
ffa_ch10_karatsub... 136 -- Karatsuba's Multiplier. (CAUTION: UNBUFFERED)
ffa_ch10_karatsub... 137 procedure Mul_Karatsuba(X : in FZ;
ffa_ch10_karatsub... 138 Y : in FZ;
ffa_ch10_karatsub... 139 XY : out FZ) is
ffa_ch10_karatsub... 140
ffa_ch10_karatsub... 141 -- L is the wordness of a multiplicand. Guaranteed to be a power of two.
ffa_ch10_karatsub... 142 L : constant Word_Count := X'Length;
ffa_ch10_karatsub... 143
ffa_ch10_karatsub... 144 -- An 'LSeg' is the same length as either multiplicand.
ffa_ch10_karatsub... 145 subtype LSeg is FZ(1 .. L);
ffa_ch10_karatsub... 146
ffa_ch10_karatsub... 147 -- K is HALF of the length of a multiplicand.
ffa_ch10_karatsub... 148 K : constant Word_Index := L / 2;
ffa_ch10_karatsub... 149
ffa_ch10_karatsub... 150 -- A 'KSeg' is the same length as HALF of a multiplicand.
ffa_ch10_karatsub... 151 subtype KSeg is FZ(1 .. K);
ffa_ch10_karatsub... 152
ffa_ch10_karatsub... 153 -- The three L-sized variables of the product equation, i.e.:
ffa_ch10_karatsub... 154 -- XY = LL + 2^(K*Bitness)(LL + HH + (-1^DD_Sub)*DD) + 2^(2*K*Bitness)HH
ffa_ch10_karatsub... 155 LL, DD, HH : LSeg;
ffa_ch10_karatsub... 156
ffa_ch10_karatsub... 157 -- K-sized terms of Dx * Dy = DD
ffa_ch10_karatsub... 158 Dx, Dy : KSeg; -- Dx = abs(XLo - XHi) , Dy = abs(YLo - YHi)
ffa_ch10_karatsub... 159
ffa_ch10_karatsub... 160 -- Subtraction borrows, signs of (XL - XH) and (YL - YH),
ffa_ch10_karatsub... 161 Cx, Cy : WBool; -- so that we can calculate (-1^DD_Sub)
ffa_ch10_karatsub... 162
ffa_ch10_karatsub... 163 -- Bottom and Top K-sized halves of the multiplicand X.
ffa_ch10_karatsub... 164 XLo : KSeg renames X( X'First .. X'Last - K );
ffa_ch10_karatsub... 165 XHi : KSeg renames X( X'First + K .. X'Last );
ffa_ch10_karatsub... 166
ffa_ch10_karatsub... 167 -- Bottom and Top K-sized halves of the multiplicand Y.
ffa_ch10_karatsub... 168 YLo : KSeg renames Y( Y'First .. Y'Last - K );
ffa_ch10_karatsub... 169 YHi : KSeg renames Y( Y'First + K .. Y'Last );
ffa_ch10_karatsub... 170
ffa_ch10_karatsub... 171 -- L-sized middle segment of the product XY (+/- K from the midpoint).
ffa_ch10_karatsub... 172 XYMid : LSeg renames XY( XY'First + K .. XY'Last - K );
ffa_ch10_karatsub... 173
ffa_ch10_karatsub... 174 -- Bottom and Top L-sized halves of the product XY.
ffa_ch10_karatsub... 175 XYLo : LSeg renames XY( XY'First .. XY'Last - L );
ffa_ch10_karatsub... 176 XYHi : LSeg renames XY( XY'First + L .. XY'Last );
ffa_ch10_karatsub... 177
ffa_ch10_karatsub... 178 -- Topmost K-sized quarter segment of the product XY, or 'tail'
ffa_ch10_karatsub... 179 XYHiHi : KSeg renames XYHi( XYHi'First + K .. XYHi'Last );
ffa_ch10_karatsub... 180
ffa_ch10_karatsub... 181 -- Whether the DD term is being subtracted.
ffa_ch10_karatsub... 182 DD_Sub : WBool;
ffa_ch10_karatsub... 183
ffa_ch10_karatsub... 184 -- Carry from individual term additions.
ffa_ch10_karatsub... 185 C : WBool;
ffa_ch10_karatsub... 186
ffa_ch10_karatsub... 187 -- Tail-Carry accumulator, for the final ripple
ffa_ch10_karatsub... 188 TC : Word;
ffa_ch10_karatsub... 189
ffa_ch10_karatsub... 190 begin
ffa_ch10_karatsub... 191
ffa_ch10_karatsub... 192 -- Recurse: LL := XL * YL
ffa_ch11_tuning_a... 193 FZ_Multiply_Unbuffered(XLo, YLo, LL);
ffa_ch10_karatsub... 194
ffa_ch10_karatsub... 195 -- Recurse: HH := XH * YH
ffa_ch11_tuning_a... 196 FZ_Multiply_Unbuffered(XHi, YHi, HH);
ffa_ch10_karatsub... 197
ffa_ch10_karatsub... 198 -- Dx := |XL - XH| , Cx := Borrow (i.e. 1 iff XL < XH)
ffa_ch10_karatsub... 199 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
ffa_ch10_karatsub... 200
ffa_ch10_karatsub... 201 -- Dy := |YL - YH| , Cy := Borrow (i.e. 1 iff YL < YH)
ffa_ch10_karatsub... 202 FZ_Sub_Abs(X => YLo, Y => YHi, Difference => Dy, Underflow => Cy);
ffa_ch10_karatsub... 203
ffa_ch10_karatsub... 204 -- Recurse: DD := Dx * Dy
ffa_ch11_tuning_a... 205 FZ_Multiply_Unbuffered(Dx, Dy, DD);
ffa_ch10_karatsub... 206
ffa_ch10_karatsub... 207 -- Whether (XL - XH)(YL - YH) is positive, and so DD must be subtracted:
ffa_ch10_karatsub... 208 DD_Sub := 1 - (Cx xor Cy);
ffa_ch10_karatsub... 209
ffa_ch10_karatsub... 210 -- XY := LL + 2^(2 * K * Bitness) * HH
ffa_ch10_karatsub... 211 XYLo := LL;
ffa_ch10_karatsub... 212 XYHi := HH;
ffa_ch10_karatsub... 213
ffa_ch10_karatsub... 214 -- XY += 2^(K * Bitness) * HH, but carry goes in Tail Carry accum.
ffa_ch10_karatsub... 215 FZ_Add_D(X => XYMid, Y => HH, Overflow => TC);
ffa_ch10_karatsub... 216
ffa_ch10_karatsub... 217 -- XY += 2^(K * Bitness) * LL, ...
ffa_ch10_karatsub... 218 FZ_Add_D(X => XYMid, Y => LL, Overflow => C);
ffa_ch10_karatsub... 219
ffa_ch10_karatsub... 220 -- ... but the carry goes into the Tail Carry accumulator.
ffa_ch10_karatsub... 221 TC := TC + C;
ffa_ch10_karatsub... 222
ffa_ch10_karatsub... 223 -- XY += 2^(K * Bitness) * (-1^DD_Sub) * DD
ffa_ch10_karatsub... 224 FZ_Not_Cond_D(N => DD, Cond => DD_Sub); -- invert DD if 2s-complementing
ffa_ch10_karatsub... 225 FZ_Add_D(OF_In => DD_Sub, -- ... and then must increment
ffa_ch10_karatsub... 226 X => XYMid,
ffa_ch10_karatsub... 227 Y => DD,
ffa_ch10_karatsub... 228 Overflow => C); -- carry will go in Tail Carry accumulator
ffa_ch10_karatsub... 229
ffa_ch10_karatsub... 230 -- Compute the final Tail Carry for the ripple
ffa_ch10_karatsub... 231 TC := TC + C - DD_Sub;
ffa_ch10_karatsub... 232
ffa_ch10_karatsub... 233 -- Barring a cosmic ray, 0 <= TC <= 2 .
ffa_ch10_karatsub... 234 pragma Assert(TC <= 2);
ffa_ch10_karatsub... 235
ffa_ch10_karatsub... 236 -- Ripple the Tail Carry into the tail.
ffa_ch10_karatsub... 237 FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => C);
ffa_ch10_karatsub... 238
ffa_ch10_karatsub... 239 -- Barring a cosmic ray, the tail ripple will NOT overflow.
ffa_ch10_karatsub... 240 pragma Assert(C = 0);
ffa_ch10_karatsub... 241
ffa_ch10_karatsub... 242 end Mul_Karatsuba;
ffa_ch10_karatsub... 243 -- CAUTION: Inlining prohibited for Mul_Karatsuba !
ffa_ch10_karatsub... 244
ffa_ch10_karatsub... 245
ffa_ch10_karatsub... 246 -- Multiplier. (CAUTION: UNBUFFERED)
ffa_ch11_tuning_a... 247 procedure FZ_Multiply_Unbuffered(X : in FZ;
ffa_ch11_tuning_a... 248 Y : in FZ;
ffa_ch11_tuning_a... 249 XY : out FZ) is
ffa_ch10_karatsub... 250
ffa_ch10_karatsub... 251 -- The length of either multiplicand
ffa_ch10_karatsub... 252 L : constant Word_Count := X'Length;
ffa_ch10_karatsub... 253
ffa_ch10_karatsub... 254 begin
ffa_ch10_karatsub... 255
ch11_unrolled_asm... 256 if L = Karatsuba_Thresh then
ffa_ch10_karatsub... 257
ch11_unrolled_asm... 258 -- Optimized case:
ch11_unrolled_asm... 259 FZ_Mul_Comba_Fast(X, Y, XY);
ch11_unrolled_asm... 260 elsif L < Karatsuba_Thresh then
ch11_unrolled_asm... 261
ch11_unrolled_asm... 262 -- Base case
ch11_unrolled_asm... 263 FZ_Mul_Comba(X, Y, XY);
ffa_ch10_karatsub... 264 else
ffa_ch10_karatsub... 265
ffa_ch10_karatsub... 266 -- Recursive case:
ffa_ch10_karatsub... 267 Mul_Karatsuba(X, Y, XY);
ffa_ch10_karatsub... 268
ffa_ch10_karatsub... 269 end if;
ffa_ch10_karatsub... 270
ffa_ch11_tuning_a... 271 end FZ_Multiply_Unbuffered;
ffa_ch10_karatsub... 272
ffa_ch10_karatsub... 273
ffa_ch10_karatsub... 274 -- Multiplier. Preserves the inputs.
ffa_ch11_tuning_a... 275 procedure FZ_Multiply_Buffered(X : in FZ;
ffa_ch11_tuning_a... 276 Y : in FZ;
ffa_ch11_tuning_a... 277 XY_Lo : out FZ;
ffa_ch11_tuning_a... 278 XY_Hi : out FZ) is
ffa_ch10_karatsub... 279
ffa_ch10_karatsub... 280 -- Product buffer.
ffa_ch10_karatsub... 281 P : FZ(1 .. 2 * X'Length);
ffa_ch10_karatsub... 282
ffa_ch10_karatsub... 283 begin
ffa_ch10_karatsub... 284
ffa_ch11_tuning_a... 285 FZ_Multiply_Unbuffered(X, Y, P);
ffa_ch10_karatsub... 286
ffa_ch10_karatsub... 287 XY_Lo := P(P'First .. P'First + X'Length - 1);
ffa_ch10_karatsub... 288 XY_Hi := P(P'First + X'Length .. P'Last);
ffa_ch10_karatsub... 289
ffa_ch11_tuning_a... 290 end FZ_Multiply_Buffered;
ffa_ch10_karatsub... 291
ffa_ch5_egypt.kv 292 end FZ_Mul;