raw
ffa_ch5_egypt.kv        1 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 2 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 3 -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. --
ffa_ch5_egypt.kv 4 -- --
ffa_ch15_gcd.kv 5 -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) --
ffa_ch5_egypt.kv 6 -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html --
ffa_ch5_egypt.kv 7 -- --
ffa_ch5_egypt.kv 8 -- You do not have, nor can you ever acquire the right to use, copy or --
ffa_ch5_egypt.kv 9 -- distribute this software ; Should you use this software for any purpose, --
ffa_ch5_egypt.kv 10 -- or copy and distribute it to anyone or in any manner, you are breaking --
ffa_ch5_egypt.kv 11 -- the laws of whatever soi-disant jurisdiction, and you promise to --
ffa_ch5_egypt.kv 12 -- continue doing so for the indefinite future. In any case, please --
ffa_ch5_egypt.kv 13 -- always : read and understand any software ; verify any PGP signatures --
ffa_ch5_egypt.kv 14 -- that you use - for any purpose. --
ffa_ch5_egypt.kv 15 -- --
ffa_ch5_egypt.kv 16 -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . --
ffa_ch5_egypt.kv 17 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 18 ------------------------------------------------------------------------------
ffa_ch5_egypt.kv 19
ffa_ch5_egypt.kv 20 with Words; use Words;
ffa_ch9_exodus.kv 21 with Word_Ops; use Word_Ops;
ffa_ch9_exodus.kv 22 with W_Mul; use W_Mul;
ffa_ch10_karatsub... 23 with FZ_Arith; use FZ_Arith;
ffa_ch5_egypt.kv 24
ffa_ch5_egypt.kv 25
ffa_ch5_egypt.kv 26 package body FZ_Mul is
ffa_ch10_karatsub... 27
ffa_ch10_karatsub... 28 -- Comba's multiplier: computes XY := X * Y one column at a time. (CAUTION: UNBUFFERED)
ffa_ch9_exodus.kv 29 procedure FZ_Mul_Comba(X : in FZ;
ffa_ch9_exodus.kv 30 Y : in FZ;
ffa_ch10_karatsub... 31 XY : out FZ) is
ffa_ch5_egypt.kv 32
ffa_ch9_exodus.kv 33 -- Words in each multiplicand
ffa_ch9_exodus.kv 34 L : constant Word_Index := X'Length;
ffa_ch7_turbo_egy... 35
ffa_ch9_exodus.kv 36 -- Length of Product, i.e. double the length of either multiplicand
ffa_ch9_exodus.kv 37 LP : constant Word_Index := 2 * L;
ffa_ch5_egypt.kv 38
ffa_ch9_exodus.kv 39 -- 3-word Accumulator (A2:A1:A0, A0 least significant), carried over from column to column
ffa_ch9_exodus.kv 40 A2, A1, A0 : Word := 0;
ffa_ch9_exodus.kv 41
ffa_ch9_exodus.kv 42 -- Type for referring to a column of XY
ffa_ch10_karatsub... 43 subtype ColXY is Word_Index range 0 .. LP - 1;
ffa_ch9_exodus.kv 44
ffa_ch9_exodus.kv 45 -- Compute the Nth (indexed from zero) column of the Product: adds X(j) * Y(N - j) for j in U .. V into the Accumulator
ffa_ch9_exodus.kv 46 procedure Col(N : in ColXY; U : in ColXY; V : in ColXY) is
ffa_ch9_exodus.kv 47
ffa_ch9_exodus.kv 48 -- The outputs of a Word multiplication
ffa_ch9_exodus.kv 49 Lo, Hi : Word;
ffa_ch9_exodus.kv 50
ffa_ch9_exodus.kv 51 -- Carry for the Accumulator addition
ffa_ch9_exodus.kv 52 C : WBool;
ffa_ch9_exodus.kv 53
ffa_ch9_exodus.kv 54 -- Sum for Accumulator addition
ffa_ch9_exodus.kv 55 Sum : Word;
ffa_ch9_exodus.kv 56
ffa_ch9_exodus.kv 57 begin
ffa_ch9_exodus.kv 58
ffa_ch9_exodus.kv 59 -- For lower half of XY, j will go from 0 to N
ffa_ch9_exodus.kv 60 -- For upper half of XY, j will go from N - L + 1 to L - 1
ffa_ch9_exodus.kv 61 for j in U .. V loop
ffa_ch9_exodus.kv 62
ffa_ch9_exodus.kv 63 -- Hi:Lo := j-th Word of X * (N - j)-th Word of Y
ffa_ch9_exodus.kv 64 Mul_Word(X(X'First + j),
ffa_ch9_exodus.kv 65 Y(Y'First - j + N),
ffa_ch9_exodus.kv 66 Lo, Hi);
ffa_ch9_exodus.kv 67
ffa_ch9_exodus.kv 68 -- Now add Hi:Lo into the Accumulator:
ffa_ch9_exodus.kv 69
ffa_ch9_exodus.kv 70 -- A0 += Lo; C := Carry
ffa_ch9_exodus.kv 71 Sum := A0 + Lo;
ffa_ch9_exodus.kv 72 C := W_Carry(A0, Lo, Sum);
ffa_ch9_exodus.kv 73 A0 := Sum;
ffa_ch9_exodus.kv 74
ffa_ch9_exodus.kv 75 -- A1 += Hi + C; C := Carry
ffa_ch9_exodus.kv 76 Sum := A1 + Hi + C;
ffa_ch9_exodus.kv 77 C := W_Carry(A1, Hi, Sum);
ffa_ch9_exodus.kv 78 A1 := Sum;
ffa_ch9_exodus.kv 79
ffa_ch9_exodus.kv 80 -- A2 += C
ffa_ch9_exodus.kv 81 A2 := A2 + C;
ffa_ch9_exodus.kv 82
ffa_ch9_exodus.kv 83 end loop;
ffa_ch9_exodus.kv 84
ffa_ch9_exodus.kv 85 -- We now have the Nth (indexed from zero) word of XY
ffa_ch10_karatsub... 86 XY(XY'First + N) := A0;
ffa_ch9_exodus.kv 87
ffa_ch9_exodus.kv 88 -- Right-Shift the Accumulator by one Word width:
ffa_ch9_exodus.kv 89 A0 := A1;
ffa_ch9_exodus.kv 90 A1 := A2;
ffa_ch9_exodus.kv 91 A2 := 0;
ffa_ch9_exodus.kv 92
ffa_ch9_exodus.kv 93 end Col;
ffa_ch9_exodus.kv 94 pragma Inline_Always(Col);
ffa_ch5_egypt.kv 95
ffa_ch5_egypt.kv 96 begin
ffa_ch5_egypt.kv 97
ffa_ch9_exodus.kv 98 -- Compute the lower half of the Product:
ffa_ch9_exodus.kv 99 for i in 0 .. L - 1 loop
ffa_ch9_exodus.kv 100
ffa_ch9_exodus.kv 101 Col(i, 0, i);
ffa_ch9_exodus.kv 102
ffa_ch9_exodus.kv 103 end loop;
ffa_ch9_exodus.kv 104
ffa_ch9_exodus.kv 105 -- Compute the upper half (sans last Word) of the Product:
ffa_ch9_exodus.kv 106 for i in L .. LP - 2 loop
ffa_ch9_exodus.kv 107
ffa_ch9_exodus.kv 108 Col(i, i - L + 1, L - 1);
ffa_ch5_egypt.kv 109
ffa_ch5_egypt.kv 110 end loop;
ffa_ch5_egypt.kv 111
ffa_ch9_exodus.kv 112 -- The very last Word of the Product (what the final Accumulator shift left in A0):
ffa_ch9_exodus.kv 113 XY(XY'Last) := A0;
ffa_ch5_egypt.kv 114
ffa_ch9_exodus.kv 115 end FZ_Mul_Comba;
ffa_ch9_exodus.kv 116
ffa_ch10_karatsub... 117
ffa_ch10_karatsub... 118 -- Karatsuba's Multiplier: builds XY from three half-length products (LL, HH, DD). (CAUTION: UNBUFFERED)
ffa_ch10_karatsub... 119 procedure Mul_Karatsuba(X : in FZ;
ffa_ch10_karatsub... 120 Y : in FZ;
ffa_ch10_karatsub... 121 XY : out FZ) is
ffa_ch10_karatsub... 122
ffa_ch10_karatsub... 123 -- L is the wordness of a multiplicand. Guaranteed to be a power of two.
ffa_ch10_karatsub... 124 L : constant Word_Count := X'Length;
ffa_ch10_karatsub... 125
ffa_ch10_karatsub... 126 -- An 'LSeg' is the same length as either multiplicand.
ffa_ch10_karatsub... 127 subtype LSeg is FZ(1 .. L);
ffa_ch10_karatsub... 128
ffa_ch10_karatsub... 129 -- K is HALF of the length of a multiplicand.
ffa_ch10_karatsub... 130 K : constant Word_Index := L / 2;
ffa_ch10_karatsub... 131
ffa_ch10_karatsub... 132 -- A 'KSeg' is the same length as HALF of a multiplicand.
ffa_ch10_karatsub... 133 subtype KSeg is FZ(1 .. K);
ffa_ch10_karatsub... 134
ffa_ch10_karatsub... 135 -- The three L-sized variables of the product equation, i.e.:
ffa_ch10_karatsub... 136 -- XY = LL + 2^(K*Bitness) * (LL + HH + ((-1)^DD_Sub)*DD) + 2^(2*K*Bitness) * HH
ffa_ch10_karatsub... 137 LL, DD, HH : LSeg;
ffa_ch10_karatsub... 138
ffa_ch10_karatsub... 139 -- K-sized terms of Dx * Dy = DD
ffa_ch10_karatsub... 140 Dx, Dy : KSeg; -- Dx = abs(XLo - XHi) , Dy = abs(YLo - YHi)
ffa_ch10_karatsub... 141
ffa_ch10_karatsub... 142 -- Subtraction borrows, signs of (XL - XH) and (YL - YH),
ffa_ch10_karatsub... 143 Cx, Cy : WBool; -- so that we can calculate ((-1)^DD_Sub)
ffa_ch10_karatsub... 144
ffa_ch10_karatsub... 145 -- Bottom and Top K-sized halves of the multiplicand X.
ffa_ch10_karatsub... 146 XLo : KSeg renames X( X'First .. X'Last - K );
ffa_ch10_karatsub... 147 XHi : KSeg renames X( X'First + K .. X'Last );
ffa_ch10_karatsub... 148
ffa_ch10_karatsub... 149 -- Bottom and Top K-sized halves of the multiplicand Y.
ffa_ch10_karatsub... 150 YLo : KSeg renames Y( Y'First .. Y'Last - K );
ffa_ch10_karatsub... 151 YHi : KSeg renames Y( Y'First + K .. Y'Last );
ffa_ch10_karatsub... 152
ffa_ch10_karatsub... 153 -- L-sized middle segment of the product XY (+/- K from the midpoint).
ffa_ch10_karatsub... 154 XYMid : LSeg renames XY( XY'First + K .. XY'Last - K );
ffa_ch10_karatsub... 155
ffa_ch10_karatsub... 156 -- Bottom and Top L-sized halves of the product XY.
ffa_ch10_karatsub... 157 XYLo : LSeg renames XY( XY'First .. XY'Last - L );
ffa_ch10_karatsub... 158 XYHi : LSeg renames XY( XY'First + L .. XY'Last );
ffa_ch10_karatsub... 159
ffa_ch10_karatsub... 160 -- Topmost K-sized quarter segment of the product XY, or 'tail'
ffa_ch10_karatsub... 161 XYHiHi : KSeg renames XYHi( XYHi'First + K .. XYHi'Last );
ffa_ch10_karatsub... 162
ffa_ch10_karatsub... 163 -- Whether the DD term is being subtracted.
ffa_ch10_karatsub... 164 DD_Sub : WBool;
ffa_ch10_karatsub... 165
ffa_ch10_karatsub... 166 -- Carry from individual term additions.
ffa_ch10_karatsub... 167 C : WBool;
ffa_ch10_karatsub... 168
ffa_ch12_karatsub... 169 -- Barring a cosmic ray, 0 <= TC <= 2
ffa_ch12_karatsub... 170 subtype TailCarry is Word range 0 .. 2;
ffa_ch12_karatsub... 171
ffa_ch12_karatsub... 172 -- Tail-Carry accumulator, for the final ripple-out into XYHiHi
ffa_ch12_karatsub... 173 TC : TailCarry := 0;
ffa_ch12_karatsub... 174
ffa_ch12_karatsub... 175 -- Barring a cosmic ray, the tail ripple will NOT overflow.
ffa_ch12_karatsub... 176 FinalCarry : WZeroOrDie := 0;
ffa_ch10_karatsub... 177
ffa_ch10_karatsub... 178 begin
ffa_ch10_karatsub... 179
ffa_ch10_karatsub... 180 -- Recurse: LL := XL * YL
ffa_ch11_tuning_a... 181 FZ_Multiply_Unbuffered(XLo, YLo, LL);
ffa_ch10_karatsub... 182
ffa_ch10_karatsub... 183 -- Recurse: HH := XH * YH
ffa_ch11_tuning_a... 184 FZ_Multiply_Unbuffered(XHi, YHi, HH);
ffa_ch10_karatsub... 185
ffa_ch10_karatsub... 186 -- Dx := |XL - XH| , Cx := Borrow (i.e. 1 iff XL < XH)
ffa_ch10_karatsub... 187 FZ_Sub_Abs(X => XLo, Y => XHi, Difference => Dx, Underflow => Cx);
ffa_ch10_karatsub... 188
ffa_ch10_karatsub... 189 -- Dy := |YL - YH| , Cy := Borrow (i.e. 1 iff YL < YH)
ffa_ch10_karatsub... 190 FZ_Sub_Abs(X => YLo, Y => YHi, Difference => Dy, Underflow => Cy);
ffa_ch10_karatsub... 191
ffa_ch10_karatsub... 192 -- Recurse: DD := Dx * Dy
ffa_ch11_tuning_a... 193 FZ_Multiply_Unbuffered(Dx, Dy, DD);
ffa_ch10_karatsub... 194
ffa_ch10_karatsub... 195 -- Whether (XL - XH)(YL - YH) is non-negative (i.e. Cx = Cy), and so DD must be subtracted:
ffa_ch10_karatsub... 196 DD_Sub := 1 - (Cx xor Cy);
ffa_ch10_karatsub... 197
ffa_ch10_karatsub... 198 -- XY := LL + 2^(2 * K * Bitness) * HH
ffa_ch10_karatsub... 199 XYLo := LL;
ffa_ch10_karatsub... 200 XYHi := HH;
ffa_ch10_karatsub... 201
ffa_ch10_karatsub... 202 -- XY += 2^(K * Bitness) * HH, but carry goes in Tail Carry accum.
ffa_ch10_karatsub... 203 FZ_Add_D(X => XYMid, Y => HH, Overflow => TC);
ffa_ch10_karatsub... 204
ffa_ch10_karatsub... 205 -- XY += 2^(K * Bitness) * LL, ...
ffa_ch10_karatsub... 206 FZ_Add_D(X => XYMid, Y => LL, Overflow => C);
ffa_ch10_karatsub... 207
ffa_ch10_karatsub... 208 -- ... but the carry goes into the Tail Carry accumulator.
ffa_ch10_karatsub... 209 TC := TC + C;
ffa_ch10_karatsub... 210
ffa_ch10_karatsub... 211 -- XY += 2^(K * Bitness) * ((-1)^DD_Sub) * DD
ffa_ch10_karatsub... 212 FZ_Not_Cond_D(N => DD, Cond => DD_Sub); -- invert DD if 2s-complementing
ffa_ch10_karatsub... 213 FZ_Add_D(OF_In => DD_Sub, -- ... and then must increment
ffa_ch10_karatsub... 214 X => XYMid,
ffa_ch10_karatsub... 215 Y => DD,
ffa_ch10_karatsub... 216 Overflow => C); -- carry will go in Tail Carry accumulator
ffa_ch10_karatsub... 217
ffa_ch10_karatsub... 218 -- Compute the final Tail Carry for the ripple (less DD_Sub, i.e. less 1 if DD was 2s-complemented)
ffa_ch10_karatsub... 219 TC := TC + C - DD_Sub;
ffa_ch10_karatsub... 220
ffa_ch10_karatsub... 221 -- Ripple the Tail Carry into the tail.
ffa_ch12_karatsub... 222 FZ_Add_D_W(X => XYHiHi, W => TC, Overflow => FinalCarry);
ffa_ch10_karatsub... 223
ffa_ch10_karatsub... 224 end Mul_Karatsuba;
ffa_ch10_karatsub... 225 -- CAUTION: Inlining prohibited for Mul_Karatsuba !
ffa_ch10_karatsub... 226
ffa_ch10_karatsub... 227
ffa_ch10_karatsub... 228 -- Multiplier: picks Comba or Karatsuba by multiplicand length. (CAUTION: UNBUFFERED)
ffa_ch11_tuning_a... 229 procedure FZ_Multiply_Unbuffered(X : in FZ;
ffa_ch11_tuning_a... 230 Y : in FZ;
ffa_ch11_tuning_a... 231 XY : out FZ) is
ffa_ch10_karatsub... 232
ffa_ch10_karatsub... 233 -- The length of either multiplicand (taken from X)
ffa_ch10_karatsub... 234 L : constant Word_Count := X'Length;
ffa_ch10_karatsub... 235
ffa_ch10_karatsub... 236 begin
ffa_ch10_karatsub... 237
ffa_ch10_karatsub... 238 if L <= Karatsuba_Thresh then
ffa_ch10_karatsub... 239
ffa_ch10_karatsub... 240 -- Base case: at or below Karatsuba_Thresh words, multiply with Comba:
ffa_ch10_karatsub... 241 FZ_Mul_Comba(X, Y, XY);
ffa_ch10_karatsub... 242
ffa_ch10_karatsub... 243 else
ffa_ch10_karatsub... 244
ffa_ch10_karatsub... 245 -- Recursive case: Mul_Karatsuba calls back into this procedure on half-length operands:
ffa_ch10_karatsub... 246 Mul_Karatsuba(X, Y, XY);
ffa_ch10_karatsub... 247
ffa_ch10_karatsub... 248 end if;
ffa_ch10_karatsub... 249
ffa_ch11_tuning_a... 250 end FZ_Multiply_Unbuffered;
ffa_ch10_karatsub... 251
ffa_ch10_karatsub... 252
ffa_ch10_karatsub... 253 -- Multiplier. Preserves the inputs; the product is returned as its lower (XY_Lo) and upper (XY_Hi) halves.
ffa_ch11_tuning_a... 254 procedure FZ_Multiply_Buffered(X : in FZ;
ffa_ch11_tuning_a... 255 Y : in FZ;
ffa_ch11_tuning_a... 256 XY_Lo : out FZ;
ffa_ch11_tuning_a... 257 XY_Hi : out FZ) is
ffa_ch10_karatsub... 258
ffa_ch10_karatsub... 259 -- Product buffer, twice the length of X; split into XY_Lo and XY_Hi after the multiplication.
ffa_ch10_karatsub... 260 P : FZ(1 .. 2 * X'Length);
ffa_ch10_karatsub... 261
ffa_ch10_karatsub... 262 begin
ffa_ch10_karatsub... 263
ffa_ch11_tuning_a... 264 FZ_Multiply_Unbuffered(X, Y, P);
ffa_ch10_karatsub... 265
ffa_ch10_karatsub... 266 XY_Lo := P(P'First .. P'First + X'Length - 1);
ffa_ch10_karatsub... 267 XY_Hi := P(P'First + X'Length .. P'Last);
ffa_ch10_karatsub... 268
ffa_ch11_tuning_a... 269 end FZ_Multiply_Buffered;
ffa_ch10_karatsub... 270
ffa_ch5_egypt.kv 271 end FZ_Mul;