------------------------------------------------------------------------------ ------------------------------------------------------------------------------ -- This file is part of 'Finite Field Arithmetic', aka 'FFA'. -- -- -- -- (C) 2019 Stanislav Datskovskiy ( www.loper-os.org ) -- -- http://wot.deedbot.org/17215D118B7239507FAFED98B98228A001ABFFC7.html -- -- -- -- You do not have, nor can you ever acquire the right to use, copy or -- -- distribute this software ; Should you use this software for any purpose, -- -- or copy and distribute it to anyone or in any manner, you are breaking -- -- the laws of whatever soi-disant jurisdiction, and you promise to -- -- continue doing so for the indefinite future. In any case, please -- -- always : read and understand any software ; verify any PGP signatures -- -- that you use - for any purpose. -- -- -- -- See also http://trilema.com/2015/a-new-software-licensing-paradigm . -- ------------------------------------------------------------------------------ ----------------------------------------------------------------------------- -- BEFORE YOU EVEN *THINK* ABOUT MODIFYING THIS PROGRAM: -- ----------------------------------------------------------------------------- -- `dMMd` +NMMMMMMMNo -- -- .dM++Md..oNMMMMMMmo` -- -- /mM+ +MmmMMMMMMNo. -- -- /NM+ +MMMMMMNo` / -- -- /Nd- `sNMMMMMy.-oohNNm` -- -- `yNd- -yMMMMMMMNNNMMMMs. -- -- hMd::. -mMMMMMdNMMMMMMm+` -- -- :hNs.`.:yNyyyo--/sNMMMMy` -- -- -o.. `.. `.sNh` -- -- ..`RRR EEE AA DDD !+:` -- -- `: R R E A A D D ! .o- -- -- .s RRR EEE AAAA D D ! .:` -- -- .. `` R R E A A D D ys. -- -- -h /: R R EEE A A DDD !/ :mm- -- -- -mm `/- THE PROOFS!!! -y sMm- -- -- -mNy `++` YES THAT MEANS YOU! .s. .`-Nm- -- -- `oNN-`:/y``:////:` `-////``-o.+. -NNo` -- -- `oNN: `+:::hNMMMMNyo. `smNMMMMmy`++ :NNo` -- -- `oNy- .: sMMMMMMMMM: -MMMMMMMMMs/s. -yNh- -- -- -dNy. `s. sMMMMMMMmo.----mMMMMMMMNo `.` .yMd- -- -- .dmo. `o` /mNNNmyo.`sNMMy.+ymNNNNh `-` .omd. -- -- .mN/ -o` .---. `oNMNMNs .----. -/. /Nm. -- -- +mN/ .hhs:.. ` .hMN-MMy ` `.-+-` /Nm+ -- -- +NN: :hMMMs/m`d -y- dy -`:/y :NN+ -- -- +Nd: /: `hMMMmm/ y:Ns::.`````.:oh-- :dNs. -- -- .sNh. .h+:hMMMy./- -yMMMyyod+dssMM:. `: .hMh. -- -- .hMy. +MNMMMMh` ` `yNMhmsNsmhNh: /` +Mh. -- -- -hN+ -dMMMMMNso+- :s .ymmNMmyh+- + +Nh- -- -- `MN+ /MMMMMMh:- :- :: : .+ +NM` -- -- `Md///////+mMMMMys////////sh/- -yy/////////////////////////dM` -- -- -ssssssssymssssssssssssssssso- .+ossssssssssssssssssssssssssss- -- -- -- --Ch. 14A: Barrett's Modular Reduction: http://www.loper-os.org/?p=2842 -- --Ch. 14A-Bis: Barrett's Physical Bounds: http://www.loper-os.org/?p=2875 -- -- -- ----------------------------------------------------------------------------- with W_Pred; use W_Pred; with W_Shifts; use W_Shifts; with FZ_Basic; use FZ_Basic; with FZ_Shift; use FZ_Shift; with FZ_Arith; use FZ_Arith; with FZ_Mul; use FZ_Mul; with FZ_LoMul; use FZ_LoMul; with FZ_Measr; use FZ_Measr; with FZ_QShft; use FZ_QShft; package body FZ_Barr is -- Prepare the precomputed Barrettoid corresponding to a given Modulus procedure FZ_Make_Barrettoid(Modulus : in FZ; Result : out Barretoid) is -- Length of Modulus and Remainder Lm : constant Indices := Modulus'Length; -- Remainder register, starts as zero Remainder : FZ(1 .. Lm) := (others => 0); -- Length of Quotient, with an extra Word for top bit (if Degenerate) Lq : constant Indices := (2 * Lm) + 1; -- Valid indices into Quotient, using the above subtype Quotient_Index is Word_Index range 1 .. Lq; -- The Quotient we need, i.e. 2^(2 * ModulusBitness) / Modulus Quotient : FZ(Quotient_Index); -- Permissible 'cuts' for the Slice operation subtype Divisor_Cuts is Word_Index range 2 .. Lm; -- Current bit of Pseudo-Dividend; high bit is 1, all others 0 Pb : WBool := 1; -- Performs Restoring Division on a given segment procedure Slice(Index : Quotient_Index; Cut : Divisor_Cuts; Bits : Positive) is begin declare -- Borrow, from comparator C : WBool; -- Left-Shift Overflow LsO : WBool; -- Current cut of Remainder register Rs : FZ renames Remainder(1 .. Cut); -- Current cut of Divisor Ds : FZ renames Modulus(1 .. Cut); -- Current Word of Quotient being made, starting from the highest W : Word := 0; -- Current bit of Quotient (inverted) nQb : WBool; begin -- For each bit in the current Pseudo-Dividend Word: for b in 1 .. Bits loop -- Advance Rs, shifting in the current Pseudo-Dividend bit: FZ_ShiftLeft_O_I(N => Rs, ShiftedN => Rs, Count => 1, OF_In => Pb, -- Current Pseudo-Dividend bit Overflow => LsO); -- Subtract Divisor-Cut from R-Cut; Underflow goes into C: FZ_Sub(X => Rs, Y => Ds, Difference => Rs, Underflow => C); -- Negation of current Quotient bit nQb := C and W_Not(LsO); -- If C=1, the subtraction underflowed, and we must undo it: FZ_Add_Gated(X => Rs, Y => Ds, Sum => Rs, Gate => nQb); -- Save the bit of Quotient that we have found: W := Shift_Left(W, 1) or (1 - nQb); -- i.e. inverse of nQb end loop; -- We made a complete Word of the Quotient; save it: Quotient(Quotient'Last + 1 - Index) := W; -- Indexed from end end; end Slice; pragma Inline_Always(Slice); -- Measure of the Modulus ModulusMeasure : constant FZBit_Index := FZ_Measure(Modulus); begin -- First, process the high Word of the Pseudo-Dividend: Slice(1, 2, 1); -- ... it has just one bit: a 1 at the bottom position -- Once we ate the top 1 bit of Pseudo-Dividend, rest of its bits are 0 Pb := 0; -- Process the Modulus-sized segment below the top Word: for i in 2 .. Lm - 1 loop Slice(i, i + 1, Bitness); -- stay ahead by a word to handle carry end loop; -- Process remaining Words: for i in Lm .. Lq loop Slice(i, Lm, Bitness); end loop; -- Record the Quotient (i.e. the Barrettoid proper, Bm) Result.B := Quotient(Result.B'Range); -- Record whether we have the Degenerate Case (1 iff Modulus = 1) Result.Degenerate := W_NZeroP(Quotient(Lq)); -- Record a copy of the Modulus, extended with zero Word: Result.ZXM(Modulus'Range) := Modulus; Result.ZXM(Result.ZXM'Last) := 0; -- Record the parameter Jm: Result.J := ModulusMeasure - 1; -- Wm - Jm Result.ZSlide := FZBit_Index(Bitness * Modulus'Length) - ModulusMeasure + 1; end FZ_Make_Barrettoid; -- Reduce X using the given precomputed Barrettoid. procedure FZ_Barrett_Reduce(X : in FZ; Bar : in Barretoid; XReduced : in out FZ) is -- Wordness of X, the quantity being reduced Xl : constant Indices := X'Length; -- Wordness of XReduced (result), one-half of Xl, and same as of Modulus Ml : constant Indices := XReduced'Length; -- i.e. # of Words in Wm. -- The Modulus we will reduce X by Modulus : FZ renames Bar.ZXM(1 .. Ml); -- Shifted X Xs : FZ(X'Range); -- Z := Xs * Bm (has twice the length of X) Z : FZ(1 .. 2 * Xl); -- Upper 3Wm-bit segment of Z that gets shifted to form Zs ZHi : FZ renames Z(Ml + 1 .. Z'Last ); -- Middle 2Wm-bit segment of Z, that gets multiplied by M to form Q Zs : FZ renames Z(Z'First + Ml .. Z'Last - Ml ); -- Sub-terms of the Zs * M multiplication: ZsLo : FZ renames Zs(Zs'First .. Zs'Last - Ml ); ZsHi : FZ renames Zs(Zs'First + Ml .. Zs'Last ); ZsHiM : FZ(1 .. Ml); -- Q := Modulus * Zs, i.e. floor(X / M)*M + E*M Q : FZ(1 .. Xl); QHi : FZ renames Q(Q'First + Ml .. Q'Last ); -- R is made one Word longer than Modulus (see proof re: why) Rl : constant Indices := Ml + 1; -- Reduction estimate, overshot by 0, 1, or 2 multiple of Modulus R : FZ(1 .. Rl); -- Scratch for the outputs of the gated subtractions S : FZ(1 .. Rl); -- Borrow from the gated subtractions C : WBool; -- Barring cosmic ray, no underflow can take place in (4) and (5) NoCarry : WZeroOrDie := 0; begin -- Result is initially zero (and will stay zero if Modulus = 1) FZ_Clear(XReduced); -- (1) Xs := X >> Jm FZ_Quiet_ShiftRight(N => X, ShiftedN => Xs, Count => Bar.J); -- (2) Z := Xs * Bm FZ_Multiply_Unbuffered(X => Bar.B, Y => Xs, XY => Z); -- (3) Zs := Z >> 2Wm - Jm (already thrown lower Wm, so only Wm - Jm now) FZ_Quiet_ShiftRight(N => ZHi, ShiftedN => ZHi, Count => Bar.ZSlide); -- (4) Q := Zs * M (this is split into three operations, see below) -- ... first, Q := ZsLo * M, FZ_Multiply_Unbuffered(ZsLo, Modulus, Q); -- ... then, compute ZsHiM := ZsHi * M, FZ_Low_Multiply_Unbuffered(ZsHi, Modulus, ZsHiM); -- ... finally, add ZsHiM to upper half of Q. FZ_Add_D(X => QHi, Y => ZsHiM, Overflow => NoCarry); -- (5) R := X - Q (we only need Rl-sized segments of X and Q here) FZ_Sub(X => X(1 .. Rl), Y => Q(1 .. Rl), Difference => R, Underflow => NoCarry); -- (6) S1 := R - M, C1 := Borrow (1st gated subtraction of Modulus) FZ_Sub(X => R, Y => Bar.ZXM, Difference => S, Underflow => C); -- (7) R := {C1=0 -> S1, C1=1 -> R} FZ_Mux(X => S, Y => R, Result => R, Sel => C); -- (8) S2 := R - M, C2 := Borrow (2nd gated subtraction of Modulus) FZ_Sub(X => R, Y => Bar.ZXM, Difference => S, Underflow => C); -- (9) R := {C2=0 -> S2, C2=1 -> R} FZ_Mux(X => S, Y => R, Result => R, Sel => C); -- (10) RFinal := {DM=0 -> R, DM=1 -> 0} (handle the degenerate case) FZ_Mux(X => R(1 .. Ml), Y => XReduced, Result => XReduced, Sel => Bar.Degenerate); -- If Modulus = 1, then XReduced is 0. end FZ_Barrett_Reduce; end FZ_Barr;