raw
mpi-genesis             1 /* mpihelp-mul.c  -  MPI helper functions
mpi_second_cut 2 * Modified by No Such Labs. (C) 2015. See README.
mpi-genesis 3 *
mpi_second_cut 4 * This file was originally part of Gnu Privacy Guard (GPG), ver. 1.4.10,
mpi_second_cut 5 * SHA256(gnupg-1.4.10.tar.gz):
mpi_second_cut 6 * 0bfd74660a2f6cedcf7d8256db4a63c996ffebbcdc2cf54397bfb72878c5a85a
mpi_second_cut 7 * (C) 1994-2005 Free Software Foundation, Inc.
mpi-genesis 8 *
mpi_second_cut 9 * This program is free software: you can redistribute it and/or modify
mpi-genesis 10 * it under the terms of the GNU General Public License as published by
mpi_second_cut 11 * the Free Software Foundation, either version 3 of the License, or
mpi-genesis 12 * (at your option) any later version.
mpi-genesis 13 *
mpi_second_cut 14 * This program is distributed in the hope that it will be useful,
mpi-genesis 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
mpi-genesis 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
mpi-genesis 17 * GNU General Public License for more details.
mpi-genesis 18 *
mpi-genesis 19 * You should have received a copy of the GNU General Public License
mpi_second_cut 20 * along with this program. If not, see <http://www.gnu.org/licenses/>.
mpi-genesis 21 */
mpi-genesis 22
mpi-genesis 23 #include <stdio.h>
mpi-genesis 24 #include <stdlib.h>
mpi-genesis 25 #include <string.h>
mpi_second_cut 26
mpi_second_cut 27 #include "knobs.h"
mpi-genesis 28 #include "mpi-internal.h"
mpi-genesis 29 #include "longlong.h"
mpi-genesis 30
mpi-genesis 31
mpi-genesis 32 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
mpi-genesis 33 do { \
mpi-genesis 34 if( (size) < KARATSUBA_THRESHOLD ) \
mpi-genesis 35 mul_n_basecase (prodp, up, vp, size); \
mpi-genesis 36 else \
mpi-genesis 37 mul_n (prodp, up, vp, size, tspace); \
mpi-genesis 38 } while (0);
mpi-genesis 39
mpi-genesis 40 #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
mpi-genesis 41 do { \
mpi-genesis 42 if ((size) < KARATSUBA_THRESHOLD) \
mpi-genesis 43 mpih_sqr_n_basecase (prodp, up, size); \
mpi-genesis 44 else \
mpi-genesis 45 mpih_sqr_n (prodp, up, size, tspace); \
mpi-genesis 46 } while (0);
mpi-genesis 47
mpi-genesis 48
mpi-genesis 49 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
mpi-genesis 50 * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
mpi-genesis 51 * always stored. Return the most significant limb.
mpi-genesis 52 *
mpi-genesis 53 * Argument constraints:
mpi-genesis 54 * 1. PRODP != UP and PRODP != VP, i.e. the destination
mpi-genesis 55 * must be distinct from the multiplier and the multiplicand.
mpi-genesis 56 *
mpi-genesis 57 *
mpi-genesis 58 * Handle simple cases with traditional multiplication.
mpi-genesis 59 *
mpi-genesis 60 * This is the most critical code of multiplication. All multiplies rely
mpi-genesis 61 * on this, both small and huge. Small ones arrive here immediately. Huge
mpi-genesis 62 * ones arrive here as this is the base case for Karatsuba's recursive
mpi-genesis 63 * algorithm below.
mpi-genesis 64 */
mpi-genesis 65
mpi-genesis 66 static mpi_limb_t
mpi-genesis 67 mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
mpi-genesis 68 mpi_ptr_t vp, mpi_size_t size)
mpi-genesis 69 {
mpi-genesis 70 mpi_size_t i;
mpi-genesis 71 mpi_limb_t cy;
mpi-genesis 72 mpi_limb_t v_limb;
mpi-genesis 73
mpi-genesis 74 /* Multiply by the first limb in V separately, as the result can be
mpi-genesis 75 * stored (not added) to PROD. We also avoid a loop for zeroing. */
mpi-genesis 76 v_limb = vp[0];
mpi-genesis 77 if( v_limb <= 1 ) {
mpi-genesis 78 if( v_limb == 1 )
mpi-genesis 79 MPN_COPY( prodp, up, size );
mpi-genesis 80 else
mpi-genesis 81 MPN_ZERO( prodp, size );
mpi-genesis 82 cy = 0;
mpi-genesis 83 }
mpi-genesis 84 else
mpi-genesis 85 cy = mpihelp_mul_1( prodp, up, size, v_limb );
mpi-genesis 86
mpi-genesis 87 prodp[size] = cy;
mpi-genesis 88 prodp++;
mpi-genesis 89
mpi-genesis 90 /* For each iteration in the outer loop, multiply one limb from
mpi-genesis 91 * U with one limb from V, and add it to PROD. */
mpi-genesis 92 for( i = 1; i < size; i++ ) {
mpi-genesis 93 v_limb = vp[i];
mpi-genesis 94 if( v_limb <= 1 ) {
mpi-genesis 95 cy = 0;
mpi-genesis 96 if( v_limb == 1 )
mpi-genesis 97 cy = mpihelp_add_n(prodp, prodp, up, size);
mpi-genesis 98 }
mpi-genesis 99 else
mpi-genesis 100 cy = mpihelp_addmul_1(prodp, up, size, v_limb);
mpi-genesis 101
mpi-genesis 102 prodp[size] = cy;
mpi-genesis 103 prodp++;
mpi-genesis 104 }
mpi-genesis 105
mpi-genesis 106 return cy;
mpi-genesis 107 }
mpi-genesis 108
mpi-genesis 109
mpi-genesis 110 static void
mpi-genesis 111 mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
mpi-genesis 112 mpi_size_t size, mpi_ptr_t tspace )
mpi-genesis 113 {
mpi-genesis 114 if( size & 1 ) {
mpi-genesis 115 /* The size is odd, and the code below doesn't handle that.
mpi-genesis 116 * Multiply the least significant (size - 1) limbs with a recursive
mpi-genesis 117 * call, and handle the most significant limb of S1 and S2
mpi-genesis 118 * separately.
mpi-genesis 119 * A slightly faster way to do this would be to make the Karatsuba
mpi-genesis 120 * code below behave as if the size were even, and let it check for
mpi-genesis 121 * odd size in the end. I.e., in essence move this code to the end.
mpi-genesis 122 * Doing so would save us a recursive call, and potentially make the
mpi-genesis 123 * stack grow a lot less.
mpi-genesis 124 */
mpi-genesis 125 mpi_size_t esize = size - 1; /* even size */
mpi-genesis 126 mpi_limb_t cy_limb;
mpi-genesis 127
mpi-genesis 128 MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
mpi-genesis 129 cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
mpi-genesis 130 prodp[esize + esize] = cy_limb;
mpi-genesis 131 cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
mpi-genesis 132 prodp[esize + size] = cy_limb;
mpi-genesis 133 }
mpi-genesis 134 else {
mpi-genesis 135 /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
mpi-genesis 136 *
mpi-genesis 137 * Split U in two pieces, U1 and U0, such that
mpi-genesis 138 * U = U0 + U1*(B**n),
mpi-genesis 139 * and V in V1 and V0, such that
mpi-genesis 140 * V = V0 + V1*(B**n).
mpi-genesis 141 *
mpi-genesis 142 * UV is then computed recursively using the identity
mpi-genesis 143 *
mpi-genesis 144 * 2n n n n
mpi-genesis 145 * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
mpi-genesis 146 * 1 1 1 0 0 1 0 0
mpi-genesis 147 *
mpi-genesis 148 * Where B = 2**BITS_PER_MP_LIMB.
mpi-genesis 149 */
mpi-genesis 150 mpi_size_t hsize = size >> 1;
mpi-genesis 151 mpi_limb_t cy;
mpi-genesis 152 int negflg;
mpi-genesis 153
mpi-genesis 154 /* Product H. ________________ ________________
mpi-genesis 155 * |_____U1 x V1____||____U0 x V0_____|
mpi-genesis 156 * Put result in upper part of PROD and pass low part of TSPACE
mpi-genesis 157 * as new TSPACE.
mpi-genesis 158 */
mpi-genesis 159 MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);
mpi-genesis 160
mpi-genesis 161 /* Product M. ________________
mpi-genesis 162 * |_(U1-U0)(V0-V1)_|
mpi-genesis 163 */
mpi-genesis 164 if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
mpi-genesis 165 mpihelp_sub_n(prodp, up + hsize, up, hsize);
mpi-genesis 166 negflg = 0;
mpi-genesis 167 }
mpi-genesis 168 else {
mpi-genesis 169 mpihelp_sub_n(prodp, up, up + hsize, hsize);
mpi-genesis 170 negflg = 1;
mpi-genesis 171 }
mpi-genesis 172 if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
mpi-genesis 173 mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
mpi-genesis 174 negflg ^= 1;
mpi-genesis 175 }
mpi-genesis 176 else {
mpi-genesis 177 mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
mpi-genesis 178 /* No change of NEGFLG. */
mpi-genesis 179 }
mpi-genesis 180 /* Read temporary operands from low part of PROD.
mpi-genesis 181 * Put result in low part of TSPACE using upper part of TSPACE
mpi-genesis 182 * as new TSPACE.
mpi-genesis 183 */
mpi-genesis 184 MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);
mpi-genesis 185
mpi-genesis 186 /* Add/copy product H. */
mpi-genesis 187 MPN_COPY (prodp + hsize, prodp + size, hsize);
mpi-genesis 188 cy = mpihelp_add_n( prodp + size, prodp + size,
mpi-genesis 189 prodp + size + hsize, hsize);
mpi-genesis 190
mpi-genesis 191 /* Add product M (if NEGFLG M is a negative number) */
mpi-genesis 192 if(negflg)
mpi-genesis 193 cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
mpi-genesis 194 else
mpi-genesis 195 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
mpi-genesis 196
mpi-genesis 197 /* Product L. ________________ ________________
mpi-genesis 198 * |________________||____U0 x V0_____|
mpi-genesis 199 * Read temporary operands from low part of PROD.
mpi-genesis 200 * Put result in low part of TSPACE using upper part of TSPACE
mpi-genesis 201 * as new TSPACE.
mpi-genesis 202 */
mpi-genesis 203 MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
mpi-genesis 204
mpi-genesis 205 /* Add/copy Product L (twice) */
mpi-genesis 206
mpi-genesis 207 cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
mpi-genesis 208 if( cy )
mpi-genesis 209 mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);
mpi-genesis 210
mpi-genesis 211 MPN_COPY(prodp, tspace, hsize);
mpi-genesis 212 cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
mpi-genesis 213 if( cy )
mpi-genesis 214 mpihelp_add_1(prodp + size, prodp + size, size, 1);
mpi-genesis 215 }
mpi-genesis 216 }
mpi-genesis 217
mpi-genesis 218
mpi-genesis 219 void
mpi-genesis 220 mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
mpi-genesis 221 {
mpi-genesis 222 mpi_size_t i;
mpi-genesis 223 mpi_limb_t cy_limb;
mpi-genesis 224 mpi_limb_t v_limb;
mpi-genesis 225
mpi-genesis 226 /* Multiply by the first limb in V separately, as the result can be
mpi-genesis 227 * stored (not added) to PROD. We also avoid a loop for zeroing. */
mpi-genesis 228 v_limb = up[0];
mpi-genesis 229 if( v_limb <= 1 ) {
mpi-genesis 230 if( v_limb == 1 )
mpi-genesis 231 MPN_COPY( prodp, up, size );
mpi-genesis 232 else
mpi-genesis 233 MPN_ZERO(prodp, size);
mpi-genesis 234 cy_limb = 0;
mpi-genesis 235 }
mpi-genesis 236 else
mpi-genesis 237 cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );
mpi-genesis 238
mpi-genesis 239 prodp[size] = cy_limb;
mpi-genesis 240 prodp++;
mpi-genesis 241
mpi-genesis 242 /* For each iteration in the outer loop, multiply one limb from
mpi-genesis 243 * U with one limb from V, and add it to PROD. */
mpi-genesis 244 for( i=1; i < size; i++) {
mpi-genesis 245 v_limb = up[i];
mpi-genesis 246 if( v_limb <= 1 ) {
mpi-genesis 247 cy_limb = 0;
mpi-genesis 248 if( v_limb == 1 )
mpi-genesis 249 cy_limb = mpihelp_add_n(prodp, prodp, up, size);
mpi-genesis 250 }
mpi-genesis 251 else
mpi-genesis 252 cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
mpi-genesis 253
mpi-genesis 254 prodp[size] = cy_limb;
mpi-genesis 255 prodp++;
mpi-genesis 256 }
mpi-genesis 257 }
mpi-genesis 258
mpi-genesis 259
mpi-genesis 260 void
mpi-genesis 261 mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
mpi-genesis 262 {
mpi-genesis 263 if( size & 1 ) {
mpi-genesis 264 /* The size is odd, and the code below doesn't handle that.
mpi-genesis 265 * Multiply the least significant (size - 1) limbs with a recursive
mpi-genesis 266 * call, and handle the most significant limb of S1 and S2
mpi-genesis 267 * separately.
mpi-genesis 268 * A slightly faster way to do this would be to make the Karatsuba
mpi-genesis 269 * code below behave as if the size were even, and let it check for
mpi-genesis 270 * odd size in the end. I.e., in essence move this code to the end.
mpi-genesis 271 * Doing so would save us a recursive call, and potentially make the
mpi-genesis 272 * stack grow a lot less.
mpi-genesis 273 */
mpi-genesis 274 mpi_size_t esize = size - 1; /* even size */
mpi-genesis 275 mpi_limb_t cy_limb;
mpi-genesis 276
mpi-genesis 277 MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
mpi-genesis 278 cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
mpi-genesis 279 prodp[esize + esize] = cy_limb;
mpi-genesis 280 cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );
mpi-genesis 281
mpi-genesis 282 prodp[esize + size] = cy_limb;
mpi-genesis 283 }
mpi-genesis 284 else {
mpi-genesis 285 mpi_size_t hsize = size >> 1;
mpi-genesis 286 mpi_limb_t cy;
mpi-genesis 287
mpi-genesis 288 /* Product H. ________________ ________________
mpi-genesis 289 * |_____U1 x U1____||____U0 x U0_____|
mpi-genesis 290 * Put result in upper part of PROD and pass low part of TSPACE
mpi-genesis 291 * as new TSPACE.
mpi-genesis 292 */
mpi-genesis 293 MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
mpi-genesis 294
mpi-genesis 295 /* Product M. ________________
mpi-genesis 296 * |_(U1-U0)(U0-U1)_|
mpi-genesis 297 */
mpi-genesis 298 if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
mpi-genesis 299 mpihelp_sub_n( prodp, up + hsize, up, hsize);
mpi-genesis 300 else
mpi-genesis 301 mpihelp_sub_n (prodp, up, up + hsize, hsize);
mpi-genesis 302
mpi-genesis 303 /* Read temporary operands from low part of PROD.
mpi-genesis 304 * Put result in low part of TSPACE using upper part of TSPACE
mpi-genesis 305 * as new TSPACE. */
mpi-genesis 306 MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
mpi-genesis 307
mpi-genesis 308 /* Add/copy product H */
mpi-genesis 309 MPN_COPY(prodp + hsize, prodp + size, hsize);
mpi-genesis 310 cy = mpihelp_add_n(prodp + size, prodp + size,
mpi-genesis 311 prodp + size + hsize, hsize);
mpi-genesis 312
mpi-genesis 313 /* Add product M (if NEGFLG M is a negative number). */
mpi-genesis 314 cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);
mpi-genesis 315
mpi-genesis 316 /* Product L. ________________ ________________
mpi-genesis 317 * |________________||____U0 x U0_____|
mpi-genesis 318 * Read temporary operands from low part of PROD.
mpi-genesis 319 * Put result in low part of TSPACE using upper part of TSPACE
mpi-genesis 320 * as new TSPACE. */
mpi-genesis 321 MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
mpi-genesis 322
mpi-genesis 323 /* Add/copy Product L (twice). */
mpi-genesis 324 cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
mpi-genesis 325 if( cy )
mpi-genesis 326 mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
mpi-genesis 327 hsize, cy);
mpi-genesis 328
mpi-genesis 329 MPN_COPY(prodp, tspace, hsize);
mpi-genesis 330 cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
mpi-genesis 331 if( cy )
mpi-genesis 332 mpihelp_add_1 (prodp + size, prodp + size, size, 1);
mpi-genesis 333 }
mpi-genesis 334 }
mpi-genesis 335
mpi-genesis 336
mpi-genesis 337 /* This should be made into an inline function in gmp.h. */
mpi-genesis 338 void
mpi-genesis 339 mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
mpi-genesis 340 {
mpi-genesis 341 int secure;
mpi-genesis 342
mpi-genesis 343 if( up == vp ) {
mpi-genesis 344 if( size < KARATSUBA_THRESHOLD )
mpi-genesis 345 mpih_sqr_n_basecase( prodp, up, size );
mpi-genesis 346 else {
mpi-genesis 347 mpi_ptr_t tspace;
mpi-genesis 348 secure = m_is_secure( up );
mpi-genesis 349 tspace = mpi_alloc_limb_space( 2 * size, secure );
mpi-genesis 350 mpih_sqr_n( prodp, up, size, tspace );
mpi-genesis 351 mpi_free_limb_space( tspace );
mpi-genesis 352 }
mpi-genesis 353 }
mpi-genesis 354 else {
mpi-genesis 355 if( size < KARATSUBA_THRESHOLD )
mpi-genesis 356 mul_n_basecase( prodp, up, vp, size );
mpi-genesis 357 else {
mpi-genesis 358 mpi_ptr_t tspace;
mpi-genesis 359 secure = m_is_secure( up ) || m_is_secure( vp );
mpi-genesis 360 tspace = mpi_alloc_limb_space( 2 * size, secure );
mpi-genesis 361 mul_n (prodp, up, vp, size, tspace);
mpi-genesis 362 mpi_free_limb_space( tspace );
mpi-genesis 363 }
mpi-genesis 364 }
mpi-genesis 365 }
mpi-genesis 366
mpi-genesis 367
mpi-genesis 368
mpi-genesis 369 void
mpi-genesis 370 mpihelp_mul_karatsuba_case( mpi_ptr_t prodp,
mpi-genesis 371 mpi_ptr_t up, mpi_size_t usize,
mpi-genesis 372 mpi_ptr_t vp, mpi_size_t vsize,
mpi-genesis 373 struct karatsuba_ctx *ctx )
mpi-genesis 374 {
mpi-genesis 375 mpi_limb_t cy;
mpi-genesis 376
mpi-genesis 377 if( !ctx->tspace || ctx->tspace_size < vsize ) {
mpi-genesis 378 if( ctx->tspace )
mpi-genesis 379 mpi_free_limb_space( ctx->tspace );
mpi-genesis 380 ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
mpi-genesis 381 m_is_secure( up ) || m_is_secure( vp ) );
mpi-genesis 382 ctx->tspace_size = vsize;
mpi-genesis 383 }
mpi-genesis 384
mpi-genesis 385 MPN_MUL_N_RECURSE( prodp, up, vp, vsize, ctx->tspace );
mpi-genesis 386
mpi-genesis 387 prodp += vsize;
mpi-genesis 388 up += vsize;
mpi-genesis 389 usize -= vsize;
mpi-genesis 390 if( usize >= vsize ) {
mpi-genesis 391 if( !ctx->tp || ctx->tp_size < vsize ) {
mpi-genesis 392 if( ctx->tp )
mpi-genesis 393 mpi_free_limb_space( ctx->tp );
mpi-genesis 394 ctx->tp = mpi_alloc_limb_space( 2 * vsize, m_is_secure( up )
mpi-genesis 395 || m_is_secure( vp ) );
mpi-genesis 396 ctx->tp_size = vsize;
mpi-genesis 397 }
mpi-genesis 398
mpi-genesis 399 do {
mpi-genesis 400 MPN_MUL_N_RECURSE( ctx->tp, up, vp, vsize, ctx->tspace );
mpi-genesis 401 cy = mpihelp_add_n( prodp, prodp, ctx->tp, vsize );
mpi-genesis 402 mpihelp_add_1( prodp + vsize, ctx->tp + vsize, vsize, cy );
mpi-genesis 403 prodp += vsize;
mpi-genesis 404 up += vsize;
mpi-genesis 405 usize -= vsize;
mpi-genesis 406 } while( usize >= vsize );
mpi-genesis 407 }
mpi-genesis 408
mpi-genesis 409 if( usize ) {
mpi-genesis 410 if( usize < KARATSUBA_THRESHOLD ) {
mpi-genesis 411 mpihelp_mul( ctx->tspace, vp, vsize, up, usize );
mpi-genesis 412 }
mpi-genesis 413 else {
mpi-genesis 414 if( !ctx->next ) {
mpi-genesis 415 ctx->next = xmalloc_clear( sizeof *ctx );
mpi-genesis 416 }
mpi-genesis 417 mpihelp_mul_karatsuba_case( ctx->tspace,
mpi-genesis 418 vp, vsize,
mpi-genesis 419 up, usize,
mpi-genesis 420 ctx->next );
mpi-genesis 421 }
mpi-genesis 422
mpi-genesis 423 cy = mpihelp_add_n( prodp, prodp, ctx->tspace, vsize);
mpi-genesis 424 mpihelp_add_1( prodp + vsize, ctx->tspace + vsize, usize, cy );
mpi-genesis 425 }
mpi-genesis 426 }
mpi-genesis 427
mpi-genesis 428
mpi-genesis 429 void
mpi-genesis 430 mpihelp_release_karatsuba_ctx( struct karatsuba_ctx *ctx )
mpi-genesis 431 {
mpi-genesis 432 struct karatsuba_ctx *ctx2;
mpi-genesis 433
mpi-genesis 434 if( ctx->tp )
mpi-genesis 435 mpi_free_limb_space( ctx->tp );
mpi-genesis 436 if( ctx->tspace )
mpi-genesis 437 mpi_free_limb_space( ctx->tspace );
mpi-genesis 438 for( ctx=ctx->next; ctx; ctx = ctx2 ) {
mpi-genesis 439 ctx2 = ctx->next;
mpi-genesis 440 if( ctx->tp )
mpi-genesis 441 mpi_free_limb_space( ctx->tp );
mpi-genesis 442 if( ctx->tspace )
mpi-genesis 443 mpi_free_limb_space( ctx->tspace );
mpi-genesis 444 xfree( ctx );
mpi-genesis 445 }
mpi-genesis 446 }
mpi-genesis 447
mpi-genesis 448 /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
mpi-genesis 449 * and v (pointed to by VP, with VSIZE limbs), and store the result at
mpi-genesis 450 * PRODP. USIZE + VSIZE limbs are always stored, but if the input
mpi-genesis 451 * operands are normalized. Return the most significant limb of the
mpi-genesis 452 * result.
mpi-genesis 453 *
mpi-genesis 454 * NOTE: The space pointed to by PRODP is overwritten before finished
mpi-genesis 455 * with U and V, so overlap is an error.
mpi-genesis 456 *
mpi-genesis 457 * Argument constraints:
mpi-genesis 458 * 1. USIZE >= VSIZE.
mpi-genesis 459 * 2. PRODP != UP and PRODP != VP, i.e. the destination
mpi-genesis 460 * must be distinct from the multiplier and the multiplicand.
mpi-genesis 461 */
mpi-genesis 462
mpi-genesis 463 mpi_limb_t
mpi-genesis 464 mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
mpi-genesis 465 mpi_ptr_t vp, mpi_size_t vsize)
mpi-genesis 466 {
mpi-genesis 467 mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
mpi-genesis 468 mpi_limb_t cy;
mpi-genesis 469 struct karatsuba_ctx ctx;
mpi-genesis 470
mpi-genesis 471 if( vsize < KARATSUBA_THRESHOLD ) {
mpi-genesis 472 mpi_size_t i;
mpi-genesis 473 mpi_limb_t v_limb;
mpi-genesis 474
mpi-genesis 475 if( !vsize )
mpi-genesis 476 return 0;
mpi-genesis 477
mpi-genesis 478 /* Multiply by the first limb in V separately, as the result can be
mpi-genesis 479 * stored (not added) to PROD. We also avoid a loop for zeroing. */
mpi-genesis 480 v_limb = vp[0];
mpi-genesis 481 if( v_limb <= 1 ) {
mpi-genesis 482 if( v_limb == 1 )
mpi-genesis 483 MPN_COPY( prodp, up, usize );
mpi-genesis 484 else
mpi-genesis 485 MPN_ZERO( prodp, usize );
mpi-genesis 486 cy = 0;
mpi-genesis 487 }
mpi-genesis 488 else
mpi-genesis 489 cy = mpihelp_mul_1( prodp, up, usize, v_limb );
mpi-genesis 490
mpi-genesis 491 prodp[usize] = cy;
mpi-genesis 492 prodp++;
mpi-genesis 493
mpi-genesis 494 /* For each iteration in the outer loop, multiply one limb from
mpi-genesis 495 * U with one limb from V, and add it to PROD. */
mpi-genesis 496 for( i = 1; i < vsize; i++ ) {
mpi-genesis 497 v_limb = vp[i];
mpi-genesis 498 if( v_limb <= 1 ) {
mpi-genesis 499 cy = 0;
mpi-genesis 500 if( v_limb == 1 )
mpi-genesis 501 cy = mpihelp_add_n(prodp, prodp, up, usize);
mpi-genesis 502 }
mpi-genesis 503 else
mpi-genesis 504 cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
mpi-genesis 505
mpi-genesis 506 prodp[usize] = cy;
mpi-genesis 507 prodp++;
mpi-genesis 508 }
mpi-genesis 509
mpi-genesis 510 return cy;
mpi-genesis 511 }
mpi-genesis 512
mpi-genesis 513 memset( &ctx, 0, sizeof ctx );
mpi-genesis 514 mpihelp_mul_karatsuba_case( prodp, up, usize, vp, vsize, &ctx );
mpi-genesis 515 mpihelp_release_karatsuba_ctx( &ctx );
mpi-genesis 516 return *prod_endp;
mpi-genesis 517 }
mpi-genesis 518
mpi-genesis 519