raw
keksum_genesis_3        1 /* KECCAK permutation and sponge construction
keksum_genesis_3 2 * J. Welsh, May 2019
keksum_genesis_3 3 *
keksum_genesis_3 4 * Based on Guido Bertoni, Joan Daemen, Michael Peeters and Gilles van Assche
keksum_genesis_3 5 * (2011): The KECCAK reference, Version 3.0.
keksum_genesis_3 6 */
keksum_genesis_3 7
keksum_genesis_3 8 #include <stdint.h>
keksum_genesis_3 9 #include "io.h"
keksum_genesis_3 10
keksum_genesis_3 11 /* State is arranged in a 5 x 5 x W bit array, indexed as (x,y,z).
keksum_genesis_3 12 * The 1D section for a given (x,y) is called a lane and packed in a lane_t.
keksum_genesis_3 13 * The 1D section for a given (x,z) is called a column.
keksum_genesis_3 14 * The 2D section for a given z is called a slice.
keksum_genesis_3 15 */
keksum_genesis_3 16
keksum_genesis_3 17 typedef uint64_t lane_t;
keksum_genesis_3 18 #define L 6
keksum_genesis_3 19 #define W (1 << L) /* lane size = 64 bits */
keksum_genesis_3 20 #define B 25*W /* permutation width = 1600 bits */
keksum_genesis_3 21 /* we require L be at least 3 (W=8, B=200) to avoid sub-byte lane size */
keksum_genesis_3 22
keksum_genesis_3 23 #define ROUNDS (12 + 2*L)
keksum_genesis_3 24
keksum_genesis_3 25 #define mask(x) (((unsigned) (x)) & (W-1))
keksum_genesis_3 26 #define rot(v,r) (v = v<<mask(r) | v>>mask(W-mask(r)))
keksum_genesis_3 27
keksum_genesis_3 28 static lane_t state[5][5];
keksum_genesis_3 29 /* physical storage order can be swapped as a performance tweak */
keksum_genesis_3 30 #define a(x,y) (state[x][y])
keksum_genesis_3 31
keksum_genesis_3 32 /* Galois linear feedback shift register, output in low bit:
keksum_genesis_3 33 * (x^t mod x^8 + x^6 + x^5 + x^4 + 1) mod x in GF(2)(x)
keksum_genesis_3 34 * (I'd forgotten my grade-school algebra but the Handbook of Applied
keksum_genesis_3 35 * Cryptography plus http://archive.is/hhvKKJ (nayuki.io) and some written
keksum_genesis_3 36 * exercises cleared this up for me.) */
keksum_genesis_3 37 static uint8_t lfsr_step(uint8_t rstate) {
keksum_genesis_3 38 /* polynomial xor'd in when high (overflowing) bit is set */
keksum_genesis_3 39 uint8_t gate = ((int8_t) rstate) >> 7;
keksum_genesis_3 40 return (rstate << 1) ^ (gate & 0x71); /* bits 6,5,4,0 */
keksum_genesis_3 41 }
keksum_genesis_3 42
keksum_genesis_3 43 /* Column parity diffusion */
keksum_genesis_3 44 static void theta(void) {
keksum_genesis_3 45 /* LUT for x+1 mod 5 (not used on secret bits!) */
keksum_genesis_3 46 uint8_t const inc_mod5[8] = {1,2,3,4,0,1,2,3};
keksum_genesis_3 47 unsigned x, y;
keksum_genesis_3 48 lane_t c[5], d;
keksum_genesis_3 49
keksum_genesis_3 50 for (x=0; x<5; ++x)
keksum_genesis_3 51 c[x] = a(x,0) ^ a(x,1) ^ a(x,2) ^ a(x,3) ^ a(x,4);
keksum_genesis_3 52
keksum_genesis_3 53 for (x=0; x<5; ++x) {
keksum_genesis_3 54 d = c[inc_mod5[x]];
keksum_genesis_3 55 rot(d,1);
keksum_genesis_3 56 d ^= c[inc_mod5[x+3]]; /* x+3+1 == x-1 mod 5 */
keksum_genesis_3 57 for (y=0; y<5; ++y)
keksum_genesis_3 58 a(x,y) ^= d;
keksum_genesis_3 59 }
keksum_genesis_3 60 }
keksum_genesis_3 61
keksum_genesis_3 62 /* Inter-slice dispersion */
keksum_genesis_3 63 static void rho(void) {
keksum_genesis_3 64 /* precomputed offsets from Table 2.1 */
keksum_genesis_3 65 rot(a(0,1), 36);
keksum_genesis_3 66 rot(a(0,2), 3);
keksum_genesis_3 67 rot(a(0,3), 105);
keksum_genesis_3 68 rot(a(0,4), 210);
keksum_genesis_3 69
keksum_genesis_3 70 rot(a(1,0), 1);
keksum_genesis_3 71 rot(a(1,1), 300);
keksum_genesis_3 72 rot(a(1,2), 10);
keksum_genesis_3 73 rot(a(1,3), 45);
keksum_genesis_3 74 rot(a(1,4), 66);
keksum_genesis_3 75
keksum_genesis_3 76 rot(a(2,0), 190);
keksum_genesis_3 77 rot(a(2,1), 6);
keksum_genesis_3 78 rot(a(2,2), 171);
keksum_genesis_3 79 rot(a(2,3), 15);
keksum_genesis_3 80 rot(a(2,4), 253);
keksum_genesis_3 81
keksum_genesis_3 82 rot(a(3,0), 28);
keksum_genesis_3 83 rot(a(3,1), 55);
keksum_genesis_3 84 rot(a(3,2), 153);
keksum_genesis_3 85 rot(a(3,3), 21);
keksum_genesis_3 86 rot(a(3,4), 120);
keksum_genesis_3 87
keksum_genesis_3 88 rot(a(4,0), 91);
keksum_genesis_3 89 rot(a(4,1), 276);
keksum_genesis_3 90 rot(a(4,2), 231);
keksum_genesis_3 91 rot(a(4,3), 136);
keksum_genesis_3 92 rot(a(4,4), 78);
keksum_genesis_3 93 }
keksum_genesis_3 94
keksum_genesis_3 95 /* Slicewise transposition by matrix
keksum_genesis_3 96 * [ 0 1 ]
keksum_genesis_3 97 * [ 2 3 ]
keksum_genesis_3 98 * For all indices x, y, z:
keksum_genesis_3 99 * a'(y, 2x+3y mod 5, z) = a(x, y, z)
keksum_genesis_3 100 */
keksum_genesis_3 101 static void pi(void) {
keksum_genesis_3 102 /* shuffle in-place by swapping alternating temporaries with successive
keksum_genesis_3 103 * destinations */
keksum_genesis_3 104 lane_t c, d;
keksum_genesis_3 105 #define even(x,y) (c=a(x,y), a(x,y)=d)
keksum_genesis_3 106 #define odd(x,y) (d=a(x,y), a(x,y)=c)
keksum_genesis_3 107 /* a[0][0] doesn't move */
keksum_genesis_3 108 d=a(0,1); /* 2*0 + 3*1 = 3 -> 3 */
keksum_genesis_3 109 even(1,3); /* 2+9=11 -> 1 */
keksum_genesis_3 110 odd (3,1); /* 6+3=9 -> 4 */
keksum_genesis_3 111 even(1,4); /* 2+12=14 */
keksum_genesis_3 112 odd (4,4); /* 8+12=20 */
keksum_genesis_3 113 even(4,0); /* 8+0=8 */
keksum_genesis_3 114 odd (0,3); /* 0+9=9 */
keksum_genesis_3 115 even(3,4); /* 6+12=18 */
keksum_genesis_3 116 odd (4,3); /* 8+9=17 */
keksum_genesis_3 117 even(3,2); /* 6+6=12 */
keksum_genesis_3 118 odd (2,2); /* 4+6=10 */
keksum_genesis_3 119 even(2,0); /* 4+0=4 */
keksum_genesis_3 120 odd (0,4); /* 0+12=12 */
keksum_genesis_3 121 even(4,2); /* 8+6=14 */
keksum_genesis_3 122 odd (2,4); /* 4+12=16 */
keksum_genesis_3 123 even(4,1); /* 8+3=11 */
keksum_genesis_3 124 odd (1,1); /* 2+3=5 */
keksum_genesis_3 125 even(1,0); /* 2+0=2 */
keksum_genesis_3 126 odd (0,2); /* 0+6=6 */
keksum_genesis_3 127 even(2,1); /* 4+3=7 */
keksum_genesis_3 128 odd (1,2); /* 2+6=8 */
keksum_genesis_3 129 even(2,3); /* 4+9=13 */
keksum_genesis_3 130 odd (3,3); /* 6+9=15 */
keksum_genesis_3 131 even(3,0); /* 6+0=6 */
keksum_genesis_3 132 a(0,1)=c; /* And we're back home in 24 steps, lovely! */
keksum_genesis_3 133 }
keksum_genesis_3 134
keksum_genesis_3 135 /* Nonlinear row mapping */
keksum_genesis_3 136 static void chi(void) {
keksum_genesis_3 137 unsigned y;
keksum_genesis_3 138 for (y=0; y<5; ++y) {
keksum_genesis_3 139 lane_t c0=a(0,y), c1=a(1,y), c2=a(2,y), c3=a(3,y), c4=a(4,y);
keksum_genesis_3 140
keksum_genesis_3 141 a(0,y) = c0 ^ (~c1 & c2);
keksum_genesis_3 142 a(1,y) = c1 ^ (~c2 & c3);
keksum_genesis_3 143 a(2,y) = c2 ^ (~c3 & c4);
keksum_genesis_3 144 a(3,y) = c3 ^ (~c4 & c0);
keksum_genesis_3 145 a(4,y) = c4 ^ (~c0 & c1);
keksum_genesis_3 146 }
keksum_genesis_3 147 }
keksum_genesis_3 148
keksum_genesis_3 149 /* Round constants for disrupting z symmetry
keksum_genesis_3 150 *
keksum_genesis_3 151 * Mix LFSR bitstream sequentially into first lane with z index given by
keksum_genesis_3 152 * 2^j - 1
keksum_genesis_3 153 * LFSR advances 7 steps per round regardless of how many "j" are needed to
keksum_genesis_3 154 * fill the lane, corresponding to the maximum 64-bit lane size.
keksum_genesis_3 155 */
keksum_genesis_3 156 static uint8_t iota(uint8_t rstate) {
keksum_genesis_3 157 unsigned j;
keksum_genesis_3 158 for (j=0; j<=L; ++j) {
keksum_genesis_3 159 a(0,0) ^= ((lane_t) (rstate & 1)) << ((1 << j) - 1);
keksum_genesis_3 160 rstate = lfsr_step(rstate);
keksum_genesis_3 161 }
keksum_genesis_3 162 for (; j<7; ++j)
keksum_genesis_3 163 rstate = lfsr_step(rstate);
keksum_genesis_3 164 return rstate;
keksum_genesis_3 165 }
keksum_genesis_3 166
keksum_genesis_3 167 #ifdef TEST
keksum_genesis_3 168
keksum_genesis_3 169 #include <stdio.h>
keksum_genesis_3 170 #include "testvectors.h"
keksum_genesis_3 171
keksum_genesis_3 172 static int test_fail;
keksum_genesis_3 173 static unsigned test, test_step;
keksum_genesis_3 174
keksum_genesis_3 175 static void checkpoint(unsigned round, char const *step) {
keksum_genesis_3 176 int fail = 0;
keksum_genesis_3 177 unsigned x, y, i=0;
keksum_genesis_3 178 printf("test=%u round=%u %s ", test, round, step);
keksum_genesis_3 179 for (y=0; y<5; ++y)
keksum_genesis_3 180 for (x=0; x<5; ++x, ++i) {
keksum_genesis_3 181 assert(i<25);
keksum_genesis_3 182 assert(test_step<TEST_STEPS);
keksum_genesis_3 183 if (a(x,y) != tests[test][test_step][i]) {
keksum_genesis_3 184 test_fail = fail = 1;
keksum_genesis_3 185 printf("FAIL x=%u y=%u (0x%lx != 0x%lx)\n",
keksum_genesis_3 186 x, y, a(x,y),
keksum_genesis_3 187 tests[test][test_step][i]);
keksum_genesis_3 188 }
keksum_genesis_3 189 }
keksum_genesis_3 190 if (!fail) puts("PASS");
keksum_genesis_3 191 ++test_step;
keksum_genesis_3 192 }
keksum_genesis_3 193 #else
keksum_genesis_3 194 #define checkpoint(r,s)
keksum_genesis_3 195 #endif
keksum_genesis_3 196
keksum_genesis_3 197 static void keccakf(void) {
keksum_genesis_3 198 unsigned round;
keksum_genesis_3 199 uint8_t rstate=1;
keksum_genesis_3 200 for (round=0; round<ROUNDS; ++round) {
keksum_genesis_3 201 theta(); checkpoint(round,"theta");
keksum_genesis_3 202 rho(); checkpoint(round,"rho");
keksum_genesis_3 203 pi(); checkpoint(round,"pi");
keksum_genesis_3 204 chi(); checkpoint(round,"chi");
keksum_genesis_3 205 rstate = iota(rstate); checkpoint(round,"iota");
keksum_genesis_3 206 }
keksum_genesis_3 207 }
keksum_genesis_3 208
keksum_genesis_3 209 static void reset(void) {
keksum_genesis_3 210 unsigned x, y;
keksum_genesis_3 211 for (x=0; x<5; ++x)
keksum_genesis_3 212 for (y=0; y<5; ++y)
keksum_genesis_3 213 a(x,y) = 0;
keksum_genesis_3 214 }
keksum_genesis_3 215
keksum_genesis_3 216 static void load(uint8_t const *block, unsigned len) {
keksum_genesis_3 217 /* Keeping bytes in lane little-endian, independent of machine. This
keksum_genesis_3 218 * harmonizes with (rumored) Keccak convention for interpretation of
keksum_genesis_3 219 * octet stream as bitstream. For a big-endian interpretation we could
keksum_genesis_3 220 * flip the byte load/store order and the direction of rotations.
keksum_genesis_3 221 *
keksum_genesis_3 222 * Ordering of lanes is x-major due to the specified mapping of
keksum_genesis_3 223 * bitstring to state coordinates: s[w(5y+x)+z] = a[x][y][z] */
keksum_genesis_3 224 unsigned x, y, z, i=0;
keksum_genesis_3 225 for (y=0; y<5; ++y)
keksum_genesis_3 226 for (x=0; x<5; ++x)
keksum_genesis_3 227 for (z=0; z!=W; z+=8, ++i)
keksum_genesis_3 228 if (i==len) return;
keksum_genesis_3 229 else a(x,y) ^= ((lane_t) block[i]) << z;
keksum_genesis_3 230 }
keksum_genesis_3 231
keksum_genesis_3 232 static void store_hex(char *buf, unsigned block_len) {
keksum_genesis_3 233 static char const hex[] = "0123456789abcdef";
keksum_genesis_3 234 unsigned x, y, z, len = 2*block_len, i = 0;
keksum_genesis_3 235 for (y=0; y<5; ++y)
keksum_genesis_3 236 for (x=0; x<5; ++x)
keksum_genesis_3 237 for (z=0; z!=W; z+=8) {
keksum_genesis_3 238 uint8_t byte;
keksum_genesis_3 239 if (i==len) return;
keksum_genesis_3 240 byte = a(x,y) >> z;
keksum_genesis_3 241 /* Digits within the byte rendered big-endian,
keksum_genesis_3 242 * matching common practice but unfortunately
keksum_genesis_3 243 * not bitstream order. */
keksum_genesis_3 244 buf[i++] = hex[byte>>4];
keksum_genesis_3 245 buf[i++] = hex[byte&15];
keksum_genesis_3 246 }
keksum_genesis_3 247 }
keksum_genesis_3 248
keksum_genesis_3 249 static void extract(unsigned len) {
keksum_genesis_3 250 static char buf[2*B/8];
keksum_genesis_3 251 assert(2*len <= sizeof buf);
keksum_genesis_3 252 store_hex(buf,len);
keksum_genesis_3 253 write_all(1,buf,2*len);
keksum_genesis_3 254 }
keksum_genesis_3 255
keksum_genesis_3 256 /* Stream octets from stdin until EOF, then write hex to stdout. */
keksum_genesis_3 257 void sponge(unsigned capacity, size_t out_bits) {
keksum_genesis_3 258 static uint8_t buf[B/8];
keksum_genesis_3 259 unsigned rate = B - capacity,
keksum_genesis_3 260 len = rate/8,
keksum_genesis_3 261 n;
keksum_genesis_3 262 size_t out_len = out_bits/8;
keksum_genesis_3 263 assert(W == 8*sizeof(lane_t));
keksum_genesis_3 264 assert(0 < rate && rate < B);
keksum_genesis_3 265 assert(!(rate%8));
keksum_genesis_3 266 assert(len <= sizeof buf);
keksum_genesis_3 267
keksum_genesis_3 268 /* absorb */
keksum_genesis_3 269 reset();
keksum_genesis_3 270 while ((n = read_all(0,buf,len)) == len) {
keksum_genesis_3 271 load(buf,len);
keksum_genesis_3 272 keccakf();
keksum_genesis_3 273 }
keksum_genesis_3 274
keksum_genesis_3 275 /* pad (1000...1) */
keksum_genesis_3 276 if (n < len-1) {
keksum_genesis_3 277 buf[n] = 0x01; /* first bit (little-endian) */
keksum_genesis_3 278 for (++n; n < len-1; ++n)
keksum_genesis_3 279 buf[n] = 0;
keksum_genesis_3 280 buf[n] = 0x80; /* last bit (little-endian) */
keksum_genesis_3 281 }
keksum_genesis_3 282 else {
keksum_genesis_3 283 assert(n == len-1);
keksum_genesis_3 284 buf[n] = 0x81; /* first and last bits */
keksum_genesis_3 285 }
keksum_genesis_3 286 load(buf,len);
keksum_genesis_3 287 keccakf();
keksum_genesis_3 288
keksum_genesis_3 289 /* squeeze */
keksum_genesis_3 290 for (; out_len > len; out_len -= len) {
keksum_genesis_3 291 extract(len);
keksum_genesis_3 292 keccakf();
keksum_genesis_3 293 }
keksum_genesis_3 294 extract(out_len);
keksum_genesis_3 295 }
keksum_genesis_3 296
keksum_genesis_3 297 #ifdef TEST
keksum_genesis_3 298 int main() {
keksum_genesis_3 299 for (test=0; test<NTESTS; ++test) {
keksum_genesis_3 300 unsigned x, y, i=0;
keksum_genesis_3 301 /* initial state */
keksum_genesis_3 302 for (y=0; y<5; ++y)
keksum_genesis_3 303 for (x=0; x<5; ++x, ++i) {
keksum_genesis_3 304 assert(i<25);
keksum_genesis_3 305 a(x,y) = tests[test][0][i];
keksum_genesis_3 306 }
keksum_genesis_3 307 test_step = 1;
keksum_genesis_3 308 keccakf();
keksum_genesis_3 309 }
keksum_genesis_3 310 return test_fail;
keksum_genesis_3 311 }
keksum_genesis_3 312 #endif