#!/usr/bin/env python3

import random
import math
import os

def save(fn,x):
  '''Store the string x in file fn, leaving fn alone if it already holds x.

  An existing file is read back first; the file is only (re)written when it
  is missing or its content differs from x.
  '''
  previous = None
  if os.path.exists(fn):
    with open(fn) as existing:
      previous = existing.read()
  if previous is not None and previous == x:
    # already up to date: do not touch the file
    return
  with open(fn,'w') as out:
    out.write(x)

# ----- Python versions of the subroutines

def poly1305_chunks(m):
  '''Split message m into Poly1305 chunks, returned as a dict keyed from 1.

  Every complete group of 16 entries is read as a little-endian integer and
  2^128 is added.  A trailing group of n < 16 entries is read the same way
  and 2^(8n) is added instead.  An empty message gives an empty dict.
  '''
  data = list(m)
  full,tail = divmod(len(data),16)
  chunks = {}
  for i in range(1,full+1):
    group = data[16*(i-1):16*i]
    chunks[i] = sum(b<<(8*j) for j,b in enumerate(group))+(1<<128)
  if tail:
    last = full+1
    assert last not in chunks
    group = data[16*full:]
    chunks[last] = sum(b<<(8*j) for j,b in enumerate(group))+(1<<(8*tail))
  return chunks

def poly1305_chunks_v2(m): # "In other words" from the paper
  '''Alternative chunking of message m, returned as a dict keyed from 1.

  The message is cut into groups of up to 16 entries.  Each group gets a 1
  appended, is zero-padded to 17 entries, and is read as a little-endian
  integer.
  '''
  data = list(m)
  chunks = {}
  for start in range(0,len(data),16):
    padded = data[start:start+16]+[1]
    padded += [0]*(17-len(padded))
    chunks[len(chunks)+1] = sum(b<<(8*j) for j,b in enumerate(padded))
  return chunks

def poly1305(m,k):
  '''Compute the Poly1305 authenticator of message m under the 32-entry key k.

  The first 16 key bytes give r (after clamping), the last 16 give s, both
  little-endian.  With chunks c[1..q], the result is
  ((c[1]*r^q + ... + c[q]*r^1) mod (2^130-5)) + s, reduced mod 2^128.

  Returns a dict with 'a' (16 authenticator bytes, little-endian), 'm' (the
  message) and 'k' (the key as given, not clamped), each a list of integers.
  '''
  m = list(m)
  k = list(k)
  assert len(k) == 32

  # clamping works on copies, so k itself stays as supplied
  rbytes = k[:16]
  sbytes = k[16:]
  for j in (3,7,11,15): rbytes[j] &= 15 # keep only the low 4 bits
  for j in (4,8,12): rbytes[j] -= rbytes[j]%4 # clear the low 2 bits
  r = sum(b<<(8*j) for j,b in enumerate(rbytes))
  s = sum(b<<(8*j) for j,b in enumerate(sbytes))

  # the two chunking routines must agree
  chunks = poly1305_chunks(m)
  assert chunks == poly1305_chunks_v2(m)

  q = (len(m)+15)//16
  assert sorted(chunks) == list(range(1,q+1))

  p = (1<<130)-5
  total = 0
  for j in range(1,q+1):
    total += chunks[j]*pow(r,q+1-j,p)
  tag = ((total%p)+s)%(1<<128)

  return {'a':[(tag>>(8*i))&255 for i in range(16)],'m':m,'k':k}

# ----- precomputed test vectors

precomputed = {}

def precompute():
  '''Fill precomputed['onetimeauth','poly1305'] with reference test vectors.

  The list holds 100 pseudorandom vectors followed by 32 edge-case vectors,
  each being the dict returned by poly1305().  The random generator is
  seeded with a fixed string, and all lengths are drawn first, then all
  keys, then all messages.
  '''
  vectors = []

  random.seed('onetimeauth')
  numrandom = 100
  lengths = [random.randrange(1024) for _ in range(numrandom)]
  randomkeys = [[random.randrange(256) for _ in range(32)] for _ in range(numrandom)]
  randommessages = [[random.randrange(256) for _ in range(n)] for n in lengths]
  for msg,key in zip(randommessages,randomkeys):
    vectors.append(poly1305(msg,key))

  # edge cases: one all-255 block under keys whose second half is zero
  numedge = 16
  edgekeys = [[i]+[0]*31 for i in range(numedge)]
  edgekeys.extend([255-i]+[255]*15+[0]*16 for i in range(numedge)) # will trigger clamping
  for key in edgekeys:
    vectors.append(poly1305([255]*16,key))

  precomputed['onetimeauth','poly1305'] = vectors

precompute()

# ----- generating test program

# H collects text fragments of the generated C header.  This first fragment
# opens the lib1305_test_h include guard, uses #define to rename the shared
# test-helper symbols into the lib1305_test_ namespace, and declares them
# extern.  The guard is not closed inside this fragment.
H = ['''\
#ifndef lib1305_test_h
#define lib1305_test_h

#define aligned lib1305_test_aligned
#define callocplus lib1305_test_callocplus
#define checksum lib1305_test_checksum
#define checksum_clear lib1305_test_checksum_clear
#define checksum_expected lib1305_test_checksum_expected
#define double_canary lib1305_test_double_canary
#define endianness lib1305_test_endianness
#define forked lib1305_test_forked
#define input_compare lib1305_test_input_compare
#define input_prepare lib1305_test_input_prepare
#define myrandom lib1305_test_myrandom
#define ok lib1305_test_ok
#define output_compare lib1305_test_output_compare
#define output_prepare lib1305_test_output_prepare
#define public lib1305_test_public
#define secret lib1305_test_secret
#define targeti lib1305_test_targeti
#define targetn lib1305_test_targetn
#define targetoffset lib1305_test_targetoffset
#define targeto lib1305_test_targeto
#define targetp lib1305_test_targetp
#define valgrind lib1305_test_valgrind

extern const char *targeto;
extern const char *targetp;
extern const char *targeti;
extern const char *targetn;
extern const char *targetoffset;
extern int ok;
extern int valgrind;

extern unsigned long long myrandom(void);
extern void forked(void (*)(long long),long long);
extern void *aligned(void *,long long);
extern void *callocplus(long long);
extern void secret(void *,long long);
extern void public(void *,long long);
extern void double_canary(unsigned char *,unsigned char *,unsigned long long);
extern void input_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void output_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void input_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
extern void output_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
extern void checksum_expected(const char *);
extern void checksum(const unsigned char *,unsigned long long);
extern void checksum_clear(void);
extern void endianness(unsigned char *,unsigned long long,unsigned long long);

''']

# Z collects text fragments of the generated C test program.  This first
# fragment is the fixed support code: valgrind secret/public marking, the
# Salsa20-based test-vector and canary generators, a ChaCha20-based
# knownrandombytes (compiled under the name randombytes), the running
# checksum, buffer allocation/alignment helpers, fork-based isolation of each
# test, and byte-order normalization.  It is a raw string so that C escapes
# such as \n reach the generated file unchanged.
# The two fatal messages in forked() end in \n like every other diagnostic.
Z = [r'''/* WARNING: auto-generated (by autogen/test); do not edit */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <time.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <sys/resource.h>
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "crypto_declassify.h"
#include <lib1305.h> /* -l1305 */
#include <randombytes.h>
#include "lib1305_test.h"

const char *targeto = 0;
const char *targetp = 0;
const char *targeti = 0;
const char *targetn = 0;
const char *targetoffset = 0;

int ok = 1;

#define fail ((ok = 0),printf)

/* ----- valgrind support */

int valgrind = 0;
static unsigned char valgrind_undefined_byte = 0;
static char *volatile valgrind_pointer = 0;

static char *valgrind_malloc_1(void)
{
  char *x = malloc(1);
  if (!x) abort();
  *(char **volatile) &valgrind_pointer = x;
  return valgrind_pointer;
}

static void valgrind_init(void)
{
  char *e = getenv("valgrind_multiplier");
  char *x;
  if (!e) return;
  x = valgrind_malloc_1();
  valgrind_undefined_byte = x[0]+1;
  valgrind_undefined_byte *= atoi(e);
  valgrind_undefined_byte ^= x[0]+1;
  free(x);
  valgrind = 1;
}

void secret(void *xvoid,long long xlen)
{
  unsigned char *x = xvoid;
  while (xlen > 0) {
    *x ^= valgrind_undefined_byte;
    ++x;
    --xlen;
  }
}

void public(void *x,long long xlen)
{
  crypto_declassify(x,xlen);
}

/* ----- rng and hash, from supercop/try-anything.c */

typedef crypto_uint8 u8;
typedef crypto_uint32 u32;
typedef crypto_uint64 u64;

#define FOR(i,n) for (i = 0;i < n;++i)

static u32 L32(u32 x,int c) { return (x << c) | ((x&0xffffffff) >> (32 - c)); }

static u32 ld32(const u8 *x)
{
  u32 u = x[3];
  u = (u<<8)|x[2];
  u = (u<<8)|x[1];
  return (u<<8)|x[0];
}

static void st32(u8 *x,u32 u)
{
  int i;
  FOR(i,4) { x[i] = u; u >>= 8; }
}

static const u8 sigma[17] = "expand 32-byte k";

static void core_salsa(u8 *out,const u8 *in,const u8 *k)
{
  u32 w[16],x[16],y[16],t[4];
  int i,j,m;

  FOR(i,4) {
    x[5*i] = ld32(sigma+4*i);
    x[1+i] = ld32(k+4*i);
    x[6+i] = ld32(in+4*i);
    x[11+i] = ld32(k+16+4*i);
  }

  FOR(i,16) y[i] = x[i];

  FOR(i,20) {
    FOR(j,4) {
      FOR(m,4) t[m] = x[(5*j+4*m)%16];
      t[1] ^= L32(t[0]+t[3], 7);
      t[2] ^= L32(t[1]+t[0], 9);
      t[3] ^= L32(t[2]+t[1],13);
      t[0] ^= L32(t[3]+t[2],18);
      FOR(m,4) w[4*j+(j+m)%4] = t[m];
    }
    FOR(m,16) x[m] = w[m];
  }

  FOR(i,16) st32(out + 4 * i,x[i] + y[i]);
}

static void salsa20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 z[16],x[64];
  u32 u,i;
  if (!b) return;
  FOR(i,16) z[i] = 0;
  FOR(i,8) z[i] = n[i];
  while (b >= 64) {
    core_salsa(x,z,k);
    FOR(i,64) c[i] = x[i];
    u = 1;
    for (i = 8;i < 16;++i) {
      u += (u32) z[i];
      z[i] = u;
      u >>= 8;
    }
    b -= 64;
    c += 64;
  }
  if (b) {
    core_salsa(x,z,k);
    FOR(i,b) c[i] = x[i];
  }
}

static void increment(u8 *n)
{
  if (!++n[0])
    if (!++n[1])
      if (!++n[2])
        if (!++n[3])
          if (!++n[4])
            if (!++n[5])
              if (!++n[6])
                if (!++n[7])
                  ;
}

static unsigned char testvector_n[8];

static void testvector_clear(void)
{
  memset(testvector_n,0,sizeof testvector_n);
}

static void testvector(unsigned char *x,unsigned long long xlen)
{
  const static unsigned char testvector_k[33] = "generate inputs for test vectors";
  salsa20(x,xlen,testvector_n,testvector_k);
  increment(testvector_n);
}

unsigned long long myrandom(void)
{
  unsigned char x[8];
  unsigned long long result;
  testvector(x,8);
  result = x[7];
  result = (result<<8)|x[6];
  result = (result<<8)|x[5];
  result = (result<<8)|x[4];
  result = (result<<8)|x[3];
  result = (result<<8)|x[2];
  result = (result<<8)|x[1];
  result = (result<<8)|x[0];
  return result;
}

static unsigned char canary_n[8];

static void canary(unsigned char *x,unsigned long long xlen)
{
  const static unsigned char canary_k[33] = "generate pad to catch overwrites";
  salsa20(x,xlen,canary_n,canary_k);
  increment(canary_n);
}

void double_canary(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  if (valgrind) return;
  canary(x - 16,16);
  canary(x + xlen,16);
  memcpy(x2 - 16,x - 16,16);
  memcpy(x2 + xlen,x + xlen,16);
}

void input_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  testvector(x,xlen);
  if (valgrind) {
    memcpy(x2,x,xlen);
    return;
  }
  canary(x - 16,16);
  canary(x + xlen,16);
  memcpy(x2 - 16,x - 16,xlen + 32);
}

void input_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  if (valgrind) return;
  if (memcmp(x2 - 16,x - 16,xlen + 32)) {
    fail("failure: %s overwrites input\n",fun);
  }
}

void output_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  if (valgrind) {
    memcpy(x2,x,xlen);
    return;
  }
  canary(x - 16,xlen + 32);
  memcpy(x2 - 16,x - 16,xlen + 32);
}

void output_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  if (valgrind) return;
  if (memcmp(x2 - 16,x - 16,16)) {
    fail("failure: %s writes before output\n",fun);
  }
  if (memcmp(x2 + xlen,x + xlen,16)) {
    fail("failure: %s writes after output\n",fun);
  }
}

/* ----- knownrandombytes */

static const int knownrandombytes_is_only_for_testing_not_for_cryptographic_use = 1;
#define knownrandombytes randombytes

#define QUARTERROUND(a,b,c,d) \
  a += b; d = L32(d^a,16); \
  c += d; b = L32(b^c,12); \
  a += b; d = L32(d^a, 8); \
  c += d; b = L32(b^c, 7);

static void core_chacha(u8 *out,const u8 *in,const u8 *k)
{
  u32 x[16],y[16];
  int i,j;
  FOR(i,4) {
    x[i] = ld32(sigma+4*i);
    x[12+i] = ld32(in+4*i);
  }
  FOR(i,8) x[4+i] = ld32(k+4*i);
  FOR(i,16) y[i] = x[i];
  FOR(i,10) {
    FOR(j,4) { QUARTERROUND(x[j],x[j+4],x[j+8],x[j+12]) }
    FOR(j,4) { QUARTERROUND(x[j],x[((j+1)&3)+4],x[((j+2)&3)+8],x[((j+3)&3)+12]) }
  }
  FOR(i,16) st32(out+4*i,x[i]+y[i]);
}

static void chacha20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 z[16],x[64];
  u32 u,i;
  if (!b) return;
  FOR(i,16) z[i] = 0;
  FOR(i,8) z[i+8] = n[i];
  while (b >= 64) {
    core_chacha(x,z,k);
    FOR(i,64) c[i] = x[i];
    u = 1;
    FOR(i,8) {
      u += (u32) z[i];
      z[i] = u;
      u >>= 8;
    }
    b -= 64;
    c += 64;
  }
  if (b) {
    core_chacha(x,z,k);
    FOR(i,b) c[i] = x[i];
  }
}

#define crypto_rng_OUTPUTBYTES 736

static int crypto_rng(
        unsigned char *r, /* random output */
        unsigned char *n, /* new key */
  const unsigned char *g  /* old key */
)
{
  static const unsigned char nonce[8] = {0};
  unsigned char x[32+crypto_rng_OUTPUTBYTES];
  chacha20(x,sizeof x,nonce,g);
  memcpy(n,x,32);
  memcpy(r,x+32,crypto_rng_OUTPUTBYTES);
  return 0;
}

static unsigned char knownrandombytes_g[32];
static unsigned char knownrandombytes_r[crypto_rng_OUTPUTBYTES];
static unsigned long long knownrandombytes_pos = crypto_rng_OUTPUTBYTES;

static void knownrandombytes_clear(void)
{
  memset(knownrandombytes_g,0,sizeof knownrandombytes_g);
  memset(knownrandombytes_r,0,sizeof knownrandombytes_r);
  knownrandombytes_pos = crypto_rng_OUTPUTBYTES;
}

void knownrandombytes_main(void *xvoid,long long xlen)
{
  unsigned char *x = xvoid;
  assert(knownrandombytes_is_only_for_testing_not_for_cryptographic_use);

  while (xlen > 0) {
    if (knownrandombytes_pos == crypto_rng_OUTPUTBYTES) {
      crypto_rng(knownrandombytes_r,knownrandombytes_g,knownrandombytes_g);
      knownrandombytes_pos = 0;
    }
    *x++ = knownrandombytes_r[knownrandombytes_pos];
    xlen -= 1;
    knownrandombytes_r[knownrandombytes_pos++] = 0;
  }
}

void knownrandombytes(void *xvoid,long long xlen)
{
  knownrandombytes_main(xvoid,xlen);
  secret(xvoid,xlen);
}

/* ----- checksums */

static unsigned char checksum_state[64];
static char checksum_hex[65];

void checksum_expected(const char *expected)
{
  long long i;
  for (i = 0;i < 32;++i) {
    checksum_hex[2 * i] = "0123456789abcdef"[15 & (checksum_state[i] >> 4)];
    checksum_hex[2 * i + 1] = "0123456789abcdef"[15 & checksum_state[i]];
  }
  checksum_hex[2 * i] = 0;

  if (strcmp(checksum_hex,expected))
    fail("failure: checksum mismatch: %s expected %s\n",checksum_hex,expected);
}

void checksum_clear(void)
{
  memset(checksum_state,0,sizeof checksum_state);
  knownrandombytes_clear();
  testvector_clear();
  /* not necessary to clear canary */
}

void checksum(const unsigned char *x,unsigned long long xlen)
{
  u8 block[16];
  int i;
  while (xlen >= 16) {
    core_salsa(checksum_state,x,checksum_state);
    x += 16;
    xlen -= 16;
  }
  FOR(i,16) block[i] = 0;
  FOR(i,xlen) block[i] = x[i];
  block[xlen] = 1;
  checksum_state[0] ^= 1;
  core_salsa(checksum_state,block,checksum_state);
}

#include "limits.inc"

void *callocplus(long long len)
{
  if (valgrind) {
    unsigned char *x = malloc(len);
    if (!x) abort();
    return x;
  } else {
    unsigned char *x = calloc(1,len + 256);
    long long i;
    if (!x) abort();
    for (i = 0;i < len + 256;++i) x[i] = random();
    return x;
  }
}

void *aligned(void *x,long long len)
{
  if (valgrind)
    return x;
  else {
    long long i;
    unsigned char *y = x;
    y += 64;
    y += 63 & (-(unsigned long) y);
    for (i = 0;i < len;++i) y[i] = 0;
    return y;
  }
}

/* ----- catching SIGILL, SIGBUS, SIGSEGV, etc. */

void forked(void (*test)(long long),long long impl)
{
  if (valgrind) {
    test(impl);
    return;
  }
  fflush(stdout);
  pid_t child = fork();
  int childstatus = -1;
  if (child == -1) {
    fprintf(stderr,"fatal: fork failed: %s\n",strerror(errno));
    exit(111);
  }
  if (child == 0) {
    ok = 1;
    limits();
    test(impl);
    if (!ok) exit(100);
    exit(0);
  }
  if (waitpid(child,&childstatus,0) != child) {
    fprintf(stderr,"fatal: wait failed: %s\n",strerror(errno));
    exit(111);
  }
  if (childstatus)
    fail("failure: process failed, status %d\n",childstatus);
  fflush(stdout);
}

/* ----- endianness */

/* on big-endian machines, flip into little-endian */
/* other types of endianness are not supported */
void endianness(unsigned char *e,unsigned long long words,unsigned long long bytesperword)
{
  long long i = 1;

  if (1 == *(unsigned char *) &i) return;

  while (words > 0) {
    for (i = 0;2 * i < bytesperword;++i) {
      long long j = bytesperword - 1 - i;
      unsigned char ei = e[i];
      e[i] = e[j];
      e[j] = ei;
    }
    e += bytesperword;
    words -= 1;
  }
}
''']

# ==========

# Tables filled from the 'api' file read below (o = operation, p = primitive).
checksums = {} # (o,p) -> (second token, third token) of the primitive's line
operations = [] # distinct operation names, in order of first appearance
primitives = {} # o -> list of primitive names
sizes = {} # (o,p) -> that primitive's '#define' lines, each followed by '\n'
exports = {} # o -> function names with the leading 'crypto_' removed
prototypes = {} # o -> list of (return type, function name split on '_', argument text)
nooverlap = set() # (o,p) pairs whose line carries the 'nooverlap' option

# Three kinds of lines are recognized; anything else is ignored.
# The file is looked up relative to the current working directory.
with open('api') as f:
  for line in f:
    line = line.strip()
    # Primitive line: "crypto_<o>/<p> [<token> <token> [<option> ...]]".
    if line.startswith('crypto_'):
      line = line.split()
      x = line[0].split('/')
      assert len(x) == 2
      # o is the second '_'-separated piece of the part before '/'
      o = x[0].split('_')[1]
      if o not in operations: operations += [o]
      p = x[1]
      if o not in primitives: primitives[o] = []
      primitives[o] += [p]
      # the two tokens after the name are recorded only when both are present
      if len(line) >= 3:
        checksums[o,p] = line[1],line[2]
      # options other than 'nooverlap' are silently ignored
      for option in line[3:]:
        if option == 'nooverlap':
          nooverlap.add((o,p))
      continue
    # Size line: "#define crypto_<o>_<p>_<NAME> ...".  The macro name must
    # split into exactly four '_'-separated pieces.
    if line.startswith('#define '):
      x = line.split(' ')
      x = x[1].split('_')
      assert len(x) == 4
      assert x[0] == 'crypto'
      o = x[1]
      p = x[2]
      if (o,p) not in sizes: sizes[o,p] = ''
      sizes[o,p] += line+'\n'
      continue
    # Prototype line: "<rettype> crypto_<o>...(<args>);".  The unpacking
    # below requires exactly one '(' in the line and a return type that is
    # a single whitespace-free token.
    if line.endswith(');'):
      fun,args = line[:-2].split('(')
      rettype,fun = fun.split()
      fun = fun.split('_')
      o = fun[1]
      assert fun[0] == 'crypto'
      if o not in exports: exports[o] = []
      exports[o] += ['_'.join(fun[1:])]
      if o not in prototypes: prototypes[o] = []
      prototypes[o] += [(rettype,fun,args)]

# ========== verify

# Section header for the verify tests in the generated C program.
Z += [r'''
/* ----- verify, derived from supercop/crypto_verify/try.c */
''']

# Emit one copy of the verify test code per verify primitive.  Every
# occurrence of the text 'BYTES' in the template is replaced by the primitive
# name p, both inside identifiers (crypto_verify_BYTES) and where it stands
# alone as a length (memcmp(x,y,BYTES)).  So p is presumably a decimal byte
# count such as "16" -- confirm against the api file.
# Raises KeyError if the api file listed no crypto_verify primitive.
for p in primitives['verify']:
  Z += [r'''
static int (*crypto_verify_BYTES)(const unsigned char *,const unsigned char *);

static unsigned char *test_verify_BYTES_x;
static unsigned char *test_verify_BYTES_y;

static void test_verify_BYTES_check(void)
{
  unsigned char *x = test_verify_BYTES_x;
  unsigned char *y = test_verify_BYTES_y;
  int r;

  secret(x,BYTES);
  secret(y,BYTES);
  r = crypto_verify_BYTES(x,y);
  public(x,BYTES);
  public(y,BYTES);
  public(&r,sizeof r);

  if (r == 0) {
    if (memcmp(x,y,BYTES))
      fail("failure: different strings pass verify\n");
  } else if (r == -1) {
    if (!memcmp(x,y,BYTES))
      fail("failure: equal strings fail verify\n");
  } else {
    fail("failure: weird return value\n");
  }
}

void test_verify_BYTES_impl(long long impl)
{
  unsigned char *x = test_verify_BYTES_x;
  unsigned char *y = test_verify_BYTES_y;

  if (targeti && strcmp(targeti,".") && strcmp(targeti,lib1305_dispatch_verify_BYTES_implementation(impl))) return;
  if (targetn && atol(targetn) != impl) return;
  if (impl >= 0) {
    crypto_verify_BYTES = lib1305_dispatch_verify_BYTES(impl);
    printf("verify_BYTES %lld implementation %s compiler %s\n",impl,lib1305_dispatch_verify_BYTES_implementation(impl),lib1305_dispatch_verify_BYTES_compiler(impl));
  } else {
    crypto_verify_BYTES = lib1305_verify_BYTES;
    printf("verify_BYTES selected implementation %s compiler %s\n",lib1305_verify_BYTES_implementation(),lib1305_verify_BYTES_compiler());
  }

  randombytes(x,BYTES);
  randombytes(y,BYTES);
  test_verify_BYTES_check();
  memcpy(y,x,BYTES);
  test_verify_BYTES_check();
  y[myrandom() % BYTES] = myrandom();
  test_verify_BYTES_check();
  y[myrandom() % BYTES] = myrandom();
  test_verify_BYTES_check();
  y[myrandom() % BYTES] = myrandom();
  test_verify_BYTES_check();
}

static void test_verify_BYTES(void)
{
  if (targeto && strcmp(targeto,"verify")) return;
  if (targetp && strcmp(targetp,"BYTES")) return;

  test_verify_BYTES_x = callocplus(BYTES);
  test_verify_BYTES_y = callocplus(BYTES);

  for (long long offset = 0;offset < 2;++offset) {
    if (targetoffset && atol(targetoffset) != offset) continue;
    if (offset && valgrind) break;
    printf("verify_BYTES offset %lld\n",offset);
    for (long long impl = -1;impl < lib1305_numimpl_verify_BYTES();++impl)
      forked(test_verify_BYTES_impl,impl);
    ++test_verify_BYTES_x;
    ++test_verify_BYTES_y;
  }
}
'''.replace('BYTES',p)]

# ==========

# Description of the operations for which test code is generated.
# Each entry is (o, vars, howmuch, tests):
#   o       - operation name, used to look up primitives[o] and prototypes[o]
#   vars    - (v, initsize, allocsize) per buffer; initsize None means the
#             generated "<v>len" is declared without an initial value,
#             otherwise it is initialized to initsize; allocsize is the
#             length handed to output_prepare/input_prepare when replaying
#             the precomputed vectors
#   howmuch - (name, small, big): emitted as
#             "long long <name> = checksumbig ? <big> : <small>;"
#   tests   - (f, output, inout, input): the function crypto_<o><f> is called
#             with the arguments output+inout+input, in that order
todo = (
  ('onetimeauth',(
    ('a','crypto_onetimeauth_BYTES','crypto_onetimeauth_BYTES'),
    ('m',None,'4096+crypto_onetimeauth_BYTES'),
    ('k','crypto_onetimeauth_KEYBYTES','crypto_onetimeauth_KEYBYTES'),
  ),(
    ('loops','4096','32768'),
    ('maxtest','128','4096'),
  ),(
    ('',('a',),(),('m','mlen','k')),
    ('_verify',(),(),('a','m','mlen','k')),
  )),
)

for t in todo:
  o,vars,howmuch,tests = t
  
  for p in primitives[o]:
    X = []

    X += [f'/* ----- {o}/{p}, derived from supercop/crypto_{o}/try.c */\n']
    X += ['\n']
    X += ['#include <stdio.h>\n']
    X += ['#include <stdlib.h>\n']
    X += ['#include <string.h>\n']
    X += ['#include <stdint.h>\n']
    X += ['#include <lib1305.h>\n']
    X += ['#include "lib1305_test.h"\n']
    X += ['\n']
    X += ['#define fail ((ok = 0),printf)\n']

    X += ['static const char *%s_%s_checksums[] = {\n' % (o,p)]
    X += ['  "%s",\n' % checksums[o,p][0]]
    X += ['  "%s",\n' % checksums[o,p][1]]
    X += ['} ;\n']
    X += ['\n']

    for rettype,fun,args in prototypes[o]:
      X += ['static %s (*%s)(%s);\n' % (rettype,'_'.join(fun),args)]

    if (o,p) in sizes:
      for line in sizes[o,p].splitlines():
        psize = line.split()[1]
        size1 = psize.replace('crypto_%s_%s_'%(o,p),'crypto_%s_'%o)
        size2 = psize.replace('crypto_','lib1305_')
        X += ['#define %s %s\n' % (size1,size2)]
      X += ['\n']

    for v,initsize,allocsize in vars:
      X += ['static void *storage_%s_%s_%s;\n' % (o,p,v)]
      X += ['static unsigned char *test_%s_%s_%s;\n' % (o,p,v)]
    for v,initsize,allocsize in vars:
      X += ['static void *storage_%s_%s_%s2;\n' % (o,p,v)]
      X += ['static unsigned char *test_%s_%s_%s2;\n' % (o,p,v)]
    X += ['\n']

    if (o,p) in precomputed:
      X += ['#define precomputed_%s_%s_NUM %d\n' % (o,p,len(precomputed[o,p]))]
      X += ['\n']
      precompbytes = sum(len(precomp[v]) for v,initsize,allocsize in vars for precomp in precomputed[o,p])
      X += ['static const unsigned char precomputed_%s_%s[%s] = {\n' % (o,p,precompbytes)]
      for v,initsize,allocsize in vars:
        for precomp in precomputed[o,p]:
          X += ['  %s,\n' % ','.join(str(c) for c in precomp[v])]
      X += ['} ;\n']
      X += ['\n']
      precomppos = 0
      for v,initsize,allocsize in vars:
        X += ['static const unsigned char *precomputed_%s_%s_%s[precomputed_%s_%s_NUM] = {\n' % (o,p,v,o,p)]
        for precomp in precomputed[o,p]:
          X += ['  precomputed_%s_%s+%s,\n' % (o,p,precomppos)]
          precomppos += len(precomp[v])
        X += ['} ;\n']
        X += ['\n']
      assert precomppos == precompbytes
      for v,initsize,allocsize in vars:
        if all(v+'len' not in output+inout+input for f,output,inout,input in tests):
          continue
        X += ['static const long long precomputed_%s_%s_%slen[precomputed_%s_%s_NUM] = {\n' % (o,p,v,o,p)]
        for precomp in precomputed[o,p]:
          X += ['  %s,\n' % len(precomp[v])]
        X += ['} ;\n']
        X += ['\n']

    X += ['static void test_%s_%s_impl(long long impl)\n' % (o,p)]
    X += ['{\n']
    for v,initsize,allocsize in vars:
      X += ['  unsigned char *%s = test_%s_%s_%s;\n' % (v,o,p,v)]
    for v,initsize,allocsize in vars:
      X += ['  unsigned char *%s2 = test_%s_%s_%s2;\n' % (v,o,p,v)]
    for v,initsize,allocsize in vars:
      if initsize is None:
        X += ['  long long %slen;\n' % v]
        if v == 'x':
          X += ['  long long xwords;\n']
      else:
        if v == 'x':
          X += ['  long long xwords = %s;\n' % (initsize)]
          X += ['  long long xlen;\n']
        else:
          X += ['  long long %slen = %s;\n' % (v,initsize)]
    X += ['\n']

    X += ['  if (targeti && strcmp(targeti,".") && strcmp(targeti,lib1305_dispatch_%s_%s_implementation(impl))) return;\n' % (o,p)]
    X += ['  if (targetn && atol(targetn) != impl) return;\n'] # XXX: atoll is slightly unportable

    X += ['  if (impl >= 0) {\n']
    for rettype,fun,args in prototypes[o]:
      f2 = ['lib1305','dispatch',o,p]+fun[2:]
      X += ['    %s = %s(impl);\n' % ('_'.join(fun),'_'.join(f2))]
    X += ['    printf("%s_%s %%lld implementation %%s compiler %%s\\n",impl,lib1305_dispatch_%s_%s_implementation(impl),lib1305_dispatch_%s_%s_compiler(impl));\n' % (o,p,o,p,o,p)]
    X += ['  } else {\n']
    for rettype,fun,args in prototypes[o]:
      f2 = ['lib1305',o,p]+fun[2:]
      X += ['    %s = %s;\n' % ('_'.join(fun),'_'.join(f2))]
    X += ['    printf("%s_%s selected implementation %%s compiler %%s\\n",lib1305_%s_%s_implementation(),lib1305_%s_%s_compiler());\n' % (o,p,o,p,o,p)]
    X += ['  }\n']

    X += ['  for (long long checksumbig = 0;checksumbig < 2;++checksumbig) {\n']

    maxtestdefined = False
    for v,small,big in howmuch:
      X += ['    long long %s = checksumbig ? %s : %s;\n' % (v,big,small)]
      if v == 'maxtest': maxtestdefined = True
    X += ['\n']
    X += ['    checksum_clear();\n']
    X += ['\n']
    X += ['    for (long long loop = 0;loop < loops;++loop) {\n']

    wantresult = False
    for f,output,inout,input in tests:
      cof = 'crypto_'+o+f
      for rettype,fun,args in prototypes[o]:
        if cof == '_'.join(fun):
          if rettype != 'void':
            wantresult = True
    if wantresult:
      X += ['      int result;\n']

    itembytes = f'crypto_{o}_BYTES' if o == 'sort' else f'crypto_{o}_ITEMBYTES'

    if maxtestdefined and any('mlen' in input for f,output,inout,input in tests):
      X += ['      mlen = myrandom() % (maxtest + 1);\n']
    if maxtestdefined and any('hlen' in input for f,output,inout,input in tests):
      X += ['      hlen = myrandom() % (maxtest + 1);\n']
    if maxtestdefined and any('xwords' in input for f,output,inout,input in tests):
      X += ['      xwords = myrandom() % (maxtest + 1);\n']
    if any('x' in output+inout+input for f,output,inout,input in tests):
      X += [f'      xlen = xwords*{itembytes};\n']
    X += ['\n']

    initialized = set()
    for f,output,inout,input in tests:
      cof = 'crypto_'+o+f

      cofrettype = None
      for rettype,fun,args in prototypes[o]:
        if cof == '_'.join(fun):
          cofrettype = rettype

      expected = '0'
      unexpected = 'nonzero'
      if cof == 'crypto_hashblocks':
        expected = 'mlen % crypto_hashblocks_BLOCKBYTES'
        unexpected = 'unexpected value'

      for v in output:
        if len(v) == 1:
          X += ['      output_prepare(%s2,%s,%slen);\n' % (v,v,v)]
          # v now has CDE where C is canary, D is canary, E is canary
          # v2 now has same CDE
          # D is at start of v with specified length
          # C is 16 bytes before beginning
          # E is 16 bytes past end
      for v in input+inout:
        if len(v) == 1:
          if v in initialized:
            X += ['      memcpy(%s2,%s,%slen);\n' % (v,v,v)]
            X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
          else:
            X += ['      input_prepare(%s2,%s,%slen);\n' % (v,v,v)]
            # v now has CTE where C is canary, T is test data, E is canary
            # v2 has same CTE
            initialized.add(v)

        if v == 'x':
          X += [f'      endianness(x,xwords,{itembytes});\n']

      for v in input+inout:
        if len(v) == 1:
          X += ['      secret(%s,%slen);\n' % (v,v)]

      args = ','.join(output+inout+input)
      if cofrettype == 'void':
        X += ['      %s(%s);\n' % (cof,args)]
      else:
        X += ['      result = %s(%s);\n' % (cof,args)]
        X += ['      public(&result,sizeof result);\n']
        X += ['      if (result != %s) fail("failure: %s returns %s\\n");\n' % (expected,cof,unexpected)]
    
      for v in input+inout+output:
        if len(v) == 1:
          X += ['      public(%s,%slen);\n' % (v,v)]

        if v == 'x':
          X += [f'      endianness(x,xwords,{itembytes});\n']

      if cof == 'crypto_kem_dec':
        X += ['      if (memcmp(t,k,klen) != 0) fail("failure: %s does not match k\\n");\n' % cof]

      for v in output+inout:
        if len(v) == 1:
          X += ['      checksum(%s,%slen);\n' % (v,v)]
          # output v,v2 now has COE,CDE where O is output; checksum O
          initialized.add(v)
      for v in output+inout:
        if len(v) == 1:
          if cof == 'crypto_sign_open' and v == 't':
            X += ['      output_compare(%s2,%s,%slen,"%s");\n' % (v,v,'c',cof)]
          else:
            X += ['      output_compare(%s2,%s,%slen,"%s");\n' % (v,v,v,cof)]
            # output_compare checks COE,CDE for equal C, equal E
      for v in input:
        if len(v) == 1:
          X += ['      input_compare(%s2,%s,%slen,"%s");\n' % (v,v,v,cof)]
          # input_compare checks CTE,CTE for equal C, equal T, equal E
    
      deterministic = True
      if inout+input == (): deterministic = False
      if cof == 'crypto_kem_enc': deterministic = False
    
      if deterministic:
        X += ['\n']
        for v in output+inout+input:
          if len(v) == 1:
            X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
            # old output v,v2: COE,CDE; new v,v2: FOG,FDG where F,G are new canaries
            # old inout v,v2: COE,CTE; new v,v2: FOG,FTG
            # old input v,v2: CTE,CTE; new v,v2: FTG,FTG
    
        for v in inout+input:
          if v == 'x':
            X += [f'      endianness(x2,xwords,{itembytes});\n']

        for v in input+inout:
          if len(v) == 1:
            X += ['      secret(%s2,%slen);\n' % (v,v)]

        args = ','.join([v if v.endswith('words') or v.endswith('len') else v+'2' for v in output+inout+input])
        if cofrettype == 'void':
          X += ['      %s(%s);\n' % (cof,args)]
        else:
          X += ['      result = %s(%s);\n' % (cof,args)]
          X += ['      public(&result,sizeof result);\n']
          X += ['      if (result != %s) fail("failure: %s returns %s\\n");\n' % (expected,cof,unexpected)]
    
        for v in input+inout+output:
          if len(v) == 1:
            X += ['      public(%s2,%slen);\n' % (v,v)]

          if v == 'x':
            X += [f'      endianness(x2,xwords,{itembytes});\n']

        for w in output + inout:
          if len(w) == 1:
            # w,w2: COE,COE; goal now is to compare O
            X += ['      if (memcmp(%s2,%s,%slen) != 0) fail("failure: %s is nondeterministic\\n");\n' % (w,w,w,cof)]
    
      overlap = deterministic
      if inout != (): overlap = False
      if (o,p) in nooverlap: overlap = False

      # XXX: overlap test assumes that inputs are at least as big as outputs
    
      if overlap:
        for y in output:
          if len(y) == 1:
            X += ['\n']
            for v in output:
              if len(v) == 1:
                X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
            for v in input:
              if len(v) == 1:
                X += ['      double_canary(%s2,%s,%slen);\n' % (v,v,v)]
            for x in input:
              if len(x) == 1:
                # try writing to x2 instead of y, while reading x2
                args = ','.join([x+'2' if v==y else v for v in output] + [x+'2' if v==x else v for v in input])
    
                for v in input+inout:
                  v2 = x+'2' if v==x else v
                  if len(v) == 1:
                    X += ['      secret(%s,%slen);\n' % (v2,v)]

                if cofrettype == 'void':
                  X += ['      %s(%s);\n' % (cof,args)]
                else:
                  X += ['      result = %s(%s);\n' % (cof,args)]
                  X += ['      public(&result,sizeof result);\n']
                  X += ['      if (result != %s) fail("failure: %s with %s=%s overlap returns %s\\n");\n' % (expected,cof,x,y,unexpected)]
    
                for v in output:
                  v2 = x+'2' if v==y else v
                  if len(v) == 1:
                    X += ['      public(%s,%slen);\n' % (v2,v)]
                for v in input:
                  if v == x: continue
                  if len(v) == 1:
                    X += ['      public(%s,%slen);\n' % (v,v)]

                X += ['      if (memcmp(%s2,%s,%slen) != 0) fail("failure: %s does not handle %s=%s overlap\\n");\n' % (x,y,y,cof,x,y)]
                X += ['      memcpy(%s2,%s,%slen);\n' % (x,x,x)]
    
      if cof == 'crypto_kem_dec':
        X += ['\n']
        for tweaks in range(3):
          X += ['      c[myrandom() % clen] += 1 + (myrandom() % 255);\n']
          X += ['      %s(t,c,s);\n' % cof]
          X += ['      checksum(t,tlen);\n']

      if cof == 'crypto_onetimeauth_verify':
        X += ['\n']
        for tweaks in range(3):
          X += ['      a[myrandom() % alen] += 1 + (myrandom() % 255);\n']
          X += ['      if (%s(a,m,mlen,k) == 0)\n' % cof]
          X += ['        if (memcmp(a2,a,alen) != 0)\n']
          X += ['          fail("%s accepts modified authenticators");\n' % cof]

    # closing brace for a C block whose opening is emitted before this point
    X.append('    }\n')
    if cof == 'crypto_core' and p.startswith('wforce'):
      # Extra generated test for crypto_core with a wforce* primitive.
      # For every weight in 0..nlen and for both directions, the low bit of
      # each n[i] is cleared and then set again in the first `weight`
      # positions (direction 0) or the last `weight` positions (direction 1).
      # crypto_core is run on that input, h is passed to checksum(), outputs
      # and inputs are checked, and a second run on the copies must give the
      # same h.
      X.extend([
        '    {\n',
        '      long long weight,i,direction;\n',
        '      for (weight = 0;weight <= nlen;++weight) {\n',
        '        for (direction = 0;direction < 2;++direction) {\n',
        '          output_prepare(h2,h,hlen);\n',
        '          input_prepare(n2,n,nlen);\n',
        '          input_prepare(k2,k,klen);\n',
        '          input_prepare(c2,c,clen);\n',
        '          for (i = 0;i < nlen;++i) {\n',
        '            n[i] &= ~1;\n',
        '            if (direction) {\n',
        '              if (nlen-1-i < weight) n[i] += 1;\n',
        '            } else {\n',
        '              if (i < weight) n[i] += 1;\n',
        '            }\n',
        '            n2[i] = n[i];\n',
        '          }\n',
        '          crypto_core(h,n,k,c);\n',
        '          checksum(h,hlen);\n',
        '          output_compare(h2,h,hlen,"crypto_core");\n',
        '          input_compare(n2,n,nlen,"crypto_core");\n',
        '          input_compare(k2,k,klen,"crypto_core");\n',
        '          input_compare(c2,c,clen,"crypto_core");\n',
        '          double_canary(h2,h,hlen);\n',
        '          double_canary(n2,n,nlen);\n',
        '          double_canary(k2,k,klen);\n',
        '          double_canary(c2,c,clen);\n',
        '          crypto_core(h2,n2,k2,c2);\n',
        '          if (memcmp(h2,h,hlen) != 0) fail("failure: crypto_core is nondeterministic");\n',
        '        }\n',
        '      }\n',
        '    }\n',
      ])

    # compare the accumulated checksum with the expected entry for this (o,p)
    X.append(f'    checksum_expected({o}_{p}_checksums[checksumbig]);\n')
    X.append('  }\n')

    # ----- test vectors computed by python

    # For every function of this operation, and only when precomputed vectors
    # exist for (o,p), emit a C loop over the vectors that:
    #   1. prepares each buffer and copies the precomputed inputs into both
    #      the buffer and its "2" copy,
    #   2. calls the function,
    #   3. compares each output/inout buffer with the precomputed value and
    #      prints expected/received hex dumps on mismatch,
    #   4. runs output_compare/input_compare on every buffer.
    # cof is assigned on every iteration, even when nothing is emitted.
    for f,output,inout,input in tests:
      cof = 'crypto_'+o+f
      if (o,p) not in precomputed:
        continue

      X.append(f'  for (long long precomp = 0;precomp < precomputed_{o}_{p}_NUM;++precomp) {{\n')

      # step 1: buffer preparation and input loading
      for v,initsize,allocsize in vars:
        if v in output:
          X.append(f'    output_prepare({v}2,{v},{allocsize});\n')
        if v not in input+inout:
          continue
        X.append(f'    input_prepare({v}2,{v},{allocsize});\n')
        if v+'len' in output+inout+input:
          # an explicit <v>len argument exists: load it and copy that many bytes
          X.extend([
            f'    {v}len = precomputed_{o}_{p}_{v}len[precomp];\n',
            f'    memcpy({v},precomputed_{o}_{p}_{v}[precomp],{v}len);\n',
            f'    memcpy({v}2,precomputed_{o}_{p}_{v}[precomp],{v}len);\n',
          ])
        else:
          # no length argument: copy the whole allocation size
          X.extend([
            f'    memcpy({v},precomputed_{o}_{p}_{v}[precomp],{allocsize});\n',
            f'    memcpy({v}2,precomputed_{o}_{p}_{v}[precomp],{allocsize});\n',
          ])

      # step 2: the call, with arguments in output, inout, input order
      args = ','.join(output+inout+input)
      X.append(f'    {cof}({args});\n')

      # step 3: results must match the precomputed vectors
      for v,initsize,allocsize in vars:
        if v not in output+inout:
          continue
        X.extend([
          f'    if (memcmp({v},precomputed_{o}_{p}_{v}[precomp],{allocsize})) {{\n',
          f'      fail("failure: {cof} fails precomputed test vectors\\n");\n',
          f'      printf("expected {v} %lld: ",precomp);\n',
          f'      for (long long pos = 0;pos < {allocsize};++pos) printf("%02x",((unsigned char *) precomputed_{o}_{p}_{v}[precomp])[pos]);\n',
          '      printf("\\n");\n',
          f'      printf("received {v} %lld: ",precomp);\n',
          f'      for (long long pos = 0;pos < {allocsize};++pos) printf("%02x",{v}[pos]);\n',
          '      printf("\\n");\n',
          '    }\n',
        ])

      # step 4: per-buffer comparisons against the "2" copies
      for v,initsize,allocsize in vars:
        if v in output+inout:
          X.append(f'    output_compare({v}2,{v},{allocsize},"{cof}");\n')
        if v in input:
          X.append(f'    input_compare({v}2,{v},{allocsize},"{cof}");\n')

      X.append('  }\n')

    # closing brace (plus a blank line) for a C function whose header is
    # emitted before this point
    X.extend(['}\n','\n'])

    # ----- generated driver test_<o>_<p>(): allocate the buffers, run every
    # implementation at each offset in a forked child, then free the buffers
    X.extend([
      f'void test_{o}_{p}(void)\n',
      '{\n',
      '  long long maxalloc = 0;\n',
      f'  if (targeto && strcmp(targeto,"{o}")) return;\n',
      f'  if (targetp && strcmp(targetp,"{p}")) return;\n',
    ])

    # For crypto_sort every size expression is multiplied by
    # lib1305_sort_<p>_BYTES; otherwise allocsize is used as is.
    # cof still holds the value left by the last loop over tests.
    sortscale = f'lib1305_sort_{p}_BYTES*' if cof == 'crypto_sort' else ''

    # primary buffers; the generated C tracks the largest size in maxalloc
    for v,initsize,allocsize in vars:
      X.extend([
        f'  storage_{o}_{p}_{v} = callocplus({sortscale}{allocsize});\n',
        f'  test_{o}_{p}_{v} = aligned(storage_{o}_{p}_{v},{sortscale}{allocsize});\n',
        f'  if ({sortscale}{allocsize} > maxalloc) maxalloc = {sortscale}{allocsize};\n',
      ])
    # "2" copies: each is allocated with maxalloc bytes
    for v,initsize,allocsize in vars:
      X.extend([
        f'  storage_{o}_{p}_{v}2 = callocplus(maxalloc);\n',
        f'  test_{o}_{p}_{v}2 = aligned(storage_{o}_{p}_{v}2,{sortscale}{allocsize});\n',
      ])
    X.append('\n')

    # encode/decode/sort require alignment, so only offset 0 is generated for them
    noffsets = 1 if o in ('encode','decode','sort') else 2
    X.extend([
      f'  for (long long offset = 0;offset < {noffsets};++offset) {{\n',
      '    if (targetoffset && atol(targetoffset) != offset) continue;\n',
      '    if (offset && valgrind) break;\n',
      f'    printf("{o}_{p} offset %lld\\n",offset);\n',
      f'    for (long long impl = -1;impl < lib1305_numimpl_{o}_{p}();++impl)\n',
      f'      forked(test_{o}_{p}_impl,impl);\n',
    ])
    # advance every buffer pointer by one element for the next offset:
    # primaries first, then the "2" copies
    for dup in ('','2'):
      for v,initsize,allocsize in vars:
        X.append(f'    ++test_{o}_{p}_{v}{dup};\n')

    X.append('  }\n')

    # release in reverse order: all "2" copies first, then the primaries
    for dup in ('2',''):
      for v,initsize,allocsize in reversed(vars):
        X.append(f'  free(storage_{o}_{p}_{v}{dup});\n')

    X.append('}\n')

    # Emit one #undef per line of sizes[o,p]: the second whitespace-separated
    # field is taken as the macro name, and its crypto_<o>_<p>_ prefix is
    # shortened to crypto_<o>_ to form the name that gets undefined.
    if (o,p) in sizes:
      for line in sizes[o,p].splitlines():
        psize = line.split()[1]
        size1 = psize.replace(f'crypto_{o}_{p}_',f'crypto_{o}_')
        X.append(f'#undef {size1}\n')
      X.append('\n')

    # write the generated C source; save() skips the write when the file
    # already has exactly this content
    save(f'command/lib1305-test_{o}_{p}.c',''.join(X))


# Opening part of the generated main(), appended verbatim to Z (raw string, so
# the C escapes such as \n are emitted unchanged). The C code initializes
# valgrind support, prints version/arch/cpuid information, and then reads up to
# five optional positional arguments in order: targeto, targetp, targeti,
# targetn, targetoffset. main() is left open here; the test calls and the
# closing part are appended to Z further down.
Z += [r'''/* ----- top level */

#include "print_cpuid.inc"

int main(int argc,char **argv)
{
  valgrind_init();
  if (valgrind) limits();

  setvbuf(stdout,0,_IOLBF,0);
  printf("lib1305 version %s\n",lib1305_version);
  printf("lib1305 arch %s\n",lib1305_arch);
  print_cpuid();

  if (valgrind) {
    printf("valgrind %d",(int) valgrind);
    printf(" declassify %d",(int) crypto_declassify_uses_valgrind);
    if (!crypto_declassify_uses_valgrind)
      printf(" (expect false positives)");
    printf("\n");
  }

  if (*argv) ++argv;
  if (*argv) {
    targeto = *argv++;
    if (*argv) {
      targetp = *argv++;
      if (*argv) {
        targeti = *argv++;
        if (*argv) {
          targetn = *argv++;
          if (*argv) {
            targetoffset = *argv++;
          }
        }
      }
    }
  }

''']

# Body of the generated main(): one call to test_verify_<p>() for each
# primitive listed under 'verify'.
for p in primitives['verify']:
  Z.append(f'  test_verify_{p}();\n')

# Then one call per (operation, primitive) pair from todo, together with the
# matching header lines: a #define mapping the short name to a lib1305_-prefixed
# symbol, and the extern declaration.
# NOTE(review): the macro target is lib1305_test_<o><p>, with no underscore
# between o and p; reproduced exactly as found, confirm this is intended.
for o,vars,howmuch,tests in todo:
  for p in primitives[o]:
    Z.append(f'  test_{o}_{p}();\n')
    H.extend((
      f'#define test_{o}_{p} lib1305_test_{o}{p}\n',
      f'extern void test_{o}_{p}(void);\n',
    ))

# Closing part of the generated main(): report the overall result through the
# C variable `ok` and return 100 on failure, 0 on success.
Z += [r'''
  if (!ok) {
    printf("some tests failed\n");
    return 100;
  }
  printf("all tests succeeded\n");
  return 0;
}
''']

# final line of the generated header (presumably closes a conditional opened
# earlier in H, outside this section)
H += ['#endif\n']

# Write both generated files; save() leaves a file untouched when its content
# is already identical.
save('command/lib1305_test.h',''.join(H))
save('command/lib1305-test.c',''.join(Z))
