malb · blowfish880 · Jun 6, 2018 · Jun 6, 2018 · Jun 11, 2018 · Jun 15, 2018
diff --git a/README.md b/README.md
@@ -68,13 +68,29 @@ provided during initialization. Available algorithms are:
     Setup costs are roughly $σ²$ (as currently implemented) and table sizes linear
     in $σ$, but sampling is then just a randomized lookup. Any real-valued $c$ is 
     accepted.
+
+  - `DGS_DISC_GAUSS_CONVOLUTION` - Applies the convolution technique to alias
+    sampling in order to reduce memory overhead and setup cost at the cost of
+    running time. This is suitable for large $σ$. Any real-valued $c$ is accepted.
 
   Algorithm 2-4 are described in:
 
   Léo Ducas, Alain Durmus, Tancrède Lepoint and Vadim Lyubashevsky. *Lattice
   Signatures and Bimodal Gaussians*; in Advances in Cryptology – CRYPTO 2013;
   Lecture Notes in Computer Science Volume 8042, 2013, pp 40-56
   [(PDF)](http://www.di.ens.fr/~lyubash/papers/bimodal.pdf)
+
+  Algorithm 6 is described in:
+
+  Thomas Pöppelmann, Léo Ducas, Tim Güneysu. *Enhanced Lattice-Based Signatures 
+  on Reconfigurable Hardware*; in Cryptographic Hardware and Embedded 
+  Systems – CHES 2014; Lecture Notes in Computer Science Volume 8731, 
+  2014, pp 353-370 [(PDF)](https://eprint.iacr.org/2014/254.pdf)
+
+  Daniele Micciancio, Michael Walter. *Gaussian Sampling over the Integers: 
+  Efficient, Generic, Constant-Time*; in Advances in Cryptology – CRYPTO 2017;
+  Lecture Notes in Computer Science Volume 10402, 2017, pp 455-485
+  [(PDF)](https://eprint.iacr.org/2017/259.pdf) 
 
 ### Randomized Rounders
 This type of algorithm is useful to produce samples where the parameters of 
@@ -88,12 +104,22 @@ Available algorithms are:
 
   - `DGS_RROUND_KARNEY` - Karney's algorithm, similar in spirit to the sampler 
   `DGS_DISC_SIGMA2_LOGTABLE`, but without the need to precompute log tables and
-  without restriction on the center. 
+  without restriction on the center.
+
+  - `DGS_RROUND_CONVOLUTION` - Reduces the rounding problem to the sampling 
+  problem and invokes alias sampling. 
 
   Karney's algorithm is described in:
 
   Charles F. F. Karney. *Sampling Exactly from the Normal Distribution*; in
   ACM Trans. Mathematical Software 42(1), 3:1-14 (Jan. 2016) [(PDF)](https://arxiv.org/pdf/1303.6257)
+
+  The convolution approach to randomized rounding is described in
+
+  Daniele Micciancio, Michael Walter. *Gaussian Sampling over the Integers: 
+  Efficient, Generic, Constant-Time*; in Advances in Cryptology – CRYPTO 2017;
+  Lecture Notes in Computer Science Volume 10402, 2017, pp 455-485
+  [(PDF)](https://eprint.iacr.org/2017/259.pdf) 
 
 ## Precisions ##
 

diff --git a/benchmarks/bench.c b/benchmarks/bench.c
@@ -23,6 +23,8 @@ void print_gauss_z_help(const char *name) {
   printf("      %d -- sample from uniform distribution, exp() calls tabulated\n", DGS_DISC_GAUSS_UNIFORM_TABLE);
   printf("      %d -- sample from uniform distribution, exp() calls as Bernoulli oracles\n", DGS_DISC_GAUSS_UNIFORM_LOGTABLE);
   printf("      %d -- sample from k⋅σ2 distribution, exp() calls as Bernoulli oracles \n", DGS_DISC_GAUSS_SIGMA2_LOGTABLE);
+  printf("      %d -- use alias method (precomputed table) \n", DGS_DISC_GAUSS_ALIAS);
+  printf("      %d -- use convolution technique with alias method \n", DGS_DISC_GAUSS_CONVOLUTION);
   printf(" p -- precision: 0 for double precision, 1 for arbitrary precision\n");
   printf(" n -- number of trials > 0 (default: 100000)\n");
 }

diff --git a/benchmarks/bench_gauss.c b/benchmarks/bench_gauss.c
@@ -72,6 +72,7 @@ const char *alg_to_str(dgs_disc_gauss_alg_t alg) {
   case DGS_DISC_GAUSS_UNIFORM_LOGTABLE: return "uniform+logtable";
   case DGS_DISC_GAUSS_SIGMA2_LOGTABLE: return "sigma2+logtable";
   case DGS_DISC_GAUSS_ALIAS:           return "alias";
+  case DGS_DISC_GAUSS_CONVOLUTION:           return "convolution";
   default: return "unknown";
   }
 }

diff --git a/dgs/dgs_gauss.h b/dgs/dgs_gauss.h
@@ -31,6 +31,15 @@
     Bernoulli distributions (but no calls to `\exp`). Note that this sampler
     adjusts sigma to match `σ₂·k` for some integer `k`.  Only integer-valued
     `c` are supported.
+
+  - `DGS_DISC_GAUSS_ALIAS` - uses the [alias method](https://en.wikipedia.org/wiki/Alias_method).
+    Setup costs are roughly $σ²$ (as currently implemented) and table sizes linear
+    in $σ$, but sampling is then just a randomized lookup. Any real-valued $c$ is 
+    accepted.
+
+  - `DGS_DISC_GAUSS_CONVOLUTION` - Applies the convolution technique to alias
+    sampling in order to reduce memory overhead and setup cost at the cost of
+    running time. This is suitable for large $σ$. Any real-valued $c$ is accepted.
 
   AVAILABLE PRECISIONS:
 
@@ -124,6 +133,7 @@ typedef enum {
   DGS_DISC_GAUSS_UNIFORM_LOGTABLE  = 0x3, //<call dgs_disc_gauss_mp_call_uniform_logtable
   DGS_DISC_GAUSS_SIGMA2_LOGTABLE   = 0x7, //<call dgs_disc_gauss_mp_call_sigma2_logtable
   DGS_DISC_GAUSS_ALIAS             = 0x8, //<call dgs_disc_gauss_mp_call_alias
+  DGS_DISC_GAUSS_CONVOLUTION       = 0x9, //<call dgs_disc_gauss_mp_call_convolution
 } dgs_disc_gauss_alg_t;
 
 /**
@@ -311,6 +321,19 @@ typedef struct _dgs_disc_gauss_dp_t {
 
   long* alias;
   dgs_bern_dp_t** bias;
+
+  /**
+   * Base sampler for convolution
+   */
+  struct _dgs_disc_gauss_dp_t* base_sampler;
+  size_t n_coefficients;
+  long* coefficients;
+
+  /**
+   * Sampler to adjust center in convolution
+   */
+  struct _dgs_disc_gauss_dp_t* coset_sampler;
+
 } dgs_disc_gauss_dp_t;
 
 /**
@@ -370,6 +393,17 @@ long dgs_disc_gauss_dp_call_uniform_table_offset(dgs_disc_gauss_dp_t *self);
 
 long dgs_disc_gauss_dp_call_alias(dgs_disc_gauss_dp_t *self);
 
+/**
+   Sample from ``dgs_disc_gauss_dp_t`` by convolution sampling 
+   (base sampler = alias sampling). This can be used to reduce 
+   the memory overhead and setup costs of alias sampling for 
+   wide distributions, at the cost of increasing running time.
+
+   :param self: discrete Gaussian sampler
+ */
+
+long dgs_disc_gauss_dp_call_convolution(dgs_disc_gauss_dp_t *self);
+
 /**
   Sample from ``dgs_disc_gauss_dp_t`` by rejection sampling using the uniform
   distribution replacing all ``exp()`` calls with calls to Bernoulli
@@ -550,9 +584,20 @@ typedef struct _dgs_disc_gauss_mp_t {
     /**
    * Tables required for alias sampling.
    */
-
   mpz_t* alias;
   dgs_bern_mp_t** bias;
+
+  /**
+   * Base sampler for convolution
+   */
+  struct _dgs_disc_gauss_mp_t* base_sampler;
+  size_t n_coefficients;
+  mpz_t* coefficients;
+
+  /**
+   * Sampler to adjust center in convolution
+   */
+  struct _dgs_disc_gauss_mp_t* coset_sampler;
 
 } dgs_disc_gauss_mp_t;
 
@@ -595,6 +640,16 @@ void dgs_disc_gauss_mp_call_uniform_table_offset(mpz_t rop, dgs_disc_gauss_mp_t
  */
 void dgs_disc_gauss_mp_call_alias(mpz_t rop, dgs_disc_gauss_mp_t *self, gmp_randstate_t state);
 
+/**
+   Sample from ``dgs_disc_gauss_mp_t`` by convolution sampling 
+   (base sampler = alias sampling). This can be used to reduce 
+   the memory overhead and setup costs of alias sampling for 
+   wide distributions, at the cost of increasing running time.
+
+   :param self: discrete Gaussian sampler
+ */
+void dgs_disc_gauss_mp_call_convolution(mpz_t rop, dgs_disc_gauss_mp_t *self, gmp_randstate_t state);
+
 /**
   Sample from ``dgs_disc_gauss_mp_t`` by rejection sampling using the uniform
   distribution replacing all ``exp()`` calls with call to Bernoulli distributions.

diff --git a/dgs/dgs_gauss_dp.c b/dgs/dgs_gauss_dp.c
@@ -213,6 +213,11 @@ dgs_disc_gauss_dp_t *dgs_disc_gauss_dp_init(double sigma, double c, size_t tau,
     self->alias = (long*)malloc(sizeof(long)*self->two_upper_bound_minus_one);
     self->bias = (dgs_bern_dp_t**)malloc(sizeof(dgs_bern_dp_t*)*self->two_upper_bound_minus_one);
 
+    if (!self->alias || !self->bias){
+      dgs_disc_gauss_dp_clear(self);
+      dgs_die("out of memory");
+    }
+
     // simple robin hood strategy approximates good alias
     // this precomputation takes ~n^2, but could be reduced by 
     // using better data structures to compute min and max 
@@ -234,6 +239,81 @@ dgs_disc_gauss_dp_t *dgs_disc_gauss_dp_init(double sigma, double c, size_t tau,
     break;
   }
 
+  case DGS_DISC_GAUSS_CONVOLUTION: {
+    self->call = dgs_disc_gauss_dp_call_convolution;
+
+    double eta = 2;
+    double sigma1 = sigma;
+    double coset_sigma = sqrt(2)*eta;
+    if (fabs(self->c_r) > 0) {
+      // we might need to adjust the center
+      sigma1 = sqrt(sigma*sigma - coset_sigma*coset_sigma);
+    }
+
+    long table_size = 2*ceil(sigma1*tau) * (sizeof(dgs_bern_dp_t) + sizeof(long));
+    int recursion_level = 0;
+    double current_sigma = sigma1;
+    long z1 = 1;
+    long z2 = 1;
+
+    // compute recursion level for convolution
+    while (table_size > DGS_DISC_GAUSS_MAX_TABLE_SIZE_BYTES) {
+      recursion_level++;
+      z1 = floor(sqrt(current_sigma/(eta*2)));
+      if (z1 == 0) {
+        dgs_disc_gauss_dp_clear(self);
+        dgs_die("MAX_TABLE_SIZE too small to store alias sampler!");
+      }
+      z2 = (z1 > 1) ? z1 - 1 : 1;
+      current_sigma /= (sqrt(z1*z1 + z2*z2));
+      table_size = 2*ceil(current_sigma*tau) * (sizeof(dgs_bern_dp_t) + sizeof(long));
+    }
+
+    self->n_coefficients = 1 << recursion_level;
+    self->coefficients = (long*)malloc(sizeof(long)*self->n_coefficients);
+    for (int i = 0; i < self->n_coefficients; ++i) {
+      self->coefficients[i] = 1;
+    }
+
+    // if there is no convolution, we simply forward to alias and
+    // so we won't need adjustment of sigma, even if sampling off center
+    current_sigma = (recursion_level == 0)? sigma : sigma1;
+
+    // redo above computation to store coefficients
+    for (int i = 0; i < recursion_level; ++i) {
+      z1 = floor(sqrt(current_sigma/(eta*2)));
+      z2 = (z1 > 1) ? z1 - 1 : 1;
+
+      // we unroll the recursion on the coefficients on the fly
+      // so we don't have to use recursion during the call
+      int off = (1 << recursion_level - i - 1);
+      for (int j = 0; j < (1 << i); ++j) {
+        for (int k = 0; k < off;++k) {
+          self->coefficients[2*j*off + k] *= z1;
+        }
+      }
+
+      for (int j = 0; j < (1 << i); ++j) {
+        for (int k = 0; k < off;++k) {
+          self->coefficients[(2*j + 1)*off + k] *= z2;
+        }
+      }
+
+      current_sigma /= (sqrt(z1*z1 + z2*z2));
+    }
+
+    double base_c = self->c_r;
+    self->coset_sampler = NULL;
+    if (recursion_level > 0 && fabs(self->c_r) > 0) {
+      // we'll need to adjust the center
+      base_c = 0.0;
+      self->coset_sampler = dgs_disc_gauss_dp_init(coset_sigma, self->c_r, tau, DGS_DISC_GAUSS_ALIAS);
+    }
+    self->base_sampler = dgs_disc_gauss_dp_init(current_sigma, base_c, tau, DGS_DISC_GAUSS_ALIAS);
+
+    break;
+  }
+
   default:
     dgs_disc_gauss_dp_clear(self);
     dgs_die("unknown algorithm %d", algorithm);
@@ -289,6 +369,19 @@ long dgs_disc_gauss_dp_call_alias(dgs_disc_gauss_dp_t *self) {
   return x + self->c_z - self->upper_bound_minus_one;
 }
 
+long dgs_disc_gauss_dp_call_convolution(dgs_disc_gauss_dp_t *self) {
+  long x = 0;
+  for (int i = 0; i < self->n_coefficients; ++i) {
+    x += self->coefficients[i]*self->base_sampler->call(self->base_sampler);
+  }
+
+  // adjust center if necessary
+  if (self->coset_sampler) {
+    x += self->coset_sampler->call(self->coset_sampler);
+  }
+  return x + self->c_z;
+}
+
 long dgs_disc_gauss_dp_call_uniform_logtable(dgs_disc_gauss_dp_t *self) {
   long x;
   do {
@@ -334,6 +427,15 @@ void dgs_disc_gauss_dp_clear(dgs_disc_gauss_dp_t *self) {
     }
     free(self->bias);
   }
+  if (self->base_sampler) {
+    dgs_disc_gauss_dp_clear(self->base_sampler);
+  }
+  if (self->coefficients) {
+    free(self->coefficients);
+  }
+  if (self->coset_sampler) {
+    dgs_disc_gauss_dp_clear(self->coset_sampler);
+  }
 
   free(self);
 }