diff --git a/_CoqProject b/_CoqProject index 43bade89a5..ac37375417 100644 --- a/_CoqProject +++ b/_CoqProject @@ -38,6 +38,7 @@ src/StandaloneOCamlMain.v src/TAPSort.v src/UnderLets.v src/UnderLetsProofs.v +src/UnsaturatedSolinasHeuristics.v src/Algebra/Field.v src/Algebra/Field_test.v src/Algebra/Group.v diff --git a/primes.txt b/primes.txt index 4e9102b66a..5510575ea4 100644 --- a/primes.txt +++ b/primes.txt @@ -73,7 +73,7 @@ 2^450 - 2^225 - 1 2^480 - 2^240 - 1 # ridinghood -# two ore more taps +# two or more taps 2^205 - 45*2^198 - 1 2^224 - 2^96 + 1 # p224 diff --git a/src/PushButtonSynthesis/UnsaturatedSolinas.v b/src/PushButtonSynthesis/UnsaturatedSolinas.v index 3dfb6bfca6..9d1eaf0606 100644 --- a/src/PushButtonSynthesis/UnsaturatedSolinas.v +++ b/src/PushButtonSynthesis/UnsaturatedSolinas.v @@ -30,6 +30,7 @@ Require Import Crypto.Arithmetic.Partition. Require Import Crypto.Arithmetic.Freeze. Require Import Crypto.BoundsPipeline. Require Import Crypto.COperationSpecifications. +Require Import Crypto.UnsaturatedSolinasHeuristics. Require Import Crypto.PushButtonSynthesis.ReificationCache. Require Import Crypto.PushButtonSynthesis.Primitives. Require Import Crypto.PushButtonSynthesis.UnsaturatedSolinasReificationCache. @@ -77,35 +78,7 @@ Section __. (machine_wordsize : Z). Let limbwidth := (Z.log2_up (s - Associational.eval c) / Z.of_nat n)%Q. - (** Translating from https://github.com/mit-plv/fiat-crypto/blob/c60b1d2556a72c37f4bc7444204e9ddc0791ce4f/src/Specific/solinas64_2e448m2e224m1_8limbs/CurveParameters.v#L11-L35 -<< -if len(p) > 2: - # do interleaved carry chains, starting at where the taps are - starts = [(int(t[1] / (num_bits(p) / sz)) - 1) % sz for t in p[1:]] - chain2 = [] - for n in range(1,sz): - for j in starts: - chain2.append((j + n) % sz) - chain2 = remove_duplicates(chain2) - chain3 = list(map(lambda x:(x+1)%sz,starts)) - carry_chains = [starts,chain2,chain3] -else: - carry_chains = "default" ->> *) - (* p is [(value, weight)]; c is [(weight, value)] *) - Let p := [(s / 2^Z.log2 s, Z.log2 s)] ++ TAPSort.sort (List.map (fun '(w, v) => (-v, Z.log2 w)) c). - Let idxs : list nat - := if (2 (Qfloor (w / limbwidth) - 1) mod n) (tl p) in - let chain2 := flat_map - (fun n' : nat - => List.map (fun j => (j + n') mod n) starts) - (List.seq 1 (pred n)) in - let chain2 := remove_duplicates Z.eqb chain2 in - let chain3 := List.map (fun x => (x + 1) mod n) starts in - List.map Z.to_nat (starts ++ chain2 ++ chain3) - else (List.seq 0 n ++ [0; 1])%list%nat. + Let idxs : list nat := carry_chains n s c. Let coef := 2. Let n_bytes := bytes_n (Qnum limbwidth) (Qden limbwidth) n. Let prime_upperbound_list : list Z @@ -114,7 +87,7 @@ else: := encode_no_reduce (weight 8 1) n_bytes (s-1). Let tight_upperbounds : list Z := List.map - (fun v : Z => Qceiling (11/10 * v)) + (fun v : Z => Qceiling (tight_upperbound_fraction * v)) prime_upperbound_list. Definition prime_bound : ZRange.type.option.interp (base.type.Z) := Some r[0~>(s - Associational.eval c - 1)]%zrange. @@ -142,7 +115,7 @@ else: Definition tight_bounds : list (ZRange.type.option.interp base.type.Z) := List.map (fun u => Some r[0~>u]%zrange) tight_upperbounds. Definition loose_bounds : list (ZRange.type.option.interp base.type.Z) - := List.map (fun u => Some r[0 ~> 3*u]%zrange) tight_upperbounds. + := List.map (fun u => Some r[0 ~> loose_upperbound_extra_multiplicand*u]%zrange) tight_upperbounds. Lemma length_prime_upperbound_list : List.length prime_upperbound_list = n. Proof using Type. cbv [prime_upperbound_list]; now autorewrite with distr_length. Qed. @@ -586,6 +559,7 @@ else: intro H. repeat first [ let H' := fresh in destruct H as [H' H]; split; [ assumption | ] | let x := fresh in intro x; specialize (H x) ]. + cbv [loose_upperbound_extra_multiplicand]. Z.ltb_to_lt; lia. Qed. diff --git a/src/UnsaturatedSolinasHeuristics.v b/src/UnsaturatedSolinasHeuristics.v new file mode 100644 index 0000000000..5d75fa1b4a --- /dev/null +++ b/src/UnsaturatedSolinasHeuristics.v @@ -0,0 +1,110 @@ +Require Import Coq.Lists.List. +Require Import Coq.ZArith.ZArith. +Require Import Coq.QArith.QArith_base Coq.QArith.Qround. +Require Import Crypto.Arithmetic.Core. +Require Import Crypto.Arithmetic.ModOps. +Require Import Crypto.Util.ListUtil. +Require Import Crypto.Util.ZRange. +Require Crypto.TAPSort. +Import ListNotations. +Local Open Scope Z_scope. Local Open Scope list_scope. Local Open Scope bool_scope. + +Import Associational Positional. + +Notation limbwidth n s c := (inject_Z (Z.log2_up (s - Associational.eval c)) / inject_Z (Z.of_nat n))%Q. + +Local Coercion Z.of_nat : nat >-> Z. +Local Coercion QArith_base.inject_Z : Z >-> Q. +Local Coercion Z.pos : positive >-> Z. + +Section __. + Context (n : nat) + (s : Z) + (c : list (Z * Z)) + (machine_wordsize : Z). + + Let limbwidth := (limbwidth n s c). + (** Translating from https://github.com/mit-plv/fiat-crypto/blob/c60b1d2556a72c37f4bc7444204e9ddc0791ce4f/src/Specific/solinas64_2e448m2e224m1_8limbs/CurveParameters.v#L11-L35 +<< +if len(p) > 2: + # do interleaved carry chains, starting at where the taps are + starts = [(int(t[1] / (num_bits(p) / sz)) - 1) % sz for t in p[1:]] + chain2 = [] + for n in range(1,sz): + for j in starts: + chain2.append((j + n) % sz) + chain2 = remove_duplicates(chain2) + chain3 = list(map(lambda x:(x+1)%sz,starts)) + carry_chains = [starts,chain2,chain3] +else: + carry_chains = "default" +>> *) + (* p is [(value, weight)]; c is [(weight, value)] *) + Let p := [(s / 2^Z.log2 s, Z.log2 s)] ++ TAPSort.sort (List.map (fun '(w, v) => (-v, Z.log2 w)) c). + Definition carry_chains : list nat + := if (2 (Qfloor (w / limbwidth) - 1) mod n) (tl p) in + let chain2 := flat_map + (fun n' : nat + => List.map (fun j => (j + n') mod n) starts) + (List.seq 1 (pred n)) in + let chain2 := remove_duplicates Z.eqb chain2 in + let chain3 := List.map (fun x => (x + 1) mod n) starts in + List.map Z.to_nat (starts ++ chain2 ++ chain3) + else (List.seq 0 n ++ [0; 1])%list%nat. + + Definition tight_upperbound_fraction : Q := (11/10)%Q. + Definition loose_upperbound_extra_multiplicand : Z := 3. + Definition prime_upperbound_list : list Z + := encode_no_reduce (weight (Qnum limbwidth) (Qden limbwidth)) n (s-1). + Definition tight_upperbounds : list Z + := List.map + (fun v : Z => Qceiling (tight_upperbound_fraction * v)) + prime_upperbound_list. + + Definition loose_upperbounds : list Z + := List.map + (fun v : Z => loose_upperbound_extra_multiplicand * v) + tight_upperbounds. + + Definition tight_bounds : list (option zrange) + := List.map (fun u => Some r[0~>u]%zrange) tight_upperbounds. + Definition loose_bounds : list (option zrange) + := List.map (fun u => Some r[0~>u]%zrange) loose_upperbounds. + + (** check if the suggested number of limbs will overflow when adding + partial products after a multiplication and then doing solinas + reduction *) + Definition overflow_free : bool + := let v := squaremod (weight (Qnum limbwidth) (Qden limbwidth)) s c n loose_upperbounds in + forallb (fun k => Z.log2 k (s =? 2^Z.log2 s) + && (w =? 2^Z.log2 w) + && (Z.log2 s =? 2 * Z.log2 w) + | nil => false + end. +End __. + +Section ___. + Context (s : Z) + (c : list (Z * Z)) + (machine_wordsize : Z). + (** given a parsed prime, pick out all plausible numbers of (unsaturated) limbs *) + (** we want to leave enough bits unused to do a full solinas + reduction without carrying; the number of bits necessary is the + sum of the bits in the negative coefficients of p (other than + the most significant digit), i.e., in the positive coefficients + of c *) + Let num_bits_p := Z.log2_up s. + Let unused_bits := sum (map (fun t => Z.log2_up (fst t)) c). + Let min_limbs := Z.to_nat (Qceiling (num_bits_p / (machine_wordsize - unused_bits))). + (* don't search past 2x as many limbs as saturated representation; that's just wasteful *) + Let result := filter (fun n => overflow_free n s c machine_wordsize) (seq min_limbs min_limbs). + Definition get_possible_limbs : list nat + := result. +End ___.