from sympy.core.numbers import igcd, mod_inverse from sympy.core.power import integer_nthroot from sympy.ntheory.residue_ntheory import _sqrt_mod_prime_power from sympy.ntheory import isprime from math import log, sqrt import random rgen = random.Random() class SievePolynomial: def __init__(self, modified_coeff=(), a=None, b=None): """This class denotes the seive polynomial. If ``g(x) = (a*x + b)**2 - N``. `g(x)` can be expanded to ``a*x**2 + 2*a*b*x + b**2 - N``, so the coefficient is stored in the form `[a**2, 2*a*b, b**2 - N]`. This ensures faster `eval` method because we dont have to perform `a**2, 2*a*b, b**2` every time we call the `eval` method. As multiplication is more expensive than addition, by using modified_coefficient we get a faster seiving process. Parameters ========== modified_coeff : modified_coefficient of sieve polynomial a : parameter of the sieve polynomial b : parameter of the sieve polynomial """ self.modified_coeff = modified_coeff self.a = a self.b = b def eval(self, x): """ Compute the value of the sieve polynomial at point x. Parameters ========== x : Integer parameter for sieve polynomial """ ans = 0 for coeff in self.modified_coeff: ans *= x ans += coeff return ans class FactorBaseElem: """This class stores an element of the `factor_base`. """ def __init__(self, prime, tmem_p, log_p): """ Initialization of factor_base_elem. Parameters ========== prime : prime number of the factor_base tmem_p : Integer square root of x**2 = n mod prime log_p : Compute Natural Logarithm of the prime """ self.prime = prime self.tmem_p = tmem_p self.log_p = log_p self.soln1 = None self.soln2 = None self.a_inv = None self.b_ainv = None def _generate_factor_base(prime_bound, n): """Generate `factor_base` for Quadratic Sieve. The `factor_base` consists of all the points whose ``legendre_symbol(n, p) == 1`` and ``p < num_primes``. Along with the prime `factor_base` also stores natural logarithm of prime and the residue n modulo p. It also returns the of primes numbers in the `factor_base` which are close to 1000 and 5000. Parameters ========== prime_bound : upper prime bound of the factor_base n : integer to be factored """ from sympy.ntheory.generate import sieve factor_base = [] idx_1000, idx_5000 = None, None for prime in sieve.primerange(1, prime_bound): if pow(n, (prime - 1) // 2, prime) == 1: if prime > 1000 and idx_1000 is None: idx_1000 = len(factor_base) - 1 if prime > 5000 and idx_5000 is None: idx_5000 = len(factor_base) - 1 residue = _sqrt_mod_prime_power(n, prime, 1)[0] log_p = round(log(prime)*2**10) factor_base.append(FactorBaseElem(prime, residue, log_p)) return idx_1000, idx_5000, factor_base def _initialize_first_polynomial(N, M, factor_base, idx_1000, idx_5000, seed=None): """This step is the initialization of the 1st sieve polynomial. Here `a` is selected as a product of several primes of the factor_base such that `a` is about to ``sqrt(2*N) / M``. Other initial values of factor_base elem are also initialized which includes a_inv, b_ainv, soln1, soln2 which are used when the sieve polynomial is changed. The b_ainv is required for fast polynomial change as we do not have to calculate `2*b*mod_inverse(a, prime)` every time. We also ensure that the `factor_base` primes which make `a` are between 1000 and 5000. Parameters ========== N : Number to be factored M : sieve interval factor_base : factor_base primes idx_1000 : index of prime number in the factor_base near 1000 idx_5000 : index of prime number in the factor_base near to 5000 seed : Generate pseudoprime numbers """ if seed is not None: rgen.seed(seed) approx_val = sqrt(2*N) / M # `a` is a parameter of the sieve polynomial and `q` is the prime factors of `a` # randomly search for a combination of primes whose multiplication is close to approx_val # This multiplication of primes will be `a` and the primes will be `q` # `best_a` denotes that `a` is close to approx_val in the random search of combination best_a, best_q, best_ratio = None, None, None start = 0 if idx_1000 is None else idx_1000 end = len(factor_base) - 1 if idx_5000 is None else idx_5000 for _ in range(50): a = 1 q = [] while(a < approx_val): rand_p = 0 while(rand_p == 0 or rand_p in q): rand_p = rgen.randint(start, end) p = factor_base[rand_p].prime a *= p q.append(rand_p) ratio = a / approx_val if best_ratio is None or abs(ratio - 1) < abs(best_ratio - 1): best_q = q best_a = a best_ratio = ratio a = best_a q = best_q B = [] for idx, val in enumerate(q): q_l = factor_base[val].prime gamma = factor_base[val].tmem_p * mod_inverse(a // q_l, q_l) % q_l if gamma > q_l / 2: gamma = q_l - gamma B.append(a//q_l*gamma) b = sum(B) g = SievePolynomial([a*a, 2*a*b, b*b - N], a, b) for fb in factor_base: if a % fb.prime == 0: continue fb.a_inv = mod_inverse(a, fb.prime) fb.b_ainv = [2*b_elem*fb.a_inv % fb.prime for b_elem in B] fb.soln1 = (fb.a_inv*(fb.tmem_p - b)) % fb.prime fb.soln2 = (fb.a_inv*(-fb.tmem_p - b)) % fb.prime return g, B def _initialize_ith_poly(N, factor_base, i, g, B): """Initialization stage of ith poly. After we finish sieving 1`st polynomial here we quickly change to the next polynomial from which we will again start sieving. Suppose we generated ith sieve polynomial and now we want to generate (i + 1)th polynomial, where ``1 <= i <= 2**(j - 1) - 1`` where `j` is the number of prime factors of the coefficient `a` then this function can be used to go to the next polynomial. If ``i = 2**(j - 1) - 1`` then go to _initialize_first_polynomial stage. Parameters ========== N : number to be factored factor_base : factor_base primes i : integer denoting ith polynomial g : (i - 1)th polynomial B : array that stores a//q_l*gamma """ from sympy.functions.elementary.integers import ceiling v = 1 j = i while(j % 2 == 0): v += 1 j //= 2 if ceiling(i / (2**v)) % 2 == 1: neg_pow = -1 else: neg_pow = 1 b = g.b + 2*neg_pow*B[v - 1] a = g.a g = SievePolynomial([a*a, 2*a*b, b*b - N], a, b) for fb in factor_base: if a % fb.prime == 0: continue fb.soln1 = (fb.soln1 - neg_pow*fb.b_ainv[v - 1]) % fb.prime fb.soln2 = (fb.soln2 - neg_pow*fb.b_ainv[v - 1]) % fb.prime return g def _gen_sieve_array(M, factor_base): """Sieve Stage of the Quadratic Sieve. For every prime in the factor_base that does not divide the coefficient `a` we add log_p over the sieve_array such that ``-M <= soln1 + i*p <= M`` and ``-M <= soln2 + i*p <= M`` where `i` is an integer. When p = 2 then log_p is only added using ``-M <= soln1 + i*p <= M``. Parameters ========== M : sieve interval factor_base : factor_base primes """ sieve_array = [0]*(2*M + 1) for factor in factor_base: if factor.soln1 is None: #The prime does not divides a continue for idx in range((M + factor.soln1) % factor.prime, 2*M, factor.prime): sieve_array[idx] += factor.log_p if factor.prime == 2: continue #if prime is 2 then sieve only with soln_1_p for idx in range((M + factor.soln2) % factor.prime, 2*M, factor.prime): sieve_array[idx] += factor.log_p return sieve_array def _check_smoothness(num, factor_base): """Here we check that if `num` is a smooth number or not. If `a` is a smooth number then it returns a vector of prime exponents modulo 2. For example if a = 2 * 5**2 * 7**3 and the factor base contains {2, 3, 5, 7} then `a` is a smooth number and this function returns ([1, 0, 0, 1], True). If `a` is a partial relation which means that `a` a has one prime factor greater than the `factor_base` then it returns `(a, False)` which denotes `a` is a partial relation. Parameters ========== a : integer whose smootheness is to be checked factor_base : factor_base primes """ vec = [] if num < 0: vec.append(1) num *= -1 else: vec.append(0) #-1 is not included in factor_base add -1 in vector for factor in factor_base: if num % factor.prime != 0: vec.append(0) continue factor_exp = 0 while num % factor.prime == 0: factor_exp += 1 num //= factor.prime vec.append(factor_exp % 2) if num == 1: return vec, True if isprime(num): return num, False return None, None def _trial_division_stage(N, M, factor_base, sieve_array, sieve_poly, partial_relations, ERROR_TERM): """Trial division stage. Here we trial divide the values generetated by sieve_poly in the sieve interval and if it is a smooth number then it is stored in `smooth_relations`. Moreover, if we find two partial relations with same large prime then they are combined to form a smooth relation. First we iterate over sieve array and look for values which are greater than accumulated_val, as these values have a high chance of being smooth number. Then using these values we find smooth relations. In general, let ``t**2 = u*p modN`` and ``r**2 = v*p modN`` be two partial relations with the same large prime p. Then they can be combined ``(t*r/p)**2 = u*v modN`` to form a smooth relation. Parameters ========== N : Number to be factored M : sieve interval factor_base : factor_base primes sieve_array : stores log_p values sieve_poly : polynomial from which we find smooth relations partial_relations : stores partial relations with one large prime ERROR_TERM : error term for accumulated_val """ sqrt_n = sqrt(float(N)) accumulated_val = log(M * sqrt_n)*2**10 - ERROR_TERM smooth_relations = [] proper_factor = set() partial_relation_upper_bound = 128*factor_base[-1].prime for idx, val in enumerate(sieve_array): if val < accumulated_val: continue x = idx - M v = sieve_poly.eval(x) vec, is_smooth = _check_smoothness(v, factor_base) if is_smooth is None:#Neither smooth nor partial continue u = sieve_poly.a*x + sieve_poly.b # Update the partial relation # If 2 partial relation with same large prime is found then generate smooth relation if is_smooth is False:#partial relation found large_prime = vec #Consider the large_primes under 128*F if large_prime > partial_relation_upper_bound: continue if large_prime not in partial_relations: partial_relations[large_prime] = (u, v) continue else: u_prev, v_prev = partial_relations[large_prime] partial_relations.pop(large_prime) try: large_prime_inv = mod_inverse(large_prime, N) except ValueError:#if large_prine divides N proper_factor.add(large_prime) continue u = u*u_prev*large_prime_inv v = v*v_prev // (large_prime*large_prime) vec, is_smooth = _check_smoothness(v, factor_base) #assert u*u % N == v % N smooth_relations.append((u, v, vec)) return smooth_relations, proper_factor #LINEAR ALGEBRA STAGE def _build_matrix(smooth_relations): """Build a 2D matrix from smooth relations. Parameters ========== smooth_relations : Stores smooth relations """ matrix = [] for s_relation in smooth_relations: matrix.append(s_relation[2]) return matrix def _gauss_mod_2(A): """Fast gaussian reduction for modulo 2 matrix. Parameters ========== A : Matrix Examples ======== >>> from sympy.ntheory.qs import _gauss_mod_2 >>> _gauss_mod_2([[0, 1, 1], [1, 0, 1], [0, 1, 0], [1, 1, 1]]) ([[[1, 0, 1], 3]], [True, True, True, False], [[0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 1]]) Reference ========== .. [1] A fast algorithm for gaussian elimination over GF(2) and its implementation on the GAPP. Cetin K.Koc, Sarath N.Arachchige""" import copy matrix = copy.deepcopy(A) row = len(matrix) col = len(matrix[0]) mark = [False]*row for c in range(col): for r in range(row): if matrix[r][c] == 1: break mark[r] = True for c1 in range(col): if c1 == c: continue if matrix[r][c1] == 1: for r2 in range(row): matrix[r2][c1] = (matrix[r2][c1] + matrix[r2][c]) % 2 dependent_row = [] for idx, val in enumerate(mark): if val == False: dependent_row.append([matrix[idx], idx]) return dependent_row, mark, matrix def _find_factor(dependent_rows, mark, gauss_matrix, index, smooth_relations, N): """Finds proper factor of N. Here, transform the dependent rows as a combination of independent rows of the gauss_matrix to form the desired relation of the form ``X**2 = Y**2 modN``. After obtaining the desired relation we obtain a proper factor of N by `gcd(X - Y, N)`. Parameters ========== dependent_rows : denoted dependent rows in the reduced matrix form mark : boolean array to denoted dependent and independent rows gauss_matrix : Reduced form of the smooth relations matrix index : denoted the index of the dependent_rows smooth_relations : Smooth relations vectors matrix N : Number to be factored """ idx_in_smooth = dependent_rows[index][1] independent_u = [smooth_relations[idx_in_smooth][0]] independent_v = [smooth_relations[idx_in_smooth][1]] dept_row = dependent_rows[index][0] for idx, val in enumerate(dept_row): if val == 1: for row in range(len(gauss_matrix)): if gauss_matrix[row][idx] == 1 and mark[row] == True: independent_u.append(smooth_relations[row][0]) independent_v.append(smooth_relations[row][1]) break u = 1 v = 1 for i in independent_u: u *= i for i in independent_v: v *= i #assert u**2 % N == v % N v = integer_nthroot(v, 2)[0] return igcd(u - v, N) def qs(N, prime_bound, M, ERROR_TERM=25, seed=1234): """Performs factorization using Self-Initializing Quadratic Sieve. In SIQS, let N be a number to be factored, and this N should not be a perfect power. If we find two integers such that ``X**2 = Y**2 modN`` and ``X != +-Y modN``, then `gcd(X + Y, N)` will reveal a proper factor of N. In order to find these integers X and Y we try to find relations of form t**2 = u modN where u is a product of small primes. If we have enough of these relations then we can form ``(t1*t2...ti)**2 = u1*u2...ui modN`` such that the right hand side is a square, thus we found a relation of ``X**2 = Y**2 modN``. Here, several optimizations are done like using multiple polynomials for sieving, fast changing between polynomials and using partial relations. The use of partial relations can speeds up the factoring by 2 times. Parameters ========== N : Number to be Factored prime_bound : upper bound for primes in the factor base M : Sieve Interval ERROR_TERM : Error term for checking smoothness threshold : Extra smooth relations for factorization seed : generate pseudo prime numbers Examples ======== >>> from sympy.ntheory import qs >>> qs(25645121643901801, 2000, 10000) {5394769, 4753701529} >>> qs(9804659461513846513, 2000, 10000) {4641991, 2112166839943} References ========== .. [1] https://pdfs.semanticscholar.org/5c52/8a975c1405bd35c65993abf5a4edb667c1db.pdf .. [2] https://www.rieselprime.de/ziki/Self-initializing_quadratic_sieve """ ERROR_TERM*=2**10 rgen.seed(seed) idx_1000, idx_5000, factor_base = _generate_factor_base(prime_bound, N) smooth_relations = [] ith_poly = 0 partial_relations = {} proper_factor = set() threshold = 5*len(factor_base) // 100 while True: if ith_poly == 0: ith_sieve_poly, B_array = _initialize_first_polynomial(N, M, factor_base, idx_1000, idx_5000) else: ith_sieve_poly = _initialize_ith_poly(N, factor_base, ith_poly, ith_sieve_poly, B_array) ith_poly += 1 if ith_poly >= 2**(len(B_array) - 1): # time to start with a new sieve polynomial ith_poly = 0 sieve_array = _gen_sieve_array(M, factor_base) s_rel, p_f = _trial_division_stage(N, M, factor_base, sieve_array, ith_sieve_poly, partial_relations, ERROR_TERM) smooth_relations += s_rel proper_factor |= p_f if len(smooth_relations) >= len(factor_base) + threshold: break matrix = _build_matrix(smooth_relations) dependent_row, mark, gauss_matrix = _gauss_mod_2(matrix) N_copy = N for index in range(len(dependent_row)): factor = _find_factor(dependent_row, mark, gauss_matrix, index, smooth_relations, N) if factor > 1 and factor < N: proper_factor.add(factor) while(N_copy % factor == 0): N_copy //= factor if isprime(N_copy): proper_factor.add(N_copy) break if(N_copy == 1): break return proper_factor