11/*
22 Copyright (C) 2010 Sebastian Pancratz
3- Copyright (C) 2011 Fredrik Johansson
3+ Copyright (C) 2011, 2026 Fredrik Johansson
44
55 This file is part of FLINT.
66
1414#include "fmpz.h"
1515#include "fmpz_vec.h"
1616#include "fmpz_poly.h"
17- #include "fmpq.h"
18- #include "fmpq_mat.h"
17+ #include "fmpz_mat.h"
1918#include "fmpq_poly.h"
2019
21- static void
22- _fmpq_mat_get_row (fmpz * rnum , fmpz_t den , fmpq_mat_t A , slong i )
23- {
24- slong j ;
25- fmpz_t t ;
26- fmpz_init (t );
27- fmpz_one (den );
28-
29- for (j = 0 ; j < fmpq_mat_ncols (A ); j ++ )
30- fmpz_lcm (den , den , fmpq_mat_entry_den (A , i , j ));
31-
32- for (j = 0 ; j < fmpq_mat_ncols (A ); j ++ )
33- {
34- fmpz_divexact (t , den , fmpq_mat_entry_den (A , i , j ));
35- fmpz_mul (rnum + j , fmpq_mat_entry_num (A , i , j ), t );
36- }
37-
38- fmpz_clear (t );
39- }
40-
41-
4220void
4321_fmpq_poly_compose_series_brent_kung (fmpz * res , fmpz_t den , const fmpz * poly1 ,
4422 const fmpz_t den1 , slong len1 , const fmpz * poly2 ,
4523 const fmpz_t den2 , slong len2 , slong n )
4624{
47- fmpq_mat_t A , B , C ;
48- fmpz_t tden , uden , hden ;
49- fmpz * t , * u , * h , * swap ;
50- slong i , j , m ;
51-
52- if (fmpz_is_one (den2 ))
53- {
54- _fmpz_poly_compose_series (res , poly1 , len1 , poly2 , len2 , n );
55- fmpz_set (den , den1 );
56- _fmpq_poly_canonicalise (res , den , n );
57- return ;
58- }
25+ fmpz_mat_t A , B , C ;
26+ fmpz * A_den , * t , * h ;
27+ fmpz_t lcd , scale ;
28+ fmpz_t C_den , tden , hden ;
29+ slong i , m ;
5930
6031 if (n == 1 )
6132 {
@@ -67,85 +38,98 @@ _fmpq_poly_compose_series_brent_kung(fmpz * res, fmpz_t den, const fmpz * poly1,
6738
6839 m = n_sqrt (n ) + 1 ;
6940
70- fmpq_mat_init (A , m , n );
71- fmpq_mat_init (B , m , m );
72- fmpq_mat_init (C , m , n );
41+ fmpz_mat_init (A , m , n );
42+ fmpz_mat_init (B , m , m );
43+ fmpz_mat_init (C , m , n );
44+ A_den = _fmpz_vec_init (m );
7345
46+ fmpz_init (lcd );
47+ fmpz_init (scale );
48+ fmpz_init (C_den );
7449 fmpz_init (tden );
75- fmpz_init (uden );
7650 fmpz_init (hden );
7751 h = _fmpz_vec_init (n );
7852 t = _fmpz_vec_init (n );
79- u = _fmpz_vec_init (n );
8053
81- /* Set rows of B to the segments of poly1 */
54+ /* Set rows of B to the segments of poly1 with common denominator den1. */
8255 for (i = 0 ; i < len1 ; i ++ )
56+ fmpz_set (fmpz_mat_entry (B , i / m , i % m ), poly1 + i );
57+ /* Remark: if the poly is non-canonical e.g. due to being a truncation
58+ of a longer power series, it could be helpful to remove its content
59+ here. We could also consider removing content of B row by row. */
60+
61+ /* Set rows of A to the numerators of powers of poly2 with corresponding
62+ denominators in A_den[i]. */
63+ fmpz_one (fmpz_mat_entry (A , 0 , 0 ));
64+ fmpz_one (A_den + 0 );
65+ _fmpz_vec_set (fmpz_mat_row (A , 1 ), poly2 , len2 );
66+ fmpz_set (A_den + 1 , den2 );
67+ /* Optional: may improve performance if poly2 is non-canonical e.g. due
68+ to being a truncation of a longer power series. */
69+ _fmpq_poly_canonicalise (fmpz_mat_row (A , 1 ), A_den + 1 , n );
70+
71+ for (i = 2 ; i < m ; i ++ )
8372 {
84- fmpz_set (fmpq_mat_entry_num (B , i / m , i % m ), poly1 + i );
85- fmpz_set (fmpq_mat_entry_den (B , i / m , i % m ), den1 );
86- fmpq_canonicalise (fmpq_mat_entry (B , i / m , i % m ));
73+ fmpz * Ai = fmpz_mat_row (A , i );
74+ fmpz * Ai1 = fmpz_mat_row (A , i - 1 );
75+ fmpz * Ai_den = A_den + i ;
76+ fmpz * Ai1_den = A_den + i - 1 ;
77+
78+ _fmpq_poly_mullow (Ai , Ai_den , Ai1 , Ai1_den , n , poly2 , den2 , len2 , n );
79+ _fmpq_poly_canonicalise (Ai , Ai_den , n );
8780 }
8881
89- /* Set rows of A to powers of poly2 */
90- fmpq_set_si (fmpq_mat_entry (A , 0 , 0 ), WORD (1 ), WORD (1 ));
82+ /* Compute h = poly2 ^ m */
83+ _fmpq_poly_mullow (h , hden , fmpz_mat_row (A , m - 1 ), A_den + m - 1 , n , poly2 , den2 , len2 , n );
84+ _fmpq_poly_canonicalise (h , hden , n );
9185
92- for (i = 0 ; i < len2 ; i ++ )
93- {
94- fmpz_set (fmpq_mat_entry_num (A , 1 , i ), poly2 + i );
95- fmpz_set (fmpq_mat_entry_den (A , 1 , i ), den2 );
96- fmpq_canonicalise (fmpq_mat_entry (A , 1 , i ));
97- }
86+ /* Matrix multiply C = B * A over the integers.
87+ B (m x m) has common denominator den1.
88+ A (m x n) has row denominator A_den[i].
9889
99- _fmpz_vec_set (h , poly2 , len2 );
100- fmpz_set (hden , den2 );
90+ We could remove gcd of A columnwise before multiplying, but
91+ this is slower for small to moderate n where we want to use Brent-Kung
92+ and faster only for large n where we want to use Kinoshita-Li instead. */
93+ fmpz_one (lcd );
94+ for (i = 0 ; i < m ; i ++ )
95+ fmpz_lcm (lcd , lcd , A_den + i );
10196
102- for (i = 2 ; i < m ; i ++ )
97+ for (i = 0 ; i < m ; i ++ )
10398 {
104- _fmpq_poly_mullow (t , tden , h , hden , n , poly2 , den2 , len2 , n );
105- _fmpq_poly_canonicalise (t , tden , n );
106-
107- for (j = 0 ; j < n ; j ++ )
108- {
109- fmpz_set (fmpq_mat_entry_num (A , i , j ), t + j );
110- fmpz_set (fmpq_mat_entry_den (A , i , j ), tden );
111- fmpq_canonicalise (fmpq_mat_entry (A , i , j ));
112- }
113- swap = t ; t = h ; h = swap ;
114- fmpz_swap (hden , tden );
99+ fmpz_divexact (scale , lcd , A_den + i );
100+ if (!fmpz_is_one (scale ))
101+ _fmpz_vec_scalar_mul_fmpz (fmpz_mat_row (A , i ), fmpz_mat_row (A , i ), n , scale );
115102 }
116103
117- /* Compute h = poly2 ^ m */
118- _fmpq_poly_mullow (t , tden , h , hden , n , poly2 , den2 , len2 , n );
119- _fmpq_poly_canonicalise (t , tden , n );
120- swap = t ; t = h ; h = swap ;
121- fmpz_swap (hden , tden );
104+ fmpz_mat_mul (C , B , A );
105+ fmpz_mat_clear (A );
106+ _fmpz_vec_clear (A_den , m );
107+ fmpz_mat_clear (B );
122108
123- /* Matrix multiply */
124- fmpq_mat_mul (C , B , A );
125- fmpq_mat_clear (A );
126- fmpq_mat_clear (B );
109+ fmpz_mul (C_den , den1 , lcd );
127110
111+ _fmpz_vec_set (res , fmpz_mat_row (C , m - 1 ), n );
112+ fmpz_set (den , C_den );
113+ _fmpq_poly_canonicalise (res , den , n );
128114 /* Evaluate block composition using the Horner scheme */
129- _fmpq_mat_get_row (res , den , C , m - 1 );
130-
131115 for (i = m - 2 ; i >= 0 ; i -- )
132116 {
133117 _fmpq_poly_mullow (t , tden , res , den , n , h , hden , n , n );
134- /* we could canonicalise t here, but it does not seem to make
135- much of a difference */
136- _fmpq_mat_get_row (u , uden , C , i );
137- _fmpq_poly_add (res , den , t , tden , n , u , uden , n );
118+ /* Remark: we could canonicalise t and/or fmpz_mat_row(C, i)
119+ here; in practice this seems to be slower at least for moderate n. */
120+ _fmpq_poly_add (res , den , t , tden , n , fmpz_mat_row (C , i ), C_den , n );
138121 }
139122
140123 _fmpq_poly_canonicalise (res , den , n );
141124
142- fmpq_mat_clear (C );
125+ fmpz_mat_clear (C );
143126
144127 _fmpz_vec_clear (t , n );
145- _fmpz_vec_clear (u , n );
146128 _fmpz_vec_clear (h , n );
129+ fmpz_clear (lcd );
130+ fmpz_clear (scale );
131+ fmpz_clear (C_den );
147132 fmpz_clear (tden );
148- fmpz_clear (uden );
149133 fmpz_clear (hden );
150134}
151135
0 commit comments