Skip to content

Commit e902c84

Browse files
committed
kernel: turn global GAP and module states back into static globals
... at least in plain GAP; keep them as before in HPC-GAP. The whole concept was invented to allow HPC-GAP to have thread-local GAP and module state. In plain GAP, though, it introduced some minor overhead. Changing this here allows us to avoid that overhead, and it also prevents GAPState from becoming larger than 32 KB, which can happen on platforms with a large jmp_buf (e.g., powerpc64).
1 parent aa52ee2 commit e902c84

14 files changed

Lines changed: 431 additions & 319 deletions

File tree

src/code.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,15 @@
4444

4545
GAP_STATIC_ASSERT(sizeof(StatHeader) == 8, "StatHeader has wrong size");
4646

47+
#ifdef HPCGAP
4748
struct CodeModuleState {
48-
Bag StackStat;
49-
Int CountStat;
49+
#endif
50+
DECL_MODULE_STATE Bag StackStat;
51+
DECL_MODULE_STATE Int CountStat;
5052

51-
Bag StackExpr;
52-
Int CountExpr;
53+
DECL_MODULE_STATE Bag StackExpr;
54+
DECL_MODULE_STATE Int CountExpr;
55+
#ifdef HPCGAP
5356
};
5457

5558
static ModuleStateOffset CodeStateOffset = -1;
@@ -60,6 +63,9 @@ extern inline struct CodeModuleState * CShelper(void)
6063
}
6164

6265
#define CS(x) (CShelper()->x)
66+
#else
67+
#define CS(x) (x)
68+
#endif
6369

6470

6571
/****************************************************************************
@@ -3250,8 +3256,10 @@ static StructInitInfo module = {
32503256
.preSave = PreSave,
32513257
.postRestore = PostRestore,
32523258

3259+
#ifdef HPCGAP
32533260
.moduleStateSize = sizeof(struct CodeModuleState),
32543261
.moduleStateOffsetPtr = &CodeStateOffset,
3262+
#endif
32553263
.initModuleState = InitModuleState,
32563264
};
32573265

src/collectors.cc

Lines changed: 68 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,19 @@ extern "C" {
4646
*F * * * * * * * * * * * * * module specific state * * * * * * * * * * * * *
4747
*/
4848

49+
#ifdef HPCGAP
4950
struct CollectorsState_ {
50-
Obj SC_NW_STACK;
51-
Obj SC_LW_STACK;
52-
Obj SC_PW_STACK;
53-
Obj SC_EW_STACK;
54-
Obj SC_GE_STACK;
55-
Obj SC_CW_VECTOR;
56-
Obj SC_CW2_VECTOR;
57-
UInt SC_MAX_STACK_SIZE;
51+
#endif
52+
53+
DECL_MODULE_STATE Obj SC_NW_STACK;
54+
DECL_MODULE_STATE Obj SC_LW_STACK;
55+
DECL_MODULE_STATE Obj SC_PW_STACK;
56+
DECL_MODULE_STATE Obj SC_EW_STACK;
57+
DECL_MODULE_STATE Obj SC_GE_STACK;
58+
DECL_MODULE_STATE Obj SC_CW_VECTOR;
59+
DECL_MODULE_STATE Obj SC_CW2_VECTOR;
60+
DECL_MODULE_STATE UInt SC_MAX_STACK_SIZE;
61+
#ifdef HPCGAP
5862
};
5963

6064
static ModuleStateOffset CollectorsStateOffset = -1;
@@ -64,6 +68,17 @@ extern inline struct CollectorsState_ * CollectorsState(void)
6468
return (struct CollectorsState_ *)StateSlotsAtOffset(CollectorsStateOffset);
6569
}
6670

71+
#define SC_NW_STACK (CollectorsState()->SC_NW_STACK)
72+
#define SC_LW_STACK (CollectorsState()->SC_LW_STACK)
73+
#define SC_PW_STACK (CollectorsState()->SC_PW_STACK)
74+
#define SC_EW_STACK (CollectorsState()->SC_EW_STACK)
75+
#define SC_GE_STACK (CollectorsState()->SC_GE_STACK)
76+
#define SC_CW_VECTOR (CollectorsState()->SC_CW_VECTOR)
77+
#define SC_CW2_VECTOR (CollectorsState()->SC_CW2_VECTOR)
78+
#define SC_MAX_STACK_SIZE (CollectorsState()->SC_MAX_STACK_SIZE)
79+
80+
#endif
81+
6782

6883
/****************************************************************************
6984
**
@@ -84,7 +99,7 @@ extern inline struct CollectorsState_ * CollectorsState(void)
8499
*/
85100
#define SC_PUSH_WORD( word, exp ) \
86101
if ( ++sp == max ) { \
87-
CollectorsState()->SC_MAX_STACK_SIZE *= 2; \
102+
SC_MAX_STACK_SIZE *= 2; \
88103
return -1; \
89104
} \
90105
*++nw = DATA_WORD(word); \
@@ -95,7 +110,7 @@ extern inline struct CollectorsState_ * CollectorsState(void)
95110

96111
#define SC_PUSH_GEN( gen, exp ) \
97112
if ( ++sp == max ) { \
98-
CollectorsState()->SC_MAX_STACK_SIZE *= 2; \
113+
SC_MAX_STACK_SIZE *= 2; \
99114
return -1; \
100115
} \
101116
*++nw = DATA_WORD(gen); \
@@ -385,22 +400,22 @@ static Int SingleCollectWord(Obj sc, Obj vv, Obj w)
385400
exps = (UInt)1 << (ebits-1);
386401

387402
// <nw> contains the stack of words to insert
388-
vnw = CollectorsState()->SC_NW_STACK;
403+
vnw = SC_NW_STACK;
389404

390405
// <lw> contains the end of the word in <nw>
391-
vlw = CollectorsState()->SC_LW_STACK;
406+
vlw = SC_LW_STACK;
392407

393408
// <pw> contains the position of the word in <nw> to look at
394-
vpw = CollectorsState()->SC_PW_STACK;
409+
vpw = SC_PW_STACK;
395410

396411
// <ew> contains the unprocessed exponents at position <pw>
397-
vew = CollectorsState()->SC_EW_STACK;
412+
vew = SC_EW_STACK;
398413

399414
// <ge> contains the global exponent of the word
400-
vge = CollectorsState()->SC_GE_STACK;
415+
vge = SC_GE_STACK;
401416

402417
// get the maximal stack size
403-
max = CollectorsState()->SC_MAX_STACK_SIZE;
418+
max = SC_MAX_STACK_SIZE;
404419

405420
// ensure that the stacks are large enough
406421
const UInt desiredStackSize = sizeof(Obj) * (max + 2);
@@ -893,22 +908,22 @@ static Int CombiCollectWord(Obj sc, Obj vv, Obj w)
893908
exps = (UInt)1 << (ebits-1);
894909

895910
// <nw> contains the stack of words to insert
896-
vnw = CollectorsState()->SC_NW_STACK;
911+
vnw = SC_NW_STACK;
897912

898913
// <lw> contains the end of the word in <nw>
899-
vlw = CollectorsState()->SC_LW_STACK;
914+
vlw = SC_LW_STACK;
900915

901916
// <pw> contains the position of the word in <nw> to look at
902-
vpw = CollectorsState()->SC_PW_STACK;
917+
vpw = SC_PW_STACK;
903918

904919
// <ew> contains the unprocessed exponents at position <pw>
905-
vew = CollectorsState()->SC_EW_STACK;
920+
vew = SC_EW_STACK;
906921

907922
// <ge> contains the global exponent of the word
908-
vge = CollectorsState()->SC_GE_STACK;
923+
vge = SC_GE_STACK;
909924

910925
// get the maximal stack size
911-
max = CollectorsState()->SC_MAX_STACK_SIZE;
926+
max = SC_MAX_STACK_SIZE;
912927

913928
// ensure that the stacks are large enough
914929
const UInt desiredStackSize = sizeof(Obj) * (max + 2);
@@ -1267,7 +1282,7 @@ static Obj ReducedComm(FinPowConjCol * fc, Obj sc, Obj w, Obj u)
12671282
Obj vc2; // collect vector
12681283

12691284
// use 'cwVector' to collect word <u>*<w> to
1270-
vcw = CollectorsState()->SC_CW_VECTOR;
1285+
vcw = SC_CW_VECTOR;
12711286
num = SC_NUMBER_RWS_GENERATORS(sc);
12721287

12731288
// check that it has the correct length, unpack <u> into it
@@ -1283,7 +1298,7 @@ static Obj ReducedComm(FinPowConjCol * fc, Obj sc, Obj w, Obj u)
12831298
}
12841299

12851300
// use 'cw2Vector' to collect word <w>*<u> to
1286-
vc2 = CollectorsState()->SC_CW2_VECTOR;
1301+
vc2 = SC_CW2_VECTOR;
12871302

12881303
// check that it has the correct length, unpack <w> into it
12891304
if ( fc->vectorWord( vc2, w, num ) == -1 ) {
@@ -1325,7 +1340,7 @@ static Obj ReducedForm(FinPowConjCol * fc, Obj sc, Obj w)
13251340
Obj type; // type of the return value
13261341

13271342
// use 'cwVector' to collect word <w> to
1328-
vcw = CollectorsState()->SC_CW_VECTOR;
1343+
vcw = SC_CW_VECTOR;
13291344
num = SC_NUMBER_RWS_GENERATORS(sc);
13301345

13311346
// check that it has the correct length
@@ -1358,7 +1373,7 @@ static Obj ReducedLeftQuotient(FinPowConjCol * fc, Obj sc, Obj w, Obj u)
13581373
Obj vc2; // collect vector
13591374

13601375
// use 'cwVector' to collect word <w> to
1361-
vcw = CollectorsState()->SC_CW_VECTOR;
1376+
vcw = SC_CW_VECTOR;
13621377
num = SC_NUMBER_RWS_GENERATORS(sc);
13631378

13641379
// check that it has the correct length, unpack <w> into it
@@ -1368,7 +1383,7 @@ static Obj ReducedLeftQuotient(FinPowConjCol * fc, Obj sc, Obj w, Obj u)
13681383
}
13691384

13701385
// use 'cw2Vector' to collect word <u> to
1371-
vc2 = CollectorsState()->SC_CW2_VECTOR;
1386+
vc2 = SC_CW2_VECTOR;
13721387

13731388
// check that it has the correct length, unpack <u> into it
13741389
if ( fc->vectorWord( vc2, u, num ) == -1 ) {
@@ -1402,7 +1417,7 @@ static Obj ReducedProduct(FinPowConjCol * fc, Obj sc, Obj w, Obj u)
14021417
Obj vcw; // collect vector
14031418

14041419
// use 'cwVector' to collect word <w> to
1405-
vcw = CollectorsState()->SC_CW_VECTOR;
1420+
vcw = SC_CW_VECTOR;
14061421
num = SC_NUMBER_RWS_GENERATORS(sc);
14071422

14081423
// check that it has the correct length, unpack <w> into it
@@ -1441,8 +1456,8 @@ static Obj ReducedPowerSmallInt(FinPowConjCol * fc, Obj sc, Obj w, Obj vpow)
14411456
pow = INT_INTOBJ(vpow);
14421457

14431458
// use 'cwVector' and 'cw2Vector' to collect words to
1444-
vcw = CollectorsState()->SC_CW_VECTOR;
1445-
vc2 = CollectorsState()->SC_CW2_VECTOR;
1459+
vcw = SC_CW_VECTOR;
1460+
vc2 = SC_CW2_VECTOR;
14461461
num = SC_NUMBER_RWS_GENERATORS(sc);
14471462
type = SC_DEFAULT_TYPE(sc);
14481463

@@ -1528,8 +1543,8 @@ static Obj ReducedQuotient(FinPowConjCol * fc, Obj sc, Obj w, Obj u)
15281543
Obj vc2; // collect vector
15291544

15301545
// use 'cwVector' to collect word <w> to
1531-
vcw = CollectorsState()->SC_CW_VECTOR;
1532-
vc2 = CollectorsState()->SC_CW2_VECTOR;
1546+
vcw = SC_CW_VECTOR;
1547+
vc2 = SC_CW2_VECTOR;
15331548
num = SC_NUMBER_RWS_GENERATORS(sc);
15341549
type = SC_DEFAULT_TYPE(sc);
15351550

@@ -1647,8 +1662,7 @@ static Obj FuncFinPowConjCol_ReducedQuotient ( Obj self, Obj sc, Obj w, Obj u )
16471662
*/
16481663
static Obj FuncSET_SCOBJ_MAX_STACK_SIZE(Obj self, Obj size)
16491664
{
1650-
CollectorsState()->SC_MAX_STACK_SIZE =
1651-
GetPositiveSmallInt(SELF_NAME, size);
1665+
SC_MAX_STACK_SIZE = GetPositiveSmallInt(SELF_NAME, size);
16521666
return 0;
16531667
}
16541668

@@ -1739,25 +1753,25 @@ static Int InitLibrary (
17391753
static Int InitModuleState(void)
17401754
{
17411755
// register global bags with the garbage collector
1742-
InitGlobalBag( &CollectorsState()->SC_NW_STACK, "SC_NW_STACK" );
1743-
InitGlobalBag( &CollectorsState()->SC_LW_STACK, "SC_LW_STACK" );
1744-
InitGlobalBag( &CollectorsState()->SC_PW_STACK, "SC_PW_STACK" );
1745-
InitGlobalBag( &CollectorsState()->SC_EW_STACK, "SC_EW_STACK" );
1746-
InitGlobalBag( &CollectorsState()->SC_GE_STACK, "SC_GE_STACK" );
1747-
InitGlobalBag( &CollectorsState()->SC_CW_VECTOR, "SC_CW_VECTOR" );
1748-
InitGlobalBag( &CollectorsState()->SC_CW2_VECTOR, "SC_CW2_VECTOR" );
1756+
InitGlobalBag(&SC_NW_STACK, "SC_NW_STACK");
1757+
InitGlobalBag(&SC_LW_STACK, "SC_LW_STACK");
1758+
InitGlobalBag(&SC_PW_STACK, "SC_PW_STACK");
1759+
InitGlobalBag(&SC_EW_STACK, "SC_EW_STACK");
1760+
InitGlobalBag(&SC_GE_STACK, "SC_GE_STACK");
1761+
InitGlobalBag(&SC_CW_VECTOR, "SC_CW_VECTOR");
1762+
InitGlobalBag(&SC_CW2_VECTOR, "SC_CW2_VECTOR");
17491763

17501764
const UInt maxStackSize = 256;
17511765
const UInt desiredStackSize = sizeof(Obj) * (maxStackSize + 2);
1752-
CollectorsState()->SC_NW_STACK = NewKernelBuffer(desiredStackSize);
1753-
CollectorsState()->SC_LW_STACK = NewKernelBuffer(desiredStackSize);
1754-
CollectorsState()->SC_PW_STACK = NewKernelBuffer(desiredStackSize);
1755-
CollectorsState()->SC_EW_STACK = NewKernelBuffer(desiredStackSize);
1756-
CollectorsState()->SC_GE_STACK = NewKernelBuffer(desiredStackSize);
1766+
SC_NW_STACK = NewKernelBuffer(desiredStackSize);
1767+
SC_LW_STACK = NewKernelBuffer(desiredStackSize);
1768+
SC_PW_STACK = NewKernelBuffer(desiredStackSize);
1769+
SC_EW_STACK = NewKernelBuffer(desiredStackSize);
1770+
SC_GE_STACK = NewKernelBuffer(desiredStackSize);
17571771

1758-
CollectorsState()->SC_CW_VECTOR = NEW_STRING(0);
1759-
CollectorsState()->SC_CW2_VECTOR = NEW_STRING(0);
1760-
CollectorsState()->SC_MAX_STACK_SIZE = maxStackSize;
1772+
SC_CW_VECTOR = NEW_STRING(0);
1773+
SC_CW2_VECTOR = NEW_STRING(0);
1774+
SC_MAX_STACK_SIZE = maxStackSize;
17611775

17621776
return 0;
17631777
}
@@ -1779,8 +1793,13 @@ static StructInitInfo module = {
17791793
/* preSave = */ 0,
17801794
/* postSave = */ 0,
17811795
/* postRestore = */ 0,
1796+
#ifdef HPCGAP
17821797
/* moduleStateSize = */ sizeof(CollectorsState_),
17831798
/* moduleStateOffsetPtr = */ &CollectorsStateOffset,
1799+
#else
1800+
/* moduleStateSize = */ 0,
1801+
/* moduleStateOffsetPtr = */ 0,
1802+
#endif
17841803
/* initModuleState = */ InitModuleState,
17851804
/* destroyModuleState = */ 0,
17861805
};

src/cyclotom.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,11 @@ static inline void SET_NOF_CYC(Obj cyc, Obj val)
142142

143143
// #define XXX_CYC(cyc,len) (EXPOS_CYC(cyc,len)[0])
144144

145-
145+
#ifdef HPCGAP
146146
static ModuleStateOffset CycStateOffset = -1;
147147

148148
struct CycModuleState {
149+
#endif
149150

150151
/****************************************************************************
151152
**
@@ -158,7 +159,7 @@ struct CycModuleState {
158159
** It is created in 'InitCyc' with room for up to 1000 coefficients and is
159160
** resized when need arises.
160161
*/
161-
Obj ResultCyc;
162+
DECL_MODULE_STATE Obj ResultCyc;
162163

163164
/****************************************************************************
164165
**
@@ -178,9 +179,10 @@ Obj ResultCyc;
178179
** is called to compute $e_n^i$ and can then do this easier by just putting
179180
** 1 at the <i>th place in 'ResultCyc' and then calling 'Cyclotomic'.
180181
*/
181-
Obj LastECyc;
182-
UInt LastNCyc;
182+
DECL_MODULE_STATE Obj LastECyc;
183+
DECL_MODULE_STATE UInt LastNCyc;
183184

185+
#ifdef HPCGAP
184186
}; // end of struct CycModuleState
185187

186188
extern inline struct CycModuleState *CycState(void)
@@ -189,10 +191,10 @@ extern inline struct CycModuleState *CycState(void)
189191
}
190192

191193
// For convenience and readability
192-
#define ResultCyc CycState()->ResultCyc
193-
#define LastECyc CycState()->LastECyc
194-
#define LastNCyc CycState()->LastNCyc
195-
194+
#define ResultCyc (CycState()->ResultCyc)
195+
#define LastECyc (CycState()->LastECyc)
196+
#define LastNCyc (CycState()->LastNCyc)
197+
#endif
196198

197199
static void GrowResultCyc(UInt size)
198200
{
@@ -2201,8 +2203,10 @@ static StructInitInfo module = {
22012203
.initKernel = InitKernel,
22022204
.initLibrary = InitLibrary,
22032205

2206+
#ifdef HPCGAP
22042207
.moduleStateSize = sizeof(struct CycModuleState),
22052208
.moduleStateOffsetPtr = &CycStateOffset,
2209+
#endif
22062210
.initModuleState = InitModuleState,
22072211
};
22082212

0 commit comments

Comments
 (0)