3535#include " runtime/perfData.hpp"
3636#include " runtime/threadSMR.hpp"
3737#include " utilities/copy.hpp"
38+ #include " utilities/integerCast.hpp"
3839
3940size_t ThreadLocalAllocBuffer::_max_size = 0 ;
4041unsigned int ThreadLocalAllocBuffer::_target_num_refills = 0 ;
@@ -75,43 +76,54 @@ size_t ThreadLocalAllocBuffer::remaining() {
7576}
7677
7778void ThreadLocalAllocBuffer::accumulate_and_reset_statistics (ThreadLocalAllocStats* stats) {
78- size_t capacity = Universe::heap ()->tlab_capacity ();
79- size_t used = Universe::heap ()->tlab_used ();
80-
8179 _gc_waste += (unsigned )remaining ();
82- uint64_t total_allocated = thread ()->allocated_bytes ();
83- uint64_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
84- _allocated_before_last_gc = total_allocated;
85-
86- print_stats (" gc" );
87-
88- if (_num_refills > 0 ) {
89- // Update allocation history if a reasonable amount of eden was allocated.
90- bool update_allocation_history = used > 0.5 * capacity;
91-
92- if (update_allocation_history) {
93- // Average the fraction of eden allocated in a tlab by this
94- // thread for use in the next resize operation.
95- // _gc_waste is not subtracted because it's included in
96- // "used".
97- // The result can be larger than 1.0 due to direct to old allocations.
98- // These allocations should ideally not be counted but since it is not possible
99- // to filter them out here we just cap the fraction to be at most 1.0.
100- // Keep alloc_frac as float and not double to avoid the double to float conversion
101- float alloc_frac = MIN2 (1 .0f , allocated_since_last_gc / (float ) used);
102- _allocation_fraction.sample (alloc_frac);
80+ const uint64_t allocated_bytes = thread ()->allocated_bytes ();
81+
82+ const size_t allocated_since_last_gc = integer_cast_permit_tautology<size_t >(allocated_bytes - _allocated_before_last_gc);
83+ _allocated_before_last_gc = allocated_bytes;
84+
85+ if (allocated_since_last_gc > 0 ) {
86+ const size_t tlab_capacity = Universe::heap ()->tlab_capacity ();
87+ const size_t tlab_used = Universe::heap ()->tlab_used ();
88+ if (tlab_used > 0.5 * tlab_capacity) {
89+ // To avoid divide-by-zero
90+ const size_t effective_tlab_capacity = MAX2 (tlab_capacity, size_t (1 ));
91+ const float alloc_frac = (float )allocated_since_last_gc / effective_tlab_capacity;
92+ _allocation_fraction.sample (MIN2 (alloc_frac, 1 .0f ));
10393 }
104-
105- stats->update_fast_allocations (_num_refills,
106- _allocated_size,
107- _gc_waste,
108- _refill_waste);
94+ stats->update_current_thread_stats (_num_refills,
95+ allocated_since_last_gc,
96+ _allocated_size,
97+ _gc_waste,
98+ _refill_waste,
99+ _num_slow_allocations);
109100 } else {
110- assert (_num_refills == 0 && _refill_waste == 0 && _gc_waste == 0 ,
101+ assert (_num_refills == 0 && _refill_waste == 0
102+ && _gc_waste == 0 && _num_slow_allocations == 0 ,
111103 " tlab stats == 0" );
112104 }
113105
114- stats->update_num_slow_allocations (_num_slow_allocations);
106+ {
107+ Log (gc, tlab) log;
108+ if (log.is_trace ()) {
109+ Thread* thrd = thread ();
110+ size_t waste = _gc_waste + _refill_waste;
111+ double waste_percent = percent_of (waste, _allocated_size);
112+ log.trace (" TLAB GC: thread: " PTR_FORMAT " [id: %2d]"
113+ " desired: %zuK"
114+ " allocated: %zuK"
115+ " slow allocs: %d refill waste: %zuB"
116+ " refills: %d waste %4.1f%% gc: %dB"
117+ " slow: %dB" ,
118+ p2i (thrd), thrd->osthread ()->thread_id (),
119+ _desired_size*HeapWordSize/K,
120+ allocated_since_last_gc/K,
121+ _num_slow_allocations, _refill_waste_limit * HeapWordSize,
122+ _num_refills, waste_percent,
123+ _gc_waste * HeapWordSize,
124+ _refill_waste * HeapWordSize);
125+ }
126+ }
115127
116128 reset_statistics ();
117129}
@@ -147,20 +159,27 @@ void ThreadLocalAllocBuffer::record_refill_waste() {
147159}
148160
149161void ThreadLocalAllocBuffer::resize () {
150- // Compute the next tlab size using expected allocation amount
151162 assert (ResizeTLAB, " Should not call this otherwise" );
152- size_t alloc = (size_t )(_allocation_fraction.average () *
153- (Universe::heap ()->tlab_capacity () / HeapWordSize));
163+ size_t capacity_in_words = Universe::heap ()->tlab_capacity () / HeapWordSize;
164+ float alloc_fraction = _allocation_fraction.average ();
165+ if (alloc_fraction == 0.0 ) {
166+ // No samples, use global alloc fraction as an approximation.
167+ const float total_frac = ThreadLocalAllocStats::total_requested_size_fraction_avg ();
168+ const uint num_threads = ThreadLocalAllocStats::num_allocating_threads_avg ();
169+ alloc_fraction = total_frac / num_threads;
170+ }
171+ size_t alloc = (size_t )(alloc_fraction * capacity_in_words);
154172 size_t new_size = alloc / _target_num_refills;
155173
156174 new_size = clamp (new_size, min_size (), max_size ());
157175
158176 size_t aligned_new_size = align_object_size (new_size);
159177
160- log_trace (gc, tlab)(" TLAB new size : thread: " PTR_FORMAT " [id: %2d]"
161- " refills %d alloc: %8.6f desired_size: %zu -> %zu " ,
178+ log_trace (gc, tlab)(" TLAB resize : thread: " PTR_FORMAT " [id: %2d]"
179+ " alloc-fraction : %.3f desired_size: %zuK -> %zuK " ,
162180 p2i (thread ()), thread ()->osthread ()->thread_id (),
163- _target_num_refills, _allocation_fraction.average (), desired_size (), aligned_new_size);
181+ alloc_fraction,
182+ desired_size () * HeapWordSize/K, aligned_new_size * HeapWordSize/K);
164183
165184 set_desired_size (aligned_new_size);
166185 set_refill_waste_limit (initial_refill_waste_limit ());
@@ -179,11 +198,24 @@ void ThreadLocalAllocBuffer::fill(HeapWord* start,
179198 size_t new_size) {
180199 _num_refills++;
181200 _allocated_size += new_size;
182- print_stats ( " fill " );
201+
183202 assert (top <= start + new_size - alignment_reserve (), " size too small" );
184203
185204 initialize (start, top, start + new_size - alignment_reserve ());
186-
205+ {
206+ Log (gc, tlab) log;
207+ if (log.is_trace ()) {
208+ Thread* thrd = thread ();
209+ log.trace (" TLAB fill: thread: " PTR_FORMAT " [id: %2d]"
210+ " capacity: %zuK"
211+ " slow allocs: %d "
212+ " refills: %d" ,
213+ p2i (thrd), thrd->osthread ()->thread_id (),
214+ pointer_delta (_end, _start, sizeof (char )) / K,
215+ _num_slow_allocations,
216+ _num_refills);
217+ }
218+ }
187219 // Reset amount of internal fragmentation
188220 set_refill_waste_limit (initial_refill_waste_limit ());
189221}
@@ -206,13 +238,6 @@ void ThreadLocalAllocBuffer::initialize() {
206238
207239 set_desired_size (initial_desired_size ());
208240
209- size_t capacity = Universe::heap ()->tlab_capacity () / HeapWordSize;
210- if (capacity > 0 ) {
211- // Keep alloc_frac as float and not double to avoid the double to float conversion
212- float alloc_frac = desired_size () * target_num_refills () / (float )capacity;
213- _allocation_fraction.sample (alloc_frac);
214- }
215-
216241 set_refill_waste_limit (initial_refill_waste_limit ());
217242
218243 reset_statistics ();
@@ -243,11 +268,11 @@ size_t ThreadLocalAllocBuffer::initial_desired_size() {
243268 if (TLABSize > 0 ) {
244269 init_sz = TLABSize / HeapWordSize;
245270 } else {
246- // Initial size is a function of the average number of allocating threads.
247- unsigned int num_threads = ThreadLocalAllocStats::num_allocating_threads_avg ();
248-
249- init_sz = ( Universe::heap ()-> tlab_capacity () / HeapWordSize) /
250- (num_threads * target_num_refills ()) ;
271+ const size_t predicted_total_requested_size = ( size_t )( ThreadLocalAllocStats::total_requested_size_fraction_avg () * Universe::heap ()-> tlab_capacity ());
272+ const uint num_threads = ThreadLocalAllocStats::num_allocating_threads_avg ();
273+ const size_t per_thread_requested_size = predicted_total_requested_size / num_threads;
274+ const size_t tlab_size = per_thread_requested_size / _target_num_refills;
275+ init_sz = tlab_size / HeapWordSize ;
251276 init_sz = align_object_size (init_sz);
252277 }
253278 // We can't use clamp() between min_size() and max_size() here because some
@@ -258,32 +283,7 @@ size_t ThreadLocalAllocBuffer::initial_desired_size() {
258283 return init_sz;
259284}
260285
261- void ThreadLocalAllocBuffer::print_stats (const char * tag) {
262- Log (gc, tlab) log;
263- if (!log.is_trace ()) {
264- return ;
265- }
266-
267- Thread* thrd = thread ();
268- size_t waste = _gc_waste + _refill_waste;
269- double waste_percent = percent_of (waste, _allocated_size);
270- size_t tlab_used = Universe::heap ()->tlab_used ();
271- log.trace (" TLAB: %s thread: " PTR_FORMAT " [id: %2d]"
272- " desired_size: %zuKB"
273- " slow allocs: %d refill waste: %zuB"
274- " alloc:%8.5f %8.0fKB refills: %d waste %4.1f%% gc: %dB"
275- " slow: %dB" ,
276- tag, p2i (thrd), thrd->osthread ()->thread_id (),
277- _desired_size / (K / HeapWordSize),
278- _num_slow_allocations, _refill_waste_limit * HeapWordSize,
279- _allocation_fraction.average (),
280- _allocation_fraction.average () * tlab_used / K,
281- _num_refills, waste_percent,
282- _gc_waste * HeapWordSize,
283- _refill_waste * HeapWordSize);
284- }
285-
286- Thread* ThreadLocalAllocBuffer::thread () {
286+ Thread* ThreadLocalAllocBuffer::thread () const {
287287 return (Thread*)(((char *)this ) + in_bytes (start_offset ()) - in_bytes (Thread::tlab_start_offset ()));
288288}
289289
@@ -314,6 +314,7 @@ PerfVariable* ThreadLocalAllocStats::_perf_max_refill_waste;
314314PerfVariable* ThreadLocalAllocStats::_perf_total_num_slow_allocations;
315315PerfVariable* ThreadLocalAllocStats::_perf_max_num_slow_allocations;
316316AdaptiveWeightedAverage ThreadLocalAllocStats::_num_allocating_threads_avg (0 );
317+ AdaptiveWeightedAverage ThreadLocalAllocStats::_total_requested_size_fraction (0 );
317318
318319static PerfVariable* create_perf_variable (const char * name, PerfData::Units unit, TRAPS) {
319320 ResourceMark rm;
@@ -324,6 +325,9 @@ void ThreadLocalAllocStats::initialize() {
324325 _num_allocating_threads_avg = AdaptiveWeightedAverage (TLABAllocationWeight);
325326 _num_allocating_threads_avg.sample (1 ); // One allocating thread at startup
326327
328+ _total_requested_size_fraction = AdaptiveWeightedAverage (TLABAllocationWeight);
329+ _total_requested_size_fraction.sample (0 .10f ); // 10%
330+
327331 if (UsePerfData) {
328332 EXCEPTION_MARK;
329333 _perf_num_allocating_threads = create_perf_variable (" allocThreads" , PerfData::U_None, CHECK);
@@ -344,6 +348,7 @@ ThreadLocalAllocStats::ThreadLocalAllocStats() :
344348 _total_num_refills(0 ),
345349 _max_num_refills(0 ),
346350 _total_allocated_size(0 ),
351+ _total_requested_bytes(0 ),
347352 _total_gc_waste(0 ),
348353 _max_gc_waste(0 ),
349354 _total_refill_waste(0 ),
@@ -355,21 +360,25 @@ unsigned int ThreadLocalAllocStats::num_allocating_threads_avg() {
355360 return MAX2 ((unsigned int )(_num_allocating_threads_avg.average () + 0.5 ), 1U );
356361}
357362
358- void ThreadLocalAllocStats::update_fast_allocations (unsigned int num_refills,
359- size_t allocated_size,
360- size_t gc_waste,
361- size_t refill_waste) {
362- _num_allocating_threads += 1 ;
363- _total_num_refills += num_refills;
364- _max_num_refills = MAX2 (_max_num_refills, num_refills);
365- _total_allocated_size += allocated_size;
366- _total_gc_waste += gc_waste;
367- _max_gc_waste = MAX2 (_max_gc_waste, gc_waste);
368- _total_refill_waste += refill_waste;
369- _max_refill_waste = MAX2 (_max_refill_waste, refill_waste);
363+ float ThreadLocalAllocStats::total_requested_size_fraction_avg () {
364+ return _total_requested_size_fraction.average ();
370365}
371366
372- void ThreadLocalAllocStats::update_num_slow_allocations (unsigned int num_slow_allocations) {
367+ void ThreadLocalAllocStats::update_current_thread_stats (unsigned int num_refills,
368+ size_t requested_bytes,
369+ size_t alloc_size_for_tlab,
370+ size_t gc_waste,
371+ size_t refill_waste,
372+ unsigned int num_slow_allocations) {
373+ _num_allocating_threads += 1 ;
374+ _total_num_refills += num_refills;
375+ _max_num_refills = MAX2 (_max_num_refills, num_refills);
376+ _total_allocated_size += alloc_size_for_tlab;
377+ _total_requested_bytes += requested_bytes;
378+ _total_gc_waste += gc_waste;
379+ _max_gc_waste = MAX2 (_max_gc_waste, gc_waste);
380+ _total_refill_waste += refill_waste;
381+ _max_refill_waste = MAX2 (_max_refill_waste, refill_waste);
373382 _total_num_slow_allocations += num_slow_allocations;
374383 _max_num_slow_allocations = MAX2 (_max_num_slow_allocations, num_slow_allocations);
375384}
@@ -379,6 +388,7 @@ void ThreadLocalAllocStats::update(const ThreadLocalAllocStats& other) {
379388 _total_num_refills += other._total_num_refills ;
380389 _max_num_refills = MAX2 (_max_num_refills, other._max_num_refills );
381390 _total_allocated_size += other._total_allocated_size ;
391+ _total_requested_bytes += other._total_requested_bytes ;
382392 _total_gc_waste += other._total_gc_waste ;
383393 _max_gc_waste = MAX2 (_max_gc_waste, other._max_gc_waste );
384394 _total_refill_waste += other._total_refill_waste ;
@@ -392,6 +402,7 @@ void ThreadLocalAllocStats::reset() {
392402 _total_num_refills = 0 ;
393403 _max_num_refills = 0 ;
394404 _total_allocated_size = 0 ;
405+ _total_requested_bytes = 0 ;
395406 _total_gc_waste = 0 ;
396407 _max_gc_waste = 0 ;
397408 _total_refill_waste = 0 ;
@@ -401,22 +412,37 @@ void ThreadLocalAllocStats::reset() {
401412}
402413
403414void ThreadLocalAllocStats::publish () {
404- if (_total_allocated_size == 0 ) {
415+ if (_total_requested_bytes == 0 ) {
405416 return ;
406417 }
407418
408419 _num_allocating_threads_avg.sample (_num_allocating_threads);
409420
421+ {
422+ const size_t tlab_capacity = Universe::heap ()->tlab_capacity ();
423+ const size_t tlab_used = Universe::heap ()->tlab_used ();
424+ if (tlab_used > 0.5 * tlab_capacity) {
425+ // To avoid divide-by-zero
426+ const size_t effective_tlab_capacity = MAX2 (tlab_capacity, size_t (1 ));
427+ const float requested_size_fraction = (float )_total_requested_bytes / effective_tlab_capacity;
428+ _total_requested_size_fraction.sample (MIN2 (requested_size_fraction, 1 .0f ));
429+ }
430+ }
431+
410432 const size_t waste = _total_gc_waste + _total_refill_waste;
411433 const double waste_percent = percent_of (waste, _total_allocated_size);
412- log_debug (gc, tlab)(" TLAB totals: thrds: %d refills: %d max: %d"
413- " slow allocs: %d max %d waste: %4.1f%%"
414- " gc: %zuB max: %zuB"
415- " slow: %zuB max: %zuB" ,
416- _num_allocating_threads, _total_num_refills, _max_num_refills,
434+
435+ const double gc_waste_pct = percent_of (_total_gc_waste, _total_allocated_size);
436+ const double refill_waste_pct = percent_of (_total_refill_waste, _total_allocated_size);
437+
438+ log_debug (gc, tlab)(" TLAB totals: thrds: %d alloc-frac: %.1f%% refills: %d max: %d"
439+ " slow allocs: %d max %d waste: %.1f%%"
440+ " gc: %zuB(%.1f%%) max: %zuB"
441+ " refill: %zuB(%.1f%%) max: %zuB" ,
442+ _num_allocating_threads, _total_requested_size_fraction.average () * 100 , _total_num_refills, _max_num_refills,
417443 _total_num_slow_allocations, _max_num_slow_allocations, waste_percent,
418- _total_gc_waste * HeapWordSize, _max_gc_waste * HeapWordSize,
419- _total_refill_waste * HeapWordSize, _max_refill_waste * HeapWordSize);
444+ _total_gc_waste * HeapWordSize, gc_waste_pct, _max_gc_waste * HeapWordSize,
445+ _total_refill_waste * HeapWordSize, refill_waste_pct, _max_refill_waste * HeapWordSize);
420446
421447 if (UsePerfData) {
422448 _perf_num_allocating_threads ->set_value (_num_allocating_threads);
0 commit comments