forked from facebook/hhvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinlining-decider.cpp
More file actions
681 lines (580 loc) · 20.8 KB
/
inlining-decider.cpp
File metadata and controls
681 lines (580 loc) · 20.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2016 Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/inlining-decider.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/ext/generator/ext_generator.h"
#include "hphp/runtime/vm/bytecode.h"
#include "hphp/runtime/vm/func.h"
#include "hphp/runtime/vm/hhbc.h"
#include "hphp/runtime/vm/jit/irgen.h"
#include "hphp/runtime/vm/jit/location.h"
#include "hphp/runtime/vm/jit/irlower.h"
#include "hphp/runtime/vm/jit/normalized-instruction.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/region-selection.h"
#include "hphp/runtime/vm/jit/trans-cfg.h"
#include "hphp/runtime/vm/jit/translate-region.h"
#include "hphp/runtime/vm/srckey.h"
#include "hphp/util/arch.h"
#include "hphp/util/struct-log.h"
#include "hphp/util/trace.h"
#include <folly/RWSpinLock.h>
#include <folly/Synchronized.h>
#include <vector>
namespace HPHP { namespace jit {
///////////////////////////////////////////////////////////////////////////////
TRACE_SET_MOD(inlining);
namespace {
///////////////////////////////////////////////////////////////////////////////
bool traceRefusal(const Func* caller, const Func* callee, const char* why) {
if (Trace::enabled) {
UNUSED auto calleeName = callee ? callee->fullName()->data()
: "(unknown)";
assertx(caller);
FTRACE(2, "InliningDecider: refusing {}() <- {}{}\t<reason: {}>\n",
caller->fullName()->data(), calleeName, callee ? "()" : "", why);
}
return false;
}
/*
 * Set to true by InliningDecider::forbidInliningOf().  While it is still
 * false, inliningIsForbiddenFor() answers without taking the lock below.
 */
std::atomic<bool> hasCalledDisableInliningIntrinsic;
/*
 * Full names of the functions registered through forbidInliningOf().
 * All reads and writes in this file happen with forbiddenInlineesLock held.
 */
hphp_hash_set<const StringData*,
string_data_hash,
string_data_isame> forbiddenInlinees;
// Guards forbiddenInlinees.
SimpleMutex forbiddenInlineesLock;
/*
 * Whether `callee' was registered through InliningDecider::forbidInliningOf().
 * The lookup is keyed on the function's full name.
 */
bool inliningIsForbiddenFor(const Func* callee) {
  // Nothing has ever been forbidden: answer without touching the lock.
  if (!hasCalledDisableInliningIntrinsic.load()) return false;

  auto const name = callee->fullName();
  SimpleLock guard(forbiddenInlineesLock);
  return forbiddenInlinees.count(name) != 0;
}
///////////////////////////////////////////////////////////////////////////////
// canInlineAt() helpers.
/*
 * Check whether `callee' has any characteristic that prevents inlining it at
 * the call `callSK', without peeking into its bytecode or regions.
 *
 * Returns true if nothing rules the callee out; otherwise traces the reason
 * and returns false.
 */
bool isCalleeInlinable(SrcKey callSK, const Func* callee) {
  // Yields the first reason the callee can't be inlined, or nullptr if none.
  auto const whyNot = [&] () -> const char* {
    if (!callee) return "callee not known";
    if (inliningIsForbiddenFor(callee)) return "inlining disabled for callee";
    if (callee == callSK.func()) return "call is recursive";

    if (callee->hasVariadicCaptureParam()) {
      if (callee->attrs() & AttrMayUseVV) {
        return "callee has variadic capture and MayUseVV";
      }
      // Refuse if the variadic parameter actually captures something.
      auto const callPC = callSK.pc();
      auto const argc = getImm(callPC, 0).u_IVA;
      auto const paramc = callee->numParams();
      if (argc >= paramc) {
        return "callee has variadic capture with non-empty value";
      }
    }

    if (callee->numIterators() != 0) return "callee has iterators";
    if (callee->isMagic()) return "magic callee";
    if (callee->isResumable()) return "callee is resumable";
    if (callee->maxStackCells() >= kStackCheckLeafPadding) {
      return "function stack depth too deep";
    }
    if (callee->isMethod() && callee->cls() == Generator::getClass()) {
      return "generator member function";
    }
    return nullptr;
  }();

  if (whyNot) return traceRefusal(callSK.func(), callee, whyNot);
  return true;
}
/*
 * Check that the call at `callSK' passes neither too many nor too few
 * arguments to `callee'.
 *
 * The argument count is read from the first immediate of the call
 * instruction.  Returns true when the counts are compatible; otherwise traces
 * the reason and returns false.
 */
bool checkNumArgs(SrcKey callSK, const Func* callee) {
  assertx(callee);

  auto const callPC = callSK.pc();
  auto const numArgs = getImm(callPC, 0).u_IVA;
  auto const numParams = callee->numParams();

  if (numArgs > numParams) {
    return traceRefusal(callSK.func(), callee,
                        "callee called with too many arguments");
  }

  // Every parameter that was not passed must either have a default value
  // (filled in by a DV funclet) or be the trailing variadic capture param.
  for (auto idx = numArgs; idx < numParams; ++idx) {
    if (callee->params()[idx].hasDefaultValue()) continue;

    auto const isVariadicSlot =
      idx == numParams - 1 && callee->hasVariadicCaptureParam();
    if (!isVariadicSlot) {
      return traceRefusal(callSK.func(), callee,
                          "callee called with too few arguments");
    }
  }
  return true;
}
///////////////////////////////////////////////////////////////////////////////
}
/*
 * Register `callee' (by full name) as a function that must not be inlined.
 */
void InliningDecider::forbidInliningOf(const Func* callee) {
  // The flag is raised before the name is inserted, as in the lookup path
  // where the flag is read before the lock is taken.
  hasCalledDisableInliningIntrinsic.store(true);

  auto const name = callee->fullName();
  SimpleLock guard(forbiddenInlineesLock);
  forbiddenInlinees.insert(name);
}
/*
 * Whether the call at `callSK' to `callee' may be inlined at all, judging
 * only by runtime options, the call site, and the callee's metadata.
 *
 * Returns false for a null callee.
 */
bool InliningDecider::canInlineAt(SrcKey callSK, const Func* callee) const {
  if (!callee) return false;
  if (!RuntimeOption::EvalHHIREnableGenTimeInlining) return false;
  if (RuntimeOption::EvalJitEnableRenameFunction) return false;
  if (callee->attrs() & AttrInterceptable) return false;

  if (auto const calleeCls = callee->cls()) {
    if (!classHasPersistentRDS(calleeCls)) {
      // If the callee's class is not persistent, it's still OK to use it if
      // we're jitting into a method of a subclass.
      auto const callerCls = callSK.func()->cls();
      if (!callerCls || !callerCls->classof(calleeCls)) return false;
    }
  } else {
    auto const handle = callee->funcHandle();
    auto const persistent =
      handle != rds::kInvalidHandle && rds::isPersistentHandle(handle);
    if (!persistent) {
      // If the callee isn't persistent, it's still OK to use it if it's
      // defined at the top level in the same unit as the caller.
      if (callee->unit() != callSK.unit() || !callee->top()) return false;
    }
  }

  // If inlining was disabled... don't inline.
  if (m_disabled) return false;

  // TODO(#3331014): We have this hack until more ARM codegen is working.
  if (arch() == Arch::ARM) return false;

  // We can only inline at normal FCalls.
  auto const callOp = callSK.op();
  if (callOp != Op::FCall && callOp != Op::FCallD) return false;

  // Don't inline from resumed functions.  The inlining mechanism doesn't have
  // support for these---it has no way to redefine stack pointers relative to
  // the frame pointer, because in a resumed function the frame pointer points
  // into the heap instead of into the eval stack.
  if (callSK.resumed()) return false;

  // TODO(#4238160): Inlining into pseudomain callsites is still buggy.
  if (callSK.func()->isPseudoMain()) return false;

  return isCalleeInlinable(callSK, callee) && checkNumArgs(callSK, callee);
}
namespace {
///////////////////////////////////////////////////////////////////////////////
// shouldInline() helpers.
/*
 * Check whether the CPP builtin `f' can be inlined.
 *
 * `f' must be a CPP builtin (asserted).
 */
bool isInlinableCPPBuiltin(const Func* f) {
  assertx(f->isCPPBuiltin());

  // The callee needs to be callable with FCallBuiltin, because NativeImpl
  // requires a frame.
  if (!RuntimeOption::EvalEnableCallBuiltin) return false;
  if (f->attrs() & AttrNoFCallBuiltin) return false;
  if (f->numParams() > Native::maxFCallBuiltinArgs()) return false;
  if (!f->nativeFuncPtr()) return false;

  // ARM currently can't handle floating point returns.
  auto const returnsDouble = f->returnType() == KindOfDouble;
  if (returnsDouble && !Native::allowFCallBuiltinDoubles()) return false;

  // For now, don't inline when we'd need to adjust ObjectData pointers.
  auto const cls = f->cls();
  if (cls && cls->preClass()->builtinODOffset() != 0) return false;

  return true;
}
/*
 * Conservative whitelist of HHBC opcodes we know are safe to inline even when
 * the callee as a whole carries AttrMayUseVV.
 *
 * This matters when parameter types let us eliminate control flow while
 * inlining, and the AttrMayUseVV flag was caused by something in a block we
 * won't inline.
 */
bool isInliningVVSafe(Op op) {
  switch (op) {
    case Op::Null:
    case Op::PopC:
    case Op::CGetL:
    case Op::SetL:
    case Op::IsTypeL:
    case Op::JmpNS:
    case Op::JmpNZ:
    case Op::JmpZ:
    case Op::AssertRATL:
    case Op::AssertRATStk:
    case Op::VerifyParamType:
    case Op::VerifyRetTypeC:
    case Op::RetC:
      return true;

    default:
      return false;
  }
}
/*
 * Key identifying a callee region for the inlining cost cache.
 *
 * It is made of the SrcKey at which the region's entry block starts, the
 * inline context type, and the inline input types recorded on the region.
 */
struct InlineRegionKey {
  explicit InlineRegionKey(const RegionDesc& region)
    : entryKey(region.entry()->start())
    , ctxType(region.inlineCtxType())
  {
    for (auto const ty : region.inlineInputTypes()) {
      argTypes.push_back(ty);
    }
  }

  // Copies are spelled out element by element instead of being defaulted
  // (presumably because TinyVector is not copyable --- TODO confirm).
  InlineRegionKey(const InlineRegionKey& irk)
    : entryKey(irk.entryKey)
    , ctxType(irk.ctxType)
  {
    for (auto ty : irk.argTypes) argTypes.push_back(ty);
  }

  InlineRegionKey& operator=(const InlineRegionKey& irk) {
    // Self-assignment must be a no-op: clearing argTypes first would
    // otherwise destroy the very elements we are about to copy.
    if (this == &irk) return *this;

    entryKey = irk.entryKey;
    ctxType = irk.ctxType;
    argTypes.clear();
    for (auto ty : irk.argTypes) argTypes.push_back(ty);
    return *this;
  }

  /*
   * Equality predicate for hashed containers: two keys are equal when all
   * three components match.
   */
  struct Eq {
    bool operator()(const InlineRegionKey& k1,
                    const InlineRegionKey& k2) const {
      return
        k1.entryKey == k2.entryKey &&
        k1.ctxType == k2.ctxType &&
        k1.argTypes == k2.argTypes;
    }
  };

  /*
   * Hash functor for hashed containers; folds the entry SrcKey, the context
   * type and every argument type, in order, into one value.
   */
  struct Hash {
    size_t operator()(const InlineRegionKey& key) const {
      size_t h = 0;
      h = hash_combine(h, key.entryKey.toAtomicInt());
      h = hash_combine(h, key.ctxType.hash());
      for (auto const ty : key.argTypes) {
        h = hash_combine(h, ty.hash());
      }
      return h;
    }

  private:
    template<class T>
    static size_t hash_combine(size_t base, T other) {
      return folly::hash::hash_128_to_64(
        base, folly::hash::hash_combine(other));
    }
  };

  SrcKey entryKey;
  Type ctxType;
  TinyVector<Type, 4> argTypes;
};
/*
 * Map from an InlineRegionKey to the (unsigned) cost stored for that region;
 * this is the type wrapped by s_inlCostCache.
 */
using InlineCostCache = std::unordered_map<
InlineRegionKey,
unsigned,
InlineRegionKey::Hash,
InlineRegionKey::Eq
>;
/*
 * Set of InlineRegionKeys using the same hash and equality functors.
 * NOTE(review): not referenced in the visible part of this file --- confirm
 * whether it is still needed.
 */
using RegionKeySet = std::unordered_set<
InlineRegionKey,
InlineRegionKey::Hash,
InlineRegionKey::Eq
>;
/*
 * Compute the cost of `region' the slow way: generate an IR unit for it as an
 * inline region at `at', then ask irlower for that unit's cost.
 *
 * If no unit could be generated, the result is {0, true}, i.e. cost 0 with
 * the `incomplete' flag set.
 */
Vcost computeTranslationCostSlow(SrcKey at, const RegionDesc& region) {
  TransContext ctx {
    kInvalidTransID,
    TransKind::Optimize,
    TransFlags{},
    at,
    FPInvOffset{0}
  };

  auto const unit = irGenInlineRegion(ctx, region);
  if (unit) {
    SCOPE_ASSERT_DETAIL("Inline-IRUnit") { return show(*unit); };
    return irlower::computeIRUnitCost(*unit);
  }
  return {0, true};
}
/*
 * Cache of region costs keyed by InlineRegionKey, wrapped in a
 * folly::Synchronized using an RWSpinLock.  Read and filled by
 * computeTranslationCost().
 */
folly::Synchronized<InlineCostCache, folly::RWSpinLock> s_inlCostCache;
/*
 * Return the cost of inlining `region' at `at', consulting and updating
 * s_inlCostCache.
 *
 * An incomplete region is reported as std::numeric_limits<int>::max().  That
 * result is left uncached only while the region's function is still being
 * profiled and the measured partial cost is within
 * EvalHHIRInliningMaxVasmCost.
 */
int computeTranslationCost(SrcKey at, const RegionDesc& region) {
  InlineRegionKey irk{region};

  // Fast path: the cost of this exact region is already known.
  SYNCHRONIZED_CONST(s_inlCostCache) {
    auto const cached = s_inlCostCache.find(irk);
    if (cached != s_inlCostCache.end()) return cached->second;
  }

  auto const info = computeTranslationCostSlow(at, region);
  auto cost = info.cost;

  if (info.incomplete) {
    cost = std::numeric_limits<int>::max();

    // If the region wasn't complete, don't cache the result, unless we
    // already know it will be too expensive, or we've stopped profiling it.
    auto const fid = region.entry()->func()->getFuncId();
    auto const pd = jit::profData();
    auto const stillProfiling = pd && pd->profiling(fid);
    auto const withinBudget =
      info.cost <= RuntimeOption::EvalHHIRInliningMaxVasmCost;
    if (stillProfiling && withinBudget) return cost;
  }

  // Only take the write path if nobody has inserted this key in the meantime.
  auto const alreadyCached = s_inlCostCache.asConst()->count(irk) != 0;
  if (!alreadyCached) {
    s_inlCostCache->emplace(irk, cost);
  }
  return cost;
}
///////////////////////////////////////////////////////////////////////////////
}
/*
 * Update the decider's bookkeeping for the start of inlining `callee': push
 * the region's cost, add it to the running total, and grow the call and
 * stack depths.
 */
void InliningDecider::accountForInlining(SrcKey callerSk,
                                         const Func* callee,
                                         const RegionDesc& region) {
  auto const regionCost = computeTranslationCost(callerSk, region);

  m_costStack.push_back(regionCost);
  m_cost       += regionCost;
  m_stackDepth += callee->maxStackCells();
  ++m_callDepth;
}
/*
 * Like accountForInlining(), but records a cost of zero for `callee': only
 * the call depth and stack depth grow.
 */
void InliningDecider::initWithCallee(const Func* callee) {
  m_stackDepth += callee->maxStackCells();
  ++m_callDepth;
  m_costStack.push_back(0);
}
/*
 * Decide whether `region', selected for `callee', should be inlined at the
 * call `callerSk'.
 *
 * The region is refused when the accumulated stack depth would reach
 * kStackCheckLeafPadding, when it is a CPP builtin that can't be inlined,
 * when it contains an instruction we can't inline, when it has no return, or
 * when its cost exceeds `maxTotalCost' minus the cost already accumulated in
 * m_cost.  Every verdict is traced.
 */
bool InliningDecider::shouldInline(SrcKey callerSk,
                                   const Func* callee,
                                   const RegionDesc& region,
                                   uint32_t maxTotalCost) {
  auto sk = region.empty() ? SrcKey() : region.start();
  assertx(callee);
  assertx(sk.func() == callee);

  // Tracing helpers; each returns the verdict so it can be `return'ed.
  auto reject = [&] (const char* why) {
    FTRACE(2, "shouldInline: rejecting callee region: {}", show(region));
    return traceRefusal(m_topFunc, callee, why);
  };
  auto approve = [&, this] (const char* kind) {
    FTRACE(2, "InliningDecider: inlining {}() <- {}()\t<reason: {}>\n",
           m_topFunc->fullName()->data(), callee->fullName()->data(), kind);
    return true;
  };

  auto const calleeCells = callee->maxStackCells();
  if (m_stackDepth + calleeCells >= kStackCheckLeafPadding) {
    return reject("inlining stack depth limit exceeded");
  }

  // Even if the func contains NativeImpl we may have broken the trace before
  // we hit it, so look for a block that actually ends in one.
  auto const endsInNativeImpl = [&] {
    for (auto const& block : region.blocks()) {
      if (block->empty()) continue;
      if (block->last().op() == OpNativeImpl) return true;
    }
    return false;
  };

  // Try to inline CPP builtin functions.  The NativeImpl opcode may appear
  // later in the function because of Asserts generated in hhbbc.
  if (callee->isCPPBuiltin() && endsInNativeImpl()) {
    return isInlinableCPPBuiltin(callee)
      ? approve("inlinable CPP builtin")
      : reject("non-inlinable CPP builtin");
  }

  // If the function may use a VarEnv (which is stored in the ActRec) or may
  // be variadic, we restrict inlined callees to certain whitelisted
  // instructions which we know won't actually require these features.
  const bool restrictToVVSafe = callee->attrs() & AttrMayUseVV;
  bool sawReturn = false;

  // Walk every instruction of every block, checking suitability.
  for (auto const& block : region.blocks()) {
    sk = block->start();
    auto const numInstrs = block->length();

    for (auto idx = 0; idx < numInstrs; ++idx, sk.advance()) {
      auto const op = sk.op();

      // We don't allow inlined functions in the region.  The client is
      // expected to disable inlining for the region it gives us to peek.
      if (sk.func() != callee) {
        return reject("got region with inlined calls");
      }

      // Restrict to VV-safe opcodes if necessary.
      if (restrictToVVSafe && !isInliningVVSafe(op)) {
        auto const msg = folly::format("{} may use dynamic environment",
                                       opcodeToName(op)).str();
        return reject(msg.c_str());
      }

      // Remember whether the region returns at all.
      if (isReturnish(op)) sawReturn = true;

      // We can't inline FCallArray.  XXX: Why?
      if (op == Op::FCallArray) {
        return reject("can't inline FCallArray");
      }
    }
  }

  if (!sawReturn) {
    return reject("region has no returns");
  }

  // Refuse if the cost exceeds our thresholds.  We measure the cost of
  // inlining each callstack and stop when it exceeds a certain threshold.
  // (Note that we do not measure the total cost of all the inlined calls for
  // a given caller---just the cost of each nested stack.)
  const int budget = maxTotalCost - m_cost;
  const int regionCost = computeTranslationCost(callerSk, region);
  if (regionCost > budget) {
    return reject("too expensive");
  }
  return approve("small region with return");
}
///////////////////////////////////////////////////////////////////////////////
/*
 * Undo the bookkeeping done when inlining of `callee' started: pop its cost
 * off the cost stack, subtract it from the running total, and shrink the
 * call and stack depths.
 */
void InliningDecider::registerEndInlining(const Func* callee) {
  m_cost -= m_costStack.back();
  m_costStack.pop_back();

  m_stackDepth -= callee->maxStackCells();
  --m_callDepth;
}
namespace {
/*
 * Select a tracelet region for `callee', entered with `numArgs' arguments
 * whose types are `argTypes' (indexed by local id).
 *
 * On success the region is tagged with `ctxType' and `argTypes' as its
 * inline context.  Returns whatever selectTracelet() produced, which may be
 * null.
 */
RegionDescPtr selectCalleeTracelet(const Func* callee,
                                   const int numArgs,
                                   Type ctxType,
                                   std::vector<Type>& argTypes,
                                   int32_t maxBCInstrs) {
  // Set up the RegionContext for the tracelet selector.
  RegionContext ctx;
  ctx.func     = callee;
  ctx.bcOffset = callee->getEntryForNumArgs(numArgs);
  ctx.spOffset = FPInvOffset{safe_cast<int32_t>(callee->numSlotsInFrame())};
  ctx.resumed  = false;

  // Locals holding passed arguments start out with the argument's type.
  uint32_t local = 0;
  for (; local < numArgs; ++local) {
    auto const type = argTypes[local];
    assertx((type <= TGen) || (type <= TCls));
    ctx.liveTypes.push_back({Location::Local{local}, type});
  }

  // The remaining parameters will be populated by DV init funclets but
  // they'll start out as Uninit.
  auto const numParams = callee->numParams();
  for (; local < numParams; ++local) {
    ctx.liveTypes.push_back({Location::Local{local}, TUninit});
  }

  // Produce a tracelet for the callee.
  auto region = selectTracelet(
    ctx,
    TransKind::Live,
    maxBCInstrs,
    true /* inlining */
  );
  if (region) {
    region->setInlineContext(ctxType, argTypes);
  }
  return region;
}
/*
 * Find a profiling translation of `callee' that starts at the entry point
 * for `numArgs' arguments and whose type preconditions on the argument
 * locals are all satisfied by `argTypes'.
 *
 * Returns the first matching TransID, or kInvalidTransID if there is none.
 */
TransID findTransIDForCallee(const ProfData* profData,
                             const Func* callee, const int numArgs,
                             std::vector<Type>& argTypes) {
  auto const candidates = profData->funcProfTransIDs(callee->getFuncId());
  auto const entryOff = callee->getEntryForNumArgs(numArgs);

  for (auto const id : candidates) {
    auto const rec = profData->transRec(id);
    if (rec->startBcOff() != entryOff) continue;

    auto const region = rec->region();

    // Only preconditions on locals that hold passed arguments matter here.
    bool compatible = true;
    for (auto const& typeloc : region->entry()->typePreConditions()) {
      if (typeloc.location.tag() != LTag::Local) continue;
      auto const locId = typeloc.location.localId();
      if (locId < numArgs && !(argTypes[locId] <= typeloc.type)) {
        compatible = false;
        break;
      }
    }
    if (compatible) return id;
  }
  return kInvalidTransID;
}
/*
 * Select a hot-CFG region for `callee' from its profiling translations.
 *
 * Returns nullptr when there is no profiling data, the callee is not being
 * profiled, or no profiling translation matches `numArgs' and `argTypes'.
 * On success the region is tagged with `ctxType' and `argTypes' as its
 * inline context.
 */
RegionDescPtr selectCalleeCFG(const Func* callee, const int numArgs,
                              Type ctxType, std::vector<Type>& argTypes,
                              int32_t maxBCInstrs) {
  auto const pd = jit::profData();
  if (!pd) return nullptr;

  auto const funcId = callee->getFuncId();
  if (!pd->profiling(funcId)) return nullptr;

  auto const entryTID = findTransIDForCallee(pd, callee, numArgs, argTypes);
  if (entryTID == kInvalidTransID) return nullptr;

  TransCFG cfg(funcId, pd, mcg->srcDB(), true /* inlining */);

  HotTransContext ctx;
  ctx.tid         = entryTID;
  ctx.cfg         = &cfg;
  ctx.profData    = pd;
  ctx.maxBCInstrs = maxBCInstrs;
  ctx.inlining    = true;
  ctx.inputTypes  = &argTypes;

  auto region = selectHotCFG(ctx);
  if (region) {
    region->setInlineContext(ctxType, argTypes);
  }
  return region;
}
}
/*
 * Select a region of `callee' to inline at the call `sk', or return nullptr
 * if no acceptable region is found.
 *
 * Argument types are read off the eval stack in `irgs'.  Depending on
 * EvalInlineRegionMode, a tracelet is tried first ("tracelet" or "both");
 * unless the mode is exactly "tracelet", a profile-based CFG region is then
 * tried when profiling data exists.  A candidate is returned only if
 * `inl.shouldInline()' accepts it.
 */
RegionDescPtr selectCalleeRegion(const SrcKey& sk,
                                 const Func* callee,
                                 const irgen::IRGS& irgs,
                                 InliningDecider& inl,
                                 int32_t maxBCInstrs) {
  auto const callPC = sk.pc();
  auto const numArgs = getImm(callPC, 0).u_IVA;

  auto const& fpi = irgs.irb->fs().fpiStack();
  assertx(!fpi.empty());
  auto const ctx = fpi.back().ctxType;

  // Walk the stack from the deepest argument to the topmost, so argTypes
  // ends up indexed by parameter position.
  std::vector<Type> argTypes;
  for (int stackOff = numArgs - 1; stackOff >= 0; --stackOff) {
    auto const type = irgen::publicTopType(irgs, BCSPRelOffset{stackOff});

    // If we don't have sufficient type information to inline the region,
    // return early.
    auto const known =
      type <= TCell || type <= TBoxedCell || type <= TCls;
    if (!known) return nullptr;

    argTypes.push_back(type);
  }

  auto const maxCost = RuntimeOption::EvalHHIRInliningMaxVasmCost;
  auto const acceptable = [&] (const RegionDescPtr& region) {
    return region && inl.shouldInline(sk, callee, *region, maxCost);
  };

  const auto mode = RuntimeOption::EvalInlineRegionMode;
  auto const traceletOnly = mode == "tracelet";

  if (traceletOnly || mode == "both") {
    auto region = selectCalleeTracelet(
      callee,
      numArgs,
      ctx,
      argTypes,
      maxBCInstrs
    );
    if (acceptable(region)) return region;
    if (traceletOnly) return nullptr;
  }

  if (profData()) {
    auto region = selectCalleeCFG(callee, numArgs, ctx, argTypes, maxBCInstrs);
    if (acceptable(region)) return region;
  }
  return nullptr;
}
///////////////////////////////////////////////////////////////////////////////
}}