Skip to content

Commit cd4a4e6

Browse files
committed
platform: Improve oom-killer
1 parent a20f5fd commit cd4a4e6

3 files changed

Lines changed: 120 additions & 4 deletions

File tree

experimental/libbox/oom_report.go

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,63 @@ type oomReporter struct{}
6464
var _ oomkiller.OOMReporter = (*oomReporter)(nil)
6565

6666
// WriteReport writes an OOM snapshot for memoryUsage into a new directory
// under <sWorkingPath>/oom_reports.
//
// This span is a diff hunk: lines following an "N-" marker belong to the
// previous version, lines following an "N+" marker to the new one. In the new
// version the oom_draft directory is stat'ed before anything is written, and
// after a successful writeSnapshot the draft is handed to
// discardDraftIfCurrent together with that earlier stat result.
func (r *oomReporter) WriteReport(memoryUsage uint64) error {
67-
now := time.Now().UTC()
67+
draftPath := filepath.Join(sWorkingPath, "oom_draft")
68+
draftInfo, err := os.Stat(draftPath)
69+
if err != nil {
70+
if !os.IsNotExist(err) {
71+
return err
72+
}
73+
// A missing draft is not an error; nil tells discardDraftIfCurrent to do nothing.
draftInfo = nil
74+
}
6875
reportsDir := filepath.Join(sWorkingPath, "oom_reports")
69-
err := os.MkdirAll(reportsDir, 0o777)
76+
err = os.MkdirAll(reportsDir, 0o777)
7077
if err != nil {
7178
return err
7279
}
7380
chownReport(reportsDir)
7481

75-
destPath, err := nextAvailableReportPath(reportsDir, now)
82+
destPath, err := nextAvailableReportPath(reportsDir, time.Now().UTC())
7683
if err != nil {
7784
return err
7885
}
79-
err = os.MkdirAll(destPath, 0o777)
86+
err = r.writeSnapshot(destPath, memoryUsage)
87+
if err != nil {
88+
return err
89+
}
90+
return discardDraftIfCurrent(draftPath, draftInfo)
91+
}
92+
93+
func (r *oomReporter) WriteDraft(memoryUsage uint64) error {
94+
draftPath := filepath.Join(sWorkingPath, "oom_draft")
95+
os.RemoveAll(draftPath)
96+
return r.writeSnapshot(draftPath, memoryUsage)
97+
}
98+
99+
func (r *oomReporter) DiscardDraft() error {
100+
draftPath := filepath.Join(sWorkingPath, "oom_draft")
101+
return os.RemoveAll(draftPath)
102+
}
103+
104+
// discardDraftIfCurrent removes draftPath only when it still refers to the
// file described by draftInfo.
//
// It returns nil without touching the filesystem when draftInfo is nil, and
// nil when draftPath no longer exists or is reported by os.SameFile as a
// different file. Any other stat error, or the error from os.RemoveAll, is
// returned to the caller.
func discardDraftIfCurrent(draftPath string, draftInfo os.FileInfo) error {
	if draftInfo == nil {
		return nil
	}
	latest, statErr := os.Stat(draftPath)
	switch {
	case statErr == nil:
		// Path exists; fall through to the identity comparison below.
	case os.IsNotExist(statErr):
		return nil
	default:
		return statErr
	}
	if os.SameFile(draftInfo, latest) {
		return os.RemoveAll(draftPath)
	}
	return nil
}
120+
121+
func (r *oomReporter) writeSnapshot(destPath string, memoryUsage uint64) error {
122+
now := time.Now().UTC()
123+
err := os.MkdirAll(destPath, 0o777)
80124
if err != nil {
81125
return err
82126
}
@@ -139,3 +183,36 @@ func writeOOMProfile(destPath string, name string) {
139183
}
140184
chownReport(filePath)
141185
}
186+
187+
func promoteOOMDraftAt(workingPath string) {
188+
draftPath := filepath.Join(workingPath, "oom_draft")
189+
info, err := os.Stat(draftPath)
190+
if err != nil || !info.IsDir() {
191+
return
192+
}
193+
reportsDir := filepath.Join(workingPath, "oom_reports")
194+
initReportDir(reportsDir)
195+
destPath, err := nextAvailableReportPath(reportsDir, info.ModTime().UTC())
196+
if err != nil {
197+
os.RemoveAll(draftPath)
198+
return
199+
}
200+
err = os.Rename(draftPath, destPath)
201+
if err != nil {
202+
os.RemoveAll(draftPath)
203+
return
204+
}
205+
chownReport(destPath)
206+
}
207+
208+
// promoteOOMDraft runs promoteOOMDraftAt against the package-level
// sWorkingPath.
func promoteOOMDraft() {
209+
promoteOOMDraftAt(sWorkingPath)
210+
}
211+
212+
// PromoteOOMDraft is the exported entry point for promoteOOMDraft, which
// promotes the draft found under sWorkingPath.
func PromoteOOMDraft() {
213+
promoteOOMDraft()
214+
}
215+
216+
// PromoteOOMDraftAt is the exported entry point for promoteOOMDraftAt; it
// promotes the draft found under the given workingPath.
func PromoteOOMDraftAt(workingPath string) {
217+
promoteOOMDraftAt(workingPath)
218+
}

service/oomkiller/service.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ import (
1515

1616
// OOMReporter is the reporting backend that Service looks up from its context
// via service.FromContext.
type OOMReporter interface {
1717
// WriteReport writes a report for the given memory usage.
WriteReport(memoryUsage uint64) error
18+
// WriteDraft writes a draft for the given memory usage; Service.writeOOMDraft
// calls it.
WriteDraft(memoryUsage uint64) error
19+
// DiscardDraft discards the draft; Service.discardOOMDraft calls it, as does
// Service.writeOOMDraft when drafts were cancelled during a write.
DiscardDraft() error
1820
}
1921

2022
func RegisterService(registry *boxService.Registry) {
@@ -29,6 +31,7 @@ type Service struct {
2931
timerConfig timerConfig
3032
adaptiveTimer *adaptiveTimer
3133
lastReportTime atomic.Int64
34+
draftCancelled atomic.Bool
3235
}
3336

3437
func NewService(ctx context.Context, logger log.ContextLogger, tag string, options option.OOMKillerServiceOptions) (adapter.Service, error) {
@@ -81,3 +84,37 @@ func (s *Service) writeOOMReport(memoryUsage uint64) {
8184
s.logger.Info("OOM report saved")
8285
}
8386
}
87+
88+
func (s *Service) writeOOMDraft(memoryUsage uint64) {
89+
if s.draftCancelled.Load() {
90+
return
91+
}
92+
reporter := service.FromContext[OOMReporter](s.ctx)
93+
if reporter == nil {
94+
return
95+
}
96+
err := reporter.WriteDraft(memoryUsage)
97+
if s.draftCancelled.Load() {
98+
reporter.DiscardDraft()
99+
return
100+
}
101+
if err != nil {
102+
s.logger.Warn("failed to write OOM draft: ", err)
103+
} else {
104+
s.logger.Warn("OOM draft saved")
105+
}
106+
}
107+
108+
func (s *Service) discardOOMDraft() {
109+
s.draftCancelled.Store(true)
110+
reporter := service.FromContext[OOMReporter](s.ctx)
111+
if reporter == nil {
112+
return
113+
}
114+
err := reporter.DiscardDraft()
115+
if err != nil {
116+
s.logger.Warn("failed to discard OOM draft: ", err)
117+
} else {
118+
s.logger.Info("OOM draft discarded")
119+
}
120+
}

service/oomkiller/service_darwin.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ func (s *Service) Close() error {
8383
if isLast {
8484
C.stopMemoryPressureMonitor()
8585
}
86+
s.discardOOMDraft()
8687
}
8788
return nil
8889
}
@@ -100,6 +101,7 @@ func goMemoryPressureCallback(status C.ulong) {
100101
sample := readMemorySample(policyModeNetworkExtension)
101102
for _, s := range services {
102103
s.logger.Warn("memory pressure: critical, usage: ", byteformats.FormatMemoryBytes(sample.usage))
104+
s.writeOOMDraft(sample.usage)
103105
s.adaptiveTimer.notifyPressure()
104106
}
105107
}

0 commit comments

Comments
 (0)