Skip to content

Commit e45f6c4

Browse files
authored
[fix]: add test case with darwin OS (#1856)
* [fix]: add test case with darwin OS Signed-off-by: Sam Yuan <[email protected]> * [fix]: use same function naming conventions and behavior Signed-off-by: Sam Yuan <[email protected]> * [fix]: update with review comments Signed-off-by: Sam Yuan <[email protected]> * [fix]: update contributing.md as PR review comments Signed-off-by: Sam Yuan <[email protected]> * [fix]: remove unused package Signed-off-by: Sam Yuan <[email protected]> --------- Signed-off-by: Sam Yuan <[email protected]>
1 parent abe3eb9 commit e45f6c4

25 files changed

+520
-42
lines changed

CONTRIBUTING.md

+9
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,12 @@ Doc: update Developer.md
135135

136136
We enabled [stale bot](https://github.com/probot/stale) for house keeping. An
137137
Issue or Pull Request becomes stale after 60 days of inactivity.
138+
139+
## For Mac and Windows users
140+
141+
Kepler currently focuses on the Linux platform.
142+
To make it easy for anyone to contribute to Kepler from any platform, we
143+
welcome contributions (PRs) for other platforms, including changes that
144+
make parts of Kepler compilable on those platforms.
145+
Until a specific platform is officially supported, CI runs only on Linux,
146+
which remains the PR merge standard and the only officially supported platform.

cmd/exporter/exporter.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ func main() {
152152
platform.InitPowerImpl()
153153
defer platform.StopPower()
154154

155-
if config.EnabledGPU() {
155+
if config.IsGPUEnabled() {
156156
r := accelerator.GetRegistry()
157157
if a, err := accelerator.New(config.GPU, true); err == nil {
158158
r.MustRegister(a) // Register the accelerator with the registry

pkg/bpf/exporter.go

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//go:build !darwin
2+
// +build !darwin
3+
14
/*
25
Copyright 2021.
36

pkg/bpf/fake_mac.go

+293
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
//go:build darwin
2+
// +build darwin
3+
4+
/*
5+
Copyright 2021.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
*/
19+
20+
package bpf
21+
22+
import (
23+
"errors"
24+
"fmt"
25+
"os"
26+
"runtime"
27+
"time"
28+
29+
"github.com/cilium/ebpf"
30+
"github.com/cilium/ebpf/link"
31+
"github.com/cilium/ebpf/rlimit"
32+
"github.com/jaypipes/ghw"
33+
"github.com/sustainable-computing-io/kepler/pkg/config"
34+
"golang.org/x/sys/unix"
35+
"k8s.io/apimachinery/pkg/util/sets"
36+
"k8s.io/klog/v2"
37+
)
38+
39+
type exporter struct {
40+
bpfObjects keplerObjects
41+
42+
schedSwitchLink link.Link
43+
irqLink link.Link
44+
pageWriteLink link.Link
45+
pageReadLink link.Link
46+
47+
perfEvents *hardwarePerfEvents
48+
49+
enabledHardwareCounters sets.Set[string]
50+
enabledSoftwareCounters sets.Set[string]
51+
}
52+
53+
func NewExporter() (Exporter, error) {
54+
e := &exporter{
55+
enabledHardwareCounters: sets.New[string](config.BPFHwCounters()...),
56+
enabledSoftwareCounters: sets.New[string](config.BPFSwCounters()...),
57+
}
58+
err := e.attach()
59+
if err != nil {
60+
e.Detach()
61+
}
62+
return e, err
63+
}
64+
65+
func (e *exporter) SupportedMetrics() SupportedMetrics {
66+
return SupportedMetrics{
67+
HardwareCounters: e.enabledHardwareCounters.Clone(),
68+
SoftwareCounters: e.enabledSoftwareCounters.Clone(),
69+
}
70+
}
71+
72+
func (e *exporter) attach() error {
73+
// Remove resource limits for kernels <5.11.
74+
if err := rlimit.RemoveMemlock(); err != nil {
75+
return fmt.Errorf("error removing memlock: %v", err)
76+
}
77+
78+
// Load eBPF Specs
79+
specs, err := loadKepler()
80+
if err != nil {
81+
return fmt.Errorf("error loading eBPF specs: %v", err)
82+
}
83+
84+
// Adjust map sizes to the number of available CPUs
85+
numCPU := getCPUCores()
86+
klog.Infof("Number of CPUs: %d", numCPU)
87+
for _, m := range specs.Maps {
88+
// Only resize maps that have a MaxEntries of NUM_CPUS constant
89+
if m.MaxEntries == 128 {
90+
m.MaxEntries = uint32(numCPU)
91+
}
92+
}
93+
94+
// Set program global variables
95+
err = specs.RewriteConstants(map[string]interface{}{
96+
"SAMPLE_RATE": int32(config.GetBPFSampleRate()),
97+
})
98+
if err != nil {
99+
return fmt.Errorf("error rewriting program constants: %v", err)
100+
}
101+
102+
// Load the eBPF program(s)
103+
if err := specs.LoadAndAssign(&e.bpfObjects, nil); err != nil {
104+
return fmt.Errorf("error loading eBPF objects: %v", err)
105+
}
106+
107+
// Attach the eBPF program(s)
108+
e.schedSwitchLink, err = link.AttachTracing(link.TracingOptions{
109+
Program: e.bpfObjects.KeplerSchedSwitchTrace,
110+
AttachType: ebpf.AttachTraceRawTp,
111+
})
112+
if err != nil {
113+
return fmt.Errorf("error attaching sched_switch tracepoint: %v", err)
114+
}
115+
116+
if config.ExposeIRQCounterMetrics() {
117+
e.irqLink, err = link.AttachTracing(link.TracingOptions{
118+
Program: e.bpfObjects.KeplerIrqTrace,
119+
AttachType: ebpf.AttachTraceRawTp,
120+
})
121+
if err != nil {
122+
return fmt.Errorf("could not attach irq/softirq_entry: %w", err)
123+
}
124+
}
125+
126+
group := "writeback"
127+
name := "writeback_dirty_page"
128+
if _, err := os.Stat("/sys/kernel/debug/tracing/events/writeback/writeback_dirty_folio"); err == nil {
129+
name = "writeback_dirty_folio"
130+
}
131+
e.pageWriteLink, err = link.Tracepoint(group, name, e.bpfObjects.KeplerWritePageTrace, nil)
132+
if err != nil {
133+
klog.Warningf("failed to attach tp/%s/%s: %v. Kepler will not collect page cache write events. This will affect the DRAM power model estimation on VMs.", group, name, err)
134+
} else {
135+
e.enabledSoftwareCounters[config.PageCacheHit] = struct{}{}
136+
}
137+
138+
e.pageReadLink, err = link.AttachTracing(link.TracingOptions{
139+
Program: e.bpfObjects.KeplerReadPageTrace,
140+
AttachType: ebpf.AttachTraceFEntry,
141+
})
142+
if err != nil {
143+
klog.Warningf("failed to attach fentry/mark_page_accessed: %v. Kepler will not collect page cache read events. This will affect the DRAM power model estimation on VMs.", err)
144+
}
145+
146+
// Return early if hardware counters are not enabled
147+
if !config.ExposeHardwareCounterMetrics() {
148+
klog.Infof("Hardware counter metrics are disabled")
149+
return nil
150+
}
151+
152+
e.perfEvents, err = createHardwarePerfEvents(
153+
e.bpfObjects.CpuInstructionsEventReader,
154+
e.bpfObjects.CpuCyclesEventReader,
155+
e.bpfObjects.CacheMissEventReader,
156+
numCPU,
157+
)
158+
if err != nil {
159+
return nil
160+
}
161+
162+
return nil
163+
}
164+
165+
func (e *exporter) Detach() {
166+
// Links
167+
if e.schedSwitchLink != nil {
168+
e.schedSwitchLink.Close()
169+
e.schedSwitchLink = nil
170+
}
171+
172+
if e.irqLink != nil {
173+
e.irqLink.Close()
174+
e.irqLink = nil
175+
}
176+
177+
if e.pageWriteLink != nil {
178+
e.pageWriteLink.Close()
179+
e.pageWriteLink = nil
180+
}
181+
182+
if e.pageReadLink != nil {
183+
e.pageReadLink.Close()
184+
e.pageReadLink = nil
185+
}
186+
187+
// Perf events
188+
e.perfEvents.close()
189+
e.perfEvents = nil
190+
191+
// Objects
192+
e.bpfObjects.Close()
193+
}
194+
195+
func (e *exporter) CollectProcesses() ([]ProcessMetrics, error) {
196+
start := time.Now()
197+
// Get the max number of entries in the map
198+
maxEntries := e.bpfObjects.Processes.MaxEntries()
199+
total := 0
200+
deleteKeys := make([]uint32, maxEntries)
201+
deleteValues := make([]ProcessMetrics, maxEntries)
202+
var cursor ebpf.MapBatchCursor
203+
for {
204+
count, err := e.bpfObjects.Processes.BatchLookupAndDelete(
205+
&cursor,
206+
deleteKeys,
207+
deleteValues,
208+
&ebpf.BatchOptions{},
209+
)
210+
total += count
211+
if errors.Is(err, ebpf.ErrKeyNotExist) {
212+
break
213+
}
214+
if err != nil {
215+
return nil, fmt.Errorf("failed to batch lookup and delete: %v", err)
216+
}
217+
}
218+
klog.V(5).Infof("collected %d process samples in %v", total, time.Since(start))
219+
return deleteValues[:total], nil
220+
}
221+
222+
///////////////////////////////////////////////////////////////////////////
223+
// utility functions
224+
225+
// unixOpenPerfEvent is the darwin stand-in for opening perf events. It opens
// nothing: all arguments are ignored and the result is always an empty,
// non-nil slice with a nil error.
func unixOpenPerfEvent(typ, conf, cpuCores int) ([]int, error) {
	noDescriptors := make([]int, 0)
	return noDescriptors, nil
}
228+
229+
func unixClosePerfEvents(fds []int) {
230+
for _, fd := range fds {
231+
_ = unix.SetNonblock(fd, true)
232+
unix.Close(fd)
233+
}
234+
}
235+
236+
func getCPUCores() int {
237+
cores := runtime.NumCPU()
238+
if cpu, err := ghw.CPU(); err == nil {
239+
// we need to get the number of all CPUs,
240+
// so if /proc/cpuinfo is available, we can get the number of all CPUs
241+
cores = int(cpu.TotalThreads)
242+
}
243+
return cores
244+
}
245+
246+
// hardwarePerfEvents groups the perf-event descriptors for the three hardware
// counters. Each slice is handed to unixClosePerfEvents when the group is
// closed.
type hardwarePerfEvents struct {
	cpuCyclesPerfEvents       []int // descriptors for the CPU cycles counter
	cpuInstructionsPerfEvents []int // descriptors for the CPU instructions counter
	cacheMissPerfEvents       []int // descriptors for the cache miss counter
}
251+
252+
func (h *hardwarePerfEvents) close() {
253+
unixClosePerfEvents(h.cpuCyclesPerfEvents)
254+
unixClosePerfEvents(h.cpuInstructionsPerfEvents)
255+
unixClosePerfEvents(h.cacheMissPerfEvents)
256+
}
257+
258+
// CreateHardwarePerfEvents creates perf events for CPU cycles, CPU instructions, and cache misses
259+
// and updates the corresponding eBPF maps.
260+
func createHardwarePerfEvents(cpuInstructionsMap, cpuCyclesMap, cacheMissMap *ebpf.Map, numCPU int) (*hardwarePerfEvents, error) {
261+
var err error
262+
events := &hardwarePerfEvents{
263+
cpuCyclesPerfEvents: []int{},
264+
cpuInstructionsPerfEvents: []int{},
265+
cacheMissPerfEvents: []int{},
266+
}
267+
defer func() {
268+
if err != nil {
269+
unixClosePerfEvents(events.cpuCyclesPerfEvents)
270+
unixClosePerfEvents(events.cpuInstructionsPerfEvents)
271+
unixClosePerfEvents(events.cacheMissPerfEvents)
272+
}
273+
}()
274+
for i, fd := range events.cpuCyclesPerfEvents {
275+
if err = cpuCyclesMap.Update(uint32(i), uint32(fd), ebpf.UpdateAny); err != nil {
276+
klog.Warningf("Failed to update cpu_cycles_event_reader map: %v", err)
277+
return nil, err
278+
}
279+
}
280+
for i, fd := range events.cpuInstructionsPerfEvents {
281+
if err = cpuInstructionsMap.Update(uint32(i), uint32(fd), ebpf.UpdateAny); err != nil {
282+
klog.Warningf("Failed to update cpu_instructions_event_reader map: %v", err)
283+
return nil, err
284+
}
285+
}
286+
for i, fd := range events.cacheMissPerfEvents {
287+
if err = cacheMissMap.Update(uint32(i), uint32(fd), ebpf.UpdateAny); err != nil {
288+
klog.Warningf("Failed to update cache_miss_event_reader map: %v", err)
289+
return nil, err
290+
}
291+
}
292+
return events, nil
293+
}

pkg/bpftest/bpf_suite_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//go:build !darwin
2+
// +build !darwin
3+
14
package bpftest
25

36
import (

pkg/collector/energy/node_energy_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func UpdateNodeComponentsEnergy(nodeStats *stats.NodeStats, wg *sync.WaitGroup)
6666
// UpdateNodeGPUEnergy updates each GPU power consumption. Right now we don't support other types of accelerators
6767
func UpdateNodeGPUEnergy(nodeStats *stats.NodeStats, wg *sync.WaitGroup) {
6868
defer wg.Done()
69-
if config.EnabledGPU() {
69+
if config.IsGPUEnabled() {
7070
if gpu := acc.GetActiveAcceleratorByType(config.GPU); gpu != nil {
7171
gpuEnergy := gpu.Device().AbsEnergyFromDevice()
7272
for gpu, energy := range gpuEnergy {

pkg/collector/metric_collector.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ func (c *Collector) updateProcessResourceUtilizationMetrics(wg *sync.WaitGroup)
158158
// update process metrics regarding the resource utilization to be used to calculate the energy consumption
159159
// we first updates the bpf which is responsible to include new processes in the ProcessStats collection
160160
resourceBpf.UpdateProcessBPFMetrics(c.bpfExporter, c.ProcessStats)
161-
if config.EnabledGPU() {
161+
if config.IsGPUEnabled() {
162162
if acc.GetActiveAcceleratorByType(config.GPU) != nil {
163163
accelerator.UpdateProcessGPUUtilizationMetrics(c.ProcessStats)
164164
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//go:build darwin
2+
// +build darwin
3+
4+
package bpf
5+
6+
import (
7+
"github.com/sustainable-computing-io/kepler/pkg/bpf"
8+
"github.com/sustainable-computing-io/kepler/pkg/collector/stats"
9+
)
10+
11+
func UpdateProcessBPFMetrics(bpfExporter bpf.Exporter, processStats map[uint64]*stats.ProcessStats) {
12+
13+
}

pkg/collector/resourceutilization/bpf/process_bpf_collector.go

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//go:build !darwin
2+
// +build !darwin
3+
14
/*
25
Copyright 2021.
36

pkg/collector/resourceutilization/bpf/process_bpf_collector_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//go:build !darwin
2+
// +build !darwin
3+
14
package bpf
25

36
import (

pkg/collector/stats/node_stats.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ func (ne *NodeStats) ResetDeltaValues() {
5050

5151
func (ne *NodeStats) UpdateIdleEnergyWithMinValue(isComponentsSystemCollectionSupported bool) {
5252
// gpu metric
53-
if config.EnabledGPU() {
53+
if config.IsGPUEnabled() {
5454
if acc.GetActiveAcceleratorByType(config.GPU) != nil {
5555
ne.CalcIdleEnergy(config.AbsEnergyInGPU, config.IdleEnergyInGPU, config.GPUComputeUtilization)
5656
}

0 commit comments

Comments
 (0)