Skip to content

Commit b7add5d

Browse files
committed
fix: false detection of Grace Hopper GPU
This commit fixes the issue where Kepler incorrectly detected a Grace Hopper GPU when no GPU is actually present on the system. Signed-off-by: Vibhu Prashar <[email protected]>
1 parent 0358fc3 commit b7add5d

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

pkg/sensors/accelerator/devices/grace_acpi.go

+10-8
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,12 @@ type gpuGraceACPI struct {
5555
}
5656

5757
func graceCheck(r *Registry) {
58-
if err := graceAccImpl.InitLib(); err != nil {
59-
klog.V(5).Infof("Error initializing Grace GPU: %v", err)
58+
if err := graceAccImpl.Init(); err != nil {
59+
klog.V(5).Infof("Grace GPU initialization failed: %v", err)
60+
return
61+
}
62+
if !graceAccImpl.IsDeviceCollectionSupported() {
63+
klog.V(5).Infof("No Grace GPU power modules found")
6064
return
6165
}
6266
graceType = GRACE
@@ -68,10 +72,6 @@ func graceCheck(r *Registry) {
6872
}
6973

7074
func graceDeviceStartup() Device {
71-
if err := graceAccImpl.Init(); err != nil {
72-
klog.Errorf("failed to init Grace GPU device: %v", err)
73-
return nil
74-
}
7575
return &graceAccImpl
7676
}
7777

@@ -163,9 +163,11 @@ func (g *gpuGraceACPI) Init() error {
163163
return err
164164
}
165165
g.collectionSupported = len(g.modulePowerPaths) > 0
166-
if g.collectionSupported {
167-
klog.V(4).Infof("Detected Grace Hopper system with %d GPUs", len(g.modulePowerPaths))
166+
if !g.collectionSupported {
167+
return fmt.Errorf("no Grace GPU power modules found")
168168
}
169+
170+
klog.V(4).Infof("Detected Grace Hopper system with %d GPUs", len(g.modulePowerPaths))
169171
return nil
170172
}
171173

0 commit comments

Comments
 (0)