diff --git a/docs/runtime_options.md b/docs/runtime_options.md index 9621a69474..8e3e07e3ab 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -185,7 +185,7 @@ perf_event_attr: { "core": { "events": [ - ["EVENT_NAME"] + "event_name" ], "custom_events": [ { @@ -193,13 +193,13 @@ perf_event_attr: "config": [ "0x304" ], - "name": "EVENT_NAME" + "name": "event_name" } ] }, "uncore": { "events": [ - ["EVENT_NAME"] + "event_name" ], "custom_events": [ { @@ -207,7 +207,7 @@ perf_event_attr: "config": [ "0x304" ], - "name": "EVENT_NAME" + "name": "event_name" } ] } @@ -228,9 +228,9 @@ Let's explain this by example: { "uncore": { "events": [ - ["uncore_imc/cas_count_read"], - ["uncore_imc_0/cas_count_write"], - ["cas_count_all"] + "uncore_imc/cas_count_read", + "uncore_imc_0/cas_count_write", + "cas_count_all" ], "custom_events": [ { @@ -314,14 +314,14 @@ and perf events configuration for listed events: { "core": { "events": [ - ["INSTRUCTIONS"], - ["INSTRUCTION_RETIRED"] + "instructions", + "instruction_retired" ] }, "uncore": { "events": [ - ["uncore_imc/UNC_M_CAS_COUNT:RD"], - ["uncore_imc/UNC_M_CAS_COUNT:WR"] + "uncore_imc/unc_m_cas_count:rd", + "uncore_imc/unc_m_cas_count:wr" ] } } @@ -329,6 +329,25 @@ and perf events configuration for listed events: Notice: PMU_PREFIX is provided in the same way as for configuration with config values. 
+#### Grouping + +```json +{ + "core": { + "events": [ + ["instructions", "instruction_retired"] + ] + }, + "uncore": { + "events": [ + ["uncore_imc_0/unc_m_cas_count:rd", "uncore_imc_0/unc_m_cas_count:wr"], + ["uncore_imc_1/unc_m_cas_count:rd", "uncore_imc_1/unc_m_cas_count:wr"] + ] + } +} +``` + + ### Further reading * [perf Examples](http://www.brendangregg.com/perf.html) on Brendan Gregg's blog @@ -342,8 +361,8 @@ See example configuration below: { "core": { "events": [ - ["instructions"], - ["instructions_retired"] + "instructions", + "instructions_retired" ], "custom_events": [ { @@ -357,7 +376,7 @@ See example configuration below: }, "uncore": { "events": [ - ["uncore_imc/cas_count_read"] + "uncore_imc/cas_count_read" ], "custom_events": [ { diff --git a/manager/manager.go b/manager/manager.go index 4185d930fb..0350682192 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -212,7 +212,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig newManager.machineInfo = *machineInfo klog.V(1).Infof("Machine: %+v", newManager.machineInfo) - newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores, machineInfo.Topology) + newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.Topology) if err != nil { return nil, err } diff --git a/perf/collector_libpfm.go b/perf/collector_libpfm.go index fc8ffcc09f..ac8008211a 100644 --- a/perf/collector_libpfm.go +++ b/perf/collector_libpfm.go @@ -41,33 +41,42 @@ import ( type collector struct { cgroupPath string events PerfEvents - cpuFiles map[string]map[int]readerCloser + cpuFiles map[int]group cpuFilesLock sync.Mutex - numCores int + onlineCPUs []int eventToCustomEvent map[Event]*CustomEvent uncore stats.Collector } +type group struct { + cpuFiles map[string]map[int]readerCloser + names []string + leaderName string +} + var ( isLibpfmInitialized = false libpmfMutex = sync.Mutex{} ) +const ( + groupLeaderFileDescriptor = -1 +) + func init() { 
libpmfMutex.Lock() defer libpmfMutex.Unlock() pErr := C.pfm_initialize() if pErr != C.PFM_SUCCESS { - fmt.Printf("unable to initialize libpfm: %d", int(pErr)) + klog.Errorf("unable to initialize libpfm: %d", int(pErr)) return } isLibpfmInitialized = true } -func newCollector(cgroupPath string, events PerfEvents, numCores int, topology []info.Node) *collector { - collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores, uncore: NewUncoreCollector(cgroupPath, events, topology)} +func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector { + collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket)} mapEventsToCustomEvents(collector) - return collector } @@ -82,47 +91,60 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error { stats.PerfStats = []info.PerfStat{} klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath) - for name, cpus := range c.cpuFiles { - for cpu, file := range cpus { - stat, err := readPerfStat(file, name, cpu) + + for _, group := range c.cpuFiles { + for cpu, file := range group.cpuFiles[group.leaderName] { + stat, err := readGroupPerfStat(file, group, cpu, c.cgroupPath) if err != nil { - klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", name, cpu, c.cgroupPath, err.Error()) + klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, c.cgroupPath, err.Error()) continue } - klog.V(5).Infof("Read perf event (event: %q, CPU: %d) for %q: %d", name, cpu, c.cgroupPath, stat.Value) - stats.PerfStats = append(stats.PerfStats, *stat) + stats.PerfStats = append(stats.PerfStats, stat...) 
} } return nil } -func readPerfStat(file readerCloser, name string, cpu int) (*info.PerfStat, error) { - value, err := getPerfValue(file, name) +func readGroupPerfStat(file readerCloser, group group, cpu int, cgroupPath string) ([]info.PerfStat, error) { + values, err := getPerfValues(file, group) if err != nil { return nil, err } - stat := info.PerfStat{ - PerfValue: value, - Cpu: cpu, + perfStats := make([]info.PerfStat, len(values)) + for i, value := range values { + klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", value.Name, cpu, cgroupPath, value.Value) + perfStats[i] = info.PerfStat{ + PerfValue: value, + Cpu: cpu, + } } - return &stat, nil + return perfStats, nil } -func getPerfValue(file readerCloser, name string) (info.PerfValue, error) { - buf := make([]byte, 32) +func getPerfValues(file readerCloser, group group) ([]info.PerfValue, error) { + // 24 bytes of GroupReadFormat struct. + // 16 bytes of Values struct for each element in group. + // See https://man7.org/linux/man-pages/man2/perf_event_open.2.html section "Reading results" with PERF_FORMAT_GROUP specified. 
+ buf := make([]byte, 24+16*len(group.names)) _, err := file.Read(buf) if err != nil { - return info.PerfValue{}, err + return []info.PerfValue{}, fmt.Errorf("unable to read perf event group ( leader = %s ): %w", group.leaderName, err) } - perfData := &ReadFormat{} - reader := bytes.NewReader(buf) + perfData := &GroupReadFormat{} + reader := bytes.NewReader(buf[:24]) err = binary.Read(reader, binary.LittleEndian, perfData) if err != nil { - return info.PerfValue{}, err + return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group ( leader = %s ): %w", group.leaderName, err) + } + values := make([]Values, perfData.Nr) + reader = bytes.NewReader(buf[24:]) + err = binary.Read(reader, binary.LittleEndian, values) + if err != nil { + return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group values ( leader = %s ): %w", group.leaderName, err) } scalingRatio := 1.0 @@ -130,16 +152,26 @@ func getPerfValue(file readerCloser, name string) (info.PerfValue, error) { scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled) } - value := perfData.Value + perfValues := make([]info.PerfValue, perfData.Nr) if scalingRatio != float64(0) { - value = uint64(float64(value) / scalingRatio) + for i, name := range group.names { + perfValues[i] = info.PerfValue{ + ScalingRatio: scalingRatio, + Value: uint64(float64(values[i].Value) / scalingRatio), + Name: name, + } + } + } else { + for i, name := range group.names { + perfValues[i] = info.PerfValue{ + ScalingRatio: scalingRatio, + Value: values[i].Value, + Name: name, + } + } } - return info.PerfValue{ - Value: value, - Name: name, - ScalingRatio: scalingRatio, - }, nil + return perfValues, nil } func (c *collector) setup() error { @@ -152,68 +184,144 @@ func (c *collector) setup() error { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() cgroupFd := int(cgroup.Fd()) - for _, group := range c.events.Core.Events { - customEvent, ok := c.eventToCustomEvent[group[0]] - var err error - if ok { 
- err = c.setupRawNonGrouped(customEvent, cgroupFd) - } else { - err = c.setupNonGrouped(string(group[0]), cgroupFd) + for i, group := range c.events.Core.Events { + // CPUs file descriptors of group leader needed for perf_event_open. + leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs)) + for _, cpu := range c.onlineCPUs { + leaderFileDescriptors[cpu] = groupLeaderFileDescriptor } - if err != nil { - return err + + for j, event := range group.events { + // First element is group leader. + isGroupLeader := j == 0 + customEvent, ok := c.eventToCustomEvent[event] + if ok { + config := c.createConfigFromRawEvent(customEvent) + leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors) + if err != nil { + return err + } + } else { + config, err := c.createConfigFromEvent(event) + if err != nil { + return err + } + leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors) + if err != nil { + return err + } + // Clean memory allocated by C code. + C.free(unsafe.Pointer(config)) + } + } + + // Group is prepared so we should reset and enable counting. 
+ for _, fd := range leaderFileDescriptors { + err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0) + if err != nil { + return err + } + err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0) + if err != nil { + return err + } } } return nil } -func (c *collector) setupRawNonGrouped(event *CustomEvent, cgroup int) error { - klog.V(5).Infof("Setting up non-grouped raw perf event %#v", event) - config := createPerfEventAttr(*event) - err := c.registerEvent(config, string(event.Name), cgroup) - if err != nil { - return err +func readPerfEventAttr(name string) (*unix.PerfEventAttr, error) { + perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{}))) + event := pfmPerfEncodeArgT{} + fstr := C.CString("") + event.fstr = unsafe.Pointer(fstr) + event.attr = perfEventAttrMemory + event.size = C.ulong(unsafe.Sizeof(event)) + cSafeName := C.CString(name) + pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event)) + if pErr != C.PFM_SUCCESS { + return nil, fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr)) } - return nil + return (*unix.PerfEventAttr)(perfEventAttrMemory), nil } -func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, pid int) error { - var cpu int - for cpu = 0; cpu < c.numCores; cpu++ { - groupFd, flags := -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP - fd, err := unix.PerfEventOpen(config, pid, cpu, groupFd, flags) +type eventInfo struct { + name string + config *unix.PerfEventAttr + pid int + groupIndex int + isGroupLeader bool +} + +func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int]int) (map[int]int, error) { + newLeaderFileDescriptors := make(map[int]int, len(c.onlineCPUs)) + var pid, flags int + if event.isGroupLeader { + pid = event.pid + flags = unix.PERF_FLAG_FD_CLOEXEC | unix.PERF_FLAG_PID_CGROUP + } else { + pid = -1 + flags = unix.PERF_FLAG_FD_CLOEXEC + } + + 
setAttributes(event.config, event.isGroupLeader) + + for _, cpu := range c.onlineCPUs { + fd, err := unix.PerfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags) if err != nil { - return fmt.Errorf("setting up perf event %#v failed: %q", config, err) + return nil, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err) } - perfFile := os.NewFile(uintptr(fd), name) + perfFile := os.NewFile(uintptr(fd), event.name) if perfFile == nil { - return fmt.Errorf("unable to create os.File from file descriptor %#v", fd) + return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd) } - c.addEventFile(name, cpu, perfFile) + c.addEventFile(event.groupIndex, event.name, cpu, perfFile) + + // If group leader, save fd for others. + if event.isGroupLeader { + newLeaderFileDescriptors[cpu] = fd + } + } + + if event.isGroupLeader { + return newLeaderFileDescriptors, nil + } else { + return leaderFileDescriptors, nil } - return nil } -func (c *collector) addEventFile(name string, cpu int, perfFile *os.File) { - _, ok := c.cpuFiles[name] +func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.File) { + _, ok := c.cpuFiles[index] + if !ok { + c.cpuFiles[index] = group{ + leaderName: name, + cpuFiles: map[string]map[int]readerCloser{}, + } + } + + _, ok = c.cpuFiles[index].cpuFiles[name] if !ok { - c.cpuFiles[name] = map[int]readerCloser{} + c.cpuFiles[index].cpuFiles[name] = map[int]readerCloser{} } - c.cpuFiles[name][cpu] = perfFile -} + c.cpuFiles[index].cpuFiles[name][cpu] = perfFile -func (c *collector) setupNonGrouped(name string, cgroup int) error { - perfEventAttr, err := getPerfEventAttr(name) - if err != nil { - return err + // Check if name is already stored. + for _, have := range c.cpuFiles[index].names { + if name == have { + return + } } - defer C.free(unsafe.Pointer(perfEventAttr)) - return c.registerEvent(perfEventAttr, name, cgroup) + // Otherwise save it. 
+ c.cpuFiles[index] = group{ + cpuFiles: c.cpuFiles[index].cpuFiles, + names: append(c.cpuFiles[index].names, name), + leaderName: c.cpuFiles[index].leaderName, + } } func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr { @@ -230,43 +338,20 @@ func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr { config.Ext2 = event.Config[2] } - setAttributes(config) klog.V(5).Infof("perf_event_attr struct prepared: %#v", config) return config } -func getPerfEventAttr(name string) (*unix.PerfEventAttr, error) { - if !isLibpfmInitialized { - return nil, fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up") - } - - perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{}))) - event := pfmPerfEncodeArgT{} - - perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory) - fstr := C.CString("") - event.fstr = unsafe.Pointer(fstr) - event.attr = perfEventAttrMemory - event.size = C.ulong(unsafe.Sizeof(event)) - - cSafeName := C.CString(name) +func setAttributes(config *unix.PerfEventAttr, leader bool) { + config.Sample_type = perfSampleIdentifier + config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_GROUP | unix.PERF_FORMAT_ID + config.Bits = perfAttrBitsInherit | perfAttrBitsExcludeGuest - pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event)) - if pErr != C.PFM_SUCCESS { - return nil, fmt.Errorf("unable to transform event name %s to perf_event_attr: %v", name, int(pErr)) + // Group leader should have this flag set to disable counting until all group would be prepared. 
+ if leader { + config.Bits |= unix.PerfBitDisabled } - klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr) - - setAttributes(perfEventAttr) - - return perfEventAttr, nil -} - -func setAttributes(config *unix.PerfEventAttr) { - config.Sample_type = perfSampleIdentifier - config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_ID - config.Bits = perfAttrBitsInherit | perfAttrBitsExcludeGuest config.Size = uint32(unsafe.Sizeof(unix.PerfEventAttr{})) } @@ -275,15 +360,17 @@ func (c *collector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for name, files := range c.cpuFiles { - for cpu, file := range files { - klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) - err := file.Close() - if err != nil { - klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + for _, group := range c.cpuFiles { + for name, files := range group.cpuFiles { + for cpu, file := range files { + klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) + } } + delete(group.cpuFiles, name) } - delete(c.cpuFiles, name) } } @@ -308,3 +395,27 @@ func mapEventsToCustomEvents(collector *collector) { collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key] } } + +func (c *collector) createConfigFromRawEvent(event *CustomEvent) *unix.PerfEventAttr { + klog.V(5).Infof("Setting up raw perf event %#v", event) + + config := createPerfEventAttr(*event) + + klog.V(5).Infof("perf_event_attr: %#v", config) + + return config +} + +func (c *collector) createConfigFromEvent(event Event) (*unix.PerfEventAttr, error) { + klog.V(5).Infof("Setting up perf event %s", 
string(event)) + + config, err := readPerfEventAttr(string(event)) + if err != nil { + C.free((unsafe.Pointer)(config)) + return nil, err + } + + klog.V(5).Infof("perf_event_attr: %#v", config) + + return config, nil +} diff --git a/perf/collector_libpfm_test.go b/perf/collector_libpfm_test.go index 5d2f488203..4f43f0d682 100644 --- a/perf/collector_libpfm_test.go +++ b/perf/collector_libpfm_test.go @@ -40,30 +40,77 @@ func TestCollector_UpdateStats(t *testing.T) { collector := collector{uncore: &stats.NoopCollector{}} notScaledBuffer := buffer{bytes.NewBuffer([]byte{})} scaledBuffer := buffer{bytes.NewBuffer([]byte{})} - err := binary.Write(notScaledBuffer, binary.LittleEndian, ReadFormat{ - Value: 123456789, + groupedBuffer := buffer{bytes.NewBuffer([]byte{})} + err := binary.Write(notScaledBuffer, binary.LittleEndian, GroupReadFormat{ + Nr: 1, TimeEnabled: 100, TimeRunning: 100, - ID: 1, }) assert.NoError(t, err) - err = binary.Write(scaledBuffer, binary.LittleEndian, ReadFormat{ - Value: 333333333, + err = binary.Write(notScaledBuffer, binary.LittleEndian, Values{ + Value: 123456789, + ID: 0, + }) + assert.NoError(t, err) + err = binary.Write(scaledBuffer, binary.LittleEndian, GroupReadFormat{ + Nr: 1, TimeEnabled: 3, TimeRunning: 1, - ID: 2, }) assert.NoError(t, err) - collector.cpuFiles = map[string]map[int]readerCloser{ - "instructions": {0: notScaledBuffer}, - "cycles": {11: scaledBuffer}, + err = binary.Write(scaledBuffer, binary.LittleEndian, Values{ + Value: 333333333, + ID: 2, + }) + assert.NoError(t, err) + err = binary.Write(groupedBuffer, binary.LittleEndian, GroupReadFormat{ + Nr: 2, + TimeEnabled: 100, + TimeRunning: 100, + }) + assert.NoError(t, err) + err = binary.Write(groupedBuffer, binary.LittleEndian, Values{ + Value: 123456, + ID: 0, + }) + assert.NoError(t, err) + err = binary.Write(groupedBuffer, binary.LittleEndian, Values{ + Value: 654321, + ID: 1, + }) + assert.NoError(t, err) + + collector.cpuFiles = map[int]group{ + 1: { + cpuFiles: 
map[string]map[int]readerCloser{ + "instructions": {0: notScaledBuffer}, + }, + names: []string{"instructions"}, + leaderName: "instructions", + }, + 2: { + cpuFiles: map[string]map[int]readerCloser{ + "cycles": {11: scaledBuffer}, + }, + names: []string{"cycles"}, + leaderName: "cycles", + }, + 3: { + cpuFiles: map[string]map[int]readerCloser{ + "cache-misses": { + 0: groupedBuffer, + }, + }, + names: []string{"cache-misses", "cache-references"}, + leaderName: "cache-misses", + }, } stats := &info.ContainerStats{} err = collector.UpdateStats(stats) assert.NoError(t, err) - assert.Len(t, stats.PerfStats, 2) + assert.Len(t, stats.PerfStats, 4) assert.Contains(t, stats.PerfStats, info.PerfStat{ PerfValue: info.PerfValue{ @@ -81,6 +128,22 @@ func TestCollector_UpdateStats(t *testing.T) { }, Cpu: 0, }) + assert.Contains(t, stats.PerfStats, info.PerfStat{ + PerfValue: info.PerfValue{ + ScalingRatio: 1.0, + Value: 123456, + Name: "cache-misses", + }, + Cpu: 0, + }) + assert.Contains(t, stats.PerfStats, info.PerfStat{ + PerfValue: info.PerfValue{ + ScalingRatio: 1.0, + Value: 654321, + Name: "cache-references", + }, + Cpu: 0, + }) } func TestCreatePerfEventAttr(t *testing.T) { @@ -96,185 +159,230 @@ func TestCreatePerfEventAttr(t *testing.T) { assert.Equal(t, uint64(2), attributes.Config) assert.Equal(t, uint64(3), attributes.Ext1) assert.Equal(t, uint64(4), attributes.Ext2) +} + +func TestSetGroupAttributes(t *testing.T) { + event := CustomEvent{ + Type: 0x1, + Config: Config{uint64(0x2), uint64(0x3), uint64(0x4)}, + Name: "fake_event", + } + + attributes := createPerfEventAttr(event) + setAttributes(attributes, true) assert.Equal(t, uint64(65536), attributes.Sample_type) - assert.Equal(t, uint64(7), attributes.Read_format) - assert.Equal(t, uint64(1048578), attributes.Bits) + assert.Equal(t, uint64(0xf), attributes.Read_format) + assert.Equal(t, uint64(0x100003), attributes.Bits) + + attributes = createPerfEventAttr(event) + setAttributes(attributes, false) + 
assert.Equal(t, uint64(65536), attributes.Sample_type) + assert.Equal(t, uint64(0xf), attributes.Read_format) + assert.Equal(t, uint64(0x100002), attributes.Bits) } func TestNewCollector(t *testing.T) { perfCollector := newCollector("cgroup", PerfEvents{ Core: Events{ - Events: [][]Event{{"event_1"}, {"event_2"}}, + Events: []Group{{[]Event{"event_1"}, false}, {[]Event{"event_2"}, false}}, CustomEvents: []CustomEvent{{ Type: 0, Config: []uint64{1, 2, 3}, Name: "event_2", }}, }, - }, 1, []info.Node{}) + }, []int{0, 1, 2, 3}, map[int]int{}) assert.Len(t, perfCollector.eventToCustomEvent, 1) assert.Nil(t, perfCollector.eventToCustomEvent[Event("event_1")]) assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")]) } -var readPerfStatCases = []struct { - test string - file ReadFormat - name string - cpu int - perfStat info.PerfStat - err error +var readGroupPerfStatCases = []struct { + test string + file GroupReadFormat + valuesFile Values + name string + cpu int + perfStat []info.PerfStat + err error }{ { test: "no scaling", - file: ReadFormat{ - Value: 5, + file: GroupReadFormat{ TimeEnabled: 0, TimeRunning: 0, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + Value: 5, + ID: 0, }, name: "some metric", cpu: 1, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 1, Value: 5, Name: "some metric", }, Cpu: 1, - }, + }}, err: nil, }, { test: "no scaling - TimeEnabled = 0", - file: ReadFormat{ - Value: 5, + file: GroupReadFormat{ TimeEnabled: 0, TimeRunning: 1, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + Value: 5, + ID: 0, }, name: "some metric", cpu: 1, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 1, Value: 5, Name: "some metric", }, Cpu: 1, - }, + }}, err: nil, }, { test: "scaling - 0.5", - file: ReadFormat{ - Value: 4, + file: GroupReadFormat{ TimeEnabled: 4, TimeRunning: 2, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + 
Value: 4, + ID: 0, }, name: "some metric", cpu: 2, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 0.5, Value: 8, Name: "some metric", }, Cpu: 2, - }, + }}, err: nil, }, { test: "scaling - 0 (TimeEnabled = 1, TimeRunning = 0)", - file: ReadFormat{ - Value: 4, + file: GroupReadFormat{ TimeEnabled: 1, TimeRunning: 0, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + Value: 4, + ID: 0, }, name: "some metric", cpu: 3, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 1.0, Value: 4, Name: "some metric", }, Cpu: 3, - }, + }}, err: nil, }, { test: "scaling - 0 (TimeEnabled = 0, TimeRunning = 1)", - file: ReadFormat{ - Value: 4, + file: GroupReadFormat{ TimeEnabled: 0, TimeRunning: 1, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + Value: 4, + ID: 0, }, name: "some metric", cpu: 3, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 1.0, Value: 4, Name: "some metric", }, Cpu: 3, - }, + }}, err: nil, }, { test: "zeros, zeros everywhere", - file: ReadFormat{ - Value: 0, + file: GroupReadFormat{ TimeEnabled: 0, TimeRunning: 0, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + Value: 0, + ID: 0, }, name: "some metric", cpu: 4, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 1.0, Value: 0, Name: "some metric", }, Cpu: 4, - }, + }}, err: nil, }, { test: "non-zero TimeRunning", - file: ReadFormat{ - Value: 0, + file: GroupReadFormat{ TimeEnabled: 0, TimeRunning: 3, - ID: 0, + Nr: 1, + }, + valuesFile: Values{ + Value: 0, + ID: 0, }, name: "some metric", cpu: 4, - perfStat: info.PerfStat{ + perfStat: []info.PerfStat{{ PerfValue: info.PerfValue{ ScalingRatio: 1.0, Value: 0, Name: "some metric", }, Cpu: 4, - }, + }}, err: nil, }, } func TestReadPerfStat(t *testing.T) { - for _, test := range readPerfStatCases { + for _, test := range readGroupPerfStatCases { t.Run(test.test, func(tt *testing.T) { 
buf := &buffer{bytes.NewBuffer([]byte{})} err := binary.Write(buf, binary.LittleEndian, test.file) assert.NoError(tt, err) - stat, err := readPerfStat(buf, test.name, test.cpu) - assert.Equal(tt, test.perfStat, *stat) + err = binary.Write(buf, binary.LittleEndian, test.valuesFile) + assert.NoError(tt, err) + stat, err := readGroupPerfStat(buf, group{ + cpuFiles: nil, + names: []string{test.name}, + leaderName: test.name, + }, test.cpu, "/") + assert.Equal(tt, test.perfStat, stat) assert.Equal(tt, test.err, err) }) } diff --git a/perf/config.go b/perf/config.go index efdf370ada..4814c60d17 100644 --- a/perf/config.go +++ b/perf/config.go @@ -34,7 +34,7 @@ type PerfEvents struct { type Events struct { // List of perf events' names to be measured. - Events [][]Event `json:"events"` + Events []Group `json:"events"` // List of custom perf events' to be measured. It is impossible to // specify some events using their names and in such case you have @@ -89,3 +89,39 @@ func parseConfig(file *os.File) (events PerfEvents, err error) { } return } + +type Group struct { + events []Event + array bool +} + +func (g *Group) UnmarshalJSON(b []byte) error { + var jsonObj interface{} + err := json.Unmarshal(b, &jsonObj) + if err != nil { + return err + } + switch obj := jsonObj.(type) { + case string: + *g = Group{ + events: []Event{Event(obj)}, + array: false, + } + return nil + case []interface{}: + group := Group{ + events: make([]Event, 0, len(obj)), + array: true, + } + for _, v := range obj { + value, ok := v.(string) + if !ok { + return fmt.Errorf("cannot unmarshal %v", value) + } + group.events = append(group.events, Event(value)) + } + *g = group + return nil + } + return fmt.Errorf("unsupported type") +} diff --git a/perf/config_test.go b/perf/config_test.go index d5ac482ccd..9427bfd124 100644 --- a/perf/config_test.go +++ b/perf/config_test.go @@ -30,20 +30,18 @@ func TestConfigParsing(t *testing.T) { assert.Nil(t, err) assert.Len(t, events.Core.Events, 2) - assert.Len(t, 
events.Core.Events[0], 1) - assert.Equal(t, Event("instructions"), events.Core.Events[0][0]) - assert.Len(t, events.Core.Events[1], 1) - assert.Equal(t, Event("instructions_retired"), events.Core.Events[1][0]) - - assert.Len(t, events.Core.CustomEvents, 1) - assert.Equal(t, Config{0x5300c0}, events.Core.CustomEvents[0].Config) - assert.Equal(t, uint32(0x04), events.Core.CustomEvents[0].Type) - assert.Equal(t, Event("instructions_retired"), events.Core.CustomEvents[0].Name) + assert.Len(t, events.Core.Events[0].events, 2) + assert.Equal(t, true, events.Core.Events[0].array) + assert.Equal(t, Event("instructions"), events.Core.Events[0].events[0]) + assert.Equal(t, Event("instructions_retired"), events.Core.Events[0].events[1]) + assert.Len(t, events.Core.Events[1].events, 1) + assert.Equal(t, false, events.Core.Events[1].array) + assert.Equal(t, Event("cycles"), events.Core.Events[1].events[0]) assert.Len(t, events.Uncore.Events, 3) - assert.Equal(t, Event("cas_count_write"), events.Uncore.Events[0][0]) - assert.Equal(t, Event("uncore_imc_0/UNC_M_CAS_COUNT:RD"), events.Uncore.Events[1][0]) - assert.Equal(t, Event("uncore_ubox/UNC_U_EVENT_MSG"), events.Uncore.Events[2][0]) + assert.Equal(t, Event("cas_count_write"), events.Uncore.Events[0].events[0]) + assert.Equal(t, Event("uncore_imc_0/UNC_M_CAS_COUNT:RD"), events.Uncore.Events[1].events[0]) + assert.Equal(t, Event("uncore_ubox/UNC_U_EVENT_MSG"), events.Uncore.Events[2].events[0]) assert.Len(t, events.Uncore.CustomEvents, 1) assert.Equal(t, Config{0x5300}, events.Uncore.CustomEvents[0].Config) diff --git a/perf/manager_libpfm.go b/perf/manager_libpfm.go index 1edbaf9304..d4126d659b 100644 --- a/perf/manager_libpfm.go +++ b/perf/manager_libpfm.go @@ -23,48 +23,44 @@ import ( info "github.com/google/cadvisor/info/v1" "github.com/google/cadvisor/stats" + "github.com/google/cadvisor/utils/sysinfo" ) type manager struct { - events PerfEvents - numCores int - topology []info.Node + events PerfEvents + onlineCPUs []int + 
cpuToSocket map[int]int stats.NoopDestroy } -func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) { +func NewManager(configFile string, topology []info.Node) (stats.Manager, error) { if configFile == "" { return &stats.NoopManager{}, nil } file, err := os.Open(configFile) if err != nil { - return nil, fmt.Errorf("Unable to read configuration file %q: %q", configFile, err) + return nil, fmt.Errorf("unable to read configuration file %q: %w", configFile, err) } config, err := parseConfig(file) if err != nil { - return nil, fmt.Errorf("Unable to read configuration file %q: %q", configFile, err) + return nil, fmt.Errorf("unable to parse configuration file %q: %w", configFile, err) } - if areGroupedEventsUsed(config) { - return nil, fmt.Errorf("event grouping is not supported you must modify config file at %s", configFile) - } + onlineCPUs := sysinfo.GetOnlineCPUs(topology) - return &manager{events: config, numCores: numCores, topology: topology}, nil -} + cpuToSocket := make(map[int]int) -func areGroupedEventsUsed(events PerfEvents) bool { - for _, group := range events.Core.Events { - if len(group) > 1 { - return true - } + for _, cpu := range onlineCPUs { + cpuToSocket[cpu] = sysinfo.GetSocketFromCPU(topology, cpu) } - return false + + return &manager{events: config, onlineCPUs: onlineCPUs, cpuToSocket: cpuToSocket}, nil } func (m *manager) GetCollector(cgroupPath string) (stats.Collector, error) { - collector := newCollector(cgroupPath, m.events, m.numCores, m.topology) + collector := newCollector(cgroupPath, m.events, m.onlineCPUs, m.cpuToSocket) err := collector.setup() if err != nil { collector.Destroy() diff --git a/perf/manager_libpfm_test.go b/perf/manager_libpfm_test.go index eb341b846d..417f1b1564 100644 --- a/perf/manager_libpfm_test.go +++ b/perf/manager_libpfm_test.go @@ -20,14 +20,14 @@ package perf import ( "testing" + "github.com/stretchr/testify/assert" + info "github.com/google/cadvisor/info/v1" 
"github.com/google/cadvisor/stats" - - "github.com/stretchr/testify/assert" ) func TestNoConfigFilePassed(t *testing.T) { - manager, err := NewManager("", 1, []info.Node{}) + manager, err := NewManager("", []info.Node{}) assert.Nil(t, err) _, ok := manager.(*stats.NoopManager) @@ -35,28 +35,21 @@ func TestNoConfigFilePassed(t *testing.T) { } func TestNonExistentFile(t *testing.T) { - manager, err := NewManager("this-file-is-so-non-existent", 1, []info.Node{}) + manager, err := NewManager("this-file-is-so-non-existent", []info.Node{}) assert.NotNil(t, err) assert.Nil(t, manager) } func TestMalformedJsonFile(t *testing.T) { - manager, err := NewManager("testing/this-is-some-random.json", 1, []info.Node{}) - - assert.NotNil(t, err) - assert.Nil(t, manager) -} - -func TestGroupedEvents(t *testing.T) { - manager, err := NewManager("testing/grouped.json", 1, []info.Node{}) + manager, err := NewManager("testing/this-is-some-random.json", []info.Node{}) assert.NotNil(t, err) assert.Nil(t, manager) } func TestNewManager(t *testing.T) { - managerInstance, err := NewManager("testing/perf.json", 1, []info.Node{}) + managerInstance, err := NewManager("testing/perf.json", []info.Node{}) assert.Nil(t, err) _, ok := managerInstance.(*manager) diff --git a/perf/manager_no_libpfm.go b/perf/manager_no_libpfm.go index b0d7b9aeb7..d0fc4e78a1 100644 --- a/perf/manager_no_libpfm.go +++ b/perf/manager_no_libpfm.go @@ -24,7 +24,7 @@ import ( "k8s.io/klog/v2" ) -func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) { +func NewManager(configFile string, topology []info.Node) (stats.Manager, error) { klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. 
Perf event counters are not available.") return &stats.NoopManager{}, nil } diff --git a/perf/testing/grouped.json b/perf/testing/grouped.json deleted file mode 100644 index 635322e9a6..0000000000 --- a/perf/testing/grouped.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "core": { - "events": [ - ["instructions", "instructions_retired"] - ], - "custom_events": [ - { - "type": 4, - "config": [ - "0x5300c0" - ], - "name": "instructions_retired" - } - ] - } -} diff --git a/perf/testing/perf-non-hardware.json b/perf/testing/perf-non-hardware.json index dc410874a0..b3e7558766 100644 --- a/perf/testing/perf-non-hardware.json +++ b/perf/testing/perf-non-hardware.json @@ -1,8 +1,8 @@ { "core": { "events": [ - ["context-switches"], - ["cpu-migrations-custom"] + "context-switches", + "cpu-migrations-custom" ], "custom_events": [ { diff --git a/perf/testing/perf.json b/perf/testing/perf.json index 3207901fea..6ef4255751 100644 --- a/perf/testing/perf.json +++ b/perf/testing/perf.json @@ -1,8 +1,8 @@ { "core": { "events": [ - ["instructions"], - ["instructions_retired"] + ["instructions", "instructions_retired"], + "cycles" ], "custom_events": [ { @@ -16,9 +16,9 @@ }, "uncore": { "events": [ - ["cas_count_write"], - ["uncore_imc_0/UNC_M_CAS_COUNT:RD"], - ["uncore_ubox/UNC_U_EVENT_MSG"] + "cas_count_write", + "uncore_imc_0/UNC_M_CAS_COUNT:RD", + "uncore_ubox/UNC_U_EVENT_MSG" ], "custom_events": [ { diff --git a/perf/types_libpfm.go b/perf/types_libpfm.go index 76a8196a68..f2beaf7fb4 100644 --- a/perf/types_libpfm.go +++ b/perf/types_libpfm.go @@ -29,12 +29,17 @@ const ( perfAttrBitsExcludeGuest = 1 << 20 ) -// ReadFormat allows to read perf event's value for non-grouped events -type ReadFormat struct { - Value uint64 /* The value of the event */ +// GroupReadFormat allows to read perf event's values for grouped events. +// See https://man7.org/linux/man-pages/man2/perf_event_open.2.html section "Reading results" with PERF_FORMAT_GROUP specified. 
+type GroupReadFormat struct { + Nr uint64 /* The number of events */ TimeEnabled uint64 /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ TimeRunning uint64 /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ - ID uint64 /* if PERF_FORMAT_ID */ +} + +type Values struct { + Value uint64 /* The value of the event */ + ID uint64 /* if PERF_FORMAT_ID */ } // pfmPerfEncodeArgT represents structure that is used to parse perf event nam diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go index 50a5e979b7..2e4cf20d53 100644 --- a/perf/uncore_libpfm.go +++ b/perf/uncore_libpfm.go @@ -27,6 +27,7 @@ import ( "io/ioutil" "os" "path/filepath" + "reflect" "regexp" "strconv" "strings" @@ -38,7 +39,6 @@ import ( info "github.com/google/cadvisor/info/v1" "github.com/google/cadvisor/stats" - "github.com/google/cadvisor/utils/sysinfo" ) type pmu struct { @@ -53,9 +53,10 @@ const ( pmuCpumaskFilename = "cpumask" systemDevicesPath = "/sys/devices" rootPerfEventPath = "/sys/fs/cgroup/perf_event" + uncorePID = -1 ) -func getPMU(pmus []pmu, gotType uint32) (*pmu, error) { +func getPMU(pmus uncorePMUs, gotType uint32) (*pmu, error) { for _, pmu := range pmus { if pmu.typeOf == gotType { return &pmu, nil @@ -96,7 +97,7 @@ func readUncorePMU(path string, name string, cpumaskRegexp *regexp.Regexp) (*pmu } func getUncorePMUs(devicesPath string) (uncorePMUs, error) { - pmus := make(uncorePMUs) + pmus := make(uncorePMUs, 0) // Depends on platform, cpu mask could be for example in form "0-1" or "0,1". cpumaskRegexp := regexp.MustCompile("[-,\n]") @@ -124,27 +125,29 @@ func getUncorePMUs(devicesPath string) (uncorePMUs, error) { } type uncoreCollector struct { - cpuFiles map[string]map[string]map[int]readerCloser cpuFilesLock sync.Mutex - events [][]Event + cpuFiles map[int]map[string]group + events []Group eventToCustomEvent map[Event]*CustomEvent - topology []info.Node + cpuToSocket map[int]int // Handle for mocking purposes. 
perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) + ioctlSetInt func(fd int, req uint, value int) error } -func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.Node) stats.Collector { +func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[int]int) stats.Collector { if cgroupPath != rootPerfEventPath { // Uncore metric doesn't exists for cgroups, only for entire platform. return &stats.NoopCollector{} } - collector := &uncoreCollector{topology: topology} - - // Default implementation of Linux perf_event_open function. - collector.perfEventOpen = unix.PerfEventOpen + collector := &uncoreCollector{ + cpuToSocket: cpuToSocket, + perfEventOpen: unix.PerfEventOpen, + ioctlSetInt: unix.IoctlSetInt, + } err := collector.setup(events, systemDevicesPath) if err != nil { @@ -157,56 +160,101 @@ func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.No } func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { - var err error readUncorePMUs, err := getUncorePMUs(devicesPath) if err != nil { return err } - // Maping from event name, pmu type, cpu. - c.cpuFiles = make(map[string]map[string]map[int]readerCloser) + c.cpuFiles = make(map[int]map[string]group) c.events = events.Uncore.Events c.eventToCustomEvent = parseUncoreEvents(events.Uncore) c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for _, group := range c.events { - if len(group) > 1 { - klog.Warning("grouping uncore perf events is not supported!") - continue + for i, group := range c.events { + // Check what PMUs are needed. 
+ groupPMUs, err := parsePMUs(group, readUncorePMUs, c.eventToCustomEvent) + if err != nil { + return err } - eventName, pmuPrefix := parseEventName(string(group[0])) + err = checkGroup(group, groupPMUs) + if err != nil { + return err + } - var err error - customEvent, ok := c.eventToCustomEvent[group[0]] - if ok { - if customEvent.Type != 0 { - pmuPrefix = uncorePMUPrefix + // CPUs file descriptors of group leader needed for perf_event_open. + leaderFileDescriptors := make(map[string]map[uint32]int) + for _, pmu := range readUncorePMUs { + leaderFileDescriptors[pmu.name] = make(map[uint32]int) + for _, cpu := range pmu.cpus { + leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor } + } - pmus := obtainPMUs(pmuPrefix, readUncorePMUs) - if len(pmus) == 0 { - klog.Warningf("Cannot obtain any PMU matching prefix, pmu_prefix: %s, eventName: %s", pmuPrefix, eventName) - continue + for _, event := range group.events { + eventName, _ := parseEventName(string(event)) + customEvent, ok := c.eventToCustomEvent[event] + if ok { + err = c.setupRawEvent(customEvent, groupPMUs[event], i, leaderFileDescriptors) + } else { + err = c.setupEvent(eventName, groupPMUs[event], i, leaderFileDescriptors) } - err = c.setupRawNonGroupedUncore(customEvent, pmus) - } else { - pmus := obtainPMUs(pmuPrefix, readUncorePMUs) - if len(pmus) == 0 { - klog.Warningf("Cannot obtain any PMU matching prefix, pmu_prefix: %s, eventName: %s", pmuPrefix, eventName) - continue + + if err != nil { + return err } - err = c.setupNonGroupedUncore(eventName, pmus) } - if err != nil { - return err + + // Group is prepared so we should reset and enable counting. + for _, pmuCPUs := range leaderFileDescriptors { + for _, fd := range pmuCPUs { + // Call only for used PMUs. 
+ if fd != groupLeaderFileDescriptor { + err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0) + if err != nil { + return err + } + err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0) + if err != nil { + return err + } + } + } } } return nil } +func checkGroup(group Group, eventPMUs map[Event]uncorePMUs) error { + if group.array { + var pmu uncorePMUs + for _, event := range group.events { + if len(eventPMUs[event]) > 1 { + return fmt.Errorf("the events in group usually have to be from single PMU, try reorganizing the \"%v\" group", group.events) + } + if len(eventPMUs[event]) == 1 { + if pmu == nil { + pmu = eventPMUs[event] + continue + } + + eq := reflect.DeepEqual(pmu, eventPMUs[event]) + if !eq { + return fmt.Errorf("the events in group usually have to be from the same PMU, try reorganizing the \"%v\" group", group.events) + } + } + } + return nil + } else { + if len(eventPMUs[group.events[0]]) < 1 { + return fmt.Errorf("the event %q don't have any PMU to count with", group.events[0]) + } + } + return nil +} + func parseEventName(eventName string) (string, string) { // First "/" separate pmu prefix and event name // ex. "uncore_imc_0/cas_count_read" -> uncore_imc_0 and cas_count_read. 
@@ -219,14 +267,35 @@ func parseEventName(eventName string) (string, string) { return eventName, pmuPrefix } -func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu { - var pmus []pmu +func parsePMUs(group Group, pmus uncorePMUs, customEvents map[Event]*CustomEvent) (map[Event]uncorePMUs, error) { + eventPMUs := make(map[Event]uncorePMUs) + for _, event := range group.events { + _, prefix := parseEventName(string(event)) + custom, ok := customEvents[event] + if ok { + if custom.Type != 0 { + pmu, err := getPMU(pmus, custom.Type) + if err != nil { + return nil, err + } + eventPMUs[event] = uncorePMUs{pmu.name: *pmu} + continue + } + } + eventPMUs[event] = obtainPMUs(prefix, pmus) + } + + return eventPMUs, nil +} + +func obtainPMUs(want string, gotPMUs uncorePMUs) uncorePMUs { + pmus := make(uncorePMUs) if want == "" { return pmus } for _, pmu := range gotPMUs { if strings.HasPrefix(pmu.name, want) { - pmus = append(pmus, pmu) + pmus[pmu.name] = pmu } } @@ -235,11 +304,13 @@ func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu { func parseUncoreEvents(events Events) map[Event]*CustomEvent { eventToCustomEvent := map[Event]*CustomEvent{} - for _, uncoreEvent := range events.Events { - for _, customEvent := range events.CustomEvents { - if uncoreEvent[0] == customEvent.Name { - eventToCustomEvent[customEvent.Name] = &customEvent - break + for _, group := range events.Events { + for _, uncoreEvent := range group.events { + for _, customEvent := range events.CustomEvents { + if uncoreEvent == customEvent.Name { + eventToCustomEvent[customEvent.Name] = &customEvent + break + } } } } @@ -251,34 +322,37 @@ func (c *uncoreCollector) Destroy() { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() - for name, pmus := range c.cpuFiles { - for pmu, cpus := range pmus { - for cpu, file := range cpus { - klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) - err := file.Close() - if err != nil { - klog.Warningf("Unable to 
close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + for groupIndex, groupPMUs := range c.cpuFiles { + for pmu, group := range groupPMUs { + for name, cpus := range group.cpuFiles { + for cpu, file := range cpus { + klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + } } + delete(group.cpuFiles, name) } - delete(pmus, pmu) + delete(groupPMUs, pmu) } - delete(c.cpuFiles, name) + delete(c.cpuFiles, groupIndex) } } func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error { klog.V(5).Info("Attempting to update uncore perf_event stats") - for name, pmus := range c.cpuFiles { - for pmu, cpus := range pmus { - for cpu, file := range cpus { - stat, err := readPerfUncoreStat(file, name, cpu, pmu, c.topology) + for _, groupPMUs := range c.cpuFiles { + for pmu, group := range groupPMUs { + for cpu, file := range group.cpuFiles[group.leaderName] { + stat, err := readPerfUncoreStat(file, group, cpu, pmu, c.cpuToSocket) if err != nil { - return fmt.Errorf("unable to read from uncore perf_event_file (event: %q, CPU: %d, PMU: %s): %q", name, cpu, pmu, err.Error()) + klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, pmu, err.Error()) + continue } - klog.V(5).Infof("Read uncore perf event (event: %q, CPU: %d, PMU: %s): %d", name, cpu, pmu, stat.Value) - stats.PerfUncoreStats = append(stats.PerfUncoreStats, *stat) + stats.PerfUncoreStats = append(stats.PerfUncoreStats, stat...) 
} } } @@ -286,96 +360,146 @@ func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error { return nil } -func (c *uncoreCollector) setupRawNonGroupedUncore(event *CustomEvent, pmus []pmu) error { - klog.V(5).Infof("Setting up non-grouped raw perf uncore event %#v", event) - - if event.Type == 0 { - // PMU isn't set. Register event for all PMUs. - for _, pmu := range pmus { - newEvent := CustomEvent{ - Type: pmu.typeOf, - Config: event.Config, - Name: event.Name, - } - config := createPerfEventAttr(newEvent) - err := c.registerUncoreEvent(config, string(newEvent.Name), pmu.cpus, pmu.name) - if err != nil { - return err - } - } - return nil - } - // Register event for the PMU. - config := createPerfEventAttr(*event) - pmu, err := getPMU(pmus, event.Type) - if err != nil { - return err +func (c *uncoreCollector) setupEvent(name string, pmus uncorePMUs, groupIndex int, leaderFileDescriptors map[string]map[uint32]int) error { + if !isLibpfmInitialized { + return fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up") } - return c.registerUncoreEvent(config, string(event.Name), pmu.cpus, pmu.name) -} -func (c *uncoreCollector) setupNonGroupedUncore(name string, pmus []pmu) error { - perfEventAttr, err := getPerfEventAttr(name) + klog.V(5).Infof("Setting up uncore perf event %s", name) + + config, err := readPerfEventAttr(name) if err != nil { + C.free((unsafe.Pointer)(config)) return err } - defer C.free(unsafe.Pointer(perfEventAttr)) - - klog.V(5).Infof("Setting up non-grouped uncore perf event %s", name) // Register event for all memory controllers. 
for _, pmu := range pmus { - perfEventAttr.Type = pmu.typeOf - err = c.registerUncoreEvent(perfEventAttr, name, pmu.cpus, pmu.name) + config.Type = pmu.typeOf + isGroupLeader := leaderFileDescriptors[pmu.name][pmu.cpus[0]] == groupLeaderFileDescriptor + setAttributes(config, isGroupLeader) + leaderFileDescriptors[pmu.name], err = c.registerEvent(eventInfo{name, config, uncorePID, groupIndex, isGroupLeader}, pmu, leaderFileDescriptors[pmu.name]) if err != nil { return err } } + + // Clean memory allocated by C code. + C.free(unsafe.Pointer(config)) + return nil } -func (c *uncoreCollector) registerUncoreEvent(config *unix.PerfEventAttr, name string, cpus []uint32, pmu string) error { - for _, cpu := range cpus { - groupFd, pid, flags := -1, -1, 0 - fd, err := c.perfEventOpen(config, pid, int(cpu), groupFd, flags) +func (c *uncoreCollector) registerEvent(eventInfo eventInfo, pmu pmu, leaderFileDescriptors map[uint32]int) (map[uint32]int, error) { + newLeaderFileDescriptors := make(map[uint32]int) + isGroupLeader := false + for _, cpu := range pmu.cpus { + groupFd, flags := leaderFileDescriptors[cpu], 0 + fd, err := c.perfEventOpen(eventInfo.config, eventInfo.pid, int(cpu), groupFd, flags) if err != nil { - return fmt.Errorf("setting up perf event %#v failed: %q", config, err) + return nil, fmt.Errorf("setting up perf event %#v failed: %q | (pmu: %q, groupFd: %d, cpu: %d)", eventInfo.config, err, pmu, groupFd, cpu) } - perfFile := os.NewFile(uintptr(fd), name) + perfFile := os.NewFile(uintptr(fd), eventInfo.name) if perfFile == nil { - return fmt.Errorf("unable to create os.File from file descriptor %#v", fd) + return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd) } - c.addEventFile(name, pmu, int(cpu), perfFile) + c.addEventFile(eventInfo.groupIndex, eventInfo.name, pmu.name, int(cpu), perfFile) + + // If group leader, save fd for others. 
+ if leaderFileDescriptors[cpu] == groupLeaderFileDescriptor { + newLeaderFileDescriptors[cpu] = fd + isGroupLeader = true + } + } + + if isGroupLeader { + return newLeaderFileDescriptors, nil + } else { + return leaderFileDescriptors, nil } - return nil } -func (c *uncoreCollector) addEventFile(name string, pmu string, cpu int, perfFile *os.File) { - _, ok := c.cpuFiles[name] +func (c *uncoreCollector) addEventFile(index int, name string, pmu string, cpu int, perfFile *os.File) { + _, ok := c.cpuFiles[index] + if !ok { + c.cpuFiles[index] = map[string]group{} + } + + _, ok = c.cpuFiles[index][pmu] if !ok { - c.cpuFiles[name] = map[string]map[int]readerCloser{} + c.cpuFiles[index][pmu] = group{ + cpuFiles: map[string]map[int]readerCloser{}, + leaderName: name, + } } - _, ok = c.cpuFiles[name][pmu] + _, ok = c.cpuFiles[index][pmu].cpuFiles[name] if !ok { - c.cpuFiles[name][pmu] = map[int]readerCloser{} + c.cpuFiles[index][pmu].cpuFiles[name] = map[int]readerCloser{} + } + + c.cpuFiles[index][pmu].cpuFiles[name][cpu] = perfFile + + // Check if name is already stored. + for _, have := range c.cpuFiles[index][pmu].names { + if name == have { + return + } } - c.cpuFiles[name][pmu][cpu] = perfFile + // Otherwise save it. 
+ c.cpuFiles[index][pmu] = group{ + cpuFiles: c.cpuFiles[index][pmu].cpuFiles, + names: append(c.cpuFiles[index][pmu].names, name), + leaderName: c.cpuFiles[index][pmu].leaderName, + } } -func readPerfUncoreStat(file readerCloser, name string, cpu int, pmu string, topology []info.Node) (*info.PerfUncoreStat, error) { - value, err := getPerfValue(file, name) +func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, groupIndex int, leaderFileDescriptors map[string]map[uint32]int) error { + klog.V(5).Infof("Setting up raw perf uncore event %#v", event) + + for _, pmu := range pmus { + newEvent := CustomEvent{ + Type: pmu.typeOf, + Config: event.Config, + Name: event.Name, + } + config := createPerfEventAttr(newEvent) + isGroupLeader := leaderFileDescriptors[pmu.name][pmu.cpus[0]] == groupLeaderFileDescriptor + setAttributes(config, isGroupLeader) + var err error + leaderFileDescriptors[pmu.name], err = c.registerEvent(eventInfo{string(newEvent.Name), config, uncorePID, groupIndex, isGroupLeader}, pmu, leaderFileDescriptors[pmu.name]) + if err != nil { + return err + } + } + + return nil +} + +func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) { + values, err := getPerfValues(file, group) if err != nil { return nil, err } - stat := info.PerfUncoreStat{ - PerfValue: value, - Socket: sysinfo.GetSocketFromCPU(topology, cpu), - PMU: pmu, + + socket, ok := cpuToSocket[cpu] + if !ok { + // Socket is unknown. 
+ socket = -1 + } + + perfUncoreStats := make([]info.PerfUncoreStat, len(values)) + for i, value := range values { + klog.V(5).Infof("Read metric for event %q for cpu %d from pmu %q: %d", value.Name, cpu, pmu, value.Value) + perfUncoreStats[i] = info.PerfUncoreStat{ + PerfValue: value, + Socket: socket, + PMU: pmu, + } } - return &stat, nil + return perfUncoreStats, nil } diff --git a/perf/uncore_libpfm_test.go b/perf/uncore_libpfm_test.go index 57cb6db482..d5b2ab8957 100644 --- a/perf/uncore_libpfm_test.go +++ b/perf/uncore_libpfm_test.go @@ -87,10 +87,11 @@ func TestUncore(t *testing.T) { } assert.Equal(t, expected, actual) - pmuSet := []pmu{ - actual["uncore_imc_0"], - actual["uncore_imc_1"], + pmuSet := uncorePMUs{ + "uncore_imc_0": actual["uncore_imc_0"], + "uncore_imc_1": actual["uncore_imc_1"], } + actualPMU, err := getPMU(pmuSet, expected["uncore_imc_0"].typeOf) assert.Nil(t, err) assert.Equal(t, expected["uncore_imc_0"], *actualPMU) @@ -106,52 +107,42 @@ func TestUncoreCollectorSetup(t *testing.T) { events := PerfEvents{ Core: Events{ - Events: [][]Event{ - {"cache-misses"}, + Events: []Group{ + {[]Event{"cache-misses"}, false}, }, }, Uncore: Events{ - Events: [][]Event{ - {"uncore_imc_0/cas_count_read"}, - {"uncore_imc/cas_count_write"}, + Events: []Group{ + {[]Event{"uncore_imc_1/cas_count_read"}, false}, + {[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, true}, }, CustomEvents: []CustomEvent{ + {19, Config{0x01, 0x02}, "uncore_imc_1/cas_count_read"}, + {0, Config{0x02, 0x03}, "uncore_imc_0/cas_count_write"}, {18, Config{0x01, 0x02}, "uncore_imc_0/cas_count_read"}, - {0, Config{0x01, 0x03}, "uncore_imc/cas_count_write"}, }, }, } collector := &uncoreCollector{} collector.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { - return 0, nil + return int(attr.Config), nil + } + collector.ioctlSetInt = func(fd int, req uint, value int) error { + return nil } err = 
collector.setup(events, path) // There are no errors. assert.Nil(t, err) - - // For "cas_count_write", collector has two registered PMUs, - // `uncore_imc_0 (of 18 type) and `uncore_imc_1` (of 19 type). - // Both of them has two cpus which corresponds to sockets. - assert.Equal(t, len(collector.cpuFiles["uncore_imc/cas_count_write"]["uncore_imc_0"]), 2) - assert.Equal(t, len(collector.cpuFiles["uncore_imc/cas_count_write"]["uncore_imc_1"]), 2) - - // For "cas_count_read", has only one registered PMU and it's `uncore_imc_0` (of 18 type) with two cpus which - // correspond to two sockets. - assert.Equal(t, len(collector.cpuFiles["uncore_imc_0/cas_count_read"]), 1) - assert.Equal(t, len(collector.cpuFiles["uncore_imc_0/cas_count_read"]["uncore_imc_0"]), 2) - - // For "cache-misses" it shouldn't register any PMU. - assert.Nil(t, collector.cpuFiles["cache-misses"]) } func TestParseUncoreEvents(t *testing.T) { events := PerfEvents{ Uncore: Events{ - Events: [][]Event{ - {"cas_count_read"}, - {"cas_count_write"}, + Events: []Group{ + {[]Event{"cas_count_read"}, false}, + {[]Event{"cas_count_write"}, false}, }, CustomEvents: []CustomEvent{ { @@ -175,19 +166,14 @@ func TestObtainPMUs(t *testing.T) { "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, } - expected := []pmu{ - {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, - {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, - } - actual := obtainPMUs("uncore_imc_0", got) - assert.Equal(t, []pmu{expected[0]}, actual) + assert.Equal(t, uncorePMUs{"uncore_imc_0": got["uncore_imc_0"]}, actual) actual = obtainPMUs("uncore_imc_1", got) - assert.Equal(t, []pmu{expected[1]}, actual) + assert.Equal(t, uncorePMUs{"uncore_imc_1": got["uncore_imc_1"]}, actual) actual = obtainPMUs("", got) - assert.Equal(t, []pmu(nil), actual) + assert.Equal(t, uncorePMUs{}, actual) } func TestUncoreParseEventName(t *testing.T) { @@ -204,14 +190,84 @@ func TestUncoreParseEventName(t *testing.T) { assert.Equal(t, 
"some_event/first_slash/second_slash", eventName) } +func TestCheckGroup(t *testing.T) { + var testCases = []struct { + group Group + eventPMUs map[Event]uncorePMUs + expectedOutput string + }{ + { + Group{[]Event{"uncore_imc/cas_count_write"}, false}, + map[Event]uncorePMUs{}, + "the event \"uncore_imc/cas_count_write\" don't have any PMU to count with", + }, + { + Group{[]Event{"uncore_imc/cas_count_write", "uncore_imc/cas_count_read"}, true}, + map[Event]uncorePMUs{"uncore_imc/cas_count_write": { + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + }, + "uncore_imc/cas_count_read": { + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + }, + }, + "the events in group usually have to be from single PMU, try reorganizing the \"[uncore_imc/cas_count_write uncore_imc/cas_count_read]\" group", + }, + { + Group{[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_1/cas_count_read"}, true}, + map[Event]uncorePMUs{"uncore_imc_0/cas_count_write": { + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + }, + "uncore_imc_1/cas_count_read": { + "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + }, + }, + "the events in group usually have to be from the same PMU, try reorganizing the \"[uncore_imc_0/cas_count_write uncore_imc_1/cas_count_read]\" group", + }, + { + Group{[]Event{"uncore_imc/cas_count_write"}, false}, + map[Event]uncorePMUs{"uncore_imc/cas_count_write": { + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + }}, + "", + }, + { + Group{[]Event{"uncore_imc_0/cas_count_write", "uncore_imc_0/cas_count_read"}, true}, + map[Event]uncorePMUs{"uncore_imc_0/cas_count_write": { + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 
18, cpus: []uint32{0, 1}}, + }, + "uncore_imc_0/cas_count_read": { + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + }}, + "", + }, + } + + for _, tc := range testCases { + err := checkGroup(tc.group, tc.eventPMUs) + if tc.expectedOutput == "" { + assert.Nil(t, err) + } else { + assert.EqualError(t, err, tc.expectedOutput) + } + } +} + func TestReadPerfUncoreStat(t *testing.T) { - file := ReadFormat{ - Value: 4, + file := GroupReadFormat{ TimeEnabled: 0, TimeRunning: 1, - ID: 0, + Nr: 1, + } + + valuesFile := Values{ + Value: 4, + ID: 0, } - expectedStat := v1.PerfUncoreStat{ + + expectedStat := []v1.PerfUncoreStat{{ PerfValue: v1.PerfValue{ ScalingRatio: 1, Value: 4, @@ -219,22 +275,23 @@ func TestReadPerfUncoreStat(t *testing.T) { }, Socket: 0, PMU: "bar", - } - topology := []v1.Node{{ - Id: 0, - HugePages: nil, - Cores: []v1.Core{{ - Id: 1, - Threads: []int{1, 2}, - SocketID: 0, - }}, }} + cpuToSocket := map[int]int{ + 1: 0, + 2: 0, + } buf := &buffer{bytes.NewBuffer([]byte{})} err := binary.Write(buf, binary.LittleEndian, file) assert.NoError(t, err) + err = binary.Write(buf, binary.LittleEndian, valuesFile) + assert.NoError(t, err) - stat, err := readPerfUncoreStat(buf, "foo", 1, "bar", topology) + stat, err := readPerfUncoreStat(buf, group{ + cpuFiles: nil, + names: []string{"foo"}, + leaderName: "foo", + }, 1, "bar", cpuToSocket) assert.NoError(t, err) - assert.Equal(t, expectedStat, *stat) + assert.Equal(t, expectedStat, stat) } diff --git a/utils/sysinfo/sysinfo.go b/utils/sysinfo/sysinfo.go index 9ef62b8580..9586492f58 100644 --- a/utils/sysinfo/sysinfo.go +++ b/utils/sysinfo/sysinfo.go @@ -377,7 +377,7 @@ func getNodeMemInfo(sysFs sysfs.SysFs, nodeDir string) (uint64, error) { return uint64(memory), nil } -// getCoresInfo retruns infromation about physical cores +// getCoresInfo returns information about physical cores func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) { cores := make([]info.Core, 0, 
len(cpuDirs))
 	for _, cpuDir := range cpuDirs {
@@ -523,3 +523,14 @@ func GetSocketFromCPU(topology []info.Node, cpu int) int {
 	}
 	return -1
 }
+
+// GetOnlineCPUs returns the ids of all online CPUs (hardware threads) found in topology.
+func GetOnlineCPUs(topology []info.Node) []int {
+	onlineCPUs := make([]int, 0)
+	for _, node := range topology {
+		for _, core := range node.Cores {
+			onlineCPUs = append(onlineCPUs, core.Threads...)
+		}
+	}
+	return onlineCPUs
+}
diff --git a/utils/sysinfo/sysinfo_test.go b/utils/sysinfo/sysinfo_test.go
index 25d18bc799..ba7af16207 100644
--- a/utils/sysinfo/sysinfo_test.go
+++ b/utils/sysinfo/sysinfo_test.go
@@ -1286,3 +1286,50 @@ func TestGetSocketFromCPU(t *testing.T) {
 	socket = GetSocketFromCPU(topology, 8)
 	assert.Equal(t, socket, -1)
 }
+
+func TestGetOnlineCPUs(t *testing.T) {
+	topology := []info.Node{
+		{
+			Id:        0,
+			Memory:    0,
+			HugePages: nil,
+			Cores: []info.Core{
+				{
+					Id:       0,
+					Threads:  []int{0, 1},
+					Caches:   nil,
+					SocketID: 0,
+				},
+				{
+					Id:       1,
+					Threads:  []int{2, 3},
+					Caches:   nil,
+					SocketID: 0,
+				},
+			},
+			Caches: nil,
+		},
+		{
+			Id:        1,
+			Memory:    0,
+			HugePages: nil,
+			Cores: []info.Core{
+				{
+					Id:       0,
+					Threads:  []int{4, 5},
+					Caches:   nil,
+					SocketID: 1,
+				},
+				{
+					Id:       1,
+					Threads:  []int{6, 7},
+					Caches:   nil,
+					SocketID: 1,
+				},
+			},
+			Caches: nil,
+		},
+	}
+	onlineCPUs := GetOnlineCPUs(topology)
+	assert.Equal(t, onlineCPUs, []int{0, 1, 2, 3, 4, 5, 6, 7})
+}