open-telemetry · GregMefford · Jan 1, 2025 · Jan 12, 2025 · Jan 12, 2025 · Feb 16, 2025
@@ -0,0 +1,280 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package beam // import "go.opentelemetry.io/ebpf-profiler/interpreter/beam"
+
+// BEAM VM Unwinder support code
+
+// The BEAM VM is an interpreter for Erlang, as well as several other languages
+// that share the same bytecode, such as Elixir and Gleam.
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"regexp"
+	"strconv"
+	"unsafe"
+
+	log "github.com/sirupsen/logrus"
+
+	"go.opentelemetry.io/ebpf-profiler/host"
+	"go.opentelemetry.io/ebpf-profiler/interpreter"
+	"go.opentelemetry.io/ebpf-profiler/libpf"
+	"go.opentelemetry.io/ebpf-profiler/libpf/pfelf"
+	"go.opentelemetry.io/ebpf-profiler/lpm"
+	"go.opentelemetry.io/ebpf-profiler/process"
+	"go.opentelemetry.io/ebpf-profiler/remotememory"
+	"go.opentelemetry.io/ebpf-profiler/reporter"
+	"go.opentelemetry.io/ebpf-profiler/support"
+)
+
+// #include "../../support/ebpf/types.h"
+// #include "../../support/ebpf/v8_tracer.h"
+import "C"
+
+var (
+	// beamRegex matches file names containing the literal text "beam.smp".
+	// The dot is escaped so that it no longer matches an arbitrary character
+	// (e.g. "beamXsmp").
+	beamRegex = regexp.MustCompile(`beam\.smp`)
+
+	// Compile-time checks that the types in this file satisfy the
+	// interpreter interfaces.
+	_ interpreter.Data     = &beamData{}
+	_ interpreter.Instance = &beamInstance{}
+
+	// BogusFileID is a fixed placeholder file ID that Symbolize uses when
+	// building frame IDs.
+	BogusFileID = libpf.NewFileID(0xf00d, 0x1001)
+)
+
+// beamData is the per-executable state returned by Loader.
+type beamData struct {
+	// version is the OTP major release number that Loader obtained from
+	// readReleaseVersion.
+	version uint32
+}
+
+// beamInstance is the per-process state created by beamData.Attach.
+type beamInstance struct {
+	interpreter.InstanceStubs
+
+	// data is the beamData this instance was attached from.
+	data *beamData
+	// rm is the remote memory accessor that was passed to Attach.
+	rm   remotememory.RemoteMemory
+	// mappings maps each tracked Mapping to a pointer holding the generation in
+	// which it was last seen. The pointer is shared with the prefixes created
+	// for that mapping.
+	mappings map[process.Mapping]*uint32
+	// prefixes maps each prefix added to the ebpf maps (to be cleaned up) to the
+	// generation pointer of the mapping that most recently claimed it.
+	prefixes map[lpm.Prefix]*uint32
+	// mappingGeneration is the current generation (so old entries can be pruned)
+	mappingGeneration uint32
+}
+
+// readSymbolValue looks up the symbol called name in ef and returns the
+// sym.Size bytes read at the symbol's address.
+//
+// An error is returned when the symbol lookup or the read fails. The
+// underlying error is wrapped with %w so callers can still inspect it with
+// errors.Is / errors.As.
+func readSymbolValue(ef *pfelf.File, name libpf.SymbolName) ([]byte, error) {
+	sym, err := ef.LookupSymbol(name)
+	if err != nil {
+		return nil, fmt.Errorf("symbol not found: %w", err)
+	}
+
+	memory := make([]byte, sym.Size)
+	if _, err := ef.ReadVirtualMemory(memory, int64(sym.Address)); err != nil {
+		return nil, fmt.Errorf("failed to read process memory at 0x%x:%w", sym.Address, err)
+	}
+
+	// %q keeps NUL terminators and other non-printable bytes visible in the log
+	// instead of emitting them raw.
+	log.Infof("read symbol value %s: %q", sym.Name, memory)
+	return memory, nil
+}
+
+// readReleaseVersion reads the "etp_otp_release" and "etp_erts_version"
+// symbols from ef.
+//
+// It returns the OTP release parsed as an unsigned decimal number, followed by
+// the raw bytes of the erts version symbol exactly as read (not trimmed).
+// An error is returned if either symbol cannot be read or if the OTP release
+// is empty, not a decimal number, or does not fit in 32 bits.
+func readReleaseVersion(ef *pfelf.File) (uint32, []byte, error) {
+	otpRelease, err := readSymbolValue(ef, "etp_otp_release")
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to read OTP release: %w", err)
+	}
+
+	// Cut at the first NUL byte instead of blindly dropping the last byte:
+	// an empty symbol must not panic, and a value without a terminator must
+	// not lose its final digit.
+	end := len(otpRelease)
+	for idx, b := range otpRelease {
+		if b == 0 {
+			end = idx
+			break
+		}
+	}
+
+	// ParseUint with bitSize 32 rejects signs and values that would otherwise
+	// wrap around in the uint32 conversion below.
+	otpMajor, err := strconv.ParseUint(string(otpRelease[:end]), 10, 32)
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to parse OTP version: %w", err)
+	}
+
+	ertsVersion, err := readSymbolValue(ef, "etp_erts_version")
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to read erts version: %w", err)
+	}
+
+	return uint32(otpMajor), ertsVersion, nil
+}
+
+// Loader decides whether the file described by info is a BEAM executable and,
+// if so, prepares it for unwinding.
+//
+// When the file name does not match beamRegex it returns (nil, nil). Otherwise
+// it reads the OTP release from the ELF file, resolves the address ranges of
+// the "process_main" symbol, registers those ranges with the eBPF handler for
+// support.ProgUnwindBEAM, and returns a beamData carrying the OTP release.
+// Any failure along the way is returned unchanged.
+func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) {
+	nameMatch := beamRegex.FindStringSubmatch(info.FileName())
+	if nameMatch == nil {
+		return nil, nil
+	}
+	log.Infof("BEAM interpreter found: %v", nameMatch)
+
+	elfFile, err := info.GetELF()
+	if err != nil {
+		return nil, err
+	}
+
+	release, _, err := readReleaseVersion(elfFile)
+	if err != nil {
+		return nil, err
+	}
+
+	ranges, err := info.GetSymbolAsRanges(libpf.SymbolName("process_main"))
+	if err != nil {
+		return nil, err
+	}
+
+	err = ebpf.UpdateInterpreterOffsets(support.ProgUnwindBEAM, info.FileID(), ranges)
+	if err != nil {
+		return nil, err
+	}
+
+	log.Infof("BEAM loaded, otpVersion: %d, interpRanges: %v", release, ranges)
+
+	return &beamData{version: release}, nil
+}
+
+// Attach registers the process pid with the eBPF side and creates the
+// per-process instance.
+//
+// The OTP version stored in d is passed to ebpf.UpdateProcData inside a
+// C.BEAMProcInfo. On success a beamInstance with empty mapping and prefix
+// tables is returned; if UpdateProcData fails, its error is returned instead.
+// The bias argument is not used.
+func (d *beamData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libpf.Address, rm remotememory.RemoteMemory) (interpreter.Instance, error) {
+	log.Infof("BEAM interpreter attaching")
+
+	procInfo := C.BEAMProcInfo{version: C.uint(d.version)}
+	err := ebpf.UpdateProcData(libpf.BEAM, pid, unsafe.Pointer(&procInfo))
+	if err != nil {
+		return nil, err
+	}
+
+	instance := &beamInstance{
+		data:     d,
+		rm:       rm,
+		mappings: make(map[process.Mapping]*uint32),
+		prefixes: make(map[lpm.Prefix]*uint32),
+	}
+	return instance, nil
+}
+
+// SynchronizeMappings brings the eBPF prefix table for the process in line
+// with the given list of mappings.
+//
+// Every call starts a new generation. Each executable anonymous mapping is
+// either refreshed (already tracked) or newly tracked, in which case its
+// address range is split into LPM prefixes and each prefix not yet known is
+// registered for support.ProgUnwindBEAM. Afterwards, prefixes and mappings
+// whose generation was not refreshed in this call are removed.
+//
+// It returns an error if the prefix list cannot be calculated or a prefix
+// cannot be registered; failures while deleting stale prefixes are only
+// logged.
+func (i *beamInstance) SynchronizeMappings(ebpf interpreter.EbpfHandler,
+	_ reporter.SymbolReporter, pr process.Process, mappings []process.Mapping) error {
+	pid := pr.PID()
+	i.mappingGeneration++
+	for idx := range mappings {
+		m := &mappings[idx]
+		if !m.IsExecutable() || !m.IsAnonymous() {
+			continue
+		}
+
+		// Already tracked: bump the shared generation, which also keeps the
+		// prefixes that point at it alive.
+		if generationPtr, exists := i.mappings[*m]; exists {
+			*generationPtr = i.mappingGeneration
+			continue
+		}
+
+		// Generate a new uint32 pointer which is shared for mapping and the prefixes it owns
+		// so updating the mapping above will reflect to prefixes also.
+		mappingGeneration := i.mappingGeneration
+		i.mappings[*m] = &mappingGeneration
+
+		// Just assume all anonymous and executable mappings are BEAM for now
+		log.Infof("Enabling BEAM for %#x/%#x", m.Vaddr, m.Length)
+
+		prefixes, err := lpm.CalculatePrefixList(m.Vaddr, m.Vaddr+m.Length)
+		if err != nil {
+			// Keep the underlying cause instead of discarding it.
+			return fmt.Errorf("new anonymous mapping lpm failure %#x/%#x: %w",
+				m.Vaddr, m.Length, err)
+		}
+
+		for _, prefix := range prefixes {
+			if _, exists := i.prefixes[prefix]; !exists {
+				err := ebpf.UpdatePidInterpreterMapping(pid, prefix, support.ProgUnwindBEAM, 0, 0)
+				if err != nil {
+					return err
+				}
+			}
+			i.prefixes[prefix] = &mappingGeneration
+		}
+	}
+
+	// Remove prefixes not seen
+	for prefix, generationPtr := range i.prefixes {
+		if *generationPtr == i.mappingGeneration {
+			continue
+		}
+		log.Infof("Delete BEAM prefix %#v", prefix)
+		// Best effort: a failed delete must not abort the cleanup, but it
+		// should not vanish without a trace either.
+		if err := ebpf.DeletePidInterpreterMapping(pid, prefix); err != nil {
+			log.Debugf("Failed to delete BEAM prefix %#v: %v", prefix, err)
+		}
+		delete(i.prefixes, prefix)
+	}
+	for m, generationPtr := range i.mappings {
+		if *generationPtr == i.mappingGeneration {
+			continue
+		}
+		log.Infof("Disabling BEAM for %#x/%#x", m.Vaddr, m.Length)
+		delete(i.mappings, m)
+	}
+
+	return nil
+}
+
+// SynchronizeMappingsFromJITDump reads /tmp/jit-<pid>.dump for the process and
+// registers an eBPF interpreter mapping for the code range of every code load
+// record it contains. All other record types are skipped with a warning.
+//
+// Reading stops cleanly when the end of the file is reached at a record
+// boundary. Any other failure (opening the file, a malformed header or record,
+// a failed skip, or a failed eBPF update) is returned. The mappings argument
+// is not used.
+func (i *beamInstance) SynchronizeMappingsFromJITDump(ebpf interpreter.EbpfHandler,
+	_ reporter.SymbolReporter, pr process.Process, mappings []process.Mapping) error {
+	pid := pr.PID()
+	file, err := os.Open(fmt.Sprintf("/tmp/jit-%d.dump", uint32(pid)))
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	header, err := ReadJITDumpHeader(file)
+	if err != nil {
+		return err
+	}
+	log.Infof("Parsed header: %v", *header)
+
+	// An explicit loop avoids declaring a loop-scoped err that shadows the
+	// outer one, which would make any error check after the loop dead code and
+	// silently drop read errors.
+	for {
+		recordHeader, err := ReadJITDumpRecordHeader(file)
+		if errors.Is(err, io.EOF) {
+			// No more records: normal end of file.
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+
+		switch recordHeader.ID {
+		case JITCodeLoad:
+			record, name, err := ReadJITDumpRecordCodeLoad(file, recordHeader)
+			if err != nil {
+				return err
+			}
+
+			log.Infof("JITDump Code Load %s @ 0x%x (%d bytes)", name, record.CodeAddr, record.CodeSize)
+
+			prefixes, err := lpm.CalculatePrefixList(record.CodeAddr, record.CodeAddr+record.CodeSize)
+			if err != nil {
+				return fmt.Errorf("lpm failure %#x/%#x: %w", record.CodeAddr, record.CodeSize, err)
+			}
+
+			for _, prefix := range prefixes {
+				// TODO: Include FileID
+				err := ebpf.UpdatePidInterpreterMapping(pid, prefix, support.ProgUnwindBEAM, 0, 0)
+				if err != nil {
+					return err
+				}
+			}
+
+			// TODO: remove mappings that have been moved/unloaded
+
+		default:
+			log.Warnf("Ignoring JITDump record type %d", recordHeader.ID)
+			// If the skip fails the file position is unknown, so the next
+			// header read would be garbage; stop instead.
+			if err := SkipJITDumpRecord(file, recordHeader); err != nil {
+				return err
+			}
+		}
+	}
+}
+
+// Detach logs that the BEAM interpreter is detaching and always returns nil.
+// Both arguments are ignored and no other cleanup is performed here.
+func (i *beamInstance) Detach(_ interpreter.EbpfHandler, _ libpf.PID) error {
+	log.Infof("BEAM interpreter detaching")
+	return nil
+}
+
+// Symbolize appends a placeholder BEAM frame to trace.
+//
+// Frames whose type is not libpf.BEAM are rejected with
+// interpreter.ErrMismatchInterpreterType after a warning is logged. For BEAM
+// frames, a frame ID is built from BogusFileID and frame.Lineno, reported with
+// the fixed function name "Some Bogus Name", and appended to trace as a
+// libpf.BEAMFrame.
+func (i *beamInstance) Symbolize(symbolReporter reporter.SymbolReporter, frame *host.Frame, trace *libpf.Trace) error {
+	isBEAM := frame.Type.IsInterpType(libpf.BEAM)
+	if !isBEAM {
+		log.Warnf("BEAM failed to symbolize")
+		return interpreter.ErrMismatchInterpreterType
+	}
+	log.Infof("BEAM symbolizing %v", frame)
+
+	id := libpf.NewFrameID(BogusFileID, frame.Lineno)
+	metadata := reporter.FrameMetadataArgs{
+		FrameID:      id,
+		FunctionName: "Some Bogus Name",
+	}
+	symbolReporter.FrameMetadata(&metadata)
+
+	trace.AppendFrameID(libpf.BEAMFrame, id)
+	return nil
+}
@@ -0,0 +1,113 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package beam // import "go.opentelemetry.io/ebpf-profiler/interpreter/beam"
+
+// Minimal JITDUMP file reader for BEAM
+
+// This has the minimal code we need to read the JITDUMP files that the BEAM
+// writes to `/tmp/jit-<pid>.dump`. It isn't BEAM-specific, so it could probably
+// be used more generally. The spec for this file format is at:
+// https://raw.githubusercontent.com/torvalds/linux/refs/heads/master/tools/perf/Documentation/jitdump-specification.txt
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+)
+
+// JITDumpHeader is the fixed-size file header at the start of a JITDUMP file.
+// ReadJITDumpHeader decodes it field by field in declaration order, so the
+// order and widths of the fields must not be changed.
+type JITDumpHeader struct {
+	Magic     uint32 // the ASCII string "JiTD", written as 0x4A695444. The reader will detect an endian mismatch when it reads 0x4454694a
+	Version   uint32 // a 4-byte value representing the format version. It is currently set to 1
+	TotalSize uint32 // size in bytes of file header
+	ELFMach   uint32 // ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
+	Pad1      uint32 // padding. Reserved for future use
+	Pid       uint32 // JIT runtime process identification (OS specific)
+	Timestamp uint64 // timestamp of when the file was created
+	Flags     uint64 // a bitmask of flags
+}
+
+// JITDumpRecordHeader is the fixed-size prefix of every record in a JITDUMP
+// file. ReadJITDumpRecordHeader decodes it field by field in declaration
+// order, so the order and widths of the fields must not be changed.
+type JITDumpRecordHeader struct {
+	ID        uint32 // a value identifying the record type (e.g. beam.JITCodeLoad)
+	TotalSize uint32 // the size in bytes of the record including this header
+	Timestamp uint64 // a timestamp of when the record was created
+}
+
+// Record type identifiers, as found in JITDumpRecordHeader.ID.
+const (
+	JITCodeLoad          = 0 // record describing a jitted function
+	JITCodeMove          = 1 // record describing an already jitted function which is moved
+	JITCodeDebugInfo     = 2 // record describing the debug information for a jitted function
+	JITCodeClose         = 3 // record marking the end of the jit runtime (optional)
+	JITCodeUnwindingInfo = 4 // record describing a function unwinding information
+)
+
+// JITDumpRecordCodeLoad is the fixed-size part of a JITCodeLoad record that
+// follows the record header. ReadJITDumpRecordCodeLoad decodes it field by
+// field in declaration order and then reads the function name and skips the
+// native code that come after it in the file.
+type JITDumpRecordCodeLoad struct {
+	PID       uint32 // OS process id of the runtime generating the jitted code
+	TID       uint32 // OS thread identification of the runtime thread generating the jitted code
+	VMA       uint64 // virtual address of jitted code start
+	CodeAddr  uint64 // code start address for the jitted code. By default vma = code_addr
+	CodeSize  uint64 // size in bytes of the generated jitted code
+	CodeIndex uint64 // unique identifier for the jitted code
+}
+
+func ReadJITDumpHeader(file io.ReadSeeker) (*JITDumpHeader, error) {
+	header := JITDumpHeader{}
+	err := binary.Read(file, binary.LittleEndian, &header)
+	if err != nil {
+		return nil, err
+	}
+
+	if header.Magic != 0x4A695444 {
+		return nil, fmt.Errorf("File malformed, or maybe wrong endianness. Found magic number: %x", header.Magic)
+	}
+
+	return &header, nil
+}
+
+func ReadJITDumpRecordHeader(file io.ReadSeeker) (*JITDumpRecordHeader, error) {
+	header := JITDumpRecordHeader{}
+	err := binary.Read(file, binary.LittleEndian, &header)
+	if err != nil {
+		return nil, err
+	}
+	return &header, nil
+}
+
+func ReadJITDumpRecordCodeLoad(file io.ReadSeeker, header *JITDumpRecordHeader) (*JITDumpRecordCodeLoad, string, error) {
+	record := JITDumpRecordCodeLoad{}
+	err := binary.Read(file, binary.LittleEndian, &record)
+	if err != nil {
+		return nil, "", err
+	}
+
+	recordHeaderSize := uint32(16)
+	codeLoadRecordHeaderSize := uint32(40)
+	nameSize := header.TotalSize - uint32(record.CodeSize) - recordHeaderSize - codeLoadRecordHeaderSize
+	name := make([]byte, nameSize)
+	err = binary.Read(file, binary.LittleEndian, &name)
+	if err != nil {
+		return nil, "", err
+	}
+
+	if name[nameSize-1] != '\x00' {
+		return nil, "", fmt.Errorf("Expected null terminated string, found %c", name[nameSize-1])
+	}
+
+	// Skip over the actual native code because we don't need it but we
+	// probably do want to read the next record.
+	_, err = file.Seek(int64(record.CodeSize), io.SeekCurrent)
+	if err != nil {
+		return nil, "", err
+	}
+
+	return &record, string(name), nil
+}
+
+func SkipJITDumpRecord(file io.ReadSeeker, header *JITDumpRecordHeader) error {
+	recordHeaderSize := uint64(16)
+	_, err := file.Seek(int64(header.TotalSize)-int64(recordHeaderSize), io.SeekCurrent)
+	if err != nil {
+		return err
+	}
+	return nil
+}