From 7111fd28b7ee799fbb897126b1ef771726d8fdad Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Mon, 25 Mar 2024 23:06:08 -0500 Subject: [PATCH 1/3] rasdaemon: ras-mc-ctl: Add support to display mcastatus_msg string Currently, the mcastatus_msg string of struct mce_event is added to the SQLite database by the rasdaemon when it is recording errors. This string, however, is not displayed by the ras-mc-ctl utility. The string provides important error information relating to the received MCE. For example, on AMD SMCA systems, the string contains the extended error code and its description. As such, the string should be present in the output of the ras-mc-ctl utility. Add support to output the string through the ras-mc-ctl utility. Signed-off-by: Avadhut Naik --- util/ras-mc-ctl.in | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in index 227a77d..502f2a5 100755 --- a/util/ras-mc-ctl.in +++ b/util/ras-mc-ctl.in @@ -1329,7 +1329,7 @@ sub errors { require DBI; my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); - my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); + my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); my ($bus_name, $dev_name, $driver_name, $reporter_name); my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); @@ -1497,10 +1497,10 @@ sub errors # MCE mce_record errors if ($has_mce == 1) { - $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, 
bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; + $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; $query_handle = $dbh->prepare($query); $query_handle->execute(); - $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); + $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); $out = ""; while($query_handle->fetch()) { $out .= "$id $time error: $msg"; @@ -1508,6 +1508,7 @@ sub errors $out .= ", bank $bank_name" if ($bank_name); $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); + $out .= ", mca $mcastatus_msg" if ($mcastatus_msg); $out .= ", $mc_location" if ($mc_location); $out .= ", $user_action" if ($user_action); $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); From a83def7887d735fcce1101bd36bde124a2968ed2 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Mon, 1 Apr 2024 23:33:07 -0500 Subject: [PATCH 2/3] rasdaemon: Add support to parse the PPIN field of mce tracepoint Support for exporting the PPIN (Protected Processor Inventory Number) is being added to the mce_record tracepoint. Add the required, corresponding support in the rasdaemon for the field to be parsed and logged or added to the database and viewed later through ras-mc-ctl utility. 
Signed-off-by: Avadhut Naik --- ras-mce-handler.c | 7 +++++++ ras-mce-handler.h | 1 + ras-record.c | 42 ++++++++++++++++++++++-------------------- util/ras-mc-ctl.in | 7 ++++--- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/ras-mce-handler.c b/ras-mce-handler.c index 93568e3..c9e8687 100644 --- a/ras-mce-handler.c +++ b/ras-mce-handler.c @@ -372,6 +372,9 @@ static void report_mce_event(struct ras_events *ras, trace_seq_printf(s, ", apicid= %x", e->apicid); + if (e->ppin) + trace_seq_printf(s, ", ppin= %llx", (long long)e->ppin); + if (!e->vdata_len) return; @@ -566,6 +569,10 @@ int ras_mce_event_handler(struct trace_seq *s, return -1; e.ipid = val; + /* Get PPIN */ + if (!tep_get_field_val(s, event, "ppin", record, &val, 1)) + e.ppin = val; + /* Get Vendor-specfic Data, if any */ e.vdata = tep_get_field_raw(s, event, "v_data", record, &e.vdata_len, 1); diff --git a/ras-mce-handler.h b/ras-mce-handler.h index 976fb4f..c8f975e 100644 --- a/ras-mce-handler.h +++ b/ras-mce-handler.h @@ -75,6 +75,7 @@ struct mce_event { uint8_t cpuvendor; uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */ uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + uint64_t ppin; int32_t vdata_len; const uint64_t *vdata; diff --git a/ras-record.c b/ras-record.c index f3ffafb..327d6ba 100644 --- a/ras-record.c +++ b/ras-record.c @@ -330,19 +330,20 @@ static const struct db_fields mce_record_fields[] = { { .name = "ip", .type = "INTEGER" }, { .name = "tsc", .type = "INTEGER" }, { .name = "walltime", .type = "INTEGER" }, - { .name = "cpu", .type = "INTEGER" }, // 10 + { .name = "ppin", .type = "INTEGER" }, // 10 + { .name = "cpu", .type = "INTEGER" }, { .name = "cpuid", .type = "INTEGER" }, { .name = "apicid", .type = "INTEGER" }, { .name = "socketid", .type = "INTEGER" }, - { .name = "cs", .type = "INTEGER" }, - { .name = "bank", .type = "INTEGER" }, //15 + { .name = "cs", .type = "INTEGER" }, // 15 + { .name = "bank", .type = "INTEGER" }, { .name = 
"cpuvendor", .type = "INTEGER" }, /* Parsed data - will likely change */ { .name = "bank_name", .type = "TEXT" }, { .name = "error_msg", .type = "TEXT" }, - { .name = "mcgstatus_msg", .type = "TEXT" }, - { .name = "mcistatus_msg", .type = "TEXT" }, // 20 + { .name = "mcgstatus_msg", .type = "TEXT" },// 20 + { .name = "mcistatus_msg", .type = "TEXT" }, { .name = "mcastatus_msg", .type = "TEXT" }, { .name = "user_action", .type = "TEXT" }, { .name = "mc_location", .type = "TEXT" }, @@ -372,21 +373,22 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) sqlite3_bind_int64(priv->stmt_mce_record, 7, ev->ip); sqlite3_bind_int64(priv->stmt_mce_record, 8, ev->tsc); sqlite3_bind_int64(priv->stmt_mce_record, 9, ev->walltime); - sqlite3_bind_int (priv->stmt_mce_record, 10, ev->cpu); - sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpuid); - sqlite3_bind_int (priv->stmt_mce_record, 12, ev->apicid); - sqlite3_bind_int (priv->stmt_mce_record, 13, ev->socketid); - sqlite3_bind_int (priv->stmt_mce_record, 14, ev->cs); - sqlite3_bind_int (priv->stmt_mce_record, 15, ev->bank); - sqlite3_bind_int (priv->stmt_mce_record, 16, ev->cpuvendor); - - sqlite3_bind_text(priv->stmt_mce_record, 17, ev->bank_name, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 18, ev->error_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 19, ev->mcgstatus_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcistatus_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcastatus_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 22, ev->user_action, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 23, ev->mc_location, -1, NULL); + sqlite3_bind_int64(priv->stmt_mce_record, 10, ev->ppin); + sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpu); + sqlite3_bind_int (priv->stmt_mce_record, 12, ev->cpuid); + sqlite3_bind_int (priv->stmt_mce_record, 13, ev->apicid); + sqlite3_bind_int (priv->stmt_mce_record, 14, ev->socketid); + 
sqlite3_bind_int (priv->stmt_mce_record, 15, ev->cs); + sqlite3_bind_int (priv->stmt_mce_record, 16, ev->bank); + sqlite3_bind_int (priv->stmt_mce_record, 17, ev->cpuvendor); + + sqlite3_bind_text(priv->stmt_mce_record, 18, ev->bank_name, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 19, ev->error_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcgstatus_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcistatus_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 22, ev->mcastatus_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 23, ev->user_action, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 24, ev->mc_location, -1, NULL); rc = sqlite3_step(priv->stmt_mce_record); if (rc != SQLITE_OK && rc != SQLITE_DONE) diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in index 502f2a5..2fda1c6 100755 --- a/util/ras-mc-ctl.in +++ b/util/ras-mc-ctl.in @@ -1329,7 +1329,7 @@ sub errors { require DBI; my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); - my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); + my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); my ($bus_name, $dev_name, $driver_name, $reporter_name); my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); @@ -1497,10 +1497,10 @@ sub errors # MCE mce_record errors if ($has_mce == 1) { - $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, 
mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; + $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, ppin, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; $query_handle = $dbh->prepare($query); $query_handle->execute(); - $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); + $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); $out = ""; while($query_handle->fetch()) { $out .= "$id $time error: $msg"; @@ -1519,6 +1519,7 @@ sub errors $out .= sprintf ", ip=0x%08x", $ip if ($ip); $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); + $out .= sprintf ", ppin=0x%08x", $ppin if ($ppin); $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); From 6d3bf618cf6822167db6851e17bfc39d9a400b02 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Tue, 2 Apr 2024 00:07:38 -0500 Subject: [PATCH 3/3] rasdaemon: Add support to parse microcode field of mce tracepoint Support for exporting the Microcode Revision is being added to the mce_record tracepoint. Add the required, corresponding support in the rasdaemon for the field to be parsed and logged or added to the database and viewed later through ras-mc-ctl utility. 
Signed-off-by: Avadhut Naik --- ras-mce-handler.c | 7 +++++++ ras-mce-handler.h | 1 + ras-record.c | 22 ++++++++++++---------- util/ras-mc-ctl.in | 7 ++++--- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/ras-mce-handler.c b/ras-mce-handler.c index c9e8687..0f09132 100644 --- a/ras-mce-handler.c +++ b/ras-mce-handler.c @@ -375,6 +375,9 @@ static void report_mce_event(struct ras_events *ras, if (e->ppin) trace_seq_printf(s, ", ppin= %llx", (long long)e->ppin); + if (e->microcode) + trace_seq_printf(s, ", microcode= %x", e->microcode); + if (!e->vdata_len) return; @@ -573,6 +576,10 @@ int ras_mce_event_handler(struct trace_seq *s, if (!tep_get_field_val(s, event, "ppin", record, &val, 1)) e.ppin = val; + /* Get Microcode Revision */ + if (!tep_get_field_val(s, event, "microcode", record, &val, 1)) + e.microcode = val; + /* Get Vendor-specfic Data, if any */ e.vdata = tep_get_field_raw(s, event, "v_data", record, &e.vdata_len, 1); diff --git a/ras-mce-handler.h b/ras-mce-handler.h index c8f975e..694bd41 100644 --- a/ras-mce-handler.h +++ b/ras-mce-handler.h @@ -76,6 +76,7 @@ struct mce_event { uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */ uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ uint64_t ppin; + uint32_t microcode; int32_t vdata_len; const uint64_t *vdata; diff --git a/ras-record.c b/ras-record.c index 327d6ba..56c25cb 100644 --- a/ras-record.c +++ b/ras-record.c @@ -338,11 +338,12 @@ static const struct db_fields mce_record_fields[] = { { .name = "cs", .type = "INTEGER" }, // 15 { .name = "bank", .type = "INTEGER" }, { .name = "cpuvendor", .type = "INTEGER" }, + { .name = "microcode", .type = "INTEGER" }, /* Parsed data - will likely change */ { .name = "bank_name", .type = "TEXT" }, - { .name = "error_msg", .type = "TEXT" }, - { .name = "mcgstatus_msg", .type = "TEXT" },// 20 + { .name = "error_msg", .type = "TEXT" }, // 20 + { .name = "mcgstatus_msg", .type = "TEXT" }, { .name = "mcistatus_msg", .type = "TEXT" 
}, { .name = "mcastatus_msg", .type = "TEXT" }, { .name = "user_action", .type = "TEXT" }, @@ -381,14 +382,15 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) sqlite3_bind_int (priv->stmt_mce_record, 15, ev->cs); sqlite3_bind_int (priv->stmt_mce_record, 16, ev->bank); sqlite3_bind_int (priv->stmt_mce_record, 17, ev->cpuvendor); - - sqlite3_bind_text(priv->stmt_mce_record, 18, ev->bank_name, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 19, ev->error_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcgstatus_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcistatus_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 22, ev->mcastatus_msg, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 23, ev->user_action, -1, NULL); - sqlite3_bind_text(priv->stmt_mce_record, 24, ev->mc_location, -1, NULL); + sqlite3_bind_int (priv->stmt_mce_record, 18, ev->microcode); + + sqlite3_bind_text(priv->stmt_mce_record, 19, ev->bank_name, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 20, ev->error_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcgstatus_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 22, ev->mcistatus_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 23, ev->mcastatus_msg, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 24, ev->user_action, -1, NULL); + sqlite3_bind_text(priv->stmt_mce_record, 25, ev->mc_location, -1, NULL); rc = sqlite3_step(priv->stmt_mce_record); if (rc != SQLITE_OK && rc != SQLITE_DONE) diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in index 2fda1c6..5c9558b 100755 --- a/util/ras-mc-ctl.in +++ b/util/ras-mc-ctl.in @@ -1329,7 +1329,7 @@ sub errors { require DBI; my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); - my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, 
$bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); + my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $microcode, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); my ($bus_name, $dev_name, $driver_name, $reporter_name); my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); @@ -1497,10 +1497,10 @@ sub errors # MCE mce_record errors if ($has_mce == 1) { - $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, ppin, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; + $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, ppin, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, microcode, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; $query_handle = $dbh->prepare($query); $query_handle->execute(); - $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); + $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $ppin, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $microcode, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); $out = ""; while($query_handle->fetch()) { $out .= "$id $time error: $msg"; @@ -1526,6 +1526,7 @@ sub errors $out .= sprintf ", socketid=0x%08x", $socketid if 
($socketid); $out .= sprintf ", cs=0x%08x", $cs if ($cs); $out .= sprintf ", bank=0x%08x", $bank if ($bank); + $out .= sprintf ", microcode=0x%08x", $microcode if ($microcode); $out .= "\n"; }