From b01925651579018cc39e3f001d5b4f48fa76a73c Mon Sep 17 00:00:00 2001
From: Dr-Noob <peibolms@gmail.com>
Date: Fri, 5 Jul 2024 08:37:54 +0100
Subject: [PATCH] [v1.05] Continue merging measure-freq #220

- [v1.05][X86] Show SSE if AVX/FMA is not supported
- [v1.05][X86] Do not stop if cach is NULL and check for non-NULL cache in get_topology_info functions
- [v1.05][X86] Fix bug where the number of cpus was not set if NULL was returned inside the loop. Ensure topo is not NULL in get_peak_performance. Fall back to UNKNOWN_DATA when we have no information about topology
---
 src/common/printer.c | 17 +++++++++++++++--
 src/x86/apic.c       |  5 +++++
 src/x86/cpuid.c      | 37 +++++++++++++++++++++----------------
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/src/common/printer.c b/src/common/printer.c
index 1a43e5fd..e02668fc 100644
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -61,6 +61,7 @@ enum {
   ATTRIBUTE_NCORES,
   ATTRIBUTE_NCORES_DUAL,
 #ifdef ARCH_X86
+  ATTRIBUTE_SSE,
   ATTRIBUTE_AVX,
   ATTRIBUTE_FMA,
 #elif ARCH_PPC
@@ -96,6 +97,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
   "Cores:",
   "Cores (Total):",
 #ifdef ARCH_X86
+  "SSE:",
   "AVX:",
   "FMA:",
 #elif ARCH_PPC
@@ -131,6 +133,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
   "Cores:",
   "Cores (Total):",
 #ifdef ARCH_X86
+  "SSE:",
   "AVX:",
   "FMA:",
 #elif ARCH_PPC
@@ -591,6 +594,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
   for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
     char* max_frequency = get_str_freq(ptr->freq);
     char* avx = get_str_avx(ptr);
+    char* sse = get_str_sse(ptr);
     char* fma = get_str_fma(ptr);
     char* cpu_num = emalloc(sizeof(char) * 9);
 
@@ -625,8 +629,17 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
         setAttribute(art, ATTRIBUTE_NCORES, n_cores);
       }
     }
-    setAttribute(art, ATTRIBUTE_AVX, avx);
-    setAttribute(art, ATTRIBUTE_FMA, fma);
+
+    // Show the most modern vector instructions.
+    // If AVX is supported show it, otherwise show SSE
+    if (strcmp(avx, "No") == 0) {
+      setAttribute(art, ATTRIBUTE_SSE, sse);
+    }
+    else {
+      setAttribute(art, ATTRIBUTE_AVX, avx);
+      setAttribute(art, ATTRIBUTE_FMA, fma);
+    }
+
     if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
     if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
     if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
diff --git a/src/x86/apic.c b/src/x86/apic.c
index 2892468c..98dd1e4f 100644
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -369,6 +369,11 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
 }
 
 bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
+  if (topo->cach == NULL) {
+    printWarn("get_topology_from_apic: cach is NULL");
+    return false;
+  }
+
   uint32_t apic_id;
   uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
   uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c
index 5f93afe8..a937cfa4 100644
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -218,7 +218,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
   #endif
 
     //First, check we have consistent data
-    if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
+    if(freq == UNKNOWN_DATA || topo == NULL || topo->logical_cores == UNKNOWN_DATA) {
       return -1;
     }
 
@@ -451,7 +451,7 @@ struct cpuInfo* get_cpu_info(void) {
   cpu->cach = NULL;
   cpu->feat = NULL;
 
-  uint32_t modules = 1;
+  cpu->num_cpus = 1;
   uint32_t eax = 0;
   uint32_t ebx = 0;
   uint32_t ecx = 0;
@@ -507,12 +507,12 @@ struct cpuInfo* get_cpu_info(void) {
     cpu->hybrid_flag = (edx >> 15) & 0x1;
   }
 
-  if(cpu->hybrid_flag) modules = 2;
+  if(cpu->hybrid_flag) cpu->num_cpus = 2;
 
   struct cpuInfo* ptr = cpu;
-  for(uint32_t i=0; i < modules; i++) {
+  for(uint32_t i=0; i < cpu->num_cpus; i++) {
     int32_t first_core;
-    set_cpu_module(i, modules, &first_core);
+    set_cpu_module(i, cpu->num_cpus, &first_core);
 
     if(i > 0) {
       ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
@@ -547,11 +547,7 @@ struct cpuInfo* get_cpu_info(void) {
       cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch);
     }
 
-    // If any field of the struct is NULL,
-    // return early, as next functions
-    // require non NULL fields in cach and topo
     ptr->cach = get_cache_info(ptr);
-    if(ptr->cach == NULL) return cpu;
 
     if(cpu->hybrid_flag) {
       ptr->topo = get_topology_info(ptr, ptr->cach, i);
@@ -559,16 +555,23 @@ struct cpuInfo* get_cpu_info(void) {
     else {
       ptr->topo = get_topology_info(ptr, ptr->cach, -1);
     }
-    if(cpu->topo == NULL) return cpu;
+
+    // If topo is NULL, return early, as get_peak_performance
+    // requires non-NULL topology.
+    if(ptr->topo == NULL) return cpu;
   }
 
-  cpu->num_cpus = modules;
   cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
 
   return cpu;
 }
 
 bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
+  if (topo->cach == NULL) {
+    printWarn("get_cache_topology_amd: cach is NULL");
+    return false;
+  }
+
   if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) {
     uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;
 
@@ -644,10 +647,12 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
 
 #ifdef __linux__
 void get_topology_from_udev(struct topology* topo) {
-  // TODO: To be improved in the future
   topo->total_cores = get_ncores_from_cpuinfo();
-  topo->logical_cores = topo->total_cores;
-  topo->physical_cores = topo->total_cores;
+  // TODO: To be improved in the future
+  // Conservative setting as we only know the total
+  // number of cores.
+  topo->logical_cores = UNKNOWN_DATA;
+  topo->physical_cores = UNKNOWN_DATA;
   topo->smt_available = 1;
   topo->smt_supported = 1;
   topo->sockets = 1;
@@ -711,8 +716,8 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int
       }
       else {
         printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
-        topo->physical_cores = 1;
-        topo->logical_cores = 1;
+        topo->physical_cores = UNKNOWN_DATA;
+        topo->logical_cores = UNKNOWN_DATA;
         topo->smt_available = 1;
         topo->smt_supported = 1;
       }