From b01925651579018cc39e3f001d5b4f48fa76a73c Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Fri, 5 Jul 2024 08:37:54 +0100 Subject: [PATCH] [v1.05] Continue merging measure-freq #220 - [v1.05][X86] Show SSE if AVX/FMA is not supported - [v1.05][X86] Do not stop if cach is NULL and check for non-NULL cache in get_topology_info functions - [v1.05][X86] Fix bug where the number of cpus were not set if NULL was returned inside the loop. Ensure topo is not NULL in get_peak_performance. Fallback to UNKNOWN_DATA when we have no information about topology --- src/common/printer.c | 17 +++++++++++++++-- src/x86/apic.c | 5 +++++ src/x86/cpuid.c | 37 +++++++++++++++++++++---------------- 3 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/common/printer.c b/src/common/printer.c index 1a43e5fd..e02668fc 100644 --- a/src/common/printer.c +++ b/src/common/printer.c @@ -61,6 +61,7 @@ enum { ATTRIBUTE_NCORES, ATTRIBUTE_NCORES_DUAL, #ifdef ARCH_X86 + ATTRIBUTE_SSE, ATTRIBUTE_AVX, ATTRIBUTE_FMA, #elif ARCH_PPC @@ -96,6 +97,7 @@ static const char* ATTRIBUTE_FIELDS [] = { "Cores:", "Cores (Total):", #ifdef ARCH_X86 + "SSE:", "AVX:", "FMA:", #elif ARCH_PPC @@ -131,6 +133,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = { "Cores:", "Cores (Total):", #ifdef ARCH_X86 + "SSE:", "AVX:", "FMA:", #elif ARCH_PPC @@ -591,6 +594,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { char* max_frequency = get_str_freq(ptr->freq); char* avx = get_str_avx(ptr); + char* sse = get_str_sse(ptr); char* fma = get_str_fma(ptr); char* cpu_num = emalloc(sizeof(char) * 9); @@ -625,8 +629,17 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct setAttribute(art, ATTRIBUTE_NCORES, n_cores); } } - setAttribute(art, ATTRIBUTE_AVX, avx); - setAttribute(art, ATTRIBUTE_FMA, fma); + + // Show the most modern vector instructions. 
+ // If AVX is supported show it, otherwise show SSE + if (strcmp(avx, "No") == 0) { + setAttribute(art, ATTRIBUTE_SSE, sse); + } + else { + setAttribute(art, ATTRIBUTE_AVX, avx); + setAttribute(art, ATTRIBUTE_FMA, fma); + } + if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i); if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d); if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2); diff --git a/src/x86/apic.c b/src/x86/apic.c index 2892468c..98dd1e4f 100644 --- a/src/x86/apic.c +++ b/src/x86/apic.c @@ -369,6 +369,11 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) { } bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { + if (topo->cach == NULL) { + printWarn("get_topology_from_apic: cach is NULL"); + return false; + } + uint32_t apic_id; uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module); uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module); diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index 5f93afe8..a937cfa4 100644 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -218,7 +218,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) { #endif //First, check we have consistent data - if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { + if(freq == UNKNOWN_DATA || topo == NULL || topo->logical_cores == UNKNOWN_DATA) { return -1; } @@ -451,7 +451,7 @@ struct cpuInfo* get_cpu_info(void) { cpu->cach = NULL; cpu->feat = NULL; - uint32_t modules = 1; + cpu->num_cpus = 1; uint32_t eax = 0; uint32_t ebx = 0; uint32_t ecx = 0; @@ -507,12 +507,12 @@ struct cpuInfo* get_cpu_info(void) { cpu->hybrid_flag = (edx >> 15) & 0x1; } - if(cpu->hybrid_flag) modules = 2; + if(cpu->hybrid_flag) cpu->num_cpus = 2; struct cpuInfo* ptr = cpu; - for(uint32_t i=0; i < modules; i++) { + for(uint32_t i=0; i < cpu->num_cpus; i++) { int32_t first_core; - set_cpu_module(i, modules, &first_core); + set_cpu_module(i, cpu->num_cpus, &first_core); if(i > 0) { ptr->next_cpu 
= emalloc(sizeof(struct cpuInfo)); @@ -547,11 +547,7 @@ struct cpuInfo* get_cpu_info(void) { cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch); } - // If any field of the struct is NULL, - // return early, as next functions - // require non NULL fields in cach and topo ptr->cach = get_cache_info(ptr); - if(ptr->cach == NULL) return cpu; if(cpu->hybrid_flag) { ptr->topo = get_topology_info(ptr, ptr->cach, i); @@ -559,16 +555,23 @@ struct cpuInfo* get_cpu_info(void) { else { ptr->topo = get_topology_info(ptr, ptr->cach, -1); } - if(cpu->topo == NULL) return cpu; + + // If topo is NULL, return early, as get_peak_performance + // requires non-NULL topology. + if(ptr->topo == NULL) return cpu; } - cpu->num_cpus = modules; cpu->peak_performance = get_peak_performance(cpu, accurate_pp()); return cpu; } bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) { + if (topo->cach == NULL) { + printWarn("get_cache_topology_amd: cach is NULL"); + return false; + } + if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) { uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level; @@ -644,10 +647,12 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) { #ifdef __linux__ void get_topology_from_udev(struct topology* topo) { - // TODO: To be improved in the future topo->total_cores = get_ncores_from_cpuinfo(); - topo->logical_cores = topo->total_cores; - topo->physical_cores = topo->total_cores; + // TODO: To be improved in the future + // Conservative setting as we only know the total + // number of cores. 
+ topo->logical_cores = UNKNOWN_DATA; + topo->physical_cores = UNKNOWN_DATA; topo->smt_available = 1; topo->smt_supported = 1; topo->sockets = 1; @@ -711,8 +716,8 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int } else { printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels); - topo->physical_cores = 1; - topo->logical_cores = 1; + topo->physical_cores = UNKNOWN_DATA; + topo->logical_cores = UNKNOWN_DATA; topo->smt_available = 1; topo->smt_supported = 1; }