Skip to content

Commit

Permalink
Handle NaN when binning histogram data
Browse files Browse the repository at this point in the history
  • Loading branch information
Blake-Madden committed Jul 24, 2024
1 parent 505149d commit 3d6140b
Showing 1 changed file with 19 additions and 17 deletions.
36 changes: 19 additions & 17 deletions src/graphs/histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,13 +284,16 @@ namespace Wisteria::Graphs
void Histogram::SortIntoRanges(const std::optional<size_t> binCount)
{
if (GetDataset() == nullptr || m_validN == 0)
{ return; }
double minVal = *std::min_element(
m_continuousColumn->GetValues().cbegin(),
m_continuousColumn->GetValues().cend());
double maxVal = *std::max_element(
m_continuousColumn->GetValues().cbegin(),
m_continuousColumn->GetValues().cend());
{
return;
}
std::vector<double> validData;
validData.reserve(GetDataset()->GetRowCount());
std::copy_if(m_continuousColumn->GetValues().cbegin(),
m_continuousColumn->GetValues().cend(), std::back_inserter(validData),
[](auto x) { return std::isfinite(x); });
double minVal = *std::min_element(validData.cbegin(), validData.cend());
double maxVal = *std::max_element(validData.cbegin(), validData.cend());
// If data falls into a small range (e.g., < 2), then forcibly turn off rounding and integer binning.
// Make sure that the range is larger than 0 though (otherwise there will probably just be one bin
// and integer mode would be better there).
Expand Down Expand Up @@ -610,16 +613,15 @@ namespace Wisteria::Graphs
// Scott
else
{
const auto minVal = *std::min_element(
m_continuousColumn->GetValues().cbegin(),
m_continuousColumn->GetValues().cend());
const auto maxVal = *std::max_element(
m_continuousColumn->GetValues().cbegin(),
m_continuousColumn->GetValues().cend());
const auto sd = statistics::standard_deviation(
m_continuousColumn->GetValues(), true);
return safe_divide(maxVal - minVal,
3.5 * safe_divide(sd, std::cbrt(m_validN)) );
std::vector<double> validData;
validData.reserve(GetDataset()->GetRowCount());
std::copy_if(m_continuousColumn->GetValues().cbegin(),
m_continuousColumn->GetValues().cend(), std::back_inserter(validData),
[](auto x) { return std::isfinite(x); });
const auto minVal = *std::min_element(validData.cbegin(), validData.cend());
const auto maxVal = *std::max_element(validData.cbegin(), validData.cend());
const auto sd = statistics::standard_deviation(validData, true);
return safe_divide(maxVal - minVal, 3.5 * safe_divide(sd, std::cbrt(m_validN)));
}
}

Expand Down

0 comments on commit 3d6140b

Please sign in to comment.