From 59c875524c7eff63354b5f4aed61f11555a1c0a7 Mon Sep 17 00:00:00 2001 From: Nick Wardle Date: Mon, 17 Jun 2024 11:53:35 +0200 Subject: [PATCH] mean -> median --- src/HybridNew.cc | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/HybridNew.cc b/src/HybridNew.cc index fa1dcc458d9..9564f1cf0a2 100644 --- a/src/HybridNew.cc +++ b/src/HybridNew.cc @@ -1223,7 +1223,6 @@ std::pair HybridNew::eval(const RooStats::HypoTestResult &hcres, for (int i = 0, n = abssdist.size(); i < n; ++i) abssdist[i] = sdist[i]; } - // should remove outliers from the distribution (will return immediately if no threshold set) removeOutliers(&abssdist,&abssweight); removeOutliers(&absbdist,&absbweight); @@ -1235,6 +1234,21 @@ std::pair HybridNew::eval(const RooStats::HypoTestResult &hcres, result.SetTestStatisticData(absdata); if (testStat_ == "LHCFC") result.SetPValueIsRightTail(!result.GetPValueIsRightTail()); + // print comparison of Results before and after cleaning // + CombineLogger::instance().log("HybridNew.cc",__LINE__,std::string(Form("Original: r = %g, CLs = %g +/- %g\n\t1-Pb = %g +/- %g\n\tPmu = %g +/- %g" + ,rVal + ,hcres.CLs(), hcres.CLsError() + ,hcres.CLb(), hcres.CLbError() + ,hcres.CLsplusb(), hcres.CLsplusbError())) + ,__func__); + CombineLogger::instance().log("HybridNew.cc",__LINE__,std::string(Form("Update: r = %g, CLs = %g +/- %g\n\t1-Pb = %g +/- %g\n\tPmu = %g +/- %g" + ,rVal + ,result.CLs(), result.CLsError() + ,result.CLb(), result.CLbError() + ,result.CLsplusb(), result.CLsplusbError())) + ,__func__); + + if (CLs_) { return std::pair(result.CLs(), result.CLsError()); } else { @@ -1774,18 +1788,22 @@ std::vector > HybridNew::findIntervalsFromSplines(TGrap void HybridNew::removeOutliers(std::vector *sd, std::vector *sw){ if ( removeOulierForPvalThreshold_ <= 0 ) return; - // calculate mean and stddev of distriution - double mean = std::accumulate(sd->begin(), sd->end(), 0.0) / sd->size(); + // calculate median and stddev of distriution + std::vector sorted_sd(sd->size()); + std::partial_sort_copy(std::begin(*sd), std::end(*sd), std::begin(sorted_sd), std::end(sorted_sd)); + + double median = sorted_sd[(int)0.5*sorted_sd.size()]; + double accum = 0.0; std::for_each (std::begin(*sd), std::end(*sd), [&](const double d) { - accum += (d - mean) * (d - mean); + accum += (d - median) * (d - median); }); double stdev = sqrt(accum / (sd->size())); - + // filter outliers based on threshold std::vector indices; for(unsigned int i = 0; i < sd->size(); i++) { - if(std::abs((*sd)[i] - mean) > removeOulierForPvalThreshold_*stdev) { + if(std::abs((*sd)[i] - median) > removeOulierForPvalThreshold_*stdev) { indices.push_back(i); } } @@ -1795,4 +1813,5 @@ void HybridNew::removeOutliers(std::vector *sd, std::vector sd->erase(sd->begin() + *it); sw->erase(sw->begin() + *it); } -} \ No newline at end of file +} +