-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Automated outlier detection for adjust sum dialog #18723
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,11 @@ import { HomeAssistant } from "../../../types"; | |
import { showToast } from "../../../util/toast"; | ||
import type { DialogStatisticsAdjustSumParams } from "./show-dialog-statistics-adjust-sum"; | ||
|
||
/**
 * One hourly statistic grouped with the 5-minute statistics whose time range
 * lies inside that hour.
 */
interface CombinedStat {
  /** 5-minute statistics assigned to this bucket; empty when there are none. */
  fiveMin: Array<StatisticValue>;
  /** The hourly statistic, or `null` for a bucket that has no hourly value. */
  hour: StatisticValue | null;
}
|
||
@customElement("dialog-statistics-adjust-sum") | ||
export class DialogStatisticsFixUnsupportedUnitMetadata extends LitElement { | ||
@property({ attribute: false }) public hass!: HomeAssistant; | ||
|
@@ -196,6 +201,13 @@ export class DialogStatisticsFixUnsupportedUnitMetadata extends LitElement { | |
@value-changed=${this._dateTimeSelectorChanged} | ||
></ha-selector-datetime> | ||
<div class="stat-list">${stats}</div> | ||
<mwc-button | ||
slot="secondaryAction" | ||
.label=${this.hass.localize( | ||
"ui.panel.developer-tools.tabs.statistics.fix_issue.adjust_sum.outliers" | ||
)} | ||
@click=${this._fetchOutliers} | ||
></mwc-button> | ||
<mwc-button | ||
slot="primaryAction" | ||
dialogAction="cancel" | ||
|
@@ -349,6 +361,101 @@ export class DialogStatisticsFixUnsupportedUnitMetadata extends LitElement { | |
statId in stats5MinData ? stats5MinData[statId].slice(0, 5) : []; | ||
} | ||
|
||
/**
 * Loads the complete hourly and 5-minute history of the selected statistic
 * and publishes the values with the largest absolute `change` as the list of
 * candidates to adjust.
 *
 * The result is stored in `_statsHour` (sorted by descending absolute
 * change); `_stats5min` is set to an empty list.
 */
private async _fetchOutliers(): Promise<void> {
  // Reset both lists before fetching.
  this._stats5min = undefined;
  this._statsHour = undefined;
  const statId = this._params!.statistic.statistic_id;

  // Get all the data
  const start = new Date(0);
  const end = new Date();

  const statsHourData = await fetchStatistics(
    this.hass,
    start,
    end,
    [statId],
    "hour"
  );

  const statsHour = statId in statsHourData ? statsHourData[statId] : [];
  if (statsHour.length === 0) {
    // Nothing to rank: publish empty lists instead of leaving both fields
    // undefined after the request has completed.
    this._statsHour = [];
    this._stats5min = [];
    return;
  }

  const stats5MinData = await fetchStatistics(
    this.hass,
    start,
    end,
    [statId],
    "5minute"
  );

  // First datapoint of 5 minute data in the history is always junk since it
  // counts the entire sum as the change, which we don't want here.
  const stats5Min = (
    statId in stats5MinData ? stats5MinData[statId] : []
  ).slice(1);

  const combinedStatsData: CombinedStat[] = statsHour.map((hour) => ({
    hour,
    fiveMin: [],
  }));

  // Bucket for 5 minute values that do not fall inside any fetched hour.
  const lastHour: CombinedStat = { hour: null, fiveMin: [] };

  // Merge the two series with a single forward-moving cursor. Like the
  // previous implementation this relies on both lists being in ascending
  // time order. The cursor only skips hours that ended before the current
  // 5 minute value starts, so one unmatched value can no longer push every
  // later value into the fallback bucket.
  let cursor = 0;
  for (const sample of stats5Min) {
    while (cursor < combinedStatsData.length) {
      const passed = combinedStatsData[cursor].hour;
      if (passed && passed.end > sample.start) {
        break;
      }
      cursor++;
    }
    const candidate =
      cursor < combinedStatsData.length
        ? combinedStatsData[cursor]
        : undefined;
    const hour = candidate?.hour;
    if (
      candidate &&
      hour &&
      sample.start >= hour.start &&
      sample.end <= hour.end
    ) {
      candidate.fiveMin.push(sample);
    } else {
      lastHour.fiveMin.push(sample);
    }
  }

  combinedStatsData.push(lastHour);

  const numOutliers = 10;
  const magnitude = (s: StatisticValue): number => Math.abs(s.change ?? 0);

  // If an hour has no five minute data, add the hour value.
  // Otherwise, add the 5 minute values and ignore the hour value.
  // NOTE(review): an hour may hold only part of its 5 minute data (for
  // example when that data has been partially purged); the hour value is
  // still ignored in that case.
  // NOTE(review): hour and 5 minute values are ranked together although an
  // hourly change normally spans twelve 5 minute periods.
  const candidates: StatisticValue[] = [];
  for (const combined of combinedStatsData) {
    if (combined.fiveMin.length === 0) {
      if (combined.hour) {
        candidates.push(combined.hour);
      }
    } else {
      for (const sample of combined.fiveMin) {
        candidates.push(sample);
      }
    }
  }

  // Keep the values with the largest absolute change. Array sort is stable,
  // so values with an equal change keep their chronological order.
  const statsOutliers = candidates
    .sort((a, b) => magnitude(b) - magnitude(a))
    .slice(0, numOutliers);

  // Outliers are a possible mix of hour/5minute data, but the distinction
  // is not relevant here, as long as only one array is populated.
  this._statsHour = statsOutliers;
  this._stats5min = [];
}
|
||
private async _fixIssue(): Promise<void> { | ||
const unit = getDisplayUnit( | ||
this.hass, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would say an outlier is not the biggest 10 numbers, but should be x% higher or lower than the mean change value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure the distinction would matter that much for the typical use case here, but I'm not opposed to doing that (it's just more cpu work required to calculate the mean).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree it probably doesn't add much