Skip to content

Commit

Permalink
Update datetime parsing to use UTC for consistency across datasets
Browse files (browse the repository at this point in the history)
  • Loading branch information
petya-vasileva committed Dec 11, 2024
1 parent 2387648 commit a5fe306
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/ml/create_thrpt_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def createThrptDataset(dateFrom, dateTo):
# get the data
rawDf = pd.DataFrame(queryData(dateFrom, dateTo))
print(rawDf.head())
- rawDf['dt'] = pd.to_datetime(rawDf['from'], unit='ms')
+ rawDf['dt'] = pd.to_datetime(rawDf['from'], utc=True)
rawDf['src_site'] = rawDf['src_site'].str.upper()
rawDf['dest_site'] = rawDf['dest_site'].str.upper()

Expand Down
4 changes: 2 additions & 2 deletions src/ml/packet_loss_one_month_onehot.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ def one_month_data(plsDf_custom):
plsDf_onehot = pd.get_dummies(plsDf_custom,dtype=int)

# taking the index of the first 28 days for further training
- date_s = list(pd.to_datetime(plsDf_onehot['dt'],unit='ms')[:1])[0]
+ date_s = list(pd.to_datetime(plsDf_onehot['dt'], utc=True)[:1])[0]
date_s = date_s.date()
date_s = (date_s + datetime.timedelta(days=28))
try:
- end_index = plsDf_onehot.loc[(pd.to_datetime(plsDf_onehot['dt'],unit='ms').dt.date == date_s)][:1].index[0]
+ end_index = plsDf_onehot.loc[(pd.to_datetime(plsDf_onehot['dt'], utc=True).dt.date == date_s)][:1].index[0]
percentile = plsDf_onehot.index.get_loc(end_index) / len(plsDf_onehot)
except:
percentile = 0.8
Expand Down
4 changes: 2 additions & 2 deletions src/ml/packet_loss_preprocess_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def packet_loss_preprocess(plsDf_custom_x, model):

df_to_plot = plsDf_custom_x.copy()
df_to_plot['flag'] = y
- df_to_plot['dt'] = (pd.to_datetime(df_to_plot['dt'], unit='ms'))
+ df_to_plot['dt'] = (pd.to_datetime(df_to_plot['dt'], utc=True))

print('df_to_plot', df_to_plot.shape)

Expand All @@ -21,7 +21,7 @@ def packet_loss_preprocess(plsDf_custom_x, model):
# convert timestamp back to datetime
plsDf_onehot_plot = df_to_plot.copy()
plsDf_onehot_plot['flag'] = plsDf_custom_y.copy()
- plsDf_onehot_plot['dt'] = (pd.to_datetime(plsDf_onehot_plot['dt'], unit='ms'))
+ plsDf_onehot_plot['dt'] = (pd.to_datetime(plsDf_onehot_plot['dt'], utc=True))

print('plsDf_onehot_plot', plsDf_onehot_plot.shape)

Expand Down

0 comments on commit a5fe306

Please sign in to comment.