# 相关链接

（3）【第十届“泰迪杯”数据挖掘挑战赛】B题：电力系统负荷预测分析 问题二 时间突变分析 Python实现

# 完整代码下载链接

https://www.betterbench.top/#/35/detail

# 1 定义绘图函数

``````import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import plotly.io as pio
from chart_studio import plotly as py
import matplotlib.pyplot as plt
import plotly.graph_objs as go
# init_notebook_mode was called below without ever being imported, which
# raises NameError; it is provided by plotly.offline.
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)
import warnings
# Silence all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')
from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['FangSong']  # default font (needed to render the Chinese labels)
mpl.rcParams['axes.unicode_minus'] = False  # keep the minus sign '-' from rendering as a box in saved figures
``````
``````def plot_anomaly(ts,anomaly_pred = None,anomaly_true=None,file_name = 'file'):
fig = go.Figure()
yhat = go.Scatter(
x = ts.index,
y = ts,
mode = 'lines',  name = ts.name)
if  anomaly_pred is not None:
status = go.Scatter(
。。。略
if  anomaly_true is not None:
status = go.Scatter(
x = anomaly_true.index,
y = ts.loc[anomaly_true.index],
。。。略
fig.show()

def plot_anomaly_window(ts,anomaly_pred = None,file_name = 'file',window='1h'):
fig = go.Figure()
yhat = go.Scatter(
x = ts.index,
y = ts,
。。。略
fig.show()
``````

# 2 通过对原始测量应用阈值来查找异常值

``````df = pd.read_csv('./data/附件1-区域15分钟负荷数据.csv')
# Rename the timestamp column, parse it to datetimes and promote it to the index.
df.rename(columns={'数据时间': 'Time'}, inplace=True)
parsed_time = pd.to_datetime(df.pop('Time'))
df.set_index(parsed_time, inplace=True)
# The first remaining column is the series used by the following cells.
col = df.columns[0]
df.plot()
``````

``````df
``````

# 3 手动设置阈值

``````min_t = 203505
max_t = 300392.7345
# A reading raises the alarm when it lies outside the closed interval [min_t, max_t].
alarm_col = col + 'threshold_alarm'
df[alarm_col] = ~df[col].between(min_t, max_t)
# Keep only the flagged rows; their index carries the anomaly timestamps.
flagged = df.loc[df[alarm_col], alarm_col]
plot_anomaly(df[col], anomaly_pred=flagged, anomaly_true=None, file_name='file')
``````

# 4 使用分位数设置阈值

``````min_t = df[col].quantile(0.03)
max_t = df[col].quantile(q=0.97)
# Everything outside the closed interval [min_t, max_t] is flagged.
alarm_col = col + 'threshold_alarm'
df[alarm_col] = ~df[col].between(min_t, max_t)
flagged = df.loc[df[alarm_col], alarm_col]
plot_anomaly(df[col], anomaly_pred=flagged, anomaly_true=None, file_name='file')
``````

# 5 四分位距（IQR）原则

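正常取值区间为 $[\,Q_1 - c \times \mathrm{IQR},\ Q_3 + c \times \mathrm{IQR}\,]$，其中 $\mathrm{IQR} = Q_3 - Q_1$；落在该区间之外的点被视为异常值。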

``````Q1 = df[col].quantile(0.25)
Q3 = df[col].quantile(0.75)
IQR = Q3- Q1
。。。略

# Clip the series to [min_t, max_t]; any value the clip changed lies outside the band.
alarm_col = col + 'threshold_alarm'
observed = df[col]
df[alarm_col] = observed.clip(lower=min_t, upper=max_t).ne(observed)
flagged = df.loc[df[alarm_col], alarm_col]
plot_anomaly(observed, anomaly_pred=flagged, anomaly_true=None, file_name='file')
``````

# 6 设定分位数

``````window = 1
# Change of the series over `window` samples; the leading NaNs become 0.
diff_col = col + '_diff'
df[diff_col] = df[col].diff(periods=window).fillna(0)

# IQR fence on the differenced series: [Q1 - c*IQR, Q3 + c*IQR].
Q1, Q3 = (df[diff_col].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1
c = 2
min_t, max_t = Q1 - c * IQR, Q3 + c * IQR
alarm_col = col + 'diff_alarm'
df[alarm_col] = ~df[diff_col].between(min_t, max_t)
flagged = df.loc[df[alarm_col], alarm_col]
# NOTE(review): `window` counts samples for diff(), yet it is forwarded here as an
# hour string; how plot_anomaly_window interprets it is not shown - confirm the unit.
plot_anomaly_window(df[col], anomaly_pred=flagged, file_name='file', window=f'{window}h')
``````

## 6.1 导入数据

``````import pandas as pd
# Pivot to one column per '行业类型' value, indexed by '数据时间', filled from '有功功率最大值（kw）'.
# NOTE(review): the df built earlier in this file had '数据时间' renamed to 'Time' and moved
# into the index, so this call presumably expects a different, freshly loaded table whose
# loading step is not shown here - confirm before running.
metrics_df=pd.pivot_table(df,values='有功功率最大值（kw）',index='数据时间',columns='行业类型')
``````

``````metrics_df.reset_index(inplace=True)
# Replace missing cells with 0 and rename the timestamp column to 'Time'.
metrics_df = metrics_df.fillna(0).rename(columns={'数据时间': 'Time'})
``````

``````df = metrics_df
# Parse the 'Time' column and move it into the index (in place, so the object
# that df refers to is the one being modified).
parsed_time = pd.to_datetime(df.pop('Time'))
df.set_index(parsed_time, inplace=True)
col = df.columns[0]
df.plot()
``````

``````col = df.columns[1]
``````
``````Q1 = df[col].quantile(0.25)
Q3 = df[col].quantile(0.75)
IQR = Q3- Q1
。。。略

# Values altered by clipping to [min_t, max_t] are the ones outside the band.
alarm_col = col + 'threshold_alarm'
observed = df[col]
df[alarm_col] = observed.clip(lower=min_t, upper=max_t).ne(observed)
flagged = df.loc[df[alarm_col], alarm_col]
plot_anomaly_window(observed, anomaly_pred=flagged, window='1h', file_name='file')
``````

# 7 根据分位数找到时间突变

``````window = 5
# Moving average over the preceding `window` samples; closed='left' leaves the
# current sample out of its own average.
ma_col = col + 'ma'
df[ma_col] = df[col].rolling(window=window, closed='left').mean()
kpi_col = ma_col + 'diff'
# Deviation of each sample from that moving average; undefined leading rows become 0.
df[kpi_col] = df[col].sub(df[ma_col]).fillna(0)

# IQR fence on the deviation: [Q1 - c*IQR, Q3 + c*IQR].
Q1, Q3 = (df[kpi_col].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1
c = 2
min_t, max_t = Q1 - c * IQR, Q3 + c * IQR
alarm_col = kpi_col + 'threshold_alarm'
df[alarm_col] = ~df[kpi_col].between(min_t, max_t)
flagged = df.loc[df[alarm_col], alarm_col]
# NOTE(review): `window` is a sample count for rolling(), but is forwarded as an
# hour string; plot_anomaly_window's handling is not shown - confirm the unit.
plot_anomaly_window(df[col], anomaly_pred=flagged, file_name='file', window=f'{window}h')
``````