"""
|
|
Author Zahra Kharazian, zahra.kharazian@dsv.su.se
|
|
|
|
This code implements the CoPAL algorithm that employs
|
|
conformal prediction in active learning for regression tasks.
|
|
Suitable for multi-variate time series data
|
|
"""
|
|
|
|
|
|
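# Overview of the workflow implemented below:
#   1. Configuration: base learner, conformal-prediction variant (CP),
#      selection budget, and split percentages.
#   2. Data preparation: join operational readouts with time-to-event data
#      and derive the RUL target (length_of_study_time_step - time_step).
#   3. Per round: split vehicles into pool/train/test/calibration sets, then
#      run active-learning iterations in which vehicles are moved from the
#      pool to the train set under five selection policies (most uncertain,
#      most uncertain roulette, most certain, most certain roulette, random),
#      using conformal prediction intervals as the uncertainty measure.
#   4. Plot RMSE and average prediction-interval width per iteration, and
#      aggregate the results over rounds.
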
import random
import sys
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from crepes import WrapRegressor, ConformalRegressor, ConformalPredictiveSystem
from crepes.extras import binning, DifficultyEstimator

warnings.filterwarnings("ignore")

random_state = 1
data = 'componentX'

model = xgb.XGBRegressor(random_state=random_state)
# model = RandomForestRegressor(random_state=random_state)

CP = 'norm_Mondrian_CPS'
# CP = 'std_Mondrian_CPS'
# CP = 'Mondrian_regressor'

evaluation_metric = mean_squared_error
num_vehicle_selection = 226   # vehicles moved from the pool to the train set per iteration
al_iterations_count = 6       # active-learning iterations per round
data_fraction = 1             # fraction of the time-to-event data to use
num_rounds = 5                # independent repetitions of the whole experiment
train_percentage = 0.01
test_percentage = 0.25
cal_percentage = 0.14
pool_percentage = 0.6

np.random.seed(random_state)

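# The CP setting selects how prediction intervals are produced in
# conformal_prediction() below:
#   'Mondrian_regressor' : a Mondrian conformal regressor calibrated on bins
#                          derived from difficulty estimates (predict_int).
#   'norm_Mondrian_CPS'  : a Mondrian conformal predictive system fitted on
#                          calibration residuals, normalized by difficulty
#                          estimates (sigmas).
#   'std_Mondrian_CPS'   : a Mondrian conformal predictive system fitted on
#                          raw calibration residuals (no normalization at
#                          fit time).
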
# If all the readouts are needed for training
def join_op_tte_prep2(data_op, data_tte):
    """Join operational readouts with time-to-event data and derive the RUL target."""
    data_tte = data_tte.sample(frac=data_fraction)
    data_tte = data_tte[data_tte['in_study_repair'] == 1]
    data_op = data_op[data_op['vehicle_id'].isin(data_tte['vehicle_id'])]
    data_op_tte = pd.merge(data_op, data_tte, on=['vehicle_id'], how='left')
    data_op_tte['RUL'] = data_op_tte['length_of_study_time_step'] - data_op_tte['time_step']
    data_op_tte = data_op_tte.dropna()

    return data_op_tte

def X_y_split(df):
    """Split a dataframe into features X and the RUL target y."""
    y = df[['RUL']]
    X = df.drop(columns=['RUL', 'vehicle_id', 'length_of_study_time_step', 'time_step', 'in_study_repair'])
    return X, y

def conformal_prediction(model, df_train, df_pool, regressor='Mondrian_regressor'):  # This only works with RF but not with other models
    # Note: X_cal, y_cal and X_test are globals, set in the main loop below.

    X_train, y_train = X_y_split(df_train)
    X_train = X_train.values.astype(float)
    y_train = y_train.values.ravel()

    X_pool, y_pool = X_y_split(df_pool)
    X_pool = X_pool.values.astype(float)
    y_pool = y_pool.values.ravel()

    mdl = WrapRegressor(model)
    mdl.fit(X_train, y_train)

    # Difficulty estimates (sigmas) used for normalization and/or binning.
    de = DifficultyEstimator()
    de.fit(X_train, y=y_train)

    sigmas_cal = de.apply(X_cal)
    sigmas_pool = de.apply(X_pool)
    sigmas_test = de.apply(X_test)

    y_hat_cal = mdl.predict(X_cal)
    residuals_cal = y_cal - y_hat_cal
    y_hat_pool = mdl.predict(X_pool)
    y_hat_test = mdl.predict(X_test)

    if regressor == 'Mondrian_regressor':
        # Mondrian conformal regressor: calibrate per difficulty bin.
        bins_cal, bin_thresholds = binning(sigmas_cal, bins=10)
        mdl.calibrate(X_cal, y_cal, bins=bins_cal)

        bins_pool = binning(sigmas_pool, bins=bin_thresholds)
        pool_intervals = mdl.predict_int(X_pool, bins=bins_pool)

        bins_test = binning(sigmas_test, bins=bin_thresholds)
        test_intervals = mdl.predict_int(X_test, bins=bins_test)

    elif regressor == 'norm_Mondrian_CPS':
        # Normalized Mondrian conformal predictive system.
        bins_cal, bin_thresholds = binning(y_hat_cal, bins=5)
        mdl.calibrate(X_cal, y_cal, sigmas=sigmas_cal, bins=bins_cal, cps=True)

        bins_pool = binning(mdl.predict(X_pool), bins=bin_thresholds)

        cps_mond_norm = ConformalPredictiveSystem().fit(residuals_cal,
                                                        sigmas=sigmas_cal,
                                                        bins=bins_cal)

        pool_intervals = cps_mond_norm.predict(y_hat_pool,
                                               sigmas=sigmas_pool,
                                               bins=bins_pool,
                                               lower_percentiles=2.5,
                                               higher_percentiles=97.5)

        bins_test = binning(mdl.predict(X_test), bins=bin_thresholds)

        test_intervals = cps_mond_norm.predict(y_hat_test,
                                               sigmas=sigmas_test,
                                               bins=bins_test,
                                               lower_percentiles=2.5,
                                               higher_percentiles=97.5)

    elif regressor == 'std_Mondrian_CPS':
        # Standard (non-normalized) Mondrian conformal predictive system.
        bins_cal, bin_thresholds = binning(y_hat_cal, bins=5)
        mdl.calibrate(X_cal, y_cal, sigmas=sigmas_cal, bins=bins_cal, cps=True)

        bins_pool = binning(mdl.predict(X_pool), bins=bin_thresholds)
        cps_mond_std = ConformalPredictiveSystem().fit(residuals_cal, bins=bins_cal)

        pool_intervals = cps_mond_std.predict(y_hat_pool,
                                              sigmas=sigmas_pool,
                                              bins=bins_pool,
                                              lower_percentiles=2.5,
                                              higher_percentiles=97.5)

        bins_test = binning(mdl.predict(X_test), bins=bin_thresholds)
        test_intervals = cps_mond_std.predict(y_hat_test,
                                              sigmas=sigmas_test,
                                              bins=bins_test,
                                              lower_percentiles=2.5,
                                              higher_percentiles=97.5)

    return pool_intervals, test_intervals

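# A minimal, self-contained sketch (defined here but never called) of the
# crepes workflow that conformal_prediction() uses, shown on synthetic data.
# Everything inside this function (names, sizes, the 5-bin choice) is
# illustrative only and not part of the pipeline above.
def _crepes_workflow_sketch():
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split

    # Synthetic data split into proper training, calibration and test sets.
    X, y = make_regression(n_samples=1000, n_features=5, noise=10.0, random_state=0)
    X_tr, X_rest, y_tr, y_rest = train_test_split(X, y, test_size=0.5, random_state=0)
    X_c, X_te, y_c, y_te = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

    # Fit the underlying learner through the crepes wrapper.
    wrapped = WrapRegressor(xgb.XGBRegressor(random_state=0))
    wrapped.fit(X_tr, y_tr)

    # Per-instance difficulty estimates (sigmas) used for normalization.
    est = DifficultyEstimator()
    est.fit(X_tr, y=y_tr)
    sigmas_c = est.apply(X_c)
    sigmas_te = est.apply(X_te)

    # Mondrian categories: bin the calibration predictions and reuse the
    # thresholds for the test predictions.
    y_hat_c = wrapped.predict(X_c)
    y_hat_te = wrapped.predict(X_te)
    bins_c, thresholds = binning(y_hat_c, bins=5)
    bins_te = binning(y_hat_te, bins=thresholds)

    # Fit a normalized Mondrian conformal predictive system on the
    # calibration residuals and extract central 95% intervals.
    cps = ConformalPredictiveSystem().fit(y_c - y_hat_c, sigmas=sigmas_c, bins=bins_c)
    intervals = cps.predict(y_hat_te, sigmas=sigmas_te, bins=bins_te,
                            lower_percentiles=2.5, higher_percentiles=97.5)
    return intervals
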
def sample_selection(pred_intervals_cps, df_pool, df_train, policy='most_uncertain'):
    """Move num_vehicle_selection vehicles from the pool to the train set according to the given policy."""
    df_interval_temp = pd.DataFrame(pred_intervals_cps, columns=['min_int', 'max_int'])
    merged_df = df_pool.copy()
    merged_df.reset_index(drop=True, inplace=True)
    merged_df['max_int'] = df_interval_temp['max_int']
    merged_df['min_int'] = df_interval_temp['min_int']
    merged_df['diff_int'] = merged_df['max_int'] - merged_df['min_int']
    # Average interval width per vehicle: the per-vehicle uncertainty score.
    merged_ave_df = merged_df.groupby('vehicle_id').agg(Ave_intervals=('diff_int', 'mean')).reset_index()

    if policy == 'most_uncertain':
        merged_ave_df_sorted = merged_ave_df.sort_values(by='Ave_intervals', ascending=False)
        df_int_certainty = merged_ave_df_sorted.head(num_vehicle_selection)
        # remove these "num_vehicle_selection" samples from pool
        selected_vehicles = df_int_certainty['vehicle_id']

    elif policy == 'most_uncertain_roulette':
        # Sample vehicles with probability proportional to their average interval width.
        merged_ave_df['probability'] = merged_ave_df['Ave_intervals'] / merged_ave_df['Ave_intervals'].sum()
        selected_vehicles = merged_ave_df.sample(n=num_vehicle_selection, weights='probability', random_state=random_state)['vehicle_id']

    elif policy == 'most_certain_roulette':
        # Flip the widths so that narrower intervals get higher selection probability.
        merged_ave_df['flipped_Ave_intervals'] = (merged_ave_df['Ave_intervals'].max() + 1 - merged_ave_df['Ave_intervals'])
        merged_ave_df['probability'] = merged_ave_df['flipped_Ave_intervals'] / merged_ave_df['flipped_Ave_intervals'].sum()
        selected_vehicles = merged_ave_df.sample(n=num_vehicle_selection, weights='probability', random_state=random_state)['vehicle_id']

    elif policy == 'most_certain':
        merged_ave_df_sorted = merged_ave_df.sort_values(by='Ave_intervals', ascending=True)
        df_int_certainty = merged_ave_df_sorted.head(num_vehicle_selection)
        # remove these "num_vehicle_selection" samples from pool
        selected_vehicles = df_int_certainty['vehicle_id']

    elif policy == 'random':
        unique_vehicles = df_pool.vehicle_id.unique()
        unique_vehicles = pd.DataFrame({'vehicle_id': unique_vehicles})
        selected_vehicles = unique_vehicles.sample(n=num_vehicle_selection, random_state=random_state)
        selected_vehicles = selected_vehicles['vehicle_id']

    pool_new_df = df_pool[~df_pool['vehicle_id'].isin(selected_vehicles)]

    # add these "num_vehicle_selection" samples to train
    df_pool_selected_vehicles = df_pool[df_pool['vehicle_id'].isin(selected_vehicles)]
    train_new_df = pd.concat([df_pool_selected_vehicles, df_train])

    return train_new_df, pool_new_df

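# A toy illustration (defined here but never called) of the roulette-wheel
# selection used by the 'most_uncertain_roulette' policy above; the frame
# and its values are made up for illustration only.
def _roulette_selection_example():
    toy = pd.DataFrame({'vehicle_id': [1, 2, 3, 4],
                        'Ave_intervals': [10.0, 40.0, 25.0, 25.0]})
    # Wider intervals -> higher probability of being selected.
    toy['probability'] = toy['Ave_intervals'] / toy['Ave_intervals'].sum()
    return toy.sample(n=2, weights='probability', random_state=0)['vehicle_id']
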
def evaluate_regressor(model):
    """Evaluate the fitted model on the global test set (RMSE, since evaluation_metric is mean_squared_error with squared=False)."""
    pred = model.predict(X_test)
    rmse = evaluation_metric(y_test, pred, squared=False)
    return rmse

def split_df(df):
    """Split the data by vehicle_id into pool, train, test and calibration parts (no vehicle is shared across parts)."""
    df = df.sample(frac=1, random_state=random_state)
    # Get the unique vehicle groups
    unique_groups = df['vehicle_id'].unique()
    # Calculate sizes for each part
    total_groups = len(unique_groups)
    pool_size = int(pool_percentage * total_groups)
    train_size = int(train_percentage * total_groups)
    test_size = int(test_percentage * total_groups)
    calibration_size = int(cal_percentage * total_groups)  # informational; the calibration part is the remainder

    # Divide the unique groups into parts
    parts = [unique_groups[:pool_size],
             unique_groups[pool_size:pool_size + train_size],
             unique_groups[pool_size + train_size:pool_size + train_size + test_size],
             unique_groups[pool_size + train_size + test_size:]]

    # Create DataFrames for each part
    part_dfs = []
    for part in parts:
        part_df = df[df['vehicle_id'].isin(part)]
        part_dfs.append(part_df)

    return part_dfs[0], part_dfs[1], part_dfs[2], part_dfs[3]

"""Download the data here:
|
|
https://snd.se/en/catalogue/dataset/2024-34
|
|
"""
|
|
|
|
train_op = pd.read_csv("your address here/train_operational_readouts.csv")
train_tte = pd.read_csv("your address here/train_tte.csv")
train_spec = pd.read_csv("your address here/train_specification.csv")

data_op_tte = join_op_tte_prep2(train_op, train_tte)

######################## One random readout for each vehicle ######################
# Group the DataFrame by 'vehicle_id' and sample a random readout per vehicle
# data_op_tte = data_op_tte.groupby('vehicle_id').apply(lambda x: x.sample(n=1)).reset_index(drop=True)

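# Per-round result containers; the suffixes follow the selection policies:
#   mu  = most uncertain          mur = most uncertain roulette
#   mc  = most certain            mcr = most certain roulette
#   rnd = random selection
# results_* hold RMSE per iteration, intervals_* hold the average test
# prediction-interval width per iteration.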
results_mu = []
results_mc = []
results_mur = []
results_mcr = []
results_rnd = []

intervals_mu = []
intervals_mc = []
intervals_mur = []
intervals_mcr = []
intervals_rnd = []

figure_counter = 1

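# Each round re-splits the data with a new random_state and keeps five
# parallel copies of the train/pool sets, one per selection policy, so all
# policies start each round from identical conditions.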
for round in range(num_rounds):
    random_state = round + 1
    print('\n\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> round: ', round)

    # Flush the output
    sys.stdout.flush()

    df_pool, df_train, df_test, df_cal = split_df(data_op_tte)

    X_test, y_test = X_y_split(df_test)
    X_cal, y_cal = X_y_split(df_cal)

    X_cal = X_cal.values.astype(float)
    y_cal = y_cal.values.ravel()

    # Separate train/pool copies per selection policy.
    df_train_ur = df_train.copy()
    df_pool_ur = df_pool.copy()

    df_train_c = df_train.copy()
    df_pool_c = df_pool.copy()

    df_train_cr = df_train.copy()
    df_pool_cr = df_pool.copy()

    df_train_rand = df_train.copy()
    df_pool_rand = df_pool.copy()

    # RMSE per iteration for each policy (despite the mae_* names,
    # evaluate_regressor returns RMSE).
    mae_least_conf = []
    mae_most_conf = []
    mae_most_conf_roulette = []
    mae_least_conf_roulette = []
    mae_random = []

    # Average test prediction-interval width per iteration for each policy.
    test_int_u = []
    test_int_ur = []
    test_int_c = []
    test_int_cr = []
    test_int_rand = []

    for iter in range(al_iterations_count):
        print('iter: ', iter, ' #####################################################################################################################')

        # Flush the output
        sys.stdout.flush()

        ########################################### Random ######################################
        model = xgb.XGBRegressor(random_state=random_state)
        # model = RandomForestRegressor(random_state=random_state)

        _, test_intervals_rand = conformal_prediction(model, df_train_rand, df_pool_rand, regressor=CP)
        average_test_inter_rand = (test_intervals_rand[:, 1] - test_intervals_rand[:, 0]).mean()
        test_int_rand.append(average_test_inter_rand)

        MAE_rand = evaluate_regressor(model)

        print('RMSE_rand: ', MAE_rand, ', X_train_rand.shape[0]:', df_train_rand.shape[0], ', vehicles in X_train_rand:', df_train_rand.vehicle_id.nunique(), ', X_pool_rand.shape[0]:', df_pool_rand.shape[0], 'vehicles in X_pool:', df_pool_rand.vehicle_id.nunique())
        mae_random.append(MAE_rand)

        df_train_rand, df_pool_rand = sample_selection(None, df_pool_rand, df_train_rand,
                                                       policy='random')

        ###################################### Most Uncertain ######################################
        model = xgb.XGBRegressor(random_state=random_state)
        # model = RandomForestRegressor(random_state=random_state)

        pred_intervals_cps, test_intervals_u = conformal_prediction(model, df_train, df_pool, regressor=CP)
        average_test_inter_u = (test_intervals_u[:, 1] - test_intervals_u[:, 0]).mean()
        test_int_u.append(average_test_inter_u)

        MAE_u = evaluate_regressor(model)

        print('RMSE_most_uncertain: ', MAE_u, ', X_train.shape[0]:', df_train.shape[0], ', vehicles in X_train:',
              df_train.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool.shape[0], 'vehicles in X_pool:',
              df_pool.vehicle_id.nunique())
        mae_least_conf.append(MAE_u)

        df_train, df_pool = sample_selection(pred_intervals_cps,
                                             df_pool,
                                             df_train,
                                             policy='most_uncertain')

        ###################################### Uncertain roulette ######################################
        model = xgb.XGBRegressor(random_state=random_state)
        # model = RandomForestRegressor(random_state=random_state)

        pred_intervals_cps_ur, test_intervals_ur = conformal_prediction(model, df_train_ur, df_pool_ur, regressor=CP)
        average_test_inter_ur = (test_intervals_ur[:, 1] - test_intervals_ur[:, 0]).mean()
        test_int_ur.append(average_test_inter_ur)

        MAE_ur = evaluate_regressor(model)

        print('RMSE_most_uncertain_roulette: ', MAE_ur, ', X_train.shape[0]:', df_train_ur.shape[0], ', vehicles in X_train:',
              df_train_ur.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_ur.shape[0], 'vehicles in X_pool:',
              df_pool_ur.vehicle_id.nunique())
        mae_least_conf_roulette.append(MAE_ur)

        df_train_ur, df_pool_ur = sample_selection(pred_intervals_cps_ur,
                                                   df_pool_ur,
                                                   df_train_ur,
                                                   policy='most_uncertain_roulette')

        ###################################### Most certain ######################################
        model = xgb.XGBRegressor(random_state=random_state)
        # model = RandomForestRegressor(random_state=random_state)

        pred_intervals_cps_c, test_intervals_c = conformal_prediction(model, df_train_c, df_pool_c, regressor=CP)
        average_test_inter_c = (test_intervals_c[:, 1] - test_intervals_c[:, 0]).mean()
        test_int_c.append(average_test_inter_c)

        MAE_c = evaluate_regressor(model)

        print('RMSE_most_certain: ', MAE_c, ', X_train.shape[0]:', df_train_c.shape[0], ', vehicles in X_train:', df_train_c.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_c.shape[0], 'vehicles in X_pool:', df_pool_c.vehicle_id.nunique())
        mae_most_conf.append(MAE_c)

        df_train_c, df_pool_c = sample_selection(pred_intervals_cps_c,
                                                 df_pool_c,
                                                 df_train_c,
                                                 policy='most_certain')

        ###################################### Certain roulette ######################################
        model = xgb.XGBRegressor(random_state=random_state)
        # model = RandomForestRegressor(random_state=random_state)

        pred_intervals_cps_cr, test_intervals_cr = conformal_prediction(model, df_train_cr, df_pool_cr, regressor=CP)
        average_test_inter_cr = (test_intervals_cr[:, 1] - test_intervals_cr[:, 0]).mean()
        test_int_cr.append(average_test_inter_cr)

        MAE_cr = evaluate_regressor(model)

        print('RMSE_most_certain_roulette: ', MAE_cr, ', X_train.shape[0]:', df_train_cr.shape[0], ', vehicles in X_train:',
              df_train_cr.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_cr.shape[0], 'vehicles in X_pool:',
              df_pool_cr.vehicle_id.nunique())
        mae_most_conf_roulette.append(MAE_cr)

        df_train_cr, df_pool_cr = sample_selection(pred_intervals_cps_cr,
                                                   df_pool_cr,
                                                   df_train_cr,
                                                   policy='most_certain_roulette')

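    # Final evaluation: after the last selection step, retrain and evaluate
    # each policy's model once more, so every curve has
    # al_iterations_count + 1 points.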
    print('iter: Last', ' #####################################################################################################################')
    model = xgb.XGBRegressor(random_state=random_state)
    # model = RandomForestRegressor(random_state=random_state)

    _, test_int = conformal_prediction(model, df_train_rand, df_pool_rand, regressor=CP)
    average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean()
    test_int_rand.append(average_test_inter)
    MAE_rand = evaluate_regressor(model)
    print('RMSE_rand: ', MAE_rand, ', X_train_rand.shape[0]:', df_train_rand.shape[0], ', vehicles in X_train_rand:', df_train_rand.vehicle_id.nunique(), ', X_pool_rand.shape[0]:', df_pool_rand.shape[0], 'vehicles in X_pool:', df_pool_rand.vehicle_id.nunique())
    mae_random.append(MAE_rand)

    model = xgb.XGBRegressor(random_state=random_state)
    # model = RandomForestRegressor(random_state=random_state)

    _, test_int = conformal_prediction(model, df_train, df_pool, regressor=CP)
    average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean()
    test_int_u.append(average_test_inter)
    MAE_u = evaluate_regressor(model)
    print('RMSE_most_uncertain: ', MAE_u, ', X_train.shape[0]:', df_train.shape[0], ', vehicles in X_train:',
          df_train.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool.shape[0], 'vehicles in X_pool:',
          df_pool.vehicle_id.nunique())
    mae_least_conf.append(MAE_u)

    model = xgb.XGBRegressor(random_state=random_state)
    # model = RandomForestRegressor(random_state=random_state)

    _, test_int = conformal_prediction(model, df_train_ur, df_pool_ur, regressor=CP)
    average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean()
    test_int_ur.append(average_test_inter)
    MAE_ur = evaluate_regressor(model)
    print('RMSE_most_uncertain_roulette: ', MAE_ur, ', X_train.shape[0]:', df_train_ur.shape[0],
          ', vehicles in X_train:',
          df_train_ur.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_ur.shape[0], 'vehicles in X_pool:',
          df_pool_ur.vehicle_id.nunique())
    mae_least_conf_roulette.append(MAE_ur)

    model = xgb.XGBRegressor(random_state=random_state)
    # model = RandomForestRegressor(random_state=random_state)

    _, test_int = conformal_prediction(model, df_train_c, df_pool_c, regressor=CP)
    average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean()
    test_int_c.append(average_test_inter)
    MAE_c = evaluate_regressor(model)
    print('RMSE_most_certain: ', MAE_c, ', X_train.shape[0]:', df_train_c.shape[0], ', vehicles in X_train:',
          df_train_c.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_c.shape[0], 'vehicles in X_pool:',
          df_pool_c.vehicle_id.nunique())
    mae_most_conf.append(MAE_c)

    model = xgb.XGBRegressor(random_state=random_state)
    # model = RandomForestRegressor(random_state=random_state)

    _, test_int = conformal_prediction(model, df_train_cr, df_pool_cr, regressor=CP)
    average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean()
    test_int_cr.append(average_test_inter)
    MAE_cr = evaluate_regressor(model)
    print('RMSE_most_certain_roulette: ', MAE_cr, ', X_train.shape[0]:', df_train_cr.shape[0], ', vehicles in X_train:',
          df_train_cr.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_cr.shape[0], 'vehicles in X_pool:',
          df_pool_cr.vehicle_id.nunique())
    mae_most_conf_roulette.append(MAE_cr)

    # Train-set sizes shown on the secondary x-axis of the plots below.
    sizes_list = list(range(df_train_rand.vehicle_id.nunique(), 0, -num_vehicle_selection))
    sizes_array = np.array(sizes_list)
    sizes_array.sort()  # Sort the list in ascending order

    # Plot per-iteration RMSE values with dataset size on a secondary x-axis
    plt.figure()

    # Plot RMSE values per policy
    plt.plot(range(al_iterations_count + 1), mae_least_conf, label='least confident')
    plt.plot(range(al_iterations_count + 1), mae_least_conf_roulette, label='least confident roulette')
    plt.plot(range(al_iterations_count + 1), mae_most_conf, label='most confident')
    plt.plot(range(al_iterations_count + 1), mae_most_conf_roulette, label='most confident roulette')
    plt.plot(range(al_iterations_count + 1), mae_random, '-.', label='random selection')

    # Add dataset size labels on the secondary x-axis
    ax = plt.gca()
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())
    ax2.set_xticks(range(al_iterations_count + 1))
    ax2.set_xticklabels(sizes_array)

    # Set labels for both x-axes
    ax.set_xlabel("Active Learning Iterations")
    ax2.set_xlabel("Number of vehicles in the train set")
    ax.set_ylabel("RMSE")
    plt.title('Active Learning round ' + str(round))
    ax.legend()
    figure_counter = figure_counter + 1

    # Plot average test prediction-interval widths per iteration
    plt.figure()
    plt.plot(range(al_iterations_count + 1), test_int_u, label='most uncertain')
    plt.plot(range(al_iterations_count + 1), test_int_ur, label='most uncertain roulette')
    plt.plot(range(al_iterations_count + 1), test_int_c, label='most certain')
    plt.plot(range(al_iterations_count + 1), test_int_cr, label='most certain roulette')
    plt.plot(range(al_iterations_count + 1), test_int_rand, '-.', label='random selection')

    # Add dataset size labels on the secondary x-axis
    ax = plt.gca()
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())
    ax2.set_xticks(range(al_iterations_count + 1))
    ax2.set_xticklabels(sizes_array)

    # Set labels for both x-axes
    ax.set_xlabel("Active Learning Iterations")
    ax2.set_xlabel("Number of vehicles in the train set")
    ax.set_ylabel("Average Test Intervals")
    plt.title('Active Learning round ' + str(round))
    ax.legend()
    figure_counter = figure_counter + 1

    results_mu.append(mae_least_conf)
    results_mur.append(mae_least_conf_roulette)
    results_mc.append(mae_most_conf)
    results_mcr.append(mae_most_conf_roulette)
    results_rnd.append(mae_random)

    intervals_mu.append(test_int_u)
    intervals_mur.append(test_int_ur)
    intervals_mc.append(test_int_c)
    intervals_mcr.append(test_int_cr)
    intervals_rnd.append(test_int_rand)

final_res = pd.DataFrame({'mu': results_mu, 'mur': results_mur,
                          'mc': results_mc, 'mcr': results_mcr,
                          'rnd': results_rnd})

# final_res = pd.DataFrame({'mu':results_mu, 'mur':results_mur, 'rnd':results_rnd})

mu_df = pd.DataFrame(final_res.mu.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
mur_df = pd.DataFrame(final_res.mur.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
mc_df = pd.DataFrame(final_res.mc.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
mcr_df = pd.DataFrame(final_res.mcr.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
rnd_df = pd.DataFrame(final_res.rnd.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])

final_intervals = pd.DataFrame({'mu': intervals_mu, 'mur': intervals_mur,
                                'mc': intervals_mc, 'mcr': intervals_mcr,
                                'rnd': intervals_rnd})

mu_interval_df = pd.DataFrame(final_intervals.mu.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
mur_interval_df = pd.DataFrame(final_intervals.mur.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
mc_interval_df = pd.DataFrame(final_intervals.mc.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
mcr_interval_df = pd.DataFrame(final_intervals.mcr.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])
rnd_interval_df = pd.DataFrame(final_intervals.rnd.to_list(), columns=['itr' + str(i) for i in range(al_iterations_count + 1)])

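# Aggregate over rounds: each *_df has one row per round and one column per
# iteration; the summary plots below show the mean across rounds with a
# +/- one standard deviation band.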
x = range(al_iterations_count + 1)
plt.figure()


def plot_one_curve(df, label, color):
    """Plot the across-round mean of df with a +/- one standard deviation shaded band."""
    y = np.array(df.mean().to_list())
    deviation = np.array(df.std().to_list())
    plt.plot(x, y, label=label)
    plt.fill_between(x, y - deviation, y + deviation, color=color, alpha=0.1)


plot_one_curve(mu_df, 'most uncertain', 'blue')
plot_one_curve(mur_df, 'most uncertain roulette', 'red')
plot_one_curve(mc_df, 'most certain', 'green')
plot_one_curve(mcr_df, 'most certain roulette', 'magenta')
plot_one_curve(rnd_df, 'random selection', 'purple')

# Add dataset size labels on the secondary x-axis
ax = plt.gca()
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(range(al_iterations_count + 1))
ax2.set_xticklabels(sizes_array)

# Set labels for both x-axes
ax.set_xlabel("Active Learning Iterations")
ax2.set_xlabel("Number of vehicles in the train set")
ax.set_ylabel("RMSE")
plt.title('Active Learning')
ax.legend()

figure_counter = figure_counter + 1

x = range(al_iterations_count + 1)
plt.figure()

plot_one_curve(mu_interval_df, 'most uncertain', 'blue')
plot_one_curve(mur_interval_df, 'most uncertain roulette', 'red')
plot_one_curve(mc_interval_df, 'most certain', 'green')
plot_one_curve(mcr_interval_df, 'most certain roulette', 'magenta')
plot_one_curve(rnd_interval_df, 'random selection', 'purple')

# Add dataset size labels on the secondary x-axis
ax = plt.gca()
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(range(al_iterations_count + 1))
ax2.set_xticklabels(sizes_array)

# Set labels for both x-axes
ax.set_xlabel("Active Learning Iterations")
ax2.set_xlabel("Number of vehicles in the train set")
ax.set_ylabel("Prediction Interval for Test set")
plt.title('Active Learning')
ax.legend()
figure_counter = figure_counter + 1

plt.show()