# CoPAL/CoPAL_Share.py
# 2024-04-15 09:22:45 +02:00
#
# 597 lines
# 25 KiB
# Python

"""
Author Zahra Kharazian, zahra.kharazian@dsv.su.se
This code implements the CoPAL algorithm that employs
conformal prediction in active learning for regression tasks.
Suitable for multi-variate time series data
"""
import random
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from crepes import WrapRegressor
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from crepes import ConformalRegressor, ConformalPredictiveSystem
from crepes.extras import binning, DifficultyEstimator
from sklearn.metrics import mean_absolute_error, mean_squared_error
import sys
# Seed shared by the numpy RNG, the regressors and the pandas sampling calls;
# the experiment loop rebinds it once per round.
random_state = 1
# Dataset tag; not referenced elsewhere in the visible part of this script.
data = 'componentX'
# Base learner; the experiment loop creates a fresh instance before every fit.
model = xgb.XGBRegressor(random_state=random_state)
# model = RandomForestRegressor(random_state=random_state)
# Conformal variant, passed as `regressor` to conformal_prediction().
CP = 'norm_Mondrian_CPS'
# CP = 'std_Mondrian_CPS'
# CP = 'Mondrian_regressor'
# Metric called by evaluate_regressor().
evaluation_metric = mean_squared_error
# Number of vehicles moved from the pool to the train set per selection step.
num_vehicle_selection = 226
# Number of selection steps per round.
al_iterations_count = 6
# Fraction of the time-to-event rows sampled in join_op_tte_prep2().
data_fraction = 1
# Number of independent rounds of the whole experiment.
num_rounds = 5
# Fractions of vehicles used by split_df(). NOTE: split_df() slices pool, train
# and test by these fractions and gives every remaining vehicle to the
# calibration set, so cal_percentage does not directly size that set.
train_percentage = 0.01
test_percentage = 0.25
cal_percentage = 0.14
pool_percentage = 0.6
np.random.seed(random_state)
# If all the readouts are needed for training
def join_op_tte_prep2(data_op, data_tte):
    """Join operational readouts with time-to-event rows and derive ``RUL``.

    A ``data_fraction`` sample of ``data_tte`` is drawn, only rows with
    ``in_study_repair == 1`` are kept, and the readouts of those vehicles are
    left-merged with them on ``vehicle_id``.  ``RUL`` is computed as
    ``length_of_study_time_step - time_step`` and rows containing any missing
    value are dropped.

    Returns the merged frame; neither input is modified.
    """
    sampled_tte = data_tte.sample(frac=data_fraction)
    repaired_tte = sampled_tte[sampled_tte['in_study_repair'] == 1]
    has_repair = data_op['vehicle_id'].isin(repaired_tte['vehicle_id'])
    joined = pd.merge(data_op[has_repair], repaired_tte, on=['vehicle_id'], how='left')
    joined['RUL'] = joined['length_of_study_time_step'] - joined['time_step']
    return joined.dropna()
def X_y_split(df):
    """Separate ``df`` into a feature frame and a single-column target frame.

    The target is the ``RUL`` column (returned as a one-column DataFrame).
    The features are all columns except the target and the bookkeeping
    columns ``vehicle_id``, ``length_of_study_time_step``, ``time_step`` and
    ``in_study_repair``.  ``df`` itself is left untouched.
    """
    target = df[['RUL']]
    non_feature_columns = [
        'RUL',
        'vehicle_id',
        'length_of_study_time_step',
        'time_step',
        'in_study_repair',
    ]
    features = df.drop(columns=non_feature_columns)
    return features, target
def conformal_prediction(model, df_train, df_pool, regressor='Mondrian_regressor'):
    """Fit ``model`` on ``df_train`` and build conformal prediction intervals.

    Besides its arguments, the function reads the module-level calibration and
    test data ``X_cal``, ``y_cal`` and ``X_test``, which must be set before it
    is called.

    Parameters
    ----------
    model : regressor with ``fit``/``predict``; it is fitted in place through
        ``WrapRegressor``, so the caller can keep using it afterwards.
    df_train, df_pool : DataFrames accepted by ``X_y_split``.
    regressor : one of ``'Mondrian_regressor'``, ``'norm_Mondrian_CPS'`` or
        ``'std_Mondrian_CPS'``.

    Returns
    -------
    (pool_intervals, test_intervals) as returned by crepes.  The CPS variants
    request the 2.5 and 97.5 percentiles; ``'Mondrian_regressor'`` uses the
    default confidence level of ``predict_int``.

    Raises
    ------
    ValueError
        If ``regressor`` is not one of the supported names.

    NOTE(review): the original author remarked that this "only works with RF
    but not with other models", yet the script calls it with XGBRegressor --
    confirm whether that restriction still applies.
    """
    supported = ('Mondrian_regressor', 'norm_Mondrian_CPS', 'std_Mondrian_CPS')
    if regressor not in supported:
        # Fail before the expensive fit instead of hitting an
        # UnboundLocalError at the final return.
        raise ValueError(
            f"Unknown regressor {regressor!r}; expected one of {supported}"
        )

    X_train, y_train = X_y_split(df_train)
    X_train = X_train.values.astype(float)
    y_train = y_train.values.ravel()
    X_pool, _ = X_y_split(df_pool)
    X_pool = X_pool.values.astype(float)

    mdl = WrapRegressor(model)
    mdl.fit(X_train, y_train)

    de = DifficultyEstimator()
    de.fit(X_train, y=y_train)
    sigmas_cal = de.apply(X_cal)
    sigmas_pool = de.apply(X_pool)
    sigmas_test = de.apply(X_test)

    # Point predictions are computed once and reused for binning and for the
    # predictive systems below.
    y_hat_cal = mdl.predict(X_cal)
    y_hat_pool = mdl.predict(X_pool)
    y_hat_test = mdl.predict(X_test)

    if regressor == 'Mondrian_regressor':
        # Mondrian categories are formed from the difficulty estimates.
        bins_cal, bin_thresholds = binning(sigmas_cal, bins=10)
        mdl.calibrate(X_cal, y_cal, bins=bins_cal)
        bins_pool = binning(sigmas_pool, bins=bin_thresholds)
        bins_test = binning(sigmas_test, bins=bin_thresholds)
        pool_intervals = mdl.predict_int(X_pool, bins=bins_pool)
        test_intervals = mdl.predict_int(X_test, bins=bins_test)
        return pool_intervals, test_intervals

    # Both CPS variants form Mondrian categories from the point predictions.
    residuals_cal = y_cal - y_hat_cal
    bins_cal, bin_thresholds = binning(y_hat_cal, bins=5)
    # NOTE(review): the wrapper's own calibration does not appear to be used
    # below (intervals come from the explicit system); kept to preserve the
    # original call sequence -- confirm it can be dropped.
    mdl.calibrate(X_cal, y_cal, sigmas=sigmas_cal, bins=bins_cal, cps=True)
    bins_pool = binning(y_hat_pool, bins=bin_thresholds)
    bins_test = binning(y_hat_test, bins=bin_thresholds)

    cps = ConformalPredictiveSystem()
    if regressor == 'norm_Mondrian_CPS':
        cps.fit(residuals_cal, sigmas=sigmas_cal, bins=bins_cal)
    else:
        # 'std_Mondrian_CPS': fitted without difficulty estimates.
        # NOTE(review): sigmas are still passed to predict() below, as in the
        # original; presumably ignored by a non-normalized system -- confirm.
        cps.fit(residuals_cal, bins=bins_cal)

    pool_intervals = cps.predict(y_hat_pool,
                                 sigmas=sigmas_pool,
                                 bins=bins_pool, lower_percentiles=2.5,
                                 higher_percentiles=97.5)
    test_intervals = cps.predict(y_hat_test,
                                 sigmas=sigmas_test,
                                 bins=bins_test, lower_percentiles=2.5,
                                 higher_percentiles=97.5)
    return pool_intervals, test_intervals
def sample_selection(pred_intervals_cps, df_pool, df_train, policy='most_uncertain',
                     n_select=None, seed=None):
    """Move a batch of vehicles from the pool to the train set.

    Vehicles are ranked by the mean width of their prediction intervals
    (``max_int - min_int`` averaged over all rows of a vehicle) and chosen
    according to ``policy``:

    * ``'most_uncertain'`` / ``'most_certain'``: the vehicles with the widest
      / narrowest mean interval.
    * ``'most_uncertain_roulette'``: weighted random draw, weight proportional
      to the mean width.
    * ``'most_certain_roulette'``: weighted random draw, weight proportional
      to ``max(width) + 1 - width``.
    * ``'random'``: uniform random draw; ``pred_intervals_cps`` is ignored and
      may be ``None``.

    Parameters
    ----------
    pred_intervals_cps : two-column array-like (lower, upper), one row per row
        of ``df_pool`` in the same order.
    df_pool, df_train : DataFrames with a ``vehicle_id`` column; not modified.
    policy : selection policy, see above.
    n_select : number of vehicles to move; defaults to the module-level
        ``num_vehicle_selection``.  It is capped at the number of vehicles
        left in the pool so a nearly empty pool does not raise.
    seed : random state for the random draws; defaults to the module-level
        ``random_state``.

    Returns
    -------
    (train_new_df, pool_new_df): the selected pool rows stacked on top of
    ``df_train``, and the pool without the selected vehicles.

    Raises
    ------
    ValueError
        If ``policy`` is not one of the supported names.
    """
    policies = ('most_uncertain', 'most_uncertain_roulette',
                'most_certain_roulette', 'most_certain', 'random')
    if policy not in policies:
        raise ValueError(f"Unknown policy {policy!r}; expected one of {policies}")
    if n_select is None:
        n_select = num_vehicle_selection
    if seed is None:
        seed = random_state

    if policy == 'random':
        # No interval information is needed for a uniform draw.
        candidates = pd.DataFrame({'vehicle_id': df_pool['vehicle_id'].unique()})
        n_draw = min(n_select, len(candidates))
        selected_vehicles = candidates.sample(n=n_draw, random_state=seed)['vehicle_id']
    else:
        bounds = pd.DataFrame(pred_intervals_cps, columns=['min_int', 'max_int'])
        # Reset the index so the interval rows line up positionally.
        scored = df_pool.reset_index(drop=True)
        scored['max_int'] = bounds['max_int']
        scored['min_int'] = bounds['min_int']
        scored['diff_int'] = scored['max_int'] - scored['min_int']
        per_vehicle = (scored.groupby('vehicle_id')
                       .agg(Ave_intervals=('diff_int', 'mean'))
                       .reset_index())
        n_draw = min(n_select, len(per_vehicle))

        if policy == 'most_uncertain':
            ranked = per_vehicle.sort_values(by='Ave_intervals', ascending=False)
            selected_vehicles = ranked.head(n_draw)['vehicle_id']
        elif policy == 'most_certain':
            ranked = per_vehicle.sort_values(by='Ave_intervals', ascending=True)
            selected_vehicles = ranked.head(n_draw)['vehicle_id']
        else:
            if policy == 'most_uncertain_roulette':
                weight = per_vehicle['Ave_intervals']
            else:
                # Flip the widths so narrow intervals get the large weights;
                # the +1 keeps the widest vehicle's weight above zero.
                weight = per_vehicle['Ave_intervals'].max() + 1 - per_vehicle['Ave_intervals']
            per_vehicle['probability'] = weight / weight.sum()
            selected_vehicles = per_vehicle.sample(
                n=n_draw, weights='probability', random_state=seed)['vehicle_id']

    # Remove the selected vehicles from the pool and add them to train.
    chosen = df_pool['vehicle_id'].isin(selected_vehicles)
    pool_new_df = df_pool[~chosen]
    train_new_df = pd.concat([df_pool[chosen], df_train])
    return train_new_df, pool_new_df
def evaluate_regressor(model):
    """Return the RMSE of ``model`` on the module-level test set.

    Reads ``X_test`` and ``y_test`` from module scope and applies the
    module-level ``evaluation_metric``, which is expected to be a mean squared
    error.  The square root is taken here rather than via the metric's
    ``squared=False`` flag, because that flag was deprecated and later removed
    from scikit-learn; for a single target column both give the same value.
    """
    pred = model.predict(X_test)
    rmse = float(np.sqrt(evaluation_metric(y_test, pred)))
    return rmse
def split_df(df):
    """Split ``df`` by vehicle into pool, train, test and calibration frames.

    The rows are shuffled with the module-level ``random_state`` and the
    vehicles are taken in order of first appearance.  Consecutive slices of
    that vehicle list, sized by ``pool_percentage``, ``train_percentage`` and
    ``test_percentage``, form the first three parts; every remaining vehicle
    goes to the calibration part, so ``cal_percentage`` is not used for
    sizing.  All rows of a vehicle end up in the same part.

    Returns
    -------
    (pool_df, train_df, test_df, cal_df)
    """
    shuffled = df.sample(frac=1, random_state=random_state)
    vehicle_ids = shuffled['vehicle_id'].unique()
    n_vehicles = len(vehicle_ids)

    # Cumulative slice boundaries over the shuffled vehicle list.
    pool_end = int(pool_percentage * n_vehicles)
    train_end = pool_end + int(train_percentage * n_vehicles)
    test_end = train_end + int(test_percentage * n_vehicles)
    boundaries = (
        (0, pool_end),
        (pool_end, train_end),
        (train_end, test_end),
        (test_end, n_vehicles),  # calibration takes whatever is left
    )

    pool_df, train_df, test_df, cal_df = (
        shuffled[shuffled['vehicle_id'].isin(vehicle_ids[start:stop])]
        for start, stop in boundaries
    )
    return pool_df, train_df, test_df, cal_df
"""Download the data here:
https://snd.se/en/catalogue/dataset/2024-34
"""
# Read the three raw CSV files (replace "your address here" with the local
# folder holding the downloaded dataset) and join readouts with repair info.
train_op, train_tte, train_spec = (
    pd.read_csv(csv_path)
    for csv_path in (
        "your address here/train_operational_readouts.csv",
        "your address here/train_tte.csv",
        "your address here/train_specification.csv",
    )
)
data_op_tte = join_op_tte_prep2(train_op, train_tte)
######################## One random readout for each vehicle ######################
# Group the DataFrame by 'vehicle_id' and sample a random readout per vehicle
# data_op_tte = data_op_tte.groupby('vehicle_id').apply(lambda x: x.sample(n=1)).reset_index(drop=True)
# Per-round score curves, one independent list per selection strategy
# (mu/mc = most uncertain/certain, *r = roulette variant, rnd = random).
results_mu, results_mc, results_mur, results_mcr, results_rnd = (
    [] for _ in range(5)
)
# Per-round curves of the mean test-interval width, same strategy naming.
intervals_mu, intervals_mc, intervals_mur, intervals_mcr, intervals_rnd = (
    [] for _ in range(5)
)
figure_counter = 1
# Selection strategies in the order they are evaluated within an iteration:
# (policy passed to sample_selection, label in the log line, suffix appended
# to the X_train/X_pool names in the log line).
_STRATEGIES = (
    ('random', 'rand', '_rand'),
    ('most_uncertain', 'most_uncertain', ''),
    ('most_uncertain_roulette', 'most_uncertain_roulette', ''),
    ('most_certain', 'most_certain', ''),
    ('most_certain_roulette', 'most_certain_roulette', ''),
)
_ITER_BANNER = ' #####################################################################################################################'


def _evaluate_strategy(state, label, suffix):
    """Fit a fresh model on one strategy's train set and record its scores.

    ``state`` is a dict with the strategy's current ``'train'`` and ``'pool'``
    frames and its ``'rmse'`` / ``'widths'`` history lists.  The mean test
    interval width and the test score are appended to those lists and a log
    line is printed.  Returns the prediction intervals for the pool.
    """
    model = xgb.XGBRegressor(random_state=random_state)
    # model = RandomForestRegressor(random_state=random_state)
    pool_intervals, test_intervals = conformal_prediction(
        model, state['train'], state['pool'], regressor=CP)
    state['widths'].append((test_intervals[:, 1] - test_intervals[:, 0]).mean())
    rmse = evaluate_regressor(model)
    print('RMSE_' + label + ': ', rmse,
          ', X_train' + suffix + '.shape[0]:', state['train'].shape[0],
          ', vehicles in X_train' + suffix + ':', state['train'].vehicle_id.nunique(),
          ', X_pool' + suffix + '.shape[0]:', state['pool'].shape[0],
          'vehicles in X_pool:', state['pool'].vehicle_id.nunique())
    state['rmse'].append(rmse)
    return pool_intervals


for round_idx in range(num_rounds):
    # The seed is rebound at module level so that split_df, sample_selection
    # and the models created below all pick up the per-round value.
    random_state = round_idx + 1
    print('\n\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> round: ', round_idx, flush=True)

    df_pool, df_train, df_test, df_cal = split_df(data_op_tte)
    # These four are read as module-level globals by conformal_prediction
    # and evaluate_regressor.
    X_test, y_test = X_y_split(df_test)
    X_cal, y_cal = X_y_split(df_cal)
    X_cal = X_cal.values.astype(float)
    y_cal = y_cal.values.ravel()

    # Every strategy starts from its own copy of the same initial split.
    states = {
        policy: {'train': df_train.copy(), 'pool': df_pool.copy(),
                 'rmse': [], 'widths': []}
        for policy, _, _ in _STRATEGIES
    }
    # Number of train vehicles at each evaluation point (taken from the
    # random strategy); used as tick labels of the secondary x-axis.
    train_sizes = []

    for al_iter in range(al_iterations_count):
        print('iter: ', al_iter, _ITER_BANNER, flush=True)
        train_sizes.append(states['random']['train'].vehicle_id.nunique())
        for policy, label, suffix in _STRATEGIES:
            state = states[policy]
            pool_intervals = _evaluate_strategy(state, label, suffix)
            state['train'], state['pool'] = sample_selection(
                None if policy == 'random' else pool_intervals,
                state['pool'],
                state['train'],
                policy=policy)

    # Final evaluation on the fully grown train sets (no further selection).
    print('iter: Last', _ITER_BANNER)
    train_sizes.append(states['random']['train'].vehicle_id.nunique())
    for policy, label, suffix in _STRATEGIES:
        _evaluate_strategy(states[policy], label, suffix)

    sizes_array = np.array(train_sizes)

    # One figure for the test score and one for the mean interval width.
    curve_x = range(al_iterations_count + 1)
    ranked_policies = ('most_uncertain', 'most_uncertain_roulette',
                       'most_certain', 'most_certain_roulette')
    panels = (
        ('rmse', "RMSE",
         ('least confident', 'least confident roulette',
          'most confident', 'most confident roulette')),
        ('widths', "Average Test Intervals",
         ('most uncertain', 'most uncertain roulette',
          'most certain', 'most certain roulette')),
    )
    for history_key, y_label, legend_labels in panels:
        plt.figure()
        for policy, legend_label in zip(ranked_policies, legend_labels):
            plt.plot(curve_x, states[policy][history_key], label=legend_label)
        plt.plot(curve_x, states['random'][history_key], '-.', label='random selection')
        # Add dataset size labels on the secondary x-axis
        ax = plt.gca()
        ax2 = ax.twiny()
        ax2.set_xlim(ax.get_xlim())
        ax2.set_xticks(range(al_iterations_count + 1))
        ax2.set_xticklabels(sizes_array)
        # Set labels for both x-axes
        ax.set_xlabel("Active Learning Iterations")
        ax2.set_xlabel("Number of vehicles in the train set")
        ax.set_ylabel(y_label)
        plt.title('Active Learning round ' + str(round_idx))
        ax.legend()
        figure_counter = figure_counter + 1

    results_mu.append(states['most_uncertain']['rmse'])
    results_mur.append(states['most_uncertain_roulette']['rmse'])
    results_mc.append(states['most_certain']['rmse'])
    results_mcr.append(states['most_certain_roulette']['rmse'])
    results_rnd.append(states['random']['rmse'])
    intervals_mu.append(states['most_uncertain']['widths'])
    intervals_mur.append(states['most_uncertain_roulette']['widths'])
    intervals_mc.append(states['most_certain']['widths'])
    intervals_mcr.append(states['most_certain_roulette']['widths'])
    intervals_rnd.append(states['random']['widths'])
# Column names itr0..itrN: one column per evaluation point of a round.
_itr_columns = ['itr' + str(i) for i in range(al_iterations_count + 1)]
_strategy_keys = ('mu', 'mur', 'mc', 'mcr', 'rnd')

# One row per round, one column per strategy; each cell holds that round's
# list of scores.
final_res = pd.DataFrame({'mu': results_mu, 'mur': results_mur,
                          'mc': results_mc, 'mcr': results_mcr,
                          'rnd': results_rnd})
# final_res = pd.DataFrame({'mu':results_mu, 'mur':results_mur, 'rnd':results_rnd})
# Expand every strategy column into a rounds x iterations frame.
mu_df, mur_df, mc_df, mcr_df, rnd_df = (
    pd.DataFrame(final_res[key].to_list(), columns=_itr_columns)
    for key in _strategy_keys
)

# Same layout for the mean test-interval widths.
final_intervals = pd.DataFrame({'mu': intervals_mu, 'mur': intervals_mur,
                                'mc': intervals_mc, 'mcr': intervals_mcr,
                                'rnd': intervals_rnd})
mu_interval_df, mur_interval_df, mc_interval_df, mcr_interval_df, rnd_interval_df = (
    pd.DataFrame(final_intervals[key].to_list(), columns=_itr_columns)
    for key in _strategy_keys
)
x = range(al_iterations_count+1)
plt.figure()


def plot_one_curve(df, label, color):
    """Draw the column-wise mean of ``df`` with a +/- one std band.

    The curve is drawn on the current matplotlib axes against the
    module-level ``x``.  ``color`` is applied to both the line and the shaded
    band so the two always match (previously the line used matplotlib's
    default colour cycle while only the band used ``color``).

    Parameters
    ----------
    df : DataFrame with one column per x position.
    label : legend label of the curve.
    color : matplotlib colour for the line and the band.
    """
    mean_curve = np.array(df.mean().to_list())
    deviation = np.array(df.std().to_list())
    plt.plot(x, mean_curve, label=label, color=color)
    plt.fill_between(x, mean_curve - deviation, mean_curve + deviation,
                     color=color, alpha=0.1)
# Summary panels across all rounds: (y-axis label, curves), where each curve
# is (frame, legend label, colour) handed to plot_one_curve.
_summary_panels = (
    ("RMSE", (
        (mu_df, 'most uncertain', 'blue'),
        (mur_df, 'most uncertain roulette', 'red'),
        (mc_df, 'most certain', 'green'),
        (mcr_df, 'most certain roulette', 'magenta'),
        (rnd_df, 'random selection', 'purple'),
    )),
    ("Prediction Interval for Test set", (
        (mu_interval_df, 'most uncertain', 'blue'),
        (mur_interval_df, 'most uncertain roulette', 'red'),
        (mc_interval_df, 'most certain', 'green'),
        (mcr_interval_df, 'most certain roulette', 'magenta'),
        (rnd_interval_df, 'random selection', 'purple'),
    )),
)
for _panel_idx, (_y_label, _curves) in enumerate(_summary_panels):
    if _panel_idx > 0:
        # The first panel draws on the figure opened just before
        # plot_one_curve was defined; every later panel gets its own figure.
        x = range(al_iterations_count + 1)
        plt.figure()
    for _frame, _legend, _colour in _curves:
        plot_one_curve(_frame, _legend, _colour)
    # Add dataset size labels on the secondary x-axis
    ax = plt.gca()
    ax2 = ax.twiny()
    ax2.set_xlim(ax.get_xlim())
    ax2.set_xticks(range(al_iterations_count + 1))
    ax2.set_xticklabels(sizes_array)
    # Set labels for both x-axes
    ax.set_xlabel("Active Learning Iterations")
    ax2.set_xlabel("Number of vehicles in the train set")
    ax.set_ylabel(_y_label)
    plt.title('Active Learning')
    ax.legend()
    figure_counter = figure_counter + 1
plt.show()