""" Author Zahra Kharazian, zahra.kharazian@dsv.su.se This code implements the CoPAL algorithm that employs conformal prediction in active learning for regression tasks. Suitable for multi-variate time series data """ import random import numpy as np import pandas as pd from sklearn.ensemble import RandomForestRegressor import xgboost as xgb from crepes import WrapRegressor import matplotlib.pyplot as plt import warnings warnings.filterwarnings("ignore") from crepes import ConformalRegressor, ConformalPredictiveSystem from crepes.extras import binning, DifficultyEstimator from sklearn.metrics import mean_absolute_error, mean_squared_error import sys random_state = 1 data = 'componentX' model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) CP = 'norm_Mondrian_CPS' # CP = 'std_Mondrian_CPS' # CP = 'Mondrian_regressor' evaluation_metric = mean_squared_error num_vehicle_selection = 226 al_iterations_count = 6 data_fraction = 1 num_rounds = 5 train_percentage = 0.01 test_percentage = 0.25 cal_percentage = 0.14 pool_percentage = 0.6 np.random.seed(random_state) # If all the readouts are needed for training def join_op_tte_prep2(data_op, data_tte): data_tte = data_tte.sample(frac=data_fraction) data_tte = data_tte[data_tte['in_study_repair'] == 1] data_op = data_op[data_op['vehicle_id'].isin(data_tte['vehicle_id'])] data_op_tte = pd.merge(data_op, data_tte, on=['vehicle_id'], how='left') data_op_tte['RUL'] = data_op_tte['length_of_study_time_step'] - data_op_tte['time_step'] data_op_tte = data_op_tte.dropna() return data_op_tte def X_y_split(df): y = df[['RUL']] X = df.drop(columns=['RUL', 'vehicle_id', 'length_of_study_time_step', 'time_step', 'in_study_repair']) return X, y def conformal_prediction(model, df_train, df_pool, regressor='Mondrian_regressor'): # This only works with RF but not with other models X_train, y_train = X_y_split(df_train) X_train = X_train.values.astype(float) y_train = y_train.values.ravel() X_pool, y_pool = X_y_split(df_pool) X_pool = X_pool.values.astype(float) y_pool = y_pool.values.ravel() mdl = WrapRegressor(model) mdl.fit(X_train, y_train) de = DifficultyEstimator() de.fit(X_train, y=y_train) sigmas_cal = de.apply(X_cal) sigmas_pool = de.apply(X_pool) sigmas_test = de.apply(X_test) y_hat_cal = mdl.predict(X_cal) residuals_cal = y_cal - y_hat_cal y_hat_pool = mdl.predict(X_pool) y_hat_test = mdl.predict(X_test) if regressor == 'Mondrian_regressor': bins_cal, bin_thresholds = binning(sigmas_cal, bins=10) mdl.calibrate(X_cal, y_cal, bins=bins_cal) bins_pool = binning(sigmas_pool, bins=bin_thresholds) pool_intervals = mdl.predict_int(X_pool, bins=bins_pool) bins_test = binning(sigmas_test, bins=bin_thresholds) test_intervals = mdl.predict_int(X_test, bins=bins_test) elif regressor == 'norm_Mondrian_CPS': bins_cal, bin_thresholds = binning(y_hat_cal, bins=5) mdl.calibrate(X_cal, y_cal, sigmas=sigmas_cal, bins=bins_cal, cps=True) bins_pool = binning(mdl.predict(X_pool), bins=bin_thresholds) cps_mond_norm = ConformalPredictiveSystem().fit(residuals_cal, sigmas=sigmas_cal, bins=bins_cal) pool_intervals = cps_mond_norm.predict(y_hat_pool, sigmas=sigmas_pool, bins=bins_pool, lower_percentiles=2.5, higher_percentiles=97.5) bins_test = binning(mdl.predict(X_test), bins=bin_thresholds) test_intervals = cps_mond_norm.predict(y_hat_test, sigmas=sigmas_test, bins=bins_test, lower_percentiles=2.5, higher_percentiles=97.5) elif regressor == 'std_Mondrian_CPS': bins_cal, bin_thresholds = binning(y_hat_cal, bins=5) mdl.calibrate(X_cal, y_cal, sigmas=sigmas_cal, bins=bins_cal, cps=True) bins_pool = binning(mdl.predict(X_pool), bins=bin_thresholds) cps_mond_std = ConformalPredictiveSystem().fit(residuals_cal, bins=bins_cal) pool_intervals = cps_mond_std.predict(y_hat_pool, sigmas=sigmas_pool, bins=bins_pool, lower_percentiles=2.5, higher_percentiles=97.5) bins_test = binning(mdl.predict(X_test), bins=bin_thresholds) test_intervals = cps_mond_std.predict(y_hat_test, sigmas=sigmas_test, bins=bins_test, lower_percentiles=2.5, higher_percentiles=97.5) return pool_intervals, test_intervals def sample_selection(pred_intervals_cps, df_pool, df_train, policy = 'most_uncertain'): df_interval_temp = pd.DataFrame(pred_intervals_cps, columns=['min_int', 'max_int']) merged_df = df_pool.copy() merged_df.reset_index(drop=True, inplace=True) merged_df['max_int'] = df_interval_temp['max_int'] merged_df['min_int'] = df_interval_temp['min_int'] merged_df['diff_int'] = merged_df['max_int'] - merged_df['min_int'] merged_ave_df = merged_df.groupby('vehicle_id').agg(Ave_intervals=('diff_int', 'mean')).reset_index() if policy == 'most_uncertain': merged_ave_df_sorted = merged_ave_df.sort_values(by='Ave_intervals', ascending=False) df_int_certainty = merged_ave_df_sorted.head(num_vehicle_selection) # remove these "num_vehicle_selection" samples from pool selected_vehicles = df_int_certainty['vehicle_id'] elif policy == 'most_uncertain_roulette': merged_ave_df['probability'] = merged_ave_df['Ave_intervals'] / merged_ave_df['Ave_intervals'].sum() selected_vehicles = merged_ave_df.sample(n=num_vehicle_selection, weights='probability', random_state=random_state)['vehicle_id'] elif policy == 'most_certain_roulette': merged_ave_df['flipped_Ave_intervals'] = (merged_ave_df['Ave_intervals'].max() + 1 - merged_ave_df['Ave_intervals']) merged_ave_df['probability'] = merged_ave_df['flipped_Ave_intervals'] / merged_ave_df['flipped_Ave_intervals'].sum() selected_vehicles = merged_ave_df.sample(n=num_vehicle_selection, weights='probability', random_state=random_state)['vehicle_id'] elif policy == 'most_certain': merged_ave_df_sorted = merged_ave_df.sort_values(by='Ave_intervals', ascending=True) df_int_certainty = merged_ave_df_sorted.head(num_vehicle_selection) # remove these "num_vehicle_selection" samples from pool selected_vehicles = df_int_certainty['vehicle_id'] elif policy == 'random': unique_vehicles = df_pool.vehicle_id.unique() unique_vehicles = pd.DataFrame({'vehicle_id':unique_vehicles}) selected_vehicles = unique_vehicles.sample(n=num_vehicle_selection, random_state=random_state) selected_vehicles = selected_vehicles['vehicle_id'] pool_new_df = df_pool[~df_pool['vehicle_id'].isin(selected_vehicles)] # add these "num_vehicle_selection" samples to train df_pool_selected_vehicles = df_pool[df_pool['vehicle_id'].isin(selected_vehicles)] train_new_df = pd.concat([df_pool_selected_vehicles, df_train]) return train_new_df, pool_new_df def evaluate_regressor(model): pred = model.predict(X_test) rmse = evaluation_metric(y_test, pred, squared=False) return rmse def split_df(df): df = df.sample(frac=1, random_state=random_state) # Group by 'Group' and get unique groups unique_groups = df['vehicle_id'].unique() # Calculate sizes for each part total_groups = len(unique_groups) pool_size = int(pool_percentage * total_groups) train_size = int(train_percentage * total_groups) test_size = int(test_percentage * total_groups) calibration_size = int(cal_percentage * total_groups) # Divide the unique groups into parts parts = [unique_groups[:pool_size], unique_groups[pool_size:pool_size + train_size], unique_groups[pool_size + train_size:pool_size + train_size + test_size], unique_groups[pool_size + train_size + test_size:]] # Create DataFrames for each part part_dfs = [] for part in parts: part_df = df[df['vehicle_id'].isin(part)] part_dfs.append(part_df) return part_dfs[0], part_dfs[1], part_dfs[2], part_dfs[3] """Download the data here: https://snd.se/en/catalogue/dataset/2024-34 """ train_op = pd.read_csv("your address here/train_operational_readouts.csv") train_tte = pd.read_csv("your address here/train_tte.csv") train_spec = pd.read_csv("your address here/train_specification.csv") data_op_tte = join_op_tte_prep2(train_op, train_tte) ######################## One random readout for each vehicle ###################### # Group the DataFrame by 'vehicle_id' and sample a random readout per vehicle # data_op_tte = data_op_tte.groupby('vehicle_id').apply(lambda x: x.sample(n=1)).reset_index(drop=True) results_mu = [] results_mc = [] results_mur = [] results_mcr = [] results_rnd = [] intervals_mu = [] intervals_mc = [] intervals_mur = [] intervals_mcr = [] intervals_rnd = [] figure_counter = 1 for round in range(num_rounds): random_state = round + 1 print('\n\n\n>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> round: ', round) # Flush the output sys.stdout.flush() df_pool, df_train, df_test, df_cal = split_df(data_op_tte) X_test, y_test = X_y_split(df_test) X_cal, y_cal = X_y_split(df_cal) X_cal = X_cal.values.astype(float) y_cal = y_cal.values.ravel() df_train_ur = df_train.copy() df_pool_ur = df_pool.copy() df_train_c = df_train.copy() df_pool_c = df_pool.copy() df_train_cr = df_train.copy() df_pool_cr = df_pool.copy() df_train_rand = df_train.copy() df_pool_rand = df_pool.copy() mae_least_conf = [] mae_most_conf = [] mae_most_conf_roulette = [] mae_least_conf_roulette = [] mae_random = [] test_int_u = [] test_int_ur = [] test_int_c = [] test_int_cr = [] test_int_rand = [] for iter in range(al_iterations_count): print('iter: ', iter, ' #####################################################################################################################') # Flush the output sys.stdout.flush() ########################################### Random ###################################### model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) _ , test_intervals_rand = conformal_prediction(model, df_train_rand, df_pool_rand, regressor = CP) average_test_inter_rand = (test_intervals_rand[:, 1] - test_intervals_rand[:, 0]).mean() test_int_rand.append(average_test_inter_rand) MAE_rand = evaluate_regressor(model) print('RMSE_rand: ', MAE_rand, ', X_train_rand.shape[0]:', df_train_rand.shape[0], ', vehicles in X_train_rand:', df_train_rand.vehicle_id.nunique(), ', X_pool_rand.shape[0]:', df_pool_rand.shape[0], 'vehicles in X_pool:', df_pool_rand.vehicle_id.nunique()) mae_random.append(MAE_rand) df_train_rand, df_pool_rand = sample_selection(None, df_pool_rand, df_train_rand, policy = 'random') ###################################### Most Uncertain ###################################### model = xgb.XGBRegressor(random_state=random_state) # # model = RandomForestRegressor(random_state=random_state) pred_intervals_cps, test_intervals_u = conformal_prediction(model, df_train, df_pool, regressor = CP) average_test_inter_u = (test_intervals_u[:, 1] - test_intervals_u[:, 0]).mean() test_int_u.append(average_test_inter_u) MAE_u = evaluate_regressor(model) print('RMSE_most_uncertain: ', MAE_u, ', X_train.shape[0]:', df_train.shape[0], ', vehicles in X_train:', df_train.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool.shape[0], 'vehicles in X_pool:', df_pool.vehicle_id.nunique()) mae_least_conf.append(MAE_u) df_train, df_pool= sample_selection(pred_intervals_cps, df_pool, df_train, policy='most_uncertain') ###################################### Unertain roulette ###################################### model = xgb.XGBRegressor(random_state=random_state) # # model = RandomForestRegressor(random_state=random_state) pred_intervals_cps_ur, test_intervals_ur = conformal_prediction(model, df_train_ur, df_pool_ur, regressor = CP) average_test_inter_ur = (test_intervals_ur[:, 1] - test_intervals_ur[:, 0]).mean() test_int_ur.append(average_test_inter_ur) MAE_ur = evaluate_regressor(model) print('RMSE_most_uncertain_roulette: ', MAE_ur, ', X_train.shape[0]:', df_train_ur.shape[0], ', vehicles in X_train:', df_train_ur.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_ur.shape[0], 'vehicles in X_pool:', df_pool_ur.vehicle_id.nunique()) mae_least_conf_roulette.append(MAE_ur) df_train_ur, df_pool_ur = sample_selection(pred_intervals_cps_ur, df_pool_ur, df_train_ur, policy='most_uncertain_roulette') ###################################### Most certain ###################################### model = xgb.XGBRegressor(random_state=random_state) # # model = RandomForestRegressor(random_state=random_state) pred_intervals_cps_c, test_intervals_c = conformal_prediction(model, df_train_c, df_pool_c, regressor = CP) average_test_inter_c = (test_intervals_c[:, 1] - test_intervals_c[:, 0]).mean() test_int_c.append(average_test_inter_c) MAE_c = evaluate_regressor(model) print('RMSE_most_certain: ', MAE_c, ', X_train.shape[0]:', df_train_c.shape[0], ', vehicles in X_train:', df_train_c.vehicle_id.nunique() ,', X_pool.shape[0]:', df_pool_c.shape[0],'vehicles in X_pool:', df_pool_c.vehicle_id.nunique()) mae_most_conf.append(MAE_c) df_train_c, df_pool_c= sample_selection(pred_intervals_cps_c, df_pool_c, df_train_c, policy = 'most_certain') ###################################### certain roulette ###################################### model = xgb.XGBRegressor(random_state=random_state) # # model =RandomForestRegressor(random_state=random_state) pred_intervals_cps_cr, test_intervals_cr = conformal_prediction(model, df_train_cr, df_pool_cr, regressor = CP) average_test_inter_cr = (test_intervals_cr[:, 1] - test_intervals_cr[:, 0]).mean() test_int_cr.append(average_test_inter_cr) MAE_cr = evaluate_regressor(model) print('RMSE_most_certain_roulette: ', MAE_cr, ', X_train.shape[0]:', df_train_cr.shape[0], ', vehicles in X_train:', df_train_cr.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_cr.shape[0], 'vehicles in X_pool:', df_pool_cr.vehicle_id.nunique()) mae_most_conf_roulette.append(MAE_cr) df_train_cr, df_pool_cr = sample_selection(pred_intervals_cps_cr, df_pool_cr, df_train_cr, policy='most_certain_roulette') print('iter: Last', ' #####################################################################################################################') model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) _ , test_int = conformal_prediction(model, df_train_rand, df_pool_rand, regressor = CP) average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean() test_int_rand.append(average_test_inter) MAE_rand = evaluate_regressor(model) print('RMSE_rand: ', MAE_rand, ', X_train_rand.shape[0]:', df_train_rand.shape[0], ', vehicles in X_train_rand:', df_train_rand.vehicle_id.nunique(), ', X_pool_rand.shape[0]:', df_pool_rand.shape[0], 'vehicles in X_pool:', df_pool_rand.vehicle_id.nunique()) mae_random.append(MAE_rand) model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) _ , test_int = conformal_prediction(model, df_train, df_pool, regressor = CP) average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean() test_int_u.append(average_test_inter) MAE_u = evaluate_regressor(model) print('RMSE_most_uncertain: ', MAE_u, ', X_train.shape[0]:', df_train.shape[0], ', vehicles in X_train:', df_train.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool.shape[0], 'vehicles in X_pool:', df_pool.vehicle_id.nunique()) mae_least_conf.append(MAE_u) model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) _ , test_int = conformal_prediction(model, df_train_ur, df_pool_ur, regressor = CP) average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean() test_int_ur.append(average_test_inter) MAE_ur = evaluate_regressor(model) print('RMSE_most_uncertain_roulette: ', MAE_ur, ', X_train.shape[0]:', df_train_ur.shape[0], ', vehicles in X_train:', df_train_ur.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_ur.shape[0], 'vehicles in X_pool:', df_pool_ur.vehicle_id.nunique()) mae_least_conf_roulette.append(MAE_ur) model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) _ , test_int = conformal_prediction(model,df_train_c, df_pool_c, regressor = CP) average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean() test_int_c.append(average_test_inter) MAE_c = evaluate_regressor(model) print('RMSE_most_certain: ', MAE_c, ', X_train.shape[0]:', df_train_c.shape[0], ', vehicles in X_train:', df_train_c.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_c.shape[0], 'vehicles in X_pool:', df_pool_c.vehicle_id.nunique()) mae_most_conf.append(MAE_c) model = xgb.XGBRegressor(random_state=random_state) # model = RandomForestRegressor(random_state=random_state) _ , test_int = conformal_prediction(model, df_train_cr, df_pool_cr, regressor = CP) average_test_inter = (test_int[:, 1] - test_int[:, 0]).mean() test_int_cr.append(average_test_inter) MAE_cr = evaluate_regressor(model) print('RMSE_most_certain_roulette: ', MAE_cr, ', X_train.shape[0]:', df_train_cr.shape[0], ', vehicles in X_train:', df_train_cr.vehicle_id.nunique(), ', X_pool.shape[0]:', df_pool_cr.shape[0], 'vehicles in X_pool:', df_pool_cr.vehicle_id.nunique()) mae_most_conf_roulette.append(MAE_cr) sizes_list = list(range(df_train_rand.vehicle_id.nunique(), 0, -num_vehicle_selection)) sizes_array = np.array(sizes_list) sizes_array.sort() # Sort the list in ascending order # Plotting MAE values with dataset size on secondary x-axis plt.figure() # Plot MAE values plt.plot(range(al_iterations_count + 1), mae_least_conf, label='least confident') plt.plot(range(al_iterations_count + 1), mae_least_conf_roulette, label='least confident roulette') plt.plot(range(al_iterations_count + 1), mae_most_conf, label='most confident') plt.plot(range(al_iterations_count + 1), mae_most_conf_roulette, label='most confident roulette') plt.plot(range(al_iterations_count + 1), mae_random, '-.', label='random selection') # Add dataset size labels on the secondary x-axis ax = plt.gca() ax2 = ax.twiny() ax2.set_xlim(ax.get_xlim()) ax2.set_xticks(range(al_iterations_count + 1)) ax2.set_xticklabels(sizes_array) # Set labels for both x-axes ax.set_xlabel("Active Learning Iterations") ax2.set_xlabel("Number of vehicles in the train set") ax.set_ylabel("RMSE") plt.title('Active Learning round ' + str(round)) ax.legend() figure_counter = figure_counter + 1 # plot test interval levels plt.figure() plt.plot(range(al_iterations_count + 1), test_int_u, label='most uncertain') plt.plot(range(al_iterations_count + 1), test_int_ur, label='most uncertain roulette') plt.plot(range(al_iterations_count + 1), test_int_c, label='most certain') plt.plot(range(al_iterations_count + 1), test_int_cr, label='most certain roulette') plt.plot(range(al_iterations_count + 1), test_int_rand, '-.', label='random selection') # Add dataset size labels on the secondary x-axis ax = plt.gca() ax2 = ax.twiny() ax2.set_xlim(ax.get_xlim()) ax2.set_xticks(range(al_iterations_count + 1)) ax2.set_xticklabels(sizes_array) # Set labels for both x-axes ax.set_xlabel("Active Learning Iterations") ax2.set_xlabel("Number of vehicles in the train set") ax.set_ylabel("Average Test Intervals") plt.title('Active Learning round ' + str(round)) ax.legend() figure_counter = figure_counter + 1 results_mu.append(mae_least_conf) results_mur.append(mae_least_conf_roulette) results_mc.append(mae_most_conf) results_mcr.append(mae_most_conf_roulette) results_rnd.append(mae_random) intervals_mu.append(test_int_u) intervals_mur.append(test_int_ur) intervals_mc.append(test_int_c) intervals_mcr.append(test_int_cr) intervals_rnd.append(test_int_rand) final_res = pd.DataFrame({'mu':results_mu, 'mur':results_mur, 'mc':results_mc, 'mcr': results_mcr, 'rnd':results_rnd}) # final_res = pd.DataFrame({'mu':results_mu, 'mur':results_mur, 'rnd':results_rnd}) mu_df = pd.DataFrame(final_res.mu.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count + 1)]) mur_df = pd.DataFrame(final_res.mur.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count + 1)]) mc_df = pd.DataFrame(final_res.mc.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count+1)]) mcr_df = pd.DataFrame(final_res.mcr.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count+1)]) rnd_df = pd.DataFrame(final_res.rnd.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count+1)]) final_intervals = pd.DataFrame({'mu':intervals_mu, 'mur':intervals_mur, 'mc':intervals_mc, 'mcr': intervals_mcr, 'rnd':intervals_rnd}) mu_interval_df = pd.DataFrame(final_intervals.mu.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count + 1 )]) mur_interval_df = pd.DataFrame(final_intervals.mur.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count + 1)]) mc_interval_df = pd.DataFrame(final_intervals.mc.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count + 1)]) mcr_interval_df = pd.DataFrame(final_intervals.mcr.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count + 1)]) rnd_interval_df = pd.DataFrame(final_intervals.rnd.to_list(), columns=['itr'+str(i) for i in range(al_iterations_count +1)]) x = range(al_iterations_count+1) plt.figure() def plot_one_curve(df, label, color): y = np.array(df.mean().to_list()) deviation = np.array(df.std().to_list()) plt.plot(x, y, label=label) plt.fill_between(x, y - deviation, y + deviation, color=color, alpha=0.1) plot_one_curve(mu_df, 'most uncertain', 'blue') plot_one_curve(mur_df, 'most uncertain roulette', 'red') plot_one_curve(mc_df, 'most certain', 'green') plot_one_curve(mcr_df, 'most certain roulette', 'magenta') plot_one_curve(rnd_df, 'random selection', 'purple') # Add dataset size labels on the secondary x-axis ax = plt.gca() ax2 = ax.twiny() ax2.set_xlim(ax.get_xlim()) ax2.set_xticks(range(al_iterations_count + 1)) ax2.set_xticklabels(sizes_array) # Set labels for both x-axes ax.set_xlabel("Active Learning Iterations") ax2.set_xlabel("Number of vehicles in the train set") ax.set_ylabel("RMSE") plt.title('Active Learning') ax.legend() figure_counter = figure_counter + 1 x = range(al_iterations_count + 1) plt.figure() plot_one_curve(mu_interval_df, 'most uncertain', 'blue') plot_one_curve(mur_interval_df, 'most uncertain roulette', 'red') plot_one_curve(mc_interval_df, 'most certain', 'green') plot_one_curve(mcr_interval_df, 'most certain roulette', 'magenta') plot_one_curve(rnd_interval_df, 'random selection', 'purple') ax = plt.gca() ax2 = ax.twiny() ax2.set_xlim(ax.get_xlim()) ax2.set_xticks(range(al_iterations_count + 1)) ax2.set_xticklabels(sizes_array) # Set labels for both x-axes ax.set_xlabel("Active Learning Iterations") ax2.set_xlabel("Number of vehicles in the train set") ax.set_ylabel("Prediction Interval for Test set") plt.title('Active Learning') ax.legend() figure_counter = figure_counter + 1 plt.show()