# EXTREMUM_web/base/views_backup.py
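# Imports reconstructed from the calls made in home(); this is a best-guess
# set. The app-level helpers and constants it uses (ajax_requests, stats,
# training, testing, preprocess, counterfactuals, FILE_NAME,
# PROCESS_FILE_NAME) are assumed to be defined elsewhere in this module/app
# and are not imported here.
import os
import pickle

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
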

def home(request):
    # request.session.flush()
    # Figures and reports produced by earlier requests are kept in the
    # session; session.get() returns None when a key has not been set yet.
    fig = request.session.get("fig")
    fig2 = request.session.get("fig2")
    pca = request.session.get("pca")
    tsne = request.session.get("tsne")
    cfrow_og = request.session.get("cfrow_og")
    cfrow_cf = request.session.get("cfrow_cf")
    cfdf_rows = request.session.get("cfdf_rows")
    clas_report = request.session.get("clas_report")
    if "excel_file_name" in request.session:
        excel_file_name = request.session.get("excel_file_name")
    else:
        # Name of the default dataset. If the session variable is not set,
        # no CSV was ever uploaded, so the default dataset should be used.
        excel_file_name = FILE_NAME
        request.session["excel_file_name"] = excel_file_name
    if "excel_file_name_preprocessed" in request.session:
        excel_file_name_preprocessed = request.session.get(
            "excel_file_name_preprocessed"
        )
    else:
        excel_file_name_preprocessed = PROCESS_FILE_NAME
        request.session["excel_file_name_preprocessed"] = excel_file_name_preprocessed
    # AJAX requests are dispatched separately, keyed on their "action" value.
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return ajax_requests(request.POST.get("action"), request)
    df = pd.DataFrame()
    if request.method == "POST":
        if "csv" in request.POST:
            excel_file = request.FILES["excel_file"]
            excel_file_name = request.FILES["excel_file"].name
            # A new upload invalidates every figure and report computed for
            # the previous dataset.
            fig = None
            fig2 = None
            pca = None
            tsne = None
            clas_report = None
            cfrow_cf = None
            cfrow_og = None
            # Here the in-memory upload is used rather than a file name,
            # since the uploaded file has not been saved to disk yet.
            # In every other case only the name is needed.
            # fs = FileSystemStorage()  # defaults to MEDIA_ROOT
            request.session["excel_file_name"] = excel_file_name
            # fs.save(excel_file_name, excel_file)
            df = pd.read_csv(excel_file)
            # df.drop(["id"], axis=1, inplace=True)
            df.to_csv(excel_file_name, index=False)
            # two arbitrary columns to plot by default
            feature1 = df.columns[3]
            feature2 = df.columns[2]
            request.session["feature1"] = feature1
            request.session["feature2"] = feature2
            fig = stats(
                excel_file_name,
                request.session["feature1"],
                request.session["feature2"],
            )
elif "plot" in request.POST:
feature1 = request.POST.get("feature1")
feature2 = request.POST.get("feature2")
request.session["feature1"] = feature1
request.session["feature2"] = feature2
fig = stats(
excel_file_name,
request.session["feature1"],
request.session["feature2"],
)
elif "traintest" in request.POST:
mode = request.POST.get("colorRadio")
model = request.POST.get("model")
test_size = float(request.POST.get("split_input"))
request.session["model"] = model
if mode == "train":
if model == "logit":
con = training(excel_file_name_preprocessed, "logit", test_size)
elif model == "xgb":
con = training(excel_file_name_preprocessed, "xgb", test_size)
elif model == "dt":
con = training(excel_file_name_preprocessed, "dt", test_size)
elif model == "svm":
con = training(excel_file_name_preprocessed, "svm", test_size)
elif model == "rf":
con = training(excel_file_name_preprocessed, "rf", test_size)
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif mode == "test":
if model == "logit":
con = testing(excel_file_name_preprocessed, "logit")
elif model == "xgb":
con = testing(excel_file_name_preprocessed, "xgb")
elif model == "dt":
con = testing(excel_file_name_preprocessed, "dt")
elif model == "svm":
con = testing(excel_file_name_preprocessed, "svm")
elif model == "rf":
con = training(excel_file_name_preprocessed, "rf", test_size)
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif "preprocess" in request.POST:
value_list = request.POST.getlist("boxes")
# if file for preprocessing does not exist create it
# also apply basic preprocessing
if os.path.exists(excel_file_name_preprocessed) == False:
# generate filename
idx = excel_file_name.index(".")
excel_file_name_preprocessed = (
excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
)
# save file for preprocessing
preprocess_df = pd.read_csv(excel_file_name)
request.session["excel_file_name_preprocessed"] = (
excel_file_name_preprocessed
)
preprocess_df.drop(
["perimeter_mean", "area_mean"], axis=1, inplace=True
)
preprocess_df.drop(
["perimeter_worst", "area_worst"], axis=1, inplace=True
)
preprocess_df.drop(["perimeter_se", "area_se"], axis=1, inplace=True)
preprocess_df.drop(
[
"radius_worst",
"concave_points_mean",
"texture_worst",
"symmetry_worst",
"smoothness_worst",
],
axis=1,
inplace=True,
)
le = LabelEncoder()
preprocess_df["diagnosis"] = le.fit_transform(
preprocess_df["diagnosis"]
)
preprocess_df.to_csv(excel_file_name_preprocessed, index=False)
else:
preprocess_df = pd.read_csv(excel_file_name_preprocessed)
preprocess(preprocess_df, value_list, excel_file_name_preprocessed)
preprocess_df.drop(["id"], axis=1, inplace=True)
            # PCA: cumulative explained variance per number of components.
            pca = PCA()
            pca.fit(preprocess_df.loc[:, "radius_mean":])
            exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
            pca = px.area(
                x=range(1, exp_var_cumul.shape[0] + 1),
                y=exp_var_cumul,
                labels={"x": "# Components", "y": "Explained Variance"},
            ).to_html()
            # t-SNE: 2D projection of the preprocessed features, coloured by diagnosis.
            tsne = TSNE(n_components=2, random_state=0)
            projections = tsne.fit_transform(
                preprocess_df.drop(["diagnosis"], axis=1).values
            )
            tsne_df = pd.DataFrame(
                {
                    "0": projections[:, 0],
                    "1": projections[:, 1],
                    "diagnosis": preprocess_df["diagnosis"],
                }
            )
            tsne = px.scatter(
                tsne_df,
                x="0",
                y="1",
                color="diagnosis",
                color_continuous_scale=px.colors.sequential.Rainbow,
            )
            tsne.update_layout(clickmode="event+select")
            # tsne_opacity.update_layout(clickmode="event+select")
            # Persist the figure so the counterfactual view can add traces to it later.
            with open("tsne.sav", "wb") as f:
                pickle.dump(tsne, f)
            tsne = tsne.to_html()
            request.session["tsne_projection"] = projections.tolist()
elif "cf" in request.POST:
excel_file_name_preprocessed = request.session.get(
"excel_file_name_preprocessed"
)
df = pd.read_csv(excel_file_name_preprocessed)
df_id = request.session.get("cfrow_id")
model = request.session.get("model")
row = df.iloc[[int(df_id)]]
counterfactuals(row, model, excel_file_name_preprocessed)
# get coordinates of the clicked point (saved during the actual click)
clicked_point = request.session.get("clicked_point")
clicked_point_df = pd.DataFrame(
{
"0": clicked_point[0],
"1": clicked_point[1],
"diagnosis": row.diagnosis,
}
)
clicked_point_df.reset_index(drop=True)
# tSNE
cf_df = pd.read_csv("counterfactuals.csv")
# get rows count
request.session["cfdf_rows"] = cf_df.index.values.tolist()
# select a cf randomly for demonstration
request.session["cfrow_cf"] = cf_df.iloc[:1].to_html()
df_merged = pd.concat(
[cf_df, df.drop("id", axis=1)], ignore_index=True, axis=0
)
tsne_cf = TSNE(n_components=2, random_state=0)
projections = tsne_cf.fit_transform(
df_merged.drop(["diagnosis"], axis=1).values
)
cf_df = pd.DataFrame(
{
"0": projections[:3, 0],
"1": projections[:3, 1],
"diagnosis": cf_df.diagnosis.iloc[:3],
}
)
cf_df = pd.concat([cf_df, clicked_point_df], ignore_index=True, axis=0)
tsne = joblib.load("tsne.sav")
cf_s = px.scatter(
cf_df,
x="0",
y="1",
color="diagnosis",
color_continuous_scale=px.colors.sequential.Rainbow,
)
cf_s.update_traces(
marker=dict(
size=10,
symbol="circle",
)
)
tsne.add_trace(cf_s.data[0])
pickle.dump(tsne, open("tsne_cfs.sav", "wb"))
tsne = tsne.to_html()
    else:
        # Non-POST request: fall back to the default dataset if the stored
        # name no longer points to an existing file.
        if not os.path.exists(excel_file_name):
            excel_file_name = "dataset.csv"
            request.session["excel_file_name"] = excel_file_name
        df = pd.read_csv(excel_file_name)
        # just two arbitrary columns to plot
        feature1 = df.columns[3]
        feature2 = df.columns[2]
        request.session["feature1"] = feature1
        request.session["feature2"] = feature2
        fig = stats(
            excel_file_name, request.session["feature1"], request.session["feature2"]
        )
    if df.empty:
        df = pd.read_csv(excel_file_name)
    request.session["fig"] = fig
    request.session["fig2"] = fig2
    request.session["pca"] = pca
    request.session["tsne"] = tsne
    request.session["clas_report"] = clas_report
    data_to_display = df[:10].to_html(index=False)
    request.session["data_to_display"] = data_to_display
    labels = df.columns[2:]
    context = {
        "data_to_display": data_to_display,
        "excel_file": excel_file_name,
        "labels": labels,
        "fig": fig,
        "fig2": fig2,
        "feature1": request.session["feature1"],
        "feature2": request.session["feature2"],
        "clas_report": clas_report,
        "pca": pca,
        "tsne": tsne,
        "cfrow_og": cfrow_og,
        "cfrow_cf": cfrow_cf,
        "cfdf_rows": cfdf_rows,
    }