def home(request):
    """Render the main dashboard page.

    Restores previously computed artefacts (plots, PCA/t-SNE figures,
    classification report, counterfactual rows) from the session, then
    dispatches on the POST action of the page:

    * ``csv``        -- upload a new dataset and plot two default features.
    * ``plot``       -- re-plot two user-selected features.
    * ``traintest``  -- train or evaluate the selected model.
    * ``preprocess`` -- drop correlated columns, run PCA and t-SNE.
    * ``cf``         -- generate counterfactuals for the clicked point.

    AJAX requests are delegated to ``ajax_requests``.  The ``context``
    built at the end is consumed by the template rendered after this body.
    """
    # request.session.flush()

    # Previously rendered artefacts survive across requests in the session;
    # each defaults to None on a fresh session.
    fig = request.session.get("fig")
    fig2 = request.session.get("fig2")
    pca = request.session.get("pca")
    tsne = request.session.get("tsne")
    cfrow_og = request.session.get("cfrow_og")
    cfrow_cf = request.session.get("cfrow_cf")
    cfdf_rows = request.session.get("cfdf_rows")
    clas_report = request.session.get("clas_report")

    # Name for the default dataset. If the session variable is not set that
    # means there was never a csv UPLOAD and thus the default dataset
    # should be used.
    if "excel_file_name" not in request.session:
        request.session["excel_file_name"] = FILE_NAME
    excel_file_name = request.session["excel_file_name"]

    if "excel_file_name_preprocessed" not in request.session:
        request.session["excel_file_name_preprocessed"] = PROCESS_FILE_NAME
    excel_file_name_preprocessed = request.session["excel_file_name_preprocessed"]

    # ajax request condition: answered separately, never renders the page.
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return ajax_requests(request.POST.get("action"), request)

    df = pd.DataFrame()
    if request.method == "POST":
        if "csv" in request.POST:
            excel_file = request.FILES["excel_file"]
            excel_file_name = request.FILES["excel_file"].name

            # A new dataset invalidates every previously computed artefact.
            fig = fig2 = pca = tsne = clas_report = None
            cfrow_cf = cfrow_og = None

            # here we dont use the name of the file since the
            # uploaded file is not yet saved
            # In every other case we just need the name
            # fs = FileSystemStorage()  # defaults to MEDIA_ROOT
            request.session["excel_file_name"] = excel_file_name
            # fs.save(excel_file_name, excel_file)

            df = pd.read_csv(excel_file)
            # df.drop(["id"], axis=1, inplace=True)
            df.to_csv(excel_file_name, index=False)

            # Two arbitrary columns for the initial plot.
            feature1 = df.columns[3]
            feature2 = df.columns[2]
            request.session["feature1"] = feature1
            request.session["feature2"] = feature2

            fig = stats(
                excel_file_name,
                request.session["feature1"],
                request.session["feature2"],
            )

        elif "plot" in request.POST:
            feature1 = request.POST.get("feature1")
            feature2 = request.POST.get("feature2")
            request.session["feature1"] = feature1
            request.session["feature2"] = feature2
            fig = stats(
                excel_file_name,
                request.session["feature1"],
                request.session["feature2"],
            )

        elif "traintest" in request.POST:
            mode = request.POST.get("colorRadio")
            model = request.POST.get("model")
            test_size = float(request.POST.get("split_input"))

            request.session["model"] = model

            # Fail loudly on an unexpected model name instead of the
            # NameError on `con` the old if/elif ladder produced.
            if model not in ("logit", "xgb", "dt", "svm", "rf"):
                raise ValueError(f"unknown model: {model!r}")

            con = None
            if mode == "train":
                con = training(excel_file_name_preprocessed, model, test_size)
            elif mode == "test":
                # BUGFIX: the "rf" branch previously called training()
                # (with test_size) in test mode instead of testing().
                con = testing(excel_file_name_preprocessed, model)

            # An unknown mode remains a no-op, as in the original code.
            if con is not None:
                fig2 = con["fig2"]
                clas_report = con["clas_report"].to_html()

        elif "preprocess" in request.POST:
            value_list = request.POST.getlist("boxes")

            # if file for preprocessing does not exist create it
            # also apply basic preprocessing
            if not os.path.exists(excel_file_name_preprocessed):
                # generate filename from the raw dataset's name
                idx = excel_file_name.index(".")
                excel_file_name_preprocessed = (
                    excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
                )
                # save file for preprocessing
                preprocess_df = pd.read_csv(excel_file_name)
                request.session["excel_file_name_preprocessed"] = (
                    excel_file_name_preprocessed
                )
                # Drop columns that are (near-)redundant with the remaining
                # features -- presumably chosen from a correlation analysis;
                # TODO confirm against the analysis notebook.
                preprocess_df.drop(
                    ["perimeter_mean", "area_mean"], axis=1, inplace=True
                )
                preprocess_df.drop(
                    ["perimeter_worst", "area_worst"], axis=1, inplace=True
                )
                preprocess_df.drop(["perimeter_se", "area_se"], axis=1, inplace=True)
                preprocess_df.drop(
                    [
                        "radius_worst",
                        "concave_points_mean",
                        "texture_worst",
                        "symmetry_worst",
                        "smoothness_worst",
                    ],
                    axis=1,
                    inplace=True,
                )

                # Label-encode the target column.
                le = LabelEncoder()
                preprocess_df["diagnosis"] = le.fit_transform(
                    preprocess_df["diagnosis"]
                )

                preprocess_df.to_csv(excel_file_name_preprocessed, index=False)
            else:
                preprocess_df = pd.read_csv(excel_file_name_preprocessed)

            preprocess(preprocess_df, value_list, excel_file_name_preprocessed)
            preprocess_df.drop(["id"], axis=1, inplace=True)

            # PCA: cumulative explained variance per number of components.
            pca = PCA()
            pca.fit(preprocess_df.loc[:, "radius_mean":])
            exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
            pca = px.area(
                x=range(1, exp_var_cumul.shape[0] + 1),
                y=exp_var_cumul,
                labels={"x": "# Components", "y": "Explained Variance"},
            ).to_html()

            # tSNE: 2-D projection of the features, coloured by diagnosis.
            tsne = TSNE(n_components=2, random_state=0)
            projections = tsne.fit_transform(
                preprocess_df.drop(["diagnosis"], axis=1).values
            )
            tsne_df = pd.DataFrame(
                {
                    "0": projections[:, 0],
                    "1": projections[:, 1],
                    "diagnosis": preprocess_df["diagnosis"],
                }
            )

            tsne = px.scatter(
                tsne_df,
                x="0",
                y="1",
                color="diagnosis",
                color_continuous_scale=px.colors.sequential.Rainbow,
            )

            tsne.update_layout(clickmode="event+select")
            # tsne_opacity.update_layout(clickmode="event+select")
            # Persist the figure so the "cf" action can add traces later.
            # BUGFIX: use a context manager so the file handle is closed.
            with open("tsne.sav", "wb") as fh:
                pickle.dump(tsne, fh)
            tsne = tsne.to_html()
            request.session["tsne_projection"] = projections.tolist()

        elif "cf" in request.POST:
            excel_file_name_preprocessed = request.session.get(
                "excel_file_name_preprocessed"
            )

            df = pd.read_csv(excel_file_name_preprocessed)
            df_id = request.session.get("cfrow_id")
            model = request.session.get("model")
            row = df.iloc[[int(df_id)]]
            counterfactuals(row, model, excel_file_name_preprocessed)

            # get coordinates of the clicked point (saved during the actual click)
            clicked_point = request.session.get("clicked_point")
            clicked_point_df = pd.DataFrame(
                {
                    "0": clicked_point[0],
                    "1": clicked_point[1],
                    "diagnosis": row.diagnosis,
                }
            )
            # BUGFIX: reset_index() is not in-place -- keep the result.
            # (Behaviourally inert here because the concat below uses
            # ignore_index=True, but this is what was intended.)
            clicked_point_df = clicked_point_df.reset_index(drop=True)

            # tSNE over the original data plus the generated counterfactuals.
            cf_df = pd.read_csv("counterfactuals.csv")

            # get rows count
            request.session["cfdf_rows"] = cf_df.index.values.tolist()

            # select a cf randomly for demonstration
            request.session["cfrow_cf"] = cf_df.iloc[:1].to_html()

            df_merged = pd.concat(
                [cf_df, df.drop("id", axis=1)], ignore_index=True, axis=0
            )
            tsne_cf = TSNE(n_components=2, random_state=0)
            projections = tsne_cf.fit_transform(
                df_merged.drop(["diagnosis"], axis=1).values
            )

            # Keep only the first three counterfactual projections for display.
            cf_df = pd.DataFrame(
                {
                    "0": projections[:3, 0],
                    "1": projections[:3, 1],
                    "diagnosis": cf_df.diagnosis.iloc[:3],
                }
            )

            cf_df = pd.concat([cf_df, clicked_point_df], ignore_index=True, axis=0)

            # NOTE(review): unpickling a figure written by an earlier request;
            # safe only because the file is produced locally, never uploaded.
            tsne = joblib.load("tsne.sav")
            cf_s = px.scatter(
                cf_df,
                x="0",
                y="1",
                color="diagnosis",
                color_continuous_scale=px.colors.sequential.Rainbow,
            )

            cf_s.update_traces(
                marker=dict(
                    size=10,
                    symbol="circle",
                )
            )

            tsne.add_trace(cf_s.data[0])
            # BUGFIX: use a context manager so the file handle is closed.
            with open("tsne_cfs.sav", "wb") as fh:
                pickle.dump(tsne, fh)
            tsne = tsne.to_html()

    else:
        # Plain GET: fall back to the bundled dataset if the remembered file
        # has disappeared from disk.
        if not os.path.exists(excel_file_name):
            excel_file_name = "dataset.csv"
            request.session["excel_file_name"] = excel_file_name

        df = pd.read_csv(excel_file_name)

        # just random columns to plot
        feature1 = df.columns[3]
        feature2 = df.columns[2]
        request.session["feature1"] = feature1
        request.session["feature2"] = feature2
        fig = stats(
            excel_file_name, request.session["feature1"], request.session["feature2"]
        )

    # POST branches other than "csv"/"cf" never load the dataframe; load it
    # now so the preview table below has data.
    if df.empty:
        df = pd.read_csv(excel_file_name)

    # Persist artefacts for the next request.
    request.session["fig"] = fig
    request.session["fig2"] = fig2
    request.session["pca"] = pca
    request.session["tsne"] = tsne
    request.session["clas_report"] = clas_report

    data_to_display = df[:10].to_html(index=False)
    request.session["data_to_display"] = data_to_display
    labels = df.columns[2:]
    context = {
        "data_to_display": data_to_display,
        "excel_file": excel_file_name,
        "labels": labels,
        "fig": fig,
        "fig2": fig2,
        "feature1": request.session["feature1"],
        "feature2": request.session["feature2"],
        "clas_report": clas_report,
        "pca": pca,
        "tsne": tsne,
        "cfrow_og": cfrow_og,
        "cfrow_cf": cfrow_cf,
        "cfdf_rows": cfdf_rows,
    }