def home(request):
    """Render the main dashboard page.

    Restores previously computed artefacts (plots, PCA/t-SNE figures,
    classification report, counterfactual rows) from the session, then
    dispatches on the POST action of the page:

    * ``csv``        -- upload a new dataset and plot two default features.
    * ``plot``       -- re-plot two user-selected features.
    * ``traintest``  -- train or evaluate the selected model.
    * ``preprocess`` -- drop correlated columns, run PCA and t-SNE.
    * ``cf``         -- generate counterfactuals for the clicked point.

    AJAX requests are delegated to ``ajax_requests``.  The ``context``
    built at the end is consumed by the template rendered after this body.
    """
    # request.session.flush()

    # Previously rendered artefacts survive across requests in the session;
    # each defaults to None on a fresh session.
    fig = request.session.get("fig")
    fig2 = request.session.get("fig2")
    pca = request.session.get("pca")
    tsne = request.session.get("tsne")
    cfrow_og = request.session.get("cfrow_og")
    cfrow_cf = request.session.get("cfrow_cf")
    cfdf_rows = request.session.get("cfdf_rows")
    clas_report = request.session.get("clas_report")

    # Name for the default dataset. If the session variable is not set that
    # means there was never a csv UPLOAD and thus the default dataset
    # should be used.
    if "excel_file_name" not in request.session:
        request.session["excel_file_name"] = FILE_NAME
    excel_file_name = request.session["excel_file_name"]

    if "excel_file_name_preprocessed" not in request.session:
        request.session["excel_file_name_preprocessed"] = PROCESS_FILE_NAME
    excel_file_name_preprocessed = request.session["excel_file_name_preprocessed"]

    # ajax request condition: answered separately, never renders the page.
    if request.headers.get("X-Requested-With") == "XMLHttpRequest":
        return ajax_requests(request.POST.get("action"), request)

    df = pd.DataFrame()
    if request.method == "POST":
        if "csv" in request.POST:
            excel_file = request.FILES["excel_file"]
            excel_file_name = request.FILES["excel_file"].name

            # A new dataset invalidates every previously computed artefact.
            fig = fig2 = pca = tsne = clas_report = None
            cfrow_cf = cfrow_og = None

            # here we dont use the name of the file since the
            # uploaded file is not yet saved
            # In every other case we just need the name
            # fs = FileSystemStorage()  # defaults to MEDIA_ROOT
            request.session["excel_file_name"] = excel_file_name
            # fs.save(excel_file_name, excel_file)

            df = pd.read_csv(excel_file)
            # df.drop(["id"], axis=1, inplace=True)
            df.to_csv(excel_file_name, index=False)

            # Two arbitrary columns for the initial plot.
            feature1 = df.columns[3]
            feature2 = df.columns[2]
            request.session["feature1"] = feature1
            request.session["feature2"] = feature2

            fig = stats(
                excel_file_name,
                request.session["feature1"],
                request.session["feature2"],
            )

        elif "plot" in request.POST:
            feature1 = request.POST.get("feature1")
            feature2 = request.POST.get("feature2")
            request.session["feature1"] = feature1
            request.session["feature2"] = feature2
            fig = stats(
                excel_file_name,
                request.session["feature1"],
                request.session["feature2"],
            )

        elif "traintest" in request.POST:
            mode = request.POST.get("colorRadio")
            model = request.POST.get("model")
            test_size = float(request.POST.get("split_input"))

            request.session["model"] = model

            # Fail loudly on an unexpected model name instead of the
            # NameError on `con` the old if/elif ladder produced.
            if model not in ("logit", "xgb", "dt", "svm", "rf"):
                raise ValueError(f"unknown model: {model!r}")

            con = None
            if mode == "train":
                con = training(excel_file_name_preprocessed, model, test_size)
            elif mode == "test":
                # BUGFIX: the "rf" branch previously called training()
                # (with test_size) in test mode instead of testing().
                con = testing(excel_file_name_preprocessed, model)

            # An unknown mode remains a no-op, as in the original code.
            if con is not None:
                fig2 = con["fig2"]
                clas_report = con["clas_report"].to_html()

        elif "preprocess" in request.POST:
            value_list = request.POST.getlist("boxes")

            # if file for preprocessing does not exist create it
            # also apply basic preprocessing
            if not os.path.exists(excel_file_name_preprocessed):
                # generate filename from the raw dataset's name
                idx = excel_file_name.index(".")
                excel_file_name_preprocessed = (
                    excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
                )
                # save file for preprocessing
                preprocess_df = pd.read_csv(excel_file_name)
                request.session["excel_file_name_preprocessed"] = (
                    excel_file_name_preprocessed
                )
                # Drop columns that are (near-)redundant with the remaining
                # features -- presumably chosen from a correlation analysis;
                # TODO confirm against the analysis notebook.
                preprocess_df.drop(
                    ["perimeter_mean", "area_mean"], axis=1, inplace=True
                )
                preprocess_df.drop(
                    ["perimeter_worst", "area_worst"], axis=1, inplace=True
                )
                preprocess_df.drop(["perimeter_se", "area_se"], axis=1, inplace=True)
                preprocess_df.drop(
                    [
                        "radius_worst",
                        "concave_points_mean",
                        "texture_worst",
                        "symmetry_worst",
                        "smoothness_worst",
                    ],
                    axis=1,
                    inplace=True,
                )

                # Label-encode the target column.
                le = LabelEncoder()
                preprocess_df["diagnosis"] = le.fit_transform(
                    preprocess_df["diagnosis"]
                )

                preprocess_df.to_csv(excel_file_name_preprocessed, index=False)
            else:
                preprocess_df = pd.read_csv(excel_file_name_preprocessed)

            preprocess(preprocess_df, value_list, excel_file_name_preprocessed)
            preprocess_df.drop(["id"], axis=1, inplace=True)

            # PCA: cumulative explained variance per number of components.
            pca = PCA()
            pca.fit(preprocess_df.loc[:, "radius_mean":])
            exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
            pca = px.area(
                x=range(1, exp_var_cumul.shape[0] + 1),
                y=exp_var_cumul,
                labels={"x": "# Components", "y": "Explained Variance"},
            ).to_html()

            # tSNE: 2-D projection of the features, coloured by diagnosis.
            tsne = TSNE(n_components=2, random_state=0)
            projections = tsne.fit_transform(
                preprocess_df.drop(["diagnosis"], axis=1).values
            )
            tsne_df = pd.DataFrame(
                {
                    "0": projections[:, 0],
                    "1": projections[:, 1],
                    "diagnosis": preprocess_df["diagnosis"],
                }
            )

            tsne = px.scatter(
                tsne_df,
                x="0",
                y="1",
                color="diagnosis",
                color_continuous_scale=px.colors.sequential.Rainbow,
            )

            tsne.update_layout(clickmode="event+select")
            # tsne_opacity.update_layout(clickmode="event+select")
            # Persist the figure so the "cf" action can add traces later.
            # BUGFIX: use a context manager so the file handle is closed.
            with open("tsne.sav", "wb") as fh:
                pickle.dump(tsne, fh)
            tsne = tsne.to_html()
            request.session["tsne_projection"] = projections.tolist()

        elif "cf" in request.POST:
            excel_file_name_preprocessed = request.session.get(
                "excel_file_name_preprocessed"
            )

            df = pd.read_csv(excel_file_name_preprocessed)
            df_id = request.session.get("cfrow_id")
            model = request.session.get("model")
            row = df.iloc[[int(df_id)]]
            counterfactuals(row, model, excel_file_name_preprocessed)

            # get coordinates of the clicked point (saved during the actual click)
            clicked_point = request.session.get("clicked_point")
            clicked_point_df = pd.DataFrame(
                {
                    "0": clicked_point[0],
                    "1": clicked_point[1],
                    "diagnosis": row.diagnosis,
                }
            )
            # BUGFIX: reset_index() is not in-place -- keep the result.
            # (Behaviourally inert here because the concat below uses
            # ignore_index=True, but this is what was intended.)
            clicked_point_df = clicked_point_df.reset_index(drop=True)

            # tSNE over the original data plus the generated counterfactuals.
            cf_df = pd.read_csv("counterfactuals.csv")

            # get rows count
            request.session["cfdf_rows"] = cf_df.index.values.tolist()

            # select a cf randomly for demonstration
            request.session["cfrow_cf"] = cf_df.iloc[:1].to_html()

            df_merged = pd.concat(
                [cf_df, df.drop("id", axis=1)], ignore_index=True, axis=0
            )
            tsne_cf = TSNE(n_components=2, random_state=0)
            projections = tsne_cf.fit_transform(
                df_merged.drop(["diagnosis"], axis=1).values
            )

            # Keep only the first three counterfactual projections for display.
            cf_df = pd.DataFrame(
                {
                    "0": projections[:3, 0],
                    "1": projections[:3, 1],
                    "diagnosis": cf_df.diagnosis.iloc[:3],
                }
            )

            cf_df = pd.concat([cf_df, clicked_point_df], ignore_index=True, axis=0)

            # NOTE(review): unpickling a figure written by an earlier request;
            # safe only because the file is produced locally, never uploaded.
            tsne = joblib.load("tsne.sav")
            cf_s = px.scatter(
                cf_df,
                x="0",
                y="1",
                color="diagnosis",
                color_continuous_scale=px.colors.sequential.Rainbow,
            )

            cf_s.update_traces(
                marker=dict(
                    size=10,
                    symbol="circle",
                )
            )

            tsne.add_trace(cf_s.data[0])
            # BUGFIX: use a context manager so the file handle is closed.
            with open("tsne_cfs.sav", "wb") as fh:
                pickle.dump(tsne, fh)
            tsne = tsne.to_html()

    else:
        # Plain GET: fall back to the bundled dataset if the remembered file
        # has disappeared from disk.
        if not os.path.exists(excel_file_name):
            excel_file_name = "dataset.csv"
            request.session["excel_file_name"] = excel_file_name

        df = pd.read_csv(excel_file_name)

        # just random columns to plot
        feature1 = df.columns[3]
        feature2 = df.columns[2]
        request.session["feature1"] = feature1
        request.session["feature2"] = feature2
        fig = stats(
            excel_file_name, request.session["feature1"], request.session["feature2"]
        )

    # POST branches other than "csv"/"cf" never load the dataframe; load it
    # now so the preview table below has data.
    if df.empty:
        df = pd.read_csv(excel_file_name)

    # Persist artefacts for the next request.
    request.session["fig"] = fig
    request.session["fig2"] = fig2
    request.session["pca"] = pca
    request.session["tsne"] = tsne
    request.session["clas_report"] = clas_report

    data_to_display = df[:10].to_html(index=False)
    request.session["data_to_display"] = data_to_display
    labels = df.columns[2:]
    context = {
        "data_to_display": data_to_display,
        "excel_file": excel_file_name,
        "labels": labels,
        "fig": fig,
        "fig2": fig2,
        "feature1": request.session["feature1"],
        "feature2": request.session["feature2"],
        "clas_report": clas_report,
        "pca": pca,
        "tsne": tsne,
        "cfrow_og": cfrow_og,
        "cfrow_cf": cfrow_cf,
        "cfdf_rows": cfdf_rows,
    }