# Source snapshot metadata (from the original file listing):
# 2024-05-27 19:16:14 +03:00 — 132 lines, 5.2 KiB, Python
from django.shortcuts import render, redirect
import pandas as pd
from django.core.files.storage import FileSystemStorage
import pickle, os
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
import numpy as np
from . import utils
# Most recently rendered Plotly figure (HTML fragment), shared across views.
fig = None
# Default filenames for the working dataset and its preprocessing copy;
# both are overwritten with the uploaded file's name on the first POST to home().
excel_file_name_preprocessed = "dataset_preprocessed.csv"
excel_file_name = "dataset.csv"
def home(request):
    """Landing page: accept a CSV upload, persist it plus a working copy
    used for preprocessing, and render the first rows with any stored plot.

    POST: expects an uploaded file under the "excel_file" form key.
    GET: re-renders the most recently uploaded dataset.
    """
    global fig
    global excel_file_name
    global excel_file_name_preprocessed
    if request.method == 'POST':
        excel_file = request.FILES["excel_file"]
        excel_file_name = excel_file.name
        fs = FileSystemStorage()  # defaults to MEDIA_ROOT
        request.session['excel_file_name'] = excel_file_name
        fs.save(excel_file_name, excel_file)
        # "<stem>_preprocessed<ext>"; os.path.splitext handles names with
        # several dots, where str.index('.') would split at the first one.
        stem, ext = os.path.splitext(excel_file_name)
        excel_file_name_preprocessed = stem + "_preprocessed" + ext
        request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed
        # Rewind the upload stream before saving the second copy: the first
        # fs.save() consumed it, and saving again without seek(0) would
        # write an empty file.
        excel_file.seek(0)
        fs.save(excel_file_name_preprocessed, excel_file)
    # If the preprocessing copy does not exist yet (e.g. plain GET), create it.
    if not os.path.exists(excel_file_name_preprocessed):
        df = pd.read_csv(excel_file_name)
        # index=False: avoid writing a spurious "Unnamed: 0" column.
        df.to_csv(excel_file_name_preprocessed, index=False)
    # Collect the data to render.
    df = pd.read_csv(excel_file_name)
    data_to_display = df[:5].to_html()
    request.session['data_to_display'] = data_to_display
    request.session['excel_file_name'] = excel_file_name
    request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed
    labels = df.columns
    context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig}
    return render(request, 'base/home.html', context)
def stats(request):
    """Build a Plotly scatter of two user-chosen columns, colored by the
    'Churn' column, and stash the rendered HTML fragment for home().

    POST: reads column names from the 'feature1'/'feature2' form fields.
    GET: plots the MonthlyCharges/tenure defaults.
    """
    global fig
    excel_file = request.session.get('excel_file_name')
    df = pd.read_csv(excel_file)
    # Local import keeps plotly out of the module import path.
    import plotly.express as px
    if request.method == 'POST':
        # Fall back to the defaults when a form field is missing or empty,
        # so px.scatter never receives None as an axis name.
        feature1 = request.POST.get('feature1') or "MonthlyCharges"
        feature2 = request.POST.get('feature2') or "tenure"
    else:
        feature1 = "MonthlyCharges"
        feature2 = "tenure"
    fig = px.scatter(df, x=feature1, y=feature2, color='Churn')
    # Store the figure as an embeddable HTML fragment, not a full document.
    fig = fig.to_html(full_html=False)
    request.session['fig'] = fig
    return redirect('home')
def preprocess(request):
    """Apply the preprocessing steps selected in the form to the working
    copy of the dataset and write it back in place.

    Form flags (checkboxes in the POST body):
      std    -- standard-scale the numeric columns
      onehot -- one-hot encode the known categorical columns
      imp    -- mean-impute missing values in numeric columns
    """
    from sklearn.preprocessing import StandardScaler
    excel_file_name_preprocessed = request.session.get('excel_file_name_preprocessed')
    data = pd.read_csv(excel_file_name_preprocessed)
    # Drop identifier/label columns before transforming features.
    if set(['No', 'customerID', 'Churn']).issubset(data.columns):
        data.drop(['No', 'customerID', 'Churn'], axis=1, inplace=True)
    if request.method == 'POST':
        if 'std' in request.POST:
            # Standard-scale only the numeric columns.
            scaler = StandardScaler()
            num_d = data.select_dtypes(exclude=['object'])
            data[num_d.columns] = scaler.fit_transform(num_d)
        if 'onehot' in request.POST:
            data = pd.get_dummies(data, columns=['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                                                 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                                                 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
                                                 'PaperlessBilling', 'PaymentMethod'], drop_first=True)
        if 'imp' in request.POST:
            data_numeric = data.select_dtypes(exclude=['object'])
            data_categorical = data.select_dtypes(exclude=['number'])
            # Only apply the mean imputer to numeric columns.
            imp = SimpleImputer(missing_values=np.nan, strategy='mean')
            data_numeric = pd.DataFrame(imp.fit_transform(data_numeric), columns=data_numeric.columns, index=data_numeric.index)
            data = pd.concat([data_numeric, data_categorical], axis=1)
    # to_csv overwrites the file, so the previous os.remove was redundant;
    # index=False stops a new "Unnamed: 0" index column from accumulating
    # on every preprocessing pass.
    data.to_csv(excel_file_name_preprocessed, index=False)
    return redirect('home')
def training(request):
    """Split the current dataset, persist the splits to CSV, fit the model
    selected in the form (logistic regression or XGBoost), and pickle it.

    Form flags: 'logit' -> lg.sav, 'xgb' -> xgb.sav.
    """
    excel_file = request.session.get('excel_file_name')
    data = pd.read_csv(excel_file)
    # Binary-encode the target.
    y = data['Churn'].replace({"Yes": 1, "No": 0})
    # Drop the target from the feature matrix: the original passed the
    # full frame as X, leaking the label into the features.
    X = data.drop(columns=['Churn'])
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    X_train.to_csv("X_train.csv")
    X_test.to_csv("X_test.csv")
    y_train.to_csv("y_train.csv")
    y_test.to_csv("y_test.csv")
    if 'logit' in request.POST:
        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression(random_state=0).fit(X_train, y_train)
        # 'with' closes the handle even if pickling raises.
        with open('lg.sav', 'wb') as f:
            pickle.dump(clf, f)
        return redirect('home')
    if 'xgb' in request.POST:
        from xgboost import XGBClassifier
        xgb = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(X_train, y_train)
        with open('xgb.sav', 'wb') as f:
            pickle.dump(xgb, f)
    # Redirect like the logit branch did, instead of rendering the template
    # with an empty context.
    return redirect('home')