# Django views for the churn-prediction demo (file metadata: 132 lines, 5.2 KiB, Python).
from django.shortcuts import render, redirect
|
|
import pandas as pd
|
|
from django.core.files.storage import FileSystemStorage
|
|
import pickle, os
|
|
from sklearn.preprocessing import OneHotEncoder
|
|
from sklearn.impute import SimpleImputer
|
|
import numpy as np
|
|
from . import utils
|
|
|
|
|
|
# Module-level state shared across views.  NOTE(review): these globals are
# per-process and shared by all users; the views also mirror them into the
# session — consider making the session the single source of truth.

# Last rendered plotly figure HTML (set by stats(), displayed by home()).
fig = None

# Working copy of the dataset that preprocess() mutates in place.
excel_file_name_preprocessed = "dataset_preprocessed.csv"

# Most recently uploaded raw dataset (overwritten on each upload in home()).
excel_file_name = "dataset.csv"
|
|
|
|
def home(request):
    """Render the landing page; on POST, store an uploaded CSV dataset.

    POST: the uploaded file is saved via Django's default storage
    (MEDIA_ROOT) under both its original name and a
    "<name>_preprocessed<ext>" working copy that preprocess() mutates.
    GET: the previously uploaded dataset (tracked in module globals and the
    session) is re-read for display.

    Context passed to the template: an HTML preview of the first five rows,
    the dataset filename, its column labels, and the last plot (if any).
    """
    global fig
    global excel_file_name
    global excel_file_name_preprocessed

    if request.method == 'POST':
        excel_file = request.FILES["excel_file"]
        excel_file_name = excel_file.name

        fs = FileSystemStorage()  # defaults to MEDIA_ROOT
        request.session['excel_file_name'] = excel_file_name
        fs.save(excel_file_name, excel_file)

        # Derive "<name>_preprocessed<ext>".  os.path.splitext is safe even
        # when the filename has no extension (the original str.index('.')
        # raised ValueError in that case).
        root, ext = os.path.splitext(excel_file_name)
        excel_file_name_preprocessed = root + "_preprocessed" + ext
        request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed
        fs.save(excel_file_name_preprocessed, excel_file)

    # If the working copy for preprocessing does not exist yet, create it
    # from the raw dataset.
    if not os.path.exists(excel_file_name_preprocessed):
        pd.read_csv(excel_file_name).to_csv(excel_file_name_preprocessed)

    # Collect the data to render: first five rows as an HTML table.
    df = pd.read_csv(excel_file_name)
    data_to_display = df.head().to_html()

    request.session['data_to_display'] = data_to_display
    request.session['excel_file_name'] = excel_file_name
    request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed

    context = {
        'data_to_display': data_to_display,
        'excel_file': excel_file_name,
        'labels': df.columns,
        'fig': fig,
    }
    return render(request, 'base/home.html', context)
|
|
|
|
def stats(request):
    """Build a scatter plot of two dataset features, colored by 'Churn'.

    The plot's HTML fragment is stored both in the module-level ``fig``
    global and in the session, then the user is redirected to 'home',
    which renders it.
    """
    global fig

    excel_file = request.session.get('excel_file_name')
    df = pd.read_csv(excel_file)

    import plotly.express as px

    if request.method == 'POST':
        # Fall back to the defaults when the form omits a feature, so
        # px.scatter never receives None as a column name.
        feature1 = request.POST.get('feature1') or "MonthlyCharges"
        feature2 = request.POST.get('feature2') or "tenure"
    else:
        feature1 = "MonthlyCharges"
        feature2 = "tenure"

    fig = px.scatter(df, x=feature1, y=feature2, color='Churn')
    fig = fig.to_html(full_html=False)
    request.session['fig'] = fig
    return redirect('home')
|
|
|
|
def preprocess(request):
    """Apply the preprocessing steps selected in the form to the dataset.

    Supported POST flags: 'std' (standard-scale numeric columns), 'onehot'
    (one-hot encode the known categorical columns), 'imp' (mean-impute
    numeric columns).  The transformed frame overwrites the
    "<name>_preprocessed" CSV, so steps are cumulative across requests.
    Redirects back to 'home'.
    """
    from sklearn.preprocessing import StandardScaler

    excel_file_name_preprocessed = request.session.get('excel_file_name_preprocessed')
    data = pd.read_csv(excel_file_name_preprocessed)

    # Drop identifier/target columns once; they must not be scaled/encoded.
    if set(['No', 'customerID', 'Churn']).issubset(data.columns):
        data.drop(['No', 'customerID', 'Churn'], axis=1, inplace=True)

    if request.method == 'POST':
        if 'std' in request.POST:
            # Standard-scale only the numeric columns.
            scaler = StandardScaler()
            num_d = data.select_dtypes(exclude=['object'])
            data[num_d.columns] = scaler.fit_transform(num_d)

        if 'onehot' in request.POST:
            data = pd.get_dummies(
                data,
                columns=['gender', 'Partner', 'Dependents', 'PhoneService',
                         'MultipleLines', 'InternetService', 'OnlineSecurity',
                         'OnlineBackup', 'DeviceProtection', 'TechSupport',
                         'StreamingTV', 'StreamingMovies', 'Contract',
                         'PaperlessBilling', 'PaymentMethod'],
                drop_first=True,
            )

        if 'imp' in request.POST:
            # Mean-impute numeric columns only; categoricals pass through.
            data_numeric = data.select_dtypes(exclude=['object'])
            data_categorical = data.select_dtypes(exclude=['number'])
            imp = SimpleImputer(missing_values=np.nan, strategy='mean')
            data_numeric = pd.DataFrame(
                imp.fit_transform(data_numeric),
                columns=data_numeric.columns,
                index=data_numeric.index,
            )
            data = pd.concat([data_numeric, data_categorical], axis=1)

    # to_csv truncates the existing file, so the prior os.remove was
    # unnecessary (and risked losing the file if to_csv raised).
    # index=False stops a spurious "Unnamed: 0" column from accumulating
    # on every pass, which the original index-writing round trip caused.
    data.to_csv(excel_file_name_preprocessed, index=False)
    return redirect('home')
|
|
|
|
def training(request):
    """Split the uploaded dataset, persist the splits, and fit a model.

    POST flags select the estimator: 'logit' fits a LogisticRegression and
    pickles it to 'lg.sav' (then redirects home); 'xgb' fits an
    XGBClassifier and pickles it to 'xgb.sav'.  The train/test splits are
    always written to X_train/X_test/y_train/y_test CSVs.
    """
    excel_file = request.session.get('excel_file_name')
    data = pd.read_csv(excel_file)

    y = data['Churn'].replace({"Yes": 1, "No": 0})
    # Exclude the target from the feature matrix: the original trained on
    # `data`, which still contained 'Churn' — label leakage.
    X = data.drop(columns=['Churn'])

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    X_train.to_csv("X_train.csv")
    X_test.to_csv("X_test.csv")
    y_train.to_csv("y_train.csv")
    y_test.to_csv("y_test.csv")

    if 'logit' in request.POST:
        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression(random_state=0).fit(X_train, y_train)
        # `with` closes the file handle (the original leaked it).
        with open('lg.sav', 'wb') as f:
            pickle.dump(clf, f)
        return redirect('home')

    if 'xgb' in request.POST:
        from xgboost import XGBClassifier
        xgb = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(X_train, y_train)
        with open('xgb.sav', 'wb') as f:
            pickle.dump(xgb, f)
        # NOTE(review): original fell through to render here (no redirect);
        # behavior preserved.

    return render(request, 'base/home.html', {})
|
|
|