# Source snapshot metadata (from the original file listing):
# 2024-05-27 19:16:14 +03:00 — 132 lines, 5.2 KiB, Python
from django.shortcuts import render, redirect
import pandas as pd
from django.core.files.storage import FileSystemStorage
import pickle, os
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
import numpy as np
from . import utils
# Most recently rendered Plotly figure (HTML fragment), shared across views.
fig = None
# Default filenames for the working dataset and its preprocessing copy;
# both are overwritten with the uploaded file's name on the first POST to home().
excel_file_name_preprocessed = "dataset_preprocessed.csv"
excel_file_name = "dataset.csv"
def home(request):
    """Landing page: accept a CSV upload, persist it plus a working copy
    used for preprocessing, and render the first rows with any stored plot.

    POST: expects an uploaded file under the "excel_file" form key.
    GET: re-renders the most recently uploaded dataset.
    """
    global fig
    global excel_file_name
    global excel_file_name_preprocessed
    if request.method == 'POST':
        excel_file = request.FILES["excel_file"]
        excel_file_name = excel_file.name
        fs = FileSystemStorage()  # defaults to MEDIA_ROOT
        request.session['excel_file_name'] = excel_file_name
        fs.save(excel_file_name, excel_file)
        # "<stem>_preprocessed<ext>"; os.path.splitext handles names with
        # several dots, where str.index('.') would split at the first one.
        stem, ext = os.path.splitext(excel_file_name)
        excel_file_name_preprocessed = stem + "_preprocessed" + ext
        request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed
        # Rewind the upload stream before saving the second copy: the first
        # fs.save() consumed it, and saving again without seek(0) would
        # write an empty file.
        excel_file.seek(0)
        fs.save(excel_file_name_preprocessed, excel_file)
    # If the preprocessing copy does not exist yet (e.g. plain GET), create it.
    if not os.path.exists(excel_file_name_preprocessed):
        df = pd.read_csv(excel_file_name)
        # index=False: avoid writing a spurious "Unnamed: 0" column.
        df.to_csv(excel_file_name_preprocessed, index=False)
    # Collect the data to render.
    df = pd.read_csv(excel_file_name)
    data_to_display = df[:5].to_html()
    request.session['data_to_display'] = data_to_display
    request.session['excel_file_name'] = excel_file_name
    request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed
    labels = df.columns
    context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig}
    return render(request, 'base/home.html', context)
def stats(request):
    """Build a Plotly scatter of two user-chosen columns, colored by the
    'Churn' column, and stash the rendered HTML fragment for home().

    POST: reads column names from the 'feature1'/'feature2' form fields.
    GET: plots the MonthlyCharges/tenure defaults.
    """
    global fig
    excel_file = request.session.get('excel_file_name')
    df = pd.read_csv(excel_file)
    # Local import keeps plotly out of the module import path.
    import plotly.express as px
    if request.method == 'POST':
        # Fall back to the defaults when a form field is missing or empty,
        # so px.scatter never receives None as an axis name.
        feature1 = request.POST.get('feature1') or "MonthlyCharges"
        feature2 = request.POST.get('feature2') or "tenure"
    else:
        feature1 = "MonthlyCharges"
        feature2 = "tenure"
    fig = px.scatter(df, x=feature1, y=feature2, color='Churn')
    # Store the figure as an embeddable HTML fragment, not a full document.
    fig = fig.to_html(full_html=False)
    request.session['fig'] = fig
    return redirect('home')
def preprocess(request):
    """Apply the preprocessing steps selected in the form to the working
    copy of the dataset and write it back in place.

    Form flags (checkboxes in the POST body):
      std    -- standard-scale the numeric columns
      onehot -- one-hot encode the known categorical columns
      imp    -- mean-impute missing values in numeric columns
    """
    from sklearn.preprocessing import StandardScaler
    excel_file_name_preprocessed = request.session.get('excel_file_name_preprocessed')
    data = pd.read_csv(excel_file_name_preprocessed)
    # Drop identifier/label columns before transforming features.
    if set(['No', 'customerID', 'Churn']).issubset(data.columns):
        data.drop(['No', 'customerID', 'Churn'], axis=1, inplace=True)
    if request.method == 'POST':
        if 'std' in request.POST:
            # Standard-scale only the numeric columns.
            scaler = StandardScaler()
            num_d = data.select_dtypes(exclude=['object'])
            data[num_d.columns] = scaler.fit_transform(num_d)
        if 'onehot' in request.POST:
            data = pd.get_dummies(data, columns=['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                                                 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                                                 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
                                                 'PaperlessBilling', 'PaymentMethod'], drop_first=True)
        if 'imp' in request.POST:
            data_numeric = data.select_dtypes(exclude=['object'])
            data_categorical = data.select_dtypes(exclude=['number'])
            # Only apply the mean imputer to numeric columns.
            imp = SimpleImputer(missing_values=np.nan, strategy='mean')
            data_numeric = pd.DataFrame(imp.fit_transform(data_numeric), columns=data_numeric.columns, index=data_numeric.index)
            data = pd.concat([data_numeric, data_categorical], axis=1)
    # to_csv overwrites the file, so the previous os.remove was redundant;
    # index=False stops a new "Unnamed: 0" index column from accumulating
    # on every preprocessing pass.
    data.to_csv(excel_file_name_preprocessed, index=False)
    return redirect('home')
def training(request):
    """Split the current dataset, persist the splits to CSV, fit the model
    selected in the form (logistic regression or XGBoost), and pickle it.

    Form flags: 'logit' -> lg.sav, 'xgb' -> xgb.sav.
    """
    excel_file = request.session.get('excel_file_name')
    data = pd.read_csv(excel_file)
    # Binary-encode the target.
    y = data['Churn'].replace({"Yes": 1, "No": 0})
    # Drop the target from the feature matrix: the original passed the
    # full frame as X, leaking the label into the features.
    X = data.drop(columns=['Churn'])
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    X_train.to_csv("X_train.csv")
    X_test.to_csv("X_test.csv")
    y_train.to_csv("y_train.csv")
    y_test.to_csv("y_test.csv")
    if 'logit' in request.POST:
        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression(random_state=0).fit(X_train, y_train)
        # 'with' closes the handle even if pickling raises.
        with open('lg.sav', 'wb') as f:
            pickle.dump(clf, f)
        return redirect('home')
    if 'xgb' in request.POST:
        from xgboost import XGBClassifier
        xgb = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(X_train, y_train)
        with open('xgb.sav', 'wb') as f:
            pickle.dump(xgb, f)
    # Redirect like the logit branch did, instead of rendering the template
    # with an empty context.
    return redirect('home')