This commit is contained in:
atla8167 2024-05-28 11:52:40 +03:00
parent affbe5570b
commit 539aa9849c
30 changed files with 0 additions and 18306 deletions

1199
X_test.csv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,3 +0,0 @@
from django.contrib import admin
# Register your models here.

@ -1,6 +0,0 @@
from django.apps import AppConfig
class BaseConfig(AppConfig):
    """Application configuration for the ``base`` Django app."""

    # Name under which the application is registered.
    name = 'base'

    # Field class used for primary keys that models do not declare themselves.
    default_auto_field = 'django.db.models.BigAutoField'

@ -1,4 +0,0 @@
from django import forms
class DocumentForm(forms.Form):
    """Plain (non-model) form exposing a single file-upload field."""

    # Rendered with the label "Select a file".
    docfile = forms.FileField(
        label='Select a file',
    )

@ -1,22 +0,0 @@
# Generated by Django 5.0.6 on 2024-05-20 15:19
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration of the app: creates the ``Upload`` model."""

    initial = True

    # First migration, so there is nothing to depend on.
    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Upload',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('uploadFile', models.FileField(upload_to='')),
                # Timestamp is filled in automatically when the row is created.
                ('uploadDate', models.DateTimeField(auto_now_add=True)),
            ],
        ),
    ]

@ -1,23 +0,0 @@
# Generated by Django 5.0.6 on 2024-05-20 15:33
from django.db import migrations, models
class Migration(migrations.Migration):
    """Replace the ``Upload`` model with a ``Document`` model."""

    # Must run after the migration that created ``Upload``.
    dependencies = [
        ('base', '0001_initial'),
    ]

    operations = [
        migrations.CreateModel(
            name='Document',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                # Uploaded files are stored under a year/month/day directory.
                ('docfile', models.FileField(upload_to='documents/%Y/%m/%d')),
            ],
        ),
        migrations.DeleteModel(
            name='Upload',
        ),
    ]

@ -1 +0,0 @@
from django.db import models

@ -1,7 +0,0 @@
// Once the DOM is ready, make a click on #spinner add the "busy" class
// to <body>.
$(function () {
    var markBusy = function () {
        $('body').addClass('busy');
    };

    $('#spinner').on('click', markBusy);
});

@ -1,128 +0,0 @@
{% extends 'main.html' %}
{% block content %}
{% load static %}
<div class="container-fluid">
<div class="mx-auto p-4 text-center bg-white shadow-sm">
<a href="/" >
<h1>
EXTREMUM
</h1>
</a>
</div>
<br>
<br>
<div class="row">
<div class="col d-flex justify-content-center">
<h3>
<i class="fas fa-upload"></i> Import a file
</h3>
</div>
</div>
<div class="row">
<div class = "mx-auto p-4 text-center">
<form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data">
{% csrf_token %}
<input type="file"
title="Upload excel file"
name="excel_file"
style="border: 1px solid black; padding: 5px;"
required="required">
<input action="uploadCsv" type="submit" method="POST"
value="Upload"
name="csv"
style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;">
</form>
</div>
</div>
<br>
<div class="row">
<div class="col">
<div class="scrollit">
{{ data_to_display|safe }}
</div>
</div>
</div>
<br>
<div class="row">
<div class="col d-flex justify-content-center">
<h3 >
<i class="fa-solid fa-chart-simple"></i> Stats
</h3>
</div>
</div>
<div class="row">
<div class="col d-flex justify-content-center">
<form action="{% url 'stats' %}" method="POST" id="stats">
{% csrf_token %}
<select id="feature1" name="feature1">
<option disabled selected> Feature 1 </option>
{% for label in labels %}
<option value="{{ label }}">{{ label }}</option>
{% endfor %}
</select>
<select id="feature2" name="feature2">
<option disabled selected> Feature 2 </option>
{% for label in labels %}
<option value="{{ label }}">{{ label }}</option>
{% endfor %}
</select>
<input type="submit" value="Plot">
</form>
</div>
</div>
{% if fig %}
<div class="row">
<div class="col d-flex justify-content-center">
{{ fig|safe }}
</div>
</div>
{% endif %}
<div class="row">
<div class="col d-flex justify-content-center">
<h3 >
<i class="fas fa-cog"></i> Preprocessing
</h3>
</div>
</div>
<div class="row">
<div class="col d-flex justify-content-center multi-button">
<form action="/preprocess" method="POST">
{% csrf_token %}
<div class="multi-button">
<button type="submit" name = "std" class="button-6" role="button" >Standardization</button>
<button type="submit" name="onehot" style="margin:0;margin-left:16px;" class="button-6" role="button" >One Hot Encoding</button>
<button type="submit" name="imp" style="margin:0;margin-left:16px;" class="button-6" role="button" >Imputations</button>
</div>
</form>
</div>
</div>
<br>
<br>
<div class="row">
<div class="col d-flex justify-content-center">
<form action="/train" method="POST">
{% csrf_token %}
<button type="submit" name = "svm" style="margin:0;margin-left:16px;" class="button-6" role="button" >Support Vector Machine</button>
<button type="submit" name = "logit" style="margin:0;margin-left:16px;" class="button-6" role="button" >Logistic Regression</button>
<button type="submit" name="xgb" style="margin:0;margin-left:16px;" class="button-6" role="button" >XGBoost</button>
<button type="submit" name="dt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Decision Tree</button>
<button type="submit" name="rt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Random Forest</button>
</form>
</div>
</div>
<br>
<br>
</div>
{% endblock content%}

@ -1,3 +0,0 @@
from django.test import TestCase
# Create your tests here.

@ -1,10 +0,0 @@
from django.urls import path
from . import views
from . import models
# Route table for this app: each entry maps a URL path to a function in
# views.py and gives the route a name for reverse lookups.
urlpatterns = [
# Landing page: file upload form and preview of the current dataset.
path('', views.home, name="home"),
# Applies the preprocessing step selected by the submitted button.
path('preprocess', views.preprocess, name="preprocess"),
# Builds the scatter plot for the two selected features.
path('stats', views.stats, name="stats"),
# Splits the dataset and trains the selected model.
path('train',views.training, name = 'training'),
]

@ -1,5 +0,0 @@
def stats(feature1, feature2, df):
    """Build a scatter plot of two columns of ``df`` as an HTML fragment.

    Args:
        feature1: Column of ``df`` plotted on the x axis.
        feature2: Column of ``df`` plotted on the y axis.
        df: Data to plot; its ``Churn`` column selects the point colour.

    Returns:
        The figure rendered to HTML without the surrounding ``<html>``
        document (``full_html=False``).
    """
    import plotly.express as px

    scatter = px.scatter(df, x=feature1, y=feature2, color='Churn')
    return scatter.to_html(full_html=False)

@ -1,131 +0,0 @@
from django.shortcuts import render, redirect
import pandas as pd
from django.core.files.storage import FileSystemStorage
import pickle, os
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
import numpy as np
from . import utils
# Module-level state read and written by the views below.
# NOTE(review): these values are shared by every request served by this
# process, not kept per user; the per-user copies live in request.session.
fig = None
excel_file_name_preprocessed = "dataset_preprocessed.csv"
excel_file_name = "dataset.csv"


def home(request):
    """Render the landing page and handle dataset uploads.

    On POST the uploaded ``excel_file`` is parsed as CSV and stored twice
    through ``FileSystemStorage``: once under its own name and once under
    ``<stem>_preprocessed<ext>``, the working copy for preprocessing.
    On every request the current dataset is read, its first five rows are
    rendered to HTML, and the file names are recorded in the session.

    Args:
        request: The incoming ``HttpRequest``.

    Returns:
        The rendered ``base/home.html`` response.

    Raises:
        KeyError: On POST when no ``excel_file`` was uploaded.
    """
    global excel_file_name
    global excel_file_name_preprocessed

    if request.method == 'POST':
        excel_file = request.FILES["excel_file"]
        excel_file_name = excel_file.name

        # Parsing up front makes a malformed upload fail before anything
        # is written to storage.
        pd.read_csv(excel_file)

        # splitext copes with names that have no extension (the previous
        # str.index('.') raised ValueError) and splits at the last dot.
        stem, extension = os.path.splitext(excel_file_name)
        excel_file_name_preprocessed = stem + "_preprocessed" + extension

        fs = FileSystemStorage()  # defaults to MEDIA_ROOT
        fs.save(excel_file_name, excel_file)
        fs.save(excel_file_name_preprocessed, excel_file)
        # NOTE(review): files are saved under MEDIA_ROOT but read below
        # relative to the working directory - confirm both are the same.

    # If the file for preprocessing does not exist, create it from the
    # original dataset.
    if not os.path.exists(excel_file_name_preprocessed):
        pd.read_csv(excel_file_name).to_csv(excel_file_name_preprocessed)

    # Collect the data to render.
    df = pd.read_csv(excel_file_name)
    data_to_display = df[:5].to_html()

    # The other views look the file names up under these session keys.
    request.session['data_to_display'] = data_to_display
    request.session['excel_file_name'] = excel_file_name
    request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed

    context = {
        'data_to_display': data_to_display,
        'excel_file': excel_file_name,
        'labels': df.columns,
        'fig': fig,
    }
    return render(request, 'base/home.html', context)
def stats(request):
    """Plot two dataset columns against each other and return to the home page.

    The dataset is the file named by the session key ``excel_file_name``.
    On POST the columns come from the ``feature1`` / ``feature2`` form
    fields; otherwise ``MonthlyCharges`` and ``tenure`` are used. The
    resulting HTML fragment is stored in the module-level ``fig`` and in
    ``request.session['fig']``.

    Args:
        request: The incoming ``HttpRequest``.

    Returns:
        A redirect to the ``home`` route.
    """
    global fig

    source_path = request.session.get('excel_file_name')
    frame = pd.read_csv(source_path)
    import plotly.express as px

    if request.method != 'POST':
        feature1, feature2 = "MonthlyCharges", "tenure"
    else:
        feature1 = request.POST.get('feature1')
        feature2 = request.POST.get('feature2')

    # Points are coloured by the dataset's 'Churn' column.
    scatter = px.scatter(frame, x=feature1, y=feature2, color='Churn')
    fig = scatter.to_html(full_html=False)
    request.session['fig'] = fig
    return redirect('home')
def preprocess(request):
    """Apply the requested preprocessing step to the working dataset copy.

    The file named by the session key ``excel_file_name_preprocessed`` is
    loaded and, when all of ``No``, ``customerID`` and ``Churn`` are
    present, those columns are dropped. On POST the submitted button name
    selects the step and the result is written back to the same file:

    * ``std``    - standardize every non-object column.
    * ``onehot`` - one-hot encode the known categorical columns.
    * ``imp``    - mean-impute missing values in non-object columns.

    Args:
        request: The incoming ``HttpRequest``.

    Returns:
        A redirect to the ``home`` route.
    """
    from sklearn.preprocessing import StandardScaler

    excel_file_name_preprocessed = request.session.get('excel_file_name_preprocessed')
    data = pd.read_csv(excel_file_name_preprocessed)

    dropped_columns = ['No', 'customerID', 'Churn']
    if set(dropped_columns).issubset(data.columns):
        data.drop(dropped_columns, axis=1, inplace=True)

    if request.method == 'POST':
        if 'std' in request.POST:
            # Scale only the non-object columns; the rest are left as is.
            num_d = data.select_dtypes(exclude=['object'])
            data[num_d.columns] = StandardScaler().fit_transform(num_d)

        if 'onehot' in request.POST:
            categorical_columns = [
                'gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
                'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
                'PaperlessBilling', 'PaymentMethod',
            ]
            # Encode only the columns still present: get_dummies raises
            # KeyError for missing ones, e.g. when the data was already
            # encoded by an earlier request.
            present = [name for name in categorical_columns if name in data.columns]
            if present:
                data = pd.get_dummies(data, columns=present, drop_first=True)

        if 'imp' in request.POST:
            data_numeric = data.select_dtypes(exclude=['object'])
            data_categorical = data.select_dtypes(exclude=['number'])
            imp = SimpleImputer(missing_values=np.nan, strategy='mean')
            # Only apply the imputer to the non-object columns.
            data_numeric = pd.DataFrame(
                imp.fit_transform(data_numeric),
                columns=data_numeric.columns,
                index=data_numeric.index,
            )
            data = pd.concat([data_numeric, data_categorical], axis=1)

        # to_csv overwrites the file in place, so it is not removed first.
        # index=False stops each round trip from adding another
        # "Unnamed: 0" index column.
        data.to_csv(excel_file_name_preprocessed, index=False)

    return redirect('home')
def training(request):
    """Split the dataset and train the model selected by the submitted button.

    The file named by the session key ``excel_file_name`` is loaded, the
    ``Churn`` column is mapped to 1 ("Yes") / 0 ("No") and used as the
    target, and an 80/20 train/test split is written to ``X_train.csv``,
    ``X_test.csv``, ``y_train.csv`` and ``y_test.csv``. A ``logit`` button
    trains logistic regression and pickles it to ``lg.sav``; an ``xgb``
    button trains an XGBoost classifier and pickles it to ``xgb.sav``.

    Args:
        request: The incoming ``HttpRequest``.

    Returns:
        A redirect to ``home`` after logistic regression, otherwise the
        ``base/home.html`` template rendered with an empty context.
    """
    from sklearn.model_selection import train_test_split

    excel_file = request.session.get('excel_file_name')
    data = pd.read_csv(excel_file)

    y = data['Churn'].replace({"Yes": 1, "No": 0})
    # The target must not be part of the feature matrix, otherwise the
    # models are trained on the label itself.
    features = data.drop(columns=['Churn'])

    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.2, random_state=0
    )
    X_train.to_csv("X_train.csv")
    X_test.to_csv("X_test.csv")
    y_train.to_csv("y_train.csv")
    y_test.to_csv("y_test.csv")

    if 'logit' in request.POST:
        from sklearn.linear_model import LogisticRegression

        clf = LogisticRegression(random_state=0).fit(X_train, y_train)
        # The context manager closes the file even if pickling fails.
        with open('lg.sav', 'wb') as model_file:
            pickle.dump(clf, model_file)
        return redirect('home')

    if 'xgb' in request.POST:
        from xgboost import XGBClassifier

        xgb = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(X_train, y_train)
        with open('xgb.sav', 'wb') as model_file:
            pickle.dump(xgb, model_file)

    return render(request, 'base/home.html', {})

File diff suppressed because it is too large Load Diff

BIN
lg.sav

Binary file not shown.

BIN
xgb.sav

Binary file not shown.

1199
y_test.csv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff