Tabular data Train and Testing for Breast Cancer dataset

This commit is contained in:
atla8167 2024-06-10 11:38:59 +03:00
parent 9a02a02658
commit 6047fad7ce
18 changed files with 6166 additions and 6327 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1 +1,3 @@
from django.db import models from django.db import models

414
base/static/css/style.css Normal file

@ -0,0 +1,414 @@
/* Poppins web font (weights 200-700), used as the site-wide typeface. */
@import url("https://fonts.googleapis.com/css2?family=Poppins:wght@200;300;400;500;600;700&display=swap");

/* Global reset: no default margins or padding, border-box sizing and the
   Poppins typeface on every element. */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
  font-family: "Poppins", sans-serif;
}

/* Keep some free space below every fluid container. The class selector
   outranks the universal reset above, so this padding still applies. */
.container-fluid {
  padding-bottom: 100px;
}
/* ---------- Top navigation bar: default (wide-screen) layout ---------- */

/* Dark bar fixed across the top of the viewport, stacked above the page. */
nav {
  position: fixed;
  z-index: 99;
  width: 100%;
  background: #242526;
}

/* Inner row, horizontally centered by `margin: auto`; children are spread
   to both ends and vertically centered. */
nav .wrapper {
  position: relative;
  max-width: 1300px;
  padding: 0px 30px;
  height: 70px;
  line-height: 70px;
  margin: auto;
  display: flex;
  align-items: center;
  justify-content: space-between;
}

/* Logo link. */
.wrapper .logo a {
  color: #f2f2f2;
  font-size: 30px;
  font-weight: 600;
  text-decoration: none;
}

/* Horizontal list of top-level links. */
.wrapper .nav-links {
  display: inline-flex;
}

.nav-links li {
  list-style: none;
}

.nav-links li a {
  color: #f2f2f2;
  text-decoration: none;
  font-size: 18px;
  font-weight: 500;
  padding: 9px 15px;
  border-radius: 5px;
  transition: all 0.3s ease;
}

.nav-links li a:hover {
  background: #3a3b3c;
}

/* Items meant for the narrow layout stay hidden here; the media query for
   small screens turns them back on. */
.nav-links .mobile-item {
  display: none;
}

/* Drop-down panel: parked slightly below its final position and invisible
   until its parent list item is hovered or focused. */
.nav-links .drop-menu {
  position: absolute;
  background: #242526;
  width: 180px;
  line-height: 45px;
  top: 85px;
  opacity: 0;
  visibility: hidden;
  box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15);
}

/* Reveal the drop-down / mega menu. `:focus-within` mirrors `:hover` so the
   menus also open when a link inside the list item receives keyboard focus. */
.nav-links li:hover .drop-menu,
.nav-links li:hover .mega-box,
.nav-links li:focus-within .drop-menu,
.nav-links li:focus-within .mega-box {
  transition: all 0.3s ease;
  top: 70px;
  opacity: 1;
  visibility: visible;
}

.drop-menu li a {
  width: 100%;
  display: block;
  padding: 0 0 0 15px;
  font-weight: 400;
  border-radius: 0px;
}

/* Full-width mega menu, hidden the same way as the drop-down. */
.mega-box {
  position: absolute;
  left: 0;
  width: 100%;
  padding: 0 30px;
  top: 85px;
  opacity: 0;
  visibility: hidden;
}

.mega-box .content {
  background: #242526;
  padding: 25px 20px;
  display: flex;
  width: 100%;
  justify-content: space-between;
  box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15);
}

/* Four columns per row, each a quarter of the width minus a gutter. */
.mega-box .content .row {
  width: calc(25% - 30px);
  line-height: 45px;
}

.content .row img {
  width: 100%;
  height: 100%;
  object-fit: cover;
}

.content .row header {
  color: #f2f2f2;
  font-size: 20px;
  font-weight: 500;
}

.content .row .mega-links {
  margin-left: -40px;
  border-left: 1px solid rgba(255, 255, 255, 0.09);
}

.row .mega-links li {
  padding: 0 20px;
}

.row .mega-links li a {
  padding: 0 20px;
  color: #d9d9d9;
  font-size: 17px;
  display: block;
}

.row .mega-links li a:hover {
  color: #f2f2f2;
}

/* Menu open/close buttons: hidden in the wide layout. */
.wrapper .btn {
  color: #fff;
  font-size: 20px;
  cursor: pointer;
  display: none;
}

.wrapper .btn.close-btn {
  position: absolute;
  right: 30px;
  top: 10px;
}
/* ---------- Navigation: narrow layout (viewport up to 970px wide) ---------- */
@media screen and (max-width: 970px) {
  /* The menu buttons become visible. */
  .wrapper .btn {
    display: block;
  }

  /* The link list turns into a panel parked off-screen to the left. */
  .wrapper .nav-links {
    position: fixed;
    height: 100vh;
    width: 100%;
    max-width: 350px;
    top: 0;
    left: -100%;
    background: #171f27;
    display: block;
    padding: 50px 10px;
    line-height: 50px;
    overflow-y: auto;
    box-shadow: 0 15px 15px rgba(0, 0, 0, 0.18);
    transition: all 0.3s ease;
  }

  /* Custom WebKit scroll bar. The selectors are unscoped, so they apply to
     every scrollable element while this media query matches. */
  ::-webkit-scrollbar {
    width: 10px;
  }

  ::-webkit-scrollbar-track {
    background: #242526;
  }

  ::-webkit-scrollbar-thumb {
    background: #3a3b3c;
  }

  /* Checking #menu-btn slides the panel in and hides the menu button;
     checking #close-btn shows the menu button again. */
  #menu-btn:checked ~ .nav-links {
    left: 0%;
  }

  #menu-btn:checked ~ .btn.menu-btn {
    display: none;
  }

  #close-btn:checked ~ .btn.menu-btn {
    display: block;
  }

  .nav-links li {
    margin: 15px 10px;
  }

  .nav-links li a {
    padding: 0 20px;
    display: block;
    font-size: 20px;
  }

  /* Drop-down flows inline and is collapsed to zero height until toggled. */
  .nav-links .drop-menu {
    position: static;
    opacity: 1;
    top: 65px;
    visibility: visible;
    padding-left: 20px;
    width: 100%;
    max-height: 0;
    overflow: hidden;
    box-shadow: none;
    transition: all 0.3s ease;
  }

  /* Checking the matching toggle lifts the height cap and expands the menu. */
  #showDrop:checked ~ .drop-menu,
  #showMega:checked ~ .mega-box {
    max-height: 100%;
  }

  /* Swap the desktop items for their mobile counterparts. */
  .nav-links .desktop-item {
    display: none;
  }

  .nav-links .mobile-item {
    display: block;
    color: #f2f2f2;
    font-size: 20px;
    font-weight: 500;
    padding-left: 20px;
    cursor: pointer;
    border-radius: 5px;
    transition: all 0.3s ease;
  }

  .nav-links .mobile-item:hover {
    background: #3a3b3c;
  }

  .drop-menu li {
    margin: 0;
  }

  .drop-menu li a {
    border-radius: 5px;
    font-size: 18px;
  }

  /* Mega menu: same inline, collapsed treatment as the drop-down. */
  .mega-box {
    position: static;
    top: 65px;
    opacity: 1;
    visibility: visible;
    padding: 0 20px;
    max-height: 0;
    overflow: hidden;
    transition: all 0.3s ease;
  }

  /* Columns stack vertically. */
  .mega-box .content {
    box-shadow: none;
    flex-direction: column;
    padding: 20px 20px 0;
  }

  .mega-box .content .row {
    width: 100%;
    margin-bottom: 15px;
    border-top: 1px solid rgba(255, 255, 255, 0.08);
  }

  /* No separator line above the first two rows. */
  .mega-box .content .row:nth-child(1),
  .mega-box .content .row:nth-child(2) {
    border-top: 0;
  }

  .content .row .mega-links {
    border-left: 0;
    padding-left: 15px;
  }

  .row .mega-links li {
    margin: 0;
  }

  .content .row header {
    font-size: 19px;
  }
}
/* Form inputs inside the nav are never rendered; only their :checked state
   is used by the selectors that open and close the menus. */
nav input {
  display: none;
}

/* Full-width text block centered on both axes: the top-left corner is placed
   at 50%/50% and then shifted back by half of the block's own size. */
.body-text {
  position: absolute;
  top: 50%;
  left: 50%;
  width: 100%;
  padding: 0 30px;
  text-align: center;
  transform: translate(-50%, -50%);
}

.body-text div {
  font-weight: 600;
  font-size: 45px;
}
/* ---------- CSV preview table ---------- */

/* Scrollable box around the table: shrinks to its content but never exceeds
   40% of the height / 35% of the width of its containing block. */
.scrollit {
  margin-top: 60px;
  overflow: auto;
  position: absolute;
  height: min-content;
  max-height: 40%;
  max-width: 35%;
}

/* Table styling for elements with class "dataframe". A single font-size is
   declared (0.9em) and Arial gets a generic fallback. */
.dataframe {
  font-family: Arial, sans-serif;
  border-collapse: collapse;
  font-size: 0.9em;
}

.dataframe thead tr {
  text-align: left;
  font-weight: bold;
}

.dataframe th,
.dataframe td {
  padding: 12px 15px;
  text-align: left;
}

/* Thin rule under every body row, zebra striping on even rows and a
   heavier accent rule under the last row. */
.dataframe tbody tr {
  border-bottom: 1px solid #dddddd;
}

.dataframe tbody tr:nth-of-type(even) {
  background-color: #f3f3f3;
}

.dataframe tbody tr:last-of-type {
  border-bottom: 2px solid #009879;
}
/* Plotly toolbar: hide every element with class "modebar". !important lets
   this rule win over other display declarations on the same element. */
.modebar {
display: none !important;
}
/* ---------- Button (.button-6) ---------- */

/* White pill-style button with a hairline border and a faint shadow. */
.button-6 {
  align-items: center;
  background-color: #FFFFFF;
  border: 1px solid rgba(0, 0, 0, 0.1);
  border-radius: .25rem;
  box-shadow: rgba(0, 0, 0, 0.02) 0 1px 3px 0;
  box-sizing: border-box;
  color: rgba(0, 0, 0, 0.85);
  cursor: pointer;
  display: inline-flex;
  /* Each family is listed once; the system UI font is tried first. */
  font-family: system-ui, -apple-system, "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 16px;
  font-weight: 600;
  justify-content: center;
  line-height: 1.25;
  margin: 0;
  min-height: 3rem;
  /* One pixel less than the nominal padding to compensate for the border. */
  padding: calc(.875rem - 1px) calc(1.5rem - 1px);
  position: relative;
  text-decoration: none;
  transition: all 250ms;
  /* Vendor-prefixed form first so the standard property has the last word. */
  -webkit-user-select: none;
  user-select: none;
  touch-action: manipulation;
  vertical-align: baseline;
  width: auto;
}

/* Hover and focus share the stronger border and shadow. */
.button-6:hover,
.button-6:focus {
  border-color: rgba(0, 0, 0, 0.15);
  box-shadow: rgba(0, 0, 0, 0.1) 0 4px 12px;
  color: rgba(0, 0, 0, 0.65);
}

/* Lift by one pixel on hover. */
.button-6:hover {
  transform: translateY(-1px);
}

/* Pressed state: grey fill, lighter shadow, back to the resting position. */
.button-6:active {
  background-color: #F0F0F1;
  border-color: rgba(0, 0, 0, 0.15);
  box-shadow: rgba(0, 0, 0, 0.06) 0 2px 4px;
  color: rgba(0, 0, 0, 0.65);
  transform: translateY(0);
}
/* ---------- Segmented radio control (.radio-inputs) ---------- */

/* Grey rounded track, 300px wide, that lays its options out in a wrapping
   flex row. */
.radio-inputs {
  position: relative;
  display: flex;
  flex-wrap: wrap;
  box-sizing: border-box;
  width: 300px;
  padding: 0.25rem;
  border-radius: 0.5rem;
  background-color: #EEE;
  box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.06);
  font-size: 14px;
}

/* Every option grows and shrinks to share the row. */
.radio-inputs .radio {
  flex: 1 1 auto;
  text-align: center;
}

/* The native radio is hidden; the sibling .name element is what is shown. */
.radio-inputs .radio input {
  display: none;
}

.radio-inputs .radio .name {
  display: flex;
  align-items: center;
  justify-content: center;
  padding: .5rem 0;
  border: none;
  border-radius: 0.5rem;
  color: rgba(51, 65, 85, 1);
  cursor: pointer;
  transition: all .15s ease-in-out;
}

/* Highlight the .name that directly follows the checked radio. */
.radio-inputs .radio input:checked + .name {
  background-color: #fff;
  font-weight: 600;
}

@ -0,0 +1,11 @@
// Once the DOM is ready, let the train/test radios decide which of the two
// panels (#train-me, #test-me) is visible.
$(document).ready(function () {
  $('.trainTestButton').click(function () {
    // true while the "train" radio is the selected one
    var trainSelected = $("#train").is(':checked');

    if (trainSelected) {
      $("#train-me").show();
      $("#test-me").hide();
      return;
    }

    $("#train-me").hide();
    $("#test-me").show();
  });
});

@ -0,0 +1,12 @@
// Scroll the window vertically to the offset stored under the "yPos" key of
// localStorage (the horizontal offset is always 0).
function setScreen() {
  window.scrollTo(0, localStorage.getItem("yPos"));
}
// Remember the window's current vertical scroll offset under the "yPos" key
// of localStorage.
function setScroll() {
  localStorage.setItem("yPos", window.pageYOffset);
}
// Reset the remembered scroll offset ("yPos" in localStorage) to 0 and then
// jump back to the top-left corner of the page.
function clearScreen() {
  var origin = 0;
  localStorage.setItem("yPos", origin);
  window.scrollTo(origin, origin);
}

@ -1,7 +0,0 @@
$(document).ready( function()
{
$('#spinner').on('click', function()
{
$('body').addClass('busy');
});
});

6
base/static/js/slider.js Normal file

@ -0,0 +1,6 @@
// Mirror the current position of the #split_input slider into the #value
// element: once on load and again on every "input" event.
const value = document.querySelector("#value");
const input = document.querySelector("#split_input");

// querySelector returns null when nothing matches, so only wire things up
// when both elements exist; otherwise the script is a no-op instead of
// throwing a TypeError.
if (value && input) {
  value.textContent = input.value;
  input.addEventListener("input", (event) => {
    value.textContent = event.target.value;
  });
}

@ -1,25 +1,14 @@
{% extends 'main.html' %} {% extends 'main.html' %}
{% block content %} {% block content %}
{% load static %} {% load static %}
<div class="container-fluid"> <div class="container-fluid">
<div class="mx-auto p-4 text-center bg-white shadow-sm">
<a href="/" >
<h1>
EXTREMUM
</h1>
</a>
</div>
<br>
<br>
<div class="row"> <div class="row">
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data"> <form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data">
<label style="display:flex; <label style="display:flex;
flex-direction:column; flex-direction:column;
align-items: center;"> align-items: center;">
<h3> <h3>
<i class="fas fa-upload"></i> Import a file <i class="fas fa-upload"></i> Import a file
</h3> </h3>
@ -36,11 +25,11 @@
style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;"> style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;">
</form> </form>
</div> </div>
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" name="plot" method="POST" id="stats"> <form action="{% url 'home' %}" name="plot" method="POST" id="stats">
<label style="display:flex; <label style="display:flex;
flex-direction:column; flex-direction:column;
align-items: center;"> align-items: center;">
<h3> <h3>
<i class="fa-solid fa-chart-simple"></i> Stats <i class="fa-solid fa-chart-simple"></i> Stats
</h3> </h3>
@ -66,62 +55,158 @@
</form> </form>
</div> </div>
</div> </div>
<br>
<br> <br>
<div class="row"> <div class="row">
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
<div class="scrollit"> <div class="scrollit">
{{ data_to_display|safe }} {{ data_to_display|safe }}
</div> </div>
</div> </div>
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
{{ fig|safe }} {{ fig|safe }}
</div> </div>
</div> </div>
<br> </div>
<br> <br>
<div class="row justify-content-center"> <br>
<div class="col d-flex justify-content-center"> <div class="container-fluid">
<form action="{% url 'home' %}" method="POST"> <div class="row">
<label style="display:flex; <section>
flex-direction:column; <label style="display:flex;
align-items: center;"> flex-direction:column;
<h3> align-items: center;">
<i class="fa-solid fa-wand-magic-sparkles"></i> Training <h2>
</h3> <i class="fas fa-cog"></i> Preprocessing
</h2>
</label> </label>
</section>
</div>
<br>
<br>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" method="POST" id="preprocess-form">
{% csrf_token %} {% csrf_token %}
<div class="btn-toolbar" role="toolbar"> <div style="scale: 1.2;">
<button type="submit" name="svm" style="margin:0;margin-left:16px;" class="button-6" role="button" >Support Vector Machine</button> <div class="form-check form-check-inline">
<button type="submit" name="logit" style="margin:0;margin-left:16px;" class="button-6" role="button" >Logistic Regression</button> <input class="form-check-input" type="checkbox" value="std" name="boxes" checked>
<button type="submit" name="xgb" style="margin:0;margin-left:16px;" class="button-6" role="button" >XGBoost</button> <input class="form-check-input" type="checkbox" value="std" name="boxes">
</div> <label class="form-check-label" for="inlineCheckbox1">Standardization</label>
<br> </div>
<div class="justify-content-center btn-toolbar" role="toolbar"> <div class="form-check form-check-inline">
<button type="submit" name="dt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Decision Tree</button> <input class="form-check-input" type="checkbox" value="onehot" name="boxes">
<button type="submit" name="rt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Random Forest</button> <label class="form-check-label" for="inlineCheckbox2">One Hot Encoding</label>
</div>
<div class="form-check form-check-inline">
<input class="form-check-input" type="checkbox" value="imp" name="boxes">
<label class="form-check-label" for="inlineCheckbox3">Imputations</label>
</div>
</div> </div>
</form> </form>
</div> </div>
<div class="col d-flex justify-content-center"> </div>
<div class="col d-flex justify-content-center multi-button"> <br>
<form action="{% url 'home' %}" method="POST"> <div class="row">
{% csrf_token %} <div class="col-sm d-flex justify-content-center">
<label style="display:flex; <button type="submit" class="button-6" role="button" name="preprocess" form="preprocess-form">Go!</button>
</div>
</div>
<div class="row">
{% if pca %}
<div class="col-sm d-flex justify-content-center">
{{ pca|safe }}
</div>
{% endif %}
{% if tsne %}
<div class="col-sm d-flex justify-content-center">
{{ tsne|safe }}
</div>
{% endif %}
</div>
<br>
<br>
</div>
<div class="container-fluid">
<form action="{% url 'home' %}" method="POST" id="traintest-form">
{% csrf_token %}
<div class="row">
<div class="col-sm d-flex justify-content-center">
<label style="display:flex;
flex-direction:column; flex-direction:column;
align-items: center;"> align-items: center;">
<h3> <h2>
<i class="fas fa-cog"></i> Preprocessing <i class="fas fa-magic"></i>Pick a model
</h3> </h2>
</label> <select id="model" name="model" style="scale: 1.2;">
<div class="multi-button"> <option type="submit" value="svm">Support Vector Machine</option>
<button type="submit" name="std" class="button-6" role="button" >Standardization</button> <option type="submit" value="logit" selected>Logistic Regression</option>
<button type="submit" name="onehot" style="margin:0;margin-left:16px;" class="button-6" role="button" >One Hot Encoding</button> <option type="submit" value="xgb">XGBoost</option>
<button type="submit" name="imp" style="margin:0;margin-left:16px;" class="button-6" role="button" >Imputations</button> <option type="submit" value="dt">Decision Tree</option>
</div> <option type="submit" value="rt">Random Forest</option>
</form> </select>
</label>
</div> </div>
</div> </div>
<br>
<br>
{# Train / Test mode switch. Each label's "for" matches its radio's id so that clicking the text selects the radio. #}
<div class="row">
  <div class="col-sm d-flex justify-content-center">
    <div style="scale: 1.5;">
      <input type="radio" name="colorRadio" value="train" class="trainTestButton" id="train">
      <label for="train">Train </label>
      <input type="radio" name="colorRadio" value="test" class="trainTestButton" id="test">
      <label for="test">Test </label>
    </div>
  </div>
</div>
<br>
{# Training options, hidden until shown by script. The slider posts "split_input", the share of rows held out for testing. #}
{# Bounds are 0.1-0.9 because a fractional test size of exactly 0 or 1 is rejected by scikit-learn's train_test_split. #}
<div id="train-me" style="display: none;">
  <br>
  <div class="row">
    <div class="col-md d-flex justify-content-center">
      <label style="display:flex;
      flex-direction:column;
      align-items: center;">
        <h3>
          Test set ratio
        </h3>
        <input name="split_input" id="split_input" type="range" min="0.1" max="0.9" step="0.1" style="width: 200px;">
        <output id="value"></output>
      </label>
    </div>
  </div>
</div>
<br>
</form>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<button type="submit" class="button-6" role="button" name="traintest" form="traintest-form">Go!</button>
</div>
</div>
<br>
<div class="row">
{% if fig2 %}
<div class="col-sm d-flex justify-content-center">
{{ fig2|safe }}
</div>
{% endif %}
{% if clas_report %}
<div class="col-sm d-flex justify-content-center">
<div class="scrollit">
{{ clas_report|safe }}
</div>
</div>
{% endif %}
</div> </div>
</div> </div>
{% endblock content%} {% endblock content%}

@ -1,10 +1,10 @@
from django.urls import path from django.urls import path, include
from . import views from . import views
from . import models from . import models
urlpatterns = [ urlpatterns = [
path('', views.home, name="home"), path('', views.home, name="home"),
path('preprocess', views.preprocess, name="preprocess"), path('preprocess', views.preprocess, name="preprocess"),
path('stats', views.stats, name="stats"), path('stats', views.stats, name="stats"),
path('train',views.training, name = 'training'),
] ]

@ -2,168 +2,434 @@ from django.shortcuts import render, redirect
import pandas as pd import pandas as pd
from django.core.files.storage import FileSystemStorage from django.core.files.storage import FileSystemStorage
import pickle, os import pickle, os
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np import numpy as np
from pandas.api.types import is_string_dtype from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype from pandas.api.types import is_numeric_dtype
from . import utils from sklearn.metrics import accuracy_score, classification_report
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import LabelEncoder
import joblib
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
fig = None clas_report = None
excel_file_name_preprocessed = "dataset_preprocessed.csv" FILE_NAME = "dataset.csv"
excel_file_name = "dataset.csv" PROCESS_FILE_NAME = "dataset_preprocessed.csv"
def home(request): def home(request):
global fig global clas_report
global excel_file_name # request.session.flush()
global excel_file_name_preprocessed if "fig" in request.session:
fig = request.session.get("fig")
if request.method == 'POST': else:
if 'csv' in request.POST: fig = None
if "fig2" in request.session:
fig2 = request.session.get("fig2")
else:
fig2 = None
if "pca" in request.session:
pca = request.session.get("pca")
else:
pca = None
if "tsne" in request.session:
tsne = request.session.get("tsne")
else:
tsne = None
if "excel_file_name" in request.session:
excel_file_name = request.session.get("excel_file_name")
else:
# name for default dataset. If session variable
# is not set that means there was never a csv
# UPLOAD and thus the default dataset should be used
excel_file_name = FILE_NAME
request.session["excel_file_name"] = excel_file_name
if "excel_file_name_preprocessed" in request.session:
excel_file_name_preprocessed = request.session.get(
"excel_file_name_preprocessed"
)
else:
excel_file_name_preprocessed = PROCESS_FILE_NAME
request.session["excel_file_name_preprocessed"] = excel_file_name_preprocessed
df = pd.DataFrame()
if request.method == "POST":
if "csv" in request.POST:
excel_file = request.FILES["excel_file"] excel_file = request.FILES["excel_file"]
excel_file_name = request.FILES["excel_file"].name excel_file_name = request.FILES["excel_file"].name
# here we dont use the name of the file since the fig = None
fig2 = None
# here we dont use the name of the file since the
# uploaded file is not yet saved # uploaded file is not yet saved
# In every other case we just need the name # In every other case we just need the name
df = pd.read_csv(excel_file)
# fs = FileSystemStorage() # defaults to MEDIA_ROOT
fs = FileSystemStorage() #defaults to MEDIA_ROOT request.session["excel_file_name"] = excel_file_name
request.session['excel_file_name'] = excel_file_name # fs.save(excel_file_name, excel_file)
fs.save(excel_file_name, excel_file)
df = pd.read_csv(excel_file)
idx = excel_file_name.index('.') df.drop(["id"], axis=1, inplace=True)
excel_file_name_preprocessed = excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:] df.to_csv(excel_file_name, index=False)
fs = FileSystemStorage() #defaults to MEDIA_ROOT
request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed
fs.save(excel_file_name_preprocessed, excel_file)
# if file for preprocessing does not exist create it
if os.path.exists(excel_file_name_preprocessed) == False:
df.to_csv(excel_file_name_preprocessed)
feature1 = df.columns[0] feature1 = df.columns[0]
feature2 = df.columns[1] feature2 = df.columns[1]
request.session['feature1'] = feature1 request.session["feature1"] = feature1
request.session['feature2'] = feature2 request.session["feature2"] = feature2
elif 'plot' in request.POST: fig = stats(
df = pd.read_csv(excel_file_name) excel_file_name,
feature1 = request.POST.get('feature1') request.session["feature1"],
feature2 = request.POST.get('feature2') request.session["feature2"],
print(feature1) )
request.session['feature1'] = feature1
request.session['feature2'] = feature2 elif "plot" in request.POST:
else: feature1 = request.POST.get("feature1")
df = pd.read_csv(excel_file_name) feature2 = request.POST.get("feature2")
request.session["feature1"] = feature1
if 'std' in request.POST: request.session["feature2"] = feature2
preprocess(excel_file_name_preprocessed, 'std') fig = stats(
if 'onehot' in request.POST: excel_file_name,
preprocess(excel_file_name_preprocessed, 'onehot') request.session["feature1"],
if 'imp' in request.POST: request.session["feature2"],
preprocess(excel_file_name_preprocessed, 'imp') )
if 'logit' in request.POST: elif "traintest" in request.POST:
training(excel_file_name_preprocessed, 'logit') mode = request.POST.get("colorRadio")
if 'xgb' in request.POST: model = request.POST.get("model")
training(excel_file_name_preprocessed, 'xgb') test_size = float(request.POST.get("split_input"))
else: print(test_size, mode, model)
df = pd.read_csv(excel_file_name) if mode == "train":
if model == "logit":
con = training(excel_file_name_preprocessed, "logit", test_size)
elif model == "xgb":
con = training(excel_file_name_preprocessed, "xgb", test_size)
elif model == "dt":
con = training(excel_file_name_preprocessed, "dt", test_size)
elif model == "svm":
con = training(excel_file_name_preprocessed, "svm", test_size)
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif mode == "test":
if model == "logit":
con = testing(excel_file_name_preprocessed, "logit")
elif model == "xgb":
con = testing(excel_file_name_preprocessed, "xgb")
elif model == "dt":
con = testing(excel_file_name_preprocessed, "dt")
elif model == "svm":
con = testing(excel_file_name_preprocessed, "svm")
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif "preprocess" in request.POST:
value_list = request.POST.getlist("boxes")
# if file for preprocessing does not exist create it # if file for preprocessing does not exist create it
if os.path.exists(excel_file_name_preprocessed) == False: # also apply basic preprocessing
df.to_csv(excel_file_name_preprocessed) if os.path.exists(excel_file_name_preprocessed) == False:
# generate filename
idx = excel_file_name.index(".")
excel_file_name_preprocessed = (
excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
)
# save file for preprocessing
preprocess_df = pd.read_csv(excel_file_name)
fs = FileSystemStorage() # defaults to MEDIA_ROOT
request.session["excel_file_name_preprocessed"] = (
excel_file_name_preprocessed
)
preprocess_df.to_csv(excel_file_name_preprocessed, index=False)
preprocess_df.drop(
["perimeter_mean", "area_mean"], axis=1, inplace=True
)
preprocess_df.drop(
["perimeter_worst", "area_worst"], axis=1, inplace=True
)
preprocess_df.drop(["perimeter_se", "area_se"], axis=1, inplace=True)
preprocess_df.drop(
[
"radius_worst",
"concave_points_mean",
"texture_worst",
"symmetry_worst",
"smoothness_worst",
],
axis=1,
inplace=True,
)
# preprocess_df.drop(["id"], axis=1, inplace=True)
le = LabelEncoder()
preprocess_df["diagnosis"] = le.fit_transform(
preprocess_df["diagnosis"]
)
else:
preprocess_df = pd.read_csv(excel_file_name_preprocessed)
preprocess(preprocess_df, value_list, excel_file_name_preprocessed)
pca = PCA()
pca.fit(preprocess_df)
exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
pca = px.area(
x=range(1, exp_var_cumul.shape[0] + 1),
y=exp_var_cumul,
labels={"x": "# Components", "y": "Explained Variance"},
).to_html()
features = preprocess_df.loc[:, :"compactness_se"]
tsne = TSNE(n_components=2, random_state=0)
projections = tsne.fit_transform(features)
tsne = px.scatter(
projections,
x=0,
y=1,
color=preprocess_df.diagnosis,
labels={"color": "diagnosis"},
).to_html()
else:
if os.path.exists(excel_file_name) == False:
excel_file_name = "dataset.csv"
request.session["excel_file_name"] = excel_file_name
df = pd.read_csv(excel_file_name)
fig2 = None
# just random columns to plot
feature1 = df.columns[0] feature1 = df.columns[0]
feature2 = df.columns[1] feature2 = df.columns[1]
request.session['feature1'] = feature1 request.session["feature1"] = feature1
request.session['feature2'] = feature2 request.session["feature2"] = feature2
fig = stats(
fig = stats(excel_file_name, request.session['feature1'], request.session['feature2']) excel_file_name, request.session["feature1"], request.session["feature2"]
)
data_to_display = df[:5].to_html()
request.session['data_to_display'] = data_to_display if df.empty:
request.session['excel_file_name'] = excel_file_name df = pd.read_csv(excel_file_name)
request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed
request.session["fig"] = fig
request.session["fig2"] = fig2
request.session["pca"] = pca
request.session["tsne"] = tsne
data_to_display = df[:5].to_html()
request.session["data_to_display"] = data_to_display
labels = df.columns labels = df.columns
context = {
context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig, 'feature1': request.session['feature1'], 'feature2': request.session['feature2']} "data_to_display": data_to_display,
return render(request,'base/home.html', context) "excel_file": excel_file_name,
"labels": labels,
"fig": fig,
"fig2": fig2,
"feature1": request.session["feature1"],
"feature2": request.session["feature2"],
"clas_report": clas_report,
"pca": pca,
"tsne": tsne,
}
return render(request, "base/home.html", context)
def stats(name, feature1, feature2): def stats(name, feature1, feature2):
global fig
df = pd.read_csv(name) df = pd.read_csv(name)
import plotly.express as px binary1 = df[feature1].isin([0, 1]).all()
if is_numeric_dtype(df[feature1]) and is_numeric_dtype(df[feature2]): binary2 = df[feature2].isin([0, 1]).all()
print("if") if binary1 == True or binary2 == True:
fig = px.scatter(df, feature1, feature2, color='DEATH_EVENT') fig = px.histogram(df, x=feature1, color=feature2)
elif is_string_dtype(df[feature1]) and is_string_dtype(df[feature2]): elif is_numeric_dtype(df[feature1]) or is_numeric_dtype(df[feature2]):
print("elseif") if not is_numeric_dtype(df[feature1]):
fig = px.histogram(df, feature1) # feature1 is not numeric but feature2 should be
fig = px.histogram(df, x=feature2, color=feature1)
elif not is_numeric_dtype(df[feature2]):
# feature2 is not numeric but feature1 should be
fig = px.histogram(df, x=feature1, color=feature2)
else:
# they both are numeric so do scatter
fig = px.scatter(
df, x=feature1, y=feature2, color=df["diagnosis"].astype(str)
)
else: else:
print("else") # they both are categorical so draw a histogram
fig = px.strip(df, feature1, feature2) fig = px.histogram(df, x=feature1, color=feature2)
fig = fig.to_html(full_html=False) fig = fig.to_html(full_html=False)
return fig return fig
def preprocess(name, type):
def preprocess(data, value_list, name):
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
data = pd.read_csv(name)
if set(['No','customerID','Churn']).issubset(data.columns):
data.drop(['No','customerID','Churn'],axis=1,inplace=True)
if type == 'std':
# define standard scaler
scaler = StandardScaler()
# transform data
print("HEY")
num_d = data.select_dtypes(exclude=['object'])
data[num_d.columns] = scaler.fit_transform(num_d)
if type=='onehot':
data = pd.get_dummies(data,columns=['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
'PaperlessBilling', 'PaymentMethod'],drop_first = True)
if type == 'imp':
data_numeric = data.select_dtypes(exclude=['object'])
data_categorical = data.select_dtypes(exclude=['number'])
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
data_numeric = pd.DataFrame(imp.fit_transform(data_numeric), columns = data_numeric.columns, index=data_numeric.index) #only apply imputer to numeric columns
data = pd.concat([data_numeric, data_categorical], axis = 1)
os.remove(excel_file_name_preprocessed)
data.to_csv(excel_file_name_preprocessed)
return
def training(name, type): for type in value_list:
global fig if type == "std":
# define standard scaler
scaler = StandardScaler()
y = data["diagnosis"]
if is_numeric_dtype(data["diagnosis"]):
# if class column is numeric do not
# apply preprocessing
data = data.drop(["diagnosis"], axis=1)
# transform data
cols = data.select_dtypes(np.number).columns
data[cols] = scaler.fit_transform(data[cols])
y = y.to_frame()
data = data.join(y)
if type == "onehot":
data = pd.get_dummies(data)
if type == "imp":
data_numeric = data.select_dtypes(exclude=["object"])
data_categorical = data.select_dtypes(exclude=["number"])
imp = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
data_numeric = pd.DataFrame(
imp.fit_transform(data_numeric),
columns=data_numeric.columns,
index=data_numeric.index,
) # only apply imputer to numeric columns
data = pd.concat(
[data_numeric, data_categorical], axis=1, ignore_index=False
)
os.remove(name)
data.to_csv(name, index=False)
return
def training(name, type, test_size=0.7):
data = pd.read_csv(name) data = pd.read_csv(name)
y=data['Churn']
y=y.replace({"Yes":1,"No":0})
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2, random_state=0)
X_train.to_csv("X_train.csv")
X_test.to_csv("X_test.csv")
y_train.to_csv("y_train.csv")
y_test.to_csv("y_test.csv")
if 'logit' == type:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(X_train, y_train)
filename = 'lg.sav'
pickle.dump(clf, open(filename, 'wb'))
if 'xgb' == type: y = data["diagnosis"]
X = data.drop("diagnosis", axis=1)
X_train, X_test, y_train, y_test = train_test_split(
X, y, shuffle=True, test_size=test_size, stratify=y, random_state=42
)
test = X_test.join(y_test.to_frame())
test.to_csv("test.csv", index=False)
train = X_train.join(y_train.to_frame())
train.to_csv("train.csv", index=False)
if "logit" == type:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(X_train, y_train)
y_pred = clf.predict(X_test)
filename = "lg.sav"
pickle.dump(clf, open(filename, "wb"))
importance = clf.coef_[0]
model = clf
if "xgb" == type:
from xgboost import XGBClassifier from xgboost import XGBClassifier
xgb = XGBClassifier(learning_rate = 0.01,n_estimators=1000).fit(X_train, y_train)
file_name = 'xgb.sav' xgb = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(X_train, y_train)
pickle.dump(xgb,open(file_name,'wb')) y_pred = xgb.predict(X_test)
filename = "xgb.sav"
return importance = xgb.feature_importances_
model = xgb
if "dt" == type:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(max_depth=4, random_state=1)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
filename = "dt.sav"
importance = dt.feature_importances_
model = dt
if "svm" == type:
from sklearn import svm
svc = svm.LinearSVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
filename = "svc.sav"
importance = svc.coef_[0]
model = svc
clas_report = classification_report(y_test, y_pred, output_dict=True)
clas_report = pd.DataFrame(clas_report).transpose()
clas_report = clas_report.sort_values(by=["f1-score"], ascending=False)
fig2 = px.bar(x=importance, y=X_train.columns)
pickle.dump(model, open(filename, "wb"))
con = {
"fig2": fig2.to_html(),
"clas_report": clas_report,
}
return con
def testing(name, type):
data = pd.read_csv(name)
y_test = data["diagnosis"]
X_test = data.drop("diagnosis", axis=1)
if "logit" == type:
filename = "lg.sav"
clf = joblib.load(filename)
y_pred = clf.predict(X_test)
importance = clf.coef_[0]
model = clf
if "xgb" == type:
filename = "xgb.sav"
xgb = joblib.load(filename)
y_pred = xgb.predict(X_test)
filename = "xgb.sav"
importance = xgb.feature_importances_
model = xgb
if "dt" == type:
filename = "dt.sav"
dt = joblib.load(filename)
y_pred = dt.predict(X_test)
importance = dt.feature_importances_
model = dt
if "svm" == type:
filename = "svc.sav"
svc = joblib.load(filename)
y_pred = svc.predict(X_test)
importance = svc.coef_[0]
model = svc
clas_report = classification_report(y_test, y_pred, output_dict=True)
clas_report = pd.DataFrame(clas_report).transpose()
clas_report = clas_report.sort_values(by=["f1-score"], ascending=False)
fig2 = px.bar(x=importance, y=X_test.columns)
pickle.dump(model, open(filename, "wb"))
con = {
"fig2": fig2.to_html(),
"clas_report": clas_report,
}
return con

11098
dataset.csv

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

@ -21,7 +21,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent
# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/ # See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret! # SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-0w^ybt_7vclag#rrutc_eo)m+l^@ml)t%jsg6n06siu)xyls+-' SECRET_KEY = "django-insecure-0w^ybt_7vclag#rrutc_eo)m+l^@ml)t%jsg6n06siu)xyls+-"
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True DEBUG = True
@ -32,56 +32,54 @@ ALLOWED_HOSTS = []
# Application definition # Application definition
INSTALLED_APPS = [ INSTALLED_APPS = [
'django.contrib.admin', "django.contrib.admin",
'django.contrib.auth', "django.contrib.auth",
'django.contrib.contenttypes', "django.contrib.contenttypes",
'django.contrib.sessions', "django.contrib.sessions",
'django.contrib.messages', "django.contrib.messages",
'django.contrib.staticfiles', "django.contrib.staticfiles",
'base.apps.BaseConfig', "base.apps.BaseConfig",
'bootstrap5', "bootstrap5",
] ]
MIDDLEWARE = [ MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware', "django.middleware.security.SecurityMiddleware",
'django.contrib.sessions.middleware.SessionMiddleware', "django.contrib.sessions.middleware.SessionMiddleware",
'django.middleware.common.CommonMiddleware', "django.middleware.common.CommonMiddleware",
'django.middleware.csrf.CsrfViewMiddleware', "django.middleware.csrf.CsrfViewMiddleware",
'django.contrib.auth.middleware.AuthenticationMiddleware', "django.contrib.auth.middleware.AuthenticationMiddleware",
'django.contrib.messages.middleware.MessageMiddleware', "django.contrib.messages.middleware.MessageMiddleware",
'django.middleware.clickjacking.XFrameOptionsMiddleware', "django.middleware.clickjacking.XFrameOptionsMiddleware",
] ]
ROOT_URLCONF = 'extremum.urls' ROOT_URLCONF = "extremum.urls"
TEMPLATES = [ TEMPLATES = [
{ {
'BACKEND': 'django.template.backends.django.DjangoTemplates', "BACKEND": "django.template.backends.django.DjangoTemplates",
'DIRS': [ "DIRS": [BASE_DIR / "templates"],
"APP_DIRS": True,
BASE_DIR / 'templates'], "OPTIONS": {
'APP_DIRS': True, "context_processors": [
'OPTIONS': { "django.template.context_processors.debug",
'context_processors': [ "django.template.context_processors.request",
'django.template.context_processors.debug', "django.contrib.auth.context_processors.auth",
'django.template.context_processors.request', "django.contrib.messages.context_processors.messages",
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
], ],
}, },
}, },
] ]
WSGI_APPLICATION = 'extremum.wsgi.application' WSGI_APPLICATION = "extremum.wsgi.application"
# Database # Database
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases # https://docs.djangoproject.com/en/5.0/ref/settings/#databases
DATABASES = { DATABASES = {
'default': { "default": {
'ENGINE': 'django.db.backends.sqlite3', "ENGINE": "django.db.backends.sqlite3",
'NAME': BASE_DIR / 'db.sqlite3', "NAME": BASE_DIR / "db.sqlite3",
} }
} }
@ -91,16 +89,16 @@ DATABASES = {
AUTH_PASSWORD_VALIDATORS = [ AUTH_PASSWORD_VALIDATORS = [
{ {
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
}, },
{ {
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
}, },
{ {
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
}, },
{ {
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
}, },
] ]
@ -108,9 +106,9 @@ AUTH_PASSWORD_VALIDATORS = [
# Internationalization # Internationalization
# https://docs.djangoproject.com/en/5.0/topics/i18n/ # https://docs.djangoproject.com/en/5.0/topics/i18n/
LANGUAGE_CODE = 'en-us' LANGUAGE_CODE = "en-us"
TIME_ZONE = 'UTC' TIME_ZONE = "UTC"
USE_I18N = True USE_I18N = True
@ -120,11 +118,10 @@ USE_TZ = True
# Static files (CSS, JavaScript, Images) # Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.0/howto/static-files/ # https://docs.djangoproject.com/en/5.0/howto/static-files/
STATIC_URL = '/static/' STATIC_URL = "base/static/"
MEDIA_URL = '/images/' STATICFILES_DIRS = [BASE_DIR / "base/static"]
STATICFILES_DIRS = (os.path.join(BASE_DIR,'base/static'),)
# Default primary key field type # Default primary key field type
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

@ -1,111 +1,40 @@
{% load static %}
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
{% load bootstrap5 %}
{% bootstrap_css %}
{% bootstrap_javascript %}
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="'IE=edge" /> <meta http-equiv="X-UA-Compatible" content="'IE=edge" />
<title>EXTREMUM</title> <title>EXTREMUM</title>
<meta name="viewport" content="'width=device-width, initial-scale=1" /> <meta name="viewport" content="'width=device-width, initial-scale=1" />
{% load static %} <link rel="stylesheet" href="{% static 'css/style.css' %}">
{% load bootstrap5 %} <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
{% bootstrap_css %}
{% bootstrap_javascript %}
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css">
</head> </head>
<style> <nav>
<div class="wrapper">
.scrollit { <div class="logo"><a href="#">Extremum</a></div>
overflow: auto; <input type="radio" name="slider" id="menu-btn">
position: fixed; <input type="radio" name="slider" id="close-btn">
float: left; <label for="menu-btn" class="btn menu-btn"><i class="fas fa-bars"></i></label>
height: auto; </div>
width: 40%; </nav>
max-width: fit-content; <br>
margin: 0px auto; <br>
} <br>
<br>
<br>
<body onscroll="setScroll()" onload="setScreen()">
.dataframe { {% block content %}
font-size: 11pt; {% endblock content %}
font-family: Arial;
border-collapse: collapse; <script src="{% static 'js/hide_seek.js' %}"></script>
margin: 25px 0; <script src="{% static 'js/slider.js' %}"></script>
font-size: 0.9em; <script src="{% static 'js/keep_scroll_on_load.js' %}"></script>
}
.dataframe thead tr {
text-align: left;
font-weight: bold;
}
.dataframe th,
.dataframe td {
padding: 12px 15px;
text-align: left;
}
.dataframe tbody tr {
border-bottom: 1px solid #dddddd;
}
.dataframe tbody tr:nth-of-type(even) {
background-color: #f3f3f3;
}
.dataframe tbody tr:last-of-type {
border-bottom: 2px solid #009879;
}
/* CSS */
.button-6 {
align-items: center;
background-color: #FFFFFF;
border: 1px solid rgba(0, 0, 0, 0.1);
border-radius: .25rem;
box-shadow: rgba(0, 0, 0, 0.02) 0 1px 3px 0;
box-sizing: border-box;
color: rgba(0, 0, 0, 0.85);
cursor: pointer;
display: inline-flex;
font-family: system-ui,-apple-system,system-ui,"Helvetica Neue",Helvetica,Arial,sans-serif;
font-size: 16px;
font-weight: 600;
justify-content: center;
line-height: 1.25;
margin: 0;
min-height: 3rem;
padding: calc(.875rem - 1px) calc(1.5rem - 1px);
position: relative;
text-decoration: none;
transition: all 250ms;
user-select: none;
-webkit-user-select: none;
touch-action: manipulation;
vertical-align: baseline;
width: auto;
}
.button-6:hover,
.button-6:focus {
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.1) 0 4px 12px;
color: rgba(0, 0, 0, 0.65);
}
.button-6:hover {
transform: translateY(-1px);
}
.button-6:active {
background-color: #F0F0F1;
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.06) 0 2px 4px;
color: rgba(0, 0, 0, 0.65);
transform: translateY(0);
}
</style>
<body>
{% block content %} {% endblock content %}
</body> </body>
</html> </html>