Tabular data Train and Testing for Breast Cancer dataset

This commit is contained in:
atla8167 2024-06-10 11:38:59 +03:00
parent 9a02a02658
commit 6047fad7ce
18 changed files with 6166 additions and 6327 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1 +1,3 @@
from django.db import models from django.db import models

414
base/static/css/style.css Normal file

@ -0,0 +1,414 @@
@import url("https://fonts.googleapis.com/css2?family=Poppins:wght@200;300;400;500;600;700&display=swap");
.container-fluid {
padding-bottom: 100px;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: "Poppins", sans-serif;
}
nav {
position: fixed;
z-index: 99;
width: 100%;
background: #242526;
}
/* Centered, fixed-height flex row that holds the logo and the nav links. */
nav .wrapper {
  position: relative;
  max-width: 1300px;
  padding: 0px 30px;
  height: 70px;
  line-height: 70px;
  /* The former `margin-bottom: 10px` was dropped: this shorthand came after
     it and reset the bottom margin, so it never applied. */
  margin: auto;
  display: flex;
  align-items: center;
  justify-content: space-between;
}
.wrapper .logo a {
color: #f2f2f2;
font-size: 30px;
font-weight: 600;
text-decoration: none;
}
.wrapper .nav-links {
display: inline-flex;
}
.nav-links li {
list-style: none;
}
.nav-links li a {
color: #f2f2f2;
text-decoration: none;
font-size: 18px;
font-weight: 500;
padding: 9px 15px;
border-radius: 5px;
transition: all 0.3s ease;
}
.nav-links li a:hover {
background: #3a3b3c;
}
.nav-links .mobile-item {
display: none;
}
.nav-links .drop-menu {
position: absolute;
background: #242526;
width: 180px;
line-height: 45px;
top: 85px;
opacity: 0;
visibility: hidden;
box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15);
}
.nav-links li:hover .drop-menu,
.nav-links li:hover .mega-box {
transition: all 0.3s ease;
top: 70px;
opacity: 1;
visibility: visible;
}
.drop-menu li a {
width: 100%;
display: block;
padding: 0 0 0 15px;
font-weight: 400;
border-radius: 0px;
}
.mega-box {
position: absolute;
left: 0;
width: 100%;
padding: 0 30px;
top: 85px;
opacity: 0;
visibility: hidden;
}
.mega-box .content {
background: #242526;
padding: 25px 20px;
display: flex;
width: 100%;
justify-content: space-between;
box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15);
}
.mega-box .content .row {
width: calc(25% - 30px);
line-height: 45px;
}
.content .row img {
width: 100%;
height: 100%;
object-fit: cover;
}
.content .row header {
color: #f2f2f2;
font-size: 20px;
font-weight: 500;
}
.content .row .mega-links {
margin-left: -40px;
border-left: 1px solid rgba(255, 255, 255, 0.09);
}
.row .mega-links li {
padding: 0 20px;
}
/* Links inside a mega-menu column. The redundant `padding: 0px` that was
   immediately overridden by the next declaration has been removed. */
.row .mega-links li a {
  padding: 0 20px;
  color: #d9d9d9;
  font-size: 17px;
  display: block;
}
.row .mega-links li a:hover {
color: #f2f2f2;
}
.wrapper .btn {
color: #fff;
font-size: 20px;
cursor: pointer;
display: none;
}
.wrapper .btn.close-btn {
position: absolute;
right: 30px;
top: 10px;
}
@media screen and (max-width: 970px) {
.wrapper .btn {
display: block;
}
.wrapper .nav-links {
position: fixed;
height: 100vh;
width: 100%;
max-width: 350px;
top: 0;
left: -100%;
background: #171f27;
display: block;
padding: 50px 10px;
line-height: 50px;
overflow-y: auto;
box-shadow: 0px 15px 15px rgba(0, 0, 0, 0.18);
transition: all 0.3s ease;
}
/* custom scroll bar */
::-webkit-scrollbar {
width: 10px;
}
::-webkit-scrollbar-track {
background: #242526;
}
::-webkit-scrollbar-thumb {
background: #3a3b3c;
}
#menu-btn:checked ~ .nav-links {
left: 0%;
}
#menu-btn:checked ~ .btn.menu-btn {
display: none;
}
#close-btn:checked ~ .btn.menu-btn {
display: block;
}
.nav-links li {
margin: 15px 10px;
}
.nav-links li a {
padding: 0 20px;
display: block;
font-size: 20px;
}
.nav-links .drop-menu {
position: static;
opacity: 1;
top: 65px;
visibility: visible;
padding-left: 20px;
width: 100%;
max-height: 0px;
overflow: hidden;
box-shadow: none;
transition: all 0.3s ease;
}
#showDrop:checked ~ .drop-menu,
#showMega:checked ~ .mega-box {
max-height: 100%;
}
.nav-links .desktop-item {
display: none;
}
.nav-links .mobile-item {
display: block;
color: #f2f2f2;
font-size: 20px;
font-weight: 500;
padding-left: 20px;
cursor: pointer;
border-radius: 5px;
transition: all 0.3s ease;
}
.nav-links .mobile-item:hover {
background: #3a3b3c;
}
.drop-menu li {
margin: 0;
}
.drop-menu li a {
border-radius: 5px;
font-size: 18px;
}
.mega-box {
position: static;
top: 65px;
opacity: 1;
visibility: visible;
padding: 0 20px;
max-height: 0px;
overflow: hidden;
transition: all 0.3s ease;
}
.mega-box .content {
box-shadow: none;
flex-direction: column;
padding: 20px 20px 0 20px;
}
.mega-box .content .row {
width: 100%;
margin-bottom: 15px;
border-top: 1px solid rgba(255, 255, 255, 0.08);
}
.mega-box .content .row:nth-child(1),
.mega-box .content .row:nth-child(2) {
border-top: 0px;
}
.content .row .mega-links {
border-left: 0px;
padding-left: 15px;
}
.row .mega-links li {
margin: 0;
}
.content .row header {
font-size: 19px;
}
}
nav input {
display: none;
}
.body-text {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
width: 100%;
text-align: center;
padding: 0 30px;
}
.body-text div {
font-size: 45px;
font-weight: 600;
}
/* csv display */
.scrollit {
margin-top: 60px;
overflow: auto;
position: absolute;
height: min-content;
max-height: 40%;
max-width: 35%;
}
/* Table styled via the `dataframe` class. Only the effective font size is
   kept: the earlier `font-size: 9pt` was overridden by `0.9em` in the same
   rule. */
.dataframe {
  font-family: Arial;
  border-collapse: collapse;
  font-size: 0.9em;
}
.dataframe thead tr {
text-align: left;
font-weight: bold;
}
.dataframe th,
.dataframe td {
padding: 12px 15px;
text-align: left;
}
.dataframe tbody tr {
border-bottom: 1px solid #dddddd;
}
.dataframe tbody tr:nth-of-type(even) {
background-color: #f3f3f3;
}
.dataframe tbody tr:last-of-type {
border-bottom: 2px solid #009879;
}
/* plotly toolbar */
.modebar {
display: none !important;
}
/* button */
/* CSS */
.button-6 {
align-items: center;
background-color: #FFFFFF;
border: 1px solid rgba(0, 0, 0, 0.1);
border-radius: .25rem;
box-shadow: rgba(0, 0, 0, 0.02) 0 1px 3px 0;
box-sizing: border-box;
color: rgba(0, 0, 0, 0.85);
cursor: pointer;
display: inline-flex;
font-family: system-ui,-apple-system,system-ui,"Helvetica Neue",Helvetica,Arial,sans-serif;
font-size: 16px;
font-weight: 600;
justify-content: center;
line-height: 1.25;
margin: 0;
min-height: 3rem;
padding: calc(.875rem - 1px) calc(1.5rem - 1px);
position: relative;
text-decoration: none;
transition: all 250ms;
user-select: none;
-webkit-user-select: none;
touch-action: manipulation;
vertical-align: baseline;
width: auto;
}
.button-6:hover,
.button-6:focus {
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.1) 0 4px 12px;
color: rgba(0, 0, 0, 0.65);
}
.button-6:hover {
transform: translateY(-1px);
}
.button-6:active {
background-color: #F0F0F1;
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.06) 0 2px 4px;
color: rgba(0, 0, 0, 0.65);
transform: translateY(0);
}
/* radio buttons */
.radio-inputs {
position: relative;
display: flex;
flex-wrap: wrap;
border-radius: 0.5rem;
background-color: #EEE;
box-sizing: border-box;
box-shadow: 0 0 0px 1px rgba(0, 0, 0, 0.06);
padding: 0.25rem;
width: 300px;
font-size: 14px;
}
.radio-inputs .radio {
flex: 1 1 auto;
text-align: center;
}
.radio-inputs .radio input {
display: none;
}
.radio-inputs .radio .name {
display: flex;
cursor: pointer;
align-items: center;
justify-content: center;
border-radius: 0.5rem;
border: none;
padding: .5rem 0;
color: rgba(51, 65, 85, 1);
transition: all .15s ease-in-out;
}
.radio-inputs .radio input:checked + .name {
background-color: #fff;
font-weight: 600;
}

@ -0,0 +1,11 @@
$(document).ready(function () {
  // Show #train-me while the "train" radio is checked, otherwise show
  // #test-me. toggle(bool) is equivalent to show()/hide().
  function syncTrainTestPanels() {
    var trainSelected = $("#train").is(":checked");
    $("#train-me").toggle(trainSelected);
    $("#test-me").toggle(!trainSelected);
  }

  $(".trainTestButton").on("click", syncTrainTestPanels);

  // Browsers may restore a previously checked radio on reload/back
  // navigation without firing a click, which would leave the panels out of
  // sync. Only sync when a choice already exists so the initial
  // "nothing selected" layout is left untouched.
  if ($(".trainTestButton:checked").length > 0) {
    syncTrainTestPanels();
  }
});

@ -0,0 +1,12 @@
/**
 * Scroll the window vertically to the position stored under the "yPos"
 * localStorage key (horizontal position is set to 0).
 */
function setScreen() {
  const savedY = localStorage.getItem("yPos");
  window.scrollTo(0, savedY);
}
/**
 * Save the window's current vertical scroll offset under the "yPos"
 * localStorage key.
 */
function setScroll() {
  localStorage.setItem("yPos", window.pageYOffset);
}
/**
 * Reset the stored "yPos" scroll position to 0 and scroll the window back
 * to the top-left corner.
 */
function clearScreen() {
  const top = 0;
  localStorage.setItem("yPos", top);
  window.scrollTo(0, top);
}

@ -1,7 +0,0 @@
$(document).ready( function()
{
$('#spinner').on('click', function()
{
$('body').addClass('busy');
});
});

6
base/static/js/slider.js Normal file

@ -0,0 +1,6 @@
// Mirror the current value of the #split_input range control into the
// #value element, both initially and on every "input" event.
// The block scope keeps the helper names out of the page's global scope,
// where generic names like `value`/`input` could collide with other scripts.
{
  const output = document.querySelector("#value");
  const slider = document.querySelector("#split_input");

  // Do nothing on pages that do not render both elements instead of
  // throwing on a null reference.
  if (output && slider) {
    output.textContent = slider.value;
    slider.addEventListener("input", (event) => {
      output.textContent = event.target.value;
    });
  }
}

@ -1,21 +1,10 @@
{% extends 'main.html' %} {% extends 'main.html' %}
{% block content %} {% block content %}
{% load static %} {% load static %}
<div class="container-fluid"> <div class="container-fluid">
<div class="mx-auto p-4 text-center bg-white shadow-sm">
<a href="/" >
<h1>
EXTREMUM
</h1>
</a>
</div>
<br>
<br>
<div class="row"> <div class="row">
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data"> <form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data">
<label style="display:flex; <label style="display:flex;
flex-direction:column; flex-direction:column;
@ -36,7 +25,7 @@
style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;"> style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;">
</form> </form>
</div> </div>
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" name="plot" method="POST" id="stats"> <form action="{% url 'home' %}" name="plot" method="POST" id="stats">
<label style="display:flex; <label style="display:flex;
flex-direction:column; flex-direction:column;
@ -66,62 +55,158 @@
</form> </form>
</div> </div>
</div> </div>
<br>
<br> <br>
<div class="row"> <div class="row">
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
<div class="scrollit"> <div class="scrollit">
{{ data_to_display|safe }} {{ data_to_display|safe }}
</div> </div>
</div> </div>
<div class="col d-flex justify-content-center"> <div class="col-sm d-flex justify-content-center">
{{ fig|safe }} {{ fig|safe }}
</div> </div>
</div> </div>
<br> </div>
<br> <br>
<div class="row justify-content-center"> <br>
<div class="col d-flex justify-content-center"> <div class="container-fluid">
<form action="{% url 'home' %}" method="POST"> <div class="row">
<section>
<label style="display:flex; <label style="display:flex;
flex-direction:column; flex-direction:column;
align-items: center;"> align-items: center;">
<h3> <h2>
<i class="fa-solid fa-wand-magic-sparkles"></i> Training
</h3>
</label>
{% csrf_token %}
<div class="btn-toolbar" role="toolbar">
<button type="submit" name="svm" style="margin:0;margin-left:16px;" class="button-6" role="button" >Support Vector Machine</button>
<button type="submit" name="logit" style="margin:0;margin-left:16px;" class="button-6" role="button" >Logistic Regression</button>
<button type="submit" name="xgb" style="margin:0;margin-left:16px;" class="button-6" role="button" >XGBoost</button>
</div>
<br>
<div class="justify-content-center btn-toolbar" role="toolbar">
<button type="submit" name="dt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Decision Tree</button>
<button type="submit" name="rt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Random Forest</button>
</div>
</form>
</div>
<div class="col d-flex justify-content-center">
<div class="col d-flex justify-content-center multi-button">
<form action="{% url 'home' %}" method="POST">
{% csrf_token %}
<label style="display:flex;
flex-direction:column;
align-items: center;">
<h3>
<i class="fas fa-cog"></i> Preprocessing <i class="fas fa-cog"></i> Preprocessing
</h3> </h2>
</label> </label>
<div class="multi-button"> </section>
<button type="submit" name="std" class="button-6" role="button" >Standardization</button> </div>
<button type="submit" name="onehot" style="margin:0;margin-left:16px;" class="button-6" role="button" >One Hot Encoding</button> <br>
<button type="submit" name="imp" style="margin:0;margin-left:16px;" class="button-6" role="button" >Imputations</button> <br>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" method="POST" id="preprocess-form">
{% csrf_token %}
<div style="scale: 1.2;">
{# Preprocessing steps; every checked box is posted under the name "boxes". #}
{# Each input carries the id its label's "for" points at, so clicking the label toggles the box. #}
<div class="form-check form-check-inline">
  {# A second, unchecked "std" checkbox used to follow this one; it could post "std" twice and has been removed. #}
  <input class="form-check-input" id="inlineCheckbox1" name="boxes" type="checkbox" value="std" checked>
  <label class="form-check-label" for="inlineCheckbox1">Standardization</label>
</div>
<div class="form-check form-check-inline">
  <input class="form-check-input" id="inlineCheckbox2" name="boxes" type="checkbox" value="onehot">
  <label class="form-check-label" for="inlineCheckbox2">One Hot Encoding</label>
</div>
<div class="form-check form-check-inline">
  <input class="form-check-input" id="inlineCheckbox3" name="boxes" type="checkbox" value="imp">
  <label class="form-check-label" for="inlineCheckbox3">Imputations</label>
</div>
</div> </div>
</form> </form>
</div> </div>
</div> </div>
<br>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<button type="submit" class="button-6" role="button" name="preprocess" form="preprocess-form">Go!</button>
</div>
</div>
<div class="row">
{% if pca %}
<div class="col-sm d-flex justify-content-center">
{{ pca|safe }}
</div>
{% endif %}
{% if tsne %}
<div class="col-sm d-flex justify-content-center">
{{ tsne|safe }}
</div>
{% endif %}
</div>
<br>
<br>
</div>
<div class="container-fluid">
{# Train/test form. Posts "model", "colorRadio" (train|test) and "split_input" to the home view. #}
{# Its submit button lives outside the form and targets it through form="traintest-form". #}
<form action="{% url 'home' %}" method="POST" id="traintest-form">
  {% csrf_token %}
  <div class="row">
    <div class="col-sm d-flex justify-content-center">
      <label style="display:flex; flex-direction:column; align-items: center;">
        <h2>
          <i class="fas fa-magic" aria-hidden="true"></i>Pick a model
        </h2>
        {# <option> has no "type" attribute; the invalid type="submit" was removed. #}
        {# NOTE(review): the home view in this commit has no branch for "rt" - confirm Random Forest is handled server-side. #}
        <select id="model" name="model" style="scale: 1.2;">
          <option value="svm">Support Vector Machine</option>
          <option value="logit" selected>Logistic Regression</option>
          <option value="xgb">XGBoost</option>
          <option value="dt">Decision Tree</option>
          <option value="rt">Random Forest</option>
        </select>
      </label>
    </div>
  </div>
  <br>
  <br>
  <div class="row">
    <div class="col-sm d-flex justify-content-center">
      <div style="scale: 1.5;">
        {# Each label's "for" must equal the radio's id, otherwise clicking the text does not select the radio. #}
        <input type="radio" name="colorRadio" value="train" class="trainTestButton" id="train">
        <label for="train">Train </label>
        <input type="radio" name="colorRadio" value="test" class="trainTestButton" id="test">
        <label for="test">Test </label>
      </div>
    </div>
  </div>
  <br>
  {# Hidden until the "train" radio is chosen. #}
  <div id="train-me" style="display: none;">
    <br>
    <div class="row">
      <div class="col-md d-flex justify-content-center">
        <label style="display:flex; flex-direction:column; align-items: center;">
          <h3>
            Test set ratio
          </h3>
          {# 0 and 1 would leave an empty train or test split, so the ends of the range are excluded. #}
          <input name="split_input" id="split_input" type="range" min="0.1" max="0.9" step="0.1" style="width: 200px;">
          <output id="value"></output>
        </label>
      </div>
    </div>
  </div>
  <br>
</form>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<button type="submit" class="button-6" role="button" name="traintest" form="traintest-form">Go!</button>
</div>
</div>
<br>
<div class="row">
{% if fig2 %}
<div class="col-sm d-flex justify-content-center">
{{ fig2|safe }}
</div>
{% endif %}
{% if clas_report %}
<div class="col-sm d-flex justify-content-center">
<div class="scrollit">
{{ clas_report|safe }}
</div>
</div>
{% endif %}
</div> </div>
</div> </div>
{% endblock content%} {% endblock content%}

@ -1,10 +1,10 @@
from django.urls import path from django.urls import path, include
from . import views from . import views
from . import models from . import models
urlpatterns = [ urlpatterns = [
path('', views.home, name="home"), path('', views.home, name="home"),
path('preprocess', views.preprocess, name="preprocess"), path('preprocess', views.preprocess, name="preprocess"),
path('stats', views.stats, name="stats"), path('stats', views.stats, name="stats"),
path('train',views.training, name = 'training'),
] ]

@ -2,168 +2,434 @@ from django.shortcuts import render, redirect
import pandas as pd import pandas as pd
from django.core.files.storage import FileSystemStorage from django.core.files.storage import FileSystemStorage
import pickle, os import pickle, os
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np import numpy as np
from pandas.api.types import is_string_dtype from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype from pandas.api.types import is_numeric_dtype
from . import utils from sklearn.metrics import accuracy_score, classification_report
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import LabelEncoder
import joblib
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
fig = None clas_report = None
excel_file_name_preprocessed = "dataset_preprocessed.csv" FILE_NAME = "dataset.csv"
excel_file_name = "dataset.csv" PROCESS_FILE_NAME = "dataset_preprocessed.csv"
def home(request): def home(request):
global fig global clas_report
global excel_file_name # request.session.flush()
global excel_file_name_preprocessed if "fig" in request.session:
fig = request.session.get("fig")
else:
fig = None
if request.method == 'POST': if "fig2" in request.session:
if 'csv' in request.POST: fig2 = request.session.get("fig2")
else:
fig2 = None
if "pca" in request.session:
pca = request.session.get("pca")
else:
pca = None
if "tsne" in request.session:
tsne = request.session.get("tsne")
else:
tsne = None
if "excel_file_name" in request.session:
excel_file_name = request.session.get("excel_file_name")
else:
# name for default dataset. If session variable
# is not set that means there was never a csv
# UPLOAD and thus the default dataset should used
excel_file_name = FILE_NAME
request.session["excel_file_name"] = excel_file_name
if "excel_file_name_preprocessed" in request.session:
excel_file_name_preprocessed = request.session.get(
"excel_file_name_preprocessed"
)
else:
excel_file_name_preprocessed = PROCESS_FILE_NAME
request.session["excel_file_name_preprocessed"] = excel_file_name_preprocessed
df = pd.DataFrame()
if request.method == "POST":
if "csv" in request.POST:
excel_file = request.FILES["excel_file"] excel_file = request.FILES["excel_file"]
excel_file_name = request.FILES["excel_file"].name excel_file_name = request.FILES["excel_file"].name
fig = None
fig2 = None
# here we dont use the name of the file since the # here we dont use the name of the file since the
# uploaded file is not yet saved # uploaded file is not yet saved
# In every other case we just need the name # In every other case we just need the name
# fs = FileSystemStorage() # defaults to MEDIA_ROOT
request.session["excel_file_name"] = excel_file_name
# fs.save(excel_file_name, excel_file)
df = pd.read_csv(excel_file) df = pd.read_csv(excel_file)
df.drop(["id"], axis=1, inplace=True)
fs = FileSystemStorage() #defaults to MEDIA_ROOT df.to_csv(excel_file_name, index=False)
request.session['excel_file_name'] = excel_file_name
fs.save(excel_file_name, excel_file)
idx = excel_file_name.index('.')
excel_file_name_preprocessed = excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
fs = FileSystemStorage() #defaults to MEDIA_ROOT
request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed
fs.save(excel_file_name_preprocessed, excel_file)
# if file for preprocessing does not exist create it
if os.path.exists(excel_file_name_preprocessed) == False:
df.to_csv(excel_file_name_preprocessed)
feature1 = df.columns[0] feature1 = df.columns[0]
feature2 = df.columns[1] feature2 = df.columns[1]
request.session['feature1'] = feature1 request.session["feature1"] = feature1
request.session['feature2'] = feature2 request.session["feature2"] = feature2
elif 'plot' in request.POST: fig = stats(
df = pd.read_csv(excel_file_name) excel_file_name,
feature1 = request.POST.get('feature1') request.session["feature1"],
feature2 = request.POST.get('feature2') request.session["feature2"],
print(feature1) )
request.session['feature1'] = feature1
request.session['feature2'] = feature2
else:
df = pd.read_csv(excel_file_name)
if 'std' in request.POST: elif "plot" in request.POST:
preprocess(excel_file_name_preprocessed, 'std') feature1 = request.POST.get("feature1")
if 'onehot' in request.POST: feature2 = request.POST.get("feature2")
preprocess(excel_file_name_preprocessed, 'onehot') request.session["feature1"] = feature1
if 'imp' in request.POST: request.session["feature2"] = feature2
preprocess(excel_file_name_preprocessed, 'imp') fig = stats(
excel_file_name,
request.session["feature1"],
request.session["feature2"],
)
elif "traintest" in request.POST:
mode = request.POST.get("colorRadio")
model = request.POST.get("model")
test_size = float(request.POST.get("split_input"))
print(test_size, mode, model)
if mode == "train":
if model == "logit":
con = training(excel_file_name_preprocessed, "logit", test_size)
elif model == "xgb":
con = training(excel_file_name_preprocessed, "xgb", test_size)
elif model == "dt":
con = training(excel_file_name_preprocessed, "dt", test_size)
elif model == "svm":
con = training(excel_file_name_preprocessed, "svm", test_size)
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif mode == "test":
if model == "logit":
con = testing(excel_file_name_preprocessed, "logit")
elif model == "xgb":
con = testing(excel_file_name_preprocessed, "xgb")
elif model == "dt":
con = testing(excel_file_name_preprocessed, "dt")
elif model == "svm":
con = testing(excel_file_name_preprocessed, "svm")
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif "preprocess" in request.POST:
value_list = request.POST.getlist("boxes")
if 'logit' in request.POST:
training(excel_file_name_preprocessed, 'logit')
if 'xgb' in request.POST:
training(excel_file_name_preprocessed, 'xgb')
else:
df = pd.read_csv(excel_file_name)
# if file for preprocessing does not exist create it # if file for preprocessing does not exist create it
# also apply basic preprocessing
if os.path.exists(excel_file_name_preprocessed) == False: if os.path.exists(excel_file_name_preprocessed) == False:
df.to_csv(excel_file_name_preprocessed)
# generate filename
idx = excel_file_name.index(".")
excel_file_name_preprocessed = (
excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
)
# save file for preprocessing
preprocess_df = pd.read_csv(excel_file_name)
fs = FileSystemStorage() # defaults to MEDIA_ROOT
request.session["excel_file_name_preprocessed"] = (
excel_file_name_preprocessed
)
preprocess_df.to_csv(excel_file_name_preprocessed, index=False)
preprocess_df.drop(
["perimeter_mean", "area_mean"], axis=1, inplace=True
)
preprocess_df.drop(
["perimeter_worst", "area_worst"], axis=1, inplace=True
)
preprocess_df.drop(["perimeter_se", "area_se"], axis=1, inplace=True)
preprocess_df.drop(
[
"radius_worst",
"concave_points_mean",
"texture_worst",
"symmetry_worst",
"smoothness_worst",
],
axis=1,
inplace=True,
)
# preprocess_df.drop(["id"], axis=1, inplace=True)
le = LabelEncoder()
preprocess_df["diagnosis"] = le.fit_transform(
preprocess_df["diagnosis"]
)
else:
preprocess_df = pd.read_csv(excel_file_name_preprocessed)
preprocess(preprocess_df, value_list, excel_file_name_preprocessed)
pca = PCA()
pca.fit(preprocess_df)
exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
pca = px.area(
x=range(1, exp_var_cumul.shape[0] + 1),
y=exp_var_cumul,
labels={"x": "# Components", "y": "Explained Variance"},
).to_html()
features = preprocess_df.loc[:, :"compactness_se"]
tsne = TSNE(n_components=2, random_state=0)
projections = tsne.fit_transform(features)
tsne = px.scatter(
projections,
x=0,
y=1,
color=preprocess_df.diagnosis,
labels={"color": "diagnosis"},
).to_html()
else:
if os.path.exists(excel_file_name) == False:
excel_file_name = "dataset.csv"
request.session["excel_file_name"] = excel_file_name
df = pd.read_csv(excel_file_name)
fig2 = None
# just random columns to plot
feature1 = df.columns[0] feature1 = df.columns[0]
feature2 = df.columns[1] feature2 = df.columns[1]
request.session['feature1'] = feature1 request.session["feature1"] = feature1
request.session['feature2'] = feature2 request.session["feature2"] = feature2
fig = stats(
excel_file_name, request.session["feature1"], request.session["feature2"]
)
fig = stats(excel_file_name, request.session['feature1'], request.session['feature2']) if df.empty:
df = pd.read_csv(excel_file_name)
request.session["fig"] = fig
request.session["fig2"] = fig2
request.session["pca"] = pca
request.session["tsne"] = tsne
data_to_display = df[:5].to_html() data_to_display = df[:5].to_html()
request.session["data_to_display"] = data_to_display
request.session['data_to_display'] = data_to_display
request.session['excel_file_name'] = excel_file_name
request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed
labels = df.columns labels = df.columns
context = {
"data_to_display": data_to_display,
"excel_file": excel_file_name,
"labels": labels,
"fig": fig,
"fig2": fig2,
"feature1": request.session["feature1"],
"feature2": request.session["feature2"],
"clas_report": clas_report,
"pca": pca,
"tsne": tsne,
}
return render(request, "base/home.html", context)
context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig, 'feature1': request.session['feature1'], 'feature2': request.session['feature2']}
return render(request,'base/home.html', context)
def stats(name, feature1, feature2): def stats(name, feature1, feature2):
global fig
df = pd.read_csv(name) df = pd.read_csv(name)
import plotly.express as px binary1 = df[feature1].isin([0, 1]).all()
if is_numeric_dtype(df[feature1]) and is_numeric_dtype(df[feature2]): binary2 = df[feature2].isin([0, 1]).all()
print("if") if binary1 == True or binary2 == True:
fig = px.scatter(df, feature1, feature2, color='DEATH_EVENT') fig = px.histogram(df, x=feature1, color=feature2)
elif is_string_dtype(df[feature1]) and is_string_dtype(df[feature2]): elif is_numeric_dtype(df[feature1]) or is_numeric_dtype(df[feature2]):
print("elseif") if not is_numeric_dtype(df[feature1]):
fig = px.histogram(df, feature1) # feature1 is not numeric but feature2 should be
fig = px.histogram(df, x=feature2, color=feature1)
elif not is_numeric_dtype(df[feature2]):
# feature2 is not numeric but feature1 should be
fig = px.histogram(df, x=feature1, color=feature2)
else: else:
print("else") # they both are numeric so do scatter
fig = px.strip(df, feature1, feature2) fig = px.scatter(
df, x=feature1, y=feature2, color=df["diagnosis"].astype(str)
)
else:
# they both are categorical so do scatter
fig = px.histogram(df, x=feature1, color=feature2)
fig = fig.to_html(full_html=False) fig = fig.to_html(full_html=False)
return fig return fig
def preprocess(name, type):
def preprocess(data, value_list, name):
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
data = pd.read_csv(name)
if set(['No','customerID','Churn']).issubset(data.columns): for type in value_list:
data.drop(['No','customerID','Churn'],axis=1,inplace=True) if type == "std":
if type == 'std':
# define standard scaler # define standard scaler
scaler = StandardScaler() scaler = StandardScaler()
y = data["diagnosis"]
if is_numeric_dtype(data["diagnosis"]):
# if class column is numeric do not
# apply preprocessing
data = data.drop(["diagnosis"], axis=1)
# transform data # transform data
print("HEY") cols = data.select_dtypes(np.number).columns
num_d = data.select_dtypes(exclude=['object']) data[cols] = scaler.fit_transform(data[cols])
data[num_d.columns] = scaler.fit_transform(num_d) y = y.to_frame()
data = data.join(y)
if type=='onehot': if type == "onehot":
data = pd.get_dummies(data,columns=['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines', data = pd.get_dummies(data)
'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
'PaperlessBilling', 'PaymentMethod'],drop_first = True)
if type == 'imp': if type == "imp":
data_numeric = data.select_dtypes(exclude=['object']) data_numeric = data.select_dtypes(exclude=["object"])
data_categorical = data.select_dtypes(exclude=['number']) data_categorical = data.select_dtypes(exclude=["number"])
imp = SimpleImputer(missing_values=np.nan, strategy='mean') imp = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
data_numeric = pd.DataFrame(imp.fit_transform(data_numeric), columns = data_numeric.columns, index=data_numeric.index) #only apply imputer to numeric columns data_numeric = pd.DataFrame(
data = pd.concat([data_numeric, data_categorical], axis = 1) imp.fit_transform(data_numeric),
columns=data_numeric.columns,
index=data_numeric.index,
) # only apply imputer to numeric columns
data = pd.concat(
[data_numeric, data_categorical], axis=1, ignore_index=False
)
os.remove(excel_file_name_preprocessed) os.remove(name)
data.to_csv(excel_file_name_preprocessed) data.to_csv(name, index=False)
return return
def training(name, type):
global fig def training(name, type, test_size=0.7):
data = pd.read_csv(name) data = pd.read_csv(name)
y=data['Churn']
y=y.replace({"Yes":1,"No":0})
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2, random_state=0)
X_train.to_csv("X_train.csv")
X_test.to_csv("X_test.csv")
y_train.to_csv("y_train.csv")
y_test.to_csv("y_test.csv")
if 'logit' == type: y = data["diagnosis"]
X = data.drop("diagnosis", axis=1)
X_train, X_test, y_train, y_test = train_test_split(
X, y, shuffle=True, test_size=test_size, stratify=y, random_state=42
)
test = X_test.join(y_test.to_frame())
test.to_csv("test.csv", index=False)
train = X_train.join(y_train.to_frame())
train.to_csv("train.csv", index=False)
if "logit" == type:
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(X_train, y_train) clf = LogisticRegression(random_state=0).fit(X_train, y_train)
filename = 'lg.sav' y_pred = clf.predict(X_test)
pickle.dump(clf, open(filename, 'wb')) filename = "lg.sav"
pickle.dump(clf, open(filename, "wb"))
importance = clf.coef_[0]
model = clf
if 'xgb' == type: if "xgb" == type:
from xgboost import XGBClassifier from xgboost import XGBClassifier
xgb = XGBClassifier(learning_rate = 0.01,n_estimators=1000).fit(X_train, y_train)
file_name = 'xgb.sav'
pickle.dump(xgb,open(file_name,'wb'))
return xgb = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(X_train, y_train)
y_pred = xgb.predict(X_test)
filename = "xgb.sav"
importance = xgb.feature_importances_
model = xgb
if "dt" == type:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(max_depth=4, random_state=1)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
filename = "dt.sav"
importance = dt.feature_importances_
model = dt
if "svm" == type:
from sklearn import svm
svc = svm.LinearSVC()
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
filename = "svc.sav"
importance = svc.coef_[0]
model = svc
clas_report = classification_report(y_test, y_pred, output_dict=True)
clas_report = pd.DataFrame(clas_report).transpose()
clas_report = clas_report.sort_values(by=["f1-score"], ascending=False)
fig2 = px.bar(x=importance, y=X_train.columns)
pickle.dump(model, open(filename, "wb"))
con = {
"fig2": fig2.to_html(),
"clas_report": clas_report,
}
return con
def testing(name, type):
data = pd.read_csv(name)
y_test = data["diagnosis"]
X_test = data.drop("diagnosis", axis=1)
if "logit" == type:
filename = "lg.sav"
clf = joblib.load(filename)
y_pred = clf.predict(X_test)
importance = clf.coef_[0]
model = clf
if "xgb" == type:
filename = "xgb.sav"
xgb = joblib.load(filename)
y_pred = xgb.predict(X_test)
filename = "xgb.sav"
importance = xgb.feature_importances_
model = xgb
if "dt" == type:
filename = "dt.sav"
dt = joblib.load(filename)
y_pred = dt.predict(X_test)
importance = dt.feature_importances_
model = dt
if "svm" == type:
filename = "svc.sav"
svc = joblib.load(filename)
y_pred = svc.predict(X_test)
importance = svc.coef_[0]
model = svc
clas_report = classification_report(y_test, y_pred, output_dict=True)
clas_report = pd.DataFrame(clas_report).transpose()
clas_report = clas_report.sort_values(by=["f1-score"], ascending=False)
fig2 = px.bar(x=importance, y=X_test.columns)
pickle.dump(model, open(filename, "wb"))
con = {
"fig2": fig2.to_html(),
"clas_report": clas_report,
}
return con

11098
dataset.csv

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

@ -21,7 +21,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent
# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/ # See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret! # SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-0w^ybt_7vclag#rrutc_eo)m+l^@ml)t%jsg6n06siu)xyls+-' SECRET_KEY = "django-insecure-0w^ybt_7vclag#rrutc_eo)m+l^@ml)t%jsg6n06siu)xyls+-"
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True DEBUG = True
@ -32,56 +32,54 @@ ALLOWED_HOSTS = []
# Application definition # Application definition
INSTALLED_APPS = [ INSTALLED_APPS = [
'django.contrib.admin', "django.contrib.admin",
'django.contrib.auth', "django.contrib.auth",
'django.contrib.contenttypes', "django.contrib.contenttypes",
'django.contrib.sessions', "django.contrib.sessions",
'django.contrib.messages', "django.contrib.messages",
'django.contrib.staticfiles', "django.contrib.staticfiles",
'base.apps.BaseConfig', "base.apps.BaseConfig",
'bootstrap5', "bootstrap5",
] ]
MIDDLEWARE = [ MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware', "django.middleware.security.SecurityMiddleware",
'django.contrib.sessions.middleware.SessionMiddleware', "django.contrib.sessions.middleware.SessionMiddleware",
'django.middleware.common.CommonMiddleware', "django.middleware.common.CommonMiddleware",
'django.middleware.csrf.CsrfViewMiddleware', "django.middleware.csrf.CsrfViewMiddleware",
'django.contrib.auth.middleware.AuthenticationMiddleware', "django.contrib.auth.middleware.AuthenticationMiddleware",
'django.contrib.messages.middleware.MessageMiddleware', "django.contrib.messages.middleware.MessageMiddleware",
'django.middleware.clickjacking.XFrameOptionsMiddleware', "django.middleware.clickjacking.XFrameOptionsMiddleware",
] ]
ROOT_URLCONF = 'extremum.urls' ROOT_URLCONF = "extremum.urls"
TEMPLATES = [ TEMPLATES = [
{ {
'BACKEND': 'django.template.backends.django.DjangoTemplates', "BACKEND": "django.template.backends.django.DjangoTemplates",
'DIRS': [ "DIRS": [BASE_DIR / "templates"],
"APP_DIRS": True,
BASE_DIR / 'templates'], "OPTIONS": {
'APP_DIRS': True, "context_processors": [
'OPTIONS': { "django.template.context_processors.debug",
'context_processors': [ "django.template.context_processors.request",
'django.template.context_processors.debug', "django.contrib.auth.context_processors.auth",
'django.template.context_processors.request', "django.contrib.messages.context_processors.messages",
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
], ],
}, },
}, },
] ]
WSGI_APPLICATION = 'extremum.wsgi.application' WSGI_APPLICATION = "extremum.wsgi.application"
# Database # Database
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases # https://docs.djangoproject.com/en/5.0/ref/settings/#databases
DATABASES = { DATABASES = {
'default': { "default": {
'ENGINE': 'django.db.backends.sqlite3', "ENGINE": "django.db.backends.sqlite3",
'NAME': BASE_DIR / 'db.sqlite3', "NAME": BASE_DIR / "db.sqlite3",
} }
} }
@ -91,16 +89,16 @@ DATABASES = {
AUTH_PASSWORD_VALIDATORS = [ AUTH_PASSWORD_VALIDATORS = [
{ {
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
}, },
{ {
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
}, },
{ {
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
}, },
{ {
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
}, },
] ]
@ -108,9 +106,9 @@ AUTH_PASSWORD_VALIDATORS = [
# Internationalization # Internationalization
# https://docs.djangoproject.com/en/5.0/topics/i18n/ # https://docs.djangoproject.com/en/5.0/topics/i18n/
LANGUAGE_CODE = 'en-us' LANGUAGE_CODE = "en-us"
TIME_ZONE = 'UTC' TIME_ZONE = "UTC"
USE_I18N = True USE_I18N = True
@ -120,11 +118,10 @@ USE_TZ = True
# Static files (CSS, JavaScript, Images) # Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.0/howto/static-files/ # https://docs.djangoproject.com/en/5.0/howto/static-files/
STATIC_URL = '/static/' STATIC_URL = "base/static/"
MEDIA_URL = '/images/' STATICFILES_DIRS = [BASE_DIR / "base/static"]
STATICFILES_DIRS = (os.path.join(BASE_DIR,'base/static'),)
# Default primary key field type # Default primary key field type
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

@ -1,111 +1,40 @@
{% load static %}
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
{% load bootstrap5 %}
{% bootstrap_css %}
{% bootstrap_javascript %}
<meta charset="utf-8" /> <meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="'IE=edge" /> <meta http-equiv="X-UA-Compatible" content="'IE=edge" />
<title>EXTREMUM</title> <title>EXTREMUM</title>
<meta name="viewport" content="'width=device-width, initial-scale=1" /> <meta name="viewport" content="'width=device-width, initial-scale=1" />
{% load static %} <link rel="stylesheet" href="{% static 'css/style.css' %}">
{% load bootstrap5 %} <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
{% bootstrap_css %}
{% bootstrap_javascript %}
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css">
</head> </head>
<style> <nav>
<div class="wrapper">
<div class="logo"><a href="#">Extremum</a></div>
<input type="radio" name="slider" id="menu-btn">
<input type="radio" name="slider" id="close-btn">
<label for="menu-btn" class="btn menu-btn"><i class="fas fa-bars"></i></label>
</div>
</nav>
<br>
<br>
<br>
<br>
<br>
<body onscroll="setScroll()" onload="setScreen()">
.scrollit { {% block content %}
overflow: auto; {% endblock content %}
position: fixed;
float: left;
height: auto;
width: 40%;
max-width: fit-content;
margin: 0px auto;
}
.dataframe { <script src="{% static 'js/hide_seek.js' %}"></script>
font-size: 11pt; <script src="{% static 'js/slider.js' %}"></script>
font-family: Arial; <script src="{% static 'js/keep_scroll_on_load.js' %}"></script>
border-collapse: collapse;
margin: 25px 0;
font-size: 0.9em;
}
.dataframe thead tr {
text-align: left;
font-weight: bold;
}
.dataframe th,
.dataframe td {
padding: 12px 15px;
text-align: left;
}
.dataframe tbody tr {
border-bottom: 1px solid #dddddd;
}
.dataframe tbody tr:nth-of-type(even) {
background-color: #f3f3f3;
}
.dataframe tbody tr:last-of-type {
border-bottom: 2px solid #009879;
}
/* CSS */
.button-6 {
align-items: center;
background-color: #FFFFFF;
border: 1px solid rgba(0, 0, 0, 0.1);
border-radius: .25rem;
box-shadow: rgba(0, 0, 0, 0.02) 0 1px 3px 0;
box-sizing: border-box;
color: rgba(0, 0, 0, 0.85);
cursor: pointer;
display: inline-flex;
font-family: system-ui,-apple-system,system-ui,"Helvetica Neue",Helvetica,Arial,sans-serif;
font-size: 16px;
font-weight: 600;
justify-content: center;
line-height: 1.25;
margin: 0;
min-height: 3rem;
padding: calc(.875rem - 1px) calc(1.5rem - 1px);
position: relative;
text-decoration: none;
transition: all 250ms;
user-select: none;
-webkit-user-select: none;
touch-action: manipulation;
vertical-align: baseline;
width: auto;
}
.button-6:hover,
.button-6:focus {
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.1) 0 4px 12px;
color: rgba(0, 0, 0, 0.65);
}
.button-6:hover {
transform: translateY(-1px);
}
.button-6:active {
background-color: #F0F0F1;
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.06) 0 2px 4px;
color: rgba(0, 0, 0, 0.65);
transform: translateY(0);
}
</style>
<body>
{% block content %} {% endblock content %}
</body> </body>
</html> </html>