Tabular data Train and Testing for Breast Cancer dataset

This commit is contained in:
atla8167 2024-06-10 11:38:59 +03:00
parent 9a02a02658
commit 6047fad7ce
18 changed files with 6166 additions and 6327 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -1 +1,3 @@
from django.db import models
from django.db import models

414
base/static/css/style.css Normal file

@ -0,0 +1,414 @@
@import url("https://fonts.googleapis.com/css2?family=Poppins:wght@200;300;400;500;600;700&display=swap");
.container-fluid {
padding-bottom: 100px;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: "Poppins", sans-serif;
}
nav {
position: fixed;
z-index: 99;
width: 100%;
background: #242526;
}
nav .wrapper {
  /* Fixed-height flex row used as the inner container of the nav bar. */
  position: relative;
  max-width: 1300px;
  padding: 0px 30px;
  height: 70px;
  line-height: 70px;
  /* The shorthand sets all four margins, so no separate margin-bottom is
     declared: one placed before this line would always be overridden. */
  margin: auto;
  display: flex;
  align-items: center;
  justify-content: space-between;
}
.wrapper .logo a {
color: #f2f2f2;
font-size: 30px;
font-weight: 600;
text-decoration: none;
}
.wrapper .nav-links {
display: inline-flex;
}
.nav-links li {
list-style: none;
}
.nav-links li a {
color: #f2f2f2;
text-decoration: none;
font-size: 18px;
font-weight: 500;
padding: 9px 15px;
border-radius: 5px;
transition: all 0.3s ease;
}
.nav-links li a:hover {
background: #3a3b3c;
}
.nav-links .mobile-item {
display: none;
}
.nav-links .drop-menu {
position: absolute;
background: #242526;
width: 180px;
line-height: 45px;
top: 85px;
opacity: 0;
visibility: hidden;
box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15);
}
.nav-links li:hover .drop-menu,
.nav-links li:hover .mega-box {
transition: all 0.3s ease;
top: 70px;
opacity: 1;
visibility: visible;
}
.drop-menu li a {
width: 100%;
display: block;
padding: 0 0 0 15px;
font-weight: 400;
border-radius: 0px;
}
.mega-box {
position: absolute;
left: 0;
width: 100%;
padding: 0 30px;
top: 85px;
opacity: 0;
visibility: hidden;
}
.mega-box .content {
background: #242526;
padding: 25px 20px;
display: flex;
width: 100%;
justify-content: space-between;
box-shadow: 0 6px 10px rgba(0, 0, 0, 0.15);
}
.mega-box .content .row {
width: calc(25% - 30px);
line-height: 45px;
}
.content .row img {
width: 100%;
height: 100%;
object-fit: cover;
}
.content .row header {
color: #f2f2f2;
font-size: 20px;
font-weight: 500;
}
.content .row .mega-links {
margin-left: -40px;
border-left: 1px solid rgba(255, 255, 255, 0.09);
}
.row .mega-links li {
padding: 0 20px;
}
.row .mega-links li a {
  /* Links inside a mega-menu column. A single padding declaration is kept:
     an earlier `padding: 0px` was overridden by this one and had no effect. */
  padding: 0 20px;
  color: #d9d9d9;
  font-size: 17px;
  display: block;
}
.row .mega-links li a:hover {
color: #f2f2f2;
}
.wrapper .btn {
color: #fff;
font-size: 20px;
cursor: pointer;
display: none;
}
.wrapper .btn.close-btn {
position: absolute;
right: 30px;
top: 10px;
}
@media screen and (max-width: 970px) {
.wrapper .btn {
display: block;
}
.wrapper .nav-links {
position: fixed;
height: 100vh;
width: 100%;
max-width: 350px;
top: 0;
left: -100%;
background: #171f27;
display: block;
padding: 50px 10px;
line-height: 50px;
overflow-y: auto;
box-shadow: 0px 15px 15px rgba(0, 0, 0, 0.18);
transition: all 0.3s ease;
}
/* custom scroll bar */
::-webkit-scrollbar {
width: 10px;
}
::-webkit-scrollbar-track {
background: #242526;
}
::-webkit-scrollbar-thumb {
background: #3a3b3c;
}
#menu-btn:checked ~ .nav-links {
left: 0%;
}
#menu-btn:checked ~ .btn.menu-btn {
display: none;
}
#close-btn:checked ~ .btn.menu-btn {
display: block;
}
.nav-links li {
margin: 15px 10px;
}
.nav-links li a {
padding: 0 20px;
display: block;
font-size: 20px;
}
.nav-links .drop-menu {
position: static;
opacity: 1;
top: 65px;
visibility: visible;
padding-left: 20px;
width: 100%;
max-height: 0px;
overflow: hidden;
box-shadow: none;
transition: all 0.3s ease;
}
#showDrop:checked ~ .drop-menu,
#showMega:checked ~ .mega-box {
max-height: 100%;
}
.nav-links .desktop-item {
display: none;
}
.nav-links .mobile-item {
display: block;
color: #f2f2f2;
font-size: 20px;
font-weight: 500;
padding-left: 20px;
cursor: pointer;
border-radius: 5px;
transition: all 0.3s ease;
}
.nav-links .mobile-item:hover {
background: #3a3b3c;
}
.drop-menu li {
margin: 0;
}
.drop-menu li a {
border-radius: 5px;
font-size: 18px;
}
.mega-box {
position: static;
top: 65px;
opacity: 1;
visibility: visible;
padding: 0 20px;
max-height: 0px;
overflow: hidden;
transition: all 0.3s ease;
}
.mega-box .content {
box-shadow: none;
flex-direction: column;
padding: 20px 20px 0 20px;
}
.mega-box .content .row {
width: 100%;
margin-bottom: 15px;
border-top: 1px solid rgba(255, 255, 255, 0.08);
}
.mega-box .content .row:nth-child(1),
.mega-box .content .row:nth-child(2) {
border-top: 0px;
}
.content .row .mega-links {
border-left: 0px;
padding-left: 15px;
}
.row .mega-links li {
margin: 0;
}
.content .row header {
font-size: 19px;
}
}
nav input {
display: none;
}
.body-text {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
width: 100%;
text-align: center;
padding: 0 30px;
}
.body-text div {
font-size: 45px;
font-weight: 600;
}
/* csv display */
.scrollit {
margin-top: 60px;
overflow: auto;
position: absolute;
height: min-content;
max-height: 40%;
max-width: 35%;
}
.dataframe {
  /* Base look of the rendered table. Only the effective font-size is kept:
     an earlier `font-size: 9pt` was overridden by the 0.9em value below. */
  font-family: Arial;
  border-collapse: collapse;
  font-size: 0.9em;
}
.dataframe thead tr {
text-align: left;
font-weight: bold;
}
.dataframe th,
.dataframe td {
padding: 12px 15px;
text-align: left;
}
.dataframe tbody tr {
border-bottom: 1px solid #dddddd;
}
.dataframe tbody tr:nth-of-type(even) {
background-color: #f3f3f3;
}
.dataframe tbody tr:last-of-type {
border-bottom: 2px solid #009879;
}
/* plotly toolbar */
.modebar {
display: none !important;
}
/* button */
/* CSS */
.button-6 {
align-items: center;
background-color: #FFFFFF;
border: 1px solid rgba(0, 0, 0, 0.1);
border-radius: .25rem;
box-shadow: rgba(0, 0, 0, 0.02) 0 1px 3px 0;
box-sizing: border-box;
color: rgba(0, 0, 0, 0.85);
cursor: pointer;
display: inline-flex;
font-family: system-ui,-apple-system,system-ui,"Helvetica Neue",Helvetica,Arial,sans-serif;
font-size: 16px;
font-weight: 600;
justify-content: center;
line-height: 1.25;
margin: 0;
min-height: 3rem;
padding: calc(.875rem - 1px) calc(1.5rem - 1px);
position: relative;
text-decoration: none;
transition: all 250ms;
user-select: none;
-webkit-user-select: none;
touch-action: manipulation;
vertical-align: baseline;
width: auto;
}
.button-6:hover,
.button-6:focus {
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.1) 0 4px 12px;
color: rgba(0, 0, 0, 0.65);
}
.button-6:hover {
transform: translateY(-1px);
}
.button-6:active {
background-color: #F0F0F1;
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.06) 0 2px 4px;
color: rgba(0, 0, 0, 0.65);
transform: translateY(0);
}
/* radio buttons */
.radio-inputs {
position: relative;
display: flex;
flex-wrap: wrap;
border-radius: 0.5rem;
background-color: #EEE;
box-sizing: border-box;
box-shadow: 0 0 0px 1px rgba(0, 0, 0, 0.06);
padding: 0.25rem;
width: 300px;
font-size: 14px;
}
.radio-inputs .radio {
flex: 1 1 auto;
text-align: center;
}
.radio-inputs .radio input {
display: none;
}
.radio-inputs .radio .name {
display: flex;
cursor: pointer;
align-items: center;
justify-content: center;
border-radius: 0.5rem;
border: none;
padding: .5rem 0;
color: rgba(51, 65, 85, 1);
transition: all .15s ease-in-out;
}
.radio-inputs .radio input:checked + .name {
background-color: #fff;
font-weight: 600;
}

@ -0,0 +1,11 @@
// Once the DOM is ready, clicking any .trainTestButton shows #train-me and
// hides #test-me when the #train radio is checked, and the reverse otherwise.
$(function () {
  $(".trainTestButton").on("click", function () {
    var trainSelected = $("#train").is(":checked");
    // toggle(true) behaves like show(), toggle(false) like hide().
    $("#train-me").toggle(trainSelected);
    $("#test-me").toggle(!trainSelected);
  });
});

@ -0,0 +1,12 @@
/** Scroll the window vertically to the value stored under "yPos" in localStorage. */
function setScreen() {
  window.scrollTo(0, localStorage.getItem("yPos"));
}

/** Store the window's current vertical scroll offset under "yPos" in localStorage. */
function setScroll() {
  localStorage.setItem("yPos", window.pageYOffset);
}

/** Reset the stored "yPos" value to 0 and scroll the window back to the top-left corner. */
function clearScreen() {
  localStorage.setItem("yPos", 0);
  window.scrollTo(0, 0);
}

@ -1,7 +0,0 @@
$(document).ready( function()
{
$('#spinner').on('click', function()
{
$('body').addClass('busy');
});
});

6
base/static/js/slider.js Normal file

@ -0,0 +1,6 @@
// Mirror the current value of the #split_input control into the #value element,
// once on load and again on every "input" event.
const value = document.querySelector("#value");
const input = document.querySelector("#split_input");
// querySelector returns null when an element is absent; skip the wiring in that
// case instead of throwing a TypeError on pages that do not contain the slider.
if (value && input) {
  value.textContent = input.value;
  input.addEventListener("input", (event) => {
    value.textContent = event.target.value;
  });
}

@ -1,25 +1,14 @@
{% extends 'main.html' %}
{% block content %}
{% load static %}
<div class="container-fluid">
<div class="mx-auto p-4 text-center bg-white shadow-sm">
<a href="/" >
<h1>
EXTREMUM
</h1>
</a>
</div>
<br>
<br>
<div class="row">
<div class="col d-flex justify-content-center">
<div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data">
<label style="display:flex;
flex-direction:column;
align-items: center;">
flex-direction:column;
align-items: center;">
<h3>
<i class="fas fa-upload"></i> Import a file
</h3>
@ -36,11 +25,11 @@
style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;">
</form>
</div>
<div class="col d-flex justify-content-center">
<div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" name="plot" method="POST" id="stats">
<label style="display:flex;
flex-direction:column;
align-items: center;">
flex-direction:column;
align-items: center;">
<h3>
<i class="fa-solid fa-chart-simple"></i> Stats
</h3>
@ -66,62 +55,158 @@
</form>
</div>
</div>
<br>
<br>
<div class="row">
<div class="col d-flex justify-content-center">
<div class="col-sm d-flex justify-content-center">
<div class="scrollit">
{{ data_to_display|safe }}
</div>
</div>
<div class="col d-flex justify-content-center">
<div class="col-sm d-flex justify-content-center">
{{ fig|safe }}
</div>
</div>
<br>
<br>
<div class="row justify-content-center">
<div class="col d-flex justify-content-center">
<form action="{% url 'home' %}" method="POST">
<label style="display:flex;
flex-direction:column;
align-items: center;">
<h3>
<i class="fa-solid fa-wand-magic-sparkles"></i> Training
</h3>
</div>
<br>
<br>
<div class="container-fluid">
<div class="row">
<section>
<label style="display:flex;
flex-direction:column;
align-items: center;">
<h2>
<i class="fas fa-cog"></i> Preprocessing
</h2>
</label>
</section>
</div>
<br>
<br>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<form action="{% url 'home' %}" method="POST" id="preprocess-form">
{% csrf_token %}
<div class="btn-toolbar" role="toolbar">
<button type="submit" name="svm" style="margin:0;margin-left:16px;" class="button-6" role="button" >Support Vector Machine</button>
<button type="submit" name="logit" style="margin:0;margin-left:16px;" class="button-6" role="button" >Logistic Regression</button>
<button type="submit" name="xgb" style="margin:0;margin-left:16px;" class="button-6" role="button" >XGBoost</button>
</div>
<br>
<div class="justify-content-center btn-toolbar" role="toolbar">
<button type="submit" name="dt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Decision Tree</button>
<button type="submit" name="rt" style="margin:0;margin-left:16px;" class="button-6" role="button" >Random Forest</button>
<div style="scale: 1.2;">
<div class="form-check form-check-inline">
<input class="form-check-input" type="checkbox" value="std" name="boxes" checked>
{# Each checkbox carries the id its label's "for" attribute refers to, so clicking the label toggles the box. #}
<input class="form-check-input" type="checkbox" value="std" name="boxes" id="inlineCheckbox1">
<label class="form-check-label" for="inlineCheckbox1">Standardization</label>
</div>
<div class="form-check form-check-inline">
<input class="form-check-input" type="checkbox" value="onehot" name="boxes" id="inlineCheckbox2">
<label class="form-check-label" for="inlineCheckbox2">One Hot Encoding</label>
</div>
<div class="form-check form-check-inline">
<input class="form-check-input" type="checkbox" value="imp" name="boxes" id="inlineCheckbox3">
<label class="form-check-label" for="inlineCheckbox3">Imputations</label>
</div>
</div>
</form>
</div>
<div class="col d-flex justify-content-center">
<div class="col d-flex justify-content-center multi-button">
<form action="{% url 'home' %}" method="POST">
{% csrf_token %}
<label style="display:flex;
</div>
<br>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<button type="submit" class="button-6" role="button" name="preprocess" form="preprocess-form">Go!</button>
</div>
</div>
<div class="row">
{% if pca %}
<div class="col-sm d-flex justify-content-center">
{{ pca|safe }}
</div>
{% endif %}
{% if tsne %}
<div class="col-sm d-flex justify-content-center">
{{ tsne|safe }}
</div>
{% endif %}
</div>
<br>
<br>
</div>
<div class="container-fluid">
<form action="{% url 'home' %}" method="POST" id="traintest-form">
{% csrf_token %}
<div class="row">
<div class="col-sm d-flex justify-content-center">
<label style="display:flex;
flex-direction:column;
align-items: center;">
<h3>
<i class="fas fa-cog"></i> Preprocessing
</h3>
</label>
<div class="multi-button">
<button type="submit" name="std" class="button-6" role="button" >Standardization</button>
<button type="submit" name="onehot" style="margin:0;margin-left:16px;" class="button-6" role="button" >One Hot Encoding</button>
<button type="submit" name="imp" style="margin:0;margin-left:16px;" class="button-6" role="button" >Imputations</button>
</div>
</form>
<h2>
<i class="fas fa-magic"></i>Pick a model
</h2>
{# Model picker submitted as the "model" field. <option> has no "type" attribute, so none is set. #}
{# NOTE(review): the home view in this commit branches on logit/xgb/dt/svm only; confirm how "rt" should be handled. #}
<select id="model" name="model" style="scale: 1.2;">
<option value="svm">Support Vector Machine</option>
<option value="logit" selected>Logistic Regression</option>
<option value="xgb">XGBoost</option>
<option value="dt">Decision Tree</option>
<option value="rt">Random Forest</option>
</select>
</label>
</div>
</div>
<br>
<br>
<div class="row">
<div class="col-sm d-flex justify-content-center">
{# Train/Test mode switch. Each label's "for" matches the id of its radio so clicking the text selects it. #}
<div style="scale: 1.5;">
<input type="radio" name="colorRadio" value="train" class="trainTestButton" id="train"/>
<label for="train">Train </label>
<input type="radio" name="colorRadio" value="test" class="trainTestButton" id="test"/>
<label for="test">Test </label>
</div>
</div>
</div>
<br>
<div id="train-me" style="display: none;">
<br>
<div class="row">
<div class="col-md d-flex justify-content-center">
<label style="display:flex;
flex-direction:column;
align-items: center;">
<h3>
Test set ratio
</h3>
<input name="split_input" id="split_input" type="range" min="0" max="1" step="0.1" style="width: 200px;"/>
<output id="value"></output>
</label>
</div>
</div>
</div>
<br>
</form>
<div class="row">
<div class="col-sm d-flex justify-content-center">
<button type="submit" class="button-6" role="button" name="traintest" form="traintest-form">Go!</button>
</div>
</div>
<br>
<div class="row">
{% if fig2 %}
<div class="col-sm d-flex justify-content-center">
{{ fig2|safe }}
</div>
{% endif %}
{% if clas_report %}
<div class="col-sm d-flex justify-content-center">
<div class="scrollit">
{{ clas_report|safe }}
</div>
</div>
{% endif %}
</div>
</div>
{% endblock content%}

@ -1,10 +1,10 @@
from django.urls import path
from django.urls import path, include
from . import views
from . import models
urlpatterns = [
path('', views.home, name="home"),
path('preprocess', views.preprocess, name="preprocess"),
path('stats', views.stats, name="stats"),
path('train',views.training, name = 'training'),
path('stats', views.stats, name="stats"),
]

@ -2,168 +2,434 @@ from django.shortcuts import render, redirect
import pandas as pd
from django.core.files.storage import FileSystemStorage
import pickle, os
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from . import utils
from sklearn.metrics import accuracy_score, classification_report
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import LabelEncoder
import joblib
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
fig = None
excel_file_name_preprocessed = "dataset_preprocessed.csv"
excel_file_name = "dataset.csv"
clas_report = None
FILE_NAME = "dataset.csv"
PROCESS_FILE_NAME = "dataset_preprocessed.csv"
def home(request):
global fig
global excel_file_name
global excel_file_name_preprocessed
if request.method == 'POST':
if 'csv' in request.POST:
global clas_report
# request.session.flush()
if "fig" in request.session:
fig = request.session.get("fig")
else:
fig = None
if "fig2" in request.session:
fig2 = request.session.get("fig2")
else:
fig2 = None
if "pca" in request.session:
pca = request.session.get("pca")
else:
pca = None
if "tsne" in request.session:
tsne = request.session.get("tsne")
else:
tsne = None
if "excel_file_name" in request.session:
excel_file_name = request.session.get("excel_file_name")
else:
# Name of the default dataset. If the session variable
# is not set, no CSV was ever uploaded, so the
# default dataset should be used.
excel_file_name = FILE_NAME
request.session["excel_file_name"] = excel_file_name
if "excel_file_name_preprocessed" in request.session:
excel_file_name_preprocessed = request.session.get(
"excel_file_name_preprocessed"
)
else:
excel_file_name_preprocessed = PROCESS_FILE_NAME
request.session["excel_file_name_preprocessed"] = excel_file_name_preprocessed
df = pd.DataFrame()
if request.method == "POST":
if "csv" in request.POST:
excel_file = request.FILES["excel_file"]
excel_file_name = request.FILES["excel_file"].name
# here we dont use the name of the file since the
fig = None
fig2 = None
# here we dont use the name of the file since the
# uploaded file is not yet saved
# In every other case we just need the name
df = pd.read_csv(excel_file)
fs = FileSystemStorage() #defaults to MEDIA_ROOT
request.session['excel_file_name'] = excel_file_name
fs.save(excel_file_name, excel_file)
idx = excel_file_name.index('.')
excel_file_name_preprocessed = excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
fs = FileSystemStorage() #defaults to MEDIA_ROOT
request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed
fs.save(excel_file_name_preprocessed, excel_file)
# if file for preprocessing does not exist create it
if os.path.exists(excel_file_name_preprocessed) == False:
df.to_csv(excel_file_name_preprocessed)
# fs = FileSystemStorage() # defaults to MEDIA_ROOT
request.session["excel_file_name"] = excel_file_name
# fs.save(excel_file_name, excel_file)
df = pd.read_csv(excel_file)
df.drop(["id"], axis=1, inplace=True)
df.to_csv(excel_file_name, index=False)
feature1 = df.columns[0]
feature2 = df.columns[1]
request.session['feature1'] = feature1
request.session['feature2'] = feature2
elif 'plot' in request.POST:
df = pd.read_csv(excel_file_name)
feature1 = request.POST.get('feature1')
feature2 = request.POST.get('feature2')
print(feature1)
request.session['feature1'] = feature1
request.session['feature2'] = feature2
else:
df = pd.read_csv(excel_file_name)
if 'std' in request.POST:
preprocess(excel_file_name_preprocessed, 'std')
if 'onehot' in request.POST:
preprocess(excel_file_name_preprocessed, 'onehot')
if 'imp' in request.POST:
preprocess(excel_file_name_preprocessed, 'imp')
if 'logit' in request.POST:
training(excel_file_name_preprocessed, 'logit')
if 'xgb' in request.POST:
training(excel_file_name_preprocessed, 'xgb')
else:
df = pd.read_csv(excel_file_name)
request.session["feature1"] = feature1
request.session["feature2"] = feature2
fig = stats(
excel_file_name,
request.session["feature1"],
request.session["feature2"],
)
elif "plot" in request.POST:
feature1 = request.POST.get("feature1")
feature2 = request.POST.get("feature2")
request.session["feature1"] = feature1
request.session["feature2"] = feature2
fig = stats(
excel_file_name,
request.session["feature1"],
request.session["feature2"],
)
elif "traintest" in request.POST:
mode = request.POST.get("colorRadio")
model = request.POST.get("model")
test_size = float(request.POST.get("split_input"))
print(test_size, mode, model)
if mode == "train":
if model == "logit":
con = training(excel_file_name_preprocessed, "logit", test_size)
elif model == "xgb":
con = training(excel_file_name_preprocessed, "xgb", test_size)
elif model == "dt":
con = training(excel_file_name_preprocessed, "dt", test_size)
elif model == "svm":
con = training(excel_file_name_preprocessed, "svm", test_size)
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif mode == "test":
if model == "logit":
con = testing(excel_file_name_preprocessed, "logit")
elif model == "xgb":
con = testing(excel_file_name_preprocessed, "xgb")
elif model == "dt":
con = testing(excel_file_name_preprocessed, "dt")
elif model == "svm":
con = testing(excel_file_name_preprocessed, "svm")
fig2 = con["fig2"]
clas_report = con["clas_report"].to_html()
elif "preprocess" in request.POST:
value_list = request.POST.getlist("boxes")
# if file for preprocessing does not exist create it
if os.path.exists(excel_file_name_preprocessed) == False:
df.to_csv(excel_file_name_preprocessed)
# also apply basic preprocessing
if os.path.exists(excel_file_name_preprocessed) == False:
# generate filename
idx = excel_file_name.index(".")
excel_file_name_preprocessed = (
excel_file_name[:idx] + "_preprocessed" + excel_file_name[idx:]
)
# save file for preprocessing
preprocess_df = pd.read_csv(excel_file_name)
fs = FileSystemStorage() # defaults to MEDIA_ROOT
request.session["excel_file_name_preprocessed"] = (
excel_file_name_preprocessed
)
preprocess_df.to_csv(excel_file_name_preprocessed, index=False)
preprocess_df.drop(
["perimeter_mean", "area_mean"], axis=1, inplace=True
)
preprocess_df.drop(
["perimeter_worst", "area_worst"], axis=1, inplace=True
)
preprocess_df.drop(["perimeter_se", "area_se"], axis=1, inplace=True)
preprocess_df.drop(
[
"radius_worst",
"concave_points_mean",
"texture_worst",
"symmetry_worst",
"smoothness_worst",
],
axis=1,
inplace=True,
)
# preprocess_df.drop(["id"], axis=1, inplace=True)
le = LabelEncoder()
preprocess_df["diagnosis"] = le.fit_transform(
preprocess_df["diagnosis"]
)
else:
preprocess_df = pd.read_csv(excel_file_name_preprocessed)
preprocess(preprocess_df, value_list, excel_file_name_preprocessed)
pca = PCA()
pca.fit(preprocess_df)
exp_var_cumul = np.cumsum(pca.explained_variance_ratio_)
pca = px.area(
x=range(1, exp_var_cumul.shape[0] + 1),
y=exp_var_cumul,
labels={"x": "# Components", "y": "Explained Variance"},
).to_html()
features = preprocess_df.loc[:, :"compactness_se"]
tsne = TSNE(n_components=2, random_state=0)
projections = tsne.fit_transform(features)
tsne = px.scatter(
projections,
x=0,
y=1,
color=preprocess_df.diagnosis,
labels={"color": "diagnosis"},
).to_html()
else:
if os.path.exists(excel_file_name) == False:
excel_file_name = "dataset.csv"
request.session["excel_file_name"] = excel_file_name
df = pd.read_csv(excel_file_name)
fig2 = None
# just random columns to plot
feature1 = df.columns[0]
feature2 = df.columns[1]
request.session['feature1'] = feature1
request.session['feature2'] = feature2
fig = stats(excel_file_name, request.session['feature1'], request.session['feature2'])
data_to_display = df[:5].to_html()
request.session["feature1"] = feature1
request.session["feature2"] = feature2
fig = stats(
excel_file_name, request.session["feature1"], request.session["feature2"]
)
request.session['data_to_display'] = data_to_display
request.session['excel_file_name'] = excel_file_name
request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed
if df.empty:
df = pd.read_csv(excel_file_name)
request.session["fig"] = fig
request.session["fig2"] = fig2
request.session["pca"] = pca
request.session["tsne"] = tsne
data_to_display = df[:5].to_html()
request.session["data_to_display"] = data_to_display
labels = df.columns
context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig, 'feature1': request.session['feature1'], 'feature2': request.session['feature2']}
return render(request,'base/home.html', context)
context = {
"data_to_display": data_to_display,
"excel_file": excel_file_name,
"labels": labels,
"fig": fig,
"fig2": fig2,
"feature1": request.session["feature1"],
"feature2": request.session["feature2"],
"clas_report": clas_report,
"pca": pca,
"tsne": tsne,
}
return render(request, "base/home.html", context)
def stats(name, feature1, feature2):
global fig
df = pd.read_csv(name)
import plotly.express as px
if is_numeric_dtype(df[feature1]) and is_numeric_dtype(df[feature2]):
print("if")
fig = px.scatter(df, feature1, feature2, color='DEATH_EVENT')
elif is_string_dtype(df[feature1]) and is_string_dtype(df[feature2]):
print("elseif")
fig = px.histogram(df, feature1)
binary1 = df[feature1].isin([0, 1]).all()
binary2 = df[feature2].isin([0, 1]).all()
if binary1 == True or binary2 == True:
fig = px.histogram(df, x=feature1, color=feature2)
elif is_numeric_dtype(df[feature1]) or is_numeric_dtype(df[feature2]):
if not is_numeric_dtype(df[feature1]):
# feature1 is not numeric but feature2 should be
fig = px.histogram(df, x=feature2, color=feature1)
elif not is_numeric_dtype(df[feature2]):
# feature2 is not numeric but feature1 should be
fig = px.histogram(df, x=feature1, color=feature2)
else:
# they both are numeric so do scatter
fig = px.scatter(
df, x=feature1, y=feature2, color=df["diagnosis"].astype(str)
)
else:
print("else")
fig = px.strip(df, feature1, feature2)
# neither feature is numeric, so plot a histogram
fig = px.histogram(df, x=feature1, color=feature2)
fig = fig.to_html(full_html=False)
return fig
def preprocess(name, type):
def preprocess(data, value_list, name):
from sklearn.preprocessing import StandardScaler
data = pd.read_csv(name)
if set(['No','customerID','Churn']).issubset(data.columns):
data.drop(['No','customerID','Churn'],axis=1,inplace=True)
if type == 'std':
# define standard scaler
scaler = StandardScaler()
# transform data
print("HEY")
num_d = data.select_dtypes(exclude=['object'])
data[num_d.columns] = scaler.fit_transform(num_d)
if type=='onehot':
data = pd.get_dummies(data,columns=['gender', 'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract',
'PaperlessBilling', 'PaymentMethod'],drop_first = True)
if type == 'imp':
data_numeric = data.select_dtypes(exclude=['object'])
data_categorical = data.select_dtypes(exclude=['number'])
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
data_numeric = pd.DataFrame(imp.fit_transform(data_numeric), columns = data_numeric.columns, index=data_numeric.index) #only apply imputer to numeric columns
data = pd.concat([data_numeric, data_categorical], axis = 1)
os.remove(excel_file_name_preprocessed)
data.to_csv(excel_file_name_preprocessed)
return
def training(name, type):
global fig
for type in value_list:
if type == "std":
# define standard scaler
scaler = StandardScaler()
y = data["diagnosis"]
if is_numeric_dtype(data["diagnosis"]):
# if class column is numeric do not
# apply preprocessing
data = data.drop(["diagnosis"], axis=1)
# transform data
cols = data.select_dtypes(np.number).columns
data[cols] = scaler.fit_transform(data[cols])
y = y.to_frame()
data = data.join(y)
if type == "onehot":
data = pd.get_dummies(data)
if type == "imp":
data_numeric = data.select_dtypes(exclude=["object"])
data_categorical = data.select_dtypes(exclude=["number"])
imp = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
data_numeric = pd.DataFrame(
imp.fit_transform(data_numeric),
columns=data_numeric.columns,
index=data_numeric.index,
) # only apply imputer to numeric columns
data = pd.concat(
[data_numeric, data_categorical], axis=1, ignore_index=False
)
os.remove(name)
data.to_csv(name, index=False)
return
def training(name, type, test_size=0.7):
    """Fit the selected classifier on the CSV at ``name`` and report its quality.

    The data is split into train and test parts (stratified on the
    ``diagnosis`` column), the two parts are written to ``train.csv`` and
    ``test.csv``, the chosen model is fitted on the train part, and the fitted
    model is pickled to a per-model ``.sav`` file in the working directory.

    Args:
        name: Path of the CSV file to read; it must contain a ``diagnosis``
            column, which is used as the target.
        type: Model key, one of ``"logit"``, ``"xgb"``, ``"dt"`` or ``"svm"``.
        test_size: Passed to ``train_test_split`` as ``test_size``.

    Returns:
        A dict with ``"fig2"`` (HTML of a bar chart of the per-feature weights)
        and ``"clas_report"`` (the classification report on the test part as a
        DataFrame, sorted by ``f1-score`` in descending order).

    Raises:
        ValueError: If ``type`` is not one of the supported model keys.
    """
    from sklearn.model_selection import train_test_split

    # Fail early with a clear message instead of an UnboundLocalError further
    # down when no branch matches.
    if type not in ("logit", "xgb", "dt", "svm"):
        raise ValueError(f"Unsupported model type: {type!r}")

    data = pd.read_csv(name)
    y = data["diagnosis"]
    X = data.drop("diagnosis", axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, shuffle=True, test_size=test_size, stratify=y, random_state=42
    )

    # Keep both parts of the split on disk, target column included.
    X_test.join(y_test.to_frame()).to_csv("test.csv", index=False)
    X_train.join(y_train.to_frame()).to_csv("train.csv", index=False)

    if type == "logit":
        from sklearn.linear_model import LogisticRegression

        model = LogisticRegression(random_state=0).fit(X_train, y_train)
        filename = "lg.sav"
        importance = model.coef_[0]
    elif type == "xgb":
        from xgboost import XGBClassifier

        model = XGBClassifier(learning_rate=0.01, n_estimators=1000).fit(
            X_train, y_train
        )
        filename = "xgb.sav"
        importance = model.feature_importances_
    elif type == "dt":
        from sklearn.tree import DecisionTreeClassifier

        model = DecisionTreeClassifier(max_depth=4, random_state=1)
        model.fit(X_train, y_train)
        filename = "dt.sav"
        importance = model.feature_importances_
    else:  # "svm"
        from sklearn import svm

        model = svm.LinearSVC()
        model.fit(X_train, y_train)
        filename = "svc.sav"
        importance = model.coef_[0]

    y_pred = model.predict(X_test)

    clas_report = classification_report(y_test, y_pred, output_dict=True)
    clas_report = pd.DataFrame(clas_report).transpose()
    clas_report = clas_report.sort_values(by=["f1-score"], ascending=False)

    fig2 = px.bar(x=importance, y=X_train.columns)

    # Persist the model exactly once, and close the file handle deterministically.
    with open(filename, "wb") as model_file:
        pickle.dump(model, model_file)

    con = {
        "fig2": fig2.to_html(),
        "clas_report": clas_report,
    }
    return con
def testing(name, type):
    """Evaluate a previously saved model on the CSV at ``name``.

    The whole file is used as the evaluation set: the ``diagnosis`` column is
    the target and every other column is a feature. The model is loaded from
    the per-model ``.sav`` file in the working directory. This function only
    reads that file; it does not write it back.

    Args:
        name: Path of the CSV file to read; it must contain a ``diagnosis``
            column.
        type: Model key, one of ``"logit"``, ``"xgb"``, ``"dt"`` or ``"svm"``.

    Returns:
        A dict with ``"fig2"`` (HTML of a bar chart of the per-feature weights)
        and ``"clas_report"`` (the classification report as a DataFrame, sorted
        by ``f1-score`` in descending order).

    Raises:
        ValueError: If ``type`` is not one of the supported model keys.
    """
    # For each model key: the saved-model file and the fitted attribute that
    # holds the per-feature weights.
    model_sources = {
        "logit": ("lg.sav", "coef_"),
        "xgb": ("xgb.sav", "feature_importances_"),
        "dt": ("dt.sav", "feature_importances_"),
        "svm": ("svc.sav", "coef_"),
    }
    # Fail early with a clear message instead of an UnboundLocalError further
    # down when no branch matches.
    if type not in model_sources:
        raise ValueError(f"Unsupported model type: {type!r}")
    filename, weight_attr = model_sources[type]

    data = pd.read_csv(name)
    y_test = data["diagnosis"]
    X_test = data.drop("diagnosis", axis=1)

    # NOTE(review): unpickling executes code from the file; only load model
    # files that this application wrote itself.
    model = joblib.load(filename)
    y_pred = model.predict(X_test)

    weights = getattr(model, weight_attr)
    # coef_ is indexed with [0] to take its first row; feature_importances_
    # is used as is.
    importance = weights[0] if weight_attr == "coef_" else weights

    clas_report = classification_report(y_test, y_pred, output_dict=True)
    clas_report = pd.DataFrame(clas_report).transpose()
    clas_report = clas_report.sort_values(by=["f1-score"], ascending=False)

    fig2 = px.bar(x=importance, y=X_test.columns)

    con = {
        "fig2": fig2.to_html(),
        "clas_report": clas_report,
    }
    return con

11098
dataset.csv

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

@ -21,7 +21,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent
# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-0w^ybt_7vclag#rrutc_eo)m+l^@ml)t%jsg6n06siu)xyls+-'
SECRET_KEY = "django-insecure-0w^ybt_7vclag#rrutc_eo)m+l^@ml)t%jsg6n06siu)xyls+-"
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
@ -32,56 +32,54 @@ ALLOWED_HOSTS = []
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'base.apps.BaseConfig',
'bootstrap5',
"django.contrib.admin",
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
"base.apps.BaseConfig",
"bootstrap5",
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
]
ROOT_URLCONF = 'extremum.urls'
ROOT_URLCONF = "extremum.urls"
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [
BASE_DIR / 'templates'],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
"BACKEND": "django.template.backends.django.DjangoTemplates",
"DIRS": [BASE_DIR / "templates"],
"APP_DIRS": True,
"OPTIONS": {
"context_processors": [
"django.template.context_processors.debug",
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.contrib.messages.context_processors.messages",
],
},
},
]
WSGI_APPLICATION = 'extremum.wsgi.application'
WSGI_APPLICATION = "extremum.wsgi.application"
# Database
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': BASE_DIR / 'db.sqlite3',
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": BASE_DIR / "db.sqlite3",
}
}
@ -91,16 +89,16 @@ DATABASES = {
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
},
]
@ -108,9 +106,9 @@ AUTH_PASSWORD_VALIDATORS = [
# Internationalization
# https://docs.djangoproject.com/en/5.0/topics/i18n/
LANGUAGE_CODE = 'en-us'
LANGUAGE_CODE = "en-us"
TIME_ZONE = 'UTC'
TIME_ZONE = "UTC"
USE_I18N = True
@ -120,11 +118,10 @@ USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.0/howto/static-files/
# NOTE(review): this span reads like a rendered diff - the single-quoted
# assignments look like the previous revision and the double-quoted ones like
# their replacements. If executed as written, the later STATIC_URL and
# STATICFILES_DIRS assignments win and MEDIA_URL stays '/images/'.
STATIC_URL = '/static/'
MEDIA_URL = '/images/'
STATICFILES_DIRS = (os.path.join(BASE_DIR,'base/static'),)
# NOTE(review): no leading slash here, unlike the '/static/' value above -
# confirm that a relative static URL is intended.
STATIC_URL = "base/static/"
STATICFILES_DIRS = [BASE_DIR / "base/static"]
# Default primary key field type
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

@ -1,111 +1,40 @@
{% load static %}
<!DOCTYPE html>
<html>
<head>
{% load bootstrap5 %}
{% bootstrap_css %}
{% bootstrap_javascript %}
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>EXTREMUM</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
{% load static %}
{% load bootstrap5 %}
{% bootstrap_css %}
{% bootstrap_javascript %}
<link rel="stylesheet" href="{% static 'css/style.css' %}">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css">
</head>
<style>
.scrollit {
overflow: auto;
position: fixed;
float: left;
height: auto;
width: 40%;
max-width: fit-content;
margin: 0px auto;
}
<nav>
<div class="wrapper">
<div class="logo"><a href="#">Extremum</a></div>
<input type="radio" name="slider" id="menu-btn">
<input type="radio" name="slider" id="close-btn">
<label for="menu-btn" class="btn menu-btn"><i class="fas fa-bars"></i></label>
</div>
</nav>
<br>
<br>
<br>
<br>
<br>
<body onscroll="setScroll()" onload="setScreen()">
.dataframe {
font-size: 11pt;
font-family: Arial;
border-collapse: collapse;
margin: 25px 0;
font-size: 0.9em;
}
.dataframe thead tr {
text-align: left;
font-weight: bold;
}
.dataframe th,
.dataframe td {
padding: 12px 15px;
text-align: left;
}
.dataframe tbody tr {
border-bottom: 1px solid #dddddd;
}
.dataframe tbody tr:nth-of-type(even) {
background-color: #f3f3f3;
}
.dataframe tbody tr:last-of-type {
border-bottom: 2px solid #009879;
}
/* CSS */
.button-6 {
align-items: center;
background-color: #FFFFFF;
border: 1px solid rgba(0, 0, 0, 0.1);
border-radius: .25rem;
box-shadow: rgba(0, 0, 0, 0.02) 0 1px 3px 0;
box-sizing: border-box;
color: rgba(0, 0, 0, 0.85);
cursor: pointer;
display: inline-flex;
font-family: system-ui,-apple-system,system-ui,"Helvetica Neue",Helvetica,Arial,sans-serif;
font-size: 16px;
font-weight: 600;
justify-content: center;
line-height: 1.25;
margin: 0;
min-height: 3rem;
padding: calc(.875rem - 1px) calc(1.5rem - 1px);
position: relative;
text-decoration: none;
transition: all 250ms;
user-select: none;
-webkit-user-select: none;
touch-action: manipulation;
vertical-align: baseline;
width: auto;
}
.button-6:hover,
.button-6:focus {
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.1) 0 4px 12px;
color: rgba(0, 0, 0, 0.65);
}
.button-6:hover {
transform: translateY(-1px);
}
.button-6:active {
background-color: #F0F0F1;
border-color: rgba(0, 0, 0, 0.15);
box-shadow: rgba(0, 0, 0, 0.06) 0 2px 4px;
color: rgba(0, 0, 0, 0.65);
transform: translateY(0);
}
</style>
<body>
{% block content %} {% endblock content %}
{% block content %}
{% endblock content %}
<script src="{% static 'js/hide_seek.js' %}"></script>
<script src="{% static 'js/slider.js' %}"></script>
<script src="{% static 'js/keep_scroll_on_load.js' %}"></script>
</body>
</html>