diff --git a/README.md b/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/base/templates/base/home.html b/base/templates/base/home.html index a354839a5..192f806e5 100644 --- a/base/templates/base/home.html +++ b/base/templates/base/home.html @@ -13,18 +13,17 @@ <br> <br> - - <div class="row"> - <div class="col d-flex justify-content-center"> - <h3> - <i class="fas fa-upload"></i> Import a file - </h3> - </div> - </div> <div class="row"> - <div class = "mx-auto p-4 text-center"> + <div class="col d-flex justify-content-center"> <form action="{% url 'home' %}" id="csv" method="post" enctype="multipart/form-data"> + <label style="display:flex; + flex-direction:column; + align-items: center;"> + <h3> + <i class="fas fa-upload"></i> Import a file + </h3> + </label> {% csrf_token %} <input type="file" title="Upload excel file" @@ -37,19 +36,6 @@ style="border: 1px solid green; padding:5px; border-radius: 2px; cursor: pointer;"> </form> </div> - </div> - - <br> - <div class="row"> - <div class="col"> - <div class="scrollit"> - {{ data_to_display|safe }} - </div> - </div> - </div> - <br> - - <div class="row justify-content-center"> <div class="col d-flex justify-content-center"> <form action="{% url 'home' %}" name="plot" method="POST" id="stats"> <label style="display:flex; @@ -79,30 +65,22 @@ <input type="submit" value="Plot" name="plot"> </form> </div> - - <div class="col d-flex justify-content-center multi-button"> - <form action="{% url 'home' %}" method="POST"> - {% csrf_token %} - <label style="display:flex; - flex-direction:column; - align-items: center;"> - <h3> - <i class="fas fa-cog"></i> Preprocessing - </h3> - </label> - <div class="multi-button"> - <button type="submit" name="std" class="button-6" role="button" >Standardization</button> - <button type="submit" name="onehot" style="margin:0;margin-left:16px;" class="button-6" role="button" >One Hot Encoding</button> - <button type="submit" name="imp" style="margin:0;margin-left:16px;" class="button-6" role="button" >Imputations</button> - </div> - </form> - </div> </div> + <br> - <div class="row justify-content-center"> + <div class="row"> + <div class="col d-flex justify-content-center"> + <div class="scrollit"> + {{ data_to_display|safe }} + </div> + </div> <div class="col d-flex justify-content-center"> {{ fig|safe }} </div> + </div> + <br> + <br> + <div class="row justify-content-center"> <div class="col d-flex justify-content-center"> <form action="{% url 'home' %}" method="POST"> <label style="display:flex; @@ -125,6 +103,25 @@ </div> </form> </div> + <div class="col d-flex justify-content-center"> + <div class="col d-flex justify-content-center multi-button"> + <form action="{% url 'home' %}" method="POST"> + {% csrf_token %} + <label style="display:flex; + flex-direction:column; + align-items: center;"> + <h3> + <i class="fas fa-cog"></i> Preprocessing + </h3> + </label> + <div class="multi-button"> + <button type="submit" name="std" class="button-6" role="button" >Standardization</button> + <button type="submit" name="onehot" style="margin:0;margin-left:16px;" class="button-6" role="button" >One Hot Encoding</button> + <button type="submit" name="imp" style="margin:0;margin-left:16px;" class="button-6" role="button" >Imputations</button> + </div> + </form> + </div> + </div> </div> </div> {% endblock content%} diff --git a/base/views.py b/base/views.py index 645ce1820..829ba7576 100644 --- a/base/views.py +++ b/base/views.py @@ -6,6 +6,8 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.impute import SimpleImputer from sklearn.preprocessing import StandardScaler import numpy as np +from pandas.api.types import is_string_dtype +from pandas.api.types import is_numeric_dtype from . import utils @@ -18,19 +20,15 @@ def home(request): global excel_file_name global excel_file_name_preprocessed - # if file for preprocessing does not exist create it - if os.path.exists(excel_file_name_preprocessed) == False: - df = pd.read_csv(excel_file_name) - df.to_csv(excel_file_name_preprocessed) - if request.method == 'POST': - print("HEYEYEYEYEYE") - feature1 = request.POST.get('feature1') - feature2 = request.POST.get('feature2') if 'csv' in request.POST: excel_file = request.FILES["excel_file"] excel_file_name = request.FILES["excel_file"].name - df = pd.read_csv(excel_file) + + # here we dont use the name of the file since the + # uploaded file is not yet saved + # In every other case we just need the name + df = pd.read_csv(excel_file) fs = FileSystemStorage() #defaults to MEDIA_ROOT request.session['excel_file_name'] = excel_file_name @@ -41,29 +39,49 @@ def home(request): fs = FileSystemStorage() #defaults to MEDIA_ROOT request.session['excel_file_name_preprocess'] = excel_file_name_preprocessed fs.save(excel_file_name_preprocessed, excel_file) - - if 'std' in request.POST: - preprocess(excel_file_name_preprocessed, 'std') - if 'onehot' in request.POST: - preprocess(excel_file_name_preprocessed, 'onehot') - if 'imp' in request.POST: - preprocess(excel_file_name_preprocessed, 'imp') - - if 'plot' in request.POST: - fig = stats(excel_file_name, feature1, feature2) + # if file for preprocessing does not exist create it + if os.path.exists(excel_file_name_preprocessed) == False: + df.to_csv(excel_file_name_preprocessed) + + feature1 = df.columns[0] + feature2 = df.columns[1] + request.session['feature1'] = feature1 + request.session['feature2'] = feature2 - if 'logit' in request.POST: - training(excel_file_name_preprocessed, 'logit') - if 'xgb' in request.POST: - training(excel_file_name_preprocessed, 'xgb') + elif 'plot' in request.POST: + df = pd.read_csv(excel_file_name) + feature1 = request.POST.get('feature1') + feature2 = request.POST.get('feature2') + print(feature1) + request.session['feature1'] = feature1 + request.session['feature2'] = feature2 + else: + df = pd.read_csv(excel_file_name) + + if 'std' in request.POST: + preprocess(excel_file_name_preprocessed, 'std') + if 'onehot' in request.POST: + preprocess(excel_file_name_preprocessed, 'onehot') + if 'imp' in request.POST: + preprocess(excel_file_name_preprocessed, 'imp') + + if 'logit' in request.POST: + training(excel_file_name_preprocessed, 'logit') + if 'xgb' in request.POST: + training(excel_file_name_preprocessed, 'xgb') else: - # if not post, meaning either a lanch of the page or a refresh - feature1 = 'MonthlyCharges' - feature2 = 'tenure' - fig = stats(excel_file_name, feature1, feature2) - - # collect the data to render - df = pd.read_csv(excel_file_name) + df = pd.read_csv(excel_file_name) + # if file for preprocessing does not exist create it + if os.path.exists(excel_file_name_preprocessed) == False: + df.to_csv(excel_file_name_preprocessed) + + feature1 = df.columns[0] + feature2 = df.columns[1] + request.session['feature1'] = feature1 + request.session['feature2'] = feature2 + + fig = stats(excel_file_name, request.session['feature1'], request.session['feature2']) + data_to_display = df[:5].to_html() request.session['data_to_display'] = data_to_display @@ -71,14 +89,23 @@ def home(request): request.session['excel_file_name_preprocessed'] = excel_file_name_preprocessed labels = df.columns - context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig, 'feature1': feature1, 'feature2': feature2} + context = {'data_to_display': data_to_display, 'excel_file': excel_file_name, 'labels': labels, 'fig': fig, 'feature1': request.session['feature1'], 'feature2': request.session['feature2']} return render(request,'base/home.html', context) def stats(name, feature1, feature2): global fig df = pd.read_csv(name) import plotly.express as px - fig = px.scatter(df, x=feature1, y=feature2, color='Churn') + if is_numeric_dtype(df[feature1]) and is_numeric_dtype(df[feature2]): + print("if") + fig = px.scatter(df, feature1, feature2, color='DEATH_EVENT') + elif is_string_dtype(df[feature1]) and is_string_dtype(df[feature2]): + print("elseif") + fig = px.histogram(df, feature1) + else: + print("else") + fig = px.strip(df, feature1, feature2) + fig = fig.to_html(full_html=False) return fig diff --git a/db.sqlite3 b/db.sqlite3 index e45d1517d..d91c9b6fc 100644 Binary files a/db.sqlite3 and b/db.sqlite3 differ diff --git a/templates/main.html b/templates/main.html index b603b0d62..9e765e7b3 100644 --- a/templates/main.html +++ b/templates/main.html @@ -16,8 +16,10 @@ .scrollit { overflow: auto; + position: fixed; + float: left; height: auto; - width: 60%; + width: 40%; max-width: fit-content; margin: 0px auto; }