2021-12-03 17:58:48 +01:00

54 lines
2.6 KiB
Python

from sklearn.datasets import load_breast_cancer, load_boston, load_diabetes, load_iris
import sqlite3
# Scikit-learn toy datasets loaded eagerly at import time.  None of
# ds1-ds4 is referenced anywhere in the visible part of this file --
# presumably consumed by an importing module; verify before removing.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this module fails to import on current releases --
# confirm the pinned sklearn version.
ds1 = load_breast_cancer()
ds2 = load_boston()
ds3 = load_diabetes()
ds4 = load_iris()
def get_data(scenario):
    """Return ``(rows, column_names)`` for the requested report scenario.

    Parameters
    ----------
    scenario : str
        One of ``"1"``-``"4"``, ``"graphs"`` or ``"total"``; any other
        value falls through to an unordered dump of the webpage table.

    Returns
    -------
    tuple
        ``(rows, columns)`` where ``rows`` is a list of tuples from the
        database and ``columns`` is the matching list of display names.
    """
    if scenario == "1":
        data = query_database("SELECT timestamp,url,domain,uuid FROM webpage ORDER BY timestamp DESC;")
        columns = ["Timestamp", "URL", "Domain", "Annotator"]
    elif scenario == "2":
        data = query_database("SELECT DISTINCT webpage.timestamp,categories.category,webpage.url,webpage.uuid,categories.cohen_kappa_score FROM categories,webpage WHERE webpage.sha256 = categories.sha256 ORDER BY timestamp DESC;")
        columns = ["Timestamp", "Category", "URL", "Annotator", "Cohen's kappa"]
    elif scenario == "3":
        data = query_database("SELECT DISTINCT timestamp, url, categories.category, notes.note, notes.uuid FROM webpage INNER JOIN notes ON webpage.sha256=notes.sha256 INNER JOIN categories ON webpage.sha256=categories.sha256 ORDER BY timestamp DESC;")
        columns = ["Timestamp", "URL", "Category", "Annotation", "Annotator"]
    elif scenario == "4":
        data = query_database("SELECT DISTINCT webpage.timestamp, webpage.url, highlightedText.highlightedText, highlightedText.uuid FROM highlightedText INNER JOIN webpage ON webpage.sha256 = highlightedText.sha256 ORDER BY timestamp DESC;")
        # BUGFIX: the query selects 4 columns but the original listed 6
        # names (extra "Annotation", "Category"), which mismatches the
        # result width.  Trimmed to the 4 selected columns.
        columns = ["Timestamp", "URL", "Excerpt", "Annotator"]
    elif scenario == "graphs":
        data = query_database("SELECT categories.category,webpage.url FROM categories INNER JOIN webpage ON categories.sha256 = webpage.sha256;")
        columns = ["Category", "URL"]
    elif scenario == "total":
        data = query_database("SELECT category FROM categories;")
        # NOTE(review): the query returns one column but two names are
        # declared -- "Index" is presumably synthesized downstream
        # (e.g. a DataFrame index); confirm against the caller.
        columns = ["Index", "Category"]
    else:
        # Fallback: same columns as scenario "1" but without ORDER BY.
        data = query_database("SELECT timestamp,url,domain,uuid FROM webpage;")
        columns = ["Timestamp", "URL", "Domain", "Annotator"]
    return data, columns
def get_web_page(url):
    """Fetch raw page rows whose URL contains *url* (substring match).

    Parameters
    ----------
    url : str
        Substring to match against ``rawpage.url``.

    Returns
    -------
    list of tuple
        ``(content, url, sha256)`` rows, de-duplicated by the query.
    """
    # SECURITY FIX: the original concatenated *url* directly into the
    # SQL text, allowing SQL injection.  Bind it as a parameter instead.
    # Queried inline (not via query_database) because that helper only
    # accepts a bare SQL string; the DB path matches query_database's.
    sql = "SELECT DISTINCT content,url,sha256 FROM rawpage WHERE url LIKE ?;"
    db_connection = sqlite3.connect("/home/nodejs/D3/D3-Centraliser/annotations.db")
    try:
        web_page = db_connection.cursor().execute(sql, ("%" + url + "%",)).fetchall()
    finally:
        db_connection.close()
    print(type(web_page), len(web_page))  # debug trace kept from original
    return web_page
def search_archive(keyword):
    """Return all ``rawpage`` rows whose content contains *keyword*.

    Parameters
    ----------
    keyword : str
        Substring searched for in ``rawpage.content``.

    Returns
    -------
    list of tuple
        Full ``rawpage`` rows matching the keyword.
    """
    # SECURITY FIX: the original built the LIKE pattern by string
    # concatenation, allowing SQL injection through *keyword*.  Bind it
    # as a parameter instead; queried inline because query_database
    # only accepts a bare SQL string (same DB path).
    sql = "SELECT * FROM rawpage WHERE content LIKE ?;"
    db_connection = sqlite3.connect("/home/nodejs/D3/D3-Centraliser/annotations.db")
    try:
        keyword_search = db_connection.cursor().execute(sql, ("%" + keyword + "%",)).fetchall()
    finally:
        db_connection.close()
    print("Returning keyword result:" , keyword_search)  # debug trace kept from original
    return keyword_search
def query_database(sql, params=()):
    """Execute *sql* against the annotations database and return all rows.

    Parameters
    ----------
    sql : str
        SQL statement to execute.
    params : sequence, optional
        Bind values for ``?`` placeholders in *sql*.  Defaults to no
        parameters, so existing single-argument callers are unaffected.

    Returns
    -------
    list of tuple
        Every row produced by the statement.
    """
    # BUGFIX: the original placed db_connection.close() AFTER the
    # return statement, so it was unreachable and the connection leaked
    # on every call.  try/finally guarantees the handle is released,
    # even if execute() raises.
    db_connection = sqlite3.connect("/home/nodejs/D3/D3-Centraliser/annotations.db")
    try:
        return db_connection.cursor().execute(sql, params).fetchall()
    finally:
        db_connection.close()