54 lines
2.6 KiB
Python
54 lines
2.6 KiB
Python
from sklearn.datasets import load_breast_cancer, load_boston, load_diabetes, load_iris
|
|
import sqlite3
|
|
|
|
# Sample datasets shipped with scikit-learn, loaded once at import time in
# the order: breast cancer, Boston housing, diabetes, iris.
# NOTE(review): load_boston is not available in recent scikit-learn releases;
# confirm the pinned version still provides it.
ds1, ds2, ds3, ds4 = (
    load_breast_cancer(),
    load_boston(),
    load_diabetes(),
    load_iris(),
)
|
|
|
|
def get_data(scenario):
    """Run the query associated with *scenario* and return its rows and labels.

    Args:
        scenario: Key selecting the view. Recognised values are "1", "2",
            "3", "4", "graphs" and "total"; any other value selects the
            unordered webpage listing.

    Returns:
        A ``(data, columns)`` tuple: the rows returned by ``query_database``
        and a list of display labels for them.
    """
    # Fallback view used when no scenario key matches.
    fallback = (
        "SELECT timestamp,url,domain,uuid FROM webpage;",
        ["Timestamp", "URL", "Domain", "Annotator"],
    )

    # (scenario key, SQL statement, column labels). Built per call so every
    # caller receives its own label list.
    views = (
        (
            "1",
            "SELECT timestamp,url,domain,uuid FROM webpage ORDER BY timestamp DESC;",
            ["Timestamp", "URL", "Domain", "Annotator"],
        ),
        (
            "2",
            "SELECT DISTINCT webpage.timestamp,categories.category,webpage.url,webpage.uuid,categories.cohen_kappa_score FROM categories,webpage WHERE webpage.sha256 = categories.sha256 ORDER BY timestamp DESC;",
            ["Timestamp", "Category", "URL", "Annotator", "Cohen's kappa"],
        ),
        (
            "3",
            "SELECT DISTINCT timestamp, url, categories.category, notes.note, notes.uuid FROM webpage INNER JOIN notes ON webpage.sha256=notes.sha256 INNER JOIN categories ON webpage.sha256=categories.sha256 ORDER BY timestamp DESC;",
            ["Timestamp", "URL", "Category", "Annotation", "Annotator"],
        ),
        (
            "4",
            "SELECT DISTINCT webpage.timestamp, webpage.url, highlightedText.highlightedText, highlightedText.uuid FROM highlightedText INNER JOIN webpage ON webpage.sha256 = highlightedText.sha256 ORDER BY timestamp DESC;",
            # NOTE(review): six labels for a four-column SELECT; confirm
            # what the caller expects before changing either side.
            ["Timestamp", "URL", "Excerpt", "Annotator", "Annotation", "Category"],
        ),
        (
            "graphs",
            "SELECT categories.category,webpage.url FROM categories INNER JOIN webpage ON categories.sha256 = webpage.sha256;",
            ["Category", "URL"],
        ),
        (
            "total",
            "SELECT category FROM categories;",
            # NOTE(review): two labels for a single-column SELECT; "Index"
            # is presumably supplied by the caller -- verify.
            ["Index", "Category"],
        ),
    )

    for key, sql, columns in views:
        if scenario == key:
            break
    else:
        sql, columns = fallback

    data = query_database(sql)
    return data, columns
|
|
|
|
# Location of the annotations SQLite database used by default.
DB_PATH = "/home/nodejs/D3/D3-Centraliser/annotations.db"


def get_web_page(url):
    """Return archived raw pages whose URL contains *url*.

    The search text is passed as a bound parameter, so quotes in *url*
    cannot alter the SQL statement. LIKE wildcards (``%`` and ``_``) inside
    *url* still act as wildcards.

    Args:
        url: Substring to look for in ``rawpage.url``.

    Returns:
        A list of distinct ``(content, url, sha256)`` rows.
    """
    sql = "SELECT DISTINCT content,url,sha256 FROM rawpage WHERE url LIKE ?;"
    web_page = query_database(sql, ("%" + url + "%",))
    print(type(web_page), len(web_page))
    return web_page


def search_archive(keyword):
    """Return every raw page whose content contains *keyword*.

    The search text is passed as a bound parameter, so quotes in *keyword*
    cannot alter the SQL statement. LIKE wildcards (``%`` and ``_``) inside
    *keyword* still act as wildcards.

    Args:
        keyword: Substring to look for in ``rawpage.content``.

    Returns:
        A list of full ``rawpage`` rows.
    """
    sql = "SELECT * FROM rawpage WHERE content LIKE ?;"
    keyword_search = query_database(sql, ("%" + keyword + "%",))
    print("Returning keyword result:" , keyword_search)
    return keyword_search


def query_database(sql, params=(), db_path=DB_PATH):
    """Execute *sql* against the annotations database and return all rows.

    Args:
        sql: SQL statement to run; may contain ``?`` placeholders.
        params: Values bound to the placeholders in *sql*. Defaults to no
            parameters, which keeps existing single-argument calls working.
        db_path: Path of the SQLite database file. Defaults to ``DB_PATH``.

    Returns:
        A list of row tuples as produced by ``fetchall()``.

    Raises:
        sqlite3.Error: If connecting or executing the statement fails.
    """
    db_connection = sqlite3.connect(db_path)
    try:
        db_cursor = db_connection.cursor()
        return db_cursor.execute(sql, params).fetchall()
    finally:
        # Always release the connection, including when the query raises;
        # previously the close() call sat after the return and never ran.
        db_connection.close()
|