"""Data-access helpers for the D3-Centraliser annotations SQLite database.

Each public helper returns rows fetched from the database plus (where
applicable) the column headers a caller should display them under.
"""

from sklearn.datasets import load_breast_cancer, load_boston, load_diabetes, load_iris
import contextlib
import sqlite3

# NOTE(review): these datasets are loaded at import time but never used in
# this file — confirm they are needed elsewhere. `load_boston` was removed
# in scikit-learn 1.2, so this import (and the call below) fails on modern
# scikit-learn versions.
ds1 = load_breast_cancer()
ds2 = load_boston()
ds3 = load_diabetes()
ds4 = load_iris()

# Absolute path of the annotations database every query runs against.
DB_PATH = "/home/nodejs/D3/D3-Centraliser/annotations.db"

# Per-scenario (SQL, display-column) pairs used by get_data().
_SCENARIOS = {
    "1": (
        "SELECT timestamp,url,domain,uuid FROM webpage ORDER BY timestamp DESC;",
        ["Timestamp", "URL","Domain", "Annotator"],
    ),
    "2": (
        "SELECT DISTINCT webpage.timestamp,categories.category,webpage.url,webpage.uuid,categories.cohen_kappa_score FROM categories,webpage WHERE webpage.sha256 = categories.sha256 ORDER BY timestamp DESC;",
        ["Timestamp", "Category", "URL", "Annotator", "Cohen's kappa"],
    ),
    "3": (
        "SELECT DISTINCT timestamp, url, categories.category, notes.note, notes.uuid FROM webpage INNER JOIN notes ON webpage.sha256=notes.sha256 INNER JOIN categories ON webpage.sha256=categories.sha256 ORDER BY timestamp DESC;",
        ["Timestamp", "URL", "Category", "Annotation", "Annotator"],
    ),
    "4": (
        # NOTE(review): this SELECT returns 4 columns but 6 headers are
        # listed below — confirm whether the extra "Annotation"/"Category"
        # headers are filled in by the caller or are a bug.
        "SELECT DISTINCT webpage.timestamp, webpage.url, highlightedText.highlightedText, highlightedText.uuid FROM highlightedText INNER JOIN webpage ON webpage.sha256 = highlightedText.sha256 ORDER BY timestamp DESC;",
        ["Timestamp", "URL", "Excerpt", "Annotator", "Annotation", "Category"],
    ),
    "graphs": (
        "SELECT categories.category,webpage.url FROM categories INNER JOIN webpage ON categories.sha256 = webpage.sha256;",
        ["Category", "URL"],
    ),
    "total": (
        # NOTE(review): one selected column vs two headers — presumably the
        # caller prepends a row index; verify.
        "SELECT category FROM categories;",
        ["Index", "Category"],
    ),
}

# Fallback (SQL, columns) pair for any unrecognised scenario key.
_DEFAULT_SCENARIO = (
    "SELECT timestamp,url,domain,uuid FROM webpage;",
    ["Timestamp", "URL","Domain", "Annotator"],
)


def get_data(scenario):
    """Fetch the rows and display-column names for a reporting *scenario*.

    Parameters
    ----------
    scenario : str
        One of the keys in ``_SCENARIOS`` ("1"–"4", "graphs", "total").
        Any other value falls back to a plain webpage listing.

    Returns
    -------
    tuple[list, list[str]]
        ``(rows, column_headers)`` — rows as returned by ``fetchall()``.
    """
    sql, columns = _SCENARIOS.get(scenario, _DEFAULT_SCENARIO)
    data = query_database(sql)
    return data, columns


def get_web_page(url):
    """Return distinct raw pages whose URL contains *url* as a substring.

    The match is a SQL ``LIKE %url%`` search against ``rawpage.url``.
    Rows are ``(content, url, sha256)`` tuples.
    """
    # Parameterized query: the previous version concatenated *url* straight
    # into the SQL string, which allowed SQL injection.
    web_page = query_database(
        "SELECT DISTINCT content,url,sha256 FROM rawpage WHERE url LIKE ?;",
        ("%" + url + "%",),
    )
    print(type(web_page), len(web_page))
    return web_page


def search_archive(keyword):
    """Full-text-ish search: rows from ``rawpage`` whose content contains *keyword*.

    Uses a SQL ``LIKE %keyword%`` scan; returns all matching rows.
    """
    # Parameterized for the same injection reason as get_web_page().
    keyword_search = query_database(
        "SELECT * FROM rawpage WHERE content LIKE ?;",
        ("%" + keyword + "%",),
    )
    print("Returning keyword result:" , keyword_search)
    return keyword_search


def query_database(sql, params=()):
    """Execute *sql* against the annotations database and return all rows.

    Parameters
    ----------
    sql : str
        The SQL statement to run. Use ``?`` placeholders for values.
    params : tuple, optional
        Values bound to the placeholders (default: no parameters), so
        existing callers that pass only *sql* keep working.

    Returns
    -------
    list
        All result rows, as returned by ``Cursor.fetchall()``.
    """
    # The previous version called close() after `return` (unreachable),
    # leaking the connection on every query. contextlib.closing guarantees
    # the connection is closed even if execute() raises.
    with contextlib.closing(sqlite3.connect(DB_PATH)) as db_connection:
        return db_connection.execute(sql, params).fetchall()