#!/usr/bin/python
#####################################################
# Title: SLR Helper (slr_helper)
# Author: Jesper Bergman (jesperbe@dsv.su.se)
# Licence: GPLv3
#####################################################
"""SLR Helper - Systematic Literature Review Helper
SLR Helper is small program that helps you do systematic literature
reviews by fetching academic works and sieving through their abstracts using
inclusion/exclusion criteria.
Usage:
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -w <output_file>
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -s <subject>
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -s <subject> -w <output_file>
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -s <subject> -l <language> -w <output_file>
slr_helper.py scrutinise -i <input_file> -x <exp> -d <database>
Arguments:
FILE optional input file
Examples:
slr_helper.py search -a apikey -t token -k "dark web crawler" -s "COMP"
slr_helper.py search -a <api_key> -u <url> -k <keywords> -s <subject> -l "English"
slr_helper.py scrutinise -i <input_file.json> -x "cardiovascular,depression" -d "SLR_Cardiovasular_Depression.sql"
Options:
-a --api_key For downloading: API key (supported now: Scopus, IEEExplore)
-t --inst_token For downloading via institution token from Scopus
-i --input For scrutinising: Input file(s) to sieve through
-k --keywords For downloading: Keywords to search for on the URL (i.e. academic database) used
-l --language For language specific articles, e.g. English
-H --help Show manual
-x --expression For scrutinising: Reg ex to filter [default: ALL]
-s --subject Specification of subject matter [default: ALL]. E.g. "COMP" or "AGRI"
-d --database For scrutinising: Database to store the results in [default: NONE]
-u --url For downloading: Academic database (or search engine) URL [default: Scopus].
-w --write Write to output file.
"""

# Import standard libraries
from datetime import datetime
import sys
import logging
import json

# Import third-party libraries
import requests as req
from docopt import docopt

# Global counter of processed entries
counter = 0


# Main menu
def main(arguments):
    api_key = arguments['<api_key>']
    inst_token = arguments['<inst_token>']
    input_file = arguments['<input_file>']
    url = "https://api.elsevier.com/content/search/scopus?query="
    keywords = arguments['<keywords>']
    # subject = arguments['<subject>']
    language = arguments['<language>']
    search_engine = "scopus"
    output_file = arguments["<output_file>"]

    # Scrutinise mode: read a previously fetched JSON result file
    if input_file:
        with open(input_file) as json_file:
            json_data = json.load(json_file)
            for key, value in json_data.items():
                for i in value['entry']:
                    print(i, "\n\n")
    # Search mode: query the Scopus API
    else:
        fetch_scopus(api_key, inst_token, url, keywords)


# class SLR def __init___(self,)
def fetch_scopus(api_key, inst_token, url, keywords):
    """Query the Scopus Search API and hand the result to print_summary()."""
    if api_key:
        query = str(url + keywords + "&apiKey=" + api_key + "&insttoken=" + inst_token + "&view=COMPLETE")
        query_response = req.get(query)
        # Report errors before trying to parse the response body
        if query_response.status_code == 429:
            print("Error 429: API quota exceeded or rate limit hit")
            return
        if query_response.status_code != 200:
            print("Request failed with status code", query_response.status_code)
            return
        dta = query_response.json()
        print_summary(dta)


def fetch_next_scopus(url):
    """Follow a 'next' link from a previous Scopus response (25 results per page)."""
    print("Fetching next 25: ", url)
    response = req.get(url, headers={"Accept": "application/json"})
    # Report errors before trying to parse the response body
    if response.status_code == 429:
        print("Error 429: API quota exceeded or rate limit hit")
        return
    if response.status_code != 200:
        print("Request failed with status code", response.status_code)
        return
    data = response.json()
    print_summary(data)


def print_summary(total_articles):
    """Print and write the META data for each entry in a Scopus result page."""
    global counter
    # Debug
    print("Printing summary\n--------------------\nTotal hits: ", total_articles["search-results"]["opensearch:totalResults"])
    print("\nSearch term:", total_articles["search-results"]["opensearch:Query"]["@searchTerms"], "\n\n--------------------")
    write_to_file(("\nSearch term: ", str(total_articles["search-results"]["opensearch:Query"]["@searchTerms"])))

    # Get all entries from result list
    total_articles_list = total_articles["search-results"]["entry"]

    # Iterate over the entries in the result list and print META data
    for i in total_articles_list:
        counter = counter + 1
        print("Counter: ", counter, " ", i.get("authkeywords"))
        # Regex filtering of abstracts could be applied here; see the
        # filter_abstracts() sketch after this function.
        slr_output = ("\n\nTitle: ", str(i.get("dc:title")),
                      "\nAbstract: ", str(i.get("dc:description")),
                      "\nAuthors: ", str(i.get("dc:creator")),
                      "\nPublication: ", str(i.get("prism:publicationName")),
                      "\nPublication Type: ", str(i.get("prism:aggregationType")),
                      "\nArticle Type: ", str(i.get("subtypeDescription")),
                      "\nScopus ID: ", str(i.get("dc:identifier")),
                      "\nDOI: ", str(i.get("prism:doi")),
                      "\nURL: ", str(i.get("prism:url")),
                      "\nKeywords: ", str(i.get("authkeywords")),
                      "\nCounter: ", str(counter))
        write_to_file(slr_output)

    # Follow pagination links until the last page is reached
    for ln in total_articles["search-results"]["link"]:
        if ln["@ref"] == "next":
            print("Following the next 25: ", ln["@href"])
            fetch_next_scopus(ln["@href"])
        elif ln["@ref"] == "self":
            print("Current page: ", ln["@href"])
        elif ln["@ref"] == "last":
            print("Last page: ", ln["@href"])


def write_to_file(output):
    """Append the given strings to the output file named on the command line."""
    success = 0
    try:
        # arguments is set at module level when the script is run via docopt
        with open(arguments["<output_file>"], "a") as file_name:
            file_name.writelines(output)
            file_name.writelines(["\nTime: ", str(datetime.now())])
        success = 1
    except (OSError, TypeError):
        success = 0
        logging.error("Could not write to output file with writelines()")
    return success
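

# The -d/--database option in the usage text is not implemented above. Below is
# a minimal sketch, assuming an SQLite file (as the .sql example filename
# suggests); store_entries() and its schema are hypothetical, not part of the
# original script.
import sqlite3

def store_entries(entries, database):
    """Store the title, abstract and DOI of each entry in an SQLite database."""
    connection = sqlite3.connect(database)
    connection.execute(
        "CREATE TABLE IF NOT EXISTS articles (title TEXT, abstract TEXT, doi TEXT)")
    for entry in entries:
        connection.execute(
            "INSERT INTO articles VALUES (?, ?, ?)",
            (entry.get("dc:title"), entry.get("dc:description"), entry.get("prism:doi")))
    connection.commit()
    connection.close()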


# Main menu constructor
if __name__ == "__main__":
    arguments = docopt(__doc__, version='slr_helper 0.1')
    main(arguments)
"""
as1 = ArticleSearch("api_key", "keywords", "database", "output_file")
print ("Search for more stuff (Y/n)?")
s = input('--> ')
if (s == "y" or s == "Y"):
fetch_scopus()
"""