#!/usr/bin/python
#####################################################
# Title: SLR Helper (slr_helper)
# Author: Jesper Bergman (jesperbe@dsv.su.se)
# Licence: GPLv3
#####################################################
"""SLR Helper - Systematic Literature Review Helper
SLR Helper is small program that helps you do systematic literature
reviews by fetching academic works and sieving through their abstracts using
inclusion/exclusion criteria.
Usage:
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -w <output_file>
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -s <subject>
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -s <subject> -w <output_file>
slr_helper.py search -a <api_key> -t <inst_token> -k <keywords> -s <subject> -l <language> -w <output_file>
slr_helper.py scrutinise -i <input_file> -x <exp> -d <database>
Arguments:
FILE optional input file
Examples:
slr_helper.py search -a apikey -t token -k "dark web crawler" -s "COMP"
slr_helper.py search -a <api_key> -u <url> -k <keywords> -s <subject> -l "English"
slr_helper.py scrutinise -i <input_file.json> -x "cardiovascular,depression" -d "SLR_Cardiovasular_Depression.sql"
Options:
-a --api_key For downloading: API key (supported now: Scopus, IEEExplore)
-t --inst_token For downloading via institution token from Scopus
-i --input For scrutinising: Input file(s) to sieve through
-k --keywords For downloading: Keywords to search for on the URL (i.e. academic database) used
-l --language For language specific articles, e.g. English
-H --help Show manual
-x --expression For scrutinising: Reg ex to filter [default: ALL]
-s --subject Specification of subject matter [default: ALL]. E.g. "COMP" or "AGRI"
-d --database For scrutinising: Database to store the results in [default: NONE]
-u --url For downloading: Academic database (or search engine) URL [default: Scopus].
-w --write Write to output file.
"""

# Import standard libraries
from datetime import datetime
import sys
import logging
import json

# Import third-party libraries
import requests as req
from docopt import docopt

# Global counter of processed entries
counter = 0


# Main menu
def main(arguments):
    api_key = arguments['<api_key>']
    inst_token = arguments['<inst_token>']
    input_file = arguments['<input_file>']
    url = "https://api.elsevier.com/content/search/scopus?query="
    keywords = arguments['<keywords>']
    # subject = arguments['<subject>']
    language = arguments['<language>']
    search_engine = "scopus"
    output_file = arguments["<output_file>"]

    # Scrutinise mode: read a previously fetched JSON result file
    if input_file:
        with open(input_file) as json_file:
            json_data = json.load(json_file)
            for key, value in json_data.items():
                for i in value['entry']:
                    print(i, "\n\n")
    # Search mode: query the Scopus API
    else:
        fetch_scopus(api_key, inst_token, url, keywords)


# class SLR def __init___(self,)
def fetch_scopus(api_key, inst_token, url, keywords):
    """Query the Scopus Search API and hand the result to print_summary()."""
    if api_key:
        query = str(url + keywords + "&apiKey=" + api_key + "&insttoken=" + inst_token + "&view=COMPLETE")
        query_response = req.get(query)
        # Report errors before trying to parse the response body
        if query_response.status_code == 429:
            print("Error 429: API quota exceeded or rate limit hit")
            return
        if query_response.status_code != 200:
            print("Request failed with status code", query_response.status_code)
            return
        dta = query_response.json()
        print_summary(dta)


def fetch_next_scopus(url):
    """Follow a 'next' link from a previous Scopus response (25 results per page)."""
    print("Fetching next 25: ", url)
    response = req.get(url, headers={"Accept": "application/json"})
    # Report errors before trying to parse the response body
    if response.status_code == 429:
        print("Error 429: API quota exceeded or rate limit hit")
        return
    if response.status_code != 200:
        print("Request failed with status code", response.status_code)
        return
    data = response.json()
    print_summary(data)


def print_summary(total_articles):
    """Print and write the META data for each entry in a Scopus result page."""
    global counter
    # Debug
    print("Printing summary\n--------------------\nTotal hits: ", total_articles["search-results"]["opensearch:totalResults"])
    print("\nSearch term:", total_articles["search-results"]["opensearch:Query"]["@searchTerms"], "\n\n--------------------")
    write_to_file(("\nSearch term: ", str(total_articles["search-results"]["opensearch:Query"]["@searchTerms"])))

    # Get all entries from result list
    total_articles_list = total_articles["search-results"]["entry"]

    # Iterate over the entries in the result list and print META data
    for i in total_articles_list:
        counter = counter + 1
        print("Counter: ", counter, " ", i.get("authkeywords"))
        # Regex filtering of abstracts could be applied here; see the
        # filter_abstracts() sketch after this function.
        slr_output = ("\n\nTitle: ", str(i.get("dc:title")),
                      "\nAbstract: ", str(i.get("dc:description")),
                      "\nAuthors: ", str(i.get("dc:creator")),
                      "\nPublication: ", str(i.get("prism:publicationName")),
                      "\nPublication Type: ", str(i.get("prism:aggregationType")),
                      "\nArticle Type: ", str(i.get("subtypeDescription")),
                      "\nScopus ID: ", str(i.get("dc:identifier")),
                      "\nDOI: ", str(i.get("prism:doi")),
                      "\nURL: ", str(i.get("prism:url")),
                      "\nKeywords: ", str(i.get("authkeywords")),
                      "\nCounter: ", str(counter))
        write_to_file(slr_output)

    # Follow pagination links until the last page is reached
    for ln in total_articles["search-results"]["link"]:
        if ln["@ref"] == "next":
            print("Following the next 25: ", ln["@href"])
            fetch_next_scopus(ln["@href"])
        elif ln["@ref"] == "self":
            print("Current page: ", ln["@href"])
        elif ln["@ref"] == "last":
            print("Last page: ", ln["@href"])


def write_to_file(output):
    """Append the given strings to the output file named on the command line."""
    success = 0
    try:
        # arguments is set at module level when the script is run via docopt
        with open(arguments["<output_file>"], "a") as file_name:
            file_name.writelines(output)
            file_name.writelines(["\nTime: ", str(datetime.now())])
        success = 1
    except (OSError, TypeError):
        success = 0
        logging.error("Could not write to output file with writelines()")
    return success
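

# The -d/--database option in the usage text is not implemented above. Below is
# a minimal sketch, assuming an SQLite file (as the .sql example filename
# suggests); store_entries() and its schema are hypothetical, not part of the
# original script.
import sqlite3

def store_entries(entries, database):
    """Store the title, abstract and DOI of each entry in an SQLite database."""
    connection = sqlite3.connect(database)
    connection.execute(
        "CREATE TABLE IF NOT EXISTS articles (title TEXT, abstract TEXT, doi TEXT)")
    for entry in entries:
        connection.execute(
            "INSERT INTO articles VALUES (?, ?, ?)",
            (entry.get("dc:title"), entry.get("dc:description"), entry.get("prism:doi")))
    connection.commit()
    connection.close()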


# Main menu constructor
if __name__ == "__main__":
    arguments = docopt(__doc__, version='slr_helper 0.1')
    main(arguments)
"""
as1 = ArticleSearch("api_key", "keywords", "database", "output_file")
print ("Search for more stuff (Y/n)?")
s = input('--> ')
if (s == "y" or s == "Y"):
fetch_scopus()
"""