# play-daemon/daemon/packager.py
from datetime import datetime, timedelta
import json
import os
import re
import shutil
import logging

from requests.auth import HTTPBasicAuth
from requests.sessions import Session

from daisy import DaisyHandler


class PackageException(Exception):
    """Raised when a queue item cannot be turned into a package."""
    def __init__(self, package, problem):
        self.package = package
        self.problem = problem


class Mediasite:
    def __init__(self, config):
        self.incoming = config['daemon']['incoming']
        self.auth = HTTPBasicAuth(config['mediasite']['user'],
                                  config['mediasite']['password'])
        self.chunk_size = 10485760  # 10MiB; seems optimal for speed
                                    # Tested 8k, 10MiB and 20MiB
        self.logger = logging.getLogger('play-daemon')
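
    # For reference, an illustrative sketch of the config keys read above;
    # the values are invented, not taken from a real deployment:
    #
    #   {'daemon': {'incoming': '/var/spool/play/incoming'},
    #    'mediasite': {'user': 'play-daemon', 'password': 'secret'}}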

    def pack(self, pres_id, queue_item):
        data = queue_item['data']
        base = os.path.join(self.incoming, pres_id)
        if os.path.exists(base):
            shutil.rmtree(base)
        os.mkdir(base)
        presenters = []
        for item in data['presenters']:
            presenters.append(item.split('@')[0])
        mypackage = {'id': pres_id,
                     'base': base,
                     'origin': queue_item['type'],
                     'creation': data['created'],
                     'title': data['title'],
                     'description': data['description'],
                     'presenters': presenters,
                     'courses': data['courses'],
                     'duration': int(data['duration'])/1000,
                     'tags': data['tags'],
                     'sources': []}
        if 'id' in data:
            mypackage['notification_id'] = data['id']

        # Create one session for all the downloads
        with Session() as session:
            session.auth = self.auth
            session.stream = True
            mypackage['thumb'] = self._download(base, data['thumb'], session)

            # Download video sources and store the information with a local
            # file path in mypackage for the transcoder
            for source in data['sources']:
                mysource = {'video': self._download(base, source['video'], session),
                            'poster': self._download(base, source['poster'], session),
                            'playAudio': source['playAudio']}
                mypackage['sources'].append(mysource)

            # Slides exist: create a package for generating a video from the slides
            if 'slides' in data:
                slides_path = os.path.join(base, 'slides')
                os.mkdir(slides_path)
                slides = []
                demux_file = os.path.join(slides_path, 'demux.txt')
                with open(demux_file, 'w') as f:
                    f.write('ffconcat version 1.0\n')
                    num_slides = len(data['slides'])
                    # Loop over all slides: download each one, calculate its
                    # duration and build a text file holding all the info for
                    # the ffmpeg demuxer
                    for i in range(num_slides):
                        # Download the source file and store the url as a local file path
                        myslide = {'url': os.path.join(
                            slides_path,
                            self._download(slides_path,
                                           data['slides'][i]['url'],
                                           session))}
                        # Handle the different edge cases for individual slide durations
                        if i == num_slides - 1:  # last slide
                            myslide['duration'] = '{}ms'.format(
                                int(data['duration'])
                                - int(data['slides'][i]['duration']))
                        elif i == 0:  # first slide
                            myslide['duration'] = '{}ms'.format(
                                int(data['slides'][i+1]['duration']))
                        else:  # all other slides
                            myslide['duration'] = '{}ms'.format(
                                int(data['slides'][i+1]['duration'])
                                - int(data['slides'][i]['duration']))
                        # Commit to the demux file. ffmpeg assumes durations are
                        # in seconds unless stated otherwise:
                        # https://ffmpeg.org/ffmpeg-utils.html#Time-duration
                        # https://trac.ffmpeg.org/wiki/Slideshow
                        f.write('file \'{}\'\n'.format(myslide['url']))
                        f.write('duration {}\n'.format(myslide['duration']))
                        slides.append(myslide)
                    # To accommodate an ffmpeg quirk that needs the last slide twice
                    f.write('file \'{}\'\n'.format(slides[-1]['url']))
                # Put all the slides info in mypackage for the transcoder to
                # turn into an mp4 video
                mypackage['sources'].append({'demux_file': demux_file,
                                             'poster': slides[0]['url'],
                                             'playAudio': False})
        return mypackage
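
    # For illustration, the demux file written above might look roughly like
    # this (paths and timings invented, not taken from a real presentation):
    #
    #   ffconcat version 1.0
    #   file '/incoming/<pres_id>/slides/slide_001.jpg'
    #   duration 42000ms
    #   file '/incoming/<pres_id>/slides/slide_002.jpg'
    #   duration 18000ms
    #   file '/incoming/<pres_id>/slides/slide_002.jpg'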

    # Download a file from mediasite and return the local file name
    def _download(self, base, remotefile, session):
        localname = remotefile.split('/')[-1]
        localpath = os.path.join(base, localname)
        r = session.get(remotefile)
        r.raise_for_status()
        with open(localpath, 'xb') as f:
            for chunk in r.iter_content(chunk_size=self.chunk_size):
                f.write(chunk)
        return localname


class Manual:
    def pack(self, pres_id, queue_item):
        data = queue_item['data']
        mypackage = {'id': pres_id,
                     'base': data['base'],
                     'origin': queue_item['type'],
                     'notification_id': data['id'],
                     'creation': data['created'],
                     'title': data['title'],
                     'description': data['description'],
                     'presenters': data['presenters'],
                     'courses': data['courses'],
                     'duration': data['duration'],
                     'thumb': data.get('thumb', ''),
                     'tags': data['tags'],
                     'sources': []}
        for source in data['sources']:
            mysource = {'video': source['video'],
                        'poster': source.get('poster', ''),
                        'playAudio': source['playAudio']}
            mypackage['sources'].append(mysource)
        return mypackage
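
# For illustration only: the Manual packager above expects queue_item['data']
# to carry roughly this shape (every value below is invented, not real data):
#
#   {'id': 123,
#    'base': '/var/spool/play/incoming/abc',
#    'created': 1635152708,
#    'title': 'Some lecture',
#    'description': '',
#    'presenters': ['someuser'],
#    'courses': [],
#    'duration': 3600,
#    'thumb': 'thumb.jpg',
#    'tags': [],
#    'sources': [{'video': 'video.mp4',
#                 'poster': 'poster.jpg',
#                 'playAudio': True}]}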


class Cattura:
    def __init__(self, config):
        self.recorders = config['recorders']
        self.daisy = DaisyHandler(config)

    def pack(self, pres_id, queue_item):
        data = queue_item['data']
        recorder = queue_item['recorder']
        info = self._find_packageinfo(pres_id, data['publishedOutputs'])
        name = info['name']
        rawpath = info['path']
        regex = re.compile('^(sftp://[^/]+/)(.+?)/([^/]+)$')
        host, path, pkgfile = regex.match(rawpath).group(1, 2, 3)
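        # Illustrative, made-up example of the split (note that 'path' comes
        # out without a leading slash):
        #   'sftp://recorder.example.org/data/recordings/pkg.json'
        #   -> host='sftp://recorder.example.org/', path='data/recordings',
        #      pkgfile='pkg.json'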
        mediapackage = None
        with open(os.path.join(path, pkgfile)) as f:
            mediapackage = json.load(f)
        mypackage = {'id': pres_id,
                     'base': path,
                     'origin': 'cattura',
                     'creation': int(mediapackage['creationDate'] / 1000),
                     'title': "Unknown title",
                     'presenters': [],
                     'courses': [],
                     'duration': 0,
                     'thumb': '',
                     'tags': [],
                     'sources': []}
        outputs = mediapackage['outputs']
        for key in outputs.keys():
            if key.startswith('media/'):
                media = outputs[key]
                richfile = self._find_enrichment(
                    media['element']['video']['sourceID'], mediapackage)
                enrichment = None
                with open(os.path.join(path, richfile)) as f:
                    enrichment = json.load(f)
                source = {'name': media['element']['name'],
                          'video': media['file'],
                          'poster': self._find_poster(enrichment),
                          'playAudio': False}
                # The 'main' output provides the audio track, and its duration
                # and poster are used for the package as a whole
                if source['name'] == 'main':
                    source['playAudio'] = True
                    dur = media['element']['duration']['timestamp'] / 1000
                    mypackage['duration'] = dur
                    mypackage['thumb'] = source['poster']
                mypackage['sources'].append(source)
        mypackage['tags'].append(
            self.daisy.get_room_name(self.recorders[recorder]))
        start = datetime.fromtimestamp(mypackage['creation'])
        end = start + timedelta(days=1, seconds=mypackage['duration'])
        booking = self.daisy.get_booking(start, end,
                                         self.recorders[recorder])
        if booking is not None:
            mypackage['title'] = booking['title']
            mypackage['presenters'] = [self.daisy.get_person(teacher['id'])
                                       for teacher in booking['teachers']]
            mypackage['description'] = booking['description']
            if not mypackage['presenters']:
                mypackage['presenters'].append(
                    self.daisy.get_person(booking['bookedBy']))
            mypackage['courses'] = [self.daisy.get_course(course['id'])
                                    for course
                                    in booking['courseSegmentInstances']]
            mypackage['tags'].append(booking['educationalType'])
        return mypackage

    def _find_packageinfo(self, pres_id, data):
        for key in data.keys():
            if key.startswith('mediapackage:'):
                return data[key]
        raise PackageException(json.dumps(data),
                               "can't find mediapackage element")

    def _find_enrichment(self, sourceid, mediapackage):
        # sourceIDs are namespaced as '<prefix>,<id>'; the matching
        # table-of-contents output is keyed as 'toc:<id>'
        _, sourceid = sourceid.split(',')
        tocid = 'toc:' + sourceid
        return mediapackage['outputs'][tocid]['file']

    def _find_poster(self, enrichment):
        for frame in enrichment['entries']:
            if frame['timestamp']['timestamp'] == 0:
                return frame['screenshot']['path']
        return ""


class Update:
    def pack(self, temp_id, queue_item):
        data = queue_item['data']
        mypackage = {'id': temp_id,
                     'update_id': data['id'],
                     'base': data['base'],
                     'origin': queue_item['type'],
                     'creation': data['created'],
                     'title': data['title'],
                     'presenters': data['presenters'],
                     'courses': data['courses'],
                     'duration': data['duration'],
                     'thumb': data.get('thumb', ''),
                     'tags': data['tags'],
                     'sources': []}
        for source in data['sources']:
            mysource = {'video': source['video'],
                        'poster': source.get('poster', ''),
                        'playAudio': source['playAudio']}
            mypackage['sources'].append(mysource)
        return mypackage
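
# A minimal sketch (not part of the daemon itself) of how these packagers
# might be wired up, assuming queue items of the shapes consumed above;
# 'packagers' and 'queue_item' are hypothetical names used only here:
#
#   packagers = {'mediasite': Mediasite(config),
#                'manual': Manual(),
#                'cattura': Cattura(config),
#                'update': Update()}
#   package = packagers[queue_item['type']].pack(pres_id, queue_item)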