# play-daemon/daemon/packager.py

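"""Packagers for the play daemon.

Each packager class implements pack(pres_id, queue_item) and returns a
common package dict (id, title, presenters, duration, sources, ...) for
the transcoder to work on:

* Mediasite -- downloads thumbnail, video streams and slides over HTTP.
* Manual    -- material is already on local disk; data is passed through.
* Cattura   -- reads a published mediapackage from a recorder and fills
               in metadata from the matching Daisy booking.
* Update    -- describes changes to an already published presentation.
"""
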
from datetime import datetime, timedelta
import json
import os
import re
import shutil
import logging
from requests.auth import HTTPBasicAuth
from requests.sessions import Session
from requests.exceptions import HTTPError
from daisy import DaisyHandler
class PackageException(Exception):
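    """Raised when a queue item cannot be turned into a valid package."""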
def __init__(self, package, problem):
self.package = package
self.problem = problem
class Mediasite:
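    """Packager for presentations delivered via Mediasite.

    All material (thumbnail, video streams, slides) is downloaded over
    HTTP with basic auth into a per-presentation directory under the
    configured incoming directory.
    """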
def __init__(self, config):
self.incoming = config['daemon']['incoming']
self.auth = HTTPBasicAuth(config['mediasite']['user'],
config['mediasite']['password'])
self.chunk_size = 10485760 # 10MiB; seems optimal for speed
# Tested 8k, 10MiB and 20MiB
self.logger = logging.getLogger('play-daemon')
def pack(self, pres_id, queue_item):
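        """Build the package dict for pres_id from a Mediasite queue item.

        Creates a fresh directory under incoming, downloads thumbnail,
        video sources and (if present) slides into it, and returns the
        package description for the transcoder. Streams that Mediasite
        has lost are skipped; missing thumbnails and posters are left
        empty so they can be generated later.
        """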
data = queue_item['data']
base = os.path.join(self.incoming, pres_id)
if os.path.exists(base):
shutil.rmtree(base)
os.mkdir(base)
presenters = []
for item in data['presenters']:
presenters.append(item.split('@')[0])
mypackage = {'id': pres_id,
'base': base,
'origin': queue_item['type'],
'creation': data['created'],
'title': data['title'],
'description': data.get('description', ''),
'thumb': '',
'presenters': presenters,
'courses': data['courses'],
'visibility': data.get('visibility', ''),
'duration': data['duration']/1000,
'tags': data['tags'],
'subtitles': '',
'sources': []}
if 'id' in data:
mypackage['notification_id'] = data['id']
# Create one session for all the downloads
with Session() as session:
session.auth = self.auth
session.stream = True
try:
mypackage['thumb'] = self._download(base,
data['thumb'],
session)
except HTTPError:
# Missing thumb can be generated
pass
# Download video sources, and store the information with a local
# file path in mypackage for the transcoder
stream_index = 0
for source in data['sources']:
mysource = {'video': None,
'name': source.get('name', str(stream_index)),
'poster': '',
'playAudio': source['playAudio']}
try:
mysource['video'] = self._download(base,
source['video'],
session)
except HTTPError:
# Mediasite has lost the stream, skip it.
m = '%s - Skipped missing mediasite stream: %s'
self.logger.info(m, pres_id, source['video'])
continue
try:
mysource['poster'] = self._download(base,
source['poster'],
session)
except HTTPError:
# Missing poster can be created later
pass
mypackage['sources'].append(mysource)
stream_index += 1
if 'slides' in data:
# Slides exist, create a package for creating
# a video from the slides
try:
mypackage['sources'].append(
self._download_slides(base, data, session))
except HTTPError:
# Another lost stream, nothing to do but ignore and continue
pass
return mypackage
    # Download a single file from Mediasite into base and return its local name
def _download(self, base, remotefile, session):
localname = remotefile.split('/')[-1]
localpath = os.path.join(base, localname)
r = session.get(remotefile, verify=False)
r.raise_for_status()
with open(localpath, 'xb') as f:
for chunk in r.iter_content(chunk_size=self.chunk_size):
f.write(chunk)
return localname
def _download_slides(self, base, data, session):
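        """Download all slides and describe them for the ffmpeg concat demuxer.

        Returns a source dict whose 'demux_file' entry points at a text
        file listing every slide image and its display duration, which
        the transcoder uses to render the slides as a video.
        """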
# https://trac.ffmpeg.org/wiki/Slideshow
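        # Rough sketch of how such a demux file can be consumed later
        # (illustrative only; the actual command lives in the transcoder):
        #   ffmpeg -f concat -safe 0 -i demux.txt slides.mp4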
slides_path = os.path.join(base, 'slides')
os.mkdir(slides_path)
slides = []
demux_file = os.path.join(slides_path, 'demux.txt')
with open(demux_file, 'w') as f:
f.write('ffconcat version 1.0\n')
num_slides = len(data['slides'])
            # Loop over all slides: download each one, calculate its display
            # duration, and build a text file holding all the info for the
            # ffmpeg concat demuxer
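            # For two slides shown for 5000 ms and 3000 ms the resulting
            # demux.txt would look roughly like this (file names are
            # hypothetical):
            #   ffconcat version 1.0
            #   file '/incoming/<id>/slides/slide_1.png'
            #   duration 5000ms
            #   file '/incoming/<id>/slides/slide_2.png'
            #   duration 3000ms
            #   file '/incoming/<id>/slides/slide_2.png'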
for i in range(num_slides):
slide = data['slides'][i]
# Download the source file
# and store the url as a local file path
slide_name = self._download(slides_path,
slide['url'],
session)
slide_path = os.path.join(slides_path,
slide_name)
                # Handle the edge cases for individual slide duration
if i == num_slides - 1: # last slide
duration = data['duration'] - int(slide['duration'])
else:
next_slide = data['slides'][i+1]
if i == 0: # first slide
duration = next_slide['duration']
else:
duration = (int(next_slide['duration'])
- int(slide['duration']))
                # Write this slide's entry to the demux file.
f.write(f"file '{slide_path}'\n")
# The format assumes seconds, so we specify ms.
# https://ffmpeg.org/ffmpeg-utils.html#Time-duration
f.write(f'duration {duration}ms\n')
slides.append({'url': slide_path,
'duration': f'{duration}ms'})
            # To accommodate an ffmpeg quirk, the last slide needs to be
            # listed twice
f.write(f"file '{slides[-1]['url']}'\n")
        # Return the slide info so pack() can add it to mypackage for
        # the transcoder to rework into an mp4 video
return {'demux_file': demux_file,
'name': 'slide',
'poster': slides[0]['url'],
'playAudio': False }
class Manual:
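    """Packager for manually uploaded presentations.

    The material is already on local disk, so the queue data is passed
    through mostly unchanged into the common package format.
    """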
def pack(self, pres_id, queue_item):
data = queue_item['data']
mypackage = {'id': pres_id,
'base': data['base'],
'origin': queue_item['type'],
                     # Transitional handling of the new field name:
'notification_id': data.get('notification_id',
data['id']),
'creation': data['created'],
'title': data['title'],
'description': data.get('description', ''),
'presenters': data['presenters'],
'courses': data['courses'],
'duration': data['duration'],
'thumb': data.get('thumb', ''),
'tags': data['tags'],
'subtitles': data.get('subtitles', ''),
'sources': []}
stream_index = 0
for source in data['sources']:
mysource = {'video': source['video'],
'name': source.get('name', str(stream_index)),
'poster': source.get('poster', ''),
'playAudio': source['playAudio']}
mypackage['sources'].append(mysource)
stream_index += 1
return mypackage
class Cattura:
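    """Packager for recordings published by Cattura recorders.

    Reads the published mediapackage from disk, turns its media outputs
    into sources, and fills in title, presenters, courses and description
    from the matching Daisy booking when one can be found.
    """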
def __init__(self, config):
self.recorders = config['recorders']
self.daisy = DaisyHandler(config)
self.path_regex = re.compile('^(sftp://[^/]+/)(.+?)/([^/]+)$')
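        # Splits a published path into host, directory and package file,
        # e.g. (hypothetical): 'sftp://recorder.example.org/some/dir/package.json'
        #   -> ('sftp://recorder.example.org/', 'some/dir', 'package.json')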
def pack(self, pres_id, queue_item):
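        """Build the package dict for a finished Cattura recording.

        Creation time and duration are taken from the 'main' media
        element; audio is played from the primary (camera) stream, which
        carries the presentation name rather than its configured name.
        """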
data = queue_item['data']
recorder = queue_item['recorder']
info = self._find_packageinfo(pres_id, data['publishedOutputs'])
name = info['name']
rawpath = info['path']
host, path, pkgfile = self.path_regex.match(rawpath).group(1, 2, 3)
mediapackage = None
with open(os.path.join(path, pkgfile)) as f:
mediapackage = json.load(f)
mypackage = {'id': pres_id,
'base': path,
'origin': 'cattura',
'creation': 0,
'title': {'sv': name,
'en': name},
'description': '',
'presenters': [],
'courses': [],
'duration': 0,
'thumb': '',
'tags': [],
'subtitles': '',
'sources': []}
outputs = mediapackage['outputs']
for key in outputs.keys():
if key.startswith('media/'):
media = outputs[key]
poster = ''
# The package format seems to not be consistent enough for this
#richfile = self._find_enrichment(
# media['element']['video']['sourceID'], mediapackage)
#if richfile:
# with open(os.path.join(path, richfile)) as f:
# enrichment = json.load(f)
# poster = self._find_poster(enrichment)
source = {'name': media['element']['name'],
'video': media['file'],
'poster': poster,
'playAudio': False}
if source['name'] == 'main':
end = int(media['element']['creationDate'] / 1000)
dur = media['element']['duration']['timestamp'] / 1000
mypackage['duration'] = dur
mypackage['creation'] = int(end - dur)
# The primary stream doesn't get tagged with its configured
# name, but instead gets the presentation name.
# The camera is the primary, so we play the sound from there
# for sync purposes
elif source['name'] == name:
source['playAudio'] = True
mypackage['sources'].append(source)
mypackage['tags'].append(
self.daisy.get_room_name(self.recorders[recorder]))
start = datetime.fromtimestamp(mypackage['creation'])
end = start + timedelta(seconds=mypackage['duration'])
booking = self.daisy.get_booking(start, end,
self.recorders[recorder])
if booking is not None:
title = {'sv': booking['displayStringLong']['swedish'],
'en': booking['displayStringLong']['english']}
mypackage['title'] = title
mypackage['presenters'] = booking['teachers']
if booking['description']:
mypackage['description'] = booking['description']
if not mypackage['presenters'] and booking['bookedBy']:
mypackage['presenters'].append(booking['bookedBy'])
mypackage['courses'] = booking['courseSegmentInstances']
# This is just an int and there is no way to get a string:
#mypackage['tags'].append(booking['educationalType'])
return mypackage
def _find_packageinfo(self, pres_id, data):
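        """Return the published output whose key starts with 'mediapackage:'.

        Raises PackageException if no such element exists.
        """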
for key in data.keys():
if key.startswith('mediapackage:'):
return data[key]
raise PackageException(json.dumps(data),
'cannot find mediapackage element')
def _find_enrichment(self, sourceid, mediapackage):
_, splitid = sourceid.split(',')
tocid = 'toc:' + splitid
try:
return mediapackage['outputs'][tocid]['file']
except KeyError:
return ''
def _find_poster(self, enrichment):
for frame in enrichment['entries']:
if frame['timestamp']['timestamp'] == 0:
return frame['screenshot']['path']
return ''
class Update:
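    """Packager for updates to an already published presentation.

    Mandatory fields are always copied; optional fields (thumb, subtitles
    and per-source poster/video) are only carried over when present in
    the queue data.
    """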
def pack(self, temp_id, queue_item):
data = queue_item['data']
# Mandatory fields
mypackage = {'id': temp_id,
'orig_id': data['id'],
'notification_id': data['notification_id'],
'base': data['base'],
'origin': queue_item['type'],
'creation': data['created'],
'title': data['title'],
                     'description': data.get('description', ''),
'presenters': data['presenters'],
'courses': data['courses'],
'duration': data['duration'],
'tags': data['tags'],
'delete': data['delete'],
'sources': []}
# Optional fields
for field in ['thumb', 'subtitles']:
if field in data:
mypackage[field] = data[field]
for source in data['sources']:
# Mandatory fields
mysource = {'name': source['name'],
'playAudio': source['playAudio']}
# Optional fields
for field in ['poster', 'video']:
if field in source:
mysource[field] = source[field]
mypackage['sources'].append(mysource)
return mypackage