# play-daemon-threaded/pipeline/preprocessors/cattura.py

import json
import os
import re

from datetime import datetime, timedelta

from .preprocessor import Preprocessor


@Preprocessor.register
class CatturaProcessor(Preprocessor):
    """
    Preprocessing for cattura jobs.

    The recorder must be configured to at least provide the sources 'main'
    and 'camera', and 'camera' must be marked as the primary source.
    Further sources should be marked 'left' and 'right' as appropriate, with
    the positions relative to 'main'.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Splits an sftp URL into three groups: the 'sftp://host/' prefix,
        # the directory part, and the final path component (the file name).
        self.path_regex = re.compile(r'^(sftp://[^/]+/)(.+?)/([^/]+)$')

    def validate(self, queueitem):
        """
        Accept every queue item unconditionally.
        """
        # No point in validating the very complex cattura format here
        return True

    def _preprocess(self, job):
        """
        Build a job specification from a cattura mediapackage.

        Reads 'jobid' and 'queueitem' from the job, locates the mediapackage
        file referenced in the queue item's published outputs, and registers
        one source per 'media/' output found in that file.

        Returns a dict with the keys 'jobid' and 'jobspec'.

        Raises KeyError if the published outputs hold no mediapackage entry,
        and ValueError if the mediapackage path does not have the expected
        sftp form or the mediapackage contains no 'media/' outputs.
        """
        jobid = job['jobid']
        queueitem = job['queueitem']
        recorder = queueitem['recorder']
        room_id = self.config[recorder]
        data = queueitem['data']
        info = self._find_packageinfo(data['publishedOutputs'])
        presname = info['name']
        rawpath = info['path']

        match = self.path_regex.match(rawpath)
        if match is None:
            raise ValueError(
                'unexpected mediapackage path format: {!r}'.format(rawpath))
        # The host part (group 1) is not needed; the file is read locally.
        path, pkgfile = match.group(2, 3)

        with open(os.path.join(path, pkgfile), encoding='utf-8') as f:
            mediapackage = json.load(f)

        # NOTE(review): the 0 is presumably a placeholder creation time that
        # gets overwritten below once 'main' is seen -- confirm against
        # _init_jobspec.
        outspec = self._init_jobspec(path, 0, presname)

        duration = None

        for key, media in mediapackage['outputs'].items():
            if not key.startswith('media/'):
                continue

            element = media['element']
            srcname = element['name']
            source = {'video': media['file'],
                      'poster': '',
                      'playAudio': False}

            # The duration of the first media output encountered is used
            # for the whole presentation. Presumably given in
            # milliseconds, hence the division.
            if duration is None:
                duration = element['duration']['timestamp'] / 1000

            # The name of the main stream should probably be configurable
            if srcname == 'main':
                # creationDate is treated as the end of the recording, so
                # the start is obtained by subtracting the duration.
                end = int(element['creationDate'] / 1000)
                outspec['created'] = int(end - duration)

            # The source marked 'primary' in cattura doesn't get tagged
            # with its configured name, but instead gets the presentation
            # name. The camera is the primary, so we play the sound from
            # there for sync purposes. We also generate the subtitles from
            # there for the same reason.
            elif srcname == presname:
                source['playAudio'] = True
                subtitlespec = {'Generated': {'type': 'whisper',
                                              'source': srcname}}
                outspec['generate_subtitles'] = subtitlespec

            outspec['sources'][srcname] = source

        if duration is None:
            # Without any media there is no duration to compute the end
            # time from; fail with a clear message instead of a TypeError
            # from timedelta.
            raise ValueError("no 'media/' outputs in mediapackage")

        start = datetime.fromtimestamp(outspec['created'])
        end = start + timedelta(seconds=duration)
        self._fill_jobspec_from_daisy(start, end, room_id, outspec)
        return {'jobid': jobid,
                'jobspec': outspec}

    def _find_packageinfo(self, data):
        """
        Return the value of the first key starting with 'mediapackage:'.

        Raises KeyError if no such key exists in data.
        """
        for key, value in data.items():
            if key.startswith('mediapackage:'):
                return value
        raise KeyError("no 'mediapackage' key in job specification")