Erik Thuning dec2da4310 Added a preprocessor for arec recorders.
Also moved the code for creating a basic jobspec and pulling information from
the relevant daisy booking into the preprocessor superclass so it can be
called by both the cattura and arec preprocessors.
2024-06-04 15:53:46 +02:00

87 lines
3.2 KiB
Python

import json
import os
import re
from datetime import datetime, timedelta
from .preprocessor import Preprocessor
@Preprocessor.register
class CatturaProcessor(Preprocessor):
    """
    Preprocessing for cattura jobs.

    The recorder must be configured to at least provide the sources 'main'
    and 'camera', and 'camera' must be marked as the primary source.
    Further sources should be marked 'left' and 'right' as appropriate, with
    the positions relative to 'main'.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the superclass and compile the package path pattern."""
        super().__init__(*args, **kwargs)
        # Splits 'sftp://<host>/<dir>/<file>' into three groups: the
        # 'sftp://<host>/' prefix, the directory part and the final
        # path component.
        self.path_regex = re.compile(r'^(sftp://[^/]+/)(.+?)/([^/]+)$')

    def validate(self, queueitem):
        """Accept every queue item without inspecting it."""
        # No point in validating the very complex cattura format here
        return True

    def _preprocess(self, job):
        """
        Build a jobspec from a cattura queue item.

        Reads the mediapackage JSON file referenced by the queue item's
        published outputs, registers every 'media/' output as a source in
        the jobspec, and then hands the recording's time span and room to
        _fill_jobspec_from_daisy().

        Returns a dict with the keys 'jobid' and 'jobspec'.

        Raises ValueError if the mediapackage path does not have the
        expected sftp form or if the package contains no media outputs,
        and KeyError if an expected key is missing from the job or the
        mediapackage.
        """
        jobid = job['jobid']
        queueitem = job['queueitem']
        recorder = queueitem['recorder']
        room_id = self.config[recorder]
        data = queueitem['data']
        info = self._find_packageinfo(data['publishedOutputs'])
        presname = info['name']
        rawpath = info['path']

        match = self.path_regex.match(rawpath)
        if match is None:
            raise ValueError(
                'unexpected mediapackage path: {!r}'.format(rawpath))
        # The host prefix (group 1) is not needed, the package file is
        # opened through the local filesystem.
        path, pkgfile = match.group(2, 3)

        with open(os.path.join(path, pkgfile), encoding='utf-8') as f:
            mediapackage = json.load(f)

        outspec = self._init_jobspec(path, 0, presname)
        duration = None

        for key, media in mediapackage['outputs'].items():
            if not key.startswith('media/'):
                continue

            element = media['element']
            srcname = element['name']
            source = {'video': media['file'],
                      'poster': '',
                      'playAudio': False}

            # The duration is taken from the first media output only.
            # Presumably the timestamp is in milliseconds - TODO confirm.
            if duration is None:
                duration = element['duration']['timestamp'] / 1000

            # The name of the main stream should probably be configurable
            if srcname == 'main':
                end = int(element['creationDate'] / 1000)
                outspec['created'] = int(end - duration)

            # The source marked 'primary' in cattura doesn't get tagged
            # with its configured name, but instead gets the presentation
            # name. The camera is the primary, so we play the sound from
            # there for sync purposes. We also generate the subtitles from
            # there for the same reason.
            elif srcname == presname:
                source['playAudio'] = True
                subtitlespec = {'Generated': {'type': 'whisper',
                                              'source': srcname}}
                outspec['generate_subtitles'] = subtitlespec

            outspec['sources'][srcname] = source

        if duration is None:
            # Without this check timedelta() below would fail with an
            # unhelpful TypeError about NoneType.
            raise ValueError('mediapackage contains no media outputs')

        start = datetime.fromtimestamp(outspec['created'])
        end = start + timedelta(seconds=duration)
        self._fill_jobspec_from_daisy(start, end, room_id, outspec)

        return {'jobid': jobid,
                'jobspec': outspec}

    def _find_packageinfo(self, data):
        """
        Return the value of the first key in data starting with
        'mediapackage:'.

        Raises KeyError if no such key exists.
        """
        for key, value in data.items():
            if key.startswith('mediapackage:'):
                return value
        raise KeyError("no 'mediapackage' key in job specification")