105 lines
4.0 KiB
Python
105 lines
4.0 KiB
Python
import json
|
|
import os
|
|
import re
|
|
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from .preprocessor import Preprocessor
|
|
|
|
|
|
@Preprocessor.register
class ArecProcessor(Preprocessor):
    '''
    Preprocessing for arec jobs.

    Requires that at least CH1 be present in the upload. This is not checked
    by the validation function - it must be configured properly when the
    recorder is installed.

    CH1 is used for audio playback and subtitles generation, so should be
    the camera input.
    '''
    def __init__(self, *args, **kwargs):
        '''
        Forward all arguments to the base class and compile the regex used
        to recognise per-channel video files in an upload directory.
        '''
        super().__init__(*args, **kwargs)
        # This regex matches the name format for the individual channel files
        # in a capture, which is this format:
        # some-prefix_1970_12_31_23_45_00_CH?_Name.mp4
        # The above ? represents a number between 1 and 4.
        # Capture group 1 is there to make the upload name prefix optional.
        # Capture group 2 is only there to shorten the regex.
        # Capture group 3 captures the channel number,
        # capture group 4 captures the channel name.
        # A raw string keeps \d from being read as a string escape, and the
        # dot before the extension is escaped so only a literal '.mp4'
        # suffix is accepted.
        self.name_regex = re.compile(
            r'^([^_]+_)?\d{4}(_\d{2}){5}_CH([1-4])_([^.]+)\.mp4$')

    def validate(self, queueitem):
        '''
        Check that a queue item points at a usable arec upload.

        The queue item must contain an 'upload_dir' key naming an existing
        path, and the information.json file in that directory must provide
        the keys Title, Device_description, Start_time and End_time. The
        two timestamps must be parseable by datetime.fromisoformat.

        Returns True if all checks pass.

        Raises KeyError if 'upload_dir' or any required metadata key is
        missing, and ValueError if the upload directory does not exist or
        a timestamp is not valid ISO format. Errors raised while opening or
        parsing information.json are propagated unchanged.
        '''
        if 'upload_dir' not in queueitem:
            raise KeyError('upload_dir missing from job specification.')
        upload_dir = Path(queueitem['upload_dir'])
        if not upload_dir.exists():
            raise ValueError('Specified upload_dir does not exist.')

        arec_data = self._parse_arec_json(upload_dir)
        required_strings = ['Title',
                            'Device_description']
        required_datetimes = ['Start_time',
                              'End_time']
        for key in required_strings + required_datetimes:
            if key not in arec_data:
                raise KeyError(f'{key} missing from arec json file.')

        for key in required_datetimes:
            # Will throw an exception if the format is invalid
            datetime.fromisoformat(arec_data[key])

        return True

    def _preprocess(self, job):
        '''
        Build the job specification for an arec upload.

        Reads 'jobid' and 'queueitem' from the passed job, loads the
        recording metadata from the upload directory, and initialises the
        jobspec via _init_jobspec and _fill_jobspec_from_daisy. Every file
        in the upload directory whose name matches self.name_regex is added
        to the jobspec's 'sources' under the key 'Channel N'. Only channel 1
        has playAudio enabled, and subtitle generation from channel 1 is
        configured only if that channel is present.

        Returns a dict with the keys 'jobid' and 'jobspec'.
        '''
        jobid = job['jobid']
        queueitem = job['queueitem']
        upload_dir = Path(queueitem['upload_dir'])
        arec_data = self._parse_arec_json(upload_dir)
        raw_title = arec_data['Title']
        recorder = arec_data['Device_description']
        # The room id is looked up by the recorder's device description;
        # a recorder missing from self.config raises here, since validate()
        # does not check for it.
        room_id = self.config[recorder]
        # Any timezone information in the metadata is discarded, leaving
        # naive wall-clock times.
        starttime_tz = datetime.fromisoformat(arec_data['Start_time'])
        starttime = starttime_tz.replace(tzinfo=None)
        endtime_tz = datetime.fromisoformat(arec_data['End_time'])
        endtime = endtime_tz.replace(tzinfo=None)

        # NOTE(review): timestamp() on a naive datetime interprets it in
        # the local timezone of the machine running this code - confirm
        # that this is the intended reference for the job timestamp.
        start_timestamp = int(starttime.timestamp())
        outspec = self._init_jobspec(upload_dir,
                                     start_timestamp,
                                     raw_title)

        self._fill_jobspec_from_daisy(starttime, endtime, room_id, outspec)

        # Populate the sources
        sources = {}
        for item in upload_dir.iterdir():
            match = self.name_regex.match(item.name)
            if match:
                # Only the channel number (group 3) is needed; the channel
                # name captured by group 4 is not used in the jobspec.
                item_channel_no = match.group(3)
                source = {'video': item.name,
                          'poster': '',
                          'playAudio': False}
                if item_channel_no == '1':
                    source['playAudio'] = True
                sources[f'Channel {item_channel_no}'] = source
        outspec['sources'] = sources

        # Configure subtitle generation settings
        if 'Channel 1' in sources:
            outspec['generate_subtitles'] = {'Generated':
                                             {'type': 'whisper',
                                              'source': 'Channel 1'}}

        return {'jobid': jobid,
                'jobspec': outspec}

    def _parse_arec_json(self, upload_dir: Path) -> dict:
        '''
        Load information.json from the upload directory.

        The file is read as a sequence of objects that each have a 'name'
        and a 'value' member, and is returned flattened into a single
        dict mapping each name to its value.

        The file is opened explicitly as UTF-8 so that parsing does not
        depend on the locale of the machine running the job.
        '''
        with open(upload_dir / 'information.json', encoding='utf-8') as f:
            data = json.load(f)
        return {item['name']: item['value'] for item in data}
|