# play-daemon/daemon/packager.py

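"""Packagers for the play daemon.

Each packager class implements pack(pres_id, queue_item) and returns a
common package dict (id, title, presenters, duration, sources, ...) for
the transcoder to work on:

* Mediasite -- downloads thumbnail, video streams and slides over HTTP.
* Manual    -- material is already on local disk; data is passed through.
* Cattura   -- reads a published mediapackage from a recorder and fills
               in metadata from the matching Daisy booking.
* Update    -- describes changes to an already published presentation.
"""
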
from datetime import datetime, timedelta
import json
import os
import re
import shutil
import logging
from requests.auth import HTTPBasicAuth
from requests.sessions import Session
from requests.exceptions import HTTPError
from daisy import DaisyHandler
class PackageException(Exception):
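    """Raised when a queue item cannot be turned into a valid package."""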
def __init__(self, package, problem):
self.package = package
self.problem = problem
class Mediasite:
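    """Packager for presentations delivered via Mediasite.

    All material (thumbnail, video streams, slides) is downloaded over
    HTTP with basic auth into a per-presentation directory under the
    configured incoming directory.
    """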
def __init__(self, config):
self.incoming = config['daemon']['incoming']
self.auth = HTTPBasicAuth(config['mediasite']['user'],
config['mediasite']['password'])
self.chunk_size = 10485760 # 10MiB; seems optimal for speed
# Tested 8k, 10MiB and 20MiB
self.logger = logging.getLogger('play-daemon')
def pack(self, pres_id, queue_item):
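        """Build the package dict for pres_id from a Mediasite queue item.

        Creates a fresh directory under incoming, downloads thumbnail,
        video sources and (if present) slides into it, and returns the
        package description for the transcoder. Streams that Mediasite
        has lost are skipped; missing thumbnails and posters are left
        empty so they can be generated later.
        """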
data = queue_item['data']
base = os.path.join(self.incoming, pres_id)
if os.path.exists(base):
shutil.rmtree(base)
os.mkdir(base)
presenters = []
for item in data['presenters']:
presenters.append(item.split('@')[0])
mypackage = {'id': pres_id,
'base': base,
'origin': queue_item['type'],
'creation': data['created'],
'title': data['title'],
'description': data.get('description', ''),
'thumb': '',
'presenters': presenters,
'courses': data['courses'],
'visibility': data.get('visibility', ''),
'duration': data['duration']/1000,
'tags': data['tags'],
'subtitles': '',
'sources': []}
if 'id' in data:
mypackage['notification_id'] = data['id']
# Create one session for all the downloads
with Session() as session:
session.auth = self.auth
session.stream = True
try:
mypackage['thumb'] = self._download(base,
data['thumb'],
session)
except HTTPError:
# Missing thumb can be generated
pass
# Download video sources, and store the information with a local
# file path in mypackage for the transcoder
stream_index = 0
for source in data['sources']:
mysource = {'video': None,
'name': source.get('name', str(stream_index)),
'poster': '',
'playAudio': source['playAudio']}
try:
mysource['video'] = self._download(base,
source['video'],
session)
except HTTPError:
# Mediasite has lost the stream, skip it.
m = '%s - Skipped missing mediasite stream: %s'
self.logger.info(m, pres_id, source['video'])
continue
try:
mysource['poster'] = self._download(base,
source['poster'],
session)
except HTTPError:
# Missing poster can be created later
pass
mypackage['sources'].append(mysource)
stream_index += 1
if 'slides' in data:
# Slides exist, create a package for creating
# a video from the slides
try:
mypackage['sources'].append(
self._download_slides(base, data, session))
except HTTPError:
# Another lost stream, nothing to do but ignore and continue
pass
return mypackage
    # Download a single file from Mediasite into base and return its local name
def _download(self, base, remotefile, session):
localname = remotefile.split('/')[-1]
localpath = os.path.join(base, localname)
r = session.get(remotefile, verify=False)
r.raise_for_status()
with open(localpath, 'xb') as f:
for chunk in r.iter_content(chunk_size=self.chunk_size):
f.write(chunk)
return localname
def _download_slides(self, base, data, session):
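        """Download all slides and describe them for the ffmpeg concat demuxer.

        Returns a source dict whose 'demux_file' entry points at a text
        file listing every slide image and its display duration, which
        the transcoder uses to render the slides as a video.
        """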
# https://trac.ffmpeg.org/wiki/Slideshow
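        # Rough sketch of how such a demux file can be consumed later
        # (illustrative only; the actual command lives in the transcoder):
        #   ffmpeg -f concat -safe 0 -i demux.txt slides.mp4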
slides_path = os.path.join(base, 'slides')
os.mkdir(slides_path)
slides = []
demux_file = os.path.join(slides_path, 'demux.txt')
with open(demux_file, 'w') as f:
f.write('ffconcat version 1.0\n')
num_slides = len(data['slides'])
            # Loop over all slides: download each one, calculate its display
            # duration, and build a text file holding all the info for the
            # ffmpeg concat demuxer
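            # For two slides shown for 5000 ms and 3000 ms the resulting
            # demux.txt would look roughly like this (file names are
            # hypothetical):
            #   ffconcat version 1.0
            #   file '/incoming/<id>/slides/slide_1.png'
            #   duration 5000ms
            #   file '/incoming/<id>/slides/slide_2.png'
            #   duration 3000ms
            #   file '/incoming/<id>/slides/slide_2.png'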
for i in range(num_slides):
slide = data['slides'][i]
# Download the source file
# and store the url as a local file path
slide_name = self._download(slides_path,
slide['url'],
session)
slide_path = os.path.join(slides_path,
slide_name)
                # Handle the edge cases for individual slide duration
if i == num_slides - 1: # last slide
duration = data['duration'] - int(slide['duration'])
else:
next_slide = data['slides'][i+1]
if i == 0: # first slide
duration = next_slide['duration']
else:
duration = (int(next_slide['duration'])
- int(slide['duration']))
                # Write this slide's entry to the demux file.
f.write(f"file '{slide_path}'\n")
# The format assumes seconds, so we specify ms.
# https://ffmpeg.org/ffmpeg-utils.html#Time-duration
f.write(f'duration {duration}ms\n')
slides.append({'url': slide_path,
'duration': f'{duration}ms'})
            # To accommodate an ffmpeg quirk, the last slide needs to be
            # listed twice
f.write(f"file '{slides[-1]['url']}'\n")
        # Return the slide info so pack() can add it to mypackage for
        # the transcoder to rework into an mp4 video
return {'demux_file': demux_file,
'name': 'slide',
'poster': slides[0]['url'],
'playAudio': False }
class Manual:
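    """Packager for manually uploaded presentations.

    The material is already on local disk, so the queue data is passed
    through mostly unchanged into the common package format.
    """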
def pack(self, pres_id, queue_item):
data = queue_item['data']
mypackage = {'id': pres_id,
'base': data['base'],
'origin': queue_item['type'],
                     # Transitional handling of the new field name:
'notification_id': data.get('notification_id',
data['id']),
'creation': data['created'],
'title': data['title'],
'description': data.get('description', ''),
'presenters': data['presenters'],
'courses': data['courses'],
'duration': data['duration'],
'thumb': data.get('thumb', ''),
'tags': data['tags'],
'subtitles': data.get('subtitles', ''),
'sources': []}
stream_index = 0
for source in data['sources']:
mysource = {'video': source['video'],
'name': source.get('name', str(stream_index)),
'poster': source.get('poster', ''),
'playAudio': source['playAudio']}
mypackage['sources'].append(mysource)
stream_index += 1
return mypackage
class Cattura:
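    """Packager for recordings published by Cattura recorders.

    Reads the published mediapackage from disk, turns its media outputs
    into sources, and fills in title, presenters, courses and description
    from the matching Daisy booking when one can be found.
    """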
def __init__(self, config):
self.recorders = config['recorders']
self.daisy = DaisyHandler(config)
self.path_regex = re.compile('^(sftp://[^/]+/)(.+?)/([^/]+)$')
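        # Splits a published path into host, directory and package file,
        # e.g. (hypothetical): 'sftp://recorder.example.org/some/dir/package.json'
        #   -> ('sftp://recorder.example.org/', 'some/dir', 'package.json')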
def pack(self, pres_id, queue_item):
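        """Build the package dict for a finished Cattura recording.

        Creation time and duration are taken from the 'main' media
        element; audio is played from the primary (camera) stream, which
        carries the presentation name rather than its configured name.
        """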
data = queue_item['data']
recorder = queue_item['recorder']
info = self._find_packageinfo(pres_id, data['publishedOutputs'])
name = info['name']
rawpath = info['path']
host, path, pkgfile = self.path_regex.match(rawpath).group(1, 2, 3)
mediapackage = None
with open(os.path.join(path, pkgfile)) as f:
mediapackage = json.load(f)
mypackage = {'id': pres_id,
'base': path,
'origin': 'cattura',
'creation': 0,
'title': {'sv': name,
'en': name},
'description': '',
'presenters': [],
'courses': [],
'duration': 0,
'thumb': '',
'tags': [],
'subtitles': '',
'sources': []}
outputs = mediapackage['outputs']
for key in outputs.keys():
if key.startswith('media/'):
media = outputs[key]
poster = ''
# The package format seems to not be consistent enough for this
#richfile = self._find_enrichment(
# media['element']['video']['sourceID'], mediapackage)
#if richfile:
# with open(os.path.join(path, richfile)) as f:
# enrichment = json.load(f)
# poster = self._find_poster(enrichment)
source = {'name': media['element']['name'],
'video': media['file'],
'poster': poster,
'playAudio': False}
if source['name'] == 'main':
end = int(media['element']['creationDate'] / 1000)
dur = media['element']['duration']['timestamp'] / 1000
mypackage['duration'] = dur
mypackage['creation'] = int(end - dur)
# The primary stream doesn't get tagged with its configured
# name, but instead gets the presentation name.
# The camera is the primary, so we play the sound from there
# for sync purposes
elif source['name'] == name:
source['playAudio'] = True
mypackage['sources'].append(source)
mypackage['tags'].append(
self.daisy.get_room_name(self.recorders[recorder]))
start = datetime.fromtimestamp(mypackage['creation'])
end = start + timedelta(seconds=mypackage['duration'])
booking = self.daisy.get_booking(start, end,
self.recorders[recorder])
if booking is not None:
title = {'sv': booking['displayStringLong']['swedish'],
'en': booking['displayStringLong']['english']}
mypackage['title'] = title
mypackage['presenters'] = booking['teachers']
if booking['description']:
mypackage['description'] = booking['description']
if not mypackage['presenters'] and booking['bookedBy']:
mypackage['presenters'].append(booking['bookedBy'])
mypackage['courses'] = booking['courseSegmentInstances']
# This is just an int and there is no way to get a string:
#mypackage['tags'].append(booking['educationalType'])
return mypackage
def _find_packageinfo(self, pres_id, data):
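        """Return the published output whose key starts with 'mediapackage:'.

        Raises PackageException if no such element exists.
        """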
for key in data.keys():
if key.startswith('mediapackage:'):
return data[key]
raise PackageException(json.dumps(data),
'cannot find mediapackage element')
def _find_enrichment(self, sourceid, mediapackage):
_, splitid = sourceid.split(',')
tocid = 'toc:' + splitid
try:
return mediapackage['outputs'][tocid]['file']
except KeyError:
return ''
def _find_poster(self, enrichment):
for frame in enrichment['entries']:
if frame['timestamp']['timestamp'] == 0:
return frame['screenshot']['path']
return ''
class Update:
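    """Packager for updates to an already published presentation.

    Mandatory fields are always copied; optional fields (thumb, subtitles
    and per-source poster/video) are only carried over when present in
    the queue data.
    """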
def pack(self, temp_id, queue_item):
data = queue_item['data']
# Mandatory fields
mypackage = {'id': temp_id,
'orig_id': data['id'],
'notification_id': data['notification_id'],
'base': data['base'],
'origin': queue_item['type'],
'creation': data['created'],
'title': data['title'],
                     'description': data.get('description', ''),
'presenters': data['presenters'],
'courses': data['courses'],
'duration': data['duration'],
'tags': data['tags'],
'delete': data['delete'],
'sources': []}
# Optional fields
for field in ['thumb', 'subtitles']:
if field in data:
mypackage[field] = data[field]
for source in data['sources']:
# Mandatory fields
mysource = {'name': source['name'],
'playAudio': source['playAudio']}
# Optional fields
for field in ['poster', 'video']:
if field in source:
mysource[field] = source[field]
mypackage['sources'].append(mysource)
return mypackage